LLVM 19.0.0git
PPCISelLowering.cpp
Go to the documentation of this file.
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCCState.h"
18#include "PPCCallingConv.h"
19#include "PPCFrameLowering.h"
20#include "PPCInstrInfo.h"
22#include "PPCPerfectShuffle.h"
23#include "PPCRegisterInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/SmallSet.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/ADT/StringRef.h"
60#include "llvm/IR/CallingConv.h"
61#include "llvm/IR/Constant.h"
62#include "llvm/IR/Constants.h"
63#include "llvm/IR/DataLayout.h"
64#include "llvm/IR/DebugLoc.h"
66#include "llvm/IR/Function.h"
67#include "llvm/IR/GlobalValue.h"
68#include "llvm/IR/IRBuilder.h"
70#include "llvm/IR/Intrinsics.h"
71#include "llvm/IR/IntrinsicsPowerPC.h"
72#include "llvm/IR/Module.h"
73#include "llvm/IR/Type.h"
74#include "llvm/IR/Use.h"
75#include "llvm/IR/Value.h"
76#include "llvm/MC/MCContext.h"
77#include "llvm/MC/MCExpr.h"
87#include "llvm/Support/Debug.h"
89#include "llvm/Support/Format.h"
95#include <algorithm>
96#include <cassert>
97#include <cstdint>
98#include <iterator>
99#include <list>
100#include <optional>
101#include <utility>
102#include <vector>
103
104using namespace llvm;
105
106#define DEBUG_TYPE "ppc-lowering"
107
108static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
109cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
110
111static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
112cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
113
114static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
115cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
116
117static cl::opt<bool> DisableSCO("disable-ppc-sco",
118cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
119
120static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
121cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
122
123static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
124cl::desc("use absolute jump tables on ppc"), cl::Hidden);
125
126static cl::opt<bool>
127 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
128 cl::desc("disable vector permute decomposition"),
129 cl::init(true), cl::Hidden);
130
132 "disable-auto-paired-vec-st",
133 cl::desc("disable automatically generated 32byte paired vector stores"),
134 cl::init(true), cl::Hidden);
135
137 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
138 cl::desc("Set minimum number of entries to use a jump table on PPC"));
139
141 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
142 cl::desc("max depth when checking alias info in GatherAllAliases()"));
143
144STATISTIC(NumTailCalls, "Number of tail calls");
145STATISTIC(NumSiblingCalls, "Number of sibling calls");
146STATISTIC(ShufflesHandledWithVPERM,
147 "Number of shuffles lowered to a VPERM or XXPERM");
148STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
149
150static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
151
152static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
153
154static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
155
156// A faster local-[exec|dynamic] TLS access sequence (enabled with the
157// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
158// variables; consistent with the IBM XL compiler, we apply a max size of
159// slightly under 32KB.
161
162// FIXME: Remove this once the bug has been fixed!
164
166 const PPCSubtarget &STI)
167 : TargetLowering(TM), Subtarget(STI) {
168 // Initialize map that relates the PPC addressing modes to the computed flags
169 // of a load/store instruction. The map is used to determine the optimal
170 // addressing mode when selecting load and stores.
171 initializeAddrModeMap();
172 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
173 // arguments are at least 4/8 bytes aligned.
174 bool isPPC64 = Subtarget.isPPC64();
175 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
176
177 // Set up the register classes.
178 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
179 if (!useSoftFloat()) {
180 if (hasSPE()) {
181 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
182 // EFPU2 APU only supports f32
183 if (!Subtarget.hasEFPU2())
184 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
185 } else {
186 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
187 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
188 }
189 }
190
191 // Match BITREVERSE to customized fast code sequence in the td file.
194
195 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
197
198 // Custom lower inline assembly to check for special registers.
201
202 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
203 for (MVT VT : MVT::integer_valuetypes()) {
206 }
207
208 if (Subtarget.isISA3_0()) {
209 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
210 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
211 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
212 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
213 } else {
214 // No extending loads from f16 or HW conversions back and forth.
215 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
218 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
221 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
222 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
223 }
224
225 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
226
227 // PowerPC has pre-inc load and store's.
238 if (!Subtarget.hasSPE()) {
243 }
244
245 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
246 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
247 for (MVT VT : ScalarIntVTs) {
252 }
253
254 if (Subtarget.useCRBits()) {
256
257 if (isPPC64 || Subtarget.hasFPCVT()) {
260 isPPC64 ? MVT::i64 : MVT::i32);
263 isPPC64 ? MVT::i64 : MVT::i32);
264
267 isPPC64 ? MVT::i64 : MVT::i32);
270 isPPC64 ? MVT::i64 : MVT::i32);
271
274 isPPC64 ? MVT::i64 : MVT::i32);
277 isPPC64 ? MVT::i64 : MVT::i32);
278
281 isPPC64 ? MVT::i64 : MVT::i32);
284 isPPC64 ? MVT::i64 : MVT::i32);
285 } else {
290 }
291
292 // PowerPC does not support direct load/store of condition registers.
295
296 // FIXME: Remove this once the ANDI glue bug is fixed:
297 if (ANDIGlueBug)
299
300 for (MVT VT : MVT::integer_valuetypes()) {
303 setTruncStoreAction(VT, MVT::i1, Expand);
304 }
305
306 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
307 }
308
309 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
310 // PPC (the libcall is not available).
315
316 // We do not currently implement these libm ops for PowerPC.
317 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
318 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
319 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
320 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
322 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
323
324 // PowerPC has no SREM/UREM instructions unless we are on P9
325 // On P9 we may use a hardware instruction to compute the remainder.
326 // When the result of both the remainder and the division is required it is
327 // more efficient to compute the remainder from the result of the division
328 // rather than use the remainder instruction. The instructions are legalized
329 // directly because the DivRemPairsPass performs the transformation at the IR
330 // level.
331 if (Subtarget.isISA3_0()) {
336 } else {
341 }
342
343 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
352
353 // Handle constrained floating-point operations of scalar.
354 // TODO: Handle SPE specific operation.
360
365
366 if (!Subtarget.hasSPE()) {
369 }
370
371 if (Subtarget.hasVSX()) {
374 }
375
376 if (Subtarget.hasFSQRT()) {
379 }
380
381 if (Subtarget.hasFPRND()) {
386
391 }
392
393 // We don't support sin/cos/sqrt/fmod/pow
404
405 // MASS transformation for LLVM intrinsics with replicating fast-math flag
406 // to be consistent to PPCGenScalarMASSEntries pass
407 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
420 }
421
422 if (Subtarget.hasSPE()) {
425 } else {
426 setOperationAction(ISD::FMA , MVT::f64, Legal);
427 setOperationAction(ISD::FMA , MVT::f32, Legal);
428 }
429
430 if (Subtarget.hasSPE())
431 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
432
434
435 // If we're enabling GP optimizations, use hardware square root
436 if (!Subtarget.hasFSQRT() &&
437 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
438 Subtarget.hasFRE()))
440
441 if (!Subtarget.hasFSQRT() &&
442 !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
443 Subtarget.hasFRES()))
445
446 if (Subtarget.hasFCPSGN()) {
449 } else {
452 }
453
454 if (Subtarget.hasFPRND()) {
459
464 }
465
466 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
467 // instruction xxbrd to speed up scalar BSWAP64.
468 if (Subtarget.isISA3_1()) {
471 } else {
474 ISD::BSWAP, MVT::i64,
475 (Subtarget.hasP9Vector() && Subtarget.isPPC64()) ? Custom : Expand);
476 }
477
478 // CTPOP or CTTZ were introduced in P8/P9 respectively
479 if (Subtarget.isISA3_0()) {
480 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
481 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
482 } else {
483 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
484 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
485 }
486
487 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
490 } else {
493 }
494
495 // PowerPC does not have ROTR
498
499 if (!Subtarget.useCRBits()) {
500 // PowerPC does not have Select
505 }
506
507 // PowerPC wants to turn select_cc of FP into fsel when possible.
510
511 // PowerPC wants to optimize integer setcc a bit
512 if (!Subtarget.useCRBits())
514
515 if (Subtarget.hasFPU()) {
519
523 }
524
525 // PowerPC does not have BRCOND which requires SetCC
526 if (!Subtarget.useCRBits())
528
530
531 if (Subtarget.hasSPE()) {
532 // SPE has built-in conversions
539
540 // SPE supports signaling compare of f32/f64.
543 } else {
544 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
547
548 // PowerPC does not have [U|S]INT_TO_FP
553 }
554
555 if (Subtarget.hasDirectMove() && isPPC64) {
560 if (TM.Options.UnsafeFPMath) {
569 }
570 } else {
575 }
576
577 // We cannot sextinreg(i1). Expand to shifts.
579
580 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
581 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
582 // support continuation, user-level threading, and etc.. As a result, no
583 // other SjLj exception interfaces are implemented and please don't build
584 // your own exception handling based on them.
585 // LLVM/Clang supports zero-cost DWARF exception handling.
588
589 // We want to legalize GlobalAddress and ConstantPool nodes into the
590 // appropriate instructions to materialize the address.
601
602 // TRAP is legal.
603 setOperationAction(ISD::TRAP, MVT::Other, Legal);
604
605 // TRAMPOLINE is custom lowered.
608
609 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
611
612 if (Subtarget.is64BitELFABI()) {
613 // VAARG always uses double-word chunks, so promote anything smaller.
615 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
617 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
619 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
621 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
623 } else if (Subtarget.is32BitELFABI()) {
624 // VAARG is custom lowered with the 32-bit SVR4 ABI.
627 } else
629
630 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
631 if (Subtarget.is32BitELFABI())
633 else
635
636 // Use the default implementation.
637 setOperationAction(ISD::VAEND , MVT::Other, Expand);
646
647 // We want to custom lower some of our intrinsics.
653
654 // To handle counter-based loop conditions.
656
661
662 // Comparisons that require checking two conditions.
663 if (Subtarget.hasSPE()) {
668 }
681
684
685 if (Subtarget.has64BitSupport()) {
686 // They also have instructions for converting between i64 and fp.
695 // This is just the low 32 bits of a (signed) fp->i64 conversion.
696 // We cannot do this with Promote because i64 is not a legal type.
699
700 if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
703 }
704 } else {
705 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
706 if (Subtarget.hasSPE()) {
709 } else {
712 }
713 }
714
715 // With the instructions enabled under FPCVT, we can do everything.
716 if (Subtarget.hasFPCVT()) {
717 if (Subtarget.has64BitSupport()) {
726 }
727
736 }
737
738 if (Subtarget.use64BitRegs()) {
739 // 64-bit PowerPC implementations can support i64 types directly
740 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
741 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
743 // 64-bit PowerPC wants to expand i128 shifts itself.
747 } else {
748 // 32-bit PowerPC wants to expand i64 shifts itself.
752 }
753
754 // PowerPC has better expansions for funnel shifts than the generic
755 // TargetLowering::expandFunnelShift.
756 if (Subtarget.has64BitSupport()) {
759 }
762
763 if (Subtarget.hasVSX()) {
768 }
769
770 if (Subtarget.hasAltivec()) {
771 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
776 }
777 // First set operation action for all vector types to expand. Then we
778 // will selectively turn on ones that can be effectively codegen'd.
780 // add/sub are legal for all supported vector VT's.
783
784 // For v2i64, these are only valid with P8Vector. This is corrected after
785 // the loop.
786 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
791 }
792 else {
797 }
798
799 if (Subtarget.hasVSX()) {
802 }
803
804 // Vector instructions introduced in P8
805 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
808 }
809 else {
812 }
813
814 // Vector instructions introduced in P9
815 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
817 else
819
820 // We promote all shuffles to v16i8.
822 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
823
824 // We promote all non-typed operations to v4i32.
826 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
828 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
830 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
832 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
834 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
837 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
839 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
840
841 // No other operations are legal.
880
881 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
882 setTruncStoreAction(VT, InnerVT, Expand);
885 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
886 }
887 }
889 if (!Subtarget.hasP8Vector()) {
890 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
891 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
892 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
893 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
894 }
895
896 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
897 // with merges, splats, etc.
899
900 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
901 // are cheap, so handle them before they get expanded to scalar.
907
908 setOperationAction(ISD::AND , MVT::v4i32, Legal);
909 setOperationAction(ISD::OR , MVT::v4i32, Legal);
910 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
911 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
913 Subtarget.useCRBits() ? Legal : Expand);
914 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
924 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
927
928 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
929 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
930 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
931 if (Subtarget.hasAltivec())
932 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
934 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
935 if (Subtarget.hasP8Altivec())
936 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
937
938 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
939 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
940 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
941 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
942
943 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
944 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
945
946 if (Subtarget.hasVSX()) {
947 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
948 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
950 }
951
952 if (Subtarget.hasP8Altivec())
953 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
954 else
955 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
956
957 if (Subtarget.isISA3_1()) {
958 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
959 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
960 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
961 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
962 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
963 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
964 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
965 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
966 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
967 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
968 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
969 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
970 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
971 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
972 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
973 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
974 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
975 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
976 }
977
978 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
979 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
980
983
988
989 // Altivec does not contain unordered floating-point compare instructions
990 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
992 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
994
995 if (Subtarget.hasVSX()) {
998 if (Subtarget.hasP8Vector()) {
1001 }
1002 if (Subtarget.hasDirectMove() && isPPC64) {
1011 }
1013
1014 // The nearbyint variants are not allowed to raise the inexact exception
1015 // so we can only code-gen them with unsafe math.
1016 if (TM.Options.UnsafeFPMath) {
1019 }
1020
1021 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1022 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1023 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1025 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1026 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1029
1031 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1032 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1035
1036 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1037 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1038
1039 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1040 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1041
1042 // Share the Altivec comparison restrictions.
1043 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1044 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1045 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1046 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1047
1048 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1049 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1050
1052
1053 if (Subtarget.hasP8Vector())
1054 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1055
1056 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1057
1058 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1059 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1060 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1061
1062 if (Subtarget.hasP8Altivec()) {
1063 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1064 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1065 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1066
1067 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1068 // SRL, but not for SRA because of the instructions available:
1069 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1070 // doing
1071 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1072 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1073 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1074
1075 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1076 }
1077 else {
1078 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1079 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1080 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1081
1082 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1083
1084 // VSX v2i64 only supports non-arithmetic operations.
1085 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1086 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1087 }
1088
1089 if (Subtarget.isISA3_1())
1090 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1091 else
1092 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1093
1094 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1095 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1097 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1098
1100
1109
1110 // Custom handling for partial vectors of integers converted to
1111 // floating point. We already have optimal handling for v2i32 through
1112 // the DAG combine, so those aren't necessary.
1129
1130 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1131 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1132 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1133 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1136
1139
1140 // Handle constrained floating-point operations of vector.
1141 // The predictor is `hasVSX` because altivec instruction has
1142 // no exception but VSX vector instruction has.
1156
1170
1171 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1172 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1173
1174 for (MVT FPT : MVT::fp_valuetypes())
1175 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1176
1177 // Expand the SELECT to SELECT_CC
1179
1180 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1181 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1182
1183 // No implementation for these ops for PowerPC.
1185 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1186 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1187 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1189 setOperationAction(ISD::FREM, MVT::f128, Expand);
1190 }
1191
1192 if (Subtarget.hasP8Altivec()) {
1193 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1194 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1195 }
1196
1197 if (Subtarget.hasP9Vector()) {
1200
1201 // Test data class instructions store results in CR bits.
1202 if (Subtarget.useCRBits()) {
1206 }
1207
1208 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1209 // SRL, but not for SRA because of the instructions available:
1210 // VS{RL} and VS{RL}O.
1211 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1212 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1213 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1214
1215 setOperationAction(ISD::FADD, MVT::f128, Legal);
1216 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1217 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1218 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1220
1221 setOperationAction(ISD::FMA, MVT::f128, Legal);
1228
1230 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1232 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1235
1239
1240 // Handle constrained floating-point operations of fp128
1257 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1258 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1259 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1260 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1261 } else if (Subtarget.hasVSX()) {
1264
1265 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1266 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1267
1268 // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1269 // fp_to_uint and int_to_fp.
1272
1273 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1274 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1275 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1276 setOperationAction(ISD::FABS, MVT::f128, Expand);
1278 setOperationAction(ISD::FMA, MVT::f128, Expand);
1280
1281 // Expand the fp_extend if the target type is fp128.
1284
1285 // Expand the fp_round if the source type is fp128.
1286 for (MVT VT : {MVT::f32, MVT::f64}) {
1289 }
1290
1295
1296 // Lower following f128 select_cc pattern:
1297 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1299
1300 // We need to handle f128 SELECT_CC with integer result type.
1302 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1303 }
1304
1305 if (Subtarget.hasP9Altivec()) {
1306 if (Subtarget.isISA3_1()) {
1311 } else {
1314 }
1322
1323 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1324 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1325 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1326 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1327 }
1328
1329 if (Subtarget.hasP10Vector()) {
1331 }
1332 }
1333
1334 if (Subtarget.pairedVectorMemops()) {
1335 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1336 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1337 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1338 }
1339 if (Subtarget.hasMMA()) {
1340 if (Subtarget.isISAFuture())
1341 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1342 else
1343 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1344 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1345 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1347 }
1348
1349 if (Subtarget.has64BitSupport())
1351
1352 if (Subtarget.isISA3_1())
1353 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1354
1355 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1356
1357 if (!isPPC64) {
1360 }
1361
1366 }
1367
1369
1370 if (Subtarget.hasAltivec()) {
1371 // Altivec instructions set fields to all zeros or all ones.
1373 }
1374
1375 setLibcallName(RTLIB::MULO_I128, nullptr);
1376 if (!isPPC64) {
1377 // These libcalls are not available in 32-bit.
1378 setLibcallName(RTLIB::SHL_I128, nullptr);
1379 setLibcallName(RTLIB::SRL_I128, nullptr);
1380 setLibcallName(RTLIB::SRA_I128, nullptr);
1381 setLibcallName(RTLIB::MUL_I128, nullptr);
1382 setLibcallName(RTLIB::MULO_I64, nullptr);
1383 }
1384
1387 else if (isPPC64)
1389 else
1391
1392 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1393
1394 // We have target-specific dag combine patterns for the following nodes:
1397 if (Subtarget.hasFPCVT())
1400 if (Subtarget.useCRBits())
1404
1406
1408
1409 if (Subtarget.useCRBits()) {
1411 }
1412
1413 setLibcallName(RTLIB::LOG_F128, "logf128");
1414 setLibcallName(RTLIB::LOG2_F128, "log2f128");
1415 setLibcallName(RTLIB::LOG10_F128, "log10f128");
1416 setLibcallName(RTLIB::EXP_F128, "expf128");
1417 setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1418 setLibcallName(RTLIB::SIN_F128, "sinf128");
1419 setLibcallName(RTLIB::COS_F128, "cosf128");
1420 setLibcallName(RTLIB::SINCOS_F128, "sincosf128");
1421 setLibcallName(RTLIB::POW_F128, "powf128");
1422 setLibcallName(RTLIB::FMIN_F128, "fminf128");
1423 setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1424 setLibcallName(RTLIB::REM_F128, "fmodf128");
1425 setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1426 setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1427 setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1428 setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1429 setLibcallName(RTLIB::ROUND_F128, "roundf128");
1430 setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1431 setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1432 setLibcallName(RTLIB::RINT_F128, "rintf128");
1433 setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1434 setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1435 setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1436 setLibcallName(RTLIB::FMA_F128, "fmaf128");
1437 setLibcallName(RTLIB::FREXP_F128, "frexpf128");
1438
1439 if (Subtarget.isAIXABI()) {
1440 setLibcallName(RTLIB::MEMCPY, isPPC64 ? "___memmove64" : "___memmove");
1441 setLibcallName(RTLIB::MEMMOVE, isPPC64 ? "___memmove64" : "___memmove");
1442 setLibcallName(RTLIB::MEMSET, isPPC64 ? "___memset64" : "___memset");
1443 setLibcallName(RTLIB::BZERO, isPPC64 ? "___bzero64" : "___bzero");
1444 }
1445
1446 // With 32 condition bits, we don't need to sink (and duplicate) compares
1447 // aggressively in CodeGenPrep.
1448 if (Subtarget.useCRBits()) {
1451 }
1452
1453 // TODO: The default entry number is set to 64. This stops most jump table
1454 // generation on PPC. But it is good for current PPC HWs because the indirect
1455 // branch instruction mtctr to the jump table may lead to bad branch predict.
1456 // Re-evaluate this value on future HWs that can do better with mtctr.
1458
1460
1461 switch (Subtarget.getCPUDirective()) {
1462 default: break;
1463 case PPC::DIR_970:
1464 case PPC::DIR_A2:
1465 case PPC::DIR_E500:
1466 case PPC::DIR_E500mc:
1467 case PPC::DIR_E5500:
1468 case PPC::DIR_PWR4:
1469 case PPC::DIR_PWR5:
1470 case PPC::DIR_PWR5X:
1471 case PPC::DIR_PWR6:
1472 case PPC::DIR_PWR6X:
1473 case PPC::DIR_PWR7:
1474 case PPC::DIR_PWR8:
1475 case PPC::DIR_PWR9:
1476 case PPC::DIR_PWR10:
1480 break;
1481 }
1482
1483 if (Subtarget.enableMachineScheduler())
1485 else
1487
1489
1490 // The Freescale cores do better with aggressive inlining of memcpy and
1491 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1492 if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1493 Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1494 MaxStoresPerMemset = 32;
1496 MaxStoresPerMemcpy = 32;
1500 } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1501 // The A2 also benefits from (very) aggressive inlining of memcpy and
1502 // friends. The overhead of a the function call, even when warm, can be
1503 // over one hundred cycles.
1504 MaxStoresPerMemset = 128;
1505 MaxStoresPerMemcpy = 128;
1506 MaxStoresPerMemmove = 128;
1507 MaxLoadsPerMemcmp = 128;
1508 } else {
1511 }
1512
1513 IsStrictFPEnabled = true;
1514
1515 // Let the subtarget (CPU) decide if a predictable select is more expensive
1516 // than the corresponding branch. This information is used in CGP to decide
1517 // when to convert selects into branches.
1519
1521}
1522
1523// *********************************** NOTE ************************************
1524// For selecting load and store instructions, the addressing modes are defined
1525// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1526// patterns to match the load the store instructions.
1527//
1528// The TD definitions for the addressing modes correspond to their respective
1529// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1530// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1531// address mode flags of a particular node. Afterwards, the computed address
1532// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1533// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1534// accordingly, based on the preferred addressing mode.
1535//
1536// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1537// MemOpFlags contains all the possible flags that can be used to compute the
1538// optimal addressing mode for load and store instructions.
1539// AddrMode contains all the possible load and store addressing modes available
1540// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1541//
1542// When adding new load and store instructions, it is possible that new address
1543// flags may need to be added into MemOpFlags, and a new addressing mode will
1544// need to be added to AddrMode. An entry of the new addressing mode (consisting
1545// of the minimal and main distinguishing address flags for the new load/store
1546// instructions) will need to be added into initializeAddrModeMap() below.
1547// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1548// need to be updated to account for selecting the optimal addressing mode.
1549// *****************************************************************************
1550/// Initialize the map that relates the different addressing modes of the load
1551/// and store instructions to a set of flags. This ensures the load/store
1552/// instruction is correctly matched during instruction selection.
1553void PPCTargetLowering::initializeAddrModeMap() {
1554 AddrModesMap[PPC::AM_DForm] = {
1555 // LWZ, STW
1560 // LBZ, LHZ, STB, STH
1565 // LHA
1570 // LFS, LFD, STFS, STFD
1575 };
1576 AddrModesMap[PPC::AM_DSForm] = {
1577 // LWA
1581 // LD, STD
1585 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1589 };
1590 AddrModesMap[PPC::AM_DQForm] = {
1591 // LXV, STXV
1595 };
1596 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1598 // TODO: Add mapping for quadword load/store.
1599}
1600
1601/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1602/// the desired ByVal argument alignment.
1603static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1604 if (MaxAlign == MaxMaxAlign)
1605 return;
1606 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1607 if (MaxMaxAlign >= 32 &&
1608 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1609 MaxAlign = Align(32);
1610 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1611 MaxAlign < 16)
1612 MaxAlign = Align(16);
1613 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1614 Align EltAlign;
1615 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1616 if (EltAlign > MaxAlign)
1617 MaxAlign = EltAlign;
1618 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1619 for (auto *EltTy : STy->elements()) {
1620 Align EltAlign;
1621 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1622 if (EltAlign > MaxAlign)
1623 MaxAlign = EltAlign;
1624 if (MaxAlign == MaxMaxAlign)
1625 break;
1626 }
1627 }
1628}
1629
1630/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1631/// function arguments in the caller parameter area.
1633 const DataLayout &DL) const {
1634 // 16byte and wider vectors are passed on 16byte boundary.
1635 // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1636 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1637 if (Subtarget.hasAltivec())
1638 getMaxByValAlign(Ty, Alignment, Align(16));
1639 return Alignment.value();
1640}
1641
1643 return Subtarget.useSoftFloat();
1644}
1645
1647 return Subtarget.hasSPE();
1648}
1649
1651 return VT.isScalarInteger();
1652}
1653
1655 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1656 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1657 return false;
1658
1659 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1660 if (VTy->getScalarType()->isIntegerTy()) {
1661 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1662 if (ElemSizeInBits == 32) {
1663 Index = Subtarget.isLittleEndian() ? 2 : 1;
1664 return true;
1665 }
1666 if (ElemSizeInBits == 64) {
1667 Index = Subtarget.isLittleEndian() ? 1 : 0;
1668 return true;
1669 }
1670 }
1671 }
1672 return false;
1673}
1674
1675const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1676 switch ((PPCISD::NodeType)Opcode) {
1677 case PPCISD::FIRST_NUMBER: break;
1678 case PPCISD::FSEL: return "PPCISD::FSEL";
1679 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1680 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1681 case PPCISD::FCFID: return "PPCISD::FCFID";
1682 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1683 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1684 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1685 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1686 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1687 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1688 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1689 case PPCISD::FRE: return "PPCISD::FRE";
1690 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1691 case PPCISD::FTSQRT:
1692 return "PPCISD::FTSQRT";
1693 case PPCISD::FSQRT:
1694 return "PPCISD::FSQRT";
1695 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1696 case PPCISD::VPERM: return "PPCISD::VPERM";
1697 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1699 return "PPCISD::XXSPLTI_SP_TO_DP";
1701 return "PPCISD::XXSPLTI32DX";
1702 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1703 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1704 case PPCISD::XXPERM:
1705 return "PPCISD::XXPERM";
1706 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1707 case PPCISD::CMPB: return "PPCISD::CMPB";
1708 case PPCISD::Hi: return "PPCISD::Hi";
1709 case PPCISD::Lo: return "PPCISD::Lo";
1710 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1711 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1712 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1713 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1714 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1715 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1716 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1717 case PPCISD::SRL: return "PPCISD::SRL";
1718 case PPCISD::SRA: return "PPCISD::SRA";
1719 case PPCISD::SHL: return "PPCISD::SHL";
1720 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1721 case PPCISD::CALL: return "PPCISD::CALL";
1722 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1723 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1724 case PPCISD::CALL_RM:
1725 return "PPCISD::CALL_RM";
1727 return "PPCISD::CALL_NOP_RM";
1729 return "PPCISD::CALL_NOTOC_RM";
1730 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1731 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1732 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1733 case PPCISD::BCTRL_RM:
1734 return "PPCISD::BCTRL_RM";
1736 return "PPCISD::BCTRL_LOAD_TOC_RM";
1737 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1738 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1739 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1740 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1741 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1742 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1743 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1744 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1745 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1746 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1748 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1750 return "PPCISD::ANDI_rec_1_EQ_BIT";
1752 return "PPCISD::ANDI_rec_1_GT_BIT";
1753 case PPCISD::VCMP: return "PPCISD::VCMP";
1754 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1755 case PPCISD::LBRX: return "PPCISD::LBRX";
1756 case PPCISD::STBRX: return "PPCISD::STBRX";
1757 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1758 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1759 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1760 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1761 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1762 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1763 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1764 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1765 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1767 return "PPCISD::ST_VSR_SCAL_INT";
1768 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1769 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1770 case PPCISD::BDZ: return "PPCISD::BDZ";
1771 case PPCISD::MFFS: return "PPCISD::MFFS";
1772 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1773 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1774 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1775 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1776 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1777 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1778 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1779 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1780 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1781 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1782 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1783 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1784 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1785 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1786 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1787 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1788 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1789 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1790 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1791 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1792 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1793 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1794 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1796 return "PPCISD::PADDI_DTPREL";
1797 case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1798 case PPCISD::SC: return "PPCISD::SC";
1799 case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1800 case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1801 case PPCISD::RFEBB: return "PPCISD::RFEBB";
1802 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1803 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1804 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1805 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1806 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1807 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1808 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1809 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1810 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1812 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1814 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1815 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1816 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1817 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1818 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1819 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1820 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1821 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1822 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1824 return "PPCISD::STRICT_FADDRTZ";
1826 return "PPCISD::STRICT_FCTIDZ";
1828 return "PPCISD::STRICT_FCTIWZ";
1830 return "PPCISD::STRICT_FCTIDUZ";
1832 return "PPCISD::STRICT_FCTIWUZ";
1834 return "PPCISD::STRICT_FCFID";
1836 return "PPCISD::STRICT_FCFIDU";
1838 return "PPCISD::STRICT_FCFIDS";
1840 return "PPCISD::STRICT_FCFIDUS";
1841 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1842 case PPCISD::STORE_COND:
1843 return "PPCISD::STORE_COND";
1844 }
1845 return nullptr;
1846}
1847
1849 EVT VT) const {
1850 if (!VT.isVector())
1851 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1852
1854}
1855
1857 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1858 return true;
1859}
1860
1861//===----------------------------------------------------------------------===//
1862// Node matching predicates, for use by the tblgen matching code.
1863//===----------------------------------------------------------------------===//
1864
1865/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1867 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1868 return CFP->getValueAPF().isZero();
1869 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1870 // Maybe this has already been legalized into the constant pool?
1871 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1872 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1873 return CFP->getValueAPF().isZero();
1874 }
1875 return false;
1876}
1877
1878/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1879/// true if Op is undef or if it matches the specified value.
1880static bool isConstantOrUndef(int Op, int Val) {
1881 return Op < 0 || Op == Val;
1882}
1883
1884/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1885/// VPKUHUM instruction.
1886/// The ShuffleKind distinguishes between big-endian operations with
1887/// two different inputs (0), either-endian operations with two identical
1888/// inputs (1), and little-endian operations with two different inputs (2).
1889/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1891 SelectionDAG &DAG) {
1892 bool IsLE = DAG.getDataLayout().isLittleEndian();
1893 if (ShuffleKind == 0) {
1894 if (IsLE)
1895 return false;
1896 for (unsigned i = 0; i != 16; ++i)
1897 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1898 return false;
1899 } else if (ShuffleKind == 2) {
1900 if (!IsLE)
1901 return false;
1902 for (unsigned i = 0; i != 16; ++i)
1903 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1904 return false;
1905 } else if (ShuffleKind == 1) {
1906 unsigned j = IsLE ? 0 : 1;
1907 for (unsigned i = 0; i != 8; ++i)
1908 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1909 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1910 return false;
1911 }
1912 return true;
1913}
1914
1915/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1916/// VPKUWUM instruction.
1917/// The ShuffleKind distinguishes between big-endian operations with
1918/// two different inputs (0), either-endian operations with two identical
1919/// inputs (1), and little-endian operations with two different inputs (2).
1920/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1922 SelectionDAG &DAG) {
1923 bool IsLE = DAG.getDataLayout().isLittleEndian();
1924 if (ShuffleKind == 0) {
1925 if (IsLE)
1926 return false;
1927 for (unsigned i = 0; i != 16; i += 2)
1928 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1929 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1930 return false;
1931 } else if (ShuffleKind == 2) {
1932 if (!IsLE)
1933 return false;
1934 for (unsigned i = 0; i != 16; i += 2)
1935 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1936 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1937 return false;
1938 } else if (ShuffleKind == 1) {
1939 unsigned j = IsLE ? 0 : 2;
1940 for (unsigned i = 0; i != 8; i += 2)
1941 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1942 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1943 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1944 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1945 return false;
1946 }
1947 return true;
1948}
1949
1950/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1951/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1952/// current subtarget.
1953///
1954/// The ShuffleKind distinguishes between big-endian operations with
1955/// two different inputs (0), either-endian operations with two identical
1956/// inputs (1), and little-endian operations with two different inputs (2).
1957/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1959 SelectionDAG &DAG) {
1960 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1961 if (!Subtarget.hasP8Vector())
1962 return false;
1963
1964 bool IsLE = DAG.getDataLayout().isLittleEndian();
1965 if (ShuffleKind == 0) {
1966 if (IsLE)
1967 return false;
1968 for (unsigned i = 0; i != 16; i += 4)
1969 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1970 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1971 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1972 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1973 return false;
1974 } else if (ShuffleKind == 2) {
1975 if (!IsLE)
1976 return false;
1977 for (unsigned i = 0; i != 16; i += 4)
1978 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1979 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1980 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1981 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1982 return false;
1983 } else if (ShuffleKind == 1) {
1984 unsigned j = IsLE ? 0 : 4;
1985 for (unsigned i = 0; i != 8; i += 4)
1986 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1987 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1988 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1989 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1990 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1991 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1992 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1993 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1994 return false;
1995 }
1996 return true;
1997}
1998
1999/// isVMerge - Common function, used to match vmrg* shuffles.
2000///
2001static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2002 unsigned LHSStart, unsigned RHSStart) {
2003 if (N->getValueType(0) != MVT::v16i8)
2004 return false;
2005 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2006 "Unsupported merge size!");
2007
2008 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2009 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2010 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2011 LHSStart+j+i*UnitSize) ||
2012 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2013 RHSStart+j+i*UnitSize))
2014 return false;
2015 }
2016 return true;
2017}
2018
2019/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2020/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2021/// The ShuffleKind distinguishes between big-endian merges with two
2022/// different inputs (0), either-endian merges with two identical inputs (1),
2023/// and little-endian merges with two different inputs (2). For the latter,
2024/// the input operands are swapped (see PPCInstrAltivec.td).
2026 unsigned ShuffleKind, SelectionDAG &DAG) {
2027 if (DAG.getDataLayout().isLittleEndian()) {
2028 if (ShuffleKind == 1) // unary
2029 return isVMerge(N, UnitSize, 0, 0);
2030 else if (ShuffleKind == 2) // swapped
2031 return isVMerge(N, UnitSize, 0, 16);
2032 else
2033 return false;
2034 } else {
2035 if (ShuffleKind == 1) // unary
2036 return isVMerge(N, UnitSize, 8, 8);
2037 else if (ShuffleKind == 0) // normal
2038 return isVMerge(N, UnitSize, 8, 24);
2039 else
2040 return false;
2041 }
2042}
2043
2044/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2045/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2046/// The ShuffleKind distinguishes between big-endian merges with two
2047/// different inputs (0), either-endian merges with two identical inputs (1),
2048/// and little-endian merges with two different inputs (2). For the latter,
2049/// the input operands are swapped (see PPCInstrAltivec.td).
2051 unsigned ShuffleKind, SelectionDAG &DAG) {
2052 if (DAG.getDataLayout().isLittleEndian()) {
2053 if (ShuffleKind == 1) // unary
2054 return isVMerge(N, UnitSize, 8, 8);
2055 else if (ShuffleKind == 2) // swapped
2056 return isVMerge(N, UnitSize, 8, 24);
2057 else
2058 return false;
2059 } else {
2060 if (ShuffleKind == 1) // unary
2061 return isVMerge(N, UnitSize, 0, 0);
2062 else if (ShuffleKind == 0) // normal
2063 return isVMerge(N, UnitSize, 0, 16);
2064 else
2065 return false;
2066 }
2067}
2068
2069/**
2070 * Common function used to match vmrgew and vmrgow shuffles
2071 *
2072 * The indexOffset determines whether to look for even or odd words in
2073 * the shuffle mask. This is based on the of the endianness of the target
2074 * machine.
2075 * - Little Endian:
2076 * - Use offset of 0 to check for odd elements
2077 * - Use offset of 4 to check for even elements
2078 * - Big Endian:
2079 * - Use offset of 0 to check for even elements
2080 * - Use offset of 4 to check for odd elements
2081 * A detailed description of the vector element ordering for little endian and
2082 * big endian can be found at
2083 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2084 * Targeting your applications - what little endian and big endian IBM XL C/C++
2085 * compiler differences mean to you
2086 *
2087 * The mask to the shuffle vector instruction specifies the indices of the
2088 * elements from the two input vectors to place in the result. The elements are
2089 * numbered in array-access order, starting with the first vector. These vectors
2090 * are always of type v16i8, thus each vector will contain 16 elements of size
2091 * 8. More info on the shuffle vector can be found in the
2092 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2093 * Language Reference.
2094 *
2095 * The RHSStartValue indicates whether the same input vectors are used (unary)
2096 * or two different input vectors are used, based on the following:
2097 * - If the instruction uses the same vector for both inputs, the range of the
2098 * indices will be 0 to 15. In this case, the RHSStart value passed should
2099 * be 0.
2100 * - If the instruction has two different vectors then the range of the
2101 * indices will be 0 to 31. In this case, the RHSStart value passed should
2102 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2103 * to 31 specify elements in the second vector).
2104 *
2105 * \param[in] N The shuffle vector SD Node to analyze
2106 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2107 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2108 * vector to the shuffle_vector instruction
2109 * \return true iff this shuffle vector represents an even or odd word merge
2110 */
2111static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2112 unsigned RHSStartValue) {
2113 if (N->getValueType(0) != MVT::v16i8)
2114 return false;
2115
2116 for (unsigned i = 0; i < 2; ++i)
2117 for (unsigned j = 0; j < 4; ++j)
2118 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2119 i*RHSStartValue+j+IndexOffset) ||
2120 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2121 i*RHSStartValue+j+IndexOffset+8))
2122 return false;
2123 return true;
2124}
2125
2126/**
2127 * Determine if the specified shuffle mask is suitable for the vmrgew or
2128 * vmrgow instructions.
2129 *
2130 * \param[in] N The shuffle vector SD Node to analyze
2131 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2132 * \param[in] ShuffleKind Identify the type of merge:
2133 * - 0 = big-endian merge with two different inputs;
2134 * - 1 = either-endian merge with two identical inputs;
2135 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2136 * little-endian merges).
2137 * \param[in] DAG The current SelectionDAG
2138 * \return true iff this shuffle mask
2139 */
2141 unsigned ShuffleKind, SelectionDAG &DAG) {
2142 if (DAG.getDataLayout().isLittleEndian()) {
2143 unsigned indexOffset = CheckEven ? 4 : 0;
2144 if (ShuffleKind == 1) // Unary
2145 return isVMerge(N, indexOffset, 0);
2146 else if (ShuffleKind == 2) // swapped
2147 return isVMerge(N, indexOffset, 16);
2148 else
2149 return false;
2150 }
2151 else {
2152 unsigned indexOffset = CheckEven ? 0 : 4;
2153 if (ShuffleKind == 1) // Unary
2154 return isVMerge(N, indexOffset, 0);
2155 else if (ShuffleKind == 0) // Normal
2156 return isVMerge(N, indexOffset, 16);
2157 else
2158 return false;
2159 }
2160 return false;
2161}
2162
2163/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2164/// amount, otherwise return -1.
2165/// The ShuffleKind distinguishes between big-endian operations with two
2166/// different inputs (0), either-endian operations with two identical inputs
2167/// (1), and little-endian operations with two different inputs (2). For the
2168/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2169int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2170 SelectionDAG &DAG) {
2171 if (N->getValueType(0) != MVT::v16i8)
2172 return -1;
2173
2174 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2175
2176 // Find the first non-undef value in the shuffle mask.
2177 unsigned i;
2178 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2179 /*search*/;
2180
2181 if (i == 16) return -1; // all undef.
2182
2183 // Otherwise, check to see if the rest of the elements are consecutively
2184 // numbered from this value.
2185 unsigned ShiftAmt = SVOp->getMaskElt(i);
2186 if (ShiftAmt < i) return -1;
2187
2188 ShiftAmt -= i;
2189 bool isLE = DAG.getDataLayout().isLittleEndian();
2190
2191 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2192 // Check the rest of the elements to see if they are consecutive.
2193 for (++i; i != 16; ++i)
2194 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2195 return -1;
2196 } else if (ShuffleKind == 1) {
2197 // Check the rest of the elements to see if they are consecutive.
2198 for (++i; i != 16; ++i)
2199 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2200 return -1;
2201 } else
2202 return -1;
2203
2204 if (isLE)
2205 ShiftAmt = 16 - ShiftAmt;
2206
2207 return ShiftAmt;
2208}
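
// Editor's note: a minimal standalone sketch (names invented for the example;
// undef elements and the ShuffleKind operand-swap details are ignored). A
// vsldoi mask is a run of 16 consecutive byte indices into the 32-byte
// concatenation of the two inputs; the little-endian result mirrors the
// "ShiftAmt = 16 - ShiftAmt" adjustment at the end of isVSLDOIShuffleMask.
#include <cstdio>

static int vsldoiShift(const int (&Mask)[16], bool IsLE) {
  for (int i = 1; i < 16; ++i)
    if (Mask[i] != Mask[0] + i)
      return -1; // Not consecutive, so not a vsldoi mask.
  return IsLE ? 16 - Mask[0] : Mask[0];
}

int main() {
  int Mask[16];
  for (int i = 0; i < 16; ++i)
    Mask[i] = 3 + i; // {3, 4, ..., 18}
  std::printf("BE shift = %d, LE shift = %d\n", vsldoiShift(Mask, false),
              vsldoiShift(Mask, true)); // BE shift = 3, LE shift = 13
}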
2209
2210/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2211/// specifies a splat of a single element that is suitable for input to
2212/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2213bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2214 EVT VT = N->getValueType(0);
2215 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2216 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2217
2218 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2219 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2220
2221 // The consecutive indices need to specify an element, not part of two
2222 // different elements. So abandon ship early if this isn't the case.
2223 if (N->getMaskElt(0) % EltSize != 0)
2224 return false;
2225
2226 // This is a splat operation if each element of the permute is the same, and
2227 // if the value doesn't reference the second vector.
2228 unsigned ElementBase = N->getMaskElt(0);
2229
2230 // FIXME: Handle UNDEF elements too!
2231 if (ElementBase >= 16)
2232 return false;
2233
2234 // Check that the indices are consecutive, in the case of a multi-byte element
2235 // splatted with a v16i8 mask.
2236 for (unsigned i = 1; i != EltSize; ++i)
2237 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2238 return false;
2239
2240 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2241 if (N->getMaskElt(i) < 0) continue;
2242 for (unsigned j = 0; j != EltSize; ++j)
2243 if (N->getMaskElt(i+j) != N->getMaskElt(j))
2244 return false;
2245 }
2246 return true;
2247}
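
// Editor's note: standalone sketch (names invented for the example; undef
// elements ignored). A v16i8 mask splats one EltSize-byte element when the
// first EltSize indices are consecutive, start at a multiple of EltSize
// inside the first input, and every later group repeats the first group,
// e.g. {4,5,6,7} repeated four times splats word element 1.
#include <array>
#include <cassert>

static bool isSplatMask(const std::array<int, 16> &Mask, unsigned EltSize) {
  if (Mask[0] % (int)EltSize != 0 || Mask[0] >= 16)
    return false;
  for (unsigned i = 0; i < 16; ++i) {
    if (i < EltSize && Mask[i] != Mask[0] + (int)i)
      return false; // First group: consecutive bytes of a single element.
    if (Mask[i] != Mask[i % EltSize])
      return false; // Later groups must repeat the first group exactly.
  }
  return true;
}

int main() {
  std::array<int, 16> M;
  for (int i = 0; i < 16; ++i)
    M[i] = 4 + (i % 4); // Splat of word element 1.
  assert(isSplatMask(M, 4));
}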
2248
2249/// Check that the mask is shuffling N byte elements. Within each N byte
2250/// element of the mask, the indices could be either in increasing or
2251/// decreasing order as long as they are consecutive.
2252/// \param[in] N the shuffle vector SD Node to analyze
2253/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2254/// Word/DoubleWord/QuadWord).
2255/// \param[in] StepLen the index delta between adjacent bytes within an
2256/// element: 1 if the mask is in increasing order, -1 if decreasing.
2257/// \return true iff the mask is shuffling N byte elements.
2258static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2259 int StepLen) {
2260 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2261 "Unexpected element width.");
2262 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2263
2264 unsigned NumOfElem = 16 / Width;
2265 unsigned MaskVal[16]; // Width is never greater than 16
2266 for (unsigned i = 0; i < NumOfElem; ++i) {
2267 MaskVal[0] = N->getMaskElt(i * Width);
2268 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2269 return false;
2270 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2271 return false;
2272 }
2273
2274 for (unsigned int j = 1; j < Width; ++j) {
2275 MaskVal[j] = N->getMaskElt(i * Width + j);
2276 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2277 return false;
2278 }
2279 }
2280 }
2281
2282 return true;
2283}
2284
2285bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2286 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2287 if (!isNByteElemShuffleMask(N, 4, 1))
2288 return false;
2289
2290 // Now we look at mask elements 0,4,8,12
2291 unsigned M0 = N->getMaskElt(0) / 4;
2292 unsigned M1 = N->getMaskElt(4) / 4;
2293 unsigned M2 = N->getMaskElt(8) / 4;
2294 unsigned M3 = N->getMaskElt(12) / 4;
2295 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2296 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2297
2298 // Below, let H and L be arbitrary elements of the shuffle mask
2299 // where H is in the range [4,7] and L is in the range [0,3].
2300 // H, 1, 2, 3 or L, 5, 6, 7
2301 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2302 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2303 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2304 InsertAtByte = IsLE ? 12 : 0;
2305 Swap = M0 < 4;
2306 return true;
2307 }
2308 // 0, H, 2, 3 or 4, L, 6, 7
2309 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2310 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2311 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2312 InsertAtByte = IsLE ? 8 : 4;
2313 Swap = M1 < 4;
2314 return true;
2315 }
2316 // 0, 1, H, 3 or 4, 5, L, 7
2317 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2318 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2319 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2320 InsertAtByte = IsLE ? 4 : 8;
2321 Swap = M2 < 4;
2322 return true;
2323 }
2324 // 0, 1, 2, H or 4, 5, 6, L
2325 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2326 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2327 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2328 InsertAtByte = IsLE ? 0 : 12;
2329 Swap = M3 < 4;
2330 return true;
2331 }
2332
2333 // If both vector operands for the shuffle are the same vector, the mask will
2334 // contain only elements from the first one and the second one will be undef.
2335 if (N->getOperand(1).isUndef()) {
2336 ShiftElts = 0;
2337 Swap = true;
2338 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2339 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2340 InsertAtByte = IsLE ? 12 : 0;
2341 return true;
2342 }
2343 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2344 InsertAtByte = IsLE ? 8 : 4;
2345 return true;
2346 }
2347 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2348 InsertAtByte = IsLE ? 4 : 8;
2349 return true;
2350 }
2351 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2352 InsertAtByte = IsLE ? 0 : 12;
2353 return true;
2354 }
2355 }
2356
2357 return false;
2358}
2359
2360bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2361 bool &Swap, bool IsLE) {
2362 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2363 // Ensure each byte index of the word is consecutive.
2364 if (!isNByteElemShuffleMask(N, 4, 1))
2365 return false;
2366
2367 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2368 unsigned M0 = N->getMaskElt(0) / 4;
2369 unsigned M1 = N->getMaskElt(4) / 4;
2370 unsigned M2 = N->getMaskElt(8) / 4;
2371 unsigned M3 = N->getMaskElt(12) / 4;
2372
2373 // If both vector operands for the shuffle are the same vector, the mask will
2374 // contain only elements from the first one and the second one will be undef.
2375 if (N->getOperand(1).isUndef()) {
2376 assert(M0 < 4 && "Indexing into an undef vector?");
2377 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2378 return false;
2379
2380 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2381 Swap = false;
2382 return true;
2383 }
2384
2385 // Ensure each word index of the ShuffleVector Mask is consecutive.
2386 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2387 return false;
2388
2389 if (IsLE) {
2390 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2391 // Input vectors don't need to be swapped if the leading element
2392 // of the result is one of the 3 left elements of the second vector
2393 // (or if there is no shift to be done at all).
2394 Swap = false;
2395 ShiftElts = (8 - M0) % 8;
2396 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2397 // Input vectors need to be swapped if the leading element
2398 // of the result is one of the 3 left elements of the first vector
2399 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2400 Swap = true;
2401 ShiftElts = (4 - M0) % 4;
2402 }
2403
2404 return true;
2405 } else { // BE
2406 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2407 // Input vectors don't need to be swapped if the leading element
2408 // of the result is one of the 4 elements of the first vector.
2409 Swap = false;
2410 ShiftElts = M0;
2411 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2412 // Input vectors need to be swapped if the leading element
2413 // of the result is one of the 4 elements of the right vector.
2414 Swap = true;
2415 ShiftElts = M0 - 4;
2416 }
2417
2418 return true;
2419 }
2420}
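
// Editor's note: standalone sketch (helper name invented for the example) of
// the little-endian branch above. The leading word index M0 of an xxsldwi
// mask determines both the operand swap and the shift: M0 = 5 needs no swap
// and shifts by (8 - 5) % 8 = 3, while M0 = 2 swaps the inputs and shifts by
// (4 - 2) % 4 = 2.
#include <cstdio>

static unsigned xxsldwiLE(unsigned M0, bool &Swap) {
  if (M0 == 0 || M0 >= 5) { // Leading word from the second input, or no shift.
    Swap = false;
    return (8 - M0) % 8;
  }
  Swap = true; // Leading word from the first input: swap, then re-shift.
  return (4 - M0) % 4;
}

int main() {
  bool Swap;
  unsigned Sh = xxsldwiLE(5, Swap);
  std::printf("M0=5: Swap=%d ShiftElts=%u\n", Swap, Sh); // Swap=0 ShiftElts=3
  Sh = xxsldwiLE(2, Swap);
  std::printf("M0=2: Swap=%d ShiftElts=%u\n", Swap, Sh); // Swap=1 ShiftElts=2
}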
2421
2422static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2423 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2424
2425 if (!isNByteElemShuffleMask(N, Width, -1))
2426 return false;
2427
2428 for (int i = 0; i < 16; i += Width)
2429 if (N->getMaskElt(i) != i + Width - 1)
2430 return false;
2431
2432 return true;
2433}
2434
2435bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2436 return isXXBRShuffleMaskHelper(N, 2);
2437}
2438
2439bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2440 return isXXBRShuffleMaskHelper(N, 4);
2441}
2442
2443bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2444 return isXXBRShuffleMaskHelper(N, 8);
2445}
2446
2447bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2448 return isXXBRShuffleMaskHelper(N, 16);
2449}
2450
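// Editor's note: standalone sketch (helper name invented for the example).
// The XXBR[H/W/D/Q] masks reverse the bytes within each element: descending
// consecutive indices per element (StepLen == -1) whose first index is the
// element's last byte, e.g. {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12} for
// the 4-byte (xxbrw) case.
#include <cassert>

static bool isXXBRMask(const int (&Mask)[16], int Width) {
  for (int i = 0; i < 16; i += Width) {
    if (Mask[i] != i + Width - 1) // First byte maps to the element's last byte.
      return false;
    for (int j = 1; j < Width; ++j)
      if (Mask[i + j] != Mask[i + j - 1] - 1) // Strictly descending run.
        return false;
  }
  return true;
}

int main() {
  int W[16] = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12};
  assert(isXXBRMask(W, 4));  // A word byte-reverse (xxbrw) mask...
  assert(!isXXBRMask(W, 8)); // ...is not a doubleword (xxbrd) mask.
}
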
2451/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2452/// if the inputs to the instruction should be swapped and set \p DM to the
2453/// value for the immediate.
2454/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2455/// AND element 0 of the result comes from the first input (LE) or second input
2456/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2457/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2458/// mask.
2459bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2460 bool &Swap, bool IsLE) {
2461 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2462
2463 // Ensure each byte index of the double word is consecutive.
2464 if (!isNByteElemShuffleMask(N, 8, 1))
2465 return false;
2466
2467 unsigned M0 = N->getMaskElt(0) / 8;
2468 unsigned M1 = N->getMaskElt(8) / 8;
2469 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2470
2471 // If both vector operands for the shuffle are the same vector, the mask will
2472 // contain only elements from the first one and the second one will be undef.
2473 if (N->getOperand(1).isUndef()) {
2474 if ((M0 | M1) < 2) {
2475 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2476 Swap = false;
2477 return true;
2478 } else
2479 return false;
2480 }
2481
2482 if (IsLE) {
2483 if (M0 > 1 && M1 < 2) {
2484 Swap = false;
2485 } else if (M0 < 2 && M1 > 1) {
2486 M0 = (M0 + 2) % 4;
2487 M1 = (M1 + 2) % 4;
2488 Swap = true;
2489 } else
2490 return false;
2491
2492 // Note: if control flow comes here that means Swap is already set above
2493 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2494 return true;
2495 } else { // BE
2496 if (M0 < 2 && M1 > 1) {
2497 Swap = false;
2498 } else if (M0 > 1 && M1 < 2) {
2499 M0 = (M0 + 2) % 4;
2500 M1 = (M1 + 2) % 4;
2501 Swap = true;
2502 } else
2503 return false;
2504
2505 // Note: if control flow comes here that means Swap is already set above
2506 DM = (M0 << 1) + (M1 & 1);
2507 return true;
2508 }
2509}
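
// Editor's note: standalone sketch (helper name invented for the example).
// XXPERMDI selects one doubleword from each input and packs the selections
// into the 2-bit DM immediate; on big-endian, DM = (M0 << 1) | (M1 & 1) with
// M0 and M1 read from mask bytes 0 and 8. The mask {0..7, 24..31} gives
// M0 = 0 and M1 = 3, hence DM = 1.
#include <cstdio>

static unsigned xxpermdiImmBE(unsigned M0, unsigned M1) {
  return (M0 << 1) + (M1 & 1);
}

int main() {
  std::printf("DM = %u\n", xxpermdiImmBE(0, 3)); // DM = 1
}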
2510
2511
2512/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2513/// appropriate for PPC mnemonics (which have a big endian bias - namely
2514/// elements are counted from the left of the vector register).
2515unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2516 SelectionDAG &DAG) {
2517 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2518 assert(isSplatShuffleMask(SVOp, EltSize));
2519 EVT VT = SVOp->getValueType(0);
2520
2521 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2522 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2523 : SVOp->getMaskElt(0);
2524
2525 if (DAG.getDataLayout().isLittleEndian())
2526 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2527 else
2528 return SVOp->getMaskElt(0) / EltSize;
2529}
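
// Editor's note: standalone sketch (helper name invented for the example).
// PPC splat mnemonics count elements from the left of the register, so on
// little-endian the shuffle-mask index must be mirrored: splatting word 1
// (mask bytes starting at 4) is index 1 for big-endian vspltw but index 2 on
// little-endian.
#include <cstdio>

static unsigned splatIdxForMnemonic(unsigned FirstMaskByte, unsigned EltSize,
                                    bool IsLE) {
  unsigned Idx = FirstMaskByte / EltSize;
  return IsLE ? (16 / EltSize) - 1 - Idx : Idx;
}

int main() {
  std::printf("BE: %u, LE: %u\n", splatIdxForMnemonic(4, 4, false),
              splatIdxForMnemonic(4, 4, true)); // BE: 1, LE: 2
}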
2530
2531/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2532/// by using a vspltis[bhw] instruction of the specified element size, return
2533/// the constant being splatted. The ByteSize field indicates the number of
2534/// bytes of each element [124] -> [bhw].
2535SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2536 SDValue OpVal;
2537
2538 // If ByteSize of the splat is bigger than the element size of the
2539 // build_vector, then we have a case where we are checking for a splat where
2540 // multiple elements of the buildvector are folded together into a single
2541 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2542 unsigned EltSize = 16/N->getNumOperands();
2543 if (EltSize < ByteSize) {
2544 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2545 SDValue UniquedVals[4];
2546 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2547
2548 // See if all of the elements in the buildvector agree across chunks.
2549 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2550 if (N->getOperand(i).isUndef()) continue;
2551 // If the element isn't a constant, bail fully out.
2552 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2553
2554 if (!UniquedVals[i&(Multiple-1)].getNode())
2555 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2556 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2557 return SDValue(); // no match.
2558 }
2559
2560 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2561 // either constant or undef values that are identical for each chunk. See
2562 // if these chunks can form into a larger vspltis*.
2563
2564 // Check to see if all of the leading entries are either 0 or -1. If
2565 // neither, then this won't fit into the immediate field.
2566 bool LeadingZero = true;
2567 bool LeadingOnes = true;
2568 for (unsigned i = 0; i != Multiple-1; ++i) {
2569 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2570
2571 LeadingZero &= isNullConstant(UniquedVals[i]);
2572 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2573 }
2574 // Finally, check the least significant entry.
2575 if (LeadingZero) {
2576 if (!UniquedVals[Multiple-1].getNode())
2577 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2578 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2579 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2580 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2581 }
2582 if (LeadingOnes) {
2583 if (!UniquedVals[Multiple-1].getNode())
2584 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2585 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2586 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2587 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2588 }
2589
2590 return SDValue();
2591 }
2592
2593 // Check to see if this buildvec has a single non-undef value in its elements.
2594 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2595 if (N->getOperand(i).isUndef()) continue;
2596 if (!OpVal.getNode())
2597 OpVal = N->getOperand(i);
2598 else if (OpVal != N->getOperand(i))
2599 return SDValue();
2600 }
2601
2602 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2603
2604 unsigned ValSizeInBytes = EltSize;
2605 uint64_t Value = 0;
2606 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2607 Value = CN->getZExtValue();
2608 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2609 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2610 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2611 }
2612
2613 // If the splat value is larger than the element value, then we can never do
2614 // this splat. The only case that we could fit the replicated bits into our
2615 // immediate field for would be zero, and we prefer to use vxor for it.
2616 if (ValSizeInBytes < ByteSize) return SDValue();
2617
2618 // If the element value is larger than the splat value, check if it consists
2619 // of a repeated bit pattern of size ByteSize.
2620 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2621 return SDValue();
2622
2623 // Properly sign extend the value.
2624 int MaskVal = SignExtend32(Value, ByteSize * 8);
2625
2626 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2627 if (MaskVal == 0) return SDValue();
2628
2629 // Finally, if this value fits in a 5 bit sext field, return it
2630 if (SignExtend32<5>(MaskVal) == MaskVal)
2631 return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2632 return SDValue();
2633}
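
// Editor's note: standalone sketch (helper name invented for the example).
// vspltis[bhw] can only splat a non-zero value that survives sign extension
// from 5 bits: a v8i16 build_vector of 0xFFFE elements matches vspltish(-2),
// while 0x0020 (32) is outside the [-16, 15] immediate range.
#include <cstdint>
#include <cstdio>

static bool fitsVSPLTIS(uint64_t Value, unsigned ByteSize) {
  unsigned Bits = ByteSize * 8;
  // Sign-extend the low Bits of Value, then test the 5-bit signed range.
  int64_t Sext = (int64_t)(Value << (64 - Bits)) >> (64 - Bits);
  return Sext != 0 && Sext >= -16 && Sext <= 15;
}

int main() {
  std::printf("%d %d\n", fitsVSPLTIS(0xFFFE, 2), fitsVSPLTIS(0x0020, 2));
  // Prints "1 0".
}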
2634
2635//===----------------------------------------------------------------------===//
2636// Addressing Mode Selection
2637//===----------------------------------------------------------------------===//
2638
2639/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2640/// or 64-bit immediate, and if the value can be accurately represented as a
2641/// sign extension from a 16-bit value. If so, this returns true and the
2642/// immediate.
2643bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2644 if (!isa<ConstantSDNode>(N))
2645 return false;
2646
2647 Imm = (int16_t)N->getAsZExtVal();
2648 if (N->getValueType(0) == MVT::i32)
2649 return Imm == (int32_t)N->getAsZExtVal();
2650 else
2651 return Imm == (int64_t)N->getAsZExtVal();
2652}
2653bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2654 return isIntS16Immediate(Op.getNode(), Imm);
2655}
2656
2657/// Used when computing address flags for selecting loads and stores.
2658/// If we have an OR, check if the LHS and RHS are provably disjoint.
2659/// An OR of two provably disjoint values is equivalent to an ADD.
2660/// Most PPC load/store instructions compute the effective address as a sum,
2661/// so doing this conversion is useful.
2662static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2663 if (N.getOpcode() != ISD::OR)
2664 return false;
2665 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2666 if (!LHSKnown.Zero.getBoolValue())
2667 return false;
2668 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2669 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2670}
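
// Editor's note: standalone sketch. When the set bits of the two OR operands
// cannot overlap, OR and ADD compute the same value, so the address may use a
// normal base + offset add. A common shape is (p & ~15) | 3, where the low
// four bits of the left operand are provably zero:
#include <cassert>
#include <cstdint>

int main() {
  uint64_t P = 0x12345678;
  uint64_t Base = P & ~uint64_t(15); // Low four bits known zero.
  assert((Base | 3) == Base + 3);    // The disjoint OR behaves as an ADD.
}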
2671
2672/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2673/// be represented as an indexed [r+r] operation.
2674bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2675 SDValue &Index,
2676 SelectionDAG &DAG) const {
2677 for (SDNode *U : N->uses()) {
2678 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2679 if (Memop->getMemoryVT() == MVT::f64) {
2680 Base = N.getOperand(0);
2681 Index = N.getOperand(1);
2682 return true;
2683 }
2684 }
2685 }
2686 return false;
2687}
2688
2689/// isIntS34Immediate - This method tests whether the value of the given node
2690/// can be accurately represented as a sign extension from a 34-bit value. If
2691/// so, this returns true and the immediate.
2692bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2693 if (!isa<ConstantSDNode>(N))
2694 return false;
2695
2696 Imm = (int64_t)N->getAsZExtVal();
2697 return isInt<34>(Imm);
2698}
2699bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2700 return isIntS34Immediate(Op.getNode(), Imm);
2701}
2702
2703/// SelectAddressRegReg - Given the specified address, check to see if it
2704/// can be represented as an indexed [r+r] operation. Returns false if it
2705/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2706/// non-zero and N can be represented by a base register plus a signed 16-bit
2707/// displacement, make a more precise judgement by checking (displacement % \p
2708/// EncodingAlignment).
2709bool PPCTargetLowering::SelectAddressRegReg(
2710 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2711 MaybeAlign EncodingAlignment) const {
2712 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2713 // a [pc+imm].
2714 if (SelectAddressPCRel(N, Base))
2715 return false;
2716
2717 int16_t Imm = 0;
2718 if (N.getOpcode() == ISD::ADD) {
2719 // SPE f64 load/store instructions cannot handle a 16-bit offset; they
2720 // only support 8-bit offsets, so try the EVX [r+r] form first.
2721 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2722 return true;
2723 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2724 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2725 return false; // r+i
2726 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2727 return false; // r+i
2728
2729 Base = N.getOperand(0);
2730 Index = N.getOperand(1);
2731 return true;
2732 } else if (N.getOpcode() == ISD::OR) {
2733 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2734 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2735 return false; // r+i can fold it if we can.
2736
2737 // If this is an or of disjoint bitfields, we can codegen this as an add
2738 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2739 // disjoint.
2740 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2741
2742 if (LHSKnown.Zero.getBoolValue()) {
2743 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2744 // If all of the bits are known zero on the LHS or RHS, the add won't
2745 // carry.
2746 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2747 Base = N.getOperand(0);
2748 Index = N.getOperand(1);
2749 return true;
2750 }
2751 }
2752 }
2753
2754 return false;
2755}
2756
2757// If we happen to be doing an i64 load or store into a stack slot that has
2758// less than a 4-byte alignment, then the frame-index elimination may need to
2759// use an indexed load or store instruction (because the offset may not be a
2760// multiple of 4). The extra register needed to hold the offset comes from the
2761// register scavenger, and it is possible that the scavenger will need to use
2762// an emergency spill slot. As a result, we need to make sure that a spill slot
2763// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2764// stack slot.
2765static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2766 // FIXME: This does not handle the LWA case.
2767 if (VT != MVT::i64)
2768 return;
2769
2770 // NOTE: We'll exclude negative FIs here, which come from argument
2771 // lowering, because there are no known test cases triggering this problem
2772 // using packed structures (or similar). We can remove this exclusion if
2773 // we find such a test case. The reason why this is so test-case driven is
2774 // because this entire 'fixup' is only to prevent crashes (from the
2775 // register scavenger) on not-really-valid inputs. For example, if we have:
2776 // %a = alloca i1
2777 // %b = bitcast i1* %a to i64*
2778 // store i64 0, i64* %b
2779 // then the store should really be marked as 'align 1', but is not. If it
2780 // were marked as 'align 1' then the indexed form would have been
2781 // instruction-selected initially, and the problem this 'fixup' is preventing
2782 // won't happen regardless.
2783 if (FrameIdx < 0)
2784 return;
2785
2786 MachineFunction &MF = DAG.getMachineFunction();
2787 MachineFrameInfo &MFI = MF.getFrameInfo();
2788
2789 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2790 return;
2791
2792 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2793 FuncInfo->setHasNonRISpills();
2794}
2795
2796/// Returns true if the address N can be represented by a base register plus
2797/// a signed 16-bit displacement [r+imm], and if it is not better
2798/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2799/// displacements that are multiples of that value.
2800bool PPCTargetLowering::SelectAddressRegImm(
2801 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2802 MaybeAlign EncodingAlignment) const {
2803 // FIXME dl should come from parent load or store, not from address
2804 SDLoc dl(N);
2805
2806 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2807 // a [pc+imm].
2808 if (SelectAddressPCRel(N, Base))
2809 return false;
2810
2811 // If this can be more profitably realized as r+r, fail.
2812 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2813 return false;
2814
2815 if (N.getOpcode() == ISD::ADD) {
2816 int16_t imm = 0;
2817 if (isIntS16Immediate(N.getOperand(1), imm) &&
2818 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2819 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2820 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2821 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2822 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2823 } else {
2824 Base = N.getOperand(0);
2825 }
2826 return true; // [r+i]
2827 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2828 // Match LOAD (ADD (X, Lo(G))).
2829 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2830 "Cannot handle constant offsets yet!");
2831 Disp = N.getOperand(1).getOperand(0); // The global address.
2832 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2833 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2834 Disp.getOpcode() == ISD::TargetConstantPool ||
2835 Disp.getOpcode() == ISD::TargetJumpTable);
2836 Base = N.getOperand(0);
2837 return true; // [&g+r]
2838 }
2839 } else if (N.getOpcode() == ISD::OR) {
2840 int16_t imm = 0;
2841 if (isIntS16Immediate(N.getOperand(1), imm) &&
2842 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2843 // If this is an or of disjoint bitfields, we can codegen this as an add
2844 // (for better address arithmetic) if the LHS and RHS of the OR are
2845 // provably disjoint.
2846 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2847
2848 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2849 // If all of the bits are known zero on the LHS or RHS, the add won't
2850 // carry.
2851 if (FrameIndexSDNode *FI =
2852 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2853 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2854 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2855 } else {
2856 Base = N.getOperand(0);
2857 }
2858 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2859 return true;
2860 }
2861 }
2862 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2863 // Loading from a constant address.
2864
2865 // If this address fits entirely in a 16-bit sext immediate field, codegen
2866 // this as "d, 0"
2867 int16_t Imm;
2868 if (isIntS16Immediate(CN, Imm) &&
2869 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2870 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2871 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2872 CN->getValueType(0));
2873 return true;
2874 }
2875
2876 // Handle 32-bit sext immediates with LIS + addr mode.
2877 if ((CN->getValueType(0) == MVT::i32 ||
2878 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2879 (!EncodingAlignment ||
2880 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2881 int Addr = (int)CN->getZExtValue();
2882
2883 // Otherwise, break this down into an LIS + disp.
2884 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2885
2886 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2887 MVT::i32);
2888 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2889 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2890 return true;
2891 }
2892 }
2893
2894 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2895 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2896 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2897 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2898 } else
2899 Base = N;
2900 return true; // [r+0]
2901}
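
// Editor's note: standalone sketch of the LIS + displacement split used for
// constant addresses above. The high part must absorb the sign extension of
// the 16-bit displacement: for Addr = 0x12348000, Disp = (short)Addr is
// -32768 and Hi = (Addr - Disp) >> 16 is 0x1235, not 0x1234.
#include <cassert>
#include <cstdint>

int main() {
  int32_t Addr = 0x12348000;
  int16_t Disp = (int16_t)Addr;      // -32768
  int32_t Hi = (Addr - Disp) >> 16;  // 0x1235: absorbs the sign-extend borrow.
  assert((Hi << 16) + Disp == Addr); // lis rB, Hi ; then access Disp(rB).
}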
2902
2903/// Similar to the 16-bit case but for instructions that take a 34-bit
2904/// displacement field (prefixed loads/stores).
2905bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2906 SDValue &Base,
2907 SelectionDAG &DAG) const {
2908 // Only on 64-bit targets.
2909 if (N.getValueType() != MVT::i64)
2910 return false;
2911
2912 SDLoc dl(N);
2913 int64_t Imm = 0;
2914
2915 if (N.getOpcode() == ISD::ADD) {
2916 if (!isIntS34Immediate(N.getOperand(1), Imm))
2917 return false;
2918 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2919 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2920 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2921 else
2922 Base = N.getOperand(0);
2923 return true;
2924 }
2925
2926 if (N.getOpcode() == ISD::OR) {
2927 if (!isIntS34Immediate(N.getOperand(1), Imm))
2928 return false;
2929 // If this is an or of disjoint bitfields, we can codegen this as an add
2930 // (for better address arithmetic) if the LHS and RHS of the OR are
2931 // provably disjoint.
2932 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2933 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2934 return false;
2935 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2936 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2937 else
2938 Base = N.getOperand(0);
2939 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2940 return true;
2941 }
2942
2943 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2944 Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
2945 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2946 return true;
2947 }
2948
2949 return false;
2950}
2951
2952/// SelectAddressRegRegOnly - Given the specified address, force it to be
2953/// represented as an indexed [r+r] operation.
2954bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2955 SDValue &Index,
2956 SelectionDAG &DAG) const {
2957 // Check to see if we can easily represent this as an [r+r] address. This
2958 // will fail if it thinks that the address is more profitably represented as
2959 // reg+imm, e.g. where imm = 0.
2960 if (SelectAddressRegReg(N, Base, Index, DAG))
2961 return true;
2962
2963 // If the address is the result of an add, we will utilize the fact that the
2964 // address calculation includes an implicit add. However, we can reduce
2965 // register pressure if we do not materialize a constant just for use as the
2966 // index register. We only get rid of the add if it is not an add of a
2967 // value and a 16-bit signed constant and both have a single use.
2968 int16_t imm = 0;
2969 if (N.getOpcode() == ISD::ADD &&
2970 (!isIntS16Immediate(N.getOperand(1), imm) ||
2971 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2972 Base = N.getOperand(0);
2973 Index = N.getOperand(1);
2974 return true;
2975 }
2976
2977 // Otherwise, do it the hard way, using R0 as the base register.
2978 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2979 N.getValueType());
2980 Index = N;
2981 return true;
2982}
2983
2984template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2985 Ty *PCRelCand = dyn_cast<Ty>(N);
2986 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2987}
2988
2989/// Returns true if this address is a PC Relative address.
2990/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2991/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2992bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
2993 // This is a materialize PC Relative node. Always select this as PC Relative.
2994 Base = N;
2995 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2996 return true;
2997 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
2998 isValidPCRelNode<GlobalAddressSDNode>(N) ||
2999 isValidPCRelNode<JumpTableSDNode>(N) ||
3000 isValidPCRelNode<BlockAddressSDNode>(N))
3001 return true;
3002 return false;
3003}
3004
3005/// Returns true if we should use a direct load into vector instruction
3006/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3007static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3008
3009 // If there are any other uses other than scalar to vector, then we should
3010 // keep it as a scalar load -> direct move pattern to prevent multiple
3011 // loads.
3012 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3013 if (!LD)
3014 return false;
3015
3016 EVT MemVT = LD->getMemoryVT();
3017 if (!MemVT.isSimple())
3018 return false;
3019 switch(MemVT.getSimpleVT().SimpleTy) {
3020 case MVT::i64:
3021 break;
3022 case MVT::i32:
3023 if (!ST.hasP8Vector())
3024 return false;
3025 break;
3026 case MVT::i16:
3027 case MVT::i8:
3028 if (!ST.hasP9Vector())
3029 return false;
3030 break;
3031 default:
3032 return false;
3033 }
3034
3035 SDValue LoadedVal(N, 0);
3036 if (!LoadedVal.hasOneUse())
3037 return false;
3038
3039 for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
3040 UI != UE; ++UI)
3041 if (UI.getUse().get().getResNo() == 0 &&
3042 UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3043 UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
3044 return false;
3045
3046 return true;
3047}
3048
3049/// getPreIndexedAddressParts - returns true by value, base pointer and
3050/// offset pointer and addressing mode by reference if the node's address
3051/// can be legally represented as pre-indexed load / store address.
3052bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3053 SDValue &Offset,
3054 ISD::MemIndexedMode &AM,
3055 SelectionDAG &DAG) const {
3056 if (DisablePPCPreinc) return false;
3057
3058 bool isLoad = true;
3059 SDValue Ptr;
3060 EVT VT;
3061 Align Alignment;
3062 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3063 Ptr = LD->getBasePtr();
3064 VT = LD->getMemoryVT();
3065 Alignment = LD->getAlign();
3066 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3067 Ptr = ST->getBasePtr();
3068 VT = ST->getMemoryVT();
3069 Alignment = ST->getAlign();
3070 isLoad = false;
3071 } else
3072 return false;
3073
3074 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3075 // instructions because we can fold these into a more efficient instruction
3076 // instead, (such as LXSD).
3077 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3078 return false;
3079 }
3080
3081 // PowerPC doesn't have preinc load/store instructions for vectors
3082 if (VT.isVector())
3083 return false;
3084
3085 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3086 // Common code will reject creating a pre-inc form if the base pointer
3087 // is a frame index, or if N is a store and the base pointer is either
3088 // the same as or a predecessor of the value being stored. Check for
3089 // those situations here, and try with swapped Base/Offset instead.
3090 bool Swap = false;
3091
3092 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
3093 Swap = true;
3094 else if (!isLoad) {
3095 SDValue Val = cast<StoreSDNode>(N)->getValue();
3096 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3097 Swap = true;
3098 }
3099
3100 if (Swap)
3101 std::swap(Base, Offset);
3102
3103 AM = ISD::PRE_INC;
3104 return true;
3105 }
3106
3107 // LDU/STU can only handle immediates that are a multiple of 4.
3108 if (VT != MVT::i64) {
3109 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3110 return false;
3111 } else {
3112 // LDU/STU need an address with at least 4-byte alignment.
3113 if (Alignment < Align(4))
3114 return false;
3115
3116 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3117 return false;
3118 }
3119
3120 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3121 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3122 // sext i32 to i64 when addr mode is r+i.
3123 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3124 LD->getExtensionType() == ISD::SEXTLOAD &&
3125 isa<ConstantSDNode>(Offset))
3126 return false;
3127 }
3128
3129 AM = ISD::PRE_INC;
3130 return true;
3131}
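
// Editor's note: standalone sketch (helper name invented for the example).
// DS-form instructions such as ld/std/ldu encode the displacement divided by
// four, so only sign-extended 16-bit displacements with the low two bits
// clear are encodable; that is why the i64 path above requires Align(4).
#include <cassert>
#include <cstdint>

static bool fitsDSForm(int64_t Disp) {
  return Disp >= INT16_MIN && Disp <= INT16_MAX && (Disp & 3) == 0;
}

int main() {
  assert(fitsDSForm(-32768) && fitsDSForm(32764)); // Aligned and in range.
  assert(!fitsDSForm(2) && !fitsDSForm(32766));    // Misaligned displacements.
}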
3132
3133//===----------------------------------------------------------------------===//
3134// LowerOperation implementation
3135//===----------------------------------------------------------------------===//
3136
3137/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3138/// and LoOpFlags to the target MO flags.
3139static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3140 unsigned &HiOpFlags, unsigned &LoOpFlags,
3141 const GlobalValue *GV = nullptr) {
3142 HiOpFlags = PPCII::MO_HA;
3143 LoOpFlags = PPCII::MO_LO;
3144
3145 // Don't use the pic base if not in PIC relocation model.
3146 if (IsPIC) {
3147 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3148 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3149 }
3150}
3151
3152static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3153 SelectionDAG &DAG) {
3154 SDLoc DL(HiPart);
3155 EVT PtrVT = HiPart.getValueType();
3156 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3157
3158 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3159 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3160
3161 // With PIC, the first instruction is actually "GR+hi(&G)".
3162 if (isPIC)
3163 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3164 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3165
3166 // Generate non-pic code that has direct accesses to the constant pool.
3167 // The address of the global is just (hi(&g)+lo(&g)).
3168 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3169}
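
// Editor's note: standalone sketch of the Hi/Lo arithmetic relied on above.
// The "high adjusted" (@ha) part pre-compensates for the sign extension of
// the low 16 bits: ha(x) = (x + 0x8000) >> 16 and lo(x) = (short)x, so
// (ha(x) << 16) + lo(x) == x for any 32-bit x.
#include <cassert>
#include <cstdint>

int main() {
  uint32_t G = 0x1000F000;
  int32_t Ha = (int32_t)((G + 0x8000) >> 16); // addis rD, rB, g@ha
  int16_t Lo = (int16_t)G;                    // addi  rD, rD, g@l
  assert((uint32_t)((Ha << 16) + Lo) == G);
}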
3170
3171static void setUsesTOCBasePtr(MachineFunction &MF) {
3172 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3173 FuncInfo->setUsesTOCBasePtr();
3174}
3175
3176static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3177 setUsesTOCBasePtr(DAG.getMachineFunction());
3178}
3179
3180SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3181 SDValue GA) const {
3182 const bool Is64Bit = Subtarget.isPPC64();
3183 EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
3184 SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
3185 : Subtarget.isAIXABI()
3186 ? DAG.getRegister(PPC::R2, VT)
3187 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3188 SDValue Ops[] = { GA, Reg };
3189 return DAG.getMemIntrinsicNode(
3190 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3191 MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
3192 MachineMemOperand::MOLoad);
3193}
3194
3195SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3196 SelectionDAG &DAG) const {
3197 EVT PtrVT = Op.getValueType();
3198 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3199 const Constant *C = CP->getConstVal();
3200
3201 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3202 // The actual address of the GlobalValue is stored in the TOC.
3203 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3204 if (Subtarget.isUsingPCRelativeCalls()) {
3205 SDLoc DL(CP);
3206 EVT Ty = getPointerTy(DAG.getDataLayout());
3207 SDValue ConstPool = DAG.getTargetConstantPool(
3208 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3209 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3210 }
3211 setUsesTOCBasePtr(DAG);
3212 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3213 return getTOCEntry(DAG, SDLoc(CP), GA);
3214 }
3215
3216 unsigned MOHiFlag, MOLoFlag;
3217 bool IsPIC = isPositionIndependent();
3218 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3219
3220 if (IsPIC && Subtarget.isSVR4ABI()) {
3221 SDValue GA =
3222 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3223 return getTOCEntry(DAG, SDLoc(CP), GA);
3224 }
3225
3226 SDValue CPIHi =
3227 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3228 SDValue CPILo =
3229 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3230 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3231}
3232
3233// For 64-bit PowerPC, prefer the more compact relative encodings.
3234// This trades 32 bits per jump table entry for one or two instructions
3235// on the jump site.
3236unsigned PPCTargetLowering::getJumpTableEncoding() const {
3237 if (isJumpTableRelative())
3238 return MachineJumpTableInfo::EK_LabelDifference32;
3239
3240 return TargetLowering::getJumpTableEncoding();
3241}
3242
3243bool PPCTargetLowering::isJumpTableRelative() const {
3244 if (UseAbsoluteJumpTables)
3245 return false;
3246 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3247 return true;
3248 return TargetLowering::isJumpTableRelative();
3249}
3250
3251SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3252 SelectionDAG &DAG) const {
3253 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3254 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3255
3256 switch (getTargetMachine().getCodeModel()) {
3257 case CodeModel::Small:
3258 case CodeModel::Medium:
3259 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3260 default:
3261 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3262 getPointerTy(DAG.getDataLayout()));
3263 }
3264}
3265
3266const MCExpr *
3267PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3268 unsigned JTI,
3269 MCContext &Ctx) const {
3270 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3271 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3272
3273 switch (getTargetMachine().getCodeModel()) {
3274 case CodeModel::Small:
3275 case CodeModel::Medium:
3276 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3277 default:
3278 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3279 }
3280}
3281
3282SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3283 EVT PtrVT = Op.getValueType();
3284 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3285
3286 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3287 if (Subtarget.isUsingPCRelativeCalls()) {
3288 SDLoc DL(JT);
3289 EVT Ty = getPointerTy(DAG.getDataLayout());
3290 SDValue GA =
3291 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3292 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3293 return MatAddr;
3294 }
3295
3296 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3297 // The actual address of the GlobalValue is stored in the TOC.
3298 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3299 setUsesTOCBasePtr(DAG);
3300 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3301 return getTOCEntry(DAG, SDLoc(JT), GA);
3302 }
3303
3304 unsigned MOHiFlag, MOLoFlag;
3305 bool IsPIC = isPositionIndependent();
3306 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3307
3308 if (IsPIC && Subtarget.isSVR4ABI()) {
3309 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3310 PPCII::MO_PIC_FLAG);
3311 return getTOCEntry(DAG, SDLoc(GA), GA);
3312 }
3313
3314 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3315 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3316 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3317}
3318
3319SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3320 SelectionDAG &DAG) const {
3321 EVT PtrVT = Op.getValueType();
3322 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3323 const BlockAddress *BA = BASDN->getBlockAddress();
3324
3325 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3326 if (Subtarget.isUsingPCRelativeCalls()) {
3327 SDLoc DL(BASDN);
3328 EVT Ty = getPointerTy(DAG.getDataLayout());
3329 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3330 PPCII::MO_PCREL_FLAG);
3331 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3332 return MatAddr;
3333 }
3334
3335 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3336 // The actual BlockAddress is stored in the TOC.
3337 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3338 setUsesTOCBasePtr(DAG);
3339 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3340 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3341 }
3342
3343 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3344 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3345 return getTOCEntry(
3346 DAG, SDLoc(BASDN),
3347 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3348
3349 unsigned MOHiFlag, MOLoFlag;
3350 bool IsPIC = isPositionIndependent();
3351 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3352 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3353 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3354 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3355}
3356
3357SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3358 SelectionDAG &DAG) const {
3359 if (Subtarget.isAIXABI())
3360 return LowerGlobalTLSAddressAIX(Op, DAG);
3361
3362 return LowerGlobalTLSAddressLinux(Op, DAG);
3363}
3364
3365SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3366 SelectionDAG &DAG) const {
3367 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3368
3369 if (DAG.getTarget().useEmulatedTLS())
3370 report_fatal_error("Emulated TLS is not yet supported on AIX");
3371
3372 SDLoc dl(GA);
3373 const GlobalValue *GV = GA->getGlobal();
3374 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3375 bool Is64Bit = Subtarget.isPPC64();
3376 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3377 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3378
3379 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3380 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3381 bool HasAIXSmallTLSGlobalAttr = false;
3382 SDValue VariableOffsetTGA =
3383 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3384 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3385 SDValue TLSReg;
3386
3387 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3388 if (GVar->hasAttribute("aix-small-tls"))
3389 HasAIXSmallTLSGlobalAttr = true;
3390
3391 if (Is64Bit) {
3392 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3393 // involves a load of the variable offset (from the TOC), followed by an
3394 // add of the loaded variable offset to R13 (the thread pointer).
3395 // This code sequence looks like:
3396 // ld reg1,var[TC](2)
3397 // add reg2, reg1, r13 // r13 contains the thread pointer
3398 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3399
3400 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3401 // global variable attribute, produce a faster access sequence for
3402 // local-exec TLS variables where the offset from the TLS base is encoded
3403 // as an immediate operand.
3404 //
3405 // We only utilize the faster local-exec access sequence when the TLS
3406 // variable has a size within the policy limit. We treat types that are
3407 // not sized or are empty as being over the policy size limit.
3408 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3409 IsTLSLocalExecModel) {
3410 Type *GVType = GV->getValueType();
3411 if (GVType->isSized() && !GVType->isEmptyTy() &&
3412 GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <=
3413 AIXSmallTlsPolicySizeLimit)
3414 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3415 }
3416 } else {
3417 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3418 // involves loading the variable offset from the TOC, generating a call to
3419 // .__get_tpointer to get the thread pointer (which will be in R3), and
3420 // adding the two together:
3421 // lwz reg1,var[TC](2)
3422 // bla .__get_tpointer
3423 // add reg2, reg1, r3
3424 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3425
3426 // We do not implement the 32-bit version of the faster access sequence
3427 // for local-exec that is controlled by the -maix-small-local-exec-tls
3428 // option, or the "aix-small-tls" global variable attribute.
3429 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3430 report_fatal_error("The small-local-exec TLS access sequence is "
3431 "currently only supported on AIX (64-bit mode).");
3432 }
3433 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3434 }
3435
3436 if (Model == TLSModel::LocalDynamic) {
3437 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3438
3439 // We do not implement the 32-bit version of the faster access sequence
3440 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3441 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3442 report_fatal_error("The small-local-dynamic TLS access sequence is "
3443 "currently only supported on AIX (64-bit mode).");
3444
3445 // For local-dynamic on AIX, we need to generate one TOC entry for each
3446 // variable offset, and a single module-handle TOC entry for the entire
3447 // file.
3448
3449 SDValue VariableOffsetTGA =
3450 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3451 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3452
3453 Module *M = DAG.getMachineFunction().getFunction().getParent();
3454 GlobalVariable *TLSGV =
3455 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3456 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3457 TLSGV->setThreadLocal(true);
3458 assert(TLSGV && "Not able to create GV for _$TLSML.");
3459 SDValue ModuleHandleTGA =
3460 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3461 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3462 SDValue ModuleHandle =
3463 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3464
3465 // With the -maix-small-local-dynamic-tls option, produce a faster access
3466 // sequence for local-dynamic TLS variables where the offset from the
3467 // module-handle is encoded as an immediate operand.
3468 //
3469 // We only utilize the faster local-dynamic access sequence when the TLS
3470 // variable has a size within the policy limit. We treat types that are
3471 // not sized or are empty as being over the policy size limit.
3472 if (HasAIXSmallLocalDynamicTLS) {
3473 Type *GVType = GV->getValueType();
3474 if (GVType->isSized() && !GVType->isEmptyTy() &&
3475 GV->getParent()->getDataLayout().getTypeAllocSize(GVType) <=
3476 AIXSmallTlsPolicySizeLimit)
3477 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3478 ModuleHandle);
3479 }
3480
3481 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3482 }
3483
3484 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3485 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3486 // need to generate two TOC entries, one for the variable offset, one for the
3487 // region handle. The global address for the TOC entry of the region handle is
3488 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3489 // entry of the variable offset is created with MO_TLSGD_FLAG.
3490 SDValue VariableOffsetTGA =
3491 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3492 SDValue RegionHandleTGA =
3493 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3494 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3495 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3496 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3497 RegionHandle);
3498}
3499
3500SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3501 SelectionDAG &DAG) const {
3502 // FIXME: TLS addresses currently use medium model code sequences,
3503 // which is the most useful form. Eventually support for small and
3504 // large models could be added if users need it, at the cost of
3505 // additional complexity.
3506 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3507 if (DAG.getTarget().useEmulatedTLS())
3508 return LowerToTLSEmulatedModel(GA, DAG);
3509
3510 SDLoc dl(GA);
3511 const GlobalValue *GV = GA->getGlobal();
3512 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3513 bool is64bit = Subtarget.isPPC64();
3514 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3515 PICLevel::Level picLevel = M->getPICLevel();
3516
3517 const TargetMachine &TM = getTargetMachine();
3518 TLSModel::Model Model = TM.getTLSModel(GV);
3519
3520 if (Model == TLSModel::LocalExec) {
3521 if (Subtarget.isUsingPCRelativeCalls()) {
3522 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3523 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3524 PPCII::MO_TPREL_PCREL_FLAG);
3525 SDValue MatAddr =
3526 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3527 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3528 }
3529
3530 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3531 PPCII::MO_TPREL_HA);
3532 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3533 PPCII::MO_TPREL_LO);
3534 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3535 : DAG.getRegister(PPC::R2, MVT::i32);
3536
3537 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3538 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3539 }
3540
3541 if (Model == TLSModel::InitialExec) {
3542 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3543 SDValue TGA = DAG.getTargetGlobalAddress(
3544 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3545 SDValue TGATLS = DAG.getTargetGlobalAddress(
3546 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3547 SDValue TPOffset;
3548 if (IsPCRel) {
3549 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3550 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3551 MachinePointerInfo());
3552 } else {
3553 SDValue GOTPtr;
3554 if (is64bit) {
3555 setUsesTOCBasePtr(DAG);
3556 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3557 GOTPtr =
3558 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3559 } else {
3560 if (!TM.isPositionIndependent())
3561 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3562 else if (picLevel == PICLevel::SmallPIC)
3563 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3564 else
3565 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3566 }
3567 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3568 }
3569 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3570 }
3571
3572 if (Model == TLSModel::GeneralDynamic) {
3573 if (Subtarget.isUsingPCRelativeCalls()) {
3574 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3576 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3577 }
3578
3579 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3580 SDValue GOTPtr;
3581 if (is64bit) {
3582 setUsesTOCBasePtr(DAG);
3583 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3584 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3585 GOTReg, TGA);
3586 } else {
3587 if (picLevel == PICLevel::SmallPIC)
3588 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3589 else
3590 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3591 }
3592 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3593 GOTPtr, TGA, TGA);
3594 }
3595
3596 if (Model == TLSModel::LocalDynamic) {
3597 if (Subtarget.isUsingPCRelativeCalls()) {
3598 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3599 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3600 SDValue MatPCRel =
3601 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3602 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3603 }
3604
3605 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3606 SDValue GOTPtr;
3607 if (is64bit) {
3608 setUsesTOCBasePtr(DAG);
3609 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3610 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3611 GOTReg, TGA);
3612 } else {
3613 if (picLevel == PICLevel::SmallPIC)
3614 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3615 else
3616 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3617 }
3618 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3619 PtrVT, GOTPtr, TGA, TGA);
3620 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3621 PtrVT, TLSAddr, TGA);
3622 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3623 }
3624
3625 llvm_unreachable("Unknown TLS model!");
3626}
3627
3628SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3629 SelectionDAG &DAG) const {
3630 EVT PtrVT = Op.getValueType();
3631 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3632 SDLoc DL(GSDN);
3633 const GlobalValue *GV = GSDN->getGlobal();
3634
3635 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3636 // The actual address of the GlobalValue is stored in the TOC.
3637 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3638 if (Subtarget.isUsingPCRelativeCalls()) {
3639 EVT Ty = getPointerTy(DAG.getDataLayout());
3640 if (isAccessedAsGotIndirect(Op)) {
3641 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3642 PPCII::MO_GOT_PCREL_FLAG);
3643 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3644 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3645 MachinePointerInfo());
3646 return Load;
3647 } else {
3648 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3649 PPCII::MO_PCREL_FLAG);
3650 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3651 }
3652 }
3653 setUsesTOCBasePtr(DAG);
3654 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3655 return getTOCEntry(DAG, DL, GA);
3656 }
3657
3658 unsigned MOHiFlag, MOLoFlag;
3659 bool IsPIC = isPositionIndependent();
3660 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3661
3662 if (IsPIC && Subtarget.isSVR4ABI()) {
3663 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3664 GSDN->getOffset(),
3665 PPCII::MO_PIC_FLAG);
3666 return getTOCEntry(DAG, DL, GA);
3667 }
3668
3669 SDValue GAHi =
3670 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3671 SDValue GALo =
3672 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3673
3674 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3675}
3676
3677SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3678 bool IsStrict = Op->isStrictFPOpcode();
3679 ISD::CondCode CC =
3680 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3681 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3682 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3683 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3684 EVT LHSVT = LHS.getValueType();
3685 SDLoc dl(Op);
3686
3687 // Soften the setcc with libcall if it is fp128.
3688 if (LHSVT == MVT::f128) {
3689 assert(!Subtarget.hasP9Vector() &&
3690 "SETCC for f128 is already legal under Power9!");
3691 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3692 Op->getOpcode() == ISD::STRICT_FSETCCS);
3693 if (RHS.getNode())
3694 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3695 DAG.getCondCode(CC));
3696 if (IsStrict)
3697 return DAG.getMergeValues({LHS, Chain}, dl);
3698 return LHS;
3699 }
3700
3701 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3702
3703 if (Op.getValueType() == MVT::v2i64) {
3704 // When the operands themselves are v2i64 values, we need to do something
3705 // special because VSX has no underlying comparison operations for these.
3706 if (LHS.getValueType() == MVT::v2i64) {
3707 // Equality can be handled by casting to the legal type for Altivec
3708 // comparisons, everything else needs to be expanded.
3709 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3710 return SDValue();
3711 SDValue SetCC32 = DAG.getSetCC(
3712 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3713 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3714 int ShuffV[] = {1, 0, 3, 2};
3715 SDValue Shuff =
3716 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3717 return DAG.getBitcast(MVT::v2i64,
3718 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3719 dl, MVT::v4i32, Shuff, SetCC32));
3720 }
3721
3722 // We handle most of these in the usual way.
3723 return Op;
3724 }
3725
3726 // If we're comparing for equality to zero, expose the fact that this is
3727 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3728 // fold the new nodes.
3729 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3730 return V;
3731
3732 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3733 // Leave comparisons against 0 and -1 alone for now, since they're usually
3734 // optimized. FIXME: revisit this when we can custom lower all setcc
3735 // optimizations.
3736 if (C->isAllOnes() || C->isZero())
3737 return SDValue();
3738 }
3739
3740 // If we have an integer seteq/setne, turn it into a compare against zero
3741 // by xor'ing the rhs with the lhs, which is faster than setting a
3742 // condition register, reading it back out, and masking the correct bit. The
3743 // normal approach here uses sub to do this instead of xor. Using xor exposes
3744 // the result to other bit-twiddling opportunities.
3745 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3746 EVT VT = Op.getValueType();
3747 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3748 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3749 }
3750 return SDValue();
3751}
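// Illustrative sketch (editor-added; instruction mnemonics are assumed): the
// cmp-eq-zero path above relies on the classic PowerPC idiom for (a == 0):
//   cntlzw r3, r3    ; count leading zeros, yields 32 iff r3 == 0
//   srwi   r3, r3, 5 ; 32 >> 5 == 1, any value < 32 becomes 0
// and the xor rewrite turns (a == b) into ((a ^ b) == 0) so the same
// sequence applies to general equality compares.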
3752
3753SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3754 SDNode *Node = Op.getNode();
3755 EVT VT = Node->getValueType(0);
3756 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3757 SDValue InChain = Node->getOperand(0);
3758 SDValue VAListPtr = Node->getOperand(1);
3759 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3760 SDLoc dl(Node);
3761
3762 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3763
3764 // gpr_index
3765 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3766 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3767 InChain = GprIndex.getValue(1);
3768
3769 if (VT == MVT::i64) {
3770 // Check if GprIndex is even
3771 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3772 DAG.getConstant(1, dl, MVT::i32));
3773 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3774 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3775 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3776 DAG.getConstant(1, dl, MVT::i32));
3777 // Align GprIndex to be even if it isn't
3778 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3779 GprIndex);
3780 }
3781
3782 // fpr index is 1 byte after gpr
3783 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3784 DAG.getConstant(1, dl, MVT::i32));
3785
3786 // fpr
3787 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3788 FprPtr, MachinePointerInfo(SV), MVT::i8);
3789 InChain = FprIndex.getValue(1);
3790
3791 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3792 DAG.getConstant(8, dl, MVT::i32));
3793
3794 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3795 DAG.getConstant(4, dl, MVT::i32));
3796
3797 // areas
3798 SDValue OverflowArea =
3799 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3800 InChain = OverflowArea.getValue(1);
3801
3802 SDValue RegSaveArea =
3803 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3804 InChain = RegSaveArea.getValue(1);
3805
3806 // select overflow_area if index >= 8
3807 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3808 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3809
3810 // adjustment constant gpr_index * 4/8
3811 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3812 VT.isInteger() ? GprIndex : FprIndex,
3813 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3814 MVT::i32));
3815
3816 // OurReg = RegSaveArea + RegConstant
3817 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3818 RegConstant);
3819
3820 // Floating types are 32 bytes into RegSaveArea
3821 if (VT.isFloatingPoint())
3822 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3823 DAG.getConstant(32, dl, MVT::i32));
3824
3825 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3826 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3827 VT.isInteger() ? GprIndex : FprIndex,
3828 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3829 MVT::i32));
3830
3831 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3832 VT.isInteger() ? VAListPtr : FprPtr,
3833 MachinePointerInfo(SV), MVT::i8);
3834
3835 // determine if we should load from reg_save_area or overflow_area
3836 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3837
3838 // increase overflow_area by 4/8 if gpr/fpr index >= 8
3839 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3840 DAG.getConstant(VT.isInteger() ? 4 : 8,
3841 dl, MVT::i32));
3842
3843 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3844 OverflowAreaPlusN);
3845
3846 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3847 MachinePointerInfo(), MVT::i32);
3848
3849 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3850}
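// Worked example (editor-added): for va_arg of type i64 with gpr == 3, the
// index is rounded up to the even value 4, so the value is read from the
// r7/r8 pair (gpr == 0 corresponds to r3) and the stored index becomes 6;
// once the index reaches 8 the compare above selects the overflow area
// instead of the register save area.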
3851
3852SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3853 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3854
3855 // We have to copy the entire va_list struct:
3856 // 2*sizeof(char) + 2 bytes of padding + 2*sizeof(char*) = 12 bytes
3857 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3858 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3859 false, true, false, MachinePointerInfo(),
3860 MachinePointerInfo());
3861}
3862
3863SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3864 SelectionDAG &DAG) const {
3865 if (Subtarget.isAIXABI())
3866 report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3867
3868 return Op.getOperand(0);
3869}
3870
3871SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3872 MachineFunction &MF = DAG.getMachineFunction();
3873 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3874
3875 assert((Op.getOpcode() == ISD::INLINEASM ||
3876 Op.getOpcode() == ISD::INLINEASM_BR) &&
3877 "Expecting Inline ASM node.");
3878
3879 // If an LR store is already known to be required then there is no point in
3880 // checking this ASM as well.
3881 if (MFI.isLRStoreRequired())
3882 return Op;
3883
3884 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3885 // type MVT::Glue. We want to ignore this last operand if that is the case.
3886 unsigned NumOps = Op.getNumOperands();
3887 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3888 --NumOps;
3889
3890 // Check all operands that may contain the LR.
3891 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3892 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3893 unsigned NumVals = Flags.getNumOperandRegisters();
3894 ++i; // Skip the ID value.
3895
3896 switch (Flags.getKind()) {
3897 default:
3898 llvm_unreachable("Bad flags!");
3899 case InlineAsm::Kind::RegUse:
3900 case InlineAsm::Kind::Imm:
3901 case InlineAsm::Kind::Mem:
3902 i += NumVals;
3903 break;
3904 case InlineAsm::Kind::Clobber:
3905 case InlineAsm::Kind::RegDef:
3906 case InlineAsm::Kind::RegDefEarlyClobber: {
3907 for (; NumVals; --NumVals, ++i) {
3908 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3909 if (Reg != PPC::LR && Reg != PPC::LR8)
3910 continue;
3911 MFI.setLRStoreRequired();
3912 return Op;
3913 }
3914 break;
3915 }
3916 }
3917 }
3918
3919 return Op;
3920}
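// Illustrative example (editor-added): inline assembly such as
//   asm volatile("bl callee" ::: "lr");
// lists LR as a clobber, so the operand scan above marks the function as
// requiring an LR store in the prologue.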
3921
3922SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3923 SelectionDAG &DAG) const {
3924 if (Subtarget.isAIXABI())
3925 report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3926
3927 SDValue Chain = Op.getOperand(0);
3928 SDValue Trmp = Op.getOperand(1); // trampoline
3929 SDValue FPtr = Op.getOperand(2); // nested function
3930 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3931 SDLoc dl(Op);
3932
3933 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3934 bool isPPC64 = (PtrVT == MVT::i64);
3935 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3936
3937 TargetLowering::ArgListTy Args;
3938 TargetLowering::ArgListEntry Entry;
3939
3940 Entry.Ty = IntPtrTy;
3941 Entry.Node = Trmp; Args.push_back(Entry);
3942
3943 // TrampSize == (isPPC64 ? 48 : 40);
3944 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3945 isPPC64 ? MVT::i64 : MVT::i32);
3946 Args.push_back(Entry);
3947
3948 Entry.Node = FPtr; Args.push_back(Entry);
3949 Entry.Node = Nest; Args.push_back(Entry);
3950
3951 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3952 TargetLowering::CallLoweringInfo CLI(DAG);
3953 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3954 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
3955 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3956
3957 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3958 return CallResult.second;
3959}
3960
3961SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3962 MachineFunction &MF = DAG.getMachineFunction();
3963 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3964 EVT PtrVT = getPointerTy(MF.getDataLayout());
3965
3966 SDLoc dl(Op);
3967
3968 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3969 // vastart just stores the address of the VarArgsFrameIndex slot into the
3970 // memory location argument.
3971 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3972 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3973 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3974 MachinePointerInfo(SV));
3975 }
3976
3977 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3978 // We assume the given va_list is already allocated.
3979 //
3980 // typedef struct {
3981 // char gpr; /* index into the array of 8 GPRs
3982 // * stored in the register save area
3983 // * gpr=0 corresponds to r3,
3984 // * gpr=1 to r4, etc.
3985 // */
3986 // char fpr; /* index into the array of 8 FPRs
3987 // * stored in the register save area
3988 // * fpr=0 corresponds to f1,
3989 // * fpr=1 to f2, etc.
3990 // */
3991 // char *overflow_arg_area;
3992 // /* location on stack that holds
3993 // * the next overflow argument
3994 // */
3995 // char *reg_save_area;
3996 // /* where r3:r10 and f1:f8 (if saved)
3997 // * are stored
3998 // */
3999 // } va_list[1];
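// Worked layout (editor-added, derived from the struct above): gpr is at
// byte 0, fpr at byte 1, bytes 2-3 are padding, overflow_arg_area sits at
// byte 4 and reg_save_area at byte 8 -- 12 bytes total, matching the size
// copied by LowerVACOPY.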
4000
4001 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4002 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4003 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4004 PtrVT);
4005 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4006 PtrVT);
4007
4008 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4009 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4010
4011 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4012 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4013
4014 uint64_t FPROffset = 1;
4015 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4016
4017 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4018
4019 // Store first byte : number of int regs
4020 SDValue firstStore =
4021 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4022 MachinePointerInfo(SV), MVT::i8);
4023 uint64_t nextOffset = FPROffset;
4024 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4025 ConstFPROffset);
4026
4027 // Store second byte : number of float regs
4028 SDValue secondStore =
4029 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4030 MachinePointerInfo(SV, nextOffset), MVT::i8);
4031 nextOffset += StackOffset;
4032 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4033
4034 // Store second word : arguments given on stack
4035 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4036 MachinePointerInfo(SV, nextOffset));
4037 nextOffset += FrameOffset;
4038 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4039
4040 // Store third word : arguments given in registers
4041 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4042 MachinePointerInfo(SV, nextOffset));
4043}
4044
4045/// FPR - The set of FP registers that should be allocated for arguments
4046/// on Darwin and AIX.
4047static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4048 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4049 PPC::F11, PPC::F12, PPC::F13};
4050
4051/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4052/// the stack.
4053static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4054 unsigned PtrByteSize) {
4055 unsigned ArgSize = ArgVT.getStoreSize();
4056 if (Flags.isByVal())
4057 ArgSize = Flags.getByValSize();
4058
4059 // Round up to multiples of the pointer size, except for array members,
4060 // which are always packed.
4061 if (!Flags.isInConsecutiveRegs())
4062 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4063
4064 return ArgSize;
4065}
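// Worked example (editor-added): with PtrByteSize == 8, a 13-byte byval
// aggregate reserves ((13 + 8 - 1) / 8) * 8 == 16 bytes, while a 13-byte
// piece of a consecutive-register block stays at 13 bytes because array
// members are packed.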
4066
4067/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4068/// on the stack.
4069 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4070 ISD::ArgFlagsTy Flags,
4071 unsigned PtrByteSize) {
4072 Align Alignment(PtrByteSize);
4073
4074 // Altivec parameters are padded to a 16 byte boundary.
4075 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4076 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4077 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4078 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4079 Alignment = Align(16);
4080
4081 // ByVal parameters are aligned as requested.
4082 if (Flags.isByVal()) {
4083 auto BVAlign = Flags.getNonZeroByValAlign();
4084 if (BVAlign > PtrByteSize) {
4085 if (BVAlign.value() % PtrByteSize != 0)
4087 "ByVal alignment is not a multiple of the pointer size");
4088
4089 Alignment = BVAlign;
4090 }
4091 }
4092
4093 // Array members are always packed to their original alignment.
4094 if (Flags.isInConsecutiveRegs()) {
4095 // If the array member was split into multiple registers, the first
4096 // needs to be aligned to the size of the full type. (Except for
4097 // ppcf128, which is only aligned as its f64 components.)
4098 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4099 Alignment = Align(OrigVT.getStoreSize());
4100 else
4101 Alignment = Align(ArgVT.getStoreSize());
4102 }
4103
4104 return Alignment;
4105}
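// Worked example (editor-added): a byval argument annotated with align 32
// yields a 32-byte-aligned slot (32 > PtrByteSize and 32 % 8 == 0), while
// any Altivec/VSX vector type is padded to a 16-byte boundary regardless of
// its flags.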
4106
4107/// CalculateStackSlotUsed - Return whether this argument will use its
4108/// stack slot (instead of being passed in registers). ArgOffset,
4109/// AvailableFPRs, and AvailableVRs must hold the current argument
4110/// position, and will be updated to account for this argument.
4111static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4112 unsigned PtrByteSize, unsigned LinkageSize,
4113 unsigned ParamAreaSize, unsigned &ArgOffset,
4114 unsigned &AvailableFPRs,
4115 unsigned &AvailableVRs) {
4116 bool UseMemory = false;
4117
4118 // Respect alignment of argument on the stack.
4119 Align Alignment =
4120 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4121 ArgOffset = alignTo(ArgOffset, Alignment);
4122 // If there's no space left in the argument save area, we must
4123 // use memory (this check also catches zero-sized arguments).
4124 if (ArgOffset >= LinkageSize + ParamAreaSize)
4125 UseMemory = true;
4126
4127 // Allocate argument on the stack.
4128 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4129 if (Flags.isInConsecutiveRegsLast())
4130 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4131 // If we overran the argument save area, we must use memory
4132 // (this check catches arguments passed partially in memory)
4133 if (ArgOffset > LinkageSize + ParamAreaSize)
4134 UseMemory = true;
4135
4136 // However, if the argument is actually passed in an FPR or a VR,
4137 // we don't use memory after all.
4138 if (!Flags.isByVal()) {
4139 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4140 if (AvailableFPRs > 0) {
4141 --AvailableFPRs;
4142 return false;
4143 }
4144 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4145 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4146 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4147 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4148 if (AvailableVRs > 0) {
4149 --AvailableVRs;
4150 return false;
4151 }
4152 }
4153
4154 return UseMemory;
4155}
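// Note (editor-added): ArgOffset advances even when the argument lands in a
// register; e.g. a single f64 argument consumes one FPR, moves ArgOffset
// forward by 8, and the function returns false because no stack slot is
// actually read.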
4156
4157/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4158/// ensure minimum alignment required for target.
4159 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4160 unsigned NumBytes) {
4161 return alignTo(NumBytes, Lowering->getStackAlign());
4162}
4163
4164SDValue PPCTargetLowering::LowerFormalArguments(
4165 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4166 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4167 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4168 if (Subtarget.isAIXABI())
4169 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4170 InVals);
4171 if (Subtarget.is64BitELFABI())
4172 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4173 InVals);
4174 assert(Subtarget.is32BitELFABI());
4175 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4176 InVals);
4177}
4178
4179SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4180 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4181 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4182 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4183
4184 // 32-bit SVR4 ABI Stack Frame Layout:
4185 // +-----------------------------------+
4186 // +--> | Back chain |
4187 // | +-----------------------------------+
4188 // | | Floating-point register save area |
4189 // | +-----------------------------------+
4190 // | | General register save area |
4191 // | +-----------------------------------+
4192 // | | CR save word |
4193 // | +-----------------------------------+
4194 // | | VRSAVE save word |
4195 // | +-----------------------------------+
4196 // | | Alignment padding |
4197 // | +-----------------------------------+
4198 // | | Vector register save area |
4199 // | +-----------------------------------+
4200 // | | Local variable space |
4201 // | +-----------------------------------+
4202 // | | Parameter list area |
4203 // | +-----------------------------------+
4204 // | | LR save word |
4205 // | +-----------------------------------+
4206 // SP--> +--- | Back chain |
4207 // +-----------------------------------+
4208 //
4209 // Specifications:
4210 // System V Application Binary Interface PowerPC Processor Supplement
4211 // AltiVec Technology Programming Interface Manual
4212
4213 MachineFunction &MF = DAG.getMachineFunction();
4214 MachineFrameInfo &MFI = MF.getFrameInfo();
4215 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4216
4217 EVT PtrVT = getPointerTy(MF.getDataLayout());
4218 // Potential tail calls could cause overwriting of argument stack slots.
4219 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4220 (CallConv == CallingConv::Fast));
4221 const Align PtrAlign(4);
4222
4223 // Assign locations to all of the incoming arguments.
4224 SmallVector<CCValAssign, 16> ArgLocs;
4225 PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4226 *DAG.getContext());
4227
4228 // Reserve space for the linkage area on the stack.
4229 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4230 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4231 if (useSoftFloat())
4232 CCInfo.PreAnalyzeFormalArguments(Ins);
4233
4234 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4235 CCInfo.clearWasPPCF128();
4236
4237 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4238 CCValAssign &VA = ArgLocs[i];
4239
4240 // Arguments stored in registers.
4241 if (VA.isRegLoc()) {
4242 const TargetRegisterClass *RC;
4243 EVT ValVT = VA.getValVT();
4244
4245 switch (ValVT.getSimpleVT().SimpleTy) {
4246 default:
4247 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4248 case MVT::i1:
4249 case MVT::i32:
4250 RC = &PPC::GPRCRegClass;
4251 break;
4252 case MVT::f32:
4253 if (Subtarget.hasP8Vector())
4254 RC = &PPC::VSSRCRegClass;
4255 else if (Subtarget.hasSPE())
4256 RC = &PPC::GPRCRegClass;
4257 else
4258 RC = &PPC::F4RCRegClass;
4259 break;
4260 case MVT::f64:
4261 if (Subtarget.hasVSX())
4262 RC = &PPC::VSFRCRegClass;
4263 else if (Subtarget.hasSPE())
4264 // SPE passes doubles in GPR pairs.
4265 RC = &PPC::GPRCRegClass;
4266 else
4267 RC = &PPC::F8RCRegClass;
4268 break;
4269 case MVT::v16i8:
4270 case MVT::v8i16:
4271 case MVT::v4i32:
4272 RC = &PPC::VRRCRegClass;
4273 break;
4274 case MVT::v4f32:
4275 RC = &PPC::VRRCRegClass;
4276 break;
4277 case MVT::v2f64:
4278 case MVT::v2i64:
4279 RC = &PPC::VRRCRegClass;
4280 break;
4281 }
4282
4283 SDValue ArgValue;
4284 // Transform the arguments stored in physical registers into
4285 // virtual ones.
4286 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4287 assert(i + 1 < e && "No second half of double precision argument");
4288 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4289 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4290 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4291 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4292 if (!Subtarget.isLittleEndian())
4293 std::swap (ArgValueLo, ArgValueHi);
4294 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4295 ArgValueHi);
4296 } else {
4297 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4298 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4299 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4300 if (ValVT == MVT::i1)
4301 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4302 }
4303
4304 InVals.push_back(ArgValue);
4305 } else {
4306 // Argument stored in memory.
4307 assert(VA.isMemLoc());
4308
4309 // Get the extended size of the argument type on the stack
4310 unsigned ArgSize = VA.getLocVT().getStoreSize();
4311 // Get the actual size of the argument type
4312 unsigned ObjSize = VA.getValVT().getStoreSize();
4313 unsigned ArgOffset = VA.getLocMemOffset();
4314 // Stack objects in PPC32 are right justified.
4315 ArgOffset += ArgSize - ObjSize;
4316 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4317
4318 // Create load nodes to retrieve arguments from the stack.
4319 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4320 InVals.push_back(
4321 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4322 }
4323 }
4324
4325 // Assign locations to all of the incoming aggregate by value arguments.
4326 // Aggregates passed by value are stored in the local variable space of the
4327 // caller's stack frame, right above the parameter list area.
4328 SmallVector<CCValAssign, 16> ByValArgLocs;
4329 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4330 ByValArgLocs, *DAG.getContext());
4331
4332 // Reserve stack space for the allocations in CCInfo.
4333 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4334
4335 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4336
4337 // Area that is at least reserved in the caller of this function.
4338 unsigned MinReservedArea = CCByValInfo.getStackSize();
4339 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4340
4341 // Set the size that is at least reserved in caller of this function. Tail
4342 // call optimized function's reserved stack space needs to be aligned so that
4343 // taking the difference between two stack areas will result in an aligned
4344 // stack.
4345 MinReservedArea =
4346 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4347 FuncInfo->setMinReservedArea(MinReservedArea);
4348
4349 SmallVector<SDValue, 8> MemOps;
4350
4351 // If the function takes variable number of arguments, make a frame index for
4352 // the start of the first vararg value... for expansion of llvm.va_start.
4353 if (isVarArg) {
4354 static const MCPhysReg GPArgRegs[] = {
4355 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4356 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4357 };
4358 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4359
4360 static const MCPhysReg FPArgRegs[] = {
4361 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4362 PPC::F8
4363 };
4364 unsigned NumFPArgRegs = std::size(FPArgRegs);
4365
4366 if (useSoftFloat() || hasSPE())
4367 NumFPArgRegs = 0;
4368
4369 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4370 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4371
4372 // Make room for NumGPArgRegs and NumFPArgRegs.
4373 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4374 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4375
4376 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4377 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4378
4379 FuncInfo->setVarArgsFrameIndex(
4380 MFI.CreateStackObject(Depth, Align(8), false));
4381 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4382
4383 // The fixed integer arguments of a variadic function are stored to the
4384 // VarArgsFrameIndex on the stack so that they may be loaded by
4385 // dereferencing the result of va_next.
4386 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4387 // Get an existing live-in vreg, or add a new one.
4388 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4389 if (!VReg)
4390 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4391
4392 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4393 SDValue Store =
4394 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4395 MemOps.push_back(Store);
4396 // Increment the address by four for the next argument to store
4397 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4398 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4399 }
4400
4401 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4402 // is set.
4403 // The double arguments are stored to the VarArgsFrameIndex
4404 // on the stack.
4405 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4406 // Get an existing live-in vreg, or add a new one.
4407 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4408 if (!VReg)
4409 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4410
4411 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4412 SDValue Store =
4413 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4414 MemOps.push_back(Store);
4415 // Increment the address by eight for the next argument to store
4416 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4417 PtrVT);
4418 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4419 }
4420 }
4421
4422 if (!MemOps.empty())
4423 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4424
4425 return Chain;
4426}
4427
4428// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4429// value to MVT::i64 and then truncate to the correct register size.
4430SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4431 EVT ObjectVT, SelectionDAG &DAG,
4432 SDValue ArgVal,
4433 const SDLoc &dl) const {
4434 if (Flags.isSExt())
4435 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4436 DAG.getValueType(ObjectVT));
4437 else if (Flags.isZExt())
4438 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4439 DAG.getValueType(ObjectVT));
4440
4441 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4442}
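// Illustrative sketch (editor-added): for a signext i32 argument arriving
// in a 64-bit GPR this produces
//   t1 = AssertSext t0, ValueType:i32
//   t2 = truncate t1 to i32
// so later combines know the upper 32 bits already held the sign extension.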
4443
4444SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4445 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4446 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4447 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4448 // TODO: add description of PPC stack frame format, or at least some docs.
4449 //
4450 bool isELFv2ABI = Subtarget.isELFv2ABI();
4451 bool isLittleEndian = Subtarget.isLittleEndian();
4452 MachineFunction &MF = DAG.getMachineFunction();
4453 MachineFrameInfo &MFI = MF.getFrameInfo();
4454 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4455
4456 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4457 "fastcc not supported on varargs functions");
4458
4459 EVT PtrVT = getPointerTy(MF.getDataLayout());
4460 // Potential tail calls could cause overwriting of argument stack slots.
4461 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4462 (CallConv == CallingConv::Fast));
4463 unsigned PtrByteSize = 8;
4464 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4465
4466 static const MCPhysReg GPR[] = {
4467 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4468 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4469 };
4470 static const MCPhysReg VR[] = {
4471 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4472 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4473 };
4474
4475 const unsigned Num_GPR_Regs = std::size(GPR);
4476 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4477 const unsigned Num_VR_Regs = std::size(VR);
4478
4479 // Do a first pass over the arguments to determine whether the ABI
4480 // guarantees that our caller has allocated the parameter save area
4481 // on its stack frame. In the ELFv1 ABI, this is always the case;
4482 // in the ELFv2 ABI, it is true if this is a vararg function or if
4483 // any parameter is located in a stack slot.
4484
4485 bool HasParameterArea = !isELFv2ABI || isVarArg;
4486 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4487 unsigned NumBytes = LinkageSize;
4488 unsigned AvailableFPRs = Num_FPR_Regs;
4489 unsigned AvailableVRs = Num_VR_Regs;
4490 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4491 if (Ins[i].Flags.isNest())
4492 continue;
4493
4494 if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
4495 PtrByteSize, LinkageSize, ParamAreaSize,
4496 NumBytes, AvailableFPRs, AvailableVRs))
4497 HasParameterArea = true;
4498 }
4499
4500 // Add DAG nodes to load the arguments or copy them out of registers. On
4501 // entry to a function on PPC, the arguments start after the linkage area,
4502 // although the first ones are often in registers.
4503
4504 unsigned ArgOffset = LinkageSize;
4505 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4506 SmallVector<SDValue, 8> MemOps;
4507 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4508 unsigned CurArgIdx = 0;
4509 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4510 SDValue ArgVal;
4511 bool needsLoad = false;
4512 EVT ObjectVT = Ins[ArgNo].VT;
4513 EVT OrigVT = Ins[ArgNo].ArgVT;
4514 unsigned ObjSize = ObjectVT.getStoreSize();
4515 unsigned ArgSize = ObjSize;
4516 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4517 if (Ins[ArgNo].isOrigArg()) {
4518 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4519 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4520 }
4521 // We re-align the argument offset for each argument, except under the
4522 // fast calling convention, where we do so only when the argument will
4523 // actually use a stack slot.
4524 unsigned CurArgOffset;
4525 Align Alignment;
4526 auto ComputeArgOffset = [&]() {
4527 /* Respect alignment of argument on the stack. */
4528 Alignment =
4529 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4530 ArgOffset = alignTo(ArgOffset, Alignment);
4531 CurArgOffset = ArgOffset;
4532 };
4533
4534 if (CallConv != CallingConv::Fast) {
4535 ComputeArgOffset();
4536
4537 /* Compute GPR index associated with argument offset. */
4538 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4539 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4540 }
4541
4542 // FIXME the codegen can be much improved in some cases.
4543 // We do not have to keep everything in memory.
4544 if (Flags.isByVal()) {
4545 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4546
4547 if (CallConv == CallingConv::Fast)
4548 ComputeArgOffset();
4549
4550 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
4551 ObjSize = Flags.getByValSize();
4552 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4553 // Empty aggregate parameters do not take up registers. Examples:
4554 // struct { } a;
4555 // union { } b;
4556 // int c[0];
4557 // etc. However, we have to provide a place-holder in InVals, so
4558 // pretend we have an 8-byte item at the current address for that
4559 // purpose.
4560 if (!ObjSize) {
4561 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4562 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4563 InVals.push_back(FIN);
4564 continue;
4565 }
4566
4567 // Create a stack object covering all stack doublewords occupied
4568 // by the argument. If the argument is (fully or partially) on
4569 // the stack, or if the argument is fully in registers but the
4570 // caller has allocated the parameter save area anyway, we can refer
4571 // directly to the caller's stack frame. Otherwise, create a
4572 // local copy in our own frame.
4573 int FI;
4574 if (HasParameterArea ||
4575 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4576 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4577 else
4578 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4579 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4580
4581 // Handle aggregates smaller than 8 bytes.
4582 if (ObjSize < PtrByteSize) {
4583 // The value of the object is its address, which differs from the
4584 // address of the enclosing doubleword on big-endian systems.
4585 SDValue Arg = FIN;
4586 if (!isLittleEndian) {
4587 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4588 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4589 }
4590 InVals.push_back(Arg);
4591
4592 if (GPR_idx != Num_GPR_Regs) {
4593 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4594 FuncInfo->addLiveInAttr(VReg, Flags);
4595 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4596 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4597 SDValue Store =
4598 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4599 MachinePointerInfo(&*FuncArg), ObjType);
4600 MemOps.push_back(Store);
4601 }
4602 // Whether we copied from a register or not, advance the offset
4603 // into the parameter save area by a full doubleword.
4604 ArgOffset += PtrByteSize;
4605 continue;
4606 }
4607
4608 // The value of the object is its address, which is the address of
4609 // its first stack doubleword.
4610 InVals.push_back(FIN);
4611
4612 // Store whatever pieces of the object are in registers to memory.
4613 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4614 if (GPR_idx == Num_GPR_Regs)
4615 break;
4616
4617 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4618 FuncInfo->addLiveInAttr(VReg, Flags);
4619 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4620 SDValue Addr = FIN;
4621 if (j) {
4622 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4623 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4624 }
4625 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4626 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4627 SDValue Store =
4628 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4629 MachinePointerInfo(&*FuncArg, j), ObjType);
4630 MemOps.push_back(Store);
4631 ++GPR_idx;
4632 }
4633 ArgOffset += ArgSize;
4634 continue;
4635 }
4636
4637 switch (ObjectVT.getSimpleVT().SimpleTy) {
4638 default: llvm_unreachable("Unhandled argument type!");
4639 case MVT::i1:
4640 case MVT::i32:
4641 case MVT::i64:
4642 if (Flags.isNest()) {
4643 // The 'nest' parameter, if any, is passed in R11.
4644 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4645 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4646
4647 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4648 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4649
4650 break;
4651 }
4652
4653 // These can be scalar arguments or elements of an integer array type
4654 // passed directly. Clang may use those instead of "byval" aggregate
4655 // types to avoid forcing arguments to memory unnecessarily.
4656 if (GPR_idx != Num_GPR_Regs) {
4657 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4658 FuncInfo->addLiveInAttr(VReg, Flags);
4659 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4660
4661 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4662 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4663 // value to MVT::i64 and then truncate to the correct register size.
4664 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4665 } else {
4666 if (CallConv == CallingConv::Fast)
4667 ComputeArgOffset();
4668
4669 needsLoad = true;
4670 ArgSize = PtrByteSize;
4671 }
4672 if (CallConv != CallingConv::Fast || needsLoad)
4673 ArgOffset += 8;
4674 break;
4675
4676 case MVT::f32:
4677 case MVT::f64:
4678 // These can be scalar arguments or elements of a float array type
4679 // passed directly. The latter are used to implement ELFv2 homogeneous
4680 // float aggregates.
4681 if (FPR_idx != Num_FPR_Regs) {
4682 unsigned VReg;
4683
4684 if (ObjectVT == MVT::f32)
4685 VReg = MF.addLiveIn(FPR[FPR_idx],
4686 Subtarget.hasP8Vector()
4687 ? &PPC::VSSRCRegClass
4688 : &PPC::F4RCRegClass);
4689 else
4690 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4691 ? &PPC::VSFRCRegClass
4692 : &PPC::F8RCRegClass);
4693
4694 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4695 ++FPR_idx;
4696 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4697 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4698 // once we support fp <-> gpr moves.
4699
4700 // This can only ever happen in the presence of f32 array types,
4701 // since otherwise we never run out of FPRs before running out
4702 // of GPRs.
4703 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4704 FuncInfo->addLiveInAttr(VReg, Flags);
4705 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4706
4707 if (ObjectVT == MVT::f32) {
4708 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4709 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4710 DAG.getConstant(32, dl, MVT::i32));
4711 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4712 }
4713
4714 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4715 } else {
4716 if (CallConv == CallingConv::Fast)
4717 ComputeArgOffset();
4718
4719 needsLoad = true;
4720 }
4721
4722 // When passing an array of floats, the array occupies consecutive
4723 // space in the argument area; only round up to the next doubleword
4724 // at the end of the array. Otherwise, each float takes 8 bytes.
4725 if (CallConv != CallingConv::Fast || needsLoad) {
4726 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4727 ArgOffset += ArgSize;
4728 if (Flags.isInConsecutiveRegsLast())
4729 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4730 }
4731 break;
4732 case MVT::v4f32:
4733 case MVT::v4i32:
4734 case MVT::v8i16:
4735 case MVT::v16i8:
4736 case MVT::v2f64:
4737 case MVT::v2i64:
4738 case MVT::v1i128:
4739 case MVT::f128:
4740 // These can be scalar arguments or elements of a vector array type
4741 // passed directly. The latter are used to implement ELFv2 homogeneous
4742 // vector aggregates.
4743 if (VR_idx != Num_VR_Regs) {
4744 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4745 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4746 ++VR_idx;
4747 } else {
4748 if (CallConv == CallingConv::Fast)
4749 ComputeArgOffset();
4750 needsLoad = true;
4751 }
4752 if (CallConv != CallingConv::Fast || needsLoad)
4753 ArgOffset += 16;
4754 break;
4755 }
4756
4757 // We need to load the argument to a virtual register if we determined
4758 // above that we ran out of physical registers of the appropriate type.
4759 if (needsLoad) {
4760 if (ObjSize < ArgSize && !isLittleEndian)
4761 CurArgOffset += ArgSize - ObjSize;
4762 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4763 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4764 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4765 }
4766
4767 InVals.push_back(ArgVal);
4768 }
4769
4770 // Area that is at least reserved in the caller of this function.
4771 unsigned MinReservedArea;
4772 if (HasParameterArea)
4773 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4774 else
4775 MinReservedArea = LinkageSize;
4776
4777 // Set the size that is at least reserved in caller of this function. Tail
4778 // call optimized functions' reserved stack space needs to be aligned so that
4779 // taking the difference between two stack areas will result in an aligned
4780 // stack.
4781 MinReservedArea =
4782 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4783 FuncInfo->setMinReservedArea(MinReservedArea);
4784
4785 // If the function takes variable number of arguments, make a frame index for
4786 // the start of the first vararg value... for expansion of llvm.va_start.
4787 // The ELFv2 ABI spec states:
4788 // C programs that are intended to be *portable* across different compilers
4789 // and architectures must use the header file <stdarg.h> to deal with variable
4790 // argument lists.
4791 if (isVarArg && MFI.hasVAStart()) {
4792 int Depth = ArgOffset;
4793
4794 FuncInfo->setVarArgsFrameIndex(
4795 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4796 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4797
4798 // If this function is vararg, store any remaining integer argument regs
4799 // to their spots on the stack so that they may be loaded by dereferencing
4800 // the result of va_next.
4801 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4802 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4803 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4804 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4805 SDValue Store =
4806 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4807 MemOps.push_back(Store);
4808 // Increment the address by eight for the next argument to store
4809 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4810 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4811 }
4812 }
4813
4814 if (!MemOps.empty())
4815 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4816
4817 return Chain;
4818}
4819
4820/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4821/// adjusted to accommodate the arguments for the tailcall.
4822static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4823 unsigned ParamSize) {
4824
4825 if (!isTailCall) return 0;
4826
4827 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4828 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4829 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4830 // Remember only if the new adjustment is bigger.
4831 if (SPDiff < FI->getTailCallSPDelta())
4832 FI->setTailCallSPDelta(SPDiff);
4833
4834 return SPDiff;
4835}
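// Worked example (editor-added): if the caller's reserved argument area is
// 112 bytes and the tail call needs 144 bytes of parameters, SPDiff is
// 112 - 144 == -32, i.e. the stack must be grown by 32 bytes; only the most
// negative delta seen so far is kept on PPCFunctionInfo.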
4836
4837static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4838
4839static bool callsShareTOCBase(const Function *Caller,
4840 const GlobalValue *CalleeGV,
4841 const TargetMachine &TM) {
4842 // It does not make sense to call callsShareTOCBase() with a caller that
4843 // is PC Relative since PC Relative callers do not have a TOC.
4844#ifndef NDEBUG
4845 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4846 assert(!STICaller->isUsingPCRelativeCalls() &&
4847 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4848#endif
4849
4850 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4851 // don't have enough information to determine if the caller and callee share
4852 // the same TOC base, so we have to pessimistically assume they don't for
4853 // correctness.
4854 if (!CalleeGV)
4855 return false;
4856
4857 // If the callee is preemptable, then the static linker will use a plt-stub
4858 // which saves the toc to the stack, and needs a nop after the call
4859 // instruction to convert to a toc-restore.
4860 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4861 return false;
4862
4863 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4864 // We may need a TOC restore in the situation where the caller requires a
4865 // valid TOC but the callee is PC Relative and does not.
4866 const Function *F = dyn_cast<Function>(CalleeGV);
4867 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4868
4869 // If we have an Alias we can try to get the function from there.
4870 if (Alias) {
4871 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4872 F = dyn_cast<Function>(GlobalObj);
4873 }
4874
4875 // If we still have no valid function pointer we do not have enough
4876 // information to determine if the callee uses PC Relative calls so we must
4877 // assume that it does.
4878 if (!F)
4879 return false;
4880
4881 // If the callee uses PC Relative we cannot guarantee that the callee won't
4882 // clobber the TOC of the caller and so we must assume that the two
4883 // functions do not share a TOC base.
4884 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4885 if (STICallee->isUsingPCRelativeCalls())
4886 return false;
4887
4888 // If the GV is not a strong definition then we need to assume it can be
4889 // replaced by another function at link time. The function that replaces
4890 // it may not share the same TOC as the caller since the callee may be
4891 // replaced by a PC Relative version of the same function.
4892 if (!CalleeGV->isStrongDefinitionForLinker())
4893 return false;
4894
4895 // The medium and large code models are expected to provide a sufficiently
4896 // large TOC to satisfy all data addressing needs of a module with a
4897 // single TOC.
4898 if (CodeModel::Medium == TM.getCodeModel() ||
4899 CodeModel::Large == TM.getCodeModel())
4900 return true;
4901
4902 // Any explicitly-specified sections and section prefixes must also match.
4903 // Also, if we're using -ffunction-sections, then each function is always in
4904 // a different section (the same is true for COMDAT functions).
4905 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4906 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4907 return false;
4908 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4909 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4910 return false;
4911 }
4912
4913 return true;
4914}
4915
4916static bool
4917 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4918 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4919 assert(Subtarget.is64BitELFABI());
4920
4921 const unsigned PtrByteSize = 8;
4922 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4923
4924 static const MCPhysReg GPR[] = {
4925 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4926 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4927 };
4928 static const MCPhysReg VR[] = {
4929 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4930 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4931 };
4932
4933 const unsigned NumGPRs = std::size(GPR);
4934 const unsigned NumFPRs = 13;
4935 const unsigned NumVRs = std::size(VR);
4936 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4937
4938 unsigned NumBytes = LinkageSize;
4939 unsigned AvailableFPRs = NumFPRs;
4940 unsigned AvailableVRs = NumVRs;
4941
4942 for (const ISD::OutputArg& Param : Outs) {
4943 if (Param.Flags.isNest()) continue;
4944
4945 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4946 LinkageSize, ParamAreaSize, NumBytes,
4947 AvailableFPRs, AvailableVRs))
4948 return true;
4949 }
4950 return false;
4951}
4952
4953static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4954 if (CB.arg_size() != CallerFn->arg_size())
4955 return false;
4956
4957 auto CalleeArgIter = CB.arg_begin();
4958 auto CalleeArgEnd = CB.arg_end();
4959 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4960
4961 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4962 const Value* CalleeArg = *CalleeArgIter;
4963 const Value* CallerArg = &(*CallerArgIter);
4964 if (CalleeArg == CallerArg)
4965 continue;
4966
4967 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4968 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4969 // }
4970 // 1st argument of callee is undef and has the same type as caller.
4971 if (CalleeArg->getType() == CallerArg->getType() &&
4972 isa<UndefValue>(CalleeArg))
4973 continue;
4974
4975 return false;
4976 }
4977
4978 return true;
4979}
4980
4981 // Returns true if TCO is possible between the caller's and callee's
4982 // calling conventions.
4983static bool
4984 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4985 CallingConv::ID CalleeCC) {
4986 // Tail calls are possible with fastcc and ccc.
4987 auto isTailCallableCC = [] (CallingConv::ID CC){
4988 return CC == CallingConv::C || CC == CallingConv::Fast;
4989 };
4990 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4991 return false;
4992
4993 // We can safely tail call both fastcc and ccc callees from a c calling
4994 // convention caller. If the caller is fastcc, we may have less stack space
4995 // than a non-fastcc caller with the same signature so disable tail-calls in
4996 // that case.
4997 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4998}
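// Resulting matrix (editor-added): a C caller may tail call C or fastcc
// callees; a fastcc caller may tail call only fastcc callees, since a
// fastcc caller may have reserved less stack than a C callee would expect.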
4999
5000bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5001 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5002 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5003 const SmallVectorImpl<ISD::OutputArg> &Outs,
5004 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5005 bool isCalleeExternalSymbol) const {
5006 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5007
5008 if (DisableSCO && !TailCallOpt) return false;
5009
5010 // Variadic argument functions are not supported.
5011 if (isVarArg) return false;
5012
5013 // Check that the calling conventions are compatible for tco.
5014 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5015 return false;
5016
5017 // A caller that contains any byval parameter is not supported.
5018 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5019 return false;
5020
5021 // A callee that contains any byval parameter is not supported either.
5022 // Note: This is a quick workaround, because in some cases, e.g.
5023 // caller's stack size > callee's stack size, we are still able to apply
5024 // sibling call optimization. For example, gcc is able to do SCO for caller1
5025 // in the following example, but not for caller2.
5026 // struct test {
5027 // long int a;
5028 // char ary[56];
5029 // } gTest;
5030 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5031 // b->a = v.a;
5032 // return 0;
5033 // }
5034 // void caller1(struct test a, struct test c, struct test *b) {
5035 // callee(gTest, b); }
5036 // void caller2(struct test *b) { callee(gTest, b); }
5037 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5038 return false;
5039
5040 // If callee and caller use different calling conventions, we cannot pass
5041 // parameters on stack since offsets for the parameter area may be different.
5042 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5043 return false;
5044
5045 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5046 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5047 // callee potentially have different TOC bases then we cannot tail call since
5048 // we need to restore the TOC pointer after the call.
5049 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5050 // We cannot guarantee this for indirect calls or calls to external functions.
5051 // When PC-Relative addressing is used, the concept of the TOC is no longer
5052 // applicable so this check is not required.
5053 // Check first for indirect calls.
5054 if (!Subtarget.isUsingPCRelativeCalls() &&
5055 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5056 return false;
5057
5058 // Check if we share the TOC base.
5059 if (!Subtarget.isUsingPCRelativeCalls() &&
5060 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5061 return false;
5062
5063 // TCO allows altering the callee's ABI, so we don't have to check further.
5064 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5065 return true;
5066
5067 if (DisableSCO) return false;
5068
5069 // If the callee uses the same argument list as the caller, then we can
5070 // apply SCO in this case. If it does not, then we need to check if the
5071 // callee needs stack for passing arguments.
5072 // PC Relative tail calls may not have a CallBase.
5073 // If there is no CallBase we cannot verify that the argument lists match,
5074 // so we assume that they don't.
5075 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5076 needStackSlotPassParameters(Subtarget, Outs))
5077 return false;
5078 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5079 return false;
5080
5081 return true;
5082}
5083
5084/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5085/// for tail call optimization. Targets which want to do tail call
5086/// optimization should implement this function.
5087bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5088 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5089 CallingConv::ID CallerCC, bool isVarArg,
5090 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5091 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5092 return false;
5093
5094 // Variable argument functions are not supported.
5095 if (isVarArg)
5096 return false;
5097
5098 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5099 // Functions containing by val parameters are not supported.
5100 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5101 return false;
5102
5103 // Non-PIC/GOT tail calls are supported.
5104 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5105 return true;
5106
5107 // At the moment we can only do local tail calls (in same module, hidden
5108 // or protected) if we are generating PIC.
5109 if (CalleeGV)
5110 return CalleeGV->hasHiddenVisibility() ||
5111 CalleeGV->hasProtectedVisibility();
5112 }
5113
5114 return false;
5115}
5116
5117 /// isBLACompatibleAddress - Return the immediate to use if the specified
5118 /// 32-bit value is representable in the immediate field of a BxA instruction.
5119 static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5120 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5121 if (!C) return nullptr;
5122
5123 int Addr = C->getZExtValue();
5124 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5125 SignExtend32<26>(Addr) != Addr)
5126 return nullptr; // Top 6 bits have to be sext of immediate.
5127
5128 return DAG
5129 .getConstant(
5130 (int)C->getZExtValue() >> 2, SDLoc(Op),
5131 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5132 .getNode();
5133}
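// Worked example (editor-added): address 0x2000 has its low two bits clear
// and sign-extends from 26 bits, so the returned immediate is
// 0x2000 >> 2 == 0x800; 0x2002 (misaligned) or 0x4000000 (does not fit in
// 26 signed bits) yields nullptr instead.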
5134
5135namespace {
5136
5137struct TailCallArgumentInfo {
5138 SDValue Arg;
5139 SDValue FrameIdxOp;
5140 int FrameIdx = 0;
5141
5142 TailCallArgumentInfo() = default;
5143};
5144
5145} // end anonymous namespace
5146
5147/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5148 static void StoreTailCallArgumentsToStackSlot(
5149 SelectionDAG &DAG, SDValue Chain,
5150 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5151 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5152 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5153 SDValue Arg = TailCallArgs[i].Arg;
5154 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5155 int FI = TailCallArgs[i].FrameIdx;
5156 // Store relative to framepointer.
5157 MemOpChains.push_back(DAG.getStore(
5158 Chain, dl, Arg, FIN,
5159 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5160 }
5161}
5162
5163/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5164/// the appropriate stack slot for the tail call optimized function call.
5165 static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5166 SDValue OldRetAddr, SDValue OldFP,
5167 int SPDiff, const SDLoc &dl) {
5168 if (SPDiff) {
5169 // Calculate the new stack slot for the return address.
5170 MachineFunction &MF = DAG.getMachineFunction();
5171 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5172 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5173 bool isPPC64 = Subtarget.isPPC64();
5174 int SlotSize = isPPC64 ? 8 : 4;
5175 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5176 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5177 NewRetAddrLoc, true);
5178 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5179 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
5180 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5181 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5182 }
5183 return Chain;
5184}
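// For instance (illustrative): if the callee needs a 32-byte larger argument
// area than the caller (SPDiff == -32), the return address is re-stored into
// a fixed object at getReturnSaveOffset() - 32, i.e. at its usual offset in
// the enlarged frame.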
5185
5186/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5187/// the position of the argument.
5188static void
5189CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
5190 SDValue Arg, int SPDiff, unsigned ArgOffset,
5191 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
5192 int Offset = ArgOffset + SPDiff;
5193 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5194 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5195 EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
5196 SDValue FIN = DAG.getFrameIndex(FI, VT);
5197 TailCallArgumentInfo Info;
5198 Info.Arg = Arg;
5199 Info.FrameIdxOp = FIN;
5200 Info.FrameIdx = FI;
5201 TailCallArguments.push_back(Info);
5202}
5203
5204/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and
5205/// return address stack slots. Returns the chain as result and the loaded
5206/// values in LROpOut/FPOpOut. Used when tail calling.
5207SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5208 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5209 SDValue &FPOpOut, const SDLoc &dl) const {
5210 if (SPDiff) {
5211 // Load the LR and FP stack slot for later adjusting.
5212 EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5213 LROpOut = getReturnAddrFrameIndex(DAG);
5214 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5215 Chain = SDValue(LROpOut.getNode(), 1);
5216 }
5217 return Chain;
5218}
5219
5220/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5221/// by "Src" to address "Dst" of size "Size". Alignment information is
5222/// specified by the specific parameter attribute. The copy will be passed as
5223/// a byval function parameter.
5224/// Sometimes what we are copying is the end of a larger object, the part that
5225/// does not fit in registers.
5226static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5227 SDValue Chain, ISD::ArgFlagsTy Flags,
5228 SelectionDAG &DAG, const SDLoc &dl) {
5229 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5230 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
5231 Flags.getNonZeroByValAlign(), false, false, false,
5232 MachinePointerInfo(), MachinePointerInfo());
5233}
5234
5235/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5236/// tail calls.
5237static void LowerMemOpCallTo(
5238 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5239 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5240 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5241 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5242 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5243 if (!isTailCall) {
5244 if (isVector) {
5245 SDValue StackPtr;
5246 if (isPPC64)
5247 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5248 else
5249 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5250 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5251 DAG.getConstant(ArgOffset, dl, PtrVT));
5252 }
5253 MemOpChains.push_back(
5254 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5255 // Calculate and remember argument location.
5256 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5257 TailCallArguments);
5258}
5259
5260static void
5261PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5262 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5263 SDValue FPOp,
5264 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5265 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5266 // might overwrite each other in case of tail call optimization.
5267 SmallVector<SDValue, 8> MemOpChains2;
5268 // Do not flag preceding copytoreg stuff together with the following stuff.
5269 InGlue = SDValue();
5270 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5271 MemOpChains2, dl);
5272 if (!MemOpChains2.empty())
5273 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5274
5275 // Store the return address to the appropriate stack slot.
5276 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5277
5278 // Emit callseq_end just before tailcall node.
5279 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5280 InGlue = Chain.getValue(1);
5281}
5282
5283// Is this global address that of a function that can be called by name? (as
5284// opposed to something that must hold a descriptor for an indirect call).
5285static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5286 if (GV) {
5287 if (GV->isThreadLocal())
5288 return false;
5289
5290 return GV->getValueType()->isFunctionTy();
5291 }
5292
5293 return false;
5294}
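// E.g. a GlobalAddressSDNode wrapping "declare void @f()" satisfies this,
// while a thread-local global or a non-function global value does not.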
5295
5296SDValue PPCTargetLowering::LowerCallResult(
5297 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5298 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5299 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5300 SmallVector<CCValAssign, 16> RVLocs;
5301 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5302 *DAG.getContext());
5303
5304 CCRetInfo.AnalyzeCallResult(
5305 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5306 ? RetCC_PPC_Cold
5307 : RetCC_PPC);
5308
5309 // Copy all of the result registers out of their specified physreg.
5310 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5311 CCValAssign &VA = RVLocs[i];
5312 assert(VA.isRegLoc() && "Can only return in registers!");
5313
5314 SDValue Val;
5315
5316 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5317 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5318 InGlue);
5319 Chain = Lo.getValue(1);
5320 InGlue = Lo.getValue(2);
5321 VA = RVLocs[++i]; // skip ahead to next loc
5322 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5323 InGlue);
5324 Chain = Hi.getValue(1);
5325 InGlue = Hi.getValue(2);
5326 if (!Subtarget.isLittleEndian())
5327 std::swap (Lo, Hi);
5328 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5329 } else {
5330 Val = DAG.getCopyFromReg(Chain, dl,
5331 VA.getLocReg(), VA.getLocVT(), InGlue);
5332 Chain = Val.getValue(1);
5333 InGlue = Val.getValue(2);
5334 }
5335
5336 switch (VA.getLocInfo()) {
5337 default: llvm_unreachable("Unknown loc info!");
5338 case CCValAssign::Full: break;
5339 case CCValAssign::AExt:
5340 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5341 break;
5342 case CCValAssign::ZExt:
5343 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5344 DAG.getValueType(VA.getValVT()));
5345 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5346 break;
5347 case CCValAssign::SExt:
5348 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5349 DAG.getValueType(VA.getValVT()));
5350 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5351 break;
5352 }
5353
5354 InVals.push_back(Val);
5355 }
5356
5357 return Chain;
5358}
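// Example of the SPE path above (illustrative): an f64 return value arrives
// as two i32 halves in consecutive GPRs (typically r3/r4) and is rebuilt
// with PPCISD::BUILD_SPE64; the halves are swapped on big-endian targets.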
5359
5360static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5361 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5362 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5363 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5364
5365 // PatchPoint calls are not indirect.
5366 if (isPatchPoint)
5367 return false;
5368
5369 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5370 return false;
5371
5372 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5373 // because the immediate function pointer points to a descriptor instead of
5374 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5375 // pointer immediate points to the global entry point, while the BLA would
5376 // need to jump to the local entry point (see rL211174).
5377 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5378 isBLACompatibleAddress(Callee, DAG))
5379 return false;
5380
5381 return true;
5382}
5383
5384// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5385static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5386 return Subtarget.isAIXABI() ||
5387 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5388}
5389
5390static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5391 const Function &Caller, const SDValue &Callee,
5392 const PPCSubtarget &Subtarget,
5393 const TargetMachine &TM,
5394 bool IsStrictFPCall = false) {
5395 if (CFlags.IsTailCall)
5396 return PPCISD::TC_RETURN;
5397
5398 unsigned RetOpc = 0;
5399 // This is a call through a function pointer.
5400 if (CFlags.IsIndirect) {
5401 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5402 // indirect calls. The save of the caller's TOC pointer to the stack will be
5403 // inserted into the DAG as part of call lowering. The restore of the TOC
5404 // pointer is modeled by using a pseudo instruction for the call opcode that
5405 // represents the 2 instruction sequence of an indirect branch and link,
5406 // immediately followed by a load of the TOC pointer from the stack save
5407 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5408 // as it is not saved or used.
5409 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5410 : PPCISD::BCTRL;
5411 } else if (Subtarget.isUsingPCRelativeCalls()) {
5412 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5413 RetOpc = PPCISD::CALL_NOTOC;
5414 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5415 // The ABIs that maintain a TOC pointer across calls need to have a nop
5416 // immediately following the call instruction if the caller and callee may
5417 // have different TOC bases. At link time if the linker determines the calls
5418 // may not share a TOC base, the call is redirected to a trampoline inserted
5419 // by the linker. The trampoline will (among other things) save the caller's
5420 // TOC pointer at an ABI designated offset in the linkage area and the
5421 // linker will rewrite the nop to be a load of the TOC pointer from the
5422 // linkage area into gpr2.
5423 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5424 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5425 RetOpc =
5426 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5427 } else
5428 RetOpc = PPCISD::CALL;
5429 if (IsStrictFPCall) {
5430 switch (RetOpc) {
5431 default:
5432 llvm_unreachable("Unknown call opcode");
5433 case PPCISD::BCTRL_LOAD_TOC:
5434 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5435 break;
5436 case PPCISD::BCTRL:
5437 RetOpc = PPCISD::BCTRL_RM;
5438 break;
5439 case PPCISD::CALL_NOTOC:
5440 RetOpc = PPCISD::CALL_NOTOC_RM;
5441 break;
5442 case PPCISD::CALL:
5443 RetOpc = PPCISD::CALL_RM;
5444 break;
5445 case PPCISD::CALL_NOP:
5446 RetOpc = PPCISD::CALL_NOP_RM;
5447 break;
5448 }
5449 }
5450 return RetOpc;
5451}
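// Rough summary of the mapping above (illustrative):
//   indirect, TOC-based ABI (AIX / 64-bit ELF, non-PCRel) -> BCTRL_LOAD_TOC
//   indirect, otherwise                                   -> BCTRL
//   direct,   64-bit ELFv2 with PCRel                     -> CALL_NOTOC
//   direct,   TOC-based ABI, possibly different TOC base  -> CALL_NOP
//   direct,   otherwise                                   -> CALL
// Strict-FP call sites are then rewritten to the matching *_RM opcode.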
5452
5453static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5454 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5455 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5456 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5457 return SDValue(Dest, 0);
5458
5459 // Returns true if the callee is local, and false otherwise.
5460 auto isLocalCallee = [&]() {
5461 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5462 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5463
5464 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5465 !isa_and_nonnull<GlobalIFunc>(GV);
5466 };
5467
5468 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5469 // a static relocation model causes some versions of GNU LD (2.17.50, at
5470 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5471 // built with secure-PLT.
5472 bool UsePlt =
5473 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5474 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5475
5476 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5477 const TargetMachine &TM = Subtarget.getTargetMachine();
5478 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5479 MCSymbolXCOFF *S =
5480 cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5481
5482 MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5483 return DAG.getMCSymbol(S, PtrVT);
5484 };
5485
5486 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5487 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5488 if (isFunctionGlobalAddress(GV)) {
5489 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5490
5491 if (Subtarget.isAIXABI()) {
5492 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5493 return getAIXFuncEntryPointSymbolSDNode(GV);
5494 }
5495 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5496 UsePlt ? PPCII::MO_PLT : 0);
5497 }
5498
5499 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5500 const char *SymName = S->getSymbol();
5501 if (Subtarget.isAIXABI()) {
5502 // If there exists a user-declared function whose name is the same as the
5503 // ExternalSymbol's, then we pick up the user-declared version.
5504 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5505 if (const Function *F =
5506 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5507 return getAIXFuncEntryPointSymbolSDNode(F);
5508
5509 // On AIX, direct function calls reference the symbol for the function's
5510 // entry point, which is named by prepending a "." before the function's
5511 // C-linkage name. A Qualname is returned here because an external
5512 // function entry point is a csect with XTY_ER property.
5513 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5514 auto &Context = DAG.getMachineFunction().getMMI().getContext();
5515 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5516 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5517 XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5518 return Sec->getQualNameSymbol();
5519 };
5520
5521 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5522 }
5523 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5524 UsePlt ? PPCII::MO_PLT : 0);
5525 }
5526
5527 // No transformation needed.
5528 assert(Callee.getNode() && "What no callee?");
5529 return Callee;
5530}
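// For example (illustrative): on AIX a direct call to a function "foo"
// becomes a reference to its entry-point symbol ".foo", and on 32-bit ELF
// PIC a non-local callee is tagged with MO_PLT so the call is routed
// through the PLT.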
5531
5532static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5533 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5534 "Expected a CALLSEQ_STARTSDNode.");
5535
5536 // The last operand is the chain, except when the node has glue. If the node
5537 // has glue, then the last operand is the glue, and the chain is the second
5538 // last operand.
5539 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5540 if (LastValue.getValueType() != MVT::Glue)
5541 return LastValue;
5542
5543 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5544}
5545
5546// Creates the node that moves a function's address into the count register
5547// to prepare for an indirect call instruction.
5548static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5549 SDValue &Glue, SDValue &Chain,
5550 const SDLoc &dl) {
5551 SDValue MTCTROps[] = {Chain, Callee, Glue};
5552 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5553 Chain = DAG.getNode(PPCISD::MTCTR, dl, ArrayRef(ReturnTypes, 2),
5554 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5555 // The glue is the second value produced.
5556 Glue = Chain.getValue(1);
5557}
5558
5559static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5560 SDValue &Glue, SDValue &Chain,
5561 SDValue CallSeqStart,
5562 const CallBase *CB, const SDLoc &dl,
5563 bool hasNest,
5564 const PPCSubtarget &Subtarget) {
5565 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5566 // entry point, but to the function descriptor (the function entry point
5567 // address is part of the function descriptor though).
5568 // The function descriptor is a three doubleword structure with the
5569 // following fields: function entry point, TOC base address and
5570 // environment pointer.
5571 // Thus for a call through a function pointer, the following actions need
5572 // to be performed:
5573 // 1. Save the TOC of the caller in the TOC save area of its stack
5574 // frame (this is done in LowerCall_64SVR4()).
5575 // 2. Load the address of the function entry point from the function
5576 // descriptor.
5577 // 3. Load the TOC of the callee from the function descriptor into r2.
5578 // 4. Load the environment pointer from the function descriptor into
5579 // r11.
5580 // 5. Branch to the function entry point address.
5581 // 6. On return of the callee, the TOC of the caller needs to be
5582 // restored (this is done in FinishCall()).
5583 //
5584 // The loads are scheduled at the beginning of the call sequence, and the
5585 // register copies are flagged together to ensure that no other
5586 // operations can be scheduled in between. E.g. without flagging the
5587 // copies together, a TOC access in the caller could be scheduled between
5588 // the assignment of the callee TOC and the branch to the callee, which leads
5589 // to incorrect code.
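  // As an illustrative sketch only (actual register assignment is up to the
  // register allocator; rP stands for the GPR holding the descriptor
  // address), the sequence built below resembles:
  //   ld    r12, 0(rP)    ; function entry point
  //   ld    r2,  8(rP)    ; callee TOC anchor
  //   ld    r11, 16(rP)   ; environment pointer
  //   mtctr r12
  //   bctrl
  //   ld    r2, TOCSaveOffset(r1) ; caller TOC restored after return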
5590
5591 // Start by loading the function address from the descriptor.
5592 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5593 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5594 ? (MachineMemOperand::MODereferenceable |
5595 MachineMemOperand::MOInvariant)
5596 : MachineMemOperand::MONone;
5597
5598 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5599
5600 // Registers used in building the DAG.
5601 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5602 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5603
5604 // Offsets of descriptor members.
5605 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5606 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5607
5608 const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5609 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5610
5611 // One load for the function's entry point address.
5612 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5613 Alignment, MMOFlags);
5614
5615 // One for loading the TOC anchor for the module that contains the called
5616 // function.
5617 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5618 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5619 SDValue TOCPtr =
5620 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5621 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5622
5623 // One for loading the environment pointer.
5624 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5625 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5626 SDValue LoadEnvPtr =
5627 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5628 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5629
5630
5631 // Then copy the newly loaded TOC anchor to the TOC pointer.
5632 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5633 Chain = TOCVal.getValue(0);
5634 Glue = TOCVal.getValue(1);
5635
5636 // If the function call has an explicit 'nest' parameter, it takes the
5637 // place of the environment pointer.
5638 assert((!hasNest || !Subtarget.isAIXABI()) &&
5639 "Nest parameter is not supported on AIX.");
5640 if (!hasNest) {
5641 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5642 Chain = EnvVal.getValue(0);
5643 Glue = EnvVal.getValue(1);
5644 }
5645
5646 // The rest of the indirect call sequence is the same as the non-descriptor
5647 // DAG.
5648 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5649}
5650
5651static void
5652buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5653 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5654 SelectionDAG &DAG,
5655 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5656 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5657 const PPCSubtarget &Subtarget) {
5658 const bool IsPPC64 = Subtarget.isPPC64();
5659 // MVT for a general purpose register.
5660 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
5661
5662 // First operand is always the chain.
5663 Ops.push_back(Chain);
5664
5665 // If it's a direct call pass the callee as the second operand.
5666 if (!CFlags.IsIndirect)
5667 Ops.push_back(Callee);
5668 else {
5669 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5670
5671 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5672 // on the stack (this would have been done in `LowerCall_64SVR4` or
5673 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5674 // represents both the indirect branch and a load that restores the TOC
5675 // pointer from the linkage area. The operand for the TOC restore is an add
5676 // of the TOC save offset to the stack pointer. This must be the second
5677 // operand: after the chain input but before any other variadic arguments.
5678 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5679 // saved or used.
5680 if (isTOCSaveRestoreRequired(Subtarget)) {
5681 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5682
5683 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5684 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5685 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5686 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5687 Ops.push_back(AddTOC);
5688 }
5689
5690 // Add the register used for the environment pointer.
5691 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5692 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5693 RegVT));
5694
5695
5696 // Add CTR register as callee so a bctr can be emitted later.
5697 if (CFlags.IsTailCall)
5698 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5699 }
5700
5701 // If this is a tail call add stack pointer delta.
5702 if (CFlags.IsTailCall)
5703 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5704
5705 // Add argument registers to the end of the list so that they are known live
5706 // into the call.
5707 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
5708 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
5709 RegsToPass[i].second.getValueType()));
5710
5711 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5712 // no way to mark dependencies as implicit here.
5713 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5714 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5715 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5716 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5717
5718 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5719 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5720 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5721
5722 // Add a register mask operand representing the call-preserved registers.
5723 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5724 const uint32_t *Mask =
5725 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5726 assert(Mask && "Missing call preserved mask for calling convention");
5727 Ops.push_back(DAG.getRegisterMask(Mask));
5728
5729 // If the glue is valid, it is the last operand.
5730 if (Glue.getNode())
5731 Ops.push_back(Glue);
5732}
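// The operand list built above is therefore, in order (bracketed entries are
// conditional): Chain, Callee-or-TOC-restore-address, [environment-pointer
// reg], [CTR], [SPDiff], argument regs..., [TOC reg], [CR1EQ], register
// mask, [glue].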
5733
5734SDValue PPCTargetLowering::FinishCall(
5735 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5736 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5737 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5738 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5739 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5740
5741 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5742 Subtarget.isAIXABI())
5743 setUsesTOCBasePtr(DAG);
5744
5745 unsigned CallOpc =
5746 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5747 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5748
5749 if (!CFlags.IsIndirect)
5750 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5751 else if (Subtarget.usesFunctionDescriptors())
5752 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5753 dl, CFlags.HasNest, Subtarget);
5754 else
5755 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5756
5757 // Build the operand list for the call instruction.
5758 SmallVector<SDValue, 8> Ops;
5759 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5760 SPDiff, Subtarget);
5761
5762 // Emit tail call.
5763 if (CFlags.IsTailCall) {
5764 // Indirect tail call when using PC Relative calls do not have the same
5765 // constraints.
5766 assert(((Callee.getOpcode() == ISD::Register &&
5767 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5768 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5769 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5770 isa<ConstantSDNode>(Callee) ||
5771 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5772 "Expecting a global address, external symbol, absolute value, "
5773 "register or an indirect tail call when PC Relative calls are "
5774 "used.");
5775 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5776 assert(CallOpc == PPCISD::TC_RETURN &&
5777 "Unexpected call opcode for a tail call.");
5778 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5779 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5780 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5781 return Ret;
5782 }
5783
5784 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5785 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5786 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5787 Glue = Chain.getValue(1);
5788
5789 // When performing tail call optimization the callee pops its arguments off
5790 // the stack. Account for this here so these bytes can be pushed back on in
5791 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5792 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5793 getTargetMachine().Options.GuaranteedTailCallOpt)
5794 ? NumBytes
5795 : 0;
5796
5797 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5798 Glue = Chain.getValue(1);
5799
5800 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5801 DAG, InVals);
5802}
5803
5804bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5805 CallingConv::ID CalleeCC = CB->getCallingConv();
5806 const Function *CallerFunc = CB->getCaller();
5807 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5808 const Function *CalleeFunc = CB->getCalledFunction();
5809 if (!CalleeFunc)
5810 return false;
5811 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5812
5813 SmallVector<ISD::OutputArg, 2> Outs;
5814 SmallVector<ISD::InputArg, 2> Ins;
5815
5816 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5817 CalleeFunc->getAttributes(), Outs, *this,
5818 CalleeFunc->getParent()->getDataLayout());
5819
5820 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5821 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5822 false /*isCalleeExternalSymbol*/);
5823}
5824
5825bool PPCTargetLowering::isEligibleForTCO(
5826 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5827 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5828 const SmallVectorImpl<ISD::OutputArg> &Outs,
5829 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5830 bool isCalleeExternalSymbol) const {
5831 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5832 return false;
5833
5834 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5835 return IsEligibleForTailCallOptimization_64SVR4(
5836 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5837 isCalleeExternalSymbol);
5838 else
5839 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5840 isVarArg, Ins);
5841}
5842
5843SDValue
5844PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5845 SmallVectorImpl<SDValue> &InVals) const {
5846 SelectionDAG &DAG = CLI.DAG;
5847 SDLoc &dl = CLI.DL;
5848 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5849 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5850 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5851 SDValue Chain = CLI.Chain;
5852 SDValue Callee = CLI.Callee;
5853 bool &isTailCall = CLI.IsTailCall;
5854 CallingConv::ID CallConv = CLI.CallConv;
5855 bool isVarArg = CLI.IsVarArg;
5856 bool isPatchPoint = CLI.IsPatchPoint;
5857 const CallBase *CB = CLI.CB;
5858
5859 if (isTailCall) {
5860 MachineFunction &MF = DAG.getMachineFunction();
5861 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5862 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5863 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5864 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5865
5866 isTailCall =
5867 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5868 &(MF.getFunction()), IsCalleeExternalSymbol);
5869 if (isTailCall) {
5870 ++NumTailCalls;
5871 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5872 ++NumSiblingCalls;
5873
5874 // PC Relative calls no longer guarantee that the callee is a Global
5875 // Address Node. The callee could be an indirect tail call in which
5876 // case the SDValue for the callee could be a load (to load the address
5877 // of a function pointer) or it may be a register copy (to move the
5878 // address of the callee from a function parameter into a virtual
5879 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5880 assert((Subtarget.isUsingPCRelativeCalls() ||
5881 isa<GlobalAddressSDNode>(Callee)) &&
5882 "Callee should be an llvm::Function object.");
5883
5884 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5885 << "\nTCO callee: ");
5886 LLVM_DEBUG(Callee.dump());
5887 }
5888 }
5889
5890 if (!isTailCall && CB && CB->isMustTailCall())
5891 report_fatal_error("failed to perform tail call elimination on a call "
5892 "site marked musttail");
5893
5894 // When long calls (i.e. indirect calls) are always used, calls are always
5895 // made via function pointer. If we have a function name, first translate it
5896 // into a pointer.
5897 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5898 !isTailCall)
5899 Callee = LowerGlobalAddress(Callee, DAG);
5900
5901 CallFlags CFlags(
5902 CallConv, isTailCall, isVarArg, isPatchPoint,
5903 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5904 // hasNest
5905 Subtarget.is64BitELFABI() &&
5906 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5907 CLI.NoMerge);
5908
5909 if (Subtarget.isAIXABI())
5910 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5911 InVals, CB);
5912
5913 assert(Subtarget.isSVR4ABI());
5914 if (Subtarget.isPPC64())
5915 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5916 InVals, CB);
5917 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5918 InVals, CB);
5919}
5920
5921SDValue PPCTargetLowering::LowerCall_32SVR4(
5922 SDValue Chain, SDValue Callee, CallFlags CFlags,
5923 const SmallVectorImpl<ISD::OutputArg> &Outs,
5924 const SmallVectorImpl<SDValue> &OutVals,
5925 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5926 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5927 const CallBase *CB) const {
5928 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5929 // of the 32-bit SVR4 ABI stack frame layout.
5930
5931 const CallingConv::ID CallConv = CFlags.CallConv;
5932 const bool IsVarArg = CFlags.IsVarArg;
5933 const bool IsTailCall = CFlags.IsTailCall;
5934
5935 assert((CallConv == CallingConv::C ||
5936 CallConv == CallingConv::Cold ||
5937 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5938
5939 const Align PtrAlign(4);
5940
5941 MachineFunction &MF = DAG.getMachineFunction();
5942
5943 // Mark this function as potentially containing a function that contains a
5944 // tail call. As a consequence, the frame pointer will be used for dynamic
5945 // allocations and for restoring the caller's stack pointer in this
5946 // function's epilog, because a tail-called function might overwrite the
5947 // value in this function's (MF) stack pointer stack slot 0(SP).
5948 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5949 CallConv == CallingConv::Fast)
5950 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5951
5952 // Count how many bytes are to be pushed on the stack, including the linkage
5953 // area, parameter list area and the part of the local variable space which
5954 // contains copies of aggregates which are passed by value.
5955
5956 // Assign locations to all of the outgoing arguments.
5957 SmallVector<CCValAssign, 16> ArgLocs;
5958 PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5959
5960 // Reserve space for the linkage area on the stack.
5961 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5962 PtrAlign);
5963 if (useSoftFloat())
5964 CCInfo.PreAnalyzeCallOperands(Outs);
5965
5966 if (IsVarArg) {
5967 // Handle fixed and variable vector arguments differently.
5968 // Fixed vector arguments go into registers as long as registers are
5969 // available. Variable vector arguments always go into memory.
5970 unsigned NumArgs = Outs.size();
5971
5972 for (unsigned i = 0; i != NumArgs; ++i) {
5973 MVT ArgVT = Outs[i].VT;
5974 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5975 bool Result;
5976
5977 if (Outs[i].IsFixed) {
5978 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5979 CCInfo);
5980 } else {
5981 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5982 ArgFlags, CCInfo);
5983 }
5984
5985 if (Result) {
5986#ifndef NDEBUG
5987 errs() << "Call operand #" << i << " has unhandled type "
5988 << ArgVT << "\n";
5989#endif
5990 llvm_unreachable(nullptr);
5991 }
5992 }
5993 } else {
5994 // All arguments are treated the same.
5995 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5996 }
5997 CCInfo.clearWasPPCF128();
5998
5999 // Assign locations to all of the outgoing aggregate by value arguments.
6000 SmallVector<CCValAssign, 16> ByValArgLocs;
6001 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6002
6003 // Reserve stack space for the allocations in CCInfo.
6004 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6005
6006 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6007
6008 // Size of the linkage area, parameter list area and the part of the local
6009 // variable space where copies of aggregates which are passed by value are
6010 // stored.
6011 unsigned NumBytes = CCByValInfo.getStackSize();
6012
6013 // Calculate by how many bytes the stack has to be adjusted in case of tail
6014 // call optimization.
6015 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6016
6017 // Adjust the stack pointer for the new arguments...
6018 // These operations are automatically eliminated by the prolog/epilog pass
6019 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6020 SDValue CallSeqStart = Chain;
6021
6022 // Load the return address and frame pointer so they can be moved somewhere
6023 // else later.
6024 SDValue LROp, FPOp;
6025 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6026
6027 // Set up a copy of the stack pointer for use loading and storing any
6028 // arguments that may not fit in the registers available for argument
6029 // passing.
6030 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6031
6032 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6033 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6034 SmallVector<SDValue, 8> MemOpChains;
6035
6036 bool seenFloatArg = false;
6037 // Walk the register/memloc assignments, inserting copies/loads.
6038 // i - Tracks the index into the list of registers allocated for the call
6039 // RealArgIdx - Tracks the index into the list of actual function arguments
6040 // j - Tracks the index into the list of byval arguments
6041 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6042 i != e;
6043 ++i, ++RealArgIdx) {
6044 CCValAssign &VA = ArgLocs[i];
6045 SDValue Arg = OutVals[RealArgIdx];
6046 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6047
6048 if (Flags.isByVal()) {
6049 // Argument is an aggregate which is passed by value, thus we need to
6050 // create a copy of it in the local variable space of the current stack
6051 // frame (which is the stack frame of the caller) and pass the address of
6052 // this copy to the callee.
6053 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6054 CCValAssign &ByValVA = ByValArgLocs[j++];
6055 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6056
6057 // Memory reserved in the local variable space of the caller's stack frame.
6058 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6059
6060 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6061 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6062 StackPtr, PtrOff);
6063
6064 // Create a copy of the argument in the local area of the current
6065 // stack frame.
6066 SDValue MemcpyCall =
6067 CreateCopyOfByValArgument(Arg, PtrOff,
6068 CallSeqStart.getNode()->getOperand(0),
6069 Flags, DAG, dl);
6070
6071 // This must go outside the CALLSEQ_START..END.
6072 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6073 SDLoc(MemcpyCall));
6074 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6075 NewCallSeqStart.getNode());
6076 Chain = CallSeqStart = NewCallSeqStart;
6077
6078 // Pass the address of the aggregate copy on the stack either in a
6079 // physical register or in the parameter list area of the current stack
6080 // frame to the callee.
6081 Arg = PtrOff;
6082 }
6083
6084 // When useCRBits() is true, there can be i1 arguments.
6085 // It is because getRegisterType(MVT::i1) => MVT::i1,
6086 // and for other integer types getRegisterType() => MVT::i32.
6087 // Extend i1 and ensure callee will get i32.
6088 if (Arg.getValueType() == MVT::i1)
6089 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6090 dl, MVT::i32, Arg);
6091
6092 if (VA.isRegLoc()) {
6093 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6094 // Put argument in a physical register.
6095 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6096 bool IsLE = Subtarget.isLittleEndian();
6097 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6098 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6099 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6100 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6101 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6102 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6103 SVal.getValue(0)));
6104 } else
6105 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6106 } else {
6107 // Put argument in the parameter list area of the current stack frame.
6108 assert(VA.isMemLoc());
6109 unsigned LocMemOffset = VA.getLocMemOffset();
6110
6111 if (!IsTailCall) {
6112 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6113 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6114 StackPtr, PtrOff);
6115
6116 MemOpChains.push_back(
6117 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6118 } else {
6119 // Calculate and remember argument location.
6120 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6121 TailCallArguments);
6122 }
6123 }
6124 }
6125
6126 if (!MemOpChains.empty())
6127 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6128
6129 // Build a sequence of copy-to-reg nodes chained together with token chain
6130 // and flag operands which copy the outgoing args into the appropriate regs.
6131 SDValue InGlue;
6132 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6133 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6134 RegsToPass[i].second, InGlue);
6135 InGlue = Chain.getValue(1);
6136 }
6137
6138 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6139 // registers.
6140 if (IsVarArg) {
6141 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6142 SDValue Ops[] = { Chain, InGlue };
6143
6144 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6145 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6146
6147 InGlue = Chain.getValue(1);
6148 }
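  // (Illustrative note: the 32-bit SVR4 ABI uses CR bit 6 to tell a vararg
  // callee whether any floating-point arguments were passed in registers,
  // e.g. a vararg call that put an FP value in a register gets CR6SET here.)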
6149
6150 if (IsTailCall)
6151 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6152 TailCallArguments);
6153
6154 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6155 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6156}
6157
6158// Copy an argument into memory, being careful to do this outside the
6159// call sequence for the call to which the argument belongs.
6160SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6161 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6162 SelectionDAG &DAG, const SDLoc &dl) const {
6163 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6164 CallSeqStart.getNode()->getOperand(0),
6165 Flags, DAG, dl);
6166 // The MEMCPY must go outside the CALLSEQ_START..END.
6167 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6168 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6169 SDLoc(MemcpyCall));
6170 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6171 NewCallSeqStart.getNode());
6172 return NewCallSeqStart;
6173}
6174
6175SDValue PPCTargetLowering::LowerCall_64SVR4(
6176 SDValue Chain, SDValue Callee, CallFlags CFlags,
6177 const SmallVectorImpl<ISD::OutputArg> &Outs,
6178 const SmallVectorImpl<SDValue> &OutVals,
6179 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6180 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6181 const CallBase *CB) const {
6182 bool isELFv2ABI = Subtarget.isELFv2ABI();
6183 bool isLittleEndian = Subtarget.isLittleEndian();
6184 unsigned NumOps = Outs.size();
6185 bool IsSibCall = false;
6186 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6187
6188 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6189 unsigned PtrByteSize = 8;
6190
6191 MachineFunction &MF = DAG.getMachineFunction();
6192
6193 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6194 IsSibCall = true;
6195
6196 // Mark this function as potentially containing a function that contains a
6197 // tail call. As a consequence, the frame pointer will be used for dynamic
6198 // allocations and for restoring the caller's stack pointer in this
6199 // function's epilog, because a tail-called function might overwrite the
6200 // value in this function's (MF) stack pointer stack slot 0(SP).
6201 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6202 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6203
6204 assert(!(IsFastCall && CFlags.IsVarArg) &&
6205 "fastcc not supported on varargs functions");
6206
6207 // Count how many bytes are to be pushed on the stack, including the linkage
6208 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6209 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6210 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6211 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6212 unsigned NumBytes = LinkageSize;
6213 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6214
6215 static const MCPhysReg GPR[] = {
6216 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6217 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6218 };
6219 static const MCPhysReg VR[] = {
6220 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6221 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6222 };
6223
6224 const unsigned NumGPRs = std::size(GPR);
6225 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6226 const unsigned NumVRs = std::size(VR);
6227
6228 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6229 // can be passed to the callee in registers.
6230 // For the fast calling convention, there is another check below.
6231 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6232 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6233 if (!HasParameterArea) {
6234 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6235 unsigned AvailableFPRs = NumFPRs;
6236 unsigned AvailableVRs = NumVRs;
6237 unsigned NumBytesTmp = NumBytes;
6238 for (unsigned i = 0; i != NumOps; ++i) {
6239 if (Outs[i].Flags.isNest()) continue;
6240 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6241 PtrByteSize, LinkageSize, ParamAreaSize,
6242 NumBytesTmp, AvailableFPRs, AvailableVRs))
6243 HasParameterArea = true;
6244 }
6245 }
6246
6247 // When using the fast calling convention, we don't provide backing for
6248 // arguments that will be in registers.
6249 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6250
6251 // Avoid allocating parameter area for fastcc functions if all the arguments
6252 // can be passed in the registers.
6253 if (IsFastCall)
6254 HasParameterArea = false;
6255
6256 // Add up all the space actually used.
6257 for (unsigned i = 0; i != NumOps; ++i) {
6258 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6259 EVT ArgVT = Outs[i].VT;
6260 EVT OrigVT = Outs[i].ArgVT;
6261
6262 if (Flags.isNest())
6263 continue;
6264
6265 if (IsFastCall) {
6266 if (Flags.isByVal()) {
6267 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6268 if (NumGPRsUsed > NumGPRs)
6269 HasParameterArea = true;
6270 } else {
6271 switch (ArgVT.getSimpleVT().SimpleTy) {
6272 default: llvm_unreachable("Unexpected ValueType for argument!");
6273 case MVT::i1:
6274 case MVT::i32:
6275 case MVT::i64:
6276 if (++NumGPRsUsed <= NumGPRs)
6277 continue;
6278 break;
6279 case MVT::v4i32:
6280 case MVT::v8i16:
6281 case MVT::v16i8:
6282 case MVT::v2f64:
6283 case MVT::v2i64:
6284 case MVT::v1i128:
6285 case MVT::f128:
6286 if (++NumVRsUsed <= NumVRs)
6287 continue;
6288 break;
6289 case MVT::v4f32:
6290 if (++NumVRsUsed <= NumVRs)
6291 continue;
6292 break;
6293 case MVT::f32:
6294 case MVT::f64:
6295 if (++NumFPRsUsed <= NumFPRs)
6296 continue;
6297 break;
6298 }
6299 HasParameterArea = true;
6300 }
6301 }
6302
6303 /* Respect alignment of argument on the stack. */
6304 auto Alignment =
6305 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6306 NumBytes = alignTo(NumBytes, Alignment);
6307
6308 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6309 if (Flags.isInConsecutiveRegsLast())
6310 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6311 }
6312
6313 unsigned NumBytesActuallyUsed = NumBytes;
6314
6315 // In the old ELFv1 ABI,
6316 // the prolog code of the callee may store up to 8 GPR argument registers to
6317 // the stack, allowing va_start to index over them in memory if it is varargs.
6318 // Because we cannot tell if this is needed on the caller side, we have to
6319 // conservatively assume that it is needed. As such, make sure we have at
6320 // least enough stack space for the caller to store the 8 GPRs.
6321 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6322 // really requires memory operands, e.g. a vararg function.
6323 if (HasParameterArea)
6324 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6325 else
6326 NumBytes = LinkageSize;
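  // Worked example (illustrative): an ELFv2 non-vararg call passing three
  // i64 arguments uses only GPRs, so HasParameterArea stays false and
  // NumBytes collapses back to LinkageSize (32) here, even though the
  // accounting loop had advanced it to 32 + 3 * 8 = 56.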
6327
6328 // Tail call needs the stack to be aligned.
6329 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6330 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6331
6332 int SPDiff = 0;
6333
6334 // Calculate by how many bytes the stack has to be adjusted in case of tail
6335 // call optimization.
6336 if (!IsSibCall)
6337 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6338
6339 // To protect arguments on the stack from being clobbered in a tail call,
6340 // force all the loads to happen before doing any other lowering.
6341 if (CFlags.IsTailCall)
6342 Chain = DAG.getStackArgumentTokenFactor(Chain);
6343
6344 // Adjust the stack pointer for the new arguments...
6345 // These operations are automatically eliminated by the prolog/epilog pass
6346 if (!IsSibCall)
6347 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6348 SDValue CallSeqStart = Chain;
6349
6350 // Load the return address and frame pointer so they can be moved somewhere
6351 // else later.
6352 SDValue LROp, FPOp;
6353 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6354
6355 // Set up a copy of the stack pointer for use loading and storing any
6356 // arguments that may not fit in the registers available for argument
6357 // passing.
6358 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6359
6360 // Figure out which arguments are going to go in registers, and which in
6361 // memory. Also, if this is a vararg function, floating point operations
6362 // must be stored to our stack, and loaded into integer regs as well, if
6363 // any integer regs are available for argument passing.
6364 unsigned ArgOffset = LinkageSize;
6365
6366 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6367 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6368
6369 SmallVector<SDValue, 8> MemOpChains;
6370 for (unsigned i = 0; i != NumOps; ++i) {
6371 SDValue Arg = OutVals[i];
6372 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6373 EVT ArgVT = Outs[i].VT;
6374 EVT OrigVT = Outs[i].ArgVT;
6375
6376 // PtrOff will be used to store the current argument to the stack if a
6377 // register cannot be found for it.
6378 SDValue PtrOff;
6379
6380 // We re-align the argument offset for each argument, except when using the
6381 // fast calling convention, when we need to make sure we do that only when
6382 // we'll actually use a stack slot.
6383 auto ComputePtrOff = [&]() {
6384 /* Respect alignment of argument on the stack. */
6385 auto Alignment =
6386 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6387 ArgOffset = alignTo(ArgOffset, Alignment);
6388
6389 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6390
6391 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6392 };
6393
6394 if (!IsFastCall) {
6395 ComputePtrOff();
6396
6397 /* Compute GPR index associated with argument offset. */
6398 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6399 GPR_idx = std::min(GPR_idx, NumGPRs);
6400 }
6401
6402 // Promote integers to 64-bit values.
6403 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6404 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6405 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6406 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6407 }
6408
6409 // FIXME memcpy is used way more than necessary. Correctness first.
6410 // Note: "by value" is code for passing a structure by value, not
6411 // basic types.
6412 if (Flags.isByVal()) {
6413 // Note: Size includes alignment padding, so
6414 // struct x { short a; char b; }
6415 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6416 // These are the proper values we need for right-justifying the
6417 // aggregate in a parameter register.
6418 unsigned Size = Flags.getByValSize();
6419
6420 // An empty aggregate parameter takes up no storage and no
6421 // registers.
6422 if (Size == 0)
6423 continue;
6424
6425 if (IsFastCall)
6426 ComputePtrOff();
6427
6428 // All aggregates smaller than 8 bytes must be passed right-justified.
6429 if (Size==1 || Size==2 || Size==4) {
6430 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6431 if (GPR_idx != NumGPRs) {
6432 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6433 MachinePointerInfo(), VT);
6434 MemOpChains.push_back(Load.getValue(1));
6435 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6436
6437 ArgOffset += PtrByteSize;
6438 continue;
6439 }
6440 }
6441
6442 if (GPR_idx == NumGPRs && Size < 8) {
6443 SDValue AddPtr = PtrOff;
6444 if (!isLittleEndian) {
6445 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6446 PtrOff.getValueType());
6447 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6448 }
6449 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6450 CallSeqStart,
6451 Flags, DAG, dl);
6452 ArgOffset += PtrByteSize;
6453 continue;
6454 }
6455 // Copy the object to the parameter save area if it cannot be entirely
6456 // passed by registers.
6457 // FIXME: we only need to copy the parts which need to be passed in
6458 // parameter save area. For the parts passed by registers, we don't need
6459 // to copy them to the stack although we need to allocate space for them
6460 // in parameter save area.
6461 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6462 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6463 CallSeqStart,
6464 Flags, DAG, dl);
6465
6466 // When a register is available, pass a small aggregate right-justified.
6467 if (Size < 8 && GPR_idx != NumGPRs) {
6468 // The easiest way to get this right-justified in a register
6469 // is to copy the structure into the rightmost portion of a
6470 // local variable slot, then load the whole slot into the
6471 // register.
6472 // FIXME: The memcpy seems to produce pretty awful code for
6473 // small aggregates, particularly for packed ones.
6474 // FIXME: It would be preferable to use the slot in the
6475 // parameter save area instead of a new local variable.
6476 SDValue AddPtr = PtrOff;
6477 if (!isLittleEndian) {
6478 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6479 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6480 }
6481 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6482 CallSeqStart,
6483 Flags, DAG, dl);
6484
6485 // Load the slot into the register.
6486 SDValue Load =
6487 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6488 MemOpChains.push_back(Load.getValue(1));
6489 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6490
6491 // Done with this argument.
6492 ArgOffset += PtrByteSize;
6493 continue;
6494 }
6495
6496 // For aggregates larger than PtrByteSize, copy the pieces of the
6497 // object that fit into registers from the parameter save area.
6498 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6499 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6500 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6501 if (GPR_idx != NumGPRs) {
6502 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6503 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6504 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6505 MachinePointerInfo(), ObjType);
6506
6507 MemOpChains.push_back(Load.getValue(1));
6508 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6509 ArgOffset += PtrByteSize;
6510 } else {
6511 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6512 break;
6513 }
6514 }
6515 continue;
6516 }
6517
6518 switch (Arg.getSimpleValueType().SimpleTy) {
6519 default: llvm_unreachable("Unexpected ValueType for argument!");
6520 case MVT::i1:
6521 case MVT::i32:
6522 case MVT::i64:
6523 if (Flags.isNest()) {
6524 // The 'nest' parameter, if any, is passed in R11.
6525 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6526 break;
6527 }
6528
6529 // These can be scalar arguments or elements of an integer array type
6530 // passed directly. Clang may use those instead of "byval" aggregate
6531 // types to avoid forcing arguments to memory unnecessarily.
6532 if (GPR_idx != NumGPRs) {
6533 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6534 } else {
6535 if (IsFastCall)
6536 ComputePtrOff();
6537
6538 assert(HasParameterArea &&
6539 "Parameter area must exist to pass an argument in memory.");
6540 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6541 true, CFlags.IsTailCall, false, MemOpChains,
6542 TailCallArguments, dl);
6543 if (IsFastCall)
6544 ArgOffset += PtrByteSize;
6545 }
6546 if (!IsFastCall)
6547 ArgOffset += PtrByteSize;
6548 break;
6549 case MVT::f32:
6550 case MVT::f64: {
6551 // These can be scalar arguments or elements of a float array type
6552 // passed directly. The latter are used to implement ELFv2 homogeneous
6553 // float aggregates.
6554
6555 // Named arguments go into FPRs first, and once they overflow, the
6556 // remaining arguments go into GPRs and then the parameter save area.
6557 // Unnamed arguments for vararg functions always go to GPRs and
6558 // then the parameter save area. For now, put all arguments to vararg
6559 // routines always in both locations (FPR *and* GPR or stack slot).
6560 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6561 bool NeededLoad = false;
6562
6563 // First load the argument into the next available FPR.
6564 if (FPR_idx != NumFPRs)
6565 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6566
6567 // Next, load the argument into GPR or stack slot if needed.
6568 if (!NeedGPROrStack)
6569 ;
6570 else if (GPR_idx != NumGPRs && !IsFastCall) {
6571 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6572 // once we support fp <-> gpr moves.
6573
6574 // In the non-vararg case, this can only ever happen in the
6575 // presence of f32 array types, since otherwise we never run
6576 // out of FPRs before running out of GPRs.
6577 SDValue ArgVal;
6578
6579 // Double values are always passed in a single GPR.
6580 if (Arg.getValueType() != MVT::f32) {
6581 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6582
6583 // Non-array float values are extended and passed in a GPR.
6584 } else if (!Flags.isInConsecutiveRegs()) {
6585 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6586 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6587
6588 // If we have an array of floats, we collect every odd element
6589 // together with its predecessor into one GPR.
6590 } else if (ArgOffset % PtrByteSize != 0) {
6591 SDValue Lo, Hi;
6592 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6593 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6594 if (!isLittleEndian)
6595 std::swap(Lo, Hi);
6596 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6597
6598 // The final element, if even, goes into the first half of a GPR.
6599 } else if (Flags.isInConsecutiveRegsLast()) {
6600 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6601 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6602 if (!isLittleEndian)
6603 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6604 DAG.getConstant(32, dl, MVT::i32));
6605
6606 // Non-final even elements are skipped; they will be handled
6607 // together with the subsequent argument on the next go-around.
6608 } else
6609 ArgVal = SDValue();
6610
6611 if (ArgVal.getNode())
6612 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6613 } else {
6614 if (IsFastCall)
6615 ComputePtrOff();
6616
6617 // Single-precision floating-point values are mapped to the
6618 // second (rightmost) word of the stack doubleword.
6619 if (Arg.getValueType() == MVT::f32 &&
6620 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6621 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6622 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6623 }
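// E.g. a lone f32 whose doubleword slot starts at offset 48 is stored at
// offset 52 on big-endian targets, leaving the first (leftmost) word of
// the doubleword unused.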
6624
6625 assert(HasParameterArea &&
6626 "Parameter area must exist to pass an argument in memory.");
6627 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6628 true, CFlags.IsTailCall, false, MemOpChains,
6629 TailCallArguments, dl);
6630
6631 NeededLoad = true;
6632 }
6633 // When passing an array of floats, the array occupies consecutive
6634 // space in the argument area; only round up to the next doubleword
6635 // at the end of the array. Otherwise, each float takes 8 bytes.
6636 if (!IsFastCall || NeededLoad) {
6637 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6638 Flags.isInConsecutiveRegs()) ? 4 : 8;
6639 if (Flags.isInConsecutiveRegsLast())
6640 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6641 }
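// A worked example of the accounting above: three consecutive f32 elements
// advance ArgOffset by 4 + 4 + 4 = 12, and the isInConsecutiveRegsLast
// element rounds it up to the next doubleword boundary,
// ((12 + 8 - 1) / 8) * 8 = 16.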
6642 break;
6643 }
6644 case MVT::v4f32:
6645 case MVT::v4i32:
6646 case MVT::v8i16:
6647 case MVT::v16i8:
6648 case MVT::v2f64:
6649 case MVT::v2i64:
6650 case MVT::v1i128:
6651 case MVT::f128:
6652 // These can be scalar arguments or elements of a vector array type
6653 // passed directly. The latter are used to implement ELFv2 homogeneous
6654 // vector aggregates.
6655
6656 // For a varargs call, named arguments go into VRs or on the stack as
6657 // usual; unnamed arguments always go to the stack or the corresponding
6658 // GPRs when within range. For now, we always put the value in both
6659 // locations (or even all three).
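// Concretely, a vararg <4 x i32> below is stored to its 16-byte parameter
// save area slot, reloaded into a VR if one remains, and reloaded
// doubleword-by-doubleword into any remaining GPRs that shadow those bytes.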
6660 if (CFlags.IsVarArg) {
6661 assert(HasParameterArea &&
6662 "Parameter area must exist if we have a varargs call.");
6663 // We could elide this store in the case where the object fits
6664 // entirely in R registers. Maybe later.
6665 SDValue Store =
6666 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6667 MemOpChains.push_back(Store);
6668 if (VR_idx != NumVRs) {
6669 SDValue Load =
6670 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6671 MemOpChains.push_back(Load.getValue(1));
6672 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6673 }
6674 ArgOffset += 16;
6675 for (unsigned i=0; i<16; i+=PtrByteSize) {
6676 if (GPR_idx == NumGPRs)
6677 break;
6678 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6679 DAG.getConstant(i, dl, PtrVT));
6680 SDValue Load =
6681 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6682 MemOpChains.push_back(Load.getValue(1));
6683 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6684 }
6685 break;
6686 }
6687
6688 // Non-varargs Altivec params go into VRs or on the stack.
6689 if (VR_idx != NumVRs) {
6690 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6691 } else {
6692 if (IsFastCall)
6693 ComputePtrOff();
6694
6695 assert(HasParameterArea &&
6696 "Parameter area must exist to pass an argument in memory.");
6697 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6698 true, CFlags.IsTailCall, true, MemOpChains,
6699 TailCallArguments, dl);
6700 if (IsFastCall)
6701 ArgOffset += 16;
6702 }
6703
6704 if (!IsFastCall)
6705 ArgOffset += 16;
6706 break;
6707 }
6708 }
6709
6710 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6711 "mismatch in size of parameter area");
6712 (void)NumBytesActuallyUsed;
6713
6714 if (!MemOpChains.empty())
6715 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6716
6717 // Check if this is an indirect call (MTCTR/BCTRL).
6718 // See prepareDescriptorIndirectCall and buildCallOperands for more
6719 // information about calls through function pointers in the 64-bit SVR4 ABI.
6720 if (CFlags.IsIndirect) {
6721 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6722 // caller in the TOC save area.
6723 if (isTOCSaveRestoreRequired(Subtarget)) {
6724 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6725 // Load r2 into a virtual register and store it to the TOC save area.
6726 setUsesTOCBasePtr(DAG);
6727 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6728 // TOC save area offset.
6729 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6730 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6731 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6732 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6733 MachinePointerInfo::getStack(
6734 DAG.getMachineFunction(), TOCSaveOffset));
6735 }
6736 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6737 // This does not mean the MTCTR instruction must use R12; it's easier
6738 // to model this as an extra parameter, so do that.
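// (In ELFv2, the callee's global entry point rebuilds its TOC pointer from
// R12, so R12 must hold the function entry address at the call.)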
6739 if (isELFv2ABI && !CFlags.IsPatchPoint)
6740 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6741 }
6742
6743 // Build a sequence of copy-to-reg nodes chained together with token chain
6744 // and flag operands which copy the outgoing args into the appropriate regs.
6745 SDValue InGlue;
6746 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6747 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6748 RegsToPass[i].second, InGlue);
6749 InGlue = Chain.getValue(1);
6750 }
6751
6752 if (CFlags.IsTailCall && !IsSibCall)
6753 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6754 TailCallArguments);
6755
6756 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6757 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6758}
6759
6760// Returns true when the shadow of a general purpose argument register
6761// in the parameter save area is aligned to at least 'RequiredAlign'.
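// For example, on 64-bit AIX the shadow of X3 starts at parameter save area
// offset 0 (16-byte aligned), X4 at offset 8 (only 8-byte aligned), so a
// 16-byte vector can only start its shadow at X3, X5, X7 or X9.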
6762static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6763 assert(RequiredAlign.value() <= 16 &&
6764 "Required alignment greater than stack alignment.");
6765 switch (Reg) {
6766 default:
6767 report_fatal_error("called on invalid register.");
6768 case PPC::R5:
6769 case PPC::R9:
6770 case PPC::X3:
6771 case PPC::X5:
6772 case PPC::X7:
6773 case PPC::X9:
6774 // These registers are 16 byte aligned, which is the strictest alignment
6775 // we can support.
6776 return true;
6777 case PPC::R3:
6778 case PPC::R7:
6779 case PPC::X4:
6780 case PPC::X6:
6781 case PPC::X8:
6782 case PPC::X10:
6783 // The shadow of these registers in the PSA is 8 byte aligned.
6784 return RequiredAlign <= 8;
6785 case PPC::R4:
6786 case PPC::R6:
6787 case PPC::R8:
6788 case PPC::R10:
6789 return RequiredAlign <= 4;
6790 }
6791}
6792
6793static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6794 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6795 CCState &S) {
6796 AIXCCState &State = static_cast<AIXCCState &>(S);
6797 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6798 State.getMachineFunction().getSubtarget());
6799 const bool IsPPC64 = Subtarget.isPPC64();
6800 const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
6801 const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;
6802
6803 if (ValVT == MVT::f128)
6804 report_fatal_error("f128 is unimplemented on AIX.");
6805
6806 if (ArgFlags.isNest())
6807 report_fatal_error("Nest arguments are unimplemented.");
6808
6809 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6810 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6811 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6812 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6813 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6814 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6815
6816 static const MCPhysReg VR[] = {// Vector registers.
6817 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6818 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6819 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6820
6821 if (ArgFlags.isByVal()) {
6822 if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
6823 report_fatal_error("Pass-by-value arguments with alignment greater than "
6824 "register width are not supported.");
6825
6826 const unsigned ByValSize = ArgFlags.getByValSize();
6827
6828 // An empty aggregate parameter takes up no storage and no registers,
6829 // but needs a MemLoc for a stack slot for the formal arguments side.
6830 if (ByValSize == 0) {
6831 State.addLoc(CCValAssign::getMem(ValNo, ValVT,
6832 State.getStackSize(), RegVT, LocInfo));
6833 return false;
6834 }
6835
6836 const unsigned StackSize = alignTo(ByValSize, PtrAlign);
6837 unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
6838 for (const unsigned E = Offset + StackSize; Offset < E;
6839 Offset += PtrAlign.value()) {
6840 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6841 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6842 else {
6843 State.addLoc(
6844 CCValAssign::getMem(ValNo, ValVT, Offset, RegVT,
6845 LocInfo));
6846 break;
6847 }
6848 }
6849 return false;
6850 }
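// To illustrate the by-value path above: a 12-byte struct on PPC64 reserves
// a 16-byte aligned stack region and tries to claim two GPRs; if only one
// GPR is free, the first doubleword gets a RegLoc and the remainder falls
// back to a single MemLoc.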
6851
6852 // Arguments always reserve parameter save area.
6853 switch (ValVT.SimpleTy) {
6854 default:
6855 report_fatal_error("Unhandled value type for argument.");
6856 case MVT::i64:
6857 // i64 arguments should have been split to i32 for PPC32.
6858 assert(IsPPC64 && "PPC32 should have split i64 values.");
6859 [[fallthrough]];
6860 case MVT::i1:
6861 case MVT::i32: {
6862 const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
6863 // AIX integer arguments are always passed in register width.
6864 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6865 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6866 : CCValAssign::LocInfo::ZExt;
6867 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
6868 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6869 else
6870 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6871
6872 return false;
6873 }
6874 case MVT::f32:
6875 case MVT::f64: {
6876 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6877 const unsigned StoreSize = LocVT.getStoreSize();
6878 // Floats are always 4-byte aligned in the PSA on AIX.
6879 // This includes f64 in 64-bit mode for ABI compatibility.
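// E.g. an f32 on 64-bit AIX still reserves a full 8-byte PSA slot (and
// shadows a whole GPR), while on 32-bit an f64 reserves 8 bytes and may
// split across two GPRs in the vararg case handled below.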
6880 const unsigned Offset =
6881 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6882 unsigned FReg = State.AllocateReg(FPR);
6883 if (FReg)
6884 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6885
6886 // Reserve and initialize GPRs or initialize the PSA as required.
6887 for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
6888 if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
6889 assert(FReg && "An FPR should be available when a GPR is reserved.");
6890 if (State.isVarArg()) {
6891 // Successfully reserved GPRs are only initialized for vararg calls.
6892 // Custom handling is required for:
6893 // f64 in PPC32 needs to be split into 2 GPRs.
6894 // f32 in PPC64 needs to occupy only the lower 32 bits of a 64-bit GPR.
6895 State.addLoc(
6896 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6897 }
6898 } else {
6899 // If there are insufficient GPRs, the PSA needs to be initialized.
6900 // For compatibility with the AIX XL compiler, initialization occurs
6901 // even if an FPR was already initialized. The full memory for the
6902 // argument will be initialized even if a prior word is saved in a GPR.
6903 // A custom memLoc is used when the argument also passes in FPR so
6904 // that the callee handling can skip over it easily.
6905 State.addLoc(
6906 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6907 LocInfo)
6908 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6909 break;
6910 }
6911 }
6912
6913 return false;
6914 }
6915 case MVT::v4f32:
6916 case MVT::v4i32:
6917 case MVT::v8i16:
6918 case MVT::v16i8:
6919 case MVT::v2i64:
6920 case MVT::v2f64:
6921 case MVT::v1i128: {
6922 const unsigned VecSize = 16;
6923 const Align VecAlign(VecSize);
6924
6925 if (!State.isVarArg()) {
6926 // If there are vector registers remaining we don't consume any stack
6927 // space.
6928 if (unsigned VReg = State.AllocateReg(VR)) {
6929 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6930 return false;
6931 }
6932 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6933 // might be allocated in the portion of the PSA that is shadowed by the
6934 // GPRs.
6935 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6936 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6937 return false;
6938 }
6939
6940 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6941 ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6942
6943 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6944 // Burn any underaligned registers and their shadowed stack space until
6945 // we reach the required alignment.
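// E.g. in 32-bit mode, if R3 is the next free GPR, a 16-byte vector burns
// R3 and R4 (PSA bytes 0..7) so that the next candidate, R5, shadows a
// 16-byte-aligned slot.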
6946 while (NextRegIndex != GPRs.size() &&
6947 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6948 // Shadow allocate register and its stack shadow.
6949 unsigned Reg = State.AllocateReg(GPRs);
6950 State.AllocateStack(PtrSize, PtrAlign);
6951 assert(Reg && "Allocating register unexpectedly failed.");
6952 (void)Reg;
6953 NextRegIndex = State.getFirstUnallocated(GPRs);
6954 }
6955
6956 // Vectors that are passed as fixed arguments are handled differently.
6957 // They are passed in VRs if any are available (unlike arguments passed
6958 // through ellipses) and shadow GPRs (unlike arguments to non-vararg
6959 // functions).
6960 if (State.isFixed(ValNo)) {
6961 if (unsigned VReg = State.AllocateReg(VR)) {
6962 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6963 // Shadow allocate GPRs and stack space even though we pass in a VR.
6964 for (unsigned I = 0; I != VecSize; I += PtrSize)
6965 State.AllocateReg(GPRs);
6966 State.AllocateStack(VecSize, VecAlign);
6967 return false;
6968 }
6969 // No vector registers remain so pass on the stack.
6970 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6971 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6972 return false;
6973 }
6974
6975 // If all GPRs are consumed, then we pass the argument fully on the stack.
6976 if (NextRegIndex == GPRs.size()) {
6977 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6978 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6979 return false;
6980 }
6981
6982 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6983 // half of the argument, and then need to pass the remaining half on the
6984 // stack.
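// I.e. R9 and R10 carry bytes 0..7 of the vector, while the full 16-byte
// custom MemLoc below covers the PSA slot, where bytes 8..15 travel only
// in memory.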
6985 if (GPRs[NextRegIndex] == PPC::R9) {
6986 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6987 State.addLoc(
6988 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6989
6990 const unsigned FirstReg = State.AllocateReg(PPC::R9);
6991 const unsigned SecondReg = State.AllocateReg(PPC::R10);
6992 assert(FirstReg && SecondReg &&
6993 "Allocating R9 or R10 unexpectedly failed.");
6994 State.addLoc(
6995 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6996 State.addLoc(
6997 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6998 return false;
6999 }
7000
7001 // We have enough GPRs to fully pass the vector argument, and we have
7002 // already consumed any underaligned registers. Start with the custom
7003 // MemLoc and then the custom RegLocs.
7004 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7005 State.addLoc(
7006 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7007 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7008 const unsigned Reg = State.AllocateReg(GPRs);
7009 assert(Reg && "Failed to allocated register for vararg vector argument");
7010 State.addLoc(
7011 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7012 }
7013 return false;
7014 }
7015 }
7016 return true;
7017}
7018
7019 // So far, this function is only used by LowerFormalArguments_AIX()
7020 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7021 bool IsPPC64,
7022 bool HasP8Vector,
7023 bool HasVSX) {
7024 assert((IsPPC64 || SVT != MVT::i64) &&
7025 "i64 should have been split for 32-bit codegen.");
7026
7027 switch (SVT) {
7028 default:
7029 report_fatal_error("Unexpected value type for formal argument");
7030 case MVT::i1:
7031 case MVT::i32:
7032 case MVT::i64:
7033 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7034 case MVT::f32:
7035 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7036 case MVT::f64:
7037 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7038 case MVT::v4f32:
7039 case MVT::v4i32:
7040 case MVT::v8i16:
7041 case MVT::v16i8:
7042 case MVT::v2i64:
7043 case MVT::v2f64:
7044 case MVT::v1i128:
7045 return &PPC::VRRCRegClass;
7046 }
7047}
7048
7049 static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT,
7050 SelectionDAG &DAG, SDValue ArgValue,
7051 MVT LocVT, const SDLoc &dl) {
7052 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7053 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7054
7055 if (Flags.isSExt())
7056 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7057 DAG.getValueType(ValVT));
7058 else if (Flags.isZExt())
7059 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7060 DAG.getValueType(ValVT));
7061
7062 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7063}
7064
7065static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7066 const unsigned LASize = FL->getLinkageSize();
7067
7068 if (PPC::GPRCRegClass.contains(Reg)) {
7069 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7070 "Reg must be a valid argument register!");
7071 return LASize + 4 * (Reg - PPC::R3);
7072 }
7073
7074 if (PPC::G8RCRegClass.contains(Reg)) {
7075 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7076 "Reg must be a valid argument register!");
7077 return LASize + 8 * (Reg - PPC::X3);
7078 }
7079
7080 llvm_unreachable("Only general purpose registers expected.");
7081}
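// Worked example, assuming the 64-bit AIX linkage area of 48 bytes:
// mapArgRegToOffsetAIX(PPC::X5) = 48 + 8 * (X5 - X3) = 64.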
7082
7083// AIX ABI Stack Frame Layout:
7084//
7085// Low Memory +--------------------------------------------+
7086// SP +---> | Back chain | ---+
7087// | +--------------------------------------------+ |
7088// | | Saved Condition Register | |
7089// | +--------------------------------------------+ |
7090// | | Saved Linkage Register | |
7091// | +--------------------------------------------+ | Linkage Area
7092// | | Reserved for compilers | |
7093// | +--------------------------------------------+ |
7094// | | Reserved for binders | |
7095// | +--------------------------------------------+ |
7096// | | Saved TOC pointer | ---+
7097// | +--------------------------------------------+
7098// | | Parameter save area |
7099// | +--------------------------------------------+
7100// | | Alloca space |
7101// | +--------------------------------------------+
7102// | | Local variable space |
7103// | +--------------------------------------------+
7104// | | Float/int conversion temporary |
7105// | +--------------------------------------------+
7106// | | Save area for AltiVec registers |
7107// | +--------------------------------------------+
7108// | | AltiVec alignment padding |
7109// | +--------------------------------------------+
7110// | | Save area for VRSAVE register |
7111// | +--------------------------------------------+
7112// | | Save area for General Purpose registers |
7113// | +--------------------------------------------+
7114// | | Save area for Floating Point registers |
7115// | +--------------------------------------------+
7116// +---- | Back chain |
7117// High Memory +--------------------------------------------+
7118//
7119// Specifications:
7120// AIX 7.2 Assembler Language Reference
7121// Subroutine linkage convention
7122
7123SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7124 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7125 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7126 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7127
7128 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7129 CallConv == CallingConv::Fast) &&
7130 "Unexpected calling convention!");
7131
7132 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7133 report_fatal_error("Tail call support is unimplemented on AIX.");
7134
7135 if (useSoftFloat())
7136 report_fatal_error("Soft float support is unimplemented on AIX.");
7137
7138 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7139
7140 const bool IsPPC64 = Subtarget.isPPC64();
7141 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7142
7144 // Assign locations to all of the incoming arguments.
7145 SmallVector<CCValAssign, 16> ArgLocs; MachineFunction &MF = DAG.getMachineFunction();
7146 MachineFrameInfo &MFI = MF.getFrameInfo();
7147 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7148 AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7149
7150 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7151 // Reserve space for the linkage area on the stack.
7152 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7153 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7154 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7155
7156 SmallVector<SDValue, 8> MemOps;
7157
7158 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7159 CCValAssign &VA = ArgLocs[I++];
7160 MVT LocVT = VA.getLocVT();
7161 MVT ValVT = VA.getValVT();
7162 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7163 // For compatibility with the AIX XL compiler, the float args in the
7164 // parameter save area are initialized even if the argument is available
7165 // in register. The caller is required to initialize both the register
7166 // and memory; however, the callee can choose to expect it in either.
7167 // The memloc is dismissed here because the argument is retrieved from
7168 // the register.
7169 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7170 continue;
7171
7172 auto HandleMemLoc = [&]() {
7173 const unsigned LocSize = LocVT.getStoreSize();
7174 const unsigned ValSize = ValVT.getStoreSize();
7175 assert((ValSize <= LocSize) &&
7176 "Object size is larger than size of MemLoc");
7177 int CurArgOffset = VA.getLocMemOffset();
7178 // Objects are right-justified because AIX is big-endian.
7179 if (LocSize > ValSize)
7180 CurArgOffset += LocSize - ValSize;
7181 // Potential tail calls could cause overwriting of argument stack slots.
7182 const bool IsImmutable =
7183 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7184 (CallConv == CallingConv::Fast));
7185 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7186 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7187 SDValue ArgValue =
7188 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7189 InVals.push_back(ArgValue);
7190 };
7191
7192 // Vector arguments to VaArg functions are passed both on the stack, and
7193 // in any available GPRs. Load the value from the stack and add the GPRs
7194 // as live ins.
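// E.g. a 64-bit vararg <16 x i8> arrives as one custom MemLoc (its 16-byte
// slot) followed by two custom RegLocs, one per doubleword held in a GPR.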
7195 if (VA.isMemLoc() && VA.needsCustom()) {
7196 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7197 assert(isVarArg && "Only use custom memloc for vararg.");
7198 // Record the ValNo of the custom MemLoc so we can compare it to the
7199 // ValNo of the matching custom RegLocs.
7200 const unsigned OriginalValNo = VA.getValNo();
7201 (void)OriginalValNo;
7202
7203 auto HandleCustomVecRegLoc = [&]() {
7204 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7205 "Missing custom RegLoc.");
7206 VA = ArgLocs[I++];
7207 assert(VA.getValVT().isVector() &&
7208 "Unexpected Val type for custom RegLoc.");
7209 assert(VA.getValNo() == OriginalValNo &&
7210 "ValNo mismatch between custom MemLoc and RegLoc.");
7212 MF.addLiveIn(VA.getLocReg(),
7213 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7214 Subtarget.hasVSX()));
7215 };
7216
7217 HandleMemLoc();
7218 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7219 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7220 // R10.
7221 HandleCustomVecRegLoc();
7222 HandleCustomVecRegLoc();
7223
7224 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7225 // we passed the vector in R5, R6, R7 and R8.
7226 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7227 assert(!IsPPC64 &&
7228 "Only 2 custom RegLocs expected for 64-bit codegen.");
7229 HandleCustomVecRegLoc();
7230 HandleCustomVecRegLoc();
7231 }
7232
7233 continue;
7234 }
7235
7236 if (VA.isRegLoc()) {
7237 if (VA.getValVT().isScalarInteger())
7238 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7239 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7240 switch (VA.getValVT().SimpleTy) {
7241 default:
7242 report_fatal_error("Unhandled value type for argument.");
7243 case MVT::f32:
7244 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7245 break;
7246 case MVT::f64:
7247 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7248 break;
7249 }
7250 } else if (VA.getValVT().isVector()) {
7251 switch (VA.getValVT().SimpleTy) {
7252 default:
7253 report_fatal_error("Unhandled value type for argument.");
7254 case MVT::v16i8:
7255 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7256 break;
7257 case MVT::v8i16:
7258 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7259 break;
7260 case MVT::v4i32:
7261 case MVT::v2i64:
7262 case MVT::v1i128:
7263 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7264 break;
7265 case MVT::v4f32:
7266 case MVT::v2f64:
7267 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7268 break;
7269 }
7270 }
7271 }
7272
7273 if (Flags.isByVal() && VA.isMemLoc()) {
7274 const unsigned Size =
7275 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7276 PtrByteSize);
7277 const int FI = MF.getFrameInfo().CreateFixedObject(
7278 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7279 /* IsAliased */ true);
7280 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7281 InVals.push_back(FIN);
7282
7283 continue;
7284 }
7285
7286 if (Flags.isByVal()) {
7287 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7288
7289 const MCPhysReg ArgReg = VA.getLocReg();
7290 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7291
7292 if (Flags.getNonZeroByValAlign() > PtrByteSize)
7293 report_fatal_error("Over aligned byvals not supported yet.");
7294
7295 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7296 const int FI = MF.getFrameInfo().CreateFixedObject(
7297 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7298 /* IsAliased */ true);
7299 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7300 InVals.push_back(FIN);
7301
7302 // Add live ins for all the RegLocs for the same ByVal.
7303 const TargetRegisterClass *RegClass =
7304 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7305
7306 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7307 unsigned Offset) {
7308 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7309 // Since the callers side has left justified the aggregate in the
7310 // register, we can simply store the entire register into the stack
7311 // slot.
7312 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7313 // The store to the fixedstack object is needed because accessing a
7314 // field of the ByVal will use a gep and load. Ideally we will optimize
7315 // to extract the value from the register directly, and elide the
7316 // stores when the argument's address is not taken, but that will need
7317 // to be future work.
7318 SDValue Store = DAG.getStore(
7319 CopyFrom.getValue(1), dl, CopyFrom,
7320 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7321 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7322
7323 MemOps.push_back(Store);
7324 };
7325
7326 unsigned Offset = 0;
7327 HandleRegLoc(VA.getLocReg(), Offset);
7328 Offset += PtrByteSize;
7329 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7330 Offset += PtrByteSize) {
7331 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7332 "RegLocs should be for ByVal argument.");
7333
7334 const CCValAssign RL = ArgLocs[I++];
7335 HandleRegLoc(RL.getLocReg(), Offset);
7337 }
7338
7339 if (Offset != StackSize) {
7340 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7341 "Expected MemLoc for remaining bytes.");
7342 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7343 // Consume the MemLoc. The InVal has already been emitted, so nothing
7344 // more needs to be done.
7345 ++I;
7346 }
7347
7348 continue;
7349 }
7350
7351 if (VA.isRegLoc() && !VA.needsCustom()) {
7352 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7353 Register VReg =
7354 MF.addLiveIn(VA.getLocReg(),
7355 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7356 Subtarget.hasVSX()));
7357 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7358 if (ValVT.isScalarInteger() &&
7359 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7360 ArgValue =
7361 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7362 }
7363 InVals.push_back(ArgValue);
7364 continue;
7365 }
7366 if (VA.isMemLoc()) {
7367 HandleMemLoc();
7368 continue;
7369 }
7370 }
7371
7372 // On AIX a minimum of 8 words is saved to the parameter save area.
7373 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7374 // Area that is at least reserved in the caller of this function.
7375 unsigned CallerReservedArea = std::max<unsigned>(
7376 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
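// E.g. on 64-bit AIX: LinkageSize (48) + MinParameterSaveArea (64) = 112
// bytes is the floor even for a function taking no stack arguments.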
7377
7378 // Set the size that is at least reserved in caller of this function. Tail
7379 // call optimized function's reserved stack space needs to be aligned so
7380 // that taking the difference between two stack areas will result in an
7381 // aligned stack.
7382 CallerReservedArea =
7383 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7384 FuncInfo->setMinReservedArea(CallerReservedArea);
7385
7386 if (isVarArg) {
7387 FuncInfo->setVarArgsFrameIndex(
7388 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7389 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7390
7391 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7392 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7393
7394 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7395 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7396 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7397
7398 // The fixed integer arguments of a variadic function are stored to the
7399 // VarArgsFrameIndex on the stack so that they may be loaded by
7400 // dereferencing the result of va_next.
7401 for (unsigned GPRIndex =
7402 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7403 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7404
7405 const Register VReg =
7406 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7407 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7408
7409 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7410 SDValue Store =
7411 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7412 MemOps.push_back(Store);
7413 // Increment the address for the next argument to store.
7414 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7415 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7416 }
7417 }
7418
7419 if (!MemOps.empty())
7420 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7421
7422 return Chain;
7423}
7424
7425SDValue PPCTargetLowering::LowerCall_AIX(
7426 SDValue Chain, SDValue Callee, CallFlags CFlags,
7427 const SmallVectorImpl<ISD::OutputArg> &Outs,
7428 const SmallVectorImpl<SDValue> &OutVals,
7429 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7430 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7431 const CallBase *CB) const {
7432 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7433 // AIX ABI stack frame layout.
7434
7435 assert((CFlags.CallConv == CallingConv::C ||
7436 CFlags.CallConv == CallingConv::Cold ||
7437 CFlags.CallConv == CallingConv::Fast) &&
7438 "Unexpected calling convention!");
7439
7440 if (CFlags.IsPatchPoint)
7441 report_fatal_error("This call type is unimplemented on AIX.");
7442
7443 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7444
7445 MachineFunction &MF = DAG.getMachineFunction();
7446 SmallVector<CCValAssign, 16> ArgLocs;
7447 AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7448 *DAG.getContext());
7449
7450 // Reserve space for the linkage save area (LSA) on the stack.
7451 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7452 // [SP][CR][LR][2 x reserved][TOC].
7453 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7454 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7455 const bool IsPPC64 = Subtarget.isPPC64();
7456 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7457 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7458 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7459 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7460
7461 // The prolog code of the callee may store up to 8 GPR argument registers to
7462 // the stack, allowing va_start to index over them in memory if the callee
7463 // is variadic.
7464 // Because we cannot tell if this is needed on the caller side, we have to
7465 // conservatively assume that it is needed. As such, make sure we have at
7466 // least enough stack space for the caller to store the 8 GPRs.
7467 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7468 const unsigned NumBytes = std::max<unsigned>(
7469 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
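// E.g. on 32-bit AIX this floor is 24 (linkage) + 32 (8 x 4-byte GPR home
// slots) = 56 bytes.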
7470
7471 // Adjust the stack pointer for the new arguments...
7472 // These operations are automatically eliminated by the prolog/epilog pass.
7473 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7474 SDValue CallSeqStart = Chain;
7475
7476 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
7477 SmallVector<SDValue, 8> MemOpChains;
7478
7479 // Set up a copy of the stack pointer for loading and storing any
7480 // arguments that may not fit in the registers available for argument
7481 // passing.
7482 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7483 : DAG.getRegister(PPC::R1, MVT::i32);
7484
7485 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7486 const unsigned ValNo = ArgLocs[I].getValNo();
7487 SDValue Arg = OutVals[ValNo];
7488 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7489
7490 if (Flags.isByVal()) {
7491 const unsigned ByValSize = Flags.getByValSize();
7492
7493 // Nothing to do for zero-sized ByVals on the caller side.
7494 if (!ByValSize) {
7495 ++I;
7496 continue;
7497 }
7498
7499 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7500 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7501 (LoadOffset != 0)
7502 ? DAG.getObjectPtrOffset(
7503 dl, Arg, TypeSize::getFixed(LoadOffset))
7504 : Arg,
7505 MachinePointerInfo(), VT);
7506 };
7507
7508 unsigned LoadOffset = 0;
7509
7510 // Initialize registers, which are fully occupied by the by-val argument.
7511 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7512 SDValue Load = GetLoad(PtrVT, LoadOffset);
7513 MemOpChains.push_back(Load.getValue(1));
7514 LoadOffset += PtrByteSize;
7515 const CCValAssign &ByValVA = ArgLocs[I++];
7516 assert(ByValVA.getValNo() == ValNo &&
7517 "Unexpected location for pass-by-value argument.");
7518 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7519 }
7520
7521 if (LoadOffset == ByValSize)
7522 continue;
7523
7524 // There must be one more loc to handle the remainder.
7525 assert(ArgLocs[I].getValNo() == ValNo &&
7526 "Expected additional location for by-value argument.");
7527
7528 if (ArgLocs[I].isMemLoc()) {
7529 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7530 const CCValAssign &ByValVA = ArgLocs[I++];
7531 ISD::ArgFlagsTy MemcpyFlags = Flags;
7532 // Only memcpy the bytes that don't pass in register.
7533 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7534 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7535 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7536 dl, Arg, TypeSize::getFixed(LoadOffset))
7537 : Arg,
7538 DAG.getObjectPtrOffset(
7539 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7540 CallSeqStart, MemcpyFlags, DAG, dl);
7541 continue;
7542 }
7543
7544 // Initialize the final register residue.
7545 // Any residue that occupies the final by-val arg register must be
7546 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7547 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7548 // 2 and 1 byte loads.
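// Sketch of the packing below for those 7 bytes on PPC64: the i32 load is
// shifted left by 64 - 32 = 32 bits, the i16 by 64 - 48 = 16, the i8 by
// 64 - 56 = 8, and the three are OR'ed so the residue sits left-justified
// in the register.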
7549 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7550 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7551 "Unexpected register residue for by-value argument.");
7552 SDValue ResidueVal;
7553 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7554 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7555 const MVT VT =
7556 N == 1 ? MVT::i8
7557 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7558 SDValue Load = GetLoad(VT, LoadOffset);
7559 MemOpChains.push_back(Load.getValue(1));
7560 LoadOffset += N;
7561 Bytes += N;
7562
7563 // By-val arguments are passed left-justified in register.
7564 // Every load here needs to be shifted, otherwise a full register load
7565 // should have been used.
7566 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7567 "Unexpected load emitted during handling of pass-by-value "
7568 "argument.");
7569 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7570 EVT ShiftAmountTy =
7571 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7572 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7573 SDValue ShiftedLoad =
7574 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7575 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7576 ShiftedLoad)
7577 : ShiftedLoad;
7578 }
7579
7580 const CCValAssign &ByValVA = ArgLocs[I++];
7581 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7582 continue;
7583 }
7584
7585 CCValAssign &VA = ArgLocs[I++];
7586 const MVT LocVT = VA.getLocVT();
7587 const MVT ValVT = VA.getValVT();
7588
7589 switch (VA.getLocInfo()) {
7590 default:
7591 report_fatal_error("Unexpected argument extension type.");
7592 case CCValAssign::Full:
7593 break;
7594 case CCValAssign::ZExt:
7595 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7596 break;
7597 case CCValAssign::SExt:
7598 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7599 break;
7600 }
7601
7602 if (VA.isRegLoc() && !VA.needsCustom()) {
7603 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7604 continue;
7605 }
7606
7607 // Vector arguments passed to VarArg functions need custom handling when
7608 // they are passed (at least partially) in GPRs.
7609 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7610 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7611 // Store value to its stack slot.
7612 SDValue PtrOff =
7613 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7614 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7615 SDValue Store =
7616 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7617 MemOpChains.push_back(Store);
7618 const unsigned OriginalValNo = VA.getValNo();
7619 // Then load the GPRs from the stack
7620 unsigned LoadOffset = 0;
7621 auto HandleCustomVecRegLoc = [&]() {
7622 assert(I != E && "Unexpected end of CCvalAssigns.");
7623 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7624 "Expected custom RegLoc.");
7625 CCValAssign RegVA = ArgLocs[I++];
7626 assert(RegVA.getValNo() == OriginalValNo &&
7627 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7628 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7629 DAG.getConstant(LoadOffset, dl, PtrVT));
7630 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7631 MemOpChains.push_back(Load.getValue(1));
7632 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7633 LoadOffset += PtrByteSize;
7634 };
7635
7636 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7637 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7638 // R10.
7639 HandleCustomVecRegLoc();
7640 HandleCustomVecRegLoc();
7641
7642 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7643 ArgLocs[I].getValNo() == OriginalValNo) {
7644 assert(!IsPPC64 &&
7645 "Only 2 custom RegLocs expected for 64-bit codegen.");
7646 HandleCustomVecRegLoc();
7647 HandleCustomVecRegLoc();
7648 }
7649
7650 continue;
7651 }
7652
7653 if (VA.isMemLoc()) {
7654 SDValue PtrOff =
7655 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7656 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7657 MemOpChains.push_back(
7658 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7659
7660 continue;
7661 }
7662
7663 if (!ValVT.isFloatingPoint())
7664 report_fatal_error(
7665 "Unexpected register handling for calling convention.");
7666
7667 // Custom handling is used for GPR initializations for vararg float
7668 // arguments.
7669 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7670 LocVT.isInteger() &&
7671 "Custom register handling only expected for VarArg.");
7672
7673 SDValue ArgAsInt =
7674 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7675
7676 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7677 // f32 in 32-bit GPR
7678 // f64 in 64-bit GPR
7679 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7680 else if (Arg.getValueType().getFixedSizeInBits() <
7681 LocVT.getFixedSizeInBits())
7682 // f32 in 64-bit GPR.
7683 RegsToPass.push_back(std::make_pair(
7684 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7685 else {
7686 // f64 in two 32-bit GPRs
7687 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
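// I.e. the high word (bits 63..32 of the bitcast) goes to the first GPR
// via the SRL-by-32 below, and the low word is truncated into the second
// GPR when one was available.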
7688 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7689 "Unexpected custom register for argument!");
7690 CCValAssign &GPR1 = VA;
7691 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7692 DAG.getConstant(32, dl, MVT::i8));
7693 RegsToPass.push_back(std::make_pair(
7694 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7695
7696 if (I != E) {
7697 // If only 1 GPR was available, there will only be one custom GPR and
7698 // the argument will also pass in memory.
7699 CCValAssign &PeekArg = ArgLocs[I];
7700 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7701 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7702 CCValAssign &GPR2 = ArgLocs[I++];
7703 RegsToPass.push_back(std::make_pair(
7704 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7705 }
7706 }
7707 }
7708 }
7709
7710 if (!MemOpChains.empty())
7711 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7712
7713 // For indirect calls, we need to save the TOC base to the stack for
7714 // restoration after the call.
7715 if (CFlags.IsIndirect) {
7716 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7717 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7718 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7719 const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7720 const unsigned TOCSaveOffset =
7721 Subtarget.getFrameLowering()->getTOCSaveOffset();
7722
7723 setUsesTOCBasePtr(DAG);
7724 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7725 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7726 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7727 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7728 Chain = DAG.getStore(
7729 Val.getValue(1), dl, Val, AddPtr,
7730 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7731 }
7732
7733 // Build a sequence of copy-to-reg nodes chained together with token chain
7734 // and flag operands which copy the outgoing args into the appropriate regs.
7735 SDValue InGlue;
7736 for (auto Reg : RegsToPass) {
7737 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7738 InGlue = Chain.getValue(1);
7739 }
7740
7741 const int SPDiff = 0;
7742 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7743 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7744}
7745
7746bool
7747PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7748 MachineFunction &MF, bool isVarArg,
7749 const SmallVectorImpl<ISD::OutputArg> &Outs,
7750 LLVMContext &Context) const {
7751 SmallVector<CCValAssign, 16> RVLocs;
7752 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7753 return CCInfo.CheckReturn(
7754 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7755 ? RetCC_PPC_Cold
7756 : RetCC_PPC);
7757}
7758
7759SDValue
7760PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7761 bool isVarArg,
7762 const SmallVectorImpl<ISD::OutputArg> &Outs,
7763 const SmallVectorImpl<SDValue> &OutVals,
7764 const SDLoc &dl, SelectionDAG &DAG) const {
7765 SmallVector<CCValAssign, 16> RVLocs;
7766 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7767 *DAG.getContext());
7768 CCInfo.AnalyzeReturn(Outs,
7769 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7770 ? RetCC_PPC_Cold
7771 : RetCC_PPC);
7772
7773 SDValue Glue;
7774 SmallVector<SDValue, 4> RetOps(1, Chain);
7775
7776 // Copy the result values into the output registers.
7777 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7778 CCValAssign &VA = RVLocs[i];
7779 assert(VA.isRegLoc() && "Can only return in registers!");
7780
7781 SDValue Arg = OutVals[RealResIdx];
7782
7783 switch (VA.getLocInfo()) {
7784 default: llvm_unreachable("Unknown loc info!");
7785 case CCValAssign::Full: break;
7786 case CCValAssign::AExt:
7787 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7788 break;
7789 case CCValAssign::ZExt:
7790 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7791 break;
7792 case CCValAssign::SExt:
7793 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7794 break;
7795 }
7796 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7797 bool isLittleEndian = Subtarget.isLittleEndian();
7798 // Legalize ret f64 -> ret 2 x i32.
7799 SDValue SVal =
7800 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7801 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7802 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7803 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7804 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7805 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7806 Glue = Chain.getValue(1);
7807 VA = RVLocs[++i]; // skip ahead to next loc
7808 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7809 } else
7810 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7811 Glue = Chain.getValue(1);
7812 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7813 }
7814
7815 RetOps[0] = Chain; // Update chain.
7816
7817 // Add the glue if we have it.
7818 if (Glue.getNode())
7819 RetOps.push_back(Glue);
7820
7821 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7822}
7823
7824SDValue
7825PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7826 SelectionDAG &DAG) const {
7827 SDLoc dl(Op);
7828
7829 // Get the correct type for integers.
7830 EVT IntVT = Op.getValueType();
7831
7832 // Get the inputs.
7833 SDValue Chain = Op.getOperand(0);
7834 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7835 // Build a DYNAREAOFFSET node.
7836 SDValue Ops[2] = {Chain, FPSIdx};
7837 SDVTList VTs = DAG.getVTList(IntVT);
7838 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7839}
7840
7841SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7842 SelectionDAG &DAG) const {
7843 // When we pop the dynamic allocation we need to restore the SP link.
7844 SDLoc dl(Op);
7845
7846 // Get the correct type for pointers.
7847 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7848
7849 // Construct the stack pointer operand.
7850 bool isPPC64 = Subtarget.isPPC64();
7851 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7852 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7853
7854 // Get the operands for the STACKRESTORE.
7855 SDValue Chain = Op.getOperand(0);
7856 SDValue SaveSP = Op.getOperand(1);
7857
7858 // Load the old link SP.
7859 SDValue LoadLinkSP =
7860 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7861
7862 // Restore the stack pointer.
7863 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7864
7865 // Store the old link SP.
7866 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7867}
7868
7869SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7870 MachineFunction &MF = DAG.getMachineFunction();
7871 bool isPPC64 = Subtarget.isPPC64();
7872 EVT PtrVT = getPointerTy(MF.getDataLayout());
7873
7874 // Get the current return address save index.
7875
7876 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7877 int RASI = FI->getReturnAddrSaveIndex();
7878
7879 // If the return address save index hasn't been defined yet:
7880 if (!RASI) {
7881 // Find the fixed offset of the return address save area.
7882 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7883 // Allocate the frame index for the return address save area.
7884 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7885 // Save the result.
7886 FI->setReturnAddrSaveIndex(RASI);
7887 }
7888 return DAG.getFrameIndex(RASI, PtrVT);
7889}
7890
7891SDValue
7892PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7893 MachineFunction &MF = DAG.getMachineFunction();
7894 bool isPPC64 = Subtarget.isPPC64();
7895 EVT PtrVT = getPointerTy(MF.getDataLayout());
7896
7897 // Get current frame pointer save index. The users of this index will be
7898 // primarily DYNALLOC instructions.
7899 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7900 int FPSI = FI->getFramePointerSaveIndex();
7901
7902 // If the frame pointer save index hasn't been defined yet.
7903 if (!FPSI) {
7904 // Find the fixed offset of the frame pointer save area.
7905 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7906 // Allocate the frame index for the frame pointer save area.
7907 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7908 // Save the result.
7909 FI->setFramePointerSaveIndex(FPSI);
7910 }
7911 return DAG.getFrameIndex(FPSI, PtrVT);
7912}
7913
7914SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7915 SelectionDAG &DAG) const {
7916 MachineFunction &MF = DAG.getMachineFunction();
7917 // Get the inputs.
7918 SDValue Chain = Op.getOperand(0);
7919 SDValue Size = Op.getOperand(1);
7920 SDLoc dl(Op);
7921
7922 // Get the correct type for pointers.
7923 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7924 // Negate the size.
7925 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7926 DAG.getConstant(0, dl, PtrVT), Size);
7927 // Construct a node for the frame pointer save index.
7928 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7929 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7930 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7931 if (hasInlineStackProbe(MF))
7932 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7933 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7934}
7935
7936SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7937 SelectionDAG &DAG) const {
7938 MachineFunction &MF = DAG.getMachineFunction();
7939
7940 bool isPPC64 = Subtarget.isPPC64();
7941 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7942
7943 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7944 return DAG.getFrameIndex(FI, PtrVT);
7945}
7946
7947SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7948 SelectionDAG &DAG) const {
7949 SDLoc DL(Op);
7950 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7951 DAG.getVTList(MVT::i32, MVT::Other),
7952 Op.getOperand(0), Op.getOperand(1));
7953}
7954
7955SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7956 SelectionDAG &DAG) const {
7957 SDLoc DL(Op);
7958 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7959 Op.getOperand(0), Op.getOperand(1));
7960}
7961
7962SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7963 if (Op.getValueType().isVector())
7964 return LowerVectorLoad(Op, DAG);
7965
7966 assert(Op.getValueType() == MVT::i1 &&
7967 "Custom lowering only for i1 loads");
7968
7969 // First, load 8 bits into 32 bits, then truncate to 1 bit.
7970
7971 SDLoc dl(Op);
7972 LoadSDNode *LD = cast<LoadSDNode>(Op);
7973
7974 SDValue Chain = LD->getChain();
7975 SDValue BasePtr = LD->getBasePtr();
7976 MachineMemOperand *MMO = LD->getMemOperand();
7977
7978 SDValue NewLD =
7979 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7980 BasePtr, MVT::i8, MMO);
7981 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7982
7983 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7984 return DAG.getMergeValues(Ops, dl);
7985}
7986
7987SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7988 if (Op.getOperand(1).getValueType().isVector())
7989 return LowerVectorStore(Op, DAG);
7990
7991 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7992 "Custom lowering only for i1 stores");
7993
7994 // First, zero extend to the register width, then use a truncating store to 8 bits.
7995
7996 SDLoc dl(Op);
7997 StoreSDNode *ST = cast<StoreSDNode>(Op);
7998
7999 SDValue Chain = ST->getChain();
8000 SDValue BasePtr = ST->getBasePtr();
8001 SDValue Value = ST->getValue();
8002 MachineMemOperand *MMO = ST->getMemOperand();
8003
8004 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8005 Value);
8006 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8007}
8008
8009// FIXME: Remove this once the ANDI glue bug is fixed:
8010SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8011 assert(Op.getValueType() == MVT::i1 &&
8012 "Custom lowering only for i1 results");
8013
8014 SDLoc DL(Op);
8015 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8016}
8017
8018SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8019 SelectionDAG &DAG) const {
8020
8021 // Implements a vector truncate that fits in a vector register as a shuffle.
8022 // We want to legalize vector truncates down to where the source fits in
8023 // a vector register (and target is therefore smaller than vector register
8024 // size). At that point legalization will try to custom lower the sub-legal
8025 // result and get here - where we can contain the truncate as a single target
8026 // operation.
8027
8028 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8029 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8030 //
8031 // We will implement it for big-endian ordering as this (where x denotes
8032 // undefined):
8033 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8034 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8035 //
8036 // The same operation in little-endian ordering will be:
8037 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8038 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
8039
8040 EVT TrgVT = Op.getValueType();
8041 assert(TrgVT.isVector() && "Vector type expected.");
8042 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8043 EVT EltVT = TrgVT.getVectorElementType();
8044 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8045 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8046 !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8047 return SDValue();
8048
8049 SDValue N1 = Op.getOperand(0);
8050 EVT SrcVT = N1.getValueType();
8051 unsigned SrcSize = SrcVT.getSizeInBits();
8052 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8053 !llvm::has_single_bit<uint32_t>(
8054 SrcVT.getVectorElementType().getSizeInBits()))
8055 return SDValue();
8056 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8057 return SDValue();
8058
8059 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8060 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8061
8062 SDLoc DL(Op);
8063 SDValue Op1, Op2;
8064 if (SrcSize == 256) {
8065 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8066 EVT SplitVT =
8067 N1.getValueType().getHalfNumVectorElementsVT(*DAG.getContext());
8068 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8069 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8070 DAG.getConstant(0, DL, VecIdxTy));
8071 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8072 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8073 }
8074 else {
8075 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8076 Op2 = DAG.getUNDEF(WideVT);
8077 }
8078
8079 // First list the elements we want to keep.
8080 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8081 SmallVector<int, 16> ShuffV;
8082 if (Subtarget.isLittleEndian())
8083 for (unsigned i = 0; i < TrgNumElts; ++i)
8084 ShuffV.push_back(i * SizeMult);
8085 else
8086 for (unsigned i = 1; i <= TrgNumElts; ++i)
8087 ShuffV.push_back(i * SizeMult - 1);
8088
8089 // Populate the remaining elements with undefs.
8090 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8091 // ShuffV.push_back(i + WideNumElts);
8092 ShuffV.push_back(WideNumElts + 1);
8093
8094 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8095 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8096 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8097}
8098
8099/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
8100/// possible.
8101SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8102 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8103 EVT ResVT = Op.getValueType();
8104 EVT CmpVT = Op.getOperand(0).getValueType();
8105 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8106 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8107 SDLoc dl(Op);
8108
8109 // Without power9-vector, we don't have a native instruction for f128 comparison.
8110 // The following transformation into a libcall is needed for setcc:
8111 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8112 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8113 SDValue Z = DAG.getSetCC(
8114 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8115 LHS, RHS, CC);
8116 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8117 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8118 }
8119
8120 // Not FP, or using SPE? Not an fsel.
8121 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8122 Subtarget.hasSPE())
8123 return Op;
8124
8125 SDNodeFlags Flags = Op.getNode()->getFlags();
8126
8127 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8128 // presence of infinities.
8129 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8130 switch (CC) {
8131 default:
8132 break;
8133 case ISD::SETOGT:
8134 case ISD::SETGT:
8135 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8136 case ISD::SETOLT:
8137 case ISD::SETLT:
8138 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8139 }
8140 }
8141
8142 // We might be able to do better than this under some circumstances, but in
8143 // general, fsel-based lowering of select is a finite-math-only optimization.
8144 // For more information, see section F.3 of the 2.06 ISA specification.
8145 // With ISA 3.0
8146 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8147 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8148 ResVT == MVT::f128)
8149 return Op;
8150
8151 // If the RHS of the comparison is a 0.0, we don't need to do the
8152 // subtraction at all.
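// (Recall that PPCISD::FSEL(Cmp, TV, FV) yields TV when Cmp >= 0.0 and FV
// otherwise, so each predicate below is expressed in that form by negating
// the comparison value and/or swapping TV and FV.)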
8153 SDValue Sel1;
8154 if (isFloatingPointZero(RHS))
8155 switch (CC) {
8156 default: break; // SETUO etc aren't handled by fsel.
8157 case ISD::SETNE:
8158 std::swap(TV, FV);
8159 [[fallthrough]];
8160 case ISD::SETEQ:
8161 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8162 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8163 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8164 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8165 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8166 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8167 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8168 case ISD::SETULT:
8169 case ISD::SETLT:
8170 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8171 [[fallthrough]];
8172 case ISD::SETOGE:
8173 case ISD::SETGE:
8174 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8175 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8176 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8177 case ISD::SETUGT:
8178 case ISD::SETGT:
8179 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8180 [[fallthrough]];
8181 case ISD::SETOLE:
8182 case ISD::SETLE:
8183 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8184 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8185 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8186 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8187 }
8188
8189 SDValue Cmp;
8190 switch (CC) {
8191 default: break; // SETUO etc aren't handled by fsel.
8192 case ISD::SETNE:
8193 std::swap(TV, FV);
8194 [[fallthrough]];
8195 case ISD::SETEQ:
8196 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8197 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8198 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8199 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8200 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8201 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8202 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8203 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8204 case ISD::SETULT:
8205 case ISD::SETLT:
8206 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8207 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8208 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8209 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8210 case ISD::SETOGE:
8211 case ISD::SETGE:
8212 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8213 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8214 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8215 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8216 case ISD::SETUGT:
8217 case ISD::SETGT:
8218 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8219 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8220 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8221 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8222 case ISD::SETOLE:
8223 case ISD::SETLE:
8224 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8225 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8226 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8227 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8228 }
8229 return Op;
8230}
8231
8232static unsigned getPPCStrictOpcode(unsigned Opc) {
8233 switch (Opc) {
8234 default:
8235 llvm_unreachable("No strict version of this opcode!");
8236 case PPCISD::FCTIDZ:
8237 return PPCISD::STRICT_FCTIDZ;
8238 case PPCISD::FCTIWZ:
8239 return PPCISD::STRICT_FCTIWZ;
8240 case PPCISD::FCTIDUZ:
8241 return PPCISD::STRICT_FCTIDUZ;
8242 case PPCISD::FCTIWUZ:
8243 return PPCISD::STRICT_FCTIWUZ;
8244 case PPCISD::FCFID:
8245 return PPCISD::STRICT_FCFID;
8246 case PPCISD::FCFIDU:
8247 return PPCISD::STRICT_FCFIDU;
8248 case PPCISD::FCFIDS:
8249 return PPCISD::STRICT_FCFIDS;
8250 case PPCISD::FCFIDUS:
8251 return PPCISD::STRICT_FCFIDUS;
8252 }
8253}
8254
8255 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8256 const PPCSubtarget &Subtarget) {
8257 SDLoc dl(Op);
8258 bool IsStrict = Op->isStrictFPOpcode();
8259 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8260 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8261
8262 // TODO: Any other flags to propagate?
8263 SDNodeFlags Flags;
8264 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8265
8266 // For strict nodes, source is the second operand.
8267 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8268 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8269 MVT DestTy = Op.getSimpleValueType();
8270 assert(Src.getValueType().isFloatingPoint() &&
8271 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8272 DestTy == MVT::i64) &&
8273 "Invalid FP_TO_INT types");
8274 if (Src.getValueType() == MVT::f32) {
8275 if (IsStrict) {
8276 Src =
8277 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8278 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8279 Chain = Src.getValue(1);
8280 } else
8281 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8282 }
8283 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8284 DestTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
8285 unsigned Opc = ISD::DELETED_NODE;
8286 switch (DestTy.SimpleTy) {
8287 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8288 case MVT::i32:
8289 Opc = IsSigned ? PPCISD::FCTIWZ
8290 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8291 break;
8292 case MVT::i64:
8293 assert((IsSigned || Subtarget.hasFPCVT()) &&
8294 "i64 FP_TO_UINT is supported only with FPCVT");
8295 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8296 }
8297 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8298 SDValue Conv;
8299 if (IsStrict) {
8300 Opc = getPPCStrictOpcode(Opc);
8301 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8302 Flags);
8303 } else {
8304 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8305 }
8306 return Conv;
8307}
8308
8309void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8310 SelectionDAG &DAG,
8311 const SDLoc &dl) const {
8312 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8313 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8314 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8315 bool IsStrict = Op->isStrictFPOpcode();
8316
8317 // Convert the FP value to an int value through memory.
8318 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8319 (IsSigned || Subtarget.hasFPCVT());
8320 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8321 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8322 MachinePointerInfo MPI =
8323 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8324
8325 // Emit a store to the stack slot.
8326 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8327 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8328 if (i32Stack) {
8329 MachineFunction &MF = DAG.getMachineFunction();
8330 Alignment = Align(4);
8331 MachineMemOperand *MMO =
8332 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8333 SDValue Ops[] = { Chain, Tmp, FIPtr };
8334 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8335 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8336 } else
8337 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8338
8339 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8340 // add in a bias on big-endian targets.
8341 if (Op.getValueType() == MVT::i32 && !i32Stack) {
8342 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8343 DAG.getConstant(4, dl, FIPtr.getValueType()));
8344 MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8345 }
8346
8347 RLI.Chain = Chain;
8348 RLI.Ptr = FIPtr;
8349 RLI.MPI = MPI;
8350 RLI.Alignment = Alignment;
8351}
8352
8353/// Custom lowers floating point to integer conversions to use
8354/// the direct move instructions available in ISA 2.07 to avoid the
8355/// need for load/store combinations.
8356SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8357 SelectionDAG &DAG,
8358 const SDLoc &dl) const {
8359 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8360 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8361 if (Op->isStrictFPOpcode())
8362 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8363 else
8364 return Mov;
8365}
8366
8367SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8368 const SDLoc &dl) const {
8369 bool IsStrict = Op->isStrictFPOpcode();
8370 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8371 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8372 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8373 EVT SrcVT = Src.getValueType();
8374 EVT DstVT = Op.getValueType();
8375
8376 // FP to INT conversions are legal for f128.
8377 if (SrcVT == MVT::f128)
8378 return Subtarget.hasP9Vector() ? Op : SDValue();
8379
8380 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8381 // PPC (the libcall is not available).
8382 if (SrcVT == MVT::ppcf128) {
8383 if (DstVT == MVT::i32) {
8384 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8385 // set other fast-math flags to FP operations in both strict and
8386 // non-strict cases. (FP_TO_SINT, FSUB)
8387 SDNodeFlags Flags;
8388 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8389
8390 if (IsSigned) {
8391 SDValue Lo, Hi;
8392 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8393
8394 // Add the two halves of the long double in round-to-zero mode, and use
8395 // a smaller FP_TO_SINT.
8396 if (IsStrict) {
8397 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8398 DAG.getVTList(MVT::f64, MVT::Other),
8399 {Op.getOperand(0), Lo, Hi}, Flags);
8400 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8401 DAG.getVTList(MVT::i32, MVT::Other),
8402 {Res.getValue(1), Res}, Flags);
8403 } else {
8404 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8405 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8406 }
8407 } else {
8408 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8409 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8410 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8411 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8412 if (IsStrict) {
8413 // Sel = Src < 0x80000000
8414 // FltOfs = select Sel, 0.0, 0x80000000
8415 // IntOfs = select Sel, 0, 0x80000000
8416 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8417 SDValue Chain = Op.getOperand(0);
8418 EVT SetCCVT =
8419 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8420 EVT DstSetCCVT =
8421 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8422 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8423 Chain, true);
8424 Chain = Sel.getValue(1);
8425
8426 SDValue FltOfs = DAG.getSelect(
8427 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8428 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8429
8430 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8431 DAG.getVTList(SrcVT, MVT::Other),
8432 {Chain, Src, FltOfs}, Flags);
8433 Chain = Val.getValue(1);
8434 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8435 DAG.getVTList(DstVT, MVT::Other),
8436 {Chain, Val}, Flags);
8437 Chain = SInt.getValue(1);
8438 SDValue IntOfs = DAG.getSelect(
8439 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8440 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8441 return DAG.getMergeValues({Result, Chain}, dl);
8442 } else {
8443 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8444 // FIXME: generated code sucks.
8445 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8446 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8447 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8448 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8449 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8450 }
8451 }
8452 }
8453
8454 return SDValue();
8455 }
8456
8457 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8458 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8459
8460 ReuseLoadInfo RLI;
8461 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8462
8463 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8464 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8465}
8466
8467// We're trying to insert a regular store, S, and then a load, L. If the
8468// incoming value, O, is a load, we might just be able to have our load use the
8469// address used by O. However, we don't know if anything else will store to
8470// that address before we can load from it. To prevent this situation, we need
8471// to insert our load, L, into the chain as a peer of O. To do this, we give L
8472// the same chain operand as O, we create a token factor from the chain results
8473// of O and L, and we replace all uses of O's chain result with that token
8474// factor (see spliceIntoChain below for this last part).
8475bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8476 ReuseLoadInfo &RLI,
8477 SelectionDAG &DAG,
8478 ISD::LoadExtType ET) const {
8479 // Conservatively skip reusing for constrained FP nodes.
8480 if (Op->isStrictFPOpcode())
8481 return false;
8482
8483 SDLoc dl(Op);
8484 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8485 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8486 if (ET == ISD::NON_EXTLOAD &&
8487 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8488 isOperationLegalOrCustom(Op.getOpcode(),
8489 Op.getOperand(0).getValueType())) {
8490
8491 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8492 return true;
8493 }
8494
8495 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8496 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8497 LD->isNonTemporal())
8498 return false;
8499 if (LD->getMemoryVT() != MemVT)
8500 return false;
8501
8502 // If the result of the load is an illegal type, then we can't build a
8503 // valid chain for reuse, since the legalised loads and the token factor
8504 // node that ties them together use a different output chain than the
8505 // illegal load.
8506 if (!isTypeLegal(LD->getValueType(0)))
8507 return false;
8508
8509 RLI.Ptr = LD->getBasePtr();
8510 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8511 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8512 "Non-pre-inc AM on PPC?");
8513 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8514 LD->getOffset());
8515 }
8516
8517 RLI.Chain = LD->getChain();
8518 RLI.MPI = LD->getPointerInfo();
8519 RLI.IsDereferenceable = LD->isDereferenceable();
8520 RLI.IsInvariant = LD->isInvariant();
8521 RLI.Alignment = LD->getAlign();
8522 RLI.AAInfo = LD->getAAInfo();
8523 RLI.Ranges = LD->getRanges();
8524
8525 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8526 return true;
8527}
8528
8529// Given the head of the old chain, ResChain, insert a token factor containing
8530// it and NewResChain, and make users of ResChain now be users of that token
8531// factor.
8532// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8533void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8534 SDValue NewResChain,
8535 SelectionDAG &DAG) const {
8536 if (!ResChain)
8537 return;
8538
8539 SDLoc dl(NewResChain);
8540
8541 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
8542 NewResChain, DAG.getUNDEF(MVT::Other));
8543 assert(TF.getNode() != NewResChain.getNode() &&
8544 "A new TF really is required here");
8545
8546 DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8547 DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8548}
8549
8550 /// Analyze the profitability of a direct move: prefer a float load over an
8551 /// int load plus a direct move when there is no integer use of the int load.
8553bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8554 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8555 if (Origin->getOpcode() != ISD::LOAD)
8556 return true;
8557
8558 // If there is no LXSIBZX/LXSIHZX (as on Power8), prefer a direct move
8559 // when the memory size is 1 or 2 bytes.
8560 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8561 if (!Subtarget.hasP9Vector() &&
8562 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8563 return true;
8564
8565 for (SDNode::use_iterator UI = Origin->use_begin(),
8566 UE = Origin->use_end();
8567 UI != UE; ++UI) {
8568
8569 // Only look at the users of the loaded value.
8570 if (UI.getUse().get().getResNo() != 0)
8571 continue;
8572
8573 if (UI->getOpcode() != ISD::SINT_TO_FP &&
8574 UI->getOpcode() != ISD::UINT_TO_FP &&
8575 UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8576 UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8577 return true;
8578 }
8579
8580 return false;
8581}
8582
8583 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8584 const PPCSubtarget &Subtarget,
8585 SDValue Chain = SDValue()) {
8586 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8587 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8588 SDLoc dl(Op);
8589
8590 // TODO: Any other flags to propagate?
8591 SDNodeFlags Flags;
8592 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8593
8594 // If we have FCFIDS, then use it when converting to single-precision.
8595 // Otherwise, convert to double-precision and then round.
8596 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8597 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8598 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8599 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8600 if (Op->isStrictFPOpcode()) {
8601 if (!Chain)
8602 Chain = Op.getOperand(0);
8603 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8604 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8605 } else
8606 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8607}
8608
8609/// Custom lowers integer to floating point conversions to use
8610/// the direct move instructions available in ISA 2.07 to avoid the
8611/// need for load/store combinations.
8612SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8613 SelectionDAG &DAG,
8614 const SDLoc &dl) const {
8615 assert((Op.getValueType() == MVT::f32 ||
8616 Op.getValueType() == MVT::f64) &&
8617 "Invalid floating point type as target of conversion");
8618 assert(Subtarget.hasFPCVT() &&
8619 "Int to FP conversions with direct moves require FPCVT");
8620 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8621 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8622 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8623 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8624 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8625 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8626 return convertIntToFP(Op, Mov, DAG, Subtarget);
8627}
8628
8629static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8630
8631 EVT VecVT = Vec.getValueType();
8632 assert(VecVT.isVector() && "Expected a vector type.");
8633 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8634
8635 EVT EltVT = VecVT.getVectorElementType();
8636 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8637 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8638
8639 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
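// For example, widening a v2i16 (32 bits) gives WideNumElts = 8 and
// NumConcat = 4: the result is concat_vectors(Vec, undef, undef, undef),
// a full-width v8i16.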
8640 SmallVector<SDValue, 16> Ops(NumConcat);
8641 Ops[0] = Vec;
8642 SDValue UndefVec = DAG.getUNDEF(VecVT);
8643 for (unsigned i = 1; i < NumConcat; ++i)
8644 Ops[i] = UndefVec;
8645
8646 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8647}
8648
8649SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8650 const SDLoc &dl) const {
8651 bool IsStrict = Op->isStrictFPOpcode();
8652 unsigned Opc = Op.getOpcode();
8653 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8654 assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8655 Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8656 "Unexpected conversion type");
8657 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8658 "Supports conversions to v2f64/v4f32 only.");
8659
8660 // TODO: Any other flags to propagate?
8661 SDNodeFlags Flags;
8662 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8663
8664 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8665 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8666
8667 SDValue Wide = widenVec(DAG, Src, dl);
8668 EVT WideVT = Wide.getValueType();
8669 unsigned WideNumElts = WideVT.getVectorNumElements();
8670 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8671
8672 SmallVector<int, 16> ShuffV;
8673 for (unsigned i = 0; i < WideNumElts; ++i)
8674 ShuffV.push_back(i + WideNumElts);
8675
8676 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8677 int SaveElts = FourEltRes ? 4 : 2;
8678 if (Subtarget.isLittleEndian())
8679 for (int i = 0; i < SaveElts; i++)
8680 ShuffV[i * Stride] = i;
8681 else
8682 for (int i = 1; i <= SaveElts; i++)
8683 ShuffV[i * Stride - 1] = i - 1;
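// For example, for an unsigned v4i8 -> v4f32 conversion that reaches this
// path on a big-endian target, Wide is v16i8 and Stride is 4, so the mask
// becomes <16,17,18,0, 20,21,22,1, 24,25,26,2, 28,29,30,3>; lanes >= 16 read
// the zero vector below, zero-extending each byte into an i32 lane.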
8684
8685 SDValue ShuffleSrc2 =
8686 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8687 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8688
8689 SDValue Extend;
8690 if (SignedConv) {
8691 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8692 EVT ExtVT = Src.getValueType();
8693 if (Subtarget.hasP9Altivec())
8694 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8695 IntermediateVT.getVectorNumElements());
8696
8697 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8698 DAG.getValueType(ExtVT));
8699 } else
8700 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8701
8702 if (IsStrict)
8703 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8704 {Op.getOperand(0), Extend}, Flags);
8705
8706 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8707}
8708
8709SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8710 SelectionDAG &DAG) const {
8711 SDLoc dl(Op);
8712 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8713 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8714 bool IsStrict = Op->isStrictFPOpcode();
8715 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8716 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8717
8718 // TODO: Any other flags to propagate?
8719 SDNodeFlags Flags;
8720 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8721
8722 EVT InVT = Src.getValueType();
8723 EVT OutVT = Op.getValueType();
8724 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8725 isOperationCustom(Op.getOpcode(), InVT))
8726 return LowerINT_TO_FPVector(Op, DAG, dl);
8727
8728 // Conversions to f128 are legal.
8729 if (Op.getValueType() == MVT::f128)
8730 return Subtarget.hasP9Vector() ? Op : SDValue();
8731
8732 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8733 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8734 return SDValue();
8735
8736 if (Src.getValueType() == MVT::i1) {
8737 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8738 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8739 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8740 if (IsStrict)
8741 return DAG.getMergeValues({Sel, Chain}, dl);
8742 else
8743 return Sel;
8744 }
8745
8746 // If we have direct moves, we can do the entire conversion and skip the
8747 // store/load; however, without FPCVT we can't do most conversions.
8748 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8749 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8750 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8751
8752 assert((IsSigned || Subtarget.hasFPCVT()) &&
8753 "UINT_TO_FP is supported only with FPCVT");
8754
8755 if (Src.getValueType() == MVT::i64) {
8756 SDValue SINT = Src;
8757 // When converting to single-precision, we actually need to convert
8758 // to double-precision first and then round to single-precision.
8759 // To avoid double-rounding effects during that operation, we have
8760 // to prepare the input operand. Bits that might be truncated when
8761 // converting to double-precision are replaced by a bit that won't
8762 // be lost at this stage, but is below the single-precision rounding
8763 // position.
8764 //
8765 // However, if -enable-unsafe-fp-math is in effect, accept double
8766 // rounding to avoid the extra overhead.
8767 if (Op.getValueType() == MVT::f32 &&
8768 !Subtarget.hasFPCVT() &&
8769 !DAG.getTarget().Options.UnsafeFPMath) {
8770
8771 // Twiddle input to make sure the low 11 bits are zero. (If this
8772 // is the case, we are guaranteed the value will fit into the 53 bit
8773 // mantissa of an IEEE double-precision value without rounding.)
8774 // If any of those low 11 bits were not zero originally, make sure
8775 // bit 12 (value 2048) is set instead, so that the final rounding
8776 // to single-precision gets the correct result.
8777 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8778 SINT, DAG.getConstant(2047, dl, MVT::i64));
8779 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8780 Round, DAG.getConstant(2047, dl, MVT::i64));
8781 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8782 Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8783 Round, DAG.getConstant(-2048, dl, MVT::i64));
8784
8785 // However, we cannot use that value unconditionally: if the magnitude
8786 // of the input value is small, the bit-twiddling we did above might
8787 // end up visibly changing the output. Fortunately, in that case, we
8788 // don't need to twiddle bits since the original input will convert
8789 // exactly to double-precision floating-point already. Therefore,
8790 // construct a conditional to use the original value if the top 11
8791 // bits are all sign-bit copies, and use the rounded value computed
8792 // above otherwise.
8793 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8794 SINT, DAG.getConstant(53, dl, MVT::i32));
8795 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8796 Cond, DAG.getConstant(1, dl, MVT::i64));
8797 Cond = DAG.getSetCC(
8798 dl,
8799 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8800 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8801
8802 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8803 }
8804
8805 ReuseLoadInfo RLI;
8806 SDValue Bits;
8807
8808 MachineFunction &MF = DAG.getMachineFunction();
8809 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8810 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8811 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8812 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8813 } else if (Subtarget.hasLFIWAX() &&
8814 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8815 MachineMemOperand *MMO =
8816 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8817 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8818 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8819 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8820 DAG.getVTList(MVT::f64, MVT::Other),
8821 Ops, MVT::i32, MMO);
8822 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8823 } else if (Subtarget.hasFPCVT() &&
8824 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8825 MachineMemOperand *MMO =
8826 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8827 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8828 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8829 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8830 DAG.getVTList(MVT::f64, MVT::Other),
8831 Ops, MVT::i32, MMO);
8832 spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8833 } else if (((Subtarget.hasLFIWAX() &&
8834 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8835 (Subtarget.hasFPCVT() &&
8836 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8837 SINT.getOperand(0).getValueType() == MVT::i32) {
8838 MachineFrameInfo &MFI = MF.getFrameInfo();
8839 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8840
8841 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8842 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8843
8844 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8845 MachinePointerInfo::getFixedStack(
8846 DAG.getMachineFunction(), FrameIdx));
8847 Chain = Store;
8848
8849 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8850 "Expected an i32 store");
8851
8852 RLI.Ptr = FIdx;
8853 RLI.Chain = Chain;
8854 RLI.MPI =
8855 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8856 RLI.Alignment = Align(4);
8857
8858 MachineMemOperand *MMO =
8859 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8860 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8861 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8862 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8863 PPCISD::LFIWZX : PPCISD::LFIWAX,
8864 dl, DAG.getVTList(MVT::f64, MVT::Other),
8865 Ops, MVT::i32, MMO);
8866 Chain = Bits.getValue(1);
8867 } else
8868 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8869
8870 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8871 if (IsStrict)
8872 Chain = FP.getValue(1);
8873
8874 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8875 if (IsStrict)
8876 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8877 DAG.getVTList(MVT::f32, MVT::Other),
8878 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8879 else
8880 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8881 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8882 }
8883 return FP;
8884 }
8885
8886 assert(Src.getValueType() == MVT::i32 &&
8887 "Unhandled INT_TO_FP type in custom expander!");
8888 // Since we only generate this in 64-bit mode, we can take advantage of
8889 // 64-bit registers. In particular, sign extend the input value into the
8890 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8891 // then lfd it and fcfid it.
8892 MachineFunction &MF = DAG.getMachineFunction();
8893 MachineFrameInfo &MFI = MF.getFrameInfo();
8894 EVT PtrVT = getPointerTy(MF.getDataLayout());
8895
8896 SDValue Ld;
8897 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8898 ReuseLoadInfo RLI;
8899 bool ReusingLoad;
8900 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8901 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8902 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8903
8904 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8905 MachinePointerInfo::getFixedStack(
8906 DAG.getMachineFunction(), FrameIdx));
8907 Chain = Store;
8908
8909 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8910 "Expected an i32 store");
8911
8912 RLI.Ptr = FIdx;
8913 RLI.Chain = Chain;
8914 RLI.MPI =
8915 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8916 RLI.Alignment = Align(4);
8917 }
8918
8919 MachineMemOperand *MMO =
8920 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8921 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8922 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8923 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8924 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8925 MVT::i32, MMO);
8926 Chain = Ld.getValue(1);
8927 if (ReusingLoad)
8928 spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8929 } else {
8930 assert(Subtarget.isPPC64() &&
8931 "i32->FP without LFIWAX supported only on PPC64");
8932
8933 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8934 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8935
8936 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8937
8938 // STD the extended value into the stack slot.
8939 SDValue Store = DAG.getStore(
8940 Chain, dl, Ext64, FIdx,
8941 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8942 Chain = Store;
8943
8944 // Load the value as a double.
8945 Ld = DAG.getLoad(
8946 MVT::f64, dl, Chain, FIdx,
8947 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8948 Chain = Ld.getValue(1);
8949 }
8950
8951 // FCFID it and return it.
8952 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8953 if (IsStrict)
8954 Chain = FP.getValue(1);
8955 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8956 if (IsStrict)
8957 FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8958 DAG.getVTList(MVT::f32, MVT::Other),
8959 {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8960 else
8961 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8962 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8963 }
8964 return FP;
8965}
8966
8967SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
8968 SelectionDAG &DAG) const {
8969 SDLoc dl(Op);
8970 /*
8971 The rounding mode is in bits 30:31 of FPSCR, and has the following
8972 settings:
8973 00 Round to nearest
8974 01 Round to 0
8975 10 Round to +inf
8976 11 Round to -inf
8977
8978 GET_ROUNDING, on the other hand, expects the following:
8979 -1 Undefined
8980 0 Round to 0
8981 1 Round to nearest
8982 2 Round to +inf
8983 3 Round to -inf
8984
8985 To perform the conversion, we do:
8986 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8987 */
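/*
 Checking that formula against the table above:
 FPSCR = 0b00 -> 0 ^ (0b11 >> 1) = 1 (round to nearest)
 FPSCR = 0b01 -> 1 ^ (0b10 >> 1) = 0 (round to 0)
 FPSCR = 0b10 -> 2 ^ (0b01 >> 1) = 2 (round to +inf)
 FPSCR = 0b11 -> 3 ^ (0b00 >> 1) = 3 (round to -inf)
 */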
8988
8989 MachineFunction &MF = DAG.getMachineFunction();
8990 EVT VT = Op.getValueType();
8991 EVT PtrVT = getPointerTy(MF.getDataLayout());
8992
8993 // Save FP Control Word to register
8994 SDValue Chain = Op.getOperand(0);
8995 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8996 Chain = MFFS.getValue(1);
8997
8998 SDValue CWD;
8999 if (isTypeLegal(MVT::i64)) {
9000 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9001 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9002 } else {
9003 // Save FP register to stack slot
9004 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9005 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9006 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9007
9008 // Load FP Control Word from low 32 bits of stack slot.
9009 assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) &&
9010 "Stack slot adjustment is valid only on big endian subtargets!");
9011 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9012 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9013 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9014 Chain = CWD.getValue(1);
9015 }
9016
9017 // Transform as necessary
9018 SDValue CWD1 =
9019 DAG.getNode(ISD::AND, dl, MVT::i32,
9020 CWD, DAG.getConstant(3, dl, MVT::i32));
9021 SDValue CWD2 =
9022 DAG.getNode(ISD::SRL, dl, MVT::i32,
9023 DAG.getNode(ISD::AND, dl, MVT::i32,
9024 DAG.getNode(ISD::XOR, dl, MVT::i32,
9025 CWD, DAG.getConstant(3, dl, MVT::i32)),
9026 DAG.getConstant(3, dl, MVT::i32)),
9027 DAG.getConstant(1, dl, MVT::i32));
9028
9029 SDValue RetVal =
9030 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9031
9032 RetVal =
9033 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9034 dl, VT, RetVal);
9035
9036 return DAG.getMergeValues({RetVal, Chain}, dl);
9037}
9038
9039SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9040 EVT VT = Op.getValueType();
9041 unsigned BitWidth = VT.getSizeInBits();
9042 SDLoc dl(Op);
9043 assert(Op.getNumOperands() == 3 &&
9044 VT == Op.getOperand(1).getValueType() &&
9045 "Unexpected SHL!");
9046
9047 // Expand into a bunch of logical ops. Note that these ops
9048 // depend on the PPC behavior for oversized shift amounts.
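// For example, with BitWidth = 64 and Amt = 70: Tmp2 shifts by 70 and Tmp3
// by 64 - 70 (which wraps to 122 in the low 7 bits the 64-bit shifts read),
// so both yield 0, while Tmp5 = 6 gives Tmp6 = Lo << 6. Hence OutHi = Lo << 6
// and OutLo = 0, the correct 128-bit result for a left shift by 70.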
9049 SDValue Lo = Op.getOperand(0);
9050 SDValue Hi = Op.getOperand(1);
9051 SDValue Amt = Op.getOperand(2);
9052 EVT AmtVT = Amt.getValueType();
9053
9054 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9055 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9056 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9057 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9058 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9059 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9060 DAG.getConstant(-BitWidth, dl, AmtVT));
9061 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9062 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9063 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9064 SDValue OutOps[] = { OutLo, OutHi };
9065 return DAG.getMergeValues(OutOps, dl);
9066}
9067
9068SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9069 EVT VT = Op.getValueType();
9070 SDLoc dl(Op);
9071 unsigned BitWidth = VT.getSizeInBits();
9072 assert(Op.getNumOperands() == 3 &&
9073 VT == Op.getOperand(1).getValueType() &&
9074 "Unexpected SRL!");
9075
9076 // Expand into a bunch of logical ops. Note that these ops
9077 // depend on the PPC behavior for oversized shift amounts.
9078 SDValue Lo = Op.getOperand(0);
9079 SDValue Hi = Op.getOperand(1);
9080 SDValue Amt = Op.getOperand(2);
9081 EVT AmtVT = Amt.getValueType();
9082
9083 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9084 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9085 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9086 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9087 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9088 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9089 DAG.getConstant(-BitWidth, dl, AmtVT));
9090 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9091 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9092 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9093 SDValue OutOps[] = { OutLo, OutHi };
9094 return DAG.getMergeValues(OutOps, dl);
9095}
9096
9097SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9098 SDLoc dl(Op);
9099 EVT VT = Op.getValueType();
9100 unsigned BitWidth = VT.getSizeInBits();
9101 assert(Op.getNumOperands() == 3 &&
9102 VT == Op.getOperand(1).getValueType() &&
9103 "Unexpected SRA!");
9104
9105 // Expand into a bunch of logical ops, followed by a select_cc.
9106 SDValue Lo = Op.getOperand(0);
9107 SDValue Hi = Op.getOperand(1);
9108 SDValue Amt = Op.getOperand(2);
9109 EVT AmtVT = Amt.getValueType();
9110
9111 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9112 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9113 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9114 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9115 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9116 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9117 DAG.getConstant(-BitWidth, dl, AmtVT));
9118 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9119 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9120 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9121 Tmp4, Tmp6, ISD::SETLE);
9122 SDValue OutOps[] = { OutLo, OutHi };
9123 return DAG.getMergeValues(OutOps, dl);
9124}
9125
9126SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9127 SelectionDAG &DAG) const {
9128 SDLoc dl(Op);
9129 EVT VT = Op.getValueType();
9130 unsigned BitWidth = VT.getSizeInBits();
9131
9132 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9133 SDValue X = Op.getOperand(0);
9134 SDValue Y = Op.getOperand(1);
9135 SDValue Z = Op.getOperand(2);
9136 EVT AmtVT = Z.getValueType();
9137
9138 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9139 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9140 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9141 // on PowerPC shift by BW being well defined.
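// Note the (Z % BW) == 0 edge case: for fshl, SubZ becomes BW, the shift
// Y >> BW yields 0 on PPC, and the result is X | 0 == X, matching the
// funnel-shift semantics without the extra select the generic expansion
// would need.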
9142 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9143 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9144 SDValue SubZ =
9145 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9146 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9147 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9148 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9149}
9150
9151//===----------------------------------------------------------------------===//
9152// Vector related lowering.
9153//
9154
9155/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9156/// element size of SplatSize. Cast the result to VT.
9157static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9158 SelectionDAG &DAG, const SDLoc &dl) {
9159 static const MVT VTys[] = { // canonical VT to use for each size.
9160 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9161 };
9162
9163 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9164
9165 // For a splat with all ones, turn it into a vspltisb of 0xFF to canonicalize.
9166 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9167 SplatSize = 1;
9168 Val = 0xFF;
9169 }
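// For example, a v4i32 splat of 0xFFFFFFFF is canonicalized to a v16i8 splat
// of 0xFF here, so every all-ones splat funnels into one vspltisb -1 pattern.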
9170
9171 EVT CanonicalVT = VTys[SplatSize-1];
9172
9173 // Build a canonical splat for this value.
9174 return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
9175}
9176
9177/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9178/// specified intrinsic ID.
9179 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9180 const SDLoc &dl, EVT DestVT = MVT::Other) {
9181 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9182 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9183 DAG.getConstant(IID, dl, MVT::i32), Op);
9184}
9185
9186/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9187/// specified intrinsic ID.
9188static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9189 SelectionDAG &DAG, const SDLoc &dl,
9190 EVT DestVT = MVT::Other) {
9191 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9192 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9193 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9194}
9195
9196/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9197/// specified intrinsic ID.
9198static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9199 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9200 EVT DestVT = MVT::Other) {
9201 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9202 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9203 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9204}
9205
9206/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9207/// amount. The result has the specified value type.
9208static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9209 SelectionDAG &DAG, const SDLoc &dl) {
9210 // Force LHS/RHS to be the right type.
9211 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9212 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9213
9214 int Ops[16];
9215 for (unsigned i = 0; i != 16; ++i)
9216 Ops[i] = i + Amt;
9217 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9218 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9219}
9220
9221/// Do we have an efficient pattern in a .td file for this node?
9222///
9223/// \param V - pointer to the BuildVectorSDNode being matched
9224/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9225///
9226/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9227/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9228/// the opposite is true (expansion is beneficial) are:
9229 /// - The node builds a vector out of integers that are not 32- or 64-bit
9230/// - The node builds a vector out of constants
9231/// - The node is a "load-and-splat"
9232/// In all other cases, we will choose to keep the BUILD_VECTOR.
9233 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9234 bool HasDirectMove,
9235 bool HasP8Vector) {
9236 EVT VecVT = V->getValueType(0);
9237 bool RightType = VecVT == MVT::v2f64 ||
9238 (HasP8Vector && VecVT == MVT::v4f32) ||
9239 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9240 if (!RightType)
9241 return false;
9242
9243 bool IsSplat = true;
9244 bool IsLoad = false;
9245 SDValue Op0 = V->getOperand(0);
9246
9247 // This function is called in a block that confirms the node is not a constant
9248 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9249 // different constants.
9250 if (V->isConstant())
9251 return false;
9252 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9253 if (V->getOperand(i).isUndef())
9254 return false;
9255 // We want to expand nodes that represent load-and-splat even if the
9256 // loaded value is a floating point truncation or conversion to int.
9257 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9258 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9259 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9260 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9261 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9262 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9263 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9264 IsLoad = true;
9265 // If the operands are different or the input is not a load and has more
9266 // uses than just this BV node, then it isn't a splat.
9267 if (V->getOperand(i) != Op0 ||
9268 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9269 IsSplat = false;
9270 }
9271 return !(IsSplat && IsLoad);
9272}
9273
9274// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9275SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9276
9277 SDLoc dl(Op);
9278 SDValue Op0 = Op->getOperand(0);
9279
9280 if ((Op.getValueType() != MVT::f128) ||
9281 (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9282 (Op0.getOperand(0).getValueType() != MVT::i64) ||
9283 (Op0.getOperand(1).getValueType() != MVT::i64))
9284 return SDValue();
9285
9286 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9287 Op0.getOperand(1));
9288}
9289
9290static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9291 const SDValue *InputLoad = &Op;
9292 while (InputLoad->getOpcode() == ISD::BITCAST)
9293 InputLoad = &InputLoad->getOperand(0);
9294 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9295 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9296 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9297 InputLoad = &InputLoad->getOperand(0);
9298 }
9299 if (InputLoad->getOpcode() != ISD::LOAD)
9300 return nullptr;
9301 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9302 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9303}
9304
9305// Convert the argument APFloat to a single precision APFloat if there is no
9306// loss in information during the conversion to single precision APFloat and the
9307// resulting number is not a denormal number. Return true if successful.
9308 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9309 APFloat APFloatToConvert = ArgAPFloat;
9310 bool LosesInfo = true;
9311 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9312 &LosesInfo);
9313 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9314 if (Success)
9315 ArgAPFloat = APFloatToConvert;
9316 return Success;
9317}
9318
9319// Bitcast the argument APInt to a double and convert it to a single precision
9320// APFloat, bitcast the APFloat to an APInt and assign it to the original
9321// argument if there is no loss in information during the conversion from
9322// double to single precision APFloat and the resulting number is not a denormal
9323// number. Return true if successful.
9324 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9325 double DpValue = ArgAPInt.bitsToDouble();
9326 APFloat APFloatDp(DpValue);
9327 bool Success = convertToNonDenormSingle(APFloatDp);
9328 if (Success)
9329 ArgAPInt = APFloatDp.bitcastToAPInt();
9330 return Success;
9331}
9332
9333 // Nondestructive check for convertToNonDenormSingle.
9334 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9335 // Only convert if it loses info, since XXSPLTIDP should
9336 // handle the other case.
9337 APFloat APFloatToConvert = ArgAPFloat;
9338 bool LosesInfo = true;
9339 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9340 &LosesInfo);
9341
9342 return (!LosesInfo && !APFloatToConvert.isDenormal());
9343}
9344
9345static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9346 unsigned &Opcode) {
9347 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9348 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9349 return false;
9350
9351 EVT Ty = Op->getValueType(0);
9352 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9353 // as we cannot handle extending loads for these types.
9354 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9355 ISD::isNON_EXTLoad(InputNode))
9356 return true;
9357
9358 EVT MemVT = InputNode->getMemoryVT();
9359 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9360 // memory VT is the same vector element VT type.
9361 // The loads feeding into the v8i16 and v16i8 types will be extending because
9362 // scalar i8/i16 are not legal types.
9363 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9364 (MemVT == Ty.getVectorElementType()))
9365 return true;
9366
9367 if (Ty == MVT::v2i64) {
9368 // Check the extend type, when the input type is i32, and the output vector
9369 // type is v2i64.
9370 if (MemVT == MVT::i32) {
9371 if (ISD::isZEXTLoad(InputNode))
9372 Opcode = PPCISD::ZEXT_LD_SPLAT;
9373 if (ISD::isSEXTLoad(InputNode))
9374 Opcode = PPCISD::SEXT_LD_SPLAT;
9375 }
9376 return true;
9377 }
9378 return false;
9379}
9380
9381// If this is a case we can't handle, return null and let the default
9382// expansion code take care of it. If we CAN select this case, and if it
9383// selects to a single instruction, return Op. Otherwise, if we can codegen
9384// this case more efficiently than a constant pool load, lower it to the
9385// sequence of ops that should be used.
9386SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9387 SelectionDAG &DAG) const {
9388 SDLoc dl(Op);
9389 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9390 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9391
9392 // Check if this is a splat of a constant value.
9393 APInt APSplatBits, APSplatUndef;
9394 unsigned SplatBitSize;
9395 bool HasAnyUndefs;
9396 bool BVNIsConstantSplat =
9397 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9398 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9399
9400 // If it is a splat of a double, check if we can shrink it to a 32 bit
9401 // non-denormal float which when converted back to double gives us the same
9402 // double. This is to exploit the XXSPLTIDP instruction.
9403 // If we lose precision, we use XXSPLTI32DX.
9404 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9405 Subtarget.hasPrefixInstrs()) {
9406 // Check the type first to short-circuit so we don't modify APSplatBits if
9407 // this block isn't executed.
9408 if ((Op->getValueType(0) == MVT::v2f64) &&
9409 convertToNonDenormSingle(APSplatBits)) {
9410 SDValue SplatNode = DAG.getNode(
9411 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9412 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9413 return DAG.getBitcast(Op.getValueType(), SplatNode);
9414 } else {
9415 // We may lose precision, so we have to use XXSPLTI32DX.
9416
9417 uint32_t Hi =
9418 (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9419 uint32_t Lo =
9420 (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9421 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9422
9423 if (!Hi || !Lo)
9424 // If either load is 0, then we should generate XXLXOR to set to 0.
9425 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9426
9427 if (Hi)
9428 SplatNode = DAG.getNode(
9429 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9430 DAG.getTargetConstant(0, dl, MVT::i32),
9431 DAG.getTargetConstant(Hi, dl, MVT::i32));
9432
9433 if (Lo)
9434 SplatNode =
9435 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9436 DAG.getTargetConstant(1, dl, MVT::i32),
9437 DAG.getTargetConstant(Lo, dl, MVT::i32));
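// For example, a v2f64 splat of a double whose conversion to single
// precision loses information keeps both 32-bit halves nonzero, so it is
// emitted as two XXSPLTI32DX instructions writing the high and low words.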
9438
9439 return DAG.getBitcast(Op.getValueType(), SplatNode);
9440 }
9441 }
9442
9443 if (!BVNIsConstantSplat || SplatBitSize > 32) {
9444 unsigned NewOpcode = PPCISD::LD_SPLAT;
9445
9446 // Handle load-and-splat patterns as we have instructions that will do this
9447 // in one go.
9448 if (DAG.isSplatValue(Op, true) &&
9449 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9450 const SDValue *InputLoad = &Op.getOperand(0);
9451 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9452
9453 // If the input load is an extending load, it will be an i32 -> i64
9454 // extending load and isValidSplatLoad() will update NewOpcode.
9455 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9456 unsigned ElementSize =
9457 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9458
9459 assert(((ElementSize == 2 * MemorySize)
9460 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9461 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9462 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9463 "Unmatched element size and opcode!\n");
9464
9465 // Checking for a single use of this load, we have to check for vector
9466 // width (128 bits) / ElementSize uses (since each operand of the
9467 // BUILD_VECTOR is a separate use of the value).
9468 unsigned NumUsesOfInputLD = 128 / ElementSize;
9469 for (SDValue BVInOp : Op->ops())
9470 if (BVInOp.isUndef())
9471 NumUsesOfInputLD--;
9472
9473 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9474 // the cases below should also arise for "lfiwzx/lfiwax + LE target +
9475 // index 1", "lxvrhx + BE target + index 7", and "lxvrbx + BE target +
9476 // index 15", but isValidSplatLoad() currently only returns true when
9477 // the data at index 0 is not nullptr, so we will not get into trouble
9478 // for these cases.
9479 //
9480 // case 1 - lfiwzx/lfiwax
9481 // 1.1: load result is i32 and is sign/zero extend to i64;
9482 // 1.2: build a v2i64 vector type with above loaded value;
9483 // 1.3: the vector has only one value at index 0, others are all undef;
9484 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9485 if (NumUsesOfInputLD == 1 &&
9486 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9487 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9488 Subtarget.hasLFIWAX()))
9489 return SDValue();
9490
9491 // case 2 - lxvr[hb]x
9492 // 2.1: load result is at most i16;
9493 // 2.2: build a vector with above loaded value;
9494 // 2.3: the vector has only one value at index 0, others are all undef;
9495 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9496 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9497 Subtarget.isISA3_1() && ElementSize <= 16)
9498 return SDValue();
9499
9500 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9501 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9502 Subtarget.hasVSX()) {
9503 SDValue Ops[] = {
9504 LD->getChain(), // Chain
9505 LD->getBasePtr(), // Ptr
9506 DAG.getValueType(Op.getValueType()) // VT
9507 };
9508 SDValue LdSplt = DAG.getMemIntrinsicNode(
9509 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9510 LD->getMemoryVT(), LD->getMemOperand());
9511 // Replace all uses of the output chain of the original load with the
9512 // output chain of the new load.
9513 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9514 LdSplt.getValue(1));
9515 return LdSplt;
9516 }
9517 }
9518
9519 // In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up to
9520 // 32-bits can be lowered to VSX instructions under certain conditions.
9521 // Without VSX, there is no pattern more efficient than expanding the node.
9522 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9523 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9524 Subtarget.hasP8Vector()))
9525 return Op;
9526 return SDValue();
9527 }
9528
9529 uint64_t SplatBits = APSplatBits.getZExtValue();
9530 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9531 unsigned SplatSize = SplatBitSize / 8;
9532
9533 // First, handle single instruction cases.
9534
9535 // All zeros?
9536 if (SplatBits == 0) {
9537 // Canonicalize all zero vectors to be v4i32.
9538 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9539 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9540 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9541 }
9542 return Op;
9543 }
9544
9545 // We have XXSPLTIW for constant splats four bytes wide.
9546 // Since the vector length is a multiple of 4 bytes, 2-byte splats can be
9547 // replaced with 4-byte splats: we replicate the SplatBits to make a 4-byte
9548 // splat element. For example, a 2-byte splat of 0xABAB can be turned into
9549 // a 4-byte splat of 0xABABABAB.
9550 if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9551 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9552 Op.getValueType(), DAG, dl);
9553
9554 if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9555 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9556 dl);
9557
9558 // We have XXSPLTIB for constant splats one byte wide.
9559 if (Subtarget.hasP9Vector() && SplatSize == 1)
9560 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9561 dl);
9562
9563 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9564 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9565 (32-SplatBitSize));
9566 if (SextVal >= -16 && SextVal <= 15)
9567 return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9568 dl);
9569
9570 // Two instruction sequences.
9571
9572 // If this value is in the range [-32,30] and is even, use:
9573 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9574 // If this value is in the range [17,31] and is odd, use:
9575 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9576 // If this value is in the range [-31,-17] and is odd, use:
9577 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9578 // Note the last two are three-instruction sequences.
9579 if (SextVal >= -32 && SextVal <= 31) {
9580 // To avoid having these optimizations undone by constant folding,
9581 // we convert to a pseudo that will be expanded later into one of
9582 // the above forms.
9583 SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9584 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9585 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9586 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9587 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9588 if (VT == Op.getValueType())
9589 return RetVal;
9590 else
9591 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9592 }
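// For example, a v4i32 splat of 24 is emitted as the VADD_SPLAT pseudo here
// and later expands to vspltisw 12 followed by an add of the result to
// itself (24 == 12 + 12), i.e. the even-value form described above.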
9593
9594 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9595 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9596 // for fneg/fabs.
9597 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9598 // Make -1 and vspltisw -1:
9599 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9600
9601 // Make the VSLW intrinsic, computing 0x8000_0000.
9602 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9603 OnesV, DAG, dl);
9604
9605 // xor by OnesV to invert it.
9606 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9607 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9608 }
9609
9610 // Check to see if this is one of a wide variety of 'vsplti* + binop on self' cases.
9611 static const signed char SplatCsts[] = {
9612 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9613 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9614 };
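// As an illustration of the cases matched below: a byte splat of 0xC0
// (SextVal == -64) is caught by the 'vsplti + shl self' form with i == -4,
// since ((unsigned)-4 << ((-4) & 7)) == 0xFFFFFFC0. vspltisb -4 produces
// bytes of 0xFC, and vslb of that vector by itself shifts each byte left by
// (0xFC & 7) == 4, yielding 0xC0 in every byte.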
9615
9616 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9617 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9618 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9619 int i = SplatCsts[idx];
9620
9621 // Figure out what shift amount will be used by altivec if shifted by i in
9622 // this splat size.
9623 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9624
9625 // vsplti + shl self.
9626 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9627 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9628 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9629 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9630 Intrinsic::ppc_altivec_vslw
9631 };
9632 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9633 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9634 }
9635
9636 // vsplti + srl self.
9637 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9638 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9639 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9640 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9641 Intrinsic::ppc_altivec_vsrw
9642 };
9643 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9644 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9645 }
9646
9647 // vsplti + rol self.
9648 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9649 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9650 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9651 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9652 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9653 Intrinsic::ppc_altivec_vrlw
9654 };
9655 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9656 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9657 }
9658
9659 // t = vsplti c, result = vsldoi t, t, 1
9660 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9661 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9662 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9663 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9664 }
9665 // t = vsplti c, result = vsldoi t, t, 2
9666 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9667 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9668 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9669 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9670 }
9671 // t = vsplti c, result = vsldoi t, t, 3
9672 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9673 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9674 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9675 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9676 }
9677 }
9678
9679 return SDValue();
9680}
9681
9682/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9683/// the specified operations to build the shuffle.
9684static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9685 SDValue RHS, SelectionDAG &DAG,
9686 const SDLoc &dl) {
9687 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9688 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9689 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
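// Each 32-bit PFEntry encodes: bits [31:30] the cost of the sequence (see
// the 'PFEntry >> 30' check at the call site), bits [29:26] the operation
// (one of the OP_* values below), and two 13-bit operand IDs in bits
// [25:13] and [12:0].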
9690
9691 enum {
9692 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9693 OP_VMRGHW,
9694 OP_VMRGLW,
9695 OP_VSPLTISW0,
9696 OP_VSPLTISW1,
9697 OP_VSPLTISW2,
9698 OP_VSPLTISW3,
9699 OP_VSLDOI4,
9700 OP_VSLDOI8,
9701 OP_VSLDOI12
9702 };
9703
9704 if (OpNum == OP_COPY) {
9705 if (LHSID == (1*9+2)*9+3) return LHS;
9706 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9707 return RHS;
9708 }
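// Operand IDs encode a 4-element selection in base 9: each digit is a
// source element 0-7 (0-3 from the LHS, 4-7 from the RHS) or 8 for undef.
// The LHS identity <0,1,2,3> is ((0*9+1)*9+2)*9+3 == 102 and the RHS
// identity <4,5,6,7> is ((4*9+5)*9+6)*9+7, matching the OP_COPY checks
// above.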
9709
9710 SDValue OpLHS, OpRHS;
9711 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9712 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9713
9714 int ShufIdxs[16];
9715 switch (OpNum) {
9716 default: llvm_unreachable("Unknown i32 permute!");
9717 case OP_VMRGHW:
9718 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9719 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9720 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9721 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9722 break;
9723 case OP_VMRGLW:
9724 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9725 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9726 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9727 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9728 break;
9729 case OP_VSPLTISW0:
9730 for (unsigned i = 0; i != 16; ++i)
9731 ShufIdxs[i] = (i&3)+0;
9732 break;
9733 case OP_VSPLTISW1:
9734 for (unsigned i = 0; i != 16; ++i)
9735 ShufIdxs[i] = (i&3)+4;
9736 break;
9737 case OP_VSPLTISW2:
9738 for (unsigned i = 0; i != 16; ++i)
9739 ShufIdxs[i] = (i&3)+8;
9740 break;
9741 case OP_VSPLTISW3:
9742 for (unsigned i = 0; i != 16; ++i)
9743 ShufIdxs[i] = (i&3)+12;
9744 break;
9745 case OP_VSLDOI4:
9746 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9747 case OP_VSLDOI8:
9748 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9749 case OP_VSLDOI12:
9750 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9751 }
9752 EVT VT = OpLHS.getValueType();
9753 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9754 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9755 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9756 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9757}
9758
9759/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9760/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9761/// SDValue.
9762SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9763 SelectionDAG &DAG) const {
9764 const unsigned BytesInVector = 16;
9765 bool IsLE = Subtarget.isLittleEndian();
9766 SDLoc dl(N);
9767 SDValue V1 = N->getOperand(0);
9768 SDValue V2 = N->getOperand(1);
9769 unsigned ShiftElts = 0, InsertAtByte = 0;
9770 bool Swap = false;
9771
9772 // Shifts required to get the byte we want at element 7.
9773 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
9774 0, 15, 14, 13, 12, 11, 10, 9};
9775 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
9776 1, 2, 3, 4, 5, 6, 7, 8};
9777
9778 ArrayRef<int> Mask = N->getMask();
9779 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
9780
9781 // For each mask element, find out if we're just inserting something
9782 // from V2 into V1 or vice versa.
9783 // Possible permutations inserting an element from V2 into V1:
9784 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9785 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
9786 // ...
9787 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
9788 // Inserting from V1 into V2 will be similar, except mask range will be
9789 // [16,31].
9790
9791 bool FoundCandidate = false;
9792 // If both vector operands for the shuffle are the same vector, the mask
9793 // will contain only elements from the first one and the second one will be
9794 // undef.
9795 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
9796 // Go through the mask of bytes to find an element that's being moved
9797 // from one vector to the other.
9798 for (unsigned i = 0; i < BytesInVector; ++i) {
9799 unsigned CurrentElement = Mask[i];
9800 // If the 2nd operand is undefined, we should only look for the VINSERTB
9801 // source element (byte 7 on BE, byte 8 on LE) in the Mask.
9802 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
9803 continue;
9804
9805 bool OtherElementsInOrder = true;
9806 // Examine the other elements in the Mask to see if they're in original
9807 // order.
9808 for (unsigned j = 0; j < BytesInVector; ++j) {
9809 if (j == i)
9810 continue;
9811 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
9812 // to be from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
9813 // in which case we assume we're always picking from the 1st operand.
9814 int MaskOffset =
9815 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
9816 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
9817 OtherElementsInOrder = false;
9818 break;
9819 }
9820 }
9821 // If other elements are in original order, we record the number of shifts
9822 // we need to get the element we want into element 7. Also record which byte
9823 // in the vector we should insert into.
9824 if (OtherElementsInOrder) {
9825 // If 2nd operand is undefined, we assume no shifts and no swapping.
9826 if (V2.isUndef()) {
9827 ShiftElts = 0;
9828 Swap = false;
9829 } else {
9830 // Only need the last 4 bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
9831 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
9832 : BigEndianShifts[CurrentElement & 0xF];
9833 Swap = CurrentElement < BytesInVector;
9834 }
9835 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
9836 FoundCandidate = true;
9837 break;
9838 }
9839 }
9840
9841 if (!FoundCandidate)
9842 return SDValue();
9843
9844 // Candidate found, construct the proper SDAG sequence with VINSERTB,
9845 // optionally with VECSHL if shift is required.
9846 if (Swap)
9847 std::swap(V1, V2);
9848 if (V2.isUndef())
9849 V2 = V1;
9850 if (ShiftElts) {
9851 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9852 DAG.getConstant(ShiftElts, dl, MVT::i32));
9853 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
9854 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9855 }
9856 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
9857 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9858}
9859
9860/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
9861/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
9862/// SDValue.
9863SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
9864 SelectionDAG &DAG) const {
9865 const unsigned NumHalfWords = 8;
9866 const unsigned BytesInVector = NumHalfWords * 2;
9867 // Check that the shuffle is on half-words.
9868 if (!isNByteElemShuffleMask(N, 2, 1))
9869 return SDValue();
9870
9871 bool IsLE = Subtarget.isLittleEndian();
9872 SDLoc dl(N);
9873 SDValue V1 = N->getOperand(0);
9874 SDValue V2 = N->getOperand(1);
9875 unsigned ShiftElts = 0, InsertAtByte = 0;
9876 bool Swap = false;
9877
9878 // Shifts required to get the half-word we want at element 3.
9879 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
9880 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
9881
9882 uint32_t Mask = 0;
9883 uint32_t OriginalOrderLow = 0x1234567;
9884 uint32_t OriginalOrderHigh = 0x89ABCDEF;
9885 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
9886 // 32-bit space, only need 4-bit nibbles per element.
9887 for (unsigned i = 0; i < NumHalfWords; ++i) {
9888 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9889 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
9890 }
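// For example, an identity shuffle over half-words 0..7 packs to
// 0x01234567, which equals OriginalOrderLow below.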
9891
9892 // For each mask element, find out if we're just inserting something
9893 // from V2 into V1 or vice versa. Possible permutations inserting an element
9894 // from V2 into V1:
9895 // X, 1, 2, 3, 4, 5, 6, 7
9896 // 0, X, 2, 3, 4, 5, 6, 7
9897 // 0, 1, X, 3, 4, 5, 6, 7
9898 // 0, 1, 2, X, 4, 5, 6, 7
9899 // 0, 1, 2, 3, X, 5, 6, 7
9900 // 0, 1, 2, 3, 4, X, 6, 7
9901 // 0, 1, 2, 3, 4, 5, X, 7
9902 // 0, 1, 2, 3, 4, 5, 6, X
9903 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
9904
9905 bool FoundCandidate = false;
9906 // Go through the mask of half-words to find an element that's being moved
9907 // from one vector to the other.
9908 for (unsigned i = 0; i < NumHalfWords; ++i) {
9909 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
9910 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
9911 uint32_t MaskOtherElts = ~(0xF << MaskShift);
9912 uint32_t TargetOrder = 0x0;
9913
9914 // If both vector operands for the shuffle are the same vector, the mask
9915 // will contain only elements from the first one and the second one will be
9916 // undef.
9917 if (V2.isUndef()) {
9918 ShiftElts = 0;
9919 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
9920 TargetOrder = OriginalOrderLow;
9921 Swap = false;
9922 // Skip if not the correct element or mask of other elements don't equal
9923 // to our expected order.
9924 if (MaskOneElt == VINSERTHSrcElem &&
9925 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9926 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9927 FoundCandidate = true;
9928 break;
9929 }
9930 } else { // If both operands are defined.
9931 // Target order is [8,15] if the current mask is between [0,7].
9932 TargetOrder =
9933 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
9934 // Skip if mask of other elements don't equal our expected order.
9935 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
9936 // We only need the last 3 bits for the number of shifts.
9937 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
9938 : BigEndianShifts[MaskOneElt & 0x7];
9939 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
9940 Swap = MaskOneElt < NumHalfWords;
9941 FoundCandidate = true;
9942 break;
9943 }
9944 }
9945 }
9946
9947 if (!FoundCandidate)
9948 return SDValue();
9949
9950 // Candidate found, construct the proper SDAG sequence with VINSERTH,
9951 // optionally with VECSHL if shift is required.
9952 if (Swap)
9953 std::swap(V1, V2);
9954 if (V2.isUndef())
9955 V2 = V1;
9956 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9957 if (ShiftElts) {
9958 // Double ShiftElts because we're left shifting on v16i8 type.
9959 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
9960 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
9961 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
9962 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9963 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9964 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9965 }
9966 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
9967 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
9968 DAG.getConstant(InsertAtByte, dl, MVT::i32));
9969 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9970}
9971
9972/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9973/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9974/// return the default SDValue.
9975SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9976 SelectionDAG &DAG) const {
9977 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9978 // to v16i8. Peek through the bitcasts to get the actual operands.
9979 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
9980 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
9981
9982 auto ShuffleMask = SVN->getMask();
9983 SDValue VecShuffle(SVN, 0);
9984 SDLoc DL(SVN);
9985
9986 // Check that we have a four byte shuffle.
9987 if (!isNByteElemShuffleMask(SVN, 4, 1))
9988 return SDValue();
9989
9990 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9991 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9992 std::swap(LHS, RHS);
9993 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
9994 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
9995 if (!CommutedSV)
9996 return SDValue();
9997 ShuffleMask = CommutedSV->getMask();
9998 }
9999
10000 // Ensure that the RHS is a vector of constants.
10001 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10002 if (!BVN)
10003 return SDValue();
10004
10005 // Check if RHS is a splat of 4-bytes (or smaller).
10006 APInt APSplatValue, APSplatUndef;
10007 unsigned SplatBitSize;
10008 bool HasAnyUndefs;
10009 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10010 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10011 SplatBitSize > 32)
10012 return SDValue();
10013
10014 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10015 // The instruction splats a constant C into two words of the source vector
10016 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10017 // Thus we check that the shuffle mask is the equivalent of
10018 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10019 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10020 // within each word are consecutive, so we only need to check the first byte.
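// For example, the v16i8 mask <0,1,2,3, 16,17,18,19, 8,9,10,11, 16,17,18,19>
// keeps words 0 and 2 of the LHS and splats the constant into words 1 and 3,
// i.e. the { Unchanged, C, Unchanged, C } form.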
10021 SDValue Index;
10022 bool IsLE = Subtarget.isLittleEndian();
10023 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10024 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10025 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10026 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10027 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10028 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10029 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10030 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10031 else
10032 return SDValue();
10033
10034 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10035 // for XXSPLTI32DX.
10036 unsigned SplatVal = APSplatValue.getZExtValue();
10037 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10038 SplatVal |= (SplatVal << SplatBitSize);
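// e.g. an 8-bit splat value of 0xAB is widened to 0xABAB and then to
// 0xABABABAB before being encoded in the instruction.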
10039
10040 SDValue SplatNode = DAG.getNode(
10041 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10042 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10043 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10044}
10045
10046/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10047/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10048 /// a multiple of 8. Otherwise convert it to a scalar rotation (i128),
10049 /// i.e. (or (shl x, C1), (srl x, 128-C1)).
10050SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10051 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10052 assert(Op.getValueType() == MVT::v1i128 &&
10053 "Only set v1i128 as custom, other type shouldn't reach here!");
10054 SDLoc dl(Op);
10055 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10056 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10057 unsigned SHLAmt = N1.getConstantOperandVal(0);
10058 if (SHLAmt % 8 == 0) {
10059 std::array<int, 16> Mask;
10060 std::iota(Mask.begin(), Mask.end(), 0);
10061 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
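// e.g. for SHLAmt == 8 the mask becomes <1,2,...,15,0>: rotating the
// v1i128 left by 8 bits is a one-byte rotation of the v16i8 elements.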
10062 if (SDValue Shuffle =
10063 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10064 DAG.getUNDEF(MVT::v16i8), Mask))
10065 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10066 }
10067 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10068 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10069 DAG.getConstant(SHLAmt, dl, MVT::i32));
10070 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10071 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10072 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10073 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10074}
10075
10076/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10077/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10078/// return the code it can be lowered into. Worst case, it can always be
10079/// lowered into a vperm.
10080SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10081 SelectionDAG &DAG) const {
10082 SDLoc dl(Op);
10083 SDValue V1 = Op.getOperand(0);
10084 SDValue V2 = Op.getOperand(1);
10085 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10086
10087 // Any nodes that were combined in the target-independent combiner prior
10088 // to vector legalization will not be sent to the target combine. Try to
10089 // combine it here.
10090 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10091 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10092 return NewShuffle;
10093 Op = NewShuffle;
10094 SVOp = cast<ShuffleVectorSDNode>(Op);
10095 V1 = Op.getOperand(0);
10096 V2 = Op.getOperand(1);
10097 }
10098 EVT VT = Op.getValueType();
10099 bool isLittleEndian = Subtarget.isLittleEndian();
10100
10101 unsigned ShiftElts, InsertAtByte;
10102 bool Swap = false;
10103
10104 // If this is a load-and-splat, we can do that with a single instruction
10105 // in some cases. However if the load has multiple uses, we don't want to
10106 // combine it because that will just produce multiple loads.
10107 bool IsPermutedLoad = false;
10108 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10109 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10110 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10111 InputLoad->hasOneUse()) {
10112 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10113 int SplatIdx =
10114 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10115
10116 // The splat index for permuted loads will be in the left half of the vector
10117 // which is strictly wider than the loaded value by 8 bytes. So we need to
10118 // adjust the splat index to point to the correct address in memory.
10119 if (IsPermutedLoad) {
10120 assert((isLittleEndian || IsFourByte) &&
10121 "Unexpected size for permuted load on big endian target");
10122 SplatIdx += IsFourByte ? 2 : 1;
10123 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10124 "Splat of a value outside of the loaded memory");
10125 }
10126
10127 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10128 // For 4-byte load-and-splat, we need Power9.
10129 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10130 uint64_t Offset = 0;
10131 if (IsFourByte)
10132 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10133 else
10134 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
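// e.g. splatting element 1 of a big-endian v4i32 load reads the four
// bytes at BasePtr + 4; on little endian the element order in memory is
// reversed, hence the (3 - SplatIdx) and (1 - SplatIdx) adjustments.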
10135
10136 // If the width of the load is the same as the width of the splat,
10137 // loading with an offset would load the wrong memory.
10138 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10139 Offset = 0;
10140
10141 SDValue BasePtr = LD->getBasePtr();
10142 if (Offset != 0)
10143 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10144 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10145 SDValue Ops[] = {
10146 LD->getChain(), // Chain
10147 BasePtr, // BasePtr
10148 DAG.getValueType(Op.getValueType()) // VT
10149 };
10150 SDVTList VTL =
10151 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10152 SDValue LdSplt =
10153 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10154 Ops, LD->getMemoryVT(), LD->getMemOperand());
10155 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10156 if (LdSplt.getValueType() != SVOp->getValueType(0))
10157 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10158 return LdSplt;
10159 }
10160 }
10161
10162 // All v2i64 and v2f64 shuffles are legal
10163 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10164 return Op;
10165
10166 if (Subtarget.hasP9Vector() &&
10167 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10168 isLittleEndian)) {
10169 if (V2.isUndef())
10170 V2 = V1;
10171 else if (Swap)
10172 std::swap(V1, V2);
10173 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10174 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10175 if (ShiftElts) {
10176 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10177 DAG.getConstant(ShiftElts, dl, MVT::i32));
10178 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10179 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10180 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10181 }
10182 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10183 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10184 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10185 }
10186
10187 if (Subtarget.hasPrefixInstrs()) {
10188 SDValue SplatInsertNode;
10189 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10190 return SplatInsertNode;
10191 }
10192
10193 if (Subtarget.hasP9Altivec()) {
10194 SDValue NewISDNode;
10195 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10196 return NewISDNode;
10197
10198 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10199 return NewISDNode;
10200 }
10201
10202 if (Subtarget.hasVSX() &&
10203 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10204 if (Swap)
10205 std::swap(V1, V2);
10206 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10207 SDValue Conv2 =
10208 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10209
10210 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10211 DAG.getConstant(ShiftElts, dl, MVT::i32));
10212 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10213 }
10214
10215 if (Subtarget.hasVSX() &&
10216 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10217 if (Swap)
10218 std::swap(V1, V2);
10219 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10220 SDValue Conv2 =
10221 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10222
10223 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10224 DAG.getConstant(ShiftElts, dl, MVT::i32));
10225 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10226 }
10227
10228 if (Subtarget.hasP9Vector()) {
10229 if (PPC::isXXBRHShuffleMask(SVOp)) {
10230 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10231 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10232 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10233 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10234 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10235 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10236 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10237 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10238 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10239 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10240 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10241 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10242 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10243 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10244 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10245 }
10246 }
10247
10248 if (Subtarget.hasVSX()) {
10249 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10250 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10251
10252 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10253 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10254 DAG.getConstant(SplatIdx, dl, MVT::i32));
10255 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10256 }
10257
10258 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10259 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10260 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10261 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10262 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10263 }
10264 }
10265
10266 // Cases that are handled by instructions that take permute immediates
10267 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10268 // selected by the instruction selector.
10269 if (V2.isUndef()) {
10270 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10271 PPC::isSplatShuffleMask(SVOp, 2) ||
10272 PPC::isSplatShuffleMask(SVOp, 4) ||
10273 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10274 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10275 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10276 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10277 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10278 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10279 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10280 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10281 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10282 (Subtarget.hasP8Altivec() && (
10283 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10284 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10285 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10286 return Op;
10287 }
10288 }
10289
10290 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10291 // and produce a fixed permutation. If any of these match, do not lower to
10292 // VPERM.
10293 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10294 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10295 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10296 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10297 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10298 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10299 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10300 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10301 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10302 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10303 (Subtarget.hasP8Altivec() && (
10304 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10305 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10306 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10307 return Op;
10308
10309 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10310 // perfect shuffle table to emit an optimal matching sequence.
10311 ArrayRef<int> PermMask = SVOp->getMask();
10312
10313 if (!DisablePerfectShuffle && !isLittleEndian) {
10314 unsigned PFIndexes[4];
10315 bool isFourElementShuffle = true;
10316 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10317 ++i) { // Element number
10318 unsigned EltNo = 8; // Start out undef.
10319 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10320 if (PermMask[i * 4 + j] < 0)
10321 continue; // Undef, ignore it.
10322
10323 unsigned ByteSource = PermMask[i * 4 + j];
10324 if ((ByteSource & 3) != j) {
10325 isFourElementShuffle = false;
10326 break;
10327 }
10328
10329 if (EltNo == 8) {
10330 EltNo = ByteSource / 4;
10331 } else if (EltNo != ByteSource / 4) {
10332 isFourElementShuffle = false;
10333 break;
10334 }
10335 }
10336 PFIndexes[i] = EltNo;
10337 }
10338
10339 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10340 // perfect shuffle vector to determine if it is cost effective to do this as
10341 // discrete instructions, or whether we should use a vperm.
10342 // For now, we skip this for little endian until such time as we have a
10343 // little-endian perfect shuffle table.
10344 if (isFourElementShuffle) {
10345 // Compute the index in the perfect shuffle table.
10346 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10347 PFIndexes[2] * 9 + PFIndexes[3];
10348
10349 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10350 unsigned Cost = (PFEntry >> 30);
10351
10352 // Determining when to avoid vperm is tricky. Many things affect the cost
10353 // of vperm, particularly how many times the perm mask needs to be
10354 // computed. For example, if the perm mask can be hoisted out of a loop or
10355 // is already used (perhaps because there are multiple permutes with the
10356 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10357 // permute mask out of the loop requires an extra register.
10358 //
10359 // As a compromise, we only emit discrete instructions if the shuffle can
10360 // be generated in 3 or fewer operations. When we have loop information
10361 // available, if this block is within a loop, we should avoid using vperm
10362 // for 3-operation perms and use a constant pool load instead.
10363 if (Cost < 3)
10364 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10365 }
10366 }
10367
10368 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10369 // vector that will get spilled to the constant pool.
10370 if (V2.isUndef()) V2 = V1;
10371
10372 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10373}
10374
10375SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10376 ArrayRef<int> PermMask, EVT VT,
10377 SDValue V1, SDValue V2) const {
10378 unsigned Opcode = PPCISD::VPERM;
10379 EVT ValType = V1.getValueType();
10380 SDLoc dl(Op);
10381 bool NeedSwap = false;
10382 bool isLittleEndian = Subtarget.isLittleEndian();
10383 bool isPPC64 = Subtarget.isPPC64();
10384
10385 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10386 (V1->hasOneUse() || V2->hasOneUse())) {
10387 LLVM_DEBUG(dbgs() << "At least one of the two input vectors is dead - "
10388 "using XXPERM instead\n");
10389 Opcode = PPCISD::XXPERM;
10390
10391 // The second input to XXPERM is also an output so if the second input has
10392 // multiple uses then copying is necessary, as a result we want the
10393 // single-use operand to be used as the second input to prevent copying.
10394 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10395 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10396 std::swap(V1, V2);
10397 NeedSwap = !NeedSwap;
10398 }
10399 }
10400
10401 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10402 // that it is in input element units, not in bytes. Convert now.
10403
10404 // For little endian, the order of the input vectors is reversed, and
10405 // the permutation mask is complemented with respect to 31. This is
10406 // necessary to produce proper semantics with the big-endian-based vperm
10407 // instruction.
10408 EVT EltVT = V1.getValueType().getVectorElementType();
10409 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10410
10411 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10412 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10413
10414 /*
10415 Vectors will be appended like so: [ V1 | V2 ]
10416 XXSWAPD on V1:
10417 [ A | B | C | D ] -> [ C | D | A | B ]
10418 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10419 i.e. index of A, B += 8, and index of C, D -= 8.
10420 XXSWAPD on V2:
10421 [ E | F | G | H ] -> [ G | H | E | F ]
10422 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10423 i.e. index of E, F += 8, index of G, H -= 8
10424 Swap V1 and V2:
10425 [ V1 | V2 ] -> [ V2 | V1 ]
10426 0-15 16-31 0-15 16-31
10427 i.e. index of V1 += 16, index of V2 -= 16
10428 */
10429
10430 SmallVector<SDValue, 16> ResultMask;
10431 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10432 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10433
10434 if (V1HasXXSWAPD) {
10435 if (SrcElt < 8)
10436 SrcElt += 8;
10437 else if (SrcElt < 16)
10438 SrcElt -= 8;
10439 }
10440 if (V2HasXXSWAPD) {
10441 if (SrcElt > 23)
10442 SrcElt -= 8;
10443 else if (SrcElt > 15)
10444 SrcElt += 8;
10445 }
10446 if (NeedSwap) {
10447 if (SrcElt < 16)
10448 SrcElt += 16;
10449 else
10450 SrcElt -= 16;
10451 }
10452 for (unsigned j = 0; j != BytesPerElement; ++j)
10453 if (isLittleEndian)
10454 ResultMask.push_back(
10455 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10456 else
10457 ResultMask.push_back(
10458 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10459 }
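// As a worked instance of the complement rule above: a byte taken from
// element 17 (byte 1 of V2) keeps mask value 17 on a big-endian target,
// while on little endian it becomes 31 - 17 == 14 and the two input
// vectors are swapped below.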
10460
10461 if (V1HasXXSWAPD) {
10462 dl = SDLoc(V1->getOperand(0));
10463 V1 = V1->getOperand(0)->getOperand(1);
10464 }
10465 if (V2HasXXSWAPD) {
10466 dl = SDLoc(V2->getOperand(0));
10467 V2 = V2->getOperand(0)->getOperand(1);
10468 }
10469
10470 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10471 if (ValType != MVT::v2f64)
10472 V1 = DAG.getBitcast(MVT::v2f64, V1);
10473 if (V2.getValueType() != MVT::v2f64)
10474 V2 = DAG.getBitcast(MVT::v2f64, V2);
10475 }
10476
10477 ShufflesHandledWithVPERM++;
10478 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10479 LLVM_DEBUG({
10480 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10481 if (Opcode == PPCISD::XXPERM) {
10482 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10483 } else {
10484 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10485 }
10486 SVOp->dump();
10487 dbgs() << "With the following permute control vector:\n";
10488 VPermMask.dump();
10489 });
10490
10491 if (Opcode == PPCISD::XXPERM)
10492 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10493
10494 // For little endian, swap the operand order; the permute mask above was
10495 // already calculated with this swap in mind.
10496 if (isLittleEndian)
10497 std::swap(V1, V2);
10498
10499 SDValue VPERMNode =
10500 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10501
10502 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10503 return VPERMNode;
10504}
10505
10506 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10507 /// vector comparison. If it is, return true and fill in CompareOpc/isDot
10508 /// with information about the intrinsic.
10509static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10510 bool &isDot, const PPCSubtarget &Subtarget) {
10511 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10512 CompareOpc = -1;
10513 isDot = false;
10514 switch (IntrinsicID) {
10515 default:
10516 return false;
10517 // Comparison predicates.
10518 case Intrinsic::ppc_altivec_vcmpbfp_p:
10519 CompareOpc = 966;
10520 isDot = true;
10521 break;
10522 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10523 CompareOpc = 198;
10524 isDot = true;
10525 break;
10526 case Intrinsic::ppc_altivec_vcmpequb_p:
10527 CompareOpc = 6;
10528 isDot = true;
10529 break;
10530 case Intrinsic::ppc_altivec_vcmpequh_p:
10531 CompareOpc = 70;
10532 isDot = true;
10533 break;
10534 case Intrinsic::ppc_altivec_vcmpequw_p:
10535 CompareOpc = 134;
10536 isDot = true;
10537 break;
10538 case Intrinsic::ppc_altivec_vcmpequd_p:
10539 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10540 CompareOpc = 199;
10541 isDot = true;
10542 } else
10543 return false;
10544 break;
10545 case Intrinsic::ppc_altivec_vcmpneb_p:
10546 case Intrinsic::ppc_altivec_vcmpneh_p:
10547 case Intrinsic::ppc_altivec_vcmpnew_p:
10548 case Intrinsic::ppc_altivec_vcmpnezb_p:
10549 case Intrinsic::ppc_altivec_vcmpnezh_p:
10550 case Intrinsic::ppc_altivec_vcmpnezw_p:
10551 if (Subtarget.hasP9Altivec()) {
10552 switch (IntrinsicID) {
10553 default:
10554 llvm_unreachable("Unknown comparison intrinsic.");
10555 case Intrinsic::ppc_altivec_vcmpneb_p:
10556 CompareOpc = 7;
10557 break;
10558 case Intrinsic::ppc_altivec_vcmpneh_p:
10559 CompareOpc = 71;
10560 break;
10561 case Intrinsic::ppc_altivec_vcmpnew_p:
10562 CompareOpc = 135;
10563 break;
10564 case Intrinsic::ppc_altivec_vcmpnezb_p:
10565 CompareOpc = 263;
10566 break;
10567 case Intrinsic::ppc_altivec_vcmpnezh_p:
10568 CompareOpc = 327;
10569 break;
10570 case Intrinsic::ppc_altivec_vcmpnezw_p:
10571 CompareOpc = 391;
10572 break;
10573 }
10574 isDot = true;
10575 } else
10576 return false;
10577 break;
10578 case Intrinsic::ppc_altivec_vcmpgefp_p:
10579 CompareOpc = 454;
10580 isDot = true;
10581 break;
10582 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10583 CompareOpc = 710;
10584 isDot = true;
10585 break;
10586 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10587 CompareOpc = 774;
10588 isDot = true;
10589 break;
10590 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10591 CompareOpc = 838;
10592 isDot = true;
10593 break;
10594 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10595 CompareOpc = 902;
10596 isDot = true;
10597 break;
10598 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10599 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10600 CompareOpc = 967;
10601 isDot = true;
10602 } else
10603 return false;
10604 break;
10605 case Intrinsic::ppc_altivec_vcmpgtub_p:
10606 CompareOpc = 518;
10607 isDot = true;
10608 break;
10609 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10610 CompareOpc = 582;
10611 isDot = true;
10612 break;
10613 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10614 CompareOpc = 646;
10615 isDot = true;
10616 break;
10617 case Intrinsic::ppc_altivec_vcmpgtud_p:
10618 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10619 CompareOpc = 711;
10620 isDot = true;
10621 } else
10622 return false;
10623 break;
10624
10625 case Intrinsic::ppc_altivec_vcmpequq:
10626 case Intrinsic::ppc_altivec_vcmpgtsq:
10627 case Intrinsic::ppc_altivec_vcmpgtuq:
10628 if (!Subtarget.isISA3_1())
10629 return false;
10630 switch (IntrinsicID) {
10631 default:
10632 llvm_unreachable("Unknown comparison intrinsic.");
10633 case Intrinsic::ppc_altivec_vcmpequq:
10634 CompareOpc = 455;
10635 break;
10636 case Intrinsic::ppc_altivec_vcmpgtsq:
10637 CompareOpc = 903;
10638 break;
10639 case Intrinsic::ppc_altivec_vcmpgtuq:
10640 CompareOpc = 647;
10641 break;
10642 }
10643 break;
10644
10645 // VSX predicate comparisons use the same infrastructure
10646 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10647 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10648 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10649 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10650 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10651 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10652 if (Subtarget.hasVSX()) {
10653 switch (IntrinsicID) {
10654 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10655 CompareOpc = 99;
10656 break;
10657 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10658 CompareOpc = 115;
10659 break;
10660 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10661 CompareOpc = 107;
10662 break;
10663 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10664 CompareOpc = 67;
10665 break;
10666 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10667 CompareOpc = 83;
10668 break;
10669 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10670 CompareOpc = 75;
10671 break;
10672 }
10673 isDot = true;
10674 } else
10675 return false;
10676 break;
10677
10678 // Normal Comparisons.
10679 case Intrinsic::ppc_altivec_vcmpbfp:
10680 CompareOpc = 966;
10681 break;
10682 case Intrinsic::ppc_altivec_vcmpeqfp:
10683 CompareOpc = 198;
10684 break;
10685 case Intrinsic::ppc_altivec_vcmpequb:
10686 CompareOpc = 6;
10687 break;
10688 case Intrinsic::ppc_altivec_vcmpequh:
10689 CompareOpc = 70;
10690 break;
10691 case Intrinsic::ppc_altivec_vcmpequw:
10692 CompareOpc = 134;
10693 break;
10694 case Intrinsic::ppc_altivec_vcmpequd:
10695 if (Subtarget.hasP8Altivec())
10696 CompareOpc = 199;
10697 else
10698 return false;
10699 break;
10700 case Intrinsic::ppc_altivec_vcmpneb:
10701 case Intrinsic::ppc_altivec_vcmpneh:
10702 case Intrinsic::ppc_altivec_vcmpnew:
10703 case Intrinsic::ppc_altivec_vcmpnezb:
10704 case Intrinsic::ppc_altivec_vcmpnezh:
10705 case Intrinsic::ppc_altivec_vcmpnezw:
10706 if (Subtarget.hasP9Altivec())
10707 switch (IntrinsicID) {
10708 default:
10709 llvm_unreachable("Unknown comparison intrinsic.");
10710 case Intrinsic::ppc_altivec_vcmpneb:
10711 CompareOpc = 7;
10712 break;
10713 case Intrinsic::ppc_altivec_vcmpneh:
10714 CompareOpc = 71;
10715 break;
10716 case Intrinsic::ppc_altivec_vcmpnew:
10717 CompareOpc = 135;
10718 break;
10719 case Intrinsic::ppc_altivec_vcmpnezb:
10720 CompareOpc = 263;
10721 break;
10722 case Intrinsic::ppc_altivec_vcmpnezh:
10723 CompareOpc = 327;
10724 break;
10725 case Intrinsic::ppc_altivec_vcmpnezw:
10726 CompareOpc = 391;
10727 break;
10728 }
10729 else
10730 return false;
10731 break;
10732 case Intrinsic::ppc_altivec_vcmpgefp:
10733 CompareOpc = 454;
10734 break;
10735 case Intrinsic::ppc_altivec_vcmpgtfp:
10736 CompareOpc = 710;
10737 break;
10738 case Intrinsic::ppc_altivec_vcmpgtsb:
10739 CompareOpc = 774;
10740 break;
10741 case Intrinsic::ppc_altivec_vcmpgtsh:
10742 CompareOpc = 838;
10743 break;
10744 case Intrinsic::ppc_altivec_vcmpgtsw:
10745 CompareOpc = 902;
10746 break;
10747 case Intrinsic::ppc_altivec_vcmpgtsd:
10748 if (Subtarget.hasP8Altivec())
10749 CompareOpc = 967;
10750 else
10751 return false;
10752 break;
10753 case Intrinsic::ppc_altivec_vcmpgtub:
10754 CompareOpc = 518;
10755 break;
10756 case Intrinsic::ppc_altivec_vcmpgtuh:
10757 CompareOpc = 582;
10758 break;
10759 case Intrinsic::ppc_altivec_vcmpgtuw:
10760 CompareOpc = 646;
10761 break;
10762 case Intrinsic::ppc_altivec_vcmpgtud:
10763 if (Subtarget.hasP8Altivec())
10764 CompareOpc = 711;
10765 else
10766 return false;
10767 break;
10768 case Intrinsic::ppc_altivec_vcmpequq_p:
10769 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10770 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10771 if (!Subtarget.isISA3_1())
10772 return false;
10773 switch (IntrinsicID) {
10774 default:
10775 llvm_unreachable("Unknown comparison intrinsic.");
10776 case Intrinsic::ppc_altivec_vcmpequq_p:
10777 CompareOpc = 455;
10778 break;
10779 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10780 CompareOpc = 903;
10781 break;
10782 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10783 CompareOpc = 647;
10784 break;
10785 }
10786 isDot = true;
10787 break;
10788 }
10789 return true;
10790}
10791
10792/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
10793/// lower, do it, otherwise return null.
10794SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
10795 SelectionDAG &DAG) const {
10796 unsigned IntrinsicID = Op.getConstantOperandVal(0);
10797
10798 SDLoc dl(Op);
10799
10800 switch (IntrinsicID) {
10801 case Intrinsic::thread_pointer:
10802 // Reads the thread pointer register, used for __builtin_thread_pointer.
10803 if (Subtarget.isPPC64())
10804 return DAG.getRegister(PPC::X13, MVT::i64);
10805 return DAG.getRegister(PPC::R2, MVT::i32);
10806
10807 case Intrinsic::ppc_rldimi: {
10808 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
10809 SDValue Src = Op.getOperand(1);
10810 APInt Mask = Op.getConstantOperandAPInt(4);
10811 if (Mask.isZero())
10812 return Op.getOperand(2);
10813 if (Mask.isAllOnes())
10814 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
10815 uint64_t SH = Op.getConstantOperandVal(3);
10816 unsigned MB = 0, ME = 0;
10817 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
10818 report_fatal_error("invalid rldimi mask!");
10819 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
10820 if (ME < 63 - SH) {
10821 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10822 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
10823 } else if (ME > 63 - SH) {
10824 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
10825 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
10826 }
10827 return SDValue(
10828 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
10829 {Op.getOperand(2), Src,
10830 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
10831 DAG.getTargetConstant(MB, dl, MVT::i32)}),
10832 0);
10833 }
10834
10835 case Intrinsic::ppc_rlwimi: {
10836 APInt Mask = Op.getConstantOperandAPInt(4);
10837 if (Mask.isZero())
10838 return Op.getOperand(2);
10839 if (Mask.isAllOnes())
10840 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
10841 Op.getOperand(3));
10842 unsigned MB = 0, ME = 0;
10843 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
10844 report_fatal_error("invalid rlwimi mask!");
10845 return SDValue(DAG.getMachineNode(
10846 PPC::RLWIMI, dl, MVT::i32,
10847 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
10848 DAG.getTargetConstant(MB, dl, MVT::i32),
10849 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10850 0);
10851 }
10852
10853 case Intrinsic::ppc_rlwnm: {
10854 if (Op.getConstantOperandVal(3) == 0)
10855 return DAG.getConstant(0, dl, MVT::i32);
10856 unsigned MB = 0, ME = 0;
10857 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
10858 report_fatal_error("invalid rlwnm mask!");
10859 return SDValue(
10860 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
10861 {Op.getOperand(1), Op.getOperand(2),
10862 DAG.getTargetConstant(MB, dl, MVT::i32),
10863 DAG.getTargetConstant(ME, dl, MVT::i32)}),
10864 0);
10865 }
10866
10867 case Intrinsic::ppc_mma_disassemble_acc: {
10868 if (Subtarget.isISAFuture()) {
10869 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
10870 SDValue WideVec = SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl,
10871 ArrayRef(ReturnTypes, 2),
10872 Op.getOperand(1)),
10873 0);
10874 SmallVector<SDValue, 4> RetOps;
10875 SDValue Value = SDValue(WideVec.getNode(), 0);
10876 SDValue Value2 = SDValue(WideVec.getNode(), 1);
10877
10878 SDValue Extract;
10879 Extract = DAG.getNode(
10880 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10881 Subtarget.isLittleEndian() ? Value2 : Value,
10882 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10883 dl, getPointerTy(DAG.getDataLayout())));
10884 RetOps.push_back(Extract);
10885 Extract = DAG.getNode(
10886 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10887 Subtarget.isLittleEndian() ? Value2 : Value,
10888 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10889 dl, getPointerTy(DAG.getDataLayout())));
10890 RetOps.push_back(Extract);
10891 Extract = DAG.getNode(
10892 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10893 Subtarget.isLittleEndian() ? Value : Value2,
10894 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
10895 dl, getPointerTy(DAG.getDataLayout())));
10896 RetOps.push_back(Extract);
10897 Extract = DAG.getNode(
10898 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
10899 Subtarget.isLittleEndian() ? Value : Value2,
10900 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
10901 dl, getPointerTy(DAG.getDataLayout())));
10902 RetOps.push_back(Extract);
10903 return DAG.getMergeValues(RetOps, dl);
10904 }
10905 [[fallthrough]];
10906 }
10907 case Intrinsic::ppc_vsx_disassemble_pair: {
10908 int NumVecs = 2;
10909 SDValue WideVec = Op.getOperand(1);
10910 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
10911 NumVecs = 4;
10912 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
10913 }
10914 SmallVector<SDValue, 4> RetOps;
10915 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
10916 SDValue Extract = DAG.getNode(
10917 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
10918 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
10919 : VecNo,
10920 dl, getPointerTy(DAG.getDataLayout())));
10921 RetOps.push_back(Extract);
10922 }
10923 return DAG.getMergeValues(RetOps, dl);
10924 }
10925
10926 case Intrinsic::ppc_mma_xxmfacc:
10927 case Intrinsic::ppc_mma_xxmtacc: {
10928 // Allow pre-isa-future subtargets to lower as normal.
10929 if (!Subtarget.isISAFuture())
10930 return SDValue();
10931 // The intrinsics for xxmtacc and xxmfacc take one argument of
10932 // type v512i1. For future CPUs, the corresponding wacc instruction
10933 // dmxx[inst|extf]dmr512 is always generated for type v512i1, removing
10934 // the need to produce the xxm[t|f]acc.
10935 SDValue WideVec = Op.getOperand(1);
10936 DAG.ReplaceAllUsesWith(Op, WideVec);
10937 return SDValue();
10938 }
10939
10940 case Intrinsic::ppc_unpack_longdouble: {
10941 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10942 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
10943 "Argument of long double unpack must be 0 or 1!");
10944 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
10945 DAG.getConstant(!!(Idx->getSExtValue()), dl,
10946 Idx->getValueType(0)));
10947 }
10948
10949 case Intrinsic::ppc_compare_exp_lt:
10950 case Intrinsic::ppc_compare_exp_gt:
10951 case Intrinsic::ppc_compare_exp_eq:
10952 case Intrinsic::ppc_compare_exp_uo: {
10953 unsigned Pred;
10954 switch (IntrinsicID) {
10955 case Intrinsic::ppc_compare_exp_lt:
10956 Pred = PPC::PRED_LT;
10957 break;
10958 case Intrinsic::ppc_compare_exp_gt:
10959 Pred = PPC::PRED_GT;
10960 break;
10961 case Intrinsic::ppc_compare_exp_eq:
10962 Pred = PPC::PRED_EQ;
10963 break;
10964 case Intrinsic::ppc_compare_exp_uo:
10965 Pred = PPC::PRED_UN;
10966 break;
10967 }
10968 return SDValue(
10969 DAG.getMachineNode(
10970 PPC::SELECT_CC_I4, dl, MVT::i32,
10971 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
10972 Op.getOperand(1), Op.getOperand(2)),
10973 0),
10974 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10975 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
10976 0);
10977 }
10978 case Intrinsic::ppc_test_data_class: {
10979 EVT OpVT = Op.getOperand(1).getValueType();
10980 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
10981 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
10982 : PPC::XSTSTDCSP);
10983 return SDValue(
10984 DAG.getMachineNode(
10985 PPC::SELECT_CC_I4, dl, MVT::i32,
10986 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
10987 Op.getOperand(1)),
10988 0),
10989 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
10990 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
10991 0);
10992 }
10993 case Intrinsic::ppc_fnmsub: {
10994 EVT VT = Op.getOperand(1).getValueType();
10995 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
10996 return DAG.getNode(
10997 ISD::FNEG, dl, VT,
10998 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
10999 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11000 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11001 Op.getOperand(2), Op.getOperand(3));
11002 }
11003 case Intrinsic::ppc_convert_f128_to_ppcf128:
11004 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11005 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11006 ? RTLIB::CONVERT_PPCF128_F128
11007 : RTLIB::CONVERT_F128_PPCF128;
11008 MakeLibCallOptions CallOptions;
11009 std::pair<SDValue, SDValue> Result =
11010 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11011 dl, SDValue());
11012 return Result.first;
11013 }
11014 case Intrinsic::ppc_maxfe:
11015 case Intrinsic::ppc_maxfl:
11016 case Intrinsic::ppc_maxfs:
11017 case Intrinsic::ppc_minfe:
11018 case Intrinsic::ppc_minfl:
11019 case Intrinsic::ppc_minfs: {
11020 EVT VT = Op.getValueType();
11021 assert(
11022 all_of(Op->ops().drop_front(4),
11023 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11024 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11025 (void)VT;
11026 ISD::CondCode CC = ISD::SETGT;
11027 if (IntrinsicID == Intrinsic::ppc_minfe ||
11028 IntrinsicID == Intrinsic::ppc_minfl ||
11029 IntrinsicID == Intrinsic::ppc_minfs)
11030 CC = ISD::SETLT;
11031 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11032 SDValue Res = Op.getOperand(I);
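// Fold all value operands (operands 1 through NumOperands-1; operand 0 is
// the intrinsic ID) into Res pairwise with select_cc: the index walks down
// from NumOperands-3 to 1 and then wraps to NumOperands-1, so every value
// operand is visited exactly once.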
11033 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11034 Res =
11035 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11036 }
11037 return Res;
11038 }
11039 }
11040
11041 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11042 // opcode number of the comparison.
11043 int CompareOpc;
11044 bool isDot;
11045 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11046 return SDValue(); // Don't custom lower most intrinsics.
11047
11048 // If this is a non-dot comparison, make the VCMP node and we are done.
11049 if (!isDot) {
11050 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11051 Op.getOperand(1), Op.getOperand(2),
11052 DAG.getConstant(CompareOpc, dl, MVT::i32));
11053 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11054 }
11055
11056 // Create the PPCISD altivec 'dot' comparison node.
11057 SDValue Ops[] = {
11058 Op.getOperand(2), // LHS
11059 Op.getOperand(3), // RHS
11060 DAG.getConstant(CompareOpc, dl, MVT::i32)
11061 };
11062 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11063 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11064
11065 // Now that we have the comparison, emit a copy from the CR to a GPR.
11066 // This is flagged to the above dot comparison.
11067 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11068 DAG.getRegister(PPC::CR6, MVT::i32),
11069 CompNode.getValue(1));
11070
11071 // Unpack the result based on how the target uses it.
11072 unsigned BitNo; // Bit # of CR6.
11073 bool InvertBit; // Invert result?
11074 switch (Op.getConstantOperandVal(1)) {
11075 default: // Can't happen, don't crash on invalid number though.
11076 case 0: // Return the value of the EQ bit of CR6.
11077 BitNo = 0; InvertBit = false;
11078 break;
11079 case 1: // Return the inverted value of the EQ bit of CR6.
11080 BitNo = 0; InvertBit = true;
11081 break;
11082 case 2: // Return the value of the LT bit of CR6.
11083 BitNo = 2; InvertBit = false;
11084 break;
11085 case 3: // Return the inverted value of the LT bit of CR6.
11086 BitNo = 2; InvertBit = true;
11087 break;
11088 }
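// MFOCRF leaves CR6 in bits 7..4 of the result (counting from the LSB), so
// the CR6 bit selected above lands at position 8 - (3 - BitNo); the shift
// below moves it into bit 0.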
11089
11090 // Shift the bit into the low position.
11091 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11092 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11093 // Isolate the bit.
11094 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11095 DAG.getConstant(1, dl, MVT::i32));
11096
11097 // If we are supposed to, toggle the bit.
11098 if (InvertBit)
11099 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11100 DAG.getConstant(1, dl, MVT::i32));
11101 return Flags;
11102}
11103
11104SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11105 SelectionDAG &DAG) const {
11106 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11107 // the beginning of the argument list.
11108 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11109 SDLoc DL(Op);
11110 switch (Op.getConstantOperandVal(ArgStart)) {
11111 case Intrinsic::ppc_cfence: {
11112 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11113 SDValue Val = Op.getOperand(ArgStart + 1);
11114 EVT Ty = Val.getValueType();
11115 if (Ty == MVT::i128) {
11116 // FIXME: Testing one of two paired registers is sufficient to guarantee
11117 // ordering?
11118 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11119 }
11120 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11121 EVT FTy = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
11122 return SDValue(
11123 DAG.getMachineNode(Opcode, DL, MVT::Other,
11124 DAG.getNode(ISD::ANY_EXTEND, DL, FTy, Val),
11125 Op.getOperand(0)),
11126 0);
11127 }
11128 default:
11129 break;
11130 }
11131 return SDValue();
11132}
11133
11134// Lower scalar BSWAP64 to xxbrd.
11135SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11136 SDLoc dl(Op);
11137 if (!Subtarget.isPPC64())
11138 return Op;
11139 // MTVSRDD
11140 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11141 Op.getOperand(0));
11142 // XXBRD
11143 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11144 // MFVSRD
11145 int VectorIndex = 0;
11146 if (Subtarget.isLittleEndian())
11147 VectorIndex = 1;
11148 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11149 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11150 return Op;
11151}
11152
11153// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11154// compared to a value that is atomically loaded (atomic loads zero-extend).
11155SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11156 SelectionDAG &DAG) const {
11157 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11158 "Expecting an atomic compare-and-swap here.");
11159 SDLoc dl(Op);
11160 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11161 EVT MemVT = AtomicNode->getMemoryVT();
11162 if (MemVT.getSizeInBits() >= 32)
11163 return Op;
11164
11165 SDValue CmpOp = Op.getOperand(2);
11166 // If this is already correctly zero-extended, leave it alone.
11167 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11168 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11169 return Op;
11170
11171 // Clear the high bits of the compare operand.
11172 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
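// That is, 0xFF for an i8 operation and 0xFFFF for an i16 one.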
11173 SDValue NewCmpOp =
11174 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11175 DAG.getConstant(MaskVal, dl, MVT::i32));
11176
11177 // Replace the existing compare operand with the properly zero-extended one.
11178 SmallVector<SDValue, 4> Ops;
11179 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11180 Ops.push_back(AtomicNode->getOperand(i));
11181 Ops[2] = NewCmpOp;
11182 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11183 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11184 auto NodeTy =
11185 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11186 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11187}
11188
11189SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11190 SelectionDAG &DAG) const {
11191 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11192 EVT MemVT = N->getMemoryVT();
11193 assert(MemVT.getSimpleVT() == MVT::i128 &&
11194 "Expect quadword atomic operations");
11195 SDLoc dl(N);
11196 unsigned Opc = N->getOpcode();
11197 switch (Opc) {
11198 case ISD::ATOMIC_LOAD: {
11199 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11200 // lowered to ppc instructions by pattern matching instruction selector.
11201 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11202 SmallVector<SDValue, 4> Ops{
11203 N->getOperand(0),
11204 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11205 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11206 Ops.push_back(N->getOperand(I));
11207 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11208 Ops, MemVT, N->getMemOperand());
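// The intrinsic yields the loaded quadword as two i64 halves; reassemble
// the i128 result below as (zext(hi) << 64) | zext(lo).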
11209 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11210 SDValue ValHi =
11211 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11212 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11213 DAG.getConstant(64, dl, MVT::i32));
11214 SDValue Val =
11215 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11216 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11217 {Val, LoadedVal.getValue(2)});
11218 }
11219 case ISD::ATOMIC_STORE: {
11220 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11221 // lowered to ppc instructions by pattern matching instruction selector.
11222 SDVTList Tys = DAG.getVTList(MVT::Other);
11223 SmallVector<SDValue, 4> Ops{
11224 N->getOperand(0),
11225 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11226 SDValue Val = N->getOperand(1);
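// Split the i128 value to be stored into its low and high i64 halves; both
// halves are passed to the store intrinsic as separate operands.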
11227 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11228 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11229 DAG.getConstant(64, dl, MVT::i32));
11230 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11231 Ops.push_back(ValLo);
11232 Ops.push_back(ValHi);
11233 Ops.push_back(N->getOperand(2));
11234 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11235 N->getMemOperand());
11236 }
11237 default:
11238 llvm_unreachable("Unexpected atomic opcode");
11239 }
11240}
11241
11242 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11243 SelectionDAG &DAG,
11244 const PPCSubtarget &Subtarget) {
11245 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11246
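// These enumerators appear to mirror the bits of the DCMX mask operand
// accepted by the xststdc[sp|dp|qp] test-data-class instructions.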
11247 enum DataClassMask {
11248 DC_NAN = 1 << 6,
11249 DC_NEG_INF = 1 << 4,
11250 DC_POS_INF = 1 << 5,
11251 DC_NEG_ZERO = 1 << 2,
11252 DC_POS_ZERO = 1 << 3,
11253 DC_NEG_SUBNORM = 1,
11254 DC_POS_SUBNORM = 1 << 1,
11255 };
11256
11257 EVT VT = Op.getValueType();
11258
11259 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11260 : VT == MVT::f64 ? PPC::XSTSTDCDP
11261 : PPC::XSTSTDCSP;
11262
11263 if (Mask == fcAllFlags)
11264 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11265 if (Mask == 0)
11266 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11267
11268 // When it's cheaper or necessary, test the complemented set of flags and invert the result.
11269 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11270 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11271 return DAG.getNOT(Dl, Rev, MVT::i1);
11272 }
11273
11274 // Power doesn't support testing whether a value is 'normal' directly. Test
11275 // every other class first; 'none of those' with the expected sign is 'normal'.
11276 if (Mask & fcNormal) {
11277 SDValue Rev(DAG.getMachineNode(
11278 TestOp, Dl, MVT::i32,
11279 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11280 DC_NEG_ZERO | DC_POS_ZERO |
11281 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11282 Dl, MVT::i32),
11283 Op),
11284 0);
11285 // The sign is stored in CR bit 0 (LT); the result is in CR bit 2 (EQ).
11286 SDValue Sign(
11287 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11288 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11289 0);
11290 SDValue Normal(DAG.getNOT(
11291 Dl,
11292 SDValue(DAG.getMachineNode(
11293 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11294 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11295 0),
11296 MVT::i1));
11297 if (Mask & fcPosNormal)
11298 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11299 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11300 if (Mask == fcPosNormal || Mask == fcNegNormal)
11301 return Result;
11302
11303 return DAG.getNode(
11304 ISD::OR, Dl, MVT::i1,
11305 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11306 }
11307
11308 // The instruction doesn't differentiate between signaling and quiet NaNs. Test
11309 // the rest first, then test whether the value 'is NaN and is signaling/quiet'.
11310 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11311 bool IsQuiet = Mask & fcQNan;
11312 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11313
11314 // Quietness is determined by the first bit of the fraction field.
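// That bit sits just below the exponent within the containing 32-bit word:
// below the 15-bit f128 exponent (mask 0x8000), the 11-bit f64 exponent
// (mask 0x80000), and the 8-bit f32 exponent (mask 0x400000).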
11315 uint64_t QuietMask = 0;
11316 SDValue HighWord;
11317 if (VT == MVT::f128) {
11318 HighWord = DAG.getNode(
11319 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11320 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11321 QuietMask = 0x8000;
11322 } else if (VT == MVT::f64) {
11323 if (Subtarget.isPPC64()) {
11324 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11325 DAG.getBitcast(MVT::i64, Op),
11326 DAG.getConstant(1, Dl, MVT::i32));
11327 } else {
11328 SDValue Vec = DAG.getBitcast(
11329 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11330 HighWord = DAG.getNode(
11331 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11332 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11333 }
11334 QuietMask = 0x80000;
11335 } else if (VT == MVT::f32) {
11336 HighWord = DAG.getBitcast(MVT::i32, Op);
11337 QuietMask = 0x400000;
11338 }
11339 SDValue NanRes = DAG.getSetCC(
11340 Dl, MVT::i1,
11341 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11342 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11343 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11344 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11345 if (Mask == fcQNan || Mask == fcSNan)
11346 return NanRes;
11347
11348 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11349 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11350 NanRes);
11351 }
11352
11353 unsigned NativeMask = 0;
11354 if ((Mask & fcNan) == fcNan)
11355 NativeMask |= DC_NAN;
11356 if (Mask & fcNegInf)
11357 NativeMask |= DC_NEG_INF;
11358 if (Mask & fcPosInf)
11359 NativeMask |= DC_POS_INF;
11360 if (Mask & fcNegZero)
11361 NativeMask |= DC_NEG_ZERO;
11362 if (Mask & fcPosZero)
11363 NativeMask |= DC_POS_ZERO;
11364 if (Mask & fcNegSubnormal)
11365 NativeMask |= DC_NEG_SUBNORM;
11366 if (Mask & fcPosSubnormal)
11367 NativeMask |= DC_POS_SUBNORM;
11368 return SDValue(
11369 DAG.getMachineNode(
11370 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11371 SDValue(DAG.getMachineNode(
11372 TestOp, Dl, MVT::i32,
11373 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11374 0),
11375 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11376 0);
11377}
11378
11379SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11380 SelectionDAG &DAG) const {
11381 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11382 SDValue LHS = Op.getOperand(0);
11383 uint64_t RHSC = Op.getConstantOperandVal(1);
11384 SDLoc Dl(Op);
11385 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11386 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11387}
11388
11389SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11390 SelectionDAG &DAG) const {
11391 SDLoc dl(Op);
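// The lowering strategy here: spill the scalar to element 0 of an aligned
// stack slot and reload the slot as a full vector; the upper lanes are left
// undefined, which matches SCALAR_TO_VECTOR semantics.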
11392 // Create a stack slot that is 16-byte aligned.
11393 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
11394 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11395 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11396 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11397
11398 // Store the input value into Value#0 of the stack slot.
11399 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
11400 MachinePointerInfo());
11401 // Load it out.
11402 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11403}
11404
11405SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11406 SelectionDAG &DAG) const {
11407 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11408 "Should only be called for ISD::INSERT_VECTOR_ELT");
11409
11410 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11411
11412 EVT VT = Op.getValueType();
11413 SDLoc dl(Op);
11414 SDValue V1 = Op.getOperand(0);
11415 SDValue V2 = Op.getOperand(1);
11416
11417 if (VT == MVT::v2f64 && C)
11418 return Op;
11419
11420 if (Subtarget.hasP9Vector()) {
11421 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11422 // because on P10, it allows this specific insert_vector_elt load pattern to
11423 // utilize the refactored load and store infrastructure in order to exploit
11424 // prefixed loads.
11425 // On targets with inexpensive direct moves (Power9 and up), a
11426 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11427 // load since a single precision load will involve conversion to double
11428 // precision on the load followed by another conversion to single precision.
11429 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11430 (isa<LoadSDNode>(V2))) {
11431 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11432 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11433 SDValue InsVecElt =
11434 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11435 BitcastLoad, Op.getOperand(2));
11436 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11437 }
11438 }
11439
11440 if (Subtarget.isISA3_1()) {
11441 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11442 return SDValue();
11443 // On P10, we have legal lowering for constant and variable indices for
11444 // all vectors.
11445 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11446 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11447 return Op;
11448 }
11449
11450 // Before P10, we have legal lowering for constant indices but not for
11451 // variable ones.
11452 if (!C)
11453 return SDValue();
11454
11455 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11456 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11457 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11458 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11459 unsigned InsertAtElement = C->getZExtValue();
11460 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11461 if (Subtarget.isLittleEndian()) {
11462 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11463 }
11464 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11465 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11466 }
11467 return Op;
11468}
11469
11470SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
11471 SelectionDAG &DAG) const {
11472 SDLoc dl(Op);
11473 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11474 SDValue LoadChain = LN->getChain();
11475 SDValue BasePtr = LN->getBasePtr();
11476 EVT VT = Op.getValueType();
11477
11478 if (VT != MVT::v256i1 && VT != MVT::v512i1)
11479 return Op;
11480
11481 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11482 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
11483 // 2 or 4 vsx registers.
11484 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
11485 "Type unsupported without MMA");
11486 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11487 "Type unsupported without paired vector support");
11488 Align Alignment = LN->getAlign();
11489 SmallVector<SDValue, 4> Loads;
11490 SmallVector<SDValue, 4> LoadChains;
11491 unsigned NumVecs = VT.getSizeInBits() / 128;
11492 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11493 SDValue Load =
11494 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
11495 LN->getPointerInfo().getWithOffset(Idx * 16),
11496 commonAlignment(Alignment, Idx * 16),
11497 LN->getMemOperand()->getFlags(), LN->getAAInfo());
11498 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11499 DAG.getConstant(16, dl, BasePtr.getValueType()));
11500 Loads.push_back(Load);
11501 LoadChains.push_back(Load.getValue(1));
11502 }
11503 if (Subtarget.isLittleEndian()) {
11504 std::reverse(Loads.begin(), Loads.end());
11505 std::reverse(LoadChains.begin(), LoadChains.end());
11506 }
11507 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
11508 SDValue Value =
11509 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
11510 dl, VT, Loads);
11511 SDValue RetOps[] = {Value, TF};
11512 return DAG.getMergeValues(RetOps, dl);
11513}
11514
11515SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
11516 SelectionDAG &DAG) const {
11517 SDLoc dl(Op);
11518 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
11519 SDValue StoreChain = SN->getChain();
11520 SDValue BasePtr = SN->getBasePtr();
11521 SDValue Value = SN->getValue();
11522 SDValue Value2 = SN->getValue();
11523 EVT StoreVT = Value.getValueType();
11524
11525 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
11526 return Op;
11527
11528 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
11529 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
11530 // underlying registers individually.
11531 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
11532 "Type unsupported without MMA");
11533 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
11534 "Type unsupported without paired vector support");
11535 Align Alignment = SN->getAlign();
11536 SmallVector<SDValue, 4> Stores;
11537 unsigned NumVecs = 2;
11538 if (StoreVT == MVT::v512i1) {
11539 if (Subtarget.isISAFuture()) {
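// On ISA Future subtargets the 512-bit accumulator is first unpacked into
// two 256-bit halves with DMXXEXTFDMR512; the four 16-byte stores below are
// then fed from those halves.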
11540 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11541 MachineSDNode *ExtNode = DAG.getMachineNode(
11542 PPC::DMXXEXTFDMR512, dl, ArrayRef(ReturnTypes, 2), Op.getOperand(1));
11543
11544 Value = SDValue(ExtNode, 0);
11545 Value2 = SDValue(ExtNode, 1);
11546 } else
11547 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
11548 NumVecs = 4;
11549 }
11550 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11551 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
11552 SDValue Elt;
11553 if (Subtarget.isISAFuture()) {
11554 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
11555 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11556 Idx > 1 ? Value2 : Value,
11557 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11558 } else
11559 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
11560 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
11561
11562 SDValue Store =
11563 DAG.getStore(StoreChain, dl, Elt, BasePtr,
11564 SN->getPointerInfo().getWithOffset(Idx * 16),
11565 commonAlignment(Alignment, Idx * 16),
11566 SN->getMemOperand()->getFlags(), SN->getAAInfo());
11567 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11568 DAG.getConstant(16, dl, BasePtr.getValueType()));
11569 Stores.push_back(Store);
11570 }
11571 SDValue TF = DAG.getTokenFactor(dl, Stores);
11572 return TF;
11573}
11574
11575SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
11576 SDLoc dl(Op);
11577 if (Op.getValueType() == MVT::v4i32) {
11578 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
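// A sketch of the decomposition used below: splitting each 32-bit lane into
// 16-bit halves, a*b mod 2^32 = vmulouh(a,b) + (vmsumuhm(a, vrlw(b,16), 0) << 16),
// i.e. lo(a)*lo(b) plus both cross products shifted into the high half.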
11579
11580 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
11581 // +16 as shift amt.
11582 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
11583 SDValue RHSSwap = // = vrlw RHS, 16
11584 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
11585
11586 // Shrinkify inputs to v8i16.
11587 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
11588 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
11589 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
11590
11591 // Low parts multiplied together, generating 32-bit results (we ignore the
11592 // top parts).
11593 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
11594 LHS, RHS, DAG, dl, MVT::v4i32);
11595
11596 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
11597 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
11598 // Shift the high parts up 16 bits.
11599 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
11600 Neg16, DAG, dl);
11601 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
11602 } else if (Op.getValueType() == MVT::v16i8) {
11603 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
11604 bool isLittleEndian = Subtarget.isLittleEndian();
11605
11606 // Multiply the even 8-bit parts, producing 16-bit sums.
11607 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
11608 LHS, RHS, DAG, dl, MVT::v8i16);
11609 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
11610
11611 // Multiply the odd 8-bit parts, producing 16-bit sums.
11612 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
11613 LHS, RHS, DAG, dl, MVT::v8i16);
11614 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
11615
11616 // Merge the results together. Because vmuleub and vmuloub are
11617 // instructions with a big-endian bias, we must reverse the
11618 // element numbering and reverse the meaning of "odd" and "even"
11619 // when generating little endian code.
11620 int Ops[16];
11621 for (unsigned i = 0; i != 8; ++i) {
11622 if (isLittleEndian) {
11623 Ops[i*2 ] = 2*i;
11624 Ops[i*2+1] = 2*i+16;
11625 } else {
11626 Ops[i*2 ] = 2*i+1;
11627 Ops[i*2+1] = 2*i+1+16;
11628 }
11629 }
11630 if (isLittleEndian)
11631 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
11632 else
11633 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
11634 } else {
11635 llvm_unreachable("Unknown mul to lower!");
11636 }
11637}
11638
11639SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
11640 bool IsStrict = Op->isStrictFPOpcode();
11641 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
11642 !Subtarget.hasP9Vector())
11643 return SDValue();
11644
11645 return Op;
11646}
11647
11648 // Custom lowering for fpext v2f32 to v2f64
11649SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
11650
11651 assert(Op.getOpcode() == ISD::FP_EXTEND &&
11652 "Should only be called for ISD::FP_EXTEND");
11653
11654 // FIXME: handle extends from half precision float vectors on P9.
11655 // We only want to custom lower an extend from v2f32 to v2f64.
11656 if (Op.getValueType() != MVT::v2f64 ||
11657 Op.getOperand(0).getValueType() != MVT::v2f32)
11658 return SDValue();
11659
11660 SDLoc dl(Op);
11661 SDValue Op0 = Op.getOperand(0);
11662
11663 switch (Op0.getOpcode()) {
11664 default:
11665 return SDValue();
11666 case ISD::EXTRACT_SUBVECTOR: {
11667 assert(Op0.getNumOperands() == 2 &&
11668 isa<ConstantSDNode>(Op0->getOperand(1)) &&
11669 "Node should have 2 operands with second one being a constant!");
11670
11671 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11672 return SDValue();
11673
11674 // Custom lower is only done for high or low doubleword.
11675 int Idx = Op0.getConstantOperandVal(1);
11676 if (Idx % 2 != 0)
11677 return SDValue();
11678
11679 // Since input is v4f32, at this point Idx is either 0 or 2.
11680 // Shift to get the doubleword position we want.
11681 int DWord = Idx >> 1;
11682
11683 // High and low word positions are different on little endian.
11684 if (Subtarget.isLittleEndian())
11685 DWord ^= 0x1;
11686
11687 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11688 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11689 }
11690 case ISD::FADD:
11691 case ISD::FMUL:
11692 case ISD::FSUB: {
11693 SDValue NewLoad[2];
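// If both inputs of the v2f32 arithmetic op are loads, re-load them as the
// low half of a v4f32 with LD_VSX_LH, perform the operation in v4f32, and
// extend doubleword 0 of the result to v2f64.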
11694 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11695 // Ensure both input are loads.
11696 SDValue LdOp = Op0.getOperand(i);
11697 if (LdOp.getOpcode() != ISD::LOAD)
11698 return SDValue();
11699 // Generate new load node.
11700 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11701 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11702 NewLoad[i] = DAG.getMemIntrinsicNode(
11703 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11704 LD->getMemoryVT(), LD->getMemOperand());
11705 }
11706 SDValue NewOp =
11707 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11708 NewLoad[1], Op0.getNode()->getFlags());
11709 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11710 DAG.getConstant(0, dl, MVT::i32));
11711 }
11712 case ISD::LOAD: {
11713 LoadSDNode *LD = cast<LoadSDNode>(Op0);
11714 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11715 SDValue NewLd = DAG.getMemIntrinsicNode(
11716 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11717 LD->getMemoryVT(), LD->getMemOperand());
11718 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11719 DAG.getConstant(0, dl, MVT::i32));
11720 }
11721 }
11722 llvm_unreachable("ERROR: Should return for all cases within switch.");
11723}
11724
11725/// LowerOperation - Provide custom lowering hooks for some operations.
11726///
11727 SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
11728 switch (Op.getOpcode()) {
11729 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11730 case ISD::FPOW: return lowerPow(Op, DAG);
11731 case ISD::FSIN: return lowerSin(Op, DAG);
11732 case ISD::FCOS: return lowerCos(Op, DAG);
11733 case ISD::FLOG: return lowerLog(Op, DAG);
11734 case ISD::FLOG10: return lowerLog10(Op, DAG);
11735 case ISD::FEXP: return lowerExp(Op, DAG);
11736 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
11737 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
11738 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
11739 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
11740 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
11741 case ISD::STRICT_FSETCC:
11742 case ISD::STRICT_FSETCCS:
11743 case ISD::SETCC: return LowerSETCC(Op, DAG);
11744 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
11745 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
11746
11747 case ISD::INLINEASM:
11748 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
11749 // Variable argument lowering.
11750 case ISD::VASTART: return LowerVASTART(Op, DAG);
11751 case ISD::VAARG: return LowerVAARG(Op, DAG);
11752 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
11753
11754 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
11755 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11756 case ISD::GET_DYNAMIC_AREA_OFFSET:
11757 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11758
11759 // Exception handling lowering.
11760 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
11761 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
11762 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
11763
11764 case ISD::LOAD: return LowerLOAD(Op, DAG);
11765 case ISD::STORE: return LowerSTORE(Op, DAG);
11766 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
11767 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
11768 case ISD::STRICT_FP_TO_UINT:
11769 case ISD::STRICT_FP_TO_SINT:
11770 case ISD::FP_TO_UINT:
11771 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11772 case ISD::STRICT_UINT_TO_FP:
11773 case ISD::STRICT_SINT_TO_FP:
11774 case ISD::UINT_TO_FP:
11775 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11776 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
11777
11778 // Lower 64-bit shifts.
11779 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
11780 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
11781 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
11782
11783 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
11784 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
11785
11786 // Vector-related lowering.
11787 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
11788 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
11789 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11790 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
11791 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
11792 case ISD::MUL: return LowerMUL(Op, DAG);
11793 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
11794 case ISD::STRICT_FP_ROUND:
11795 case ISD::FP_ROUND:
11796 return LowerFP_ROUND(Op, DAG);
11797 case ISD::ROTL: return LowerROTL(Op, DAG);
11798
11799 // For counter-based loop handling.
11800 case ISD::INTRINSIC_W_CHAIN: return SDValue();
11801
11802 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
11803
11804 // Frame & Return address.
11805 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
11806 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
11807
11808 case ISD::INTRINSIC_VOID:
11809 return LowerINTRINSIC_VOID(Op, DAG);
11810 case ISD::BSWAP:
11811 return LowerBSWAP(Op, DAG);
11812 case ISD::ATOMIC_CMP_SWAP:
11813 return LowerATOMIC_CMP_SWAP(Op, DAG);
11814 case ISD::ATOMIC_STORE:
11815 return LowerATOMIC_LOAD_STORE(Op, DAG);
11816 case ISD::IS_FPCLASS:
11817 return LowerIS_FPCLASS(Op, DAG);
11818 }
11819}
11820
11821 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
11822 SmallVectorImpl<SDValue> &Results,
11823 SelectionDAG &DAG) const {
11824 SDLoc dl(N);
11825 switch (N->getOpcode()) {
11826 default:
11827 llvm_unreachable("Do not know how to custom type legalize this operation!");
11828 case ISD::ATOMIC_LOAD: {
11829 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11830 Results.push_back(Res);
11831 Results.push_back(Res.getValue(1));
11832 break;
11833 }
11834 case ISD::READCYCLECOUNTER: {
11835 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
11836 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11837
11838 Results.push_back(
11839 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11840 Results.push_back(RTB.getValue(2));
11841 break;
11842 }
11843 case ISD::INTRINSIC_W_CHAIN: {
11844 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
11845 break;
11846
11847 assert(N->getValueType(0) == MVT::i1 &&
11848 "Unexpected result type for CTR decrement intrinsic");
11849 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11850 N->getValueType(0));
11851 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11852 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11853 N->getOperand(1));
11854
11855 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11856 Results.push_back(NewInt.getValue(1));
11857 break;
11858 }
11859 case ISD::INTRINSIC_WO_CHAIN: {
11860 switch (N->getConstantOperandVal(0)) {
11861 case Intrinsic::ppc_pack_longdouble:
11862 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11863 N->getOperand(2), N->getOperand(1)));
11864 break;
11865 case Intrinsic::ppc_maxfe:
11866 case Intrinsic::ppc_minfe:
11867 case Intrinsic::ppc_fnmsub:
11868 case Intrinsic::ppc_convert_f128_to_ppcf128:
11869 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11870 break;
11871 }
11872 break;
11873 }
11874 case ISD::VAARG: {
11875 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11876 return;
11877
11878 EVT VT = N->getValueType(0);
11879
11880 if (VT == MVT::i64) {
11881 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11882
11883 Results.push_back(NewNode);
11884 Results.push_back(NewNode.getValue(1));
11885 }
11886 return;
11887 }
11888 case ISD::STRICT_FP_TO_SINT:
11889 case ISD::STRICT_FP_TO_UINT:
11890 case ISD::FP_TO_SINT:
11891 case ISD::FP_TO_UINT: {
11892 // LowerFP_TO_INT() can only handle f32 and f64.
11893 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11894 MVT::ppcf128)
11895 return;
11896 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
11897 Results.push_back(LoweredValue);
11898 if (N->isStrictFPOpcode())
11899 Results.push_back(LoweredValue.getValue(1));
11900 return;
11901 }
11902 case ISD::TRUNCATE: {
11903 if (!N->getValueType(0).isVector())
11904 return;
11905 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11906 if (Lowered)
11907 Results.push_back(Lowered);
11908 return;
11909 }
11910 case ISD::FSHL:
11911 case ISD::FSHR:
11912 // Don't handle funnel shifts here.
11913 return;
11914 case ISD::BITCAST:
11915 // Don't handle bitcast here.
11916 return;
11917 case ISD::FP_EXTEND:
11918 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11919 if (Lowered)
11920 Results.push_back(Lowered);
11921 return;
11922 }
11923}
11924
11925//===----------------------------------------------------------------------===//
11926// Other Lowering Code
11927//===----------------------------------------------------------------------===//
11928
11929 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
11930 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11931 Function *Func = Intrinsic::getDeclaration(M, Id);
11932 return Builder.CreateCall(Func, {});
11933}
11934
11935 // The mappings for emitLeading/TrailingFence are taken from
11936 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11937 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
11938 Instruction *Inst,
11939 AtomicOrdering Ord) const {
11940 if (Ord == AtomicOrdering::SequentiallyConsistent)
11941 return callIntrinsic(Builder, Intrinsic::ppc_sync);
11942 if (isReleaseOrStronger(Ord))
11943 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11944 return nullptr;
11945}
11946
11947 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
11948 Instruction *Inst,
11949 AtomicOrdering Ord) const {
11950 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11951 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11952 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11953 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11954 if (isa<LoadInst>(Inst))
11955 return Builder.CreateCall(
11956 Intrinsic::getDeclaration(
11957 Builder.GetInsertBlock()->getParent()->getParent(),
11958 Intrinsic::ppc_cfence, {Inst->getType()}),
11959 {Inst});
11960 // FIXME: Can use isync for rmw operation.
11961 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11962 }
11963 return nullptr;
11964}
11965
11966 MachineBasicBlock *PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI,
11967 MachineBasicBlock *BB,
11968 unsigned AtomicSize,
11969 unsigned BinOpcode,
11970 unsigned CmpOpcode,
11971 unsigned CmpPred) const {
11972 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11973 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11974
11975 auto LoadMnemonic = PPC::LDARX;
11976 auto StoreMnemonic = PPC::STDCX;
11977 switch (AtomicSize) {
11978 default:
11979 llvm_unreachable("Unexpected size of atomic entity");
11980 case 1:
11981 LoadMnemonic = PPC::LBARX;
11982 StoreMnemonic = PPC::STBCX;
11983 assert(Subtarget.hasPartwordAtomics() && "Atomic sizes below 4 require partword atomics");
11984 break;
11985 case 2:
11986 LoadMnemonic = PPC::LHARX;
11987 StoreMnemonic = PPC::STHCX;
11988 assert(Subtarget.hasPartwordAtomics() && "Atomic sizes below 4 require partword atomics");
11989 break;
11990 case 4:
11991 LoadMnemonic = PPC::LWARX;
11992 StoreMnemonic = PPC::STWCX;
11993 break;
11994 case 8:
11995 LoadMnemonic = PPC::LDARX;
11996 StoreMnemonic = PPC::STDCX;
11997 break;
11998 }
11999
12000 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12001 MachineFunction *F = BB->getParent();
12002 MachineFunction::iterator It = ++BB->getIterator();
12003 
12004 Register dest = MI.getOperand(0).getReg();
12005 Register ptrA = MI.getOperand(1).getReg();
12006 Register ptrB = MI.getOperand(2).getReg();
12007 Register incr = MI.getOperand(3).getReg();
12008 DebugLoc dl = MI.getDebugLoc();
12009
12010 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12011 MachineBasicBlock *loop2MBB =
12012 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12013 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12014 F->insert(It, loopMBB);
12015 if (CmpOpcode)
12016 F->insert(It, loop2MBB);
12017 F->insert(It, exitMBB);
12018 exitMBB->splice(exitMBB->begin(), BB,
12019 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12020 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12021 
12022 MachineRegisterInfo &RegInfo = F->getRegInfo();
12023 Register TmpReg = (!BinOpcode) ? incr :
12024 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
12025 : &PPC::GPRCRegClass);
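// TmpReg holds the value that st[wd]cx. writes back; for ATOMIC_SWAP
// (BinOpcode == 0) the incoming value is stored directly.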
12026
12027 // thisMBB:
12028 // ...
12029 // fallthrough --> loopMBB
12030 BB->addSuccessor(loopMBB);
12031
12032 // loopMBB:
12033 // l[wd]arx dest, ptr
12034 // add r0, dest, incr
12035 // st[wd]cx. r0, ptr
12036 // bne- loopMBB
12037 // fallthrough --> exitMBB
12038
12039 // For max/min...
12040 // loopMBB:
12041 // l[wd]arx dest, ptr
12042 // cmpl?[wd] dest, incr
12043 // bgt exitMBB
12044 // loop2MBB:
12045 // st[wd]cx. dest, ptr
12046 // bne- loopMBB
12047 // fallthrough --> exitMBB
12048
12049 BB = loopMBB;
12050 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
12051 .addReg(ptrA).addReg(ptrB);
12052 if (BinOpcode)
12053 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
12054 if (CmpOpcode) {
12055 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12056 // Signed comparisons of byte or halfword values must be sign-extended.
12057 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
12058 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12059 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
12060 ExtReg).addReg(dest);
12061 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
12062 } else
12063 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
12064
12065 BuildMI(BB, dl, TII->get(PPC::BCC))
12066 .addImm(CmpPred)
12067 .addReg(CrReg)
12068 .addMBB(exitMBB);
12069 BB->addSuccessor(loop2MBB);
12070 BB->addSuccessor(exitMBB);
12071 BB = loop2MBB;
12072 }
12073 BuildMI(BB, dl, TII->get(StoreMnemonic))
12074 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
12075 BuildMI(BB, dl, TII->get(PPC::BCC))
12076 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
12077 BB->addSuccessor(loopMBB);
12078 BB->addSuccessor(exitMBB);
12079
12080 // exitMBB:
12081 // ...
12082 BB = exitMBB;
12083 return BB;
12084}
12085
12086 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
12087 switch(MI.getOpcode()) {
12088 default:
12089 return false;
12090 case PPC::COPY:
12091 return TII->isSignExtended(MI.getOperand(1).getReg(),
12092 &MI.getMF()->getRegInfo());
12093 case PPC::LHA:
12094 case PPC::LHA8:
12095 case PPC::LHAU:
12096 case PPC::LHAU8:
12097 case PPC::LHAUX:
12098 case PPC::LHAUX8:
12099 case PPC::LHAX:
12100 case PPC::LHAX8:
12101 case PPC::LWA:
12102 case PPC::LWAUX:
12103 case PPC::LWAX:
12104 case PPC::LWAX_32:
12105 case PPC::LWA_32:
12106 case PPC::PLHA:
12107 case PPC::PLHA8:
12108 case PPC::PLHA8pc:
12109 case PPC::PLHApc:
12110 case PPC::PLWA:
12111 case PPC::PLWA8:
12112 case PPC::PLWA8pc:
12113 case PPC::PLWApc:
12114 case PPC::EXTSB:
12115 case PPC::EXTSB8:
12116 case PPC::EXTSB8_32_64:
12117 case PPC::EXTSB8_rec:
12118 case PPC::EXTSB_rec:
12119 case PPC::EXTSH:
12120 case PPC::EXTSH8:
12121 case PPC::EXTSH8_32_64:
12122 case PPC::EXTSH8_rec:
12123 case PPC::EXTSH_rec:
12124 case PPC::EXTSW:
12125 case PPC::EXTSWSLI:
12126 case PPC::EXTSWSLI_32_64:
12127 case PPC::EXTSWSLI_32_64_rec:
12128 case PPC::EXTSWSLI_rec:
12129 case PPC::EXTSW_32:
12130 case PPC::EXTSW_32_64:
12131 case PPC::EXTSW_32_64_rec:
12132 case PPC::EXTSW_rec:
12133 case PPC::SRAW:
12134 case PPC::SRAWI:
12135 case PPC::SRAWI_rec:
12136 case PPC::SRAW_rec:
12137 return true;
12138 }
12139 return false;
12140}
12141
12142 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
12143 MachineInstr &MI, MachineBasicBlock *BB,
12144 bool is8bit, // operation
12145 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
12146 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12147 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
12148
12149 // If this is a signed comparison and the value being compared is not known
12150 // to be sign extended, sign extend it here.
12151 DebugLoc dl = MI.getDebugLoc();
12152 MachineFunction *F = BB->getParent();
12153 MachineRegisterInfo &RegInfo = F->getRegInfo();
12154 Register incr = MI.getOperand(3).getReg();
12155 bool IsSignExtended =
12156 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
12157
12158 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
12159 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12160 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
12161 .addReg(MI.getOperand(3).getReg());
12162 MI.getOperand(3).setReg(ValueReg);
12163 incr = ValueReg;
12164 }
12165 // If we support part-word atomic mnemonics, just use them.
12166 if (Subtarget.hasPartwordAtomics())
12167 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
12168 CmpPred);
12169
12170 // In 64-bit mode we have to use 64 bits for addresses, even though the
12171 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
12172 // registers without caring whether they're 32 or 64, but here we're
12173 // doing actual arithmetic on the addresses.
12174 bool is64bit = Subtarget.isPPC64();
12175 bool isLittleEndian = Subtarget.isLittleEndian();
12176 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12177
12178 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12179 MachineFunction::iterator It = ++BB->getIterator();
12180 
12181 Register dest = MI.getOperand(0).getReg();
12182 Register ptrA = MI.getOperand(1).getReg();
12183 Register ptrB = MI.getOperand(2).getReg();
12184
12185 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12186 MachineBasicBlock *loop2MBB =
12187 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12188 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12189 F->insert(It, loopMBB);
12190 if (CmpOpcode)
12191 F->insert(It, loop2MBB);
12192 F->insert(It, exitMBB);
12193 exitMBB->splice(exitMBB->begin(), BB,
12194 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12195 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
12196 
12197 const TargetRegisterClass *RC =
12198 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12199 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12200
12201 Register PtrReg = RegInfo.createVirtualRegister(RC);
12202 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12203 Register ShiftReg =
12204 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12205 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
12206 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12207 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12208 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12209 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12210 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
12211 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12212 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12213 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
12214 Register Ptr1Reg;
12215 Register TmpReg =
12216 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
12217
12218 // thisMBB:
12219 // ...
12220 // fallthrough --> loopMBB
12221 BB->addSuccessor(loopMBB);
12222
12223 // The 4-byte load must be aligned, while a char or short may be
12224 // anywhere in the word. Hence all this nasty bookkeeping code.
12225 // add ptr1, ptrA, ptrB [copy if ptrA==0]
12226 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12227 // xori shift, shift1, 24 [16]
12228 // rlwinm ptr, ptr1, 0, 0, 29
12229 // slw incr2, incr, shift
12230 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12231 // slw mask, mask2, shift
12232 // loopMBB:
12233 // lwarx tmpDest, ptr
12234 // add tmp, tmpDest, incr2
12235 // andc tmp2, tmpDest, mask
12236 // and tmp3, tmp, mask
12237 // or tmp4, tmp3, tmp2
12238 // stwcx. tmp4, ptr
12239 // bne- loopMBB
12240 // fallthrough --> exitMBB
12241 // srw SrwDest, tmpDest, shift
12242 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
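// Endianness note: shift1 is eight times the byte offset of the partword
// within its aligned word. Little-endian targets use it directly; big-endian
// targets first flip it with the XORI so the shift counts from the
// most-significant end.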
12243 if (ptrA != ZeroReg) {
12244 Ptr1Reg = RegInfo.createVirtualRegister(RC);
12245 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12246 .addReg(ptrA)
12247 .addReg(ptrB);
12248 } else {
12249 Ptr1Reg = ptrB;
12250 }
12251 // We need to use a 32-bit subregister here to avoid a register-class
12252 // mismatch in 64-bit mode.
12253 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12254 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12255 .addImm(3)
12256 .addImm(27)
12257 .addImm(is8bit ? 28 : 27);
12258 if (!isLittleEndian)
12259 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12260 .addReg(Shift1Reg)
12261 .addImm(is8bit ? 24 : 16);
12262 if (is64bit)
12263 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12264 .addReg(Ptr1Reg)
12265 .addImm(0)
12266 .addImm(61);
12267 else
12268 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12269 .addReg(Ptr1Reg)
12270 .addImm(0)
12271 .addImm(0)
12272 .addImm(29);
12273 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
12274 if (is8bit)
12275 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12276 else {
12277 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12278 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12279 .addReg(Mask3Reg)
12280 .addImm(65535);
12281 }
12282 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12283 .addReg(Mask2Reg)
12284 .addReg(ShiftReg);
12285
12286 BB = loopMBB;
12287 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12288 .addReg(ZeroReg)
12289 .addReg(PtrReg);
12290 if (BinOpcode)
12291 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
12292 .addReg(Incr2Reg)
12293 .addReg(TmpDestReg);
12294 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12295 .addReg(TmpDestReg)
12296 .addReg(MaskReg);
12297 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
12298 if (CmpOpcode) {
12299 // For unsigned comparisons, we can directly compare the shifted values.
12300 // For signed comparisons we shift and sign extend.
12301 Register SReg = RegInfo.createVirtualRegister(GPRC);
12302 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12303 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
12304 .addReg(TmpDestReg)
12305 .addReg(MaskReg);
12306 unsigned ValueReg = SReg;
12307 unsigned CmpReg = Incr2Reg;
12308 if (CmpOpcode == PPC::CMPW) {
12309 ValueReg = RegInfo.createVirtualRegister(GPRC);
12310 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
12311 .addReg(SReg)
12312 .addReg(ShiftReg);
12313 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
12314 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
12315 .addReg(ValueReg);
12316 ValueReg = ValueSReg;
12317 CmpReg = incr;
12318 }
12319 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
12320 BuildMI(BB, dl, TII->get(PPC::BCC))
12321 .addImm(CmpPred)
12322 .addReg(CrReg)
12323 .addMBB(exitMBB);
12324 BB->addSuccessor(loop2MBB);
12325 BB->addSuccessor(exitMBB);
12326 BB = loop2MBB;
12327 }
12328 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
12329 BuildMI(BB, dl, TII->get(PPC::STWCX))
12330 .addReg(Tmp4Reg)
12331 .addReg(ZeroReg)
12332 .addReg(PtrReg);
12333 BuildMI(BB, dl, TII->get(PPC::BCC))
12334 .addImm(PPC::PRED_NE)
12335 .addReg(PPC::CR0)
12336 .addMBB(loopMBB);
12337 BB->addSuccessor(loopMBB);
12338 BB->addSuccessor(exitMBB);
12339
12340 // exitMBB:
12341 // ...
12342 BB = exitMBB;
12343 // Since the shift amount is not a constant, we need to clear
12344 // the upper bits with a separate RLWINM.
12345 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
12346 .addReg(SrwDestReg)
12347 .addImm(0)
12348 .addImm(is8bit ? 24 : 16)
12349 .addImm(31);
12350 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
12351 .addReg(TmpDestReg)
12352 .addReg(ShiftReg);
12353 return BB;
12354}
12355
12356 MachineBasicBlock *
12357 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
12358 MachineBasicBlock *MBB) const {
12359 DebugLoc DL = MI.getDebugLoc();
12360 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12361 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
12362
12363 MachineFunction *MF = MBB->getParent();
12364 MachineRegisterInfo &MRI = MF->getRegInfo();
12365 
12366 const BasicBlock *BB = MBB->getBasicBlock();
12367 MachineFunction::iterator I = ++MBB->getIterator();
12368 
12369 Register DstReg = MI.getOperand(0).getReg();
12370 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
12371 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
12372 Register mainDstReg = MRI.createVirtualRegister(RC);
12373 Register restoreDstReg = MRI.createVirtualRegister(RC);
12374
12375 MVT PVT = getPointerTy(MF->getDataLayout());
12376 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12377 "Invalid Pointer Size!");
12378 // For v = setjmp(buf), we generate
12379 //
12380 // thisMBB:
12381 // SjLjSetup mainMBB
12382 // bl mainMBB
12383 // v_restore = 1
12384 // b sinkMBB
12385 //
12386 // mainMBB:
12387 // buf[LabelOffset] = LR
12388 // v_main = 0
12389 //
12390 // sinkMBB:
12391 // v = phi(main, restore)
12392 //
12393
12394 MachineBasicBlock *thisMBB = MBB;
12395 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
12396 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
12397 MF->insert(I, mainMBB);
12398 MF->insert(I, sinkMBB);
12399
12400 MachineInstrBuilder MIB;
12401 
12402 // Transfer the remainder of BB and its successor edges to sinkMBB.
12403 sinkMBB->splice(sinkMBB->begin(), MBB,
12404 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12405 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
12406 
12407 // Note that the structure of the jmp_buf used here is not compatible
12408 // with that used by libc, and is not designed to be. Specifically, it
12409 // stores only those 'reserved' registers that LLVM does not otherwise
12410 // understand how to spill. Also, by convention, by the time this
12411 // intrinsic is called, Clang has already stored the frame address in the
12412 // first slot of the buffer and stack address in the third. Following the
12413 // X86 target code, we'll store the jump address in the second slot. We also
12414 // need to save the TOC pointer (R2) to handle jumps between shared
12415 // libraries, and that will be stored in the fourth slot. The thread
12416 // identifier (R13) is not affected.
12417
12418 // thisMBB:
12419 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12420 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12421 const int64_t BPOffset = 4 * PVT.getStoreSize();
12422
12423 // Prepare IP either in reg.
12424 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
12425 Register LabelReg = MRI.createVirtualRegister(PtrRC);
12426 Register BufReg = MI.getOperand(1).getReg();
12427
12428 if (Subtarget.is64BitELFABI()) {
12429 setUsesTOCBasePtr(*MBB->getParent());
12430 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
12431 .addReg(PPC::X2)
12432 .addImm(TOCOffset)
12433 .addReg(BufReg)
12434 .cloneMemRefs(MI);
12435 }
12436
12437 // Naked functions never have a base pointer, and so we use r1. For all
12438 // other functions, this decision must be delayed until during PEI.
12439 unsigned BaseReg;
12440 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
12441 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
12442 else
12443 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
12444
12445 MIB = BuildMI(*thisMBB, MI, DL,
12446 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
12447 .addReg(BaseReg)
12448 .addImm(BPOffset)
12449 .addReg(BufReg)
12450 .cloneMemRefs(MI);
12451
12452 // Setup
12453 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
12454 MIB.addRegMask(TRI->getNoPreservedMask());
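// Attaching the no-preserved register mask to the call-like branch forces
// everything live across the setjmp to be spilled, since any register may
// be clobbered on the longjmp path.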
12455
12456 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
12457
12458 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
12459 .addMBB(mainMBB);
12460 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
12461
12462 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
12463 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
12464
12465 // mainMBB:
12466 // mainDstReg = 0
12467 MIB =
12468 BuildMI(mainMBB, DL,
12469 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
12470
12471 // Store IP
12472 if (Subtarget.isPPC64()) {
12473 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
12474 .addReg(LabelReg)
12475 .addImm(LabelOffset)
12476 .addReg(BufReg);
12477 } else {
12478 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
12479 .addReg(LabelReg)
12480 .addImm(LabelOffset)
12481 .addReg(BufReg);
12482 }
12483 MIB.cloneMemRefs(MI);
12484
12485 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
12486 mainMBB->addSuccessor(sinkMBB);
12487
12488 // sinkMBB:
12489 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
12490 TII->get(PPC::PHI), DstReg)
12491 .addReg(mainDstReg).addMBB(mainMBB)
12492 .addReg(restoreDstReg).addMBB(thisMBB);
12493
12494 MI.eraseFromParent();
12495 return sinkMBB;
12496}
12497
12498 MachineBasicBlock *
12499 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
12500 MachineBasicBlock *MBB) const {
12501 DebugLoc DL = MI.getDebugLoc();
12502 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12503
12504 MachineFunction *MF = MBB->getParent();
12505 MachineRegisterInfo &MRI = MF->getRegInfo();
12506 
12507 MVT PVT = getPointerTy(MF->getDataLayout());
12508 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
12509 "Invalid Pointer Size!");
12510
12511 const TargetRegisterClass *RC =
12512 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12513 Register Tmp = MRI.createVirtualRegister(RC);
12514 // Since FP is only updated here but NOT referenced, it's treated as GPR.
12515 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
12516 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
12517 unsigned BP =
12518 (PVT == MVT::i64)
12519 ? PPC::X30
12520 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
12521 : PPC::R30);
12522
12523 MachineInstrBuilder MIB;
12524 
12525 const int64_t LabelOffset = 1 * PVT.getStoreSize();
12526 const int64_t SPOffset = 2 * PVT.getStoreSize();
12527 const int64_t TOCOffset = 3 * PVT.getStoreSize();
12528 const int64_t BPOffset = 4 * PVT.getStoreSize();
12529
12530 Register BufReg = MI.getOperand(0).getReg();
12531
12532 // Reload FP (the jumped-to function may not have had a
12533 // frame pointer, and if so, then its r31 will be restored
12534 // as necessary).
12535 if (PVT == MVT::i64) {
12536 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
12537 .addImm(0)
12538 .addReg(BufReg);
12539 } else {
12540 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
12541 .addImm(0)
12542 .addReg(BufReg);
12543 }
12544 MIB.cloneMemRefs(MI);
12545
12546 // Reload IP
12547 if (PVT == MVT::i64) {
12548 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
12549 .addImm(LabelOffset)
12550 .addReg(BufReg);
12551 } else {
12552 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
12553 .addImm(LabelOffset)
12554 .addReg(BufReg);
12555 }
12556 MIB.cloneMemRefs(MI);
12557
12558 // Reload SP
12559 if (PVT == MVT::i64) {
12560 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
12561 .addImm(SPOffset)
12562 .addReg(BufReg);
12563 } else {
12564 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
12565 .addImm(SPOffset)
12566 .addReg(BufReg);
12567 }
12568 MIB.cloneMemRefs(MI);
12569
12570 // Reload BP
12571 if (PVT == MVT::i64) {
12572 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
12573 .addImm(BPOffset)
12574 .addReg(BufReg);
12575 } else {
12576 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
12577 .addImm(BPOffset)
12578 .addReg(BufReg);
12579 }
12580 MIB.cloneMemRefs(MI);
12581
12582 // Reload TOC
12583 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
12584 setUsesTOCBasePtr(*MBB->getParent());
12585 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
12586 .addImm(TOCOffset)
12587 .addReg(BufReg)
12588 .cloneMemRefs(MI);
12589 }
12590
12591 // Jump
12592 BuildMI(*MBB, MI, DL,
12593 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
12594 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
12595
12596 MI.eraseFromParent();
12597 return MBB;
12598}
12599
12600 bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
12601 // If the function specifically requests inline stack probes, emit them.
12602 if (MF.getFunction().hasFnAttribute("probe-stack"))
12603 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
12604 "inline-asm";
12605 return false;
12606}
12607
12608 unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
12609 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
12610 unsigned StackAlign = TFI->getStackAlignment();
12611 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
12612 "Unexpected stack alignment");
12613 // The default stack probe size is 4096 if the function has no
12614 // stack-probe-size attribute.
12615 const Function &Fn = MF.getFunction();
12616 unsigned StackProbeSize =
12617 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
12618 // Round down to the stack alignment.
12619 StackProbeSize &= ~(StackAlign - 1);
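// For example, a "stack-probe-size" of 4100 with a 16-byte stack alignment
// is rounded down to 4096.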
12620 return StackProbeSize ? StackProbeSize : StackAlign;
12621}
12622
12623 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
12624 // into three phases. In the first phase, it uses the pseudo instruction
12625 // PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer and
12626 // FinalStackPtr. In the second phase, it generates a loop that probes blocks.
12627 // Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result
12628 // of MaxCallFrameSize so that it can calculate the correct data area pointer.
12631 MachineBasicBlock *MBB) const {
12632 const bool isPPC64 = Subtarget.isPPC64();
12633 MachineFunction *MF = MBB->getParent();
12634 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12635 DebugLoc DL = MI.getDebugLoc();
12636 const unsigned ProbeSize = getStackProbeSize(*MF);
12637 const BasicBlock *ProbedBB = MBB->getBasicBlock();
12638 MachineRegisterInfo &MRI = MF->getRegInfo();
12639 // The CFG of the probing stack looks as follows:
12640 // +-----+
12641 // | MBB |
12642 // +--+--+
12643 // |
12644 // +----v----+
12645 // +--->+ TestMBB +---+
12646 // | +----+----+ |
12647 // | | |
12648 // | +-----v----+ |
12649 // +---+ BlockMBB | |
12650 // +----------+ |
12651 // |
12652 // +---------+ |
12653 // | TailMBB +<--+
12654 // +---------+
12655 // In MBB, calculate previous frame pointer and final stack pointer.
12656 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
12657 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
12658 // TailMBB is spliced via \p MI.
12659 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
12660 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
12661 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
12662
12663 MachineFunction::iterator MBBIter = ++MBB->getIterator();
12664 MF->insert(MBBIter, TestMBB);
12665 MF->insert(MBBIter, BlockMBB);
12666 MF->insert(MBBIter, TailMBB);
12667
12668 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
12669 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12670
12671 Register DstReg = MI.getOperand(0).getReg();
12672 Register NegSizeReg = MI.getOperand(1).getReg();
12673 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
12674 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12675 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12676 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12677
12678 // Since the value of NegSizeReg might be realigned in the prologue/epilogue
12679 // insertion pass, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get
12680 // the actual FramePointer and NegSize.
12681 unsigned ProbeOpc;
12682 if (!MRI.hasOneNonDBGUse(NegSizeReg))
12683 ProbeOpc =
12684 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12685 else
12686 // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG, ActualNegSizeReg
12687 // and NegSizeReg will be allocated to the same physical register, avoiding
12688 // a redundant copy when NegSizeReg has only one use, namely the current MI,
12689 // which will then be replaced by PREPARE_PROBED_ALLOCA.
12690 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12691 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12692 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12693 .addDef(ActualNegSizeReg)
12694 .addReg(NegSizeReg)
12695 .add(MI.getOperand(2))
12696 .add(MI.getOperand(3));
12697
12698 // Calculate the final stack pointer, which equals SP + ActualNegSize.
12699 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12700 FinalStackPtr)
12701 .addReg(SPReg)
12702 .addReg(ActualNegSizeReg);
12703
12704 // Materialize a scratch register for update.
12705 int64_t NegProbeSize = -(int64_t)ProbeSize;
12706 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12707 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12708 if (!isInt<16>(NegProbeSize)) {
12709 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12710 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12711 .addImm(NegProbeSize >> 16);
12712 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12713 ScratchReg)
12714 .addReg(TempReg)
12715 .addImm(NegProbeSize & 0xFFFF);
12716 } else
12717 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12718 .addImm(NegProbeSize);
12719
12720 {
12721 // Probing leading residual part.
12722 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12723 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12724 .addReg(ActualNegSizeReg)
12725 .addReg(ScratchReg);
12726 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12727 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12728 .addReg(Div)
12729 .addReg(ScratchReg);
12730 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12731 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12732 .addReg(Mul)
12733 .addReg(ActualNegSizeReg);
12734 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12735 .addReg(FramePointer)
12736 .addReg(SPReg)
12737 .addReg(NegMod);
12738 }
12739
12740 {
12741 // The remaining part should be a multiple of ProbeSize.
12742 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12743 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12744 .addReg(SPReg)
12745 .addReg(FinalStackPtr);
12746 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12747 .addImm(PPC::PRED_EQ)
12748 .addReg(CmpResult)
12749 .addMBB(TailMBB);
12750 TestMBB->addSuccessor(BlockMBB);
12751 TestMBB->addSuccessor(TailMBB);
12752 }
12753
12754 {
12755 // Touch the block.
12756 // |P...|P...|P...
12757 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12758 .addReg(FramePointer)
12759 .addReg(SPReg)
12760 .addReg(ScratchReg);
12761 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12762 BlockMBB->addSuccessor(TestMBB);
12763 }
12764
12765 // Calculation of MaxCallFrameSize is deferred to the prologue/epilogue pass;
12766 // use the DYNAREAOFFSET pseudo instruction to get the future result.
12767 Register MaxCallFrameSizeReg =
12768 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12769 BuildMI(TailMBB, DL,
12770 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12771 MaxCallFrameSizeReg)
12772 .add(MI.getOperand(2))
12773 .add(MI.getOperand(3));
12774 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12775 .addReg(SPReg)
12776 .addReg(MaxCallFrameSizeReg);
12777
12778 // Splice instructions after MI to TailMBB.
12779 TailMBB->splice(TailMBB->end(), MBB,
12780 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12781 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
12782 MBB->addSuccessor(TestMBB);
12783
12784 // Delete the pseudo instruction.
12785 MI.eraseFromParent();
12786
12787 ++NumDynamicAllocaProbed;
12788 return TailMBB;
12789}
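// A rough sketch of what the blocks built above expand to on a 64-bit target
// with a 4096-byte probe size (register names are illustrative):
//
//   li rScratch, -4096
//   divd rDiv, rActualNegSize, rScratch
//   mulld rMul, rDiv, rScratch
//   subf rNegMod, rMul, rActualNegSize
//   stdux rFP, r1, rNegMod        ; probe the leading residual part
// TestMBB:
//   cmpd r1, rFinalStackPtr
//   beq TailMBB
// BlockMBB:
//   stdux rFP, r1, rScratch       ; touch one ProbeSize block
//   b TestMBB
// TailMBB:
//   add rDst, r1, rMaxCallFrameSize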
12790
12791static bool IsSelectCC(MachineInstr &MI) {
12792 switch (MI.getOpcode()) {
12793 case PPC::SELECT_CC_I4:
12794 case PPC::SELECT_CC_I8:
12795 case PPC::SELECT_CC_F4:
12796 case PPC::SELECT_CC_F8:
12797 case PPC::SELECT_CC_F16:
12798 case PPC::SELECT_CC_VRRC:
12799 case PPC::SELECT_CC_VSFRC:
12800 case PPC::SELECT_CC_VSSRC:
12801 case PPC::SELECT_CC_VSRC:
12802 case PPC::SELECT_CC_SPE4:
12803 case PPC::SELECT_CC_SPE:
12804 return true;
12805 default:
12806 return false;
12807 }
12808}
12809
12810static bool IsSelect(MachineInstr &MI) {
12811 switch (MI.getOpcode()) {
12812 case PPC::SELECT_I4:
12813 case PPC::SELECT_I8:
12814 case PPC::SELECT_F4:
12815 case PPC::SELECT_F8:
12816 case PPC::SELECT_F16:
12817 case PPC::SELECT_SPE:
12818 case PPC::SELECT_SPE4:
12819 case PPC::SELECT_VRRC:
12820 case PPC::SELECT_VSFRC:
12821 case PPC::SELECT_VSSRC:
12822 case PPC::SELECT_VSRC:
12823 return true;
12824 default:
12825 return false;
12826 }
12827}
12828
12829MachineBasicBlock *
12830PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
12831 MachineBasicBlock *BB) const {
12832 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12833 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12834 if (Subtarget.is64BitELFABI() &&
12835 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12836 !Subtarget.isUsingPCRelativeCalls()) {
12837 // Call lowering should have added an r2 operand to indicate a dependence
12838 // on the TOC base pointer value. It can't, however, because there is no
12839 // way to mark the dependence as implicit there, and so the stackmap code
12840 // will confuse it with a regular operand. Instead, add the dependence
12841 // here.
12842 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12843 }
12844
12845 return emitPatchPoint(MI, BB);
12846 }
12847
12848 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12849 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12850 return emitEHSjLjSetJmp(MI, BB);
12851 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12852 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12853 return emitEHSjLjLongJmp(MI, BB);
12854 }
12855
12856 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12857
12858 // To "insert" these instructions we actually have to insert their
12859 // control-flow patterns.
12860 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12861 MachineFunction::iterator It = ++BB->getIterator();
12862
12863 MachineFunction *F = BB->getParent();
12864 MachineRegisterInfo &MRI = F->getRegInfo();
12865
12866 if (Subtarget.hasISEL() &&
12867 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12868 MI.getOpcode() == PPC::SELECT_CC_I8 ||
12869 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
12870 SmallVector<MachineOperand, 2> Cond;
12871 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12872 MI.getOpcode() == PPC::SELECT_CC_I8)
12873 Cond.push_back(MI.getOperand(4));
12874 else
12875 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
12876 Cond.push_back(MI.getOperand(1));
12877
12878 DebugLoc dl = MI.getDebugLoc();
12879 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12880 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12881 } else if (IsSelectCC(MI) || IsSelect(MI)) {
12882 // The incoming instruction knows the destination vreg to set, the
12883 // condition code register to branch on, the true/false values to
12884 // select between, and a branch opcode to use.
12885
12886 // thisMBB:
12887 // ...
12888 // TrueVal = ...
12889 // cmpTY ccX, r1, r2
12890 // bCC sinkMBB
12891 // fallthrough --> copy0MBB
12892 MachineBasicBlock *thisMBB = BB;
12893 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12894 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12895 DebugLoc dl = MI.getDebugLoc();
12896 F->insert(It, copy0MBB);
12897 F->insert(It, sinkMBB);
12898
12899 // Set the call frame size on entry to the new basic blocks.
12900 // See https://reviews.llvm.org/D156113.
12901 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
12902 copy0MBB->setCallFrameSize(CallFrameSize);
12903 sinkMBB->setCallFrameSize(CallFrameSize);
12904
12905 // Transfer the remainder of BB and its successor edges to sinkMBB.
12906 sinkMBB->splice(sinkMBB->begin(), BB,
12907 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12908 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12909
12910 // Next, add the true and fallthrough blocks as its successors.
12911 BB->addSuccessor(copy0MBB);
12912 BB->addSuccessor(sinkMBB);
12913
12914 if (IsSelect(MI)) {
12915 BuildMI(BB, dl, TII->get(PPC::BC))
12916 .addReg(MI.getOperand(1).getReg())
12917 .addMBB(sinkMBB);
12918 } else {
12919 unsigned SelectPred = MI.getOperand(4).getImm();
12920 BuildMI(BB, dl, TII->get(PPC::BCC))
12921 .addImm(SelectPred)
12922 .addReg(MI.getOperand(1).getReg())
12923 .addMBB(sinkMBB);
12924 }
12925
12926 // copy0MBB:
12927 // %FalseValue = ...
12928 // # fallthrough to sinkMBB
12929 BB = copy0MBB;
12930
12931 // Update machine-CFG edges
12932 BB->addSuccessor(sinkMBB);
12933
12934 // sinkMBB:
12935 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12936 // ...
12937 BB = sinkMBB;
12938 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12939 .addReg(MI.getOperand(3).getReg())
12940 .addMBB(copy0MBB)
12941 .addReg(MI.getOperand(2).getReg())
12942 .addMBB(thisMBB);
12943 } else if (MI.getOpcode() == PPC::ReadTB) {
12944 // To read the 64-bit time-base register on a 32-bit target, we read the
12945 // two halves. Should the counter have wrapped while it was being read, we
12946 // need to try again.
12947 // ...
12948 // readLoop:
12949 // mfspr Rx,TBU # load from TBU
12950 // mfspr Ry,TB # load from TB
12951 // mfspr Rz,TBU # load from TBU
12952 // cmpw crX,Rx,Rz # check if 'old'='new'
12953 // bne readLoop # branch if they're not equal
12954 // ...
12955
12956 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12957 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12958 DebugLoc dl = MI.getDebugLoc();
12959 F->insert(It, readMBB);
12960 F->insert(It, sinkMBB);
12961
12962 // Transfer the remainder of BB and its successor edges to sinkMBB.
12963 sinkMBB->splice(sinkMBB->begin(), BB,
12964 std::next(MachineBasicBlock::iterator(MI)), BB->end());
12965 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
12966
12967 BB->addSuccessor(readMBB);
12968 BB = readMBB;
12969
12970 MachineRegisterInfo &RegInfo = F->getRegInfo();
12971 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12972 Register LoReg = MI.getOperand(0).getReg();
12973 Register HiReg = MI.getOperand(1).getReg();
12974
12975 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12976 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12977 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12978
12979 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12980
12981 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12982 .addReg(HiReg)
12983 .addReg(ReadAgainReg);
12984 BuildMI(BB, dl, TII->get(PPC::BCC))
12985 .addImm(PPC::PRED_NE)
12986 .addReg(CmpReg)
12987 .addMBB(readMBB);
12988
12989 BB->addSuccessor(readMBB);
12990 BB->addSuccessor(sinkMBB);
12991 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12992 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12993 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12994 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12995 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12996 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12997 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12998 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12999
13000 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13001 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13002 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13003 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13004 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13005 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13006 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13007 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13008
13009 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13010 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13011 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13012 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
13013 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
13014 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
13015 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
13016 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
13017
13018 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
13019 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
13020 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
13021 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
13022 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
13023 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
13024 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
13025 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
13026
13027 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
13028 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
13029 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
13030 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
13031 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
13032 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
13033 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
13034 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
13035
13036 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
13037 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
13038 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
13039 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
13040 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
13041 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
13042 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
13043 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
13044
13045 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
13046 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
13047 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
13048 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
13049 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
13050 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
13051 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
13052 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
13053
13054 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
13055 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
13056 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
13057 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
13058 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
13059 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
13060 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
13061 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
13062
13063 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
13064 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
13065 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
13066 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
13067 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
13068 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
13069 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
13070 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
13071
13072 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
13073 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
13074 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
13075 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
13076 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
13077 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
13078 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
13079 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
13080
13081 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
13082 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
13083 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
13084 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
13085 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
13086 BB = EmitAtomicBinary(MI, BB, 4, 0);
13087 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
13088 BB = EmitAtomicBinary(MI, BB, 8, 0);
13089 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
13090 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
13091 (Subtarget.hasPartwordAtomics() &&
13092 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
13093 (Subtarget.hasPartwordAtomics() &&
13094 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
13095 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
13096
13097 auto LoadMnemonic = PPC::LDARX;
13098 auto StoreMnemonic = PPC::STDCX;
13099 switch (MI.getOpcode()) {
13100 default:
13101 llvm_unreachable("Compare and swap of unknown size");
13102 case PPC::ATOMIC_CMP_SWAP_I8:
13103 LoadMnemonic = PPC::LBARX;
13104 StoreMnemonic = PPC::STBCX;
13105 assert(Subtarget.hasPartwordAtomics() && "Target does not support partword atomics.");
13106 break;
13107 case PPC::ATOMIC_CMP_SWAP_I16:
13108 LoadMnemonic = PPC::LHARX;
13109 StoreMnemonic = PPC::STHCX;
13110 assert(Subtarget.hasPartwordAtomics() && "Target does not support partword atomics.");
13111 break;
13112 case PPC::ATOMIC_CMP_SWAP_I32:
13113 LoadMnemonic = PPC::LWARX;
13114 StoreMnemonic = PPC::STWCX;
13115 break;
13116 case PPC::ATOMIC_CMP_SWAP_I64:
13117 LoadMnemonic = PPC::LDARX;
13118 StoreMnemonic = PPC::STDCX;
13119 break;
13120 }
13121 MachineRegisterInfo &RegInfo = F->getRegInfo();
13122 Register dest = MI.getOperand(0).getReg();
13123 Register ptrA = MI.getOperand(1).getReg();
13124 Register ptrB = MI.getOperand(2).getReg();
13125 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13126 Register oldval = MI.getOperand(3).getReg();
13127 Register newval = MI.getOperand(4).getReg();
13128 DebugLoc dl = MI.getDebugLoc();
13129
13130 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13131 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13132 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13133 F->insert(It, loop1MBB);
13134 F->insert(It, loop2MBB);
13135 F->insert(It, exitMBB);
13136 exitMBB->splice(exitMBB->begin(), BB,
13137 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13138 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13139
13140 // thisMBB:
13141 // ...
13142 // fallthrough --> loopMBB
13143 BB->addSuccessor(loop1MBB);
13144
13145 // loop1MBB:
13146 // l[bhwd]arx dest, ptr
13147 // cmp[wd] dest, oldval
13148 // bne- exitBB
13149 // loop2MBB:
13150 // st[bhwd]cx. newval, ptr
13151 // bne- loopMBB
13152 // b exitBB
13153 // exitBB:
13154 BB = loop1MBB;
13155 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
13156 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
13157 .addReg(dest)
13158 .addReg(oldval);
13159 BuildMI(BB, dl, TII->get(PPC::BCC))
13160 .addImm(PPC::PRED_NE)
13161 .addReg(CrReg)
13162 .addMBB(exitMBB);
13163 BB->addSuccessor(loop2MBB);
13164 BB->addSuccessor(exitMBB);
13165
13166 BB = loop2MBB;
13167 BuildMI(BB, dl, TII->get(StoreMnemonic))
13168 .addReg(newval)
13169 .addReg(ptrA)
13170 .addReg(ptrB);
13171 BuildMI(BB, dl, TII->get(PPC::BCC))
13172 .addImm(PPC::PRED_NE)
13173 .addReg(PPC::CR0)
13174 .addMBB(loop1MBB);
13175 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13176 BB->addSuccessor(loop1MBB);
13177 BB->addSuccessor(exitMBB);
13178
13179 // exitMBB:
13180 // ...
13181 BB = exitMBB;
13182 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
13183 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
13184 // We must use 64-bit registers for addresses when targeting 64-bit,
13185 // since we're actually doing arithmetic on them. Other registers
13186 // can be 32-bit.
13187 bool is64bit = Subtarget.isPPC64();
13188 bool isLittleEndian = Subtarget.isLittleEndian();
13189 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
13190
13191 Register dest = MI.getOperand(0).getReg();
13192 Register ptrA = MI.getOperand(1).getReg();
13193 Register ptrB = MI.getOperand(2).getReg();
13194 Register oldval = MI.getOperand(3).getReg();
13195 Register newval = MI.getOperand(4).getReg();
13196 DebugLoc dl = MI.getDebugLoc();
13197
13198 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
13199 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
13200 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13201 F->insert(It, loop1MBB);
13202 F->insert(It, loop2MBB);
13203 F->insert(It, exitMBB);
13204 exitMBB->splice(exitMBB->begin(), BB,
13205 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13206 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13207
13208 MachineRegisterInfo &RegInfo = F->getRegInfo();
13209 const TargetRegisterClass *RC =
13210 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13211 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13212
13213 Register PtrReg = RegInfo.createVirtualRegister(RC);
13214 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13215 Register ShiftReg =
13216 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13217 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
13218 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
13219 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
13220 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
13221 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13222 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13223 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13224 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13225 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13226 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13227 Register Ptr1Reg;
13228 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
13229 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13230 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13231 // thisMBB:
13232 // ...
13233 // fallthrough --> loopMBB
13234 BB->addSuccessor(loop1MBB);
13235
13236 // The 4-byte load must be aligned, while a char or short may be
13237 // anywhere in the word. Hence all this nasty bookkeeping code.
13238 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13239 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13240 // xori shift, shift1, 24 [16]
13241 // rlwinm ptr, ptr1, 0, 0, 29
13242 // slw newval2, newval, shift
13243 // slw oldval2, oldval,shift
13244 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13245 // slw mask, mask2, shift
13246 // and newval3, newval2, mask
13247 // and oldval3, oldval2, mask
13248 // loop1MBB:
13249 // lwarx tmpDest, ptr
13250 // and tmp, tmpDest, mask
13251 // cmpw tmp, oldval3
13252 // bne- exitBB
13253 // loop2MBB:
13254 // andc tmp2, tmpDest, mask
13255 // or tmp4, tmp2, newval3
13256 // stwcx. tmp4, ptr
13257 // bne- loop1MBB
13258 // b exitBB
13259 // exitBB:
13260 // srw dest, tmpDest, shift
13261 if (ptrA != ZeroReg) {
13262 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13263 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13264 .addReg(ptrA)
13265 .addReg(ptrB);
13266 } else {
13267 Ptr1Reg = ptrB;
13268 }
13269
13270 // We need to use a 32-bit subregister to avoid a register class mismatch in
13271 // 64-bit mode.
13272 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13273 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13274 .addImm(3)
13275 .addImm(27)
13276 .addImm(is8bit ? 28 : 27);
13277 if (!isLittleEndian)
13278 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13279 .addReg(Shift1Reg)
13280 .addImm(is8bit ? 24 : 16);
13281 if (is64bit)
13282 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13283 .addReg(Ptr1Reg)
13284 .addImm(0)
13285 .addImm(61);
13286 else
13287 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13288 .addReg(Ptr1Reg)
13289 .addImm(0)
13290 .addImm(0)
13291 .addImm(29);
13292 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
13293 .addReg(newval)
13294 .addReg(ShiftReg);
13295 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
13296 .addReg(oldval)
13297 .addReg(ShiftReg);
13298 if (is8bit)
13299 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13300 else {
13301 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13302 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13303 .addReg(Mask3Reg)
13304 .addImm(65535);
13305 }
13306 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13307 .addReg(Mask2Reg)
13308 .addReg(ShiftReg);
13309 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
13310 .addReg(NewVal2Reg)
13311 .addReg(MaskReg);
13312 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
13313 .addReg(OldVal2Reg)
13314 .addReg(MaskReg);
13315
13316 BB = loop1MBB;
13317 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13318 .addReg(ZeroReg)
13319 .addReg(PtrReg);
13320 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
13321 .addReg(TmpDestReg)
13322 .addReg(MaskReg);
13323 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
13324 .addReg(TmpReg)
13325 .addReg(OldVal3Reg);
13326 BuildMI(BB, dl, TII->get(PPC::BCC))
13327 .addImm(PPC::PRED_NE)
13328 .addReg(CrReg)
13329 .addMBB(exitMBB);
13330 BB->addSuccessor(loop2MBB);
13331 BB->addSuccessor(exitMBB);
13332
13333 BB = loop2MBB;
13334 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13335 .addReg(TmpDestReg)
13336 .addReg(MaskReg);
13337 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
13338 .addReg(Tmp2Reg)
13339 .addReg(NewVal3Reg);
13340 BuildMI(BB, dl, TII->get(PPC::STWCX))
13341 .addReg(Tmp4Reg)
13342 .addReg(ZeroReg)
13343 .addReg(PtrReg);
13344 BuildMI(BB, dl, TII->get(PPC::BCC))
13345 .addImm(PPC::PRED_NE)
13346 .addReg(PPC::CR0)
13347 .addMBB(loop1MBB);
13348 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
13349 BB->addSuccessor(loop1MBB);
13350 BB->addSuccessor(exitMBB);
13351
13352 // exitMBB:
13353 // ...
13354 BB = exitMBB;
13355 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
13356 .addReg(TmpReg)
13357 .addReg(ShiftReg);
13358 } else if (MI.getOpcode() == PPC::FADDrtz) {
13359 // This pseudo performs an FADD with rounding mode temporarily forced
13360 // to round-to-zero. We emit this via custom inserter since the FPSCR
13361 // is not modeled at the SelectionDAG level.
13362 Register Dest = MI.getOperand(0).getReg();
13363 Register Src1 = MI.getOperand(1).getReg();
13364 Register Src2 = MI.getOperand(2).getReg();
13365 DebugLoc dl = MI.getDebugLoc();
13366
13367 MachineRegisterInfo &RegInfo = F->getRegInfo();
13368 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13369
13370 // Save FPSCR value.
13371 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
13372
13373 // Set rounding mode to round-to-zero.
13374 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
13375 .addImm(31)
13376 .addReg(PPC::RM, RegState::ImplicitDefine);
13377
13378 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
13379 .addImm(30)
13380 .addReg(PPC::RM, RegState::ImplicitDefine);
13381
13382 // Perform addition.
13383 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
13384 .addReg(Src1)
13385 .addReg(Src2);
13386 if (MI.getFlag(MachineInstr::NoFPExcept))
13387 MIB.setFlag(MachineInstr::NoFPExcept);
13388
13389 // Restore FPSCR value.
13390 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
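// The overall FADDrtz sequence built above is therefore roughly
// (illustrative register names):
//
//   mffs fSave         ; save the FPSCR
//   mtfsb1 31          ; set the low RN bit
//   mtfsb0 30          ; clear the high RN bit -> RN == 0b01, round to zero
//   fadd fDest, fSrc1, fSrc2
//   mtfsf 1, fSave     ; restore the rounding-control field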
13391 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13392 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
13393 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13394 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
13395 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
13396 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
13397 ? PPC::ANDI8_rec
13398 : PPC::ANDI_rec;
13399 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
13400 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
13401
13402 MachineRegisterInfo &RegInfo = F->getRegInfo();
13403 Register Dest = RegInfo.createVirtualRegister(
13404 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
13405
13406 DebugLoc Dl = MI.getDebugLoc();
13407 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
13408 .addReg(MI.getOperand(1).getReg())
13409 .addImm(1);
13410 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13411 MI.getOperand(0).getReg())
13412 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
13413 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
13414 DebugLoc Dl = MI.getDebugLoc();
13415 MachineRegisterInfo &RegInfo = F->getRegInfo();
13416 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13417 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
13418 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13419 MI.getOperand(0).getReg())
13420 .addReg(CRReg);
13421 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
13422 DebugLoc Dl = MI.getDebugLoc();
13423 unsigned Imm = MI.getOperand(1).getImm();
13424 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
13425 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
13426 MI.getOperand(0).getReg())
13427 .addReg(PPC::CR0EQ);
13428 } else if (MI.getOpcode() == PPC::SETRNDi) {
13429 DebugLoc dl = MI.getDebugLoc();
13430 Register OldFPSCRReg = MI.getOperand(0).getReg();
13431
13432 // Save FPSCR value.
13433 if (MRI.use_empty(OldFPSCRReg))
13434 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13435 else
13436 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13437
13438 // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
13439 // the following settings:
13440 // 00 Round to nearest
13441 // 01 Round to 0
13442 // 10 Round to +inf
13443 // 11 Round to -inf
13444
13445 // When the operand is an immediate, use its two least significant bits to
13446 // set bits 62:63 of the FPSCR.
13447 unsigned Mode = MI.getOperand(1).getImm();
13448 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
13449 .addImm(31)
13450 .addReg(PPC::RM, RegState::ImplicitDefine);
13451
13452 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
13453 .addImm(30)
13454 .addReg(PPC::RM, RegState::ImplicitDefine);
13455 } else if (MI.getOpcode() == PPC::SETRND) {
13456 DebugLoc dl = MI.getDebugLoc();
13457
13458 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
13459 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
13460 // If the target doesn't have DirectMove, we should use the stack to do the
13461 // conversion, because the target doesn't have instructions like mtvsrd
13462 // or mfvsrd to do this conversion directly.
13463 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
13464 if (Subtarget.hasDirectMove()) {
13465 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
13466 .addReg(SrcReg);
13467 } else {
13468 // Use stack to do the register copy.
13469 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
13470 MachineRegisterInfo &RegInfo = F->getRegInfo();
13471 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
13472 if (RC == &PPC::F8RCRegClass) {
13473 // Copy register from F8RCRegClass to G8RCRegclass.
13474 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
13475 "Unsupported RegClass.");
13476
13477 StoreOp = PPC::STFD;
13478 LoadOp = PPC::LD;
13479 } else {
13480 // Copy register from G8RCRegClass to F8RCRegclass.
13481 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
13482 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
13483 "Unsupported RegClass.");
13484 }
13485
13486 MachineFrameInfo &MFI = F->getFrameInfo();
13487 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
13488
13489 MachineMemOperand *MMOStore = F->getMachineMemOperand(
13490 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13491 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
13492 MFI.getObjectAlign(FrameIdx));
13493
13494 // Store the SrcReg into the stack.
13495 BuildMI(*BB, MI, dl, TII->get(StoreOp))
13496 .addReg(SrcReg)
13497 .addImm(0)
13498 .addFrameIndex(FrameIdx)
13499 .addMemOperand(MMOStore);
13500
13501 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
13502 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
13503 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
13504 MFI.getObjectAlign(FrameIdx));
13505
13506 // Load from the stack where SrcReg is stored, and save to DestReg,
13507 // so we have done the RegClass conversion from RegClass::SrcReg to
13508 // RegClass::DestReg.
13509 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
13510 .addImm(0)
13511 .addFrameIndex(FrameIdx)
13512 .addMemOperand(MMOLoad);
13513 }
13514 };
13515
13516 Register OldFPSCRReg = MI.getOperand(0).getReg();
13517
13518 // Save FPSCR value.
13519 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
13520
13521 // When the operand is a gprc register, use its two least significant bits
13522 // and the mtfsf instruction to set bits 62:63 of the FPSCR.
13523 //
13524 // copy OldFPSCRTmpReg, OldFPSCRReg
13525 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
13526 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
13527 // copy NewFPSCRReg, NewFPSCRTmpReg
13528 // mtfsf 255, NewFPSCRReg
13529 MachineOperand SrcOp = MI.getOperand(1);
13530 MachineRegisterInfo &RegInfo = F->getRegInfo();
13531 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13532
13533 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
13534
13535 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13536 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13537
13538 // The first operand of INSERT_SUBREG should be a register which has
13539 // subregisters, we only care about its RegClass, so we should use an
13540 // IMPLICIT_DEF register.
13541 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
13542 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
13543 .addReg(ImDefReg)
13544 .add(SrcOp)
13545 .addImm(1);
13546
13547 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
13548 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
13549 .addReg(OldFPSCRTmpReg)
13550 .addReg(ExtSrcReg)
13551 .addImm(0)
13552 .addImm(62);
13553
13554 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
13555 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
13556
13557 // The mask 255 means to put bits 32:63 of NewFPSCRReg into bits 32:63 of
13558 // the FPSCR.
13559 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
13560 .addImm(255)
13561 .addReg(NewFPSCRReg)
13562 .addImm(0)
13563 .addImm(0);
13564 } else if (MI.getOpcode() == PPC::SETFLM) {
13565 DebugLoc Dl = MI.getDebugLoc();
13566
13567 // Result of setflm is previous FPSCR content, so we need to save it first.
13568 Register OldFPSCRReg = MI.getOperand(0).getReg();
13569 if (MRI.use_empty(OldFPSCRReg))
13570 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
13571 else
13572 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
13573
13574 // Put bits 32:63 into the FPSCR.
13575 Register NewFPSCRReg = MI.getOperand(1).getReg();
13576 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
13577 .addImm(255)
13578 .addReg(NewFPSCRReg)
13579 .addImm(0)
13580 .addImm(0);
13581 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
13582 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
13583 return emitProbedAlloca(MI, BB);
13584 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
13585 DebugLoc DL = MI.getDebugLoc();
13586 Register Src = MI.getOperand(2).getReg();
13587 Register Lo = MI.getOperand(0).getReg();
13588 Register Hi = MI.getOperand(1).getReg();
13589 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13590 .addDef(Lo)
13591 .addUse(Src, 0, PPC::sub_gp8_x1);
13592 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
13593 .addDef(Hi)
13594 .addUse(Src, 0, PPC::sub_gp8_x0);
13595 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
13596 MI.getOpcode() == PPC::STQX_PSEUDO) {
13597 DebugLoc DL = MI.getDebugLoc();
13598 // Ptr is used as the ptr_rc_no_r0 part of LQ/STQ's memory operand and
13599 // holds the result of adding RA and RB, so it has to be
13600 // g8rc_and_g8rc_nox0.
13601 Register Ptr =
13602 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
13603 Register Val = MI.getOperand(0).getReg();
13604 Register RA = MI.getOperand(1).getReg();
13605 Register RB = MI.getOperand(2).getReg();
13606 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
13607 BuildMI(*BB, MI, DL,
13608 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
13609 : TII->get(PPC::STQ))
13610 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
13611 .addImm(0)
13612 .addReg(Ptr);
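// Illustrative expansion (register names assumed): LQX_PSEUDO becomes
//
//   add8 rPtr, rRA, rRB
//   lq rVal, 0(rPtr)
//
// and STQX_PSEUDO pairs the same add with stq.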
13613 } else {
13614 llvm_unreachable("Unexpected instr type to insert");
13615 }
13616
13617 MI.eraseFromParent(); // The pseudo instruction is gone now.
13618 return BB;
13619}
13620
13621//===----------------------------------------------------------------------===//
13622// Target Optimization Hooks
13623//===----------------------------------------------------------------------===//
13624
13625static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
13626 // For the estimates, convergence is quadratic, so we essentially double the
13627 // number of correct digits after every iteration. For both FRE and FRSQRTE,
13628 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
13629 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
13630 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
13631 if (VT.getScalarType() == MVT::f64)
13632 RefinementSteps++;
13633 return RefinementSteps;
13634}
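// Worked example (illustrative): with hasRecipPrec(), the initial estimate
// is accurate to 2^-14, so one Newton-Raphson step squares the error to
// about 2^-28, enough for f32's 24 significand bits; a second step (~2^-56)
// covers f64's 53 bits. Without it, the 2^-5 estimate needs three steps for
// f32 (2^-10 -> 2^-20 -> 2^-40) and four for f64.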
13635
13636SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
13637 const DenormalMode &Mode) const {
13638 // We only have VSX Vector Test for software Square Root.
13639 EVT VT = Op.getValueType();
13640 if (!isTypeLegal(MVT::i1) ||
13641 (VT != MVT::f64 &&
13642 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
13643 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
13644
13645 SDLoc DL(Op);
13646 // The output register of FTSQRT is a CR field.
13647 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
13648 // ftsqrt BF,FRB
13649 // Let e_b be the unbiased exponent of the double-precision
13650 // floating-point operand in register FRB.
13651 // fe_flag is set to 1 if either of the following conditions occurs.
13652 // - The double-precision floating-point operand in register FRB is a zero,
13653 // a NaN, an infinity, or a negative value.
13654 // - e_b is less than or equal to -970.
13655 // Otherwise fe_flag is set to 0.
13656 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
13657 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
13658 // exponent is less than -970)
13659 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
13660 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
13661 FTSQRT, SRIdxVal),
13662 0);
13663}
13664
13665SDValue
13666PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
13667 SelectionDAG &DAG) const {
13668 // We only have VSX Vector Square Root.
13669 EVT VT = Op.getValueType();
13670 if (VT != MVT::f64 &&
13671 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
13672 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
13673
13674 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
13675}
13676
13677SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
13678 int Enabled, int &RefinementSteps,
13679 bool &UseOneConstNR,
13680 bool Reciprocal) const {
13681 EVT VT = Operand.getValueType();
13682 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13683 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13684 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13685 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13686 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13687 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13688
13689 // The Newton-Raphson computation with a single constant does not provide
13690 // enough accuracy on some CPUs.
13691 UseOneConstNR = !Subtarget.needsTwoConstNR();
13692 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13693 }
13694 return SDValue();
13695}
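// For reference, each refinement step applied to the FRSQRTE estimate is the
// standard Newton-Raphson iteration for 1/sqrt(a), starting from the
// hardware estimate x_0:
//
//   x_{n+1} = x_n * (1.5 - 0.5 * a * x_n * x_n)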
13696
13697SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13698 int Enabled,
13699 int &RefinementSteps) const {
13700 EVT VT = Operand.getValueType();
13701 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13702 (VT == MVT::f64 && Subtarget.hasFRE()) ||
13703 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13704 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13705 if (RefinementSteps == ReciprocalEstimate::Unspecified)
13706 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13707 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13708 }
13709 return SDValue();
13710}
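// For reference, each refinement step applied to the FRE estimate is the
// standard Newton-Raphson iteration for 1/a, starting from the hardware
// estimate x_0:
//
//   x_{n+1} = x_n * (2.0 - a * x_n)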
13711
13712unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13713 // Note: This functionality is used only when unsafe-fp-math is enabled, and
13714 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13715 // enabled for division), this functionality is redundant with the default
13716 // combiner logic (once the division -> reciprocal/multiply transformation
13717 // has taken place). As a result, this matters more for older cores than for
13718 // newer ones.
13719
13720 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13721 // reciprocal if there are two or more FDIVs (for embedded cores with only
13722 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
13723 switch (Subtarget.getCPUDirective()) {
13724 default:
13725 return 3;
13726 case PPC::DIR_440:
13727 case PPC::DIR_A2:
13728 case PPC::DIR_E500:
13729 case PPC::DIR_E500mc:
13730 case PPC::DIR_E5500:
13731 return 2;
13732 }
13733}
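// Illustrative example of the combine this threshold controls: with a
// threshold of 2, under unsafe-fp-math,
//
//   x = a / d; y = b / d;
//
// becomes
//
//   r = 1.0 / d; x = a * r; y = b * r;
//
// trading two divisions for one division plus two multiplications.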
13734
13735// isConsecutiveLSLoc needs to work even if all adds have not yet been
13736// collapsed, and so we need to look through chains of them.
13737static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
13738 int64_t& Offset, SelectionDAG &DAG) {
13739 if (DAG.isBaseWithConstantOffset(Loc)) {
13740 Base = Loc.getOperand(0);
13741 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13742
13743 // The base might itself be a base plus an offset, and if so, accumulate
13744 // that as well.
13745 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
13746 }
13747}
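// Illustrative example: for Loc == (add (add X, 16), 4), the first call sets
// Base to (add X, 16) and Offset to 4; the recursive call then looks through
// the inner add, leaving Base == X and Offset == 20.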
13748
13749static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
13750 unsigned Bytes, int Dist,
13751 SelectionDAG &DAG) {
13752 if (VT.getSizeInBits() / 8 != Bytes)
13753 return false;
13754
13755 SDValue BaseLoc = Base->getBasePtr();
13756 if (Loc.getOpcode() == ISD::FrameIndex) {
13757 if (BaseLoc.getOpcode() != ISD::FrameIndex)
13758 return false;
13759 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
13760 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
13761 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13762 int FS = MFI.getObjectSize(FI);
13763 int BFS = MFI.getObjectSize(BFI);
13764 if (FS != BFS || FS != (int)Bytes) return false;
13765 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13766 }
13767
13768 SDValue Base1 = Loc, Base2 = BaseLoc;
13769 int64_t Offset1 = 0, Offset2 = 0;
13770 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13771 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13772 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13773 return true;
13774
13775 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13776 const GlobalValue *GV1 = nullptr;
13777 const GlobalValue *GV2 = nullptr;
13778 Offset1 = 0;
13779 Offset2 = 0;
13780 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13781 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13782 if (isGA1 && isGA2 && GV1 == GV2)
13783 return Offset1 == (Offset2 + Dist*Bytes);
13784 return false;
13785}
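// Illustrative example: a 4-byte access at (add X, 8) is consecutive with
// Dist == 1 to a 4-byte base access at (add X, 4), since both decompose to
// base X and 8 == 4 + 1 * 4.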
13786
13787// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13788// not enforce equality of the chain operands.
13789static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
13790 unsigned Bytes, int Dist,
13791 SelectionDAG &DAG) {
13792 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13793 EVT VT = LS->getMemoryVT();
13794 SDValue Loc = LS->getBasePtr();
13795 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13796 }
13797
13798 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13799 EVT VT;
13800 switch (N->getConstantOperandVal(1)) {
13801 default: return false;
13802 case Intrinsic::ppc_altivec_lvx:
13803 case Intrinsic::ppc_altivec_lvxl:
13804 case Intrinsic::ppc_vsx_lxvw4x:
13805 case Intrinsic::ppc_vsx_lxvw4x_be:
13806 VT = MVT::v4i32;
13807 break;
13808 case Intrinsic::ppc_vsx_lxvd2x:
13809 case Intrinsic::ppc_vsx_lxvd2x_be:
13810 VT = MVT::v2f64;
13811 break;
13812 case Intrinsic::ppc_altivec_lvebx:
13813 VT = MVT::i8;
13814 break;
13815 case Intrinsic::ppc_altivec_lvehx:
13816 VT = MVT::i16;
13817 break;
13818 case Intrinsic::ppc_altivec_lvewx:
13819 VT = MVT::i32;
13820 break;
13821 }
13822
13823 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13824 }
13825
13826 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13827 EVT VT;
13828 switch (N->getConstantOperandVal(1)) {
13829 default: return false;
13830 case Intrinsic::ppc_altivec_stvx:
13831 case Intrinsic::ppc_altivec_stvxl:
13832 case Intrinsic::ppc_vsx_stxvw4x:
13833 VT = MVT::v4i32;
13834 break;
13835 case Intrinsic::ppc_vsx_stxvd2x:
13836 VT = MVT::v2f64;
13837 break;
13838 case Intrinsic::ppc_vsx_stxvw4x_be:
13839 VT = MVT::v4i32;
13840 break;
13841 case Intrinsic::ppc_vsx_stxvd2x_be:
13842 VT = MVT::v2f64;
13843 break;
13844 case Intrinsic::ppc_altivec_stvebx:
13845 VT = MVT::i8;
13846 break;
13847 case Intrinsic::ppc_altivec_stvehx:
13848 VT = MVT::i16;
13849 break;
13850 case Intrinsic::ppc_altivec_stvewx:
13851 VT = MVT::i32;
13852 break;
13853 }
13854
13855 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13856 }
13857
13858 return false;
13859}
13860
13861// Return true if there is a nearby consecutive load to the one provided
13862// (regardless of alignment). We search up and down the chain, looking through
13863// token factors and other loads (but nothing else). As a result, a true result
13864// indicates that it is safe to create a new consecutive load adjacent to the
13865// load provided.
13866static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
13867 SDValue Chain = LD->getChain();
13868 EVT VT = LD->getMemoryVT();
13869
13870 SmallSet<SDNode *, 16> LoadRoots;
13871 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13872 SmallSet<SDNode *, 16> Visited;
13873
13874 // First, search up the chain, branching to follow all token-factor operands.
13875 // If we find a consecutive load, then we're done, otherwise, record all
13876 // nodes just above the top-level loads and token factors.
13877 while (!Queue.empty()) {
13878 SDNode *ChainNext = Queue.pop_back_val();
13879 if (!Visited.insert(ChainNext).second)
13880 continue;
13881
13882 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13883 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13884 return true;
13885
13886 if (!Visited.count(ChainLD->getChain().getNode()))
13887 Queue.push_back(ChainLD->getChain().getNode());
13888 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13889 for (const SDUse &O : ChainNext->ops())
13890 if (!Visited.count(O.getNode()))
13891 Queue.push_back(O.getNode());
13892 } else
13893 LoadRoots.insert(ChainNext);
13894 }
13895
13896 // Second, search down the chain, starting from the top-level nodes recorded
13897 // in the first phase. These top-level nodes are the nodes just above all
13898 // loads and token factors. Starting with their uses, recursively look through
13899 // all loads (just the chain uses) and token factors to find a consecutive
13900 // load.
13901 Visited.clear();
13902 Queue.clear();
13903
13904 for (SDNode *I : LoadRoots) {
13905 Queue.push_back(I);
13906
13907 while (!Queue.empty()) {
13908 SDNode *LoadRoot = Queue.pop_back_val();
13909 if (!Visited.insert(LoadRoot).second)
13910 continue;
13911
13912 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13913 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13914 return true;
13915
13916 for (SDNode *U : LoadRoot->uses())
13917 if (((isa<MemSDNode>(U) &&
13918 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13919 U->getOpcode() == ISD::TokenFactor) &&
13920 !Visited.count(U))
13921 Queue.push_back(U);
13922 }
13923 }
13924
13925 return false;
13926}
13927
13928/// This function is called when we have proved that a SETCC node can be replaced
13929/// by subtraction (and other supporting instructions) so that the result of
13930/// comparison is kept in a GPR instead of a CR. This function is purely for
13931/// codegen purposes and has some flags to guide the codegen process.
13932static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13933 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
13934 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13935
13936 // Zero extend the operands to the largest legal integer. Originally, they
13937 // must be of a strictly smaller size.
13938 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
13939 DAG.getConstant(Size, DL, MVT::i32));
13940 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
13941 DAG.getConstant(Size, DL, MVT::i32));
13942
13943 // Swap if needed. Depends on the condition code.
13944 if (Swap)
13945 std::swap(Op0, Op1);
13946
13947 // Subtract extended integers.
13948 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
13949
13950 // Move the sign bit to the least significant position and zero out the rest.
13951 // Now the least significant bit carries the result of original comparison.
13952 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
13953 DAG.getConstant(Size - 1, DL, MVT::i32));
13954 auto Final = Shifted;
13955
13956 // Complement the result if needed. Based on the condition code.
13957 if (Complement)
13958 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
13959 DAG.getConstant(1, DL, MVT::i64));
13960
13961 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
13962}
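// Worked example (illustrative): for an i32 SETULT a, b with Size == 64,
// both operands are zero-extended to i64 and subtracted; because each fits
// in 32 bits, the difference is negative exactly when a < b (unsigned), so a
// logical shift right by Size - 1 == 63 leaves the comparison result in bit
// 0, which is then truncated to i1. SETUGE adds the final xor (Complement),
// while SETUGT and SETULE also swap the operands first.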
13963
13964SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
13965 DAGCombinerInfo &DCI) const {
13966 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13967
13968 SelectionDAG &DAG = DCI.DAG;
13969 SDLoc DL(N);
13970
13971 // The size of the integers being compared has a critical role in the
13972 // following analysis, so we prefer to do this when all types are legal.
13973 if (!DCI.isAfterLegalizeDAG())
13974 return SDValue();
13975
13976 // If all users of the SETCC extend its value to a legal integer type,
13977 // then we replace the SETCC with a subtraction.
13978 for (const SDNode *U : N->uses())
13979 if (U->getOpcode() != ISD::ZERO_EXTEND)
13980 return SDValue();
13981
13982 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13983 auto OpSize = N->getOperand(0).getValueSizeInBits();
13984
13986
13987 if (OpSize < Size) {
13988 switch (CC) {
13989 default: break;
13990 case ISD::SETULT:
13991 return generateEquivalentSub(N, Size, false, false, DL, DAG);
13992 case ISD::SETULE:
13993 return generateEquivalentSub(N, Size, true, true, DL, DAG);
13994 case ISD::SETUGT:
13995 return generateEquivalentSub(N, Size, false, true, DL, DAG);
13996 case ISD::SETUGE:
13997 return generateEquivalentSub(N, Size, true, false, DL, DAG);
13998 }
13999 }
14000
14001 return SDValue();
14002}
14003
14004SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
14005 DAGCombinerInfo &DCI) const {
14006 SelectionDAG &DAG = DCI.DAG;
14007 SDLoc dl(N);
14008
14009 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
14010 // If we're tracking CR bits, we need to be careful that we don't have:
14011 // trunc(binary-ops(zext(x), zext(y)))
14012 // or
14013 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
14014 // such that we're unnecessarily moving things into GPRs when it would be
14015 // better to keep them in CR bits.
14016
14017 // Note that trunc here can be an actual i1 trunc, or can be the effective
14018 // truncation that comes from a setcc or select_cc.
14019 if (N->getOpcode() == ISD::TRUNCATE &&
14020 N->getValueType(0) != MVT::i1)
14021 return SDValue();
14022
14023 if (N->getOperand(0).getValueType() != MVT::i32 &&
14024 N->getOperand(0).getValueType() != MVT::i64)
14025 return SDValue();
14026
14027 if (N->getOpcode() == ISD::SETCC ||
14028 N->getOpcode() == ISD::SELECT_CC) {
14029 // If we're looking at a comparison, then we need to make sure that the
14030 // high bits (all except for the first) don't affect the result.
14031 ISD::CondCode CC =
14032 cast<CondCodeSDNode>(N->getOperand(
14033 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
14034 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
14035
14037 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
14038 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
14039 return SDValue();
14040 } else if (ISD::isUnsignedIntSetCC(CC)) {
14041 if (!DAG.MaskedValueIsZero(N->getOperand(0),
14042 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
14043 !DAG.MaskedValueIsZero(N->getOperand(1),
14044 APInt::getHighBitsSet(OpBits, OpBits-1)))
14045 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
14046 : SDValue());
14047 } else {
14048 // This is neither a signed nor an unsigned comparison, just make sure
14049 // that the high bits are equal.
14050 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
14051 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
14052
14053 // We don't really care about what is known about the first bit (if
14054 // anything), so pretend that it is known zero for both to ensure they can
14055 // be compared as constants.
14056 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
14057 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
14058
14059 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
14060 Op1Known.getConstant() != Op2Known.getConstant())
14061 return SDValue();
14062 }
14063 }
14064
14065 // We now know that the higher-order bits are irrelevant, we just need to
14066 // make sure that all of the intermediate operations are bit operations, and
14067 // all inputs are extensions.
14068 if (N->getOperand(0).getOpcode() != ISD::AND &&
14069 N->getOperand(0).getOpcode() != ISD::OR &&
14070 N->getOperand(0).getOpcode() != ISD::XOR &&
14071 N->getOperand(0).getOpcode() != ISD::SELECT &&
14072 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
14073 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
14074 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
14075 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
14076 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
14077 return SDValue();
14078
14079 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
14080 N->getOperand(1).getOpcode() != ISD::AND &&
14081 N->getOperand(1).getOpcode() != ISD::OR &&
14082 N->getOperand(1).getOpcode() != ISD::XOR &&
14083 N->getOperand(1).getOpcode() != ISD::SELECT &&
14084 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
14085 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
14086 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
14087 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
14088 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
14089 return SDValue();
14090
14091 SmallVector<SDValue, 4> Inputs;
14092 SmallVector<SDValue, 8> BinOps, PromOps;
14093 SmallPtrSet<SDNode *, 16> Visited;
14094
14095 for (unsigned i = 0; i < 2; ++i) {
14096 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14097 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14098 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14099 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14100 isa<ConstantSDNode>(N->getOperand(i)))
14101 Inputs.push_back(N->getOperand(i));
14102 else
14103 BinOps.push_back(N->getOperand(i));
14104
14105 if (N->getOpcode() == ISD::TRUNCATE)
14106 break;
14107 }
14108
14109 // Visit all inputs, collect all binary operations (and, or, xor and
14110 // select) that are all fed by extensions.
14111 while (!BinOps.empty()) {
14112 SDValue BinOp = BinOps.pop_back_val();
14113
14114 if (!Visited.insert(BinOp.getNode()).second)
14115 continue;
14116
14117 PromOps.push_back(BinOp);
14118
14119 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14120 // The condition of the select is not promoted.
14121 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14122 continue;
14123 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14124 continue;
14125
14126 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14127 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14128 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
14129 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
14130 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14131 Inputs.push_back(BinOp.getOperand(i));
14132 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14133 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14134 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14135 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14136 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
14137 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14138 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
14139 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
14140 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
14141 BinOps.push_back(BinOp.getOperand(i));
14142 } else {
14143 // We have an input that is not an extension or another binary
14144 // operation; we'll abort this transformation.
14145 return SDValue();
14146 }
14147 }
14148 }
14149
14150 // Make sure that this is a self-contained cluster of operations (which
14151 // is not quite the same thing as saying that everything has only one
14152 // use).
14153 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14154 if (isa<ConstantSDNode>(Inputs[i]))
14155 continue;
14156
14157 for (const SDNode *User : Inputs[i].getNode()->uses()) {
14158 if (User != N && !Visited.count(User))
14159 return SDValue();
14160
14161 // Make sure that we're not going to promote the non-output-value
14162 // operand(s) of SELECT or SELECT_CC.
14163 // FIXME: Although we could sometimes handle this, and it does occur in
14164 // practice that one of the condition inputs to the select is also one of
14165 // the outputs, we currently can't deal with this.
14166 if (User->getOpcode() == ISD::SELECT) {
14167 if (User->getOperand(0) == Inputs[i])
14168 return SDValue();
14169 } else if (User->getOpcode() == ISD::SELECT_CC) {
14170 if (User->getOperand(0) == Inputs[i] ||
14171 User->getOperand(1) == Inputs[i])
14172 return SDValue();
14173 }
14174 }
14175 }
14176
14177 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14178 for (const SDNode *User : PromOps[i].getNode()->uses()) {
14179 if (User != N && !Visited.count(User))
14180 return SDValue();
14181
14182 // Make sure that we're not going to promote the non-output-value
14183 // operand(s) of SELECT or SELECT_CC.
14184 // FIXME: Although we could sometimes handle this, and it does occur in
14185 // practice that one of the condition inputs to the select is also one of
14186 // the outputs, we currently can't deal with this.
14187 if (User->getOpcode() == ISD::SELECT) {
14188 if (User->getOperand(0) == PromOps[i])
14189 return SDValue();
14190 } else if (User->getOpcode() == ISD::SELECT_CC) {
14191 if (User->getOperand(0) == PromOps[i] ||
14192 User->getOperand(1) == PromOps[i])
14193 return SDValue();
14194 }
14195 }
14196 }
14197
14198 // Replace all inputs with the extension operand.
14199 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14200 // Constants may have users outside the cluster of to-be-promoted nodes,
14201 // and so we need to replace those as we do the promotions.
14202 if (isa<ConstantSDNode>(Inputs[i]))
14203 continue;
14204 else
14205 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
14206 }
14207
14208 std::list<HandleSDNode> PromOpHandles;
14209 for (auto &PromOp : PromOps)
14210 PromOpHandles.emplace_back(PromOp);
14211
14212 // Replace all operations (these are all the same, but have a different
14213 // (i1) return type). DAG.getNode will validate that the types of
14214 // a binary operator match, so go through the list in reverse so that
14215 // we've likely promoted both operands first. Any intermediate truncations or
14216 // extensions disappear.
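// A node whose operands are not yet in i1 form is re-queued at the front of
// the handle list and retried once its operands have been promoted.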
14217 while (!PromOpHandles.empty()) {
14218 SDValue PromOp = PromOpHandles.back().getValue();
14219 PromOpHandles.pop_back();
14220
14221 if (PromOp.getOpcode() == ISD::TRUNCATE ||
14222 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
14223 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
14224 PromOp.getOpcode() == ISD::ANY_EXTEND) {
14225 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
14226 PromOp.getOperand(0).getValueType() != MVT::i1) {
14227 // The operand is not yet ready (see comment below).
14228 PromOpHandles.emplace_front(PromOp);
14229 continue;
14230 }
14231
14232 SDValue RepValue = PromOp.getOperand(0);
14233 if (isa<ConstantSDNode>(RepValue))
14234 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
14235
14236 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
14237 continue;
14238 }
14239
14240 unsigned C;
14241 switch (PromOp.getOpcode()) {
14242 default: C = 0; break;
14243 case ISD::SELECT: C = 1; break;
14244 case ISD::SELECT_CC: C = 2; break;
14245 }
14246
14247 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14248 PromOp.getOperand(C).getValueType() != MVT::i1) ||
14249 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14250 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
14251 // The to-be-promoted operands of this node have not yet been
14252 // promoted (this should be rare because we're going through the
14253 // list backward, but if one of the operands has several users in
14254 // this cluster of to-be-promoted nodes, it is possible).
14255 PromOpHandles.emplace_front(PromOp);
14256 continue;
14257 }
14258
14259 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14260 PromOp.getNode()->op_end());
14261
14262 // If there are any constant inputs, make sure they're replaced now.
14263 for (unsigned i = 0; i < 2; ++i)
14264 if (isa<ConstantSDNode>(Ops[C+i]))
14265 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
14266
14267 DAG.ReplaceAllUsesOfValueWith(PromOp,
14268 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
14269 }
14270
14271 // Now we're left with the initial truncation itself.
14272 if (N->getOpcode() == ISD::TRUNCATE)
14273 return N->getOperand(0);
14274
14275 // Otherwise, this is a comparison. The operands to be compared have just
14276 // changed type (to i1), but everything else is the same.
14277 return SDValue(N, 0);
14278}
14279
14280SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
14281 DAGCombinerInfo &DCI) const {
14282 SelectionDAG &DAG = DCI.DAG;
14283 SDLoc dl(N);
14284
14285 // If we're tracking CR bits, we need to be careful that we don't have:
14286 // zext(binary-ops(trunc(x), trunc(y)))
14287 // or
14288 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...))
14289 // such that we're unnecessarily moving things into CR bits that can more
14290 // efficiently stay in GPRs. Note that if we're not certain that the high
14291 // bits are set as required by the final extension, we still may need to do
14292 // some masking to get the proper behavior.
14293
14294 // This same functionality is important on PPC64 when dealing with
14295 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
14296 // the return values of functions. Because it is so similar, it is handled
14297 // here as well.
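// For example, (zext i64 (and i32 (trunc i64 %x), (trunc i64 %y))) can become
// (and i64 %x, %y), with masking added only when the high bits are not
// already known to have the required value.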
14298
14299 if (N->getValueType(0) != MVT::i32 &&
14300 N->getValueType(0) != MVT::i64)
14301 return SDValue();
14302
14303 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
14304 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
14305 return SDValue();
14306
14307 if (N->getOperand(0).getOpcode() != ISD::AND &&
14308 N->getOperand(0).getOpcode() != ISD::OR &&
14309 N->getOperand(0).getOpcode() != ISD::XOR &&
14310 N->getOperand(0).getOpcode() != ISD::SELECT &&
14311 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
14312 return SDValue();
14313
14314 SmallVector<SDValue, 4> Inputs;
14315 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
14316 SmallPtrSet<SDNode *, 16> Visited;
14317
14318 // Visit all inputs, collect all binary operations (and, or, xor and
14319 // select) that are all fed by truncations.
14320 while (!BinOps.empty()) {
14321 SDValue BinOp = BinOps.pop_back_val();
14322
14323 if (!Visited.insert(BinOp.getNode()).second)
14324 continue;
14325
14326 PromOps.push_back(BinOp);
14327
14328 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
14329 // The condition of the select is not promoted.
14330 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
14331 continue;
14332 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
14333 continue;
14334
14335 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
14336 isa<ConstantSDNode>(BinOp.getOperand(i))) {
14337 Inputs.push_back(BinOp.getOperand(i));
14338 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
14339 BinOp.getOperand(i).getOpcode() == ISD::OR ||
14340 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
14341 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
14342 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
14343 BinOps.push_back(BinOp.getOperand(i));
14344 } else {
14345 // We have an input that is not a truncation or another binary
14346 // operation; we'll abort this transformation.
14347 return SDValue();
14348 }
14349 }
14350 }
14351
14352 // The operands of a select that must be truncated when the select is
14353 // promoted because the operands are actually part of the to-be-promoted set.
14354 DenseMap<SDNode *, EVT> SelectTruncOp[2];
14355
14356 // Make sure that this is a self-contained cluster of operations (which
14357 // is not quite the same thing as saying that everything has only one
14358 // use).
14359 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14360 if (isa<ConstantSDNode>(Inputs[i]))
14361 continue;
14362
14363 for (SDNode *User : Inputs[i].getNode()->uses()) {
14364 if (User != N && !Visited.count(User))
14365 return SDValue();
14366
14367 // If we're going to promote the non-output-value operand(s) of SELECT or
14368 // SELECT_CC, record them for truncation.
14369 if (User->getOpcode() == ISD::SELECT) {
14370 if (User->getOperand(0) == Inputs[i])
14371 SelectTruncOp[0].insert(std::make_pair(User,
14372 User->getOperand(0).getValueType()));
14373 } else if (User->getOpcode() == ISD::SELECT_CC) {
14374 if (User->getOperand(0) == Inputs[i])
14375 SelectTruncOp[0].insert(std::make_pair(User,
14376 User->getOperand(0).getValueType()));
14377 if (User->getOperand(1) == Inputs[i])
14378 SelectTruncOp[1].insert(std::make_pair(User,
14379 User->getOperand(1).getValueType()));
14380 }
14381 }
14382 }
14383
14384 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
14385 for (SDNode *User : PromOps[i].getNode()->uses()) {
14386 if (User != N && !Visited.count(User))
14387 return SDValue();
14388
14389 // If we're going to promote the non-output-value operand(s) of SELECT or
14390 // SELECT_CC, record them for truncation.
14391 if (User->getOpcode() == ISD::SELECT) {
14392 if (User->getOperand(0) == PromOps[i])
14393 SelectTruncOp[0].insert(std::make_pair(User,
14394 User->getOperand(0).getValueType()));
14395 } else if (User->getOpcode() == ISD::SELECT_CC) {
14396 if (User->getOperand(0) == PromOps[i])
14397 SelectTruncOp[0].insert(std::make_pair(User,
14398 User->getOperand(0).getValueType()));
14399 if (User->getOperand(1) == PromOps[i])
14400 SelectTruncOp[1].insert(std::make_pair(User,
14401 User->getOperand(1).getValueType()));
14402 }
14403 }
14404 }
14405
14406 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
14407 bool ReallyNeedsExt = false;
14408 if (N->getOpcode() != ISD::ANY_EXTEND) {
14409 // If all of the inputs are not already sign/zero extended, then
14410 // we'll still need to do that at the end.
14411 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14412 if (isa<ConstantSDNode>(Inputs[i]))
14413 continue;
14414
14415 unsigned OpBits =
14416 Inputs[i].getOperand(0).getValueSizeInBits();
14417 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
14418
14419 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
14420 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
14421 APInt::getHighBitsSet(OpBits,
14422 OpBits-PromBits))) ||
14423 (N->getOpcode() == ISD::SIGN_EXTEND &&
14424 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
14425 (OpBits-(PromBits-1)))) {
14426 ReallyNeedsExt = true;
14427 break;
14428 }
14429 }
14430 }
14431
14432 // Replace all inputs, either with the truncation operand, or a
14433 // truncation or extension to the final output type.
14434 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
14435 // Constant inputs need to be replaced with the to-be-promoted nodes that
14436 // use them because they might have users outside of the cluster of
14437 // promoted nodes.
14438 if (isa<ConstantSDNode>(Inputs[i]))
14439 continue;
14440
14441 SDValue InSrc = Inputs[i].getOperand(0);
14442 if (Inputs[i].getValueType() == N->getValueType(0))
14443 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
14444 else if (N->getOpcode() == ISD::SIGN_EXTEND)
14445 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14446 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
14447 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14448 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14449 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
14450 else
14451 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
14452 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
14453 }
14454
14455 std::list<HandleSDNode> PromOpHandles;
14456 for (auto &PromOp : PromOps)
14457 PromOpHandles.emplace_back(PromOp);
14458
14459 // Replace all operations (these are all the same, but have a different
14460 // (promoted) return type). DAG.getNode will validate that the types of
14461 // a binary operator match, so go through the list in reverse so that
14462 // we've likely promoted both operands first.
14463 while (!PromOpHandles.empty()) {
14464 SDValue PromOp = PromOpHandles.back().getValue();
14465 PromOpHandles.pop_back();
14466
14467 unsigned C;
14468 switch (PromOp.getOpcode()) {
14469 default: C = 0; break;
14470 case ISD::SELECT: C = 1; break;
14471 case ISD::SELECT_CC: C = 2; break;
14472 }
14473
14474 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
14475 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
14476 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
14477 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
14478 // The to-be-promoted operands of this node have not yet been
14479 // promoted (this should be rare because we're going through the
14480 // list backward, but if one of the operands has several users in
14481 // this cluster of to-be-promoted nodes, it is possible).
14482 PromOpHandles.emplace_front(PromOp);
14483 continue;
14484 }
14485
14486 // For SELECT and SELECT_CC nodes, we do a similar check for any
14487 // to-be-promoted comparison inputs.
14488 if (PromOp.getOpcode() == ISD::SELECT ||
14489 PromOp.getOpcode() == ISD::SELECT_CC) {
14490 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
14491 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
14492 (SelectTruncOp[1].count(PromOp.getNode()) &&
14493 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
14494 PromOpHandles.emplace_front(PromOp);
14495 continue;
14496 }
14497 }
14498
14499 SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
14500 PromOp.getNode()->op_end());
14501
14502 // If this node has constant inputs, then they'll need to be promoted here.
14503 for (unsigned i = 0; i < 2; ++i) {
14504 if (!isa<ConstantSDNode>(Ops[C+i]))
14505 continue;
14506 if (Ops[C+i].getValueType() == N->getValueType(0))
14507 continue;
14508
14509 if (N->getOpcode() == ISD::SIGN_EXTEND)
14510 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14511 else if (N->getOpcode() == ISD::ZERO_EXTEND)
14512 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14513 else
14514 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
14515 }
14516
14517 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
14518 // truncate them again to the original value type.
14519 if (PromOp.getOpcode() == ISD::SELECT ||
14520 PromOp.getOpcode() == ISD::SELECT_CC) {
14521 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
14522 if (SI0 != SelectTruncOp[0].end())
14523 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
14524 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
14525 if (SI1 != SelectTruncOp[1].end())
14526 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
14527 }
14528
14529 DAG.ReplaceAllUsesOfValueWith(PromOp,
14530 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
14531 }
14532
14533 // Now we're left with the initial extension itself.
14534 if (!ReallyNeedsExt)
14535 return N->getOperand(0);
14536
14537 // To zero extend, just mask off everything except for the first bit (in the
14538 // i1 case).
14539 if (N->getOpcode() == ISD::ZERO_EXTEND)
14540 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
14541 DAG.getConstant(APInt::getLowBitsSet(
14542 N->getValueSizeInBits(0), PromBits),
14543 dl, N->getValueType(0)));
14544
14545 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
14546 "Invalid extension type");
14547 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
14548 SDValue ShiftCst =
14549 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
14550 return DAG.getNode(
14551 ISD::SRA, dl, N->getValueType(0),
14552 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
14553 ShiftCst);
14554}
14555
14556SDValue PPCTargetLowering::combineSetCC(SDNode *N,
14557 DAGCombinerInfo &DCI) const {
14558 assert(N->getOpcode() == ISD::SETCC &&
14559 "Should be called with a SETCC node");
14560
14561 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
14562 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
14563 SDValue LHS = N->getOperand(0);
14564 SDValue RHS = N->getOperand(1);
14565
14566 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
14567 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
14568 LHS.hasOneUse())
14569 std::swap(LHS, RHS);
14570
14571 // x == 0-y --> x+y == 0
14572 // x != 0-y --> x+y != 0
14573 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
14574 RHS.hasOneUse()) {
14575 SDLoc DL(N);
14576 SelectionDAG &DAG = DCI.DAG;
14577 EVT VT = N->getValueType(0);
14578 EVT OpVT = LHS.getValueType();
14579 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
14580 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
14581 }
14582 }
14583
14584 return DAGCombineTruncBoolExt(N, DCI);
14585}
14586
14587// Is this an extending load from an f32 to an f64?
14588static bool isFPExtLoad(SDValue Op) {
14589 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
14590 return LD->getExtensionType() == ISD::EXTLOAD &&
14591 Op.getValueType() == MVT::f64;
14592 return false;
14593}
14594
14595 /// Reduces the number of fp-to-int conversions when building a vector.
14596///
14597/// If this vector is built out of floating to integer conversions,
14598/// transform it to a vector built out of floating point values followed by a
14599/// single floating to integer conversion of the vector.
14600/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
14601/// becomes (fptosi (build_vector ($A, $B, ...)))
14602SDValue PPCTargetLowering::
14603combineElementTruncationToVectorTruncation(SDNode *N,
14604 DAGCombinerInfo &DCI) const {
14605 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14606 "Should be called with a BUILD_VECTOR node");
14607
14608 SelectionDAG &DAG = DCI.DAG;
14609 SDLoc dl(N);
14610
14611 SDValue FirstInput = N->getOperand(0);
14612 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
14613 "The input operand must be an fp-to-int conversion.");
14614
14615 // This combine happens after legalization so the fp_to_[su]i nodes are
14616 // already converted to PPCISD nodes.
14617 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
14618 if (FirstConversion == PPCISD::FCTIDZ ||
14619 FirstConversion == PPCISD::FCTIDUZ ||
14620 FirstConversion == PPCISD::FCTIWZ ||
14621 FirstConversion == PPCISD::FCTIWUZ) {
14622 bool IsSplat = true;
14623 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
14624 FirstConversion == PPCISD::FCTIWUZ;
14625 EVT SrcVT = FirstInput.getOperand(0).getValueType();
14626 SmallVector<SDValue, 4> Ops;
14627 EVT TargetVT = N->getValueType(0);
14628 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14629 SDValue NextOp = N->getOperand(i);
14630 if (NextOp.getOpcode() != PPCISD::MFVSR)
14631 return SDValue();
14632 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
14633 if (NextConversion != FirstConversion)
14634 return SDValue();
14635 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
14636 // This is not valid if the input was originally double precision. It is
14637 // also not profitable to do unless this is an extending load, in which
14638 // case doing this combine will allow us to combine consecutive loads.
14639 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
14640 return SDValue();
14641 if (N->getOperand(i) != FirstInput)
14642 IsSplat = false;
14643 }
14644
14645 // If this is a splat, we leave it as-is since there will be only a single
14646 // fp-to-int conversion followed by a splat of the integer. This is better
14647 // for 32-bit and smaller ints and neutral for 64-bit ints.
14648 if (IsSplat)
14649 return SDValue();
14650
14651 // Now that we know we have the right type of node, get its operands
14652 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
14653 SDValue In = N->getOperand(i).getOperand(0);
14654 if (Is32Bit) {
14655 // For 32-bit values, we need to add an FP_ROUND node (if we made it
14656 // here, we know that all inputs are extending loads so this is safe).
14657 if (In.isUndef())
14658 Ops.push_back(DAG.getUNDEF(SrcVT));
14659 else {
14660 SDValue Trunc =
14661 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
14662 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
14663 Ops.push_back(Trunc);
14664 }
14665 } else
14666 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
14667 }
14668
14669 unsigned Opcode;
14670 if (FirstConversion == PPCISD::FCTIDZ ||
14671 FirstConversion == PPCISD::FCTIWZ)
14672 Opcode = ISD::FP_TO_SINT;
14673 else
14674 Opcode = ISD::FP_TO_UINT;
14675
14676 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
14677 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
14678 return DAG.getNode(Opcode, dl, TargetVT, BV);
14679 }
14680 return SDValue();
14681}
14682
14683/// Reduce the number of loads when building a vector.
14684///
14685/// Building a vector out of multiple loads can be converted to a load
14686/// of the vector type if the loads are consecutive. If the loads are
14687/// consecutive but in descending order, a shuffle is added at the end
14688/// to reorder the vector.
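/// For example, a v2f64 build_vector of (load p) and (load p+8) becomes a
/// single v2f64 load from p when the loads are consecutive and non-volatile.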
14689 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
14690 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14691 "Should be called with a BUILD_VECTOR node");
14692
14693 SDLoc dl(N);
14694
14695 // Return early for non-byte-sized types, as they can't be consecutive.
14696 if (!N->getValueType(0).getVectorElementType().isByteSized())
14697 return SDValue();
14698
14699 bool InputsAreConsecutiveLoads = true;
14700 bool InputsAreReverseConsecutive = true;
14701 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14702 SDValue FirstInput = N->getOperand(0);
14703 bool IsRoundOfExtLoad = false;
14704 LoadSDNode *FirstLoad = nullptr;
14705
14706 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14707 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14708 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
14709 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
14710 }
14711 // Not a build vector of (possibly fp_rounded) loads.
14712 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14713 N->getNumOperands() == 1)
14714 return SDValue();
14715
14716 if (!IsRoundOfExtLoad)
14717 FirstLoad = cast<LoadSDNode>(FirstInput);
14718
14719 SmallVector<LoadSDNode *, 4> InputLoads;
14720 InputLoads.push_back(FirstLoad);
14721 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14722 // If any inputs are fp_round(extload), they all must be.
14723 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14724 return SDValue();
14725
14726 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14727 N->getOperand(i);
14728 if (NextInput.getOpcode() != ISD::LOAD)
14729 return SDValue();
14730
14731 SDValue PreviousInput =
14732 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14733 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
14734 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
14735
14736 // If any inputs are fp_round(extload), they all must be.
14737 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14738 return SDValue();
14739
14740 // We only care about regular loads. The PPC-specific load intrinsics
14741 // will not lead to a merge opportunity.
14742 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
14743 InputsAreConsecutiveLoads = false;
14744 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
14745 InputsAreReverseConsecutive = false;
14746
14747 // Exit early if the loads are neither consecutive nor reverse consecutive.
14748 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14749 return SDValue();
14750 InputLoads.push_back(LD2);
14751 }
14752
14753 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14754 "The loads cannot be both consecutive and reverse consecutive.");
14755
14756 SDValue WideLoad;
14757 SDValue ReturnSDVal;
14758 if (InputsAreConsecutiveLoads) {
14759 assert(FirstLoad && "Input needs to be a LoadSDNode.");
14760 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
14761 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
14762 FirstLoad->getAlign());
14763 ReturnSDVal = WideLoad;
14764 } else if (InputsAreReverseConsecutive) {
14765 LoadSDNode *LastLoad = InputLoads.back();
14766 assert(LastLoad && "Input needs to be a LoadSDNode.");
14767 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
14768 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
14769 LastLoad->getAlign());
14770 SmallVector<int, 16> Ops;
14771 for (int i = N->getNumOperands() - 1; i >= 0; i--)
14772 Ops.push_back(i);
14773
14774 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
14775 DAG.getUNDEF(N->getValueType(0)), Ops);
14776 } else
14777 return SDValue();
14778
14779 for (auto *LD : InputLoads)
14780 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
14781 return ReturnSDVal;
14782}
14783
14784 // This function adds the vector_shuffle needed to get
14785// the elements of the vector extract in the correct position
14786// as specified by the CorrectElems encoding.
14787 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
14788 SDValue Input, uint64_t Elems,
14789 uint64_t CorrectElems) {
14790 SDLoc dl(N);
14791
14792 unsigned NumElems = Input.getValueType().getVectorNumElements();
14793 SmallVector<int, 16> ShuffleMask(NumElems, -1);
14794
14795 // Knowing the element indices being extracted from the original
14796 // vector and the order in which they're being inserted, just put
14797 // them at element indices required for the instruction.
14798 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14799 if (DAG.getDataLayout().isLittleEndian())
14800 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14801 else
14802 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14803 CorrectElems = CorrectElems >> 8;
14804 Elems = Elems >> 8;
14805 }
14806
14807 SDValue Shuffle =
14808 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14809 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14810
14811 EVT VT = N->getValueType(0);
14812 SDValue Conv = DAG.getBitcast(VT, Shuffle);
14813
14814 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14815 Input.getValueType().getVectorElementType(),
14816 VT.getVectorNumElements());
14817 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14818 DAG.getValueType(ExtVT));
14819}
14820
14821// Look for build vector patterns where input operands come from sign
14822// extended vector_extract elements of specific indices. If the correct indices
14823 // aren't used, add a vector shuffle to fix up the indices and create a
14824 // SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14825// during instruction selection.
14826 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
14827 // This array encodes the indices that the vector sign extend instructions
14828 // extract from when extending from one type to another for both BE and LE.
14829 // The right nibble of each byte corresponds to the LE indices,
14830 // and the left nibble of each byte corresponds to the BE indices.
14831 // For example: 0x3074B8FC byte->word
14832 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14833 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14834 // For example: 0x000070F8 byte->double word
14835 // For LE: the allowed indices are: 0x0,0x8
14836 // For BE: the allowed indices are: 0x7,0xF
14837 uint64_t TargetElems[] = {
14838 0x3074B8FC, // b->w
14839 0x000070F8, // b->d
14840 0x10325476, // h->w
14841 0x00003074, // h->d
14842 0x00001032, // w->d
14843 };
14844
14845 uint64_t Elems = 0;
14846 int Index;
14847 SDValue Input;
14848
14849 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14850 if (!Op)
14851 return false;
14852 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14853 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14854 return false;
14855
14856 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14857 // of the right width.
14858 SDValue Extract = Op.getOperand(0);
14859 if (Extract.getOpcode() == ISD::ANY_EXTEND)
14860 Extract = Extract.getOperand(0);
14861 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14862 return false;
14863
14864 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14865 if (!ExtOp)
14866 return false;
14867
14868 Index = ExtOp->getZExtValue();
14869 if (Input && Input != Extract.getOperand(0))
14870 return false;
14871
14872 if (!Input)
14873 Input = Extract.getOperand(0);
14874
14875 Elems = Elems << 8;
14876 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14877 Elems |= Index;
14878
14879 return true;
14880 };
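// Each extract index is packed into one byte of Elems: the LE index goes in
// the right nibble and the BE index is shifted into the left nibble, matching
// the TargetElems encoding above.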
14881
14882 // If the build vector operands aren't sign extended vector extracts
14883 // of the same input vector, then return.
14884 for (unsigned i = 0; i < N->getNumOperands(); i++) {
14885 if (!isSExtOfVecExtract(N->getOperand(i))) {
14886 return SDValue();
14887 }
14888 }
14889
14890 // If the vector extract indices are not correct, add the appropriate
14891 // vector_shuffle.
14892 int TgtElemArrayIdx;
14893 int InputSize = Input.getValueType().getScalarSizeInBits();
14894 int OutputSize = N->getValueType(0).getScalarSizeInBits();
14895 if (InputSize + OutputSize == 40)
14896 TgtElemArrayIdx = 0;
14897 else if (InputSize + OutputSize == 72)
14898 TgtElemArrayIdx = 1;
14899 else if (InputSize + OutputSize == 48)
14900 TgtElemArrayIdx = 2;
14901 else if (InputSize + OutputSize == 80)
14902 TgtElemArrayIdx = 3;
14903 else if (InputSize + OutputSize == 96)
14904 TgtElemArrayIdx = 4;
14905 else
14906 return SDValue();
14907
14908 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14909 CorrectElems = DAG.getDataLayout().isLittleEndian()
14910 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14911 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14912 if (Elems != CorrectElems) {
14913 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14914 }
14915
14916 // Regular lowering will catch cases where a shuffle is not needed.
14917 return SDValue();
14918}
14919
14920// Look for the pattern of a load from a narrow width to i128, feeding
14921// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14922// (LXVRZX). This node represents a zero extending load that will be matched
14923// to the Load VSX Vector Rightmost instructions.
14924 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
14925 SDLoc DL(N);
14926
14927 // This combine is only eligible for a BUILD_VECTOR of v1i128.
14928 if (N->getValueType(0) != MVT::v1i128)
14929 return SDValue();
14930
14931 SDValue Operand = N->getOperand(0);
14932 // Proceed with the transformation if the operand to the BUILD_VECTOR
14933 // is a load instruction.
14934 if (Operand.getOpcode() != ISD::LOAD)
14935 return SDValue();
14936
14937 auto *LD = cast<LoadSDNode>(Operand);
14938 EVT MemoryType = LD->getMemoryVT();
14939
14940 // This transformation is only valid if we are loading either a byte,
14941 // halfword, word, or doubleword.
14942 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
14943 MemoryType == MVT::i32 || MemoryType == MVT::i64;
14944
14945 // Ensure that the load from the narrow width is being zero extended to i128.
14946 if (!ValidLDType ||
14947 (LD->getExtensionType() != ISD::ZEXTLOAD &&
14948 LD->getExtensionType() != ISD::EXTLOAD))
14949 return SDValue();
14950
14951 SDValue LoadOps[] = {
14952 LD->getChain(), LD->getBasePtr(),
14953 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
14954
14955 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
14956 DAG.getVTList(MVT::v1i128, MVT::Other),
14957 LoadOps, MemoryType, LD->getMemOperand());
14958}
14959
14960SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
14961 DAGCombinerInfo &DCI) const {
14962 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14963 "Should be called with a BUILD_VECTOR node");
14964
14965 SelectionDAG &DAG = DCI.DAG;
14966 SDLoc dl(N);
14967
14968 if (!Subtarget.hasVSX())
14969 return SDValue();
14970
14971 // The target independent DAG combiner will leave a build_vector of
14972 // float-to-int conversions intact. We can generate MUCH better code for
14973 // a float-to-int conversion of a vector of floats.
14974 SDValue FirstInput = N->getOperand(0);
14975 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
14976 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
14977 if (Reduced)
14978 return Reduced;
14979 }
14980
14981 // If we're building a vector out of consecutive loads, just load that
14982 // vector type.
14983 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
14984 if (Reduced)
14985 return Reduced;
14986
14987 // If we're building a vector out of extended elements from another vector
14988 // we have P9 vector integer extend instructions. The code assumes legal
14989 // input types (i.e. it can't handle things like v4i16) so do not run before
14990 // legalization.
14991 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
14992 Reduced = combineBVOfVecSExt(N, DAG);
14993 if (Reduced)
14994 return Reduced;
14995 }
14996
14997 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
14998 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
14999 // is a load from <valid narrow width> to i128.
15000 if (Subtarget.isISA3_1()) {
15001 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
15002 if (BVOfZLoad)
15003 return BVOfZLoad;
15004 }
15005
15006 if (N->getValueType(0) != MVT::v2f64)
15007 return SDValue();
15008
15009 // Looking for:
15010 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
15011 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
15012 FirstInput.getOpcode() != ISD::UINT_TO_FP)
15013 return SDValue();
15014 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
15015 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
15016 return SDValue();
15017 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
15018 return SDValue();
15019
15020 SDValue Ext1 = FirstInput.getOperand(0);
15021 SDValue Ext2 = N->getOperand(1).getOperand(0);
15022 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
15023 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
15024 return SDValue();
15025
15026 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
15027 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
15028 if (!Ext1Op || !Ext2Op)
15029 return SDValue();
15030 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
15031 Ext1.getOperand(0) != Ext2.getOperand(0))
15032 return SDValue();
15033
15034 int FirstElem = Ext1Op->getZExtValue();
15035 int SecondElem = Ext2Op->getZExtValue();
15036 int SubvecIdx;
15037 if (FirstElem == 0 && SecondElem == 1)
15038 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
15039 else if (FirstElem == 2 && SecondElem == 3)
15040 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
15041 else
15042 return SDValue();
15043
15044 SDValue SrcVec = Ext1.getOperand(0);
15045 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
15046 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
15047 return DAG.getNode(NodeType, dl, MVT::v2f64,
15048 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
15049}
15050
15051SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
15052 DAGCombinerInfo &DCI) const {
15053 assert((N->getOpcode() == ISD::SINT_TO_FP ||
15054 N->getOpcode() == ISD::UINT_TO_FP) &&
15055 "Need an int -> FP conversion node here");
15056
15057 if (useSoftFloat() || !Subtarget.has64BitSupport())
15058 return SDValue();
15059
15060 SelectionDAG &DAG = DCI.DAG;
15061 SDLoc dl(N);
15062 SDValue Op(N, 0);
15063
15064 // Don't handle ppc_fp128 here, or conversions that are beyond the range
15065 // the hardware can handle.
15066 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
15067 return SDValue();
15068 if (!Op.getOperand(0).getValueType().isSimple())
15069 return SDValue();
15070 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
15071 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
15072 return SDValue();
15073
15074 SDValue FirstOperand(Op.getOperand(0));
15075 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
15076 (FirstOperand.getValueType() == MVT::i8 ||
15077 FirstOperand.getValueType() == MVT::i16);
15078 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
15079 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
15080 bool DstDouble = Op.getValueType() == MVT::f64;
15081 unsigned ConvOp = Signed ?
15082 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
15083 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
15084 SDValue WidthConst =
15085 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
15086 dl, false);
15087 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
15088 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
15089 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
15090 DAG.getVTList(MVT::f64, MVT::Other),
15091 Ops, MVT::i8, LDN->getMemOperand());
15092 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
15093
15094 // For signed conversion, we need to sign-extend the value in the VSR
15095 if (Signed) {
15096 SDValue ExtOps[] = { Ld, WidthConst };
15097 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
15098 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
15099 } else
15100 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
15101 }
15102
15103
15104 // For i32 intermediate values, unfortunately, the conversion functions
15105 // leave the upper 32 bits of the value undefined. Within the set of
15106 // scalar instructions, we have no method for zero- or sign-extending the
15107 // value. Thus, we cannot handle i32 intermediate values here.
15108 if (Op.getOperand(0).getValueType() == MVT::i32)
15109 return SDValue();
15110
15111 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
15112 "UINT_TO_FP is supported only with FPCVT");
15113
15114 // If we have FCFIDS, then use it when converting to single-precision.
15115 // Otherwise, convert to double-precision and then round.
15116 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15117 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
15118 : PPCISD::FCFIDS)
15119 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
15120 : PPCISD::FCFID);
15121 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
15122 ? MVT::f32
15123 : MVT::f64;
15124
15125 // If we're converting from a float to an int, and back to a float again,
15126 // then we don't need the store/load pair at all.
15127 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
15128 Subtarget.hasFPCVT()) ||
15129 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
15130 SDValue Src = Op.getOperand(0).getOperand(0);
15131 if (Src.getValueType() == MVT::f32) {
15132 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
15133 DCI.AddToWorklist(Src.getNode());
15134 } else if (Src.getValueType() != MVT::f64) {
15135 // Make sure that we don't pick up a ppc_fp128 source value.
15136 return SDValue();
15137 }
15138
15139 unsigned FCTOp =
15140 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
15141 PPCISD::FCTIDUZ;
15142
15143 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
15144 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
15145
15146 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
15147 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
15148 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
15149 DCI.AddToWorklist(FP.getNode());
15150 }
15151
15152 return FP;
15153 }
15154
15155 return SDValue();
15156}
15157
15158// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
15159// builtins) into loads with swaps.
15160 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
15161 DAGCombinerInfo &DCI) const {
15162 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
15163 // load combines.
15164 if (DCI.isBeforeLegalizeOps())
15165 return SDValue();
15166
15167 SelectionDAG &DAG = DCI.DAG;
15168 SDLoc dl(N);
15169 SDValue Chain;
15170 SDValue Base;
15171 MachineMemOperand *MMO;
15172
15173 switch (N->getOpcode()) {
15174 default:
15175 llvm_unreachable("Unexpected opcode for little endian VSX load");
15176 case ISD::LOAD: {
15177 LoadSDNode *LD = cast<LoadSDNode>(N);
15178 Chain = LD->getChain();
15179 Base = LD->getBasePtr();
15180 MMO = LD->getMemOperand();
15181 // If the MMO suggests this isn't a load of a full vector, leave
15182 // things alone. For a built-in, we have to make the change for
15183 // correctness, so if there is a size problem, that will be a bug.
15184 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15185 return SDValue();
15186 break;
15187 }
15188 case ISD::INTRINSIC_W_CHAIN: {
15189 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15190 Chain = Intrin->getChain();
15191 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
15192 // us what we want. Get operand 2 instead.
15193 Base = Intrin->getOperand(2);
15194 MMO = Intrin->getMemOperand();
15195 break;
15196 }
15197 }
15198
15199 MVT VecTy = N->getValueType(0).getSimpleVT();
15200
15201 SDValue LoadOps[] = { Chain, Base };
15202 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
15203 DAG.getVTList(MVT::v2f64, MVT::Other),
15204 LoadOps, MVT::v2f64, MMO);
15205
15206 DCI.AddToWorklist(Load.getNode());
15207 Chain = Load.getValue(1);
15208 SDValue Swap = DAG.getNode(
15209 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
15210 DCI.AddToWorklist(Swap.getNode());
15211
15212 // Add a bitcast if the resulting load type doesn't match v2f64.
15213 if (VecTy != MVT::v2f64) {
15214 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
15215 DCI.AddToWorklist(N.getNode());
15216 // Package {bitcast value, swap's chain} to match Load's shape.
15217 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
15218 N, Swap.getValue(1));
15219 }
15220
15221 return Swap;
15222}
15223
15224// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
15225// builtins) into stores with swaps.
15226 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
15227 DAGCombinerInfo &DCI) const {
15228 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
15229 // store combines.
15230 if (DCI.isBeforeLegalizeOps())
15231 return SDValue();
15232
15233 SelectionDAG &DAG = DCI.DAG;
15234 SDLoc dl(N);
15235 SDValue Chain;
15236 SDValue Base;
15237 unsigned SrcOpnd;
15238 MachineMemOperand *MMO;
15239
15240 switch (N->getOpcode()) {
15241 default:
15242 llvm_unreachable("Unexpected opcode for little endian VSX store");
15243 case ISD::STORE: {
15244 StoreSDNode *ST = cast<StoreSDNode>(N);
15245 Chain = ST->getChain();
15246 Base = ST->getBasePtr();
15247 MMO = ST->getMemOperand();
15248 SrcOpnd = 1;
15249 // If the MMO suggests this isn't a store of a full vector, leave
15250 // things alone. For a built-in, we have to make the change for
15251 // correctness, so if there is a size problem, that will be a bug.
15252 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
15253 return SDValue();
15254 break;
15255 }
15256 case ISD::INTRINSIC_VOID: {
15257 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
15258 Chain = Intrin->getChain();
15259 // Intrin->getBasePtr() oddly does not get what we want.
15260 Base = Intrin->getOperand(3);
15261 MMO = Intrin->getMemOperand();
15262 SrcOpnd = 2;
15263 break;
15264 }
15265 }
15266
15267 SDValue Src = N->getOperand(SrcOpnd);
15268 MVT VecTy = Src.getValueType().getSimpleVT();
15269
15270 // All stores are done as v2f64 with a possible bitcast.
15271 if (VecTy != MVT::v2f64) {
15272 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
15273 DCI.AddToWorklist(Src.getNode());
15274 }
15275
15276 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
15277 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
15278 DCI.AddToWorklist(Swap.getNode());
15279 Chain = Swap.getValue(1);
15280 SDValue StoreOps[] = { Chain, Swap, Base };
15281 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
15282 DAG.getVTList(MVT::Other),
15283 StoreOps, VecTy, MMO);
15284 DCI.AddToWorklist(Store.getNode());
15285 return Store;
15286}
15287
15288// Handle DAG combine for STORE (FP_TO_INT F).
15289SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
15290 DAGCombinerInfo &DCI) const {
15291 SelectionDAG &DAG = DCI.DAG;
15292 SDLoc dl(N);
15293 unsigned Opcode = N->getOperand(1).getOpcode();
15294 (void)Opcode;
15295 bool Strict = N->getOperand(1)->isStrictFPOpcode();
15296
15297 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15298 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
15299 && "Not a FP_TO_INT Instruction!");
15300
15301 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
15302 EVT Op1VT = N->getOperand(1).getValueType();
15303 EVT ResVT = Val.getValueType();
15304
15305 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
15306 return SDValue();
15307
15308 // Only perform the combine for conversions to i64/i32, or to i16/i8 on Power9.
15309 bool ValidTypeForStoreFltAsInt =
15310 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
15311 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
15312
15313 // TODO: Lower conversion from f128 on all VSX targets
15314 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
15315 return SDValue();
15316
15317 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
15318 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
15319 return SDValue();
15320
15321 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
15322
15323 // Set number of bytes being converted.
15324 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
15325 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
15326 DAG.getIntPtrConstant(ByteSize, dl, false),
15327 DAG.getValueType(Op1VT)};
15328
15329 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
15330 DAG.getVTList(MVT::Other), Ops,
15331 cast<StoreSDNode>(N)->getMemoryVT(),
15332 cast<StoreSDNode>(N)->getMemOperand());
15333
15334 return Val;
15335}
15336
15337static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
15338 // Check that the source of the element keeps flipping
15339 // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
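// For example, with NumElts == 8, <0,9,2,11,4,13,6,15> alternates between the
// two source vectors, while <0,1,10,11,4,5,14,15> does not.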
15340 bool PrevElemFromFirstVec = Mask[0] < NumElts;
15341 for (int i = 1, e = Mask.size(); i < e; i++) {
15342 if (PrevElemFromFirstVec && Mask[i] < NumElts)
15343 return false;
15344 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
15345 return false;
15346 PrevElemFromFirstVec = !PrevElemFromFirstVec;
15347 }
15348 return true;
15349}
15350
15351static bool isSplatBV(SDValue Op) {
15352 if (Op.getOpcode() != ISD::BUILD_VECTOR)
15353 return false;
15354 SDValue FirstOp;
15355
15356 // Find first non-undef input.
15357 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
15358 FirstOp = Op.getOperand(i);
15359 if (!FirstOp.isUndef())
15360 break;
15361 }
15362
15363 // All inputs are undef or the same as the first non-undef input.
15364 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
15365 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
15366 return false;
15367 return true;
15368}
15369
15370 static SDValue isScalarToVec(SDValue Op) {
15371 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15372 return Op;
15373 if (Op.getOpcode() != ISD::BITCAST)
15374 return SDValue();
15375 Op = Op.getOperand(0);
15376 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
15377 return Op;
15378 return SDValue();
15379}
15380
15381// Fix up the shuffle mask to account for the fact that the result of
15382// scalar_to_vector is not in lane zero. This just takes all values in
15383// the ranges specified by the min/max indices and adds the number of
15384// elements required to ensure each element comes from the respective
15385// position in the valid lane.
15386// On little endian, that's just the corresponding element in the other
15387// half of the vector. On big endian, it is in the same half but right
15388// justified rather than left justified in that half.
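// For example, for v2i64 on little endian, HalfVec is 1 and the value placed
// by scalar_to_vector lives in element 1, so an affected mask entry of 0
// becomes 0 + HalfVec == 1.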
15389 static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl<int> &ShuffV,
15390 int LHSMaxIdx, int RHSMinIdx,
15391 int RHSMaxIdx, int HalfVec,
15392 unsigned ValidLaneWidth,
15393 const PPCSubtarget &Subtarget) {
15394 for (int i = 0, e = ShuffV.size(); i < e; i++) {
15395 int Idx = ShuffV[i];
15396 if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
15397 ShuffV[i] +=
15398 Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
15399 }
15400}
15401
15402// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
15403// the original is:
15404// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
15405// In such a case, just change the shuffle mask to extract the element
15406// from the permuted index.
15407 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
15408 const PPCSubtarget &Subtarget) {
15409 SDLoc dl(OrigSToV);
15410 EVT VT = OrigSToV.getValueType();
15411 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
15412 "Expecting a SCALAR_TO_VECTOR here");
15413 SDValue Input = OrigSToV.getOperand(0);
15414
15415 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
15416 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
15417 SDValue OrigVector = Input.getOperand(0);
15418
15419 // Can't handle non-const element indices or different vector types
15420 // for the input to the extract and the output of the scalar_to_vector.
15421 if (Idx && VT == OrigVector.getValueType()) {
15422 unsigned NumElts = VT.getVectorNumElements();
15423 assert(
15424 NumElts > 1 &&
15425 "Cannot produce a permuted scalar_to_vector for one element vector");
15426 SmallVector<int, 16> NewMask(NumElts, -1);
15427 unsigned ResultInElt = NumElts / 2;
15428 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
15429 NewMask[ResultInElt] = Idx->getZExtValue();
15430 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
15431 }
15432 }
15433 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
15434 OrigSToV.getOperand(0));
15435}
15436
15437// On little endian subtargets, combine shuffles such as:
15438// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
15439// into:
15440// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
15441// because the latter can be matched to a single instruction merge.
15442// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
15443// to put the value into element zero. Adjust the shuffle mask so that the
15444// vector can remain in permuted form (to prevent a swap prior to a shuffle).
15445// On big endian targets, this is still useful for SCALAR_TO_VECTOR
15446// nodes with elements smaller than doubleword because all the ways
15447// of getting scalar data into a vector register put the value in the
15448// rightmost element of the left half of the vector.
15449SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
15450 SelectionDAG &DAG) const {
15451 SDValue LHS = SVN->getOperand(0);
15452 SDValue RHS = SVN->getOperand(1);
15453 auto Mask = SVN->getMask();
15454 int NumElts = LHS.getValueType().getVectorNumElements();
15455 SDValue Res(SVN, 0);
15456 SDLoc dl(SVN);
15457 bool IsLittleEndian = Subtarget.isLittleEndian();
15458
15459 // On big endian targets this is only useful for subtargets with direct moves.
15460 // On little endian targets it would be useful for all subtargets with VSX.
15461 // However adding special handling for LE subtargets without direct moves
15462 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
15463 // which includes direct moves.
15464 if (!Subtarget.hasDirectMove())
15465 return Res;
15466
15467 // If this is not a shuffle of a shuffle and the first element comes from
15468 // the second vector, canonicalize to the commuted form. This will make it
15469 // more likely to match one of the single instruction patterns.
15470 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
15471 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
15472 std::swap(LHS, RHS);
15473 Res = DAG.getCommutedVectorShuffle(*SVN);
15474 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15475 }
15476
15477 // Adjust the shuffle mask if either input vector comes from a
15478 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
15479 // form (to prevent the need for a swap).
15480 SmallVector<int, 16> ShuffV(Mask);
15481 SDValue SToVLHS = isScalarToVec(LHS);
15482 SDValue SToVRHS = isScalarToVec(RHS);
15483 if (SToVLHS || SToVRHS) {
15484 // FIXME: If both LHS and RHS are SCALAR_TO_VECTOR, but are not the
15485 // same type and have differing element sizes, then do not perform
15486 // the following transformation. The current transformation for
15487 // SCALAR_TO_VECTOR assumes that both input vectors have the same
15488 // element size. This will be updated in the future to account for
15489 // differing sizes of the LHS and RHS.
15490 if (SToVLHS && SToVRHS &&
15491 (SToVLHS.getValueType().getScalarSizeInBits() !=
15492 SToVRHS.getValueType().getScalarSizeInBits()))
15493 return Res;
15494
15495 int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
15496 : SToVRHS.getValueType().getVectorNumElements();
15497 int NumEltsOut = ShuffV.size();
15498 // The width of the "valid lane" (i.e. the lane that contains the value that
15499 // is vectorized) needs to be expressed in terms of the number of elements
15500 // of the shuffle. It is therefore the ratio of the scalar sizes before
15501 // and after any bitcast.
15502 unsigned ValidLaneWidth =
15503 SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
15504 LHS.getValueType().getScalarSizeInBits()
15505 : SToVRHS.getValueType().getScalarSizeInBits() /
15506 RHS.getValueType().getScalarSizeInBits();
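// For example, a v2i64 scalar_to_vector feeding this shuffle through a
// bitcast to v16i8 gives ValidLaneWidth = 64 / 8 = 8 mask entries.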
15507
15508 // Initially assume that neither input is permuted. These will be adjusted
15509 // accordingly if either input is.
15510 int LHSMaxIdx = -1;
15511 int RHSMinIdx = -1;
15512 int RHSMaxIdx = -1;
15513 int HalfVec = LHS.getValueType().getVectorNumElements() / 2;
15514
15515 // Get the permuted scalar to vector nodes for the source(s) that come from
15516 // ISD::SCALAR_TO_VECTOR.
15517 // On big endian systems, this only makes sense for element sizes smaller
15518 // than 64 bits since for 64-bit elements, all instructions already put
15519 // the value into element zero. Since scalar size of LHS and RHS may differ
15520 // after isScalarToVec, this should be checked using their own sizes.
15521 if (SToVLHS) {
15522 if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
15523 return Res;
15524 // Set up the values for the shuffle vector fixup.
15525 LHSMaxIdx = NumEltsOut / NumEltsIn;
15526 SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
15527 if (SToVLHS.getValueType() != LHS.getValueType())
15528 SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
15529 LHS = SToVLHS;
15530 }
15531 if (SToVRHS) {
15532 if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
15533 return Res;
15534 RHSMinIdx = NumEltsOut;
15535 RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
15536 SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
15537 if (SToVRHS.getValueType() != RHS.getValueType())
15538 SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
15539 RHS = SToVRHS;
15540 }
15541
15542 // Fix up the shuffle mask to reflect where the desired element actually is.
15543 // The minimum and maximum indices that correspond to element zero for both
15544 // the LHS and RHS are computed and will control which shuffle mask entries
15545 // are to be changed. For example, if the RHS is permuted, any shuffle mask
15546 // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
15547 fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
15548 HalfVec, ValidLaneWidth, Subtarget);
15549 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15550
15551 // We may have simplified away the shuffle. We won't be able to do anything
15552 // further with it here.
15553 if (!isa<ShuffleVectorSDNode>(Res))
15554 return Res;
15555 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
15556 }
15557
15558 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
15559 // The common case after we commuted the shuffle is that the RHS is a splat
15560 // and we have elements coming in from the splat at indices that are not
15561 // conducive to using a merge.
15562 // Example:
15563 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
15564 if (!isSplatBV(TheSplat))
15565 return Res;
15566
15567 // We are looking for a mask such that all even elements are from
15568 // one vector and all odd elements from the other.
15569 if (!isAlternatingShuffMask(Mask, NumElts))
15570 return Res;
15571
15572 // Adjust the mask so we are pulling in the same index from the splat
15573 // as the index from the interesting vector in consecutive elements.
15574 if (IsLittleEndian) {
15575 // Example (even elements from first vector):
15576 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
15577 if (Mask[0] < NumElts)
15578 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15579 if (ShuffV[i] < 0)
15580 continue;
15581 // If element from non-splat is undef, pick first element from splat.
15582 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
15583 }
15584 // Example (odd elements from first vector):
15585 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
15586 else
15587 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15588 if (ShuffV[i] < 0)
15589 continue;
15590 // If element from non-splat is undef, pick first element from splat.
15591 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
15592 }
15593 } else {
15594 // Example (even elements from first vector):
15595 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
15596 if (Mask[0] < NumElts)
15597 for (int i = 0, e = Mask.size(); i < e; i += 2) {
15598 if (ShuffV[i] < 0)
15599 continue;
15600 // If element from non-splat is undef, pick first element from splat.
15601 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
15602 }
15603 // Example (odd elements from first vector):
15604 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
15605 else
15606 for (int i = 1, e = Mask.size(); i < e; i += 2) {
15607 if (ShuffV[i] < 0)
15608 continue;
15609 // If element from non-splat is undef, pick first element from splat.
15610 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
15611 }
15612 }
15613
15614 // If the RHS has undefs, we need to remove them since we may have created
15615 // a shuffle that adds those instead of the splat value.
15616 SDValue SplatVal =
15617 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
15618 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
15619
15620 if (IsLittleEndian)
15621 RHS = TheSplat;
15622 else
15623 LHS = TheSplat;
15624 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
15625}
15626
15627SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
15628 LSBaseSDNode *LSBase,
15629 DAGCombinerInfo &DCI) const {
15630 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
15631 "Not a reverse memop pattern!");
15632
15633 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
15634 auto Mask = SVN->getMask();
15635 int i = 0;
15636 auto I = Mask.rbegin();
15637 auto E = Mask.rend();
15638
15639 for (; I != E; ++I) {
15640 if (*I != i)
15641 return false;
15642 i++;
15643 }
15644 return true;
15645 };
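// For instance, for a v4i32 shuffle the element-reverse mask is <3,2,1,0>:
// walking it from the back yields 0,1,2,3, so the lambda above returns true.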
15646
15647 SelectionDAG &DAG = DCI.DAG;
15648 EVT VT = SVN->getValueType(0);
15649
15650 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
15651 return SDValue();
15652
15653 // Before Power9, the PPCVSXSwapRemoval pass rewrites the element order
15654 // (see the comment in PPCVSXSwapRemoval.cpp). This combine conflicts with
15655 // that pass, so we don't perform it on pre-Power9 subtargets.
15656 if (!Subtarget.hasP9Vector())
15657 return SDValue();
15658
15659 if (!IsElementReverse(SVN))
15660 return SDValue();
15661
15662 if (LSBase->getOpcode() == ISD::LOAD) {
15663 // If result 0 of the load has any user other than the
15664 // shufflevector instruction, it is not profitable to replace the
15665 // shufflevector with a reverse load.
15666 for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
15667 UI != UE; ++UI)
15668 if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
15669 return SDValue();
15670
15671 SDLoc dl(LSBase);
15672 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
15673 return DAG.getMemIntrinsicNode(
15674 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
15675 LSBase->getMemoryVT(), LSBase->getMemOperand());
15676 }
15677
15678 if (LSBase->getOpcode() == ISD::STORE) {
15679 // If there are other uses of the shuffle, the swap cannot be avoided.
15680 // Forcing the use of an X-Form (since swapped stores only have
15681 // X-Forms) without removing the swap is unprofitable.
15682 if (!SVN->hasOneUse())
15683 return SDValue();
15684
15685 SDLoc dl(LSBase);
15686 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
15687 LSBase->getBasePtr()};
15688 return DAG.getMemIntrinsicNode(
15689 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
15690 LSBase->getMemoryVT(), LSBase->getMemOperand());
15691 }
15692
15693 llvm_unreachable("Expected a load or store node here");
15694}
15695
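// Illustrative mapping (a sketch, not an exhaustive list): a node created
// from
//   call i32 @llvm.ppc.stwcx(ptr %p, i32 %v)
// makes the helper below return true with StoreWidth = 4; the stdcx, sthcx
// and stbcx intrinsics map to widths 8, 2 and 1 respectively.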
15696static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
15697 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
15698 if (IntrinsicID == Intrinsic::ppc_stdcx)
15699 StoreWidth = 8;
15700 else if (IntrinsicID == Intrinsic::ppc_stwcx)
15701 StoreWidth = 4;
15702 else if (IntrinsicID == Intrinsic::ppc_sthcx)
15703 StoreWidth = 2;
15704 else if (IntrinsicID == Intrinsic::ppc_stbcx)
15705 StoreWidth = 1;
15706 else
15707 return false;
15708 return true;
15709}
15710
15711SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
15712 DAGCombinerInfo &DCI) const {
15713 SelectionDAG &DAG = DCI.DAG;
15714 SDLoc dl(N);
15715 switch (N->getOpcode()) {
15716 default: break;
15717 case ISD::ADD:
15718 return combineADD(N, DCI);
15719 case ISD::AND: {
15720 // We don't want (and (zext (shift...)), C) if C fits in the width of the
15721 // original input as that will prevent us from selecting optimal rotates.
15722 // This only matters if the input to the extend is i32 widened to i64.
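// A sketch of the rewrite (illustrative values):
//   (and (zext (srl i32 %x, 4) to i64), 0xff)
// becomes
//   (zext (and (srl i32 %x, 4), 0xff) to i64)
// so the narrow AND can later fold into a single rotate-and-mask (rlwinm).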
15723 SDValue Op1 = N->getOperand(0);
15724 SDValue Op2 = N->getOperand(1);
15725 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
15726 Op1.getOpcode() != ISD::ANY_EXTEND) ||
15727 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
15728 Op1.getOperand(0).getValueType() != MVT::i32)
15729 break;
15730 SDValue NarrowOp = Op1.getOperand(0);
15731 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
15732 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
15733 break;
15734
15735 uint64_t Imm = Op2->getAsZExtVal();
15736 // Make sure that the constant is narrow enough to fit in the narrow type.
15737 if (!isUInt<32>(Imm))
15738 break;
15739 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
15740 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
15741 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
15742 }
15743 case ISD::SHL:
15744 return combineSHL(N, DCI);
15745 case ISD::SRA:
15746 return combineSRA(N, DCI);
15747 case ISD::SRL:
15748 return combineSRL(N, DCI);
15749 case ISD::MUL:
15750 return combineMUL(N, DCI);
15751 case ISD::FMA:
15752 case PPCISD::FNMSUB:
15753 return combineFMALike(N, DCI);
15754 case PPCISD::SHL:
15755 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15756 return N->getOperand(0);
15757 break;
15758 case PPCISD::SRL:
15759 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15760 return N->getOperand(0);
15761 break;
15762 case PPCISD::SRA:
15763 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15764 if (C->isZero() || // 0 >>s V -> 0.
15765 C->isAllOnes()) // -1 >>s V -> -1.
15766 return N->getOperand(0);
15767 }
15768 break;
15769 case ISD::SIGN_EXTEND:
15770 case ISD::ZERO_EXTEND:
15771 case ISD::ANY_EXTEND:
15772 return DAGCombineExtBoolTrunc(N, DCI);
15773 case ISD::TRUNCATE:
15774 return combineTRUNCATE(N, DCI);
15775 case ISD::SETCC:
15776 if (SDValue CSCC = combineSetCC(N, DCI))
15777 return CSCC;
15778 [[fallthrough]];
15779 case ISD::SELECT_CC:
15780 return DAGCombineTruncBoolExt(N, DCI);
15781 case ISD::SINT_TO_FP:
15782 case ISD::UINT_TO_FP:
15783 return combineFPToIntToFP(N, DCI);
15784 case ISD::VECTOR_SHUFFLE:
15785 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15786 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15787 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15788 }
15789 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15790 case ISD::STORE: {
15791
15792 EVT Op1VT = N->getOperand(1).getValueType();
15793 unsigned Opcode = N->getOperand(1).getOpcode();
15794
15795 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
15796 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
15797 SDValue Val = combineStoreFPToInt(N, DCI);
15798 if (Val)
15799 return Val;
15800 }
15801
15802 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15803 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15804 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15805 if (Val)
15806 return Val;
15807 }
15808
15809 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15810 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15811 N->getOperand(1).getNode()->hasOneUse() &&
15812 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15813 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15814
15815 // STBRX can only handle simple types, and it makes no sense to store fewer
15816 // than two bytes in byte-reversed order.
15817 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15818 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15819 break;
15820
15821 SDValue BSwapOp = N->getOperand(1).getOperand(0);
15822 // Do an any-extend to 32-bits if this is a half-word input.
15823 if (BSwapOp.getValueType() == MVT::i16)
15824 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15825
15826 // If the type of the BSWAP operand is wider than the stored memory width,
15827 // it needs to be shifted right before the STBRX.
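// For example (illustrative widths): storing bswap(i64 %x) through a 4-byte
// memory VT gives Shift = 64 - 32 = 32, so %x is shifted right by 32 and
// then truncated to i32 before the byte-reversed store is emitted.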
15828 if (Op1VT.bitsGT(mVT)) {
15829 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15830 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15831 DAG.getConstant(Shift, dl, MVT::i32));
15832 // Need to truncate if this is a bswap of i64 stored as i32/i16.
15833 if (Op1VT == MVT::i64)
15834 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15835 }
15836
15837 SDValue Ops[] = {
15838 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15839 };
15840 return
15841 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
15842 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15843 cast<StoreSDNode>(N)->getMemOperand());
15844 }
15845
15846 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
15847 // So it can increase the chance of CSE constant construction.
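// For example (illustrative): a function storing both i32 0 and i64 0 would
// otherwise materialize two separate zero constants; rewriting the i32 store
// as a truncating store of Constant:i64<0> lets CSE share a single zero.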
15848 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15849 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15850 // Need to sign-extend to 64 bits to handle negative values.
15851 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15852 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15853 MemVT.getSizeInBits());
15854 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15855
15856 // DAG.getTruncStore() can't be used here because it doesn't accept
15857 // the general (base + offset) addressing mode.
15858 // So we use UpdateNodeOperands and setTruncatingStore instead.
15859 DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15860 N->getOperand(3));
15861 cast<StoreSDNode>(N)->setTruncatingStore(true);
15862 return SDValue(N, 0);
15863 }
15864
15865 // For little endian, VSX stores require generating xxswapd/lxvd2x.
15866 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15867 if (Op1VT.isSimple()) {
15868 MVT StoreVT = Op1VT.getSimpleVT();
15869 if (Subtarget.needsSwapsForVSXMemOps() &&
15870 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15871 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15872 return expandVSXStoreForLE(N, DCI);
15873 }
15874 break;
15875 }
15876 case ISD::LOAD: {
15877 LoadSDNode *LD = cast<LoadSDNode>(N);
15878 EVT VT = LD->getValueType(0);
15879
15880 // For little endian, VSX loads require generating lxvd2x/xxswapd.
15881 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15882 if (VT.isSimple()) {
15883 MVT LoadVT = VT.getSimpleVT();
15884 if (Subtarget.needsSwapsForVSXMemOps() &&
15885 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15886 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15887 return expandVSXLoadForLE(N, DCI);
15888 }
15889
15890 // We sometimes end up with a 64-bit integer load, from which we extract
15891 // two single-precision floating-point numbers. This happens with
15892 // std::complex<float>, and other similar structures, because of the way we
15893 // canonicalize structure copies. However, if we lack direct moves,
15894 // then the final bitcasts from the extracted integer values to the
15895 // floating-point numbers turn into store/load pairs. Even with direct moves,
15896 // just loading the two floating-point numbers is likely better.
15897 auto ReplaceTwoFloatLoad = [&]() {
15898 if (VT != MVT::i64)
15899 return false;
15900
15901 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15902 LD->isVolatile())
15903 return false;
15904
15905 // We're looking for a sequence like this:
15906 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15907 // t16: i64 = srl t13, Constant:i32<32>
15908 // t17: i32 = truncate t16
15909 // t18: f32 = bitcast t17
15910 // t19: i32 = truncate t13
15911 // t20: f32 = bitcast t19
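// A rough sketch of the replacement (hypothetical node numbers): the i64
// load and the srl/truncate/bitcast chain above become two direct loads,
//   t21: f32,ch = load<LD4[%ref.tmp]> t0, t6
//   t22: f32,ch = load<LD4[%ref.tmp+4]> t21:1, (add t6, 4)
// and the two bitcasts are replaced in memory-layout order.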
15912
15913 if (!LD->hasNUsesOfValue(2, 0))
15914 return false;
15915
15916 auto UI = LD->use_begin();
15917 while (UI.getUse().getResNo() != 0) ++UI;
15918 SDNode *Trunc = *UI++;
15919 while (UI.getUse().getResNo() != 0) ++UI;
15920 SDNode *RightShift = *UI;
15921 if (Trunc->getOpcode() != ISD::TRUNCATE)
15922 std::swap(Trunc, RightShift);
15923
15924 if (Trunc->getOpcode() != ISD::TRUNCATE ||
15925 Trunc->getValueType(0) != MVT::i32 ||
15926 !Trunc->hasOneUse())
15927 return false;
15928 if (RightShift->getOpcode() != ISD::SRL ||
15929 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15930 RightShift->getConstantOperandVal(1) != 32 ||
15931 !RightShift->hasOneUse())
15932 return false;
15933
15934 SDNode *Trunc2 = *RightShift->use_begin();
15935 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
15936 Trunc2->getValueType(0) != MVT::i32 ||
15937 !Trunc2->hasOneUse())
15938 return false;
15939
15940 SDNode *Bitcast = *Trunc->use_begin();
15941 SDNode *Bitcast2 = *Trunc2->use_begin();
15942
15943 if (Bitcast->getOpcode() != ISD::BITCAST ||
15944 Bitcast->getValueType(0) != MVT::f32)
15945 return false;
15946 if (Bitcast2->getOpcode() != ISD::BITCAST ||
15947 Bitcast2->getValueType(0) != MVT::f32)
15948 return false;
15949
15950 if (Subtarget.isLittleEndian())
15951 std::swap(Bitcast, Bitcast2);
15952
15953 // Bitcast has the second float (in memory-layout order) and Bitcast2
15954 // has the first one.
15955
15956 SDValue BasePtr = LD->getBasePtr();
15957 if (LD->isIndexed()) {
15958 assert(LD->getAddressingMode() == ISD::PRE_INC &&
15959 "Non-pre-inc AM on PPC?");
15960 BasePtr =
15961 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15962 LD->getOffset());
15963 }
15964
15965 auto MMOFlags =
15966 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
15967 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
15968 LD->getPointerInfo(), LD->getAlign(),
15969 MMOFlags, LD->getAAInfo());
15970 SDValue AddPtr =
15971 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
15972 BasePtr, DAG.getIntPtrConstant(4, dl));
15973 SDValue FloatLoad2 = DAG.getLoad(
15974 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
15975 LD->getPointerInfo().getWithOffset(4),
15976 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
15977
15978 if (LD->isIndexed()) {
15979 // Note that DAGCombine should re-form any pre-increment load(s) from
15980 // what is produced here if that makes sense.
15981 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
15982 }
15983
15984 DCI.CombineTo(Bitcast2, FloatLoad);
15985 DCI.CombineTo(Bitcast, FloatLoad2);
15986
15987 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
15988 SDValue(FloatLoad2.getNode(), 1));
15989 return true;
15990 };
15991
15992 if (ReplaceTwoFloatLoad())
15993 return SDValue(N, 0);
15994
15995 EVT MemVT = LD->getMemoryVT();
15996 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
15997 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
15998 if (LD->isUnindexed() && VT.isVector() &&
15999 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
16000 // P8 and later hardware should just use LOAD.
16001 !Subtarget.hasP8Vector() &&
16002 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
16003 VT == MVT::v4f32))) &&
16004 LD->getAlign() < ABIAlignment) {
16005 // This is a type-legal unaligned Altivec load.
16006 SDValue Chain = LD->getChain();
16007 SDValue Ptr = LD->getBasePtr();
16008 bool isLittleEndian = Subtarget.isLittleEndian();
16009
16010 // This implements the loading of unaligned vectors as described in
16011 // the venerable Apple Velocity Engine overview. Specifically:
16012 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
16013 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
16014 //
16015 // The general idea is to expand a sequence of one or more unaligned
16016 // loads into an alignment-based permutation-control instruction (lvsl
16017 // or lvsr), a series of regular vector loads (which always truncate
16018 // their input address to an aligned address), and a series of
16019 // permutations. The results of these permutations are the requested
16020 // loaded values. The trick is that the last "extra" load is not taken
16021 // from the address you might suspect (sizeof(vector) bytes after the
16022 // last requested load), but rather sizeof(vector) - 1 bytes after the
16023 // last requested vector. The point of this is to avoid a page fault if
16024 // the base address happened to be aligned. This works because if the
16025 // base address is aligned, then adding less than a full vector length
16026 // will cause the last vector in the sequence to be (re)loaded.
16027 // Otherwise, the next vector will be fetched from the following aligned
16028 // address, as is actually necessary.
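// Worked example (hypothetical addresses): for a 16-byte load from 0x1003,
// lvx fetches the aligned vectors at 0x1000 and 0x1010 (the latter reached
// via base + 15), and vperm selects bytes 3..18. Had the base been 0x1000,
// base + 15 would still round down to 0x1000, so no byte past the requested
// vector is ever touched and no page fault can occur.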
16029
16030 // We might be able to reuse the permutation generation from
16031 // a different base address offset from this one by an aligned amount.
16032 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
16033 // optimization later.
16034 Intrinsic::ID Intr, IntrLD, IntrPerm;
16035 MVT PermCntlTy, PermTy, LDTy;
16036 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16037 : Intrinsic::ppc_altivec_lvsl;
16038 IntrLD = Intrinsic::ppc_altivec_lvx;
16039 IntrPerm = Intrinsic::ppc_altivec_vperm;
16040 PermCntlTy = MVT::v16i8;
16041 PermTy = MVT::v4i32;
16042 LDTy = MVT::v4i32;
16043
16044 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
16045
16046 // Create the new MMO for the new base load. It is like the original MMO,
16047 // but represents an area in memory almost twice the vector size centered
16048 // on the original address. If the address is unaligned, we might start
16049 // reading up to (sizeof(vector)-1) bytes below the address of the
16050 // original unaligned load.
16051 MachineFunction &MF = DAG.getMachineFunction();
16052 MachineMemOperand *BaseMMO =
16053 MF.getMachineMemOperand(LD->getMemOperand(),
16054 -(int64_t)MemVT.getStoreSize()+1,
16055 2*MemVT.getStoreSize()-1);
16056
16057 // Create the new base load.
16058 SDValue LDXIntID =
16059 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
16060 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
16061 SDValue BaseLoad =
16062 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16063 DAG.getVTList(PermTy, MVT::Other),
16064 BaseLoadOps, LDTy, BaseMMO);
16065
16066 // Note that the value of IncOffset (which is provided to the next
16067 // load's pointer info offset value, and thus used to calculate the
16068 // alignment), and the value of IncValue (which is actually used to
16069 // increment the pointer value) are different! This is because we
16070 // require the next load to appear to be aligned, even though it
16071 // is actually offset from the base pointer by a lesser amount.
16072 int IncOffset = VT.getSizeInBits() / 8;
16073 int IncValue = IncOffset;
16074
16075 // Walk (both up and down) the chain looking for another load at the real
16076 // (aligned) offset (the alignment of the other load does not matter in
16077 // this case). If found, then do not use the offset reduction trick, as
16078 // that will prevent the loads from being later combined (as they would
16079 // otherwise be duplicates).
16080 if (!findConsecutiveLoad(LD, DAG))
16081 --IncValue;
16082
16083 SDValue Increment =
16084 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
16085 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
16086
16087 MachineMemOperand *ExtraMMO =
16088 MF.getMachineMemOperand(LD->getMemOperand(),
16089 1, 2*MemVT.getStoreSize()-1);
16090 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
16091 SDValue ExtraLoad =
16092 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
16093 DAG.getVTList(PermTy, MVT::Other),
16094 ExtraLoadOps, LDTy, ExtraMMO);
16095
16096 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16097 BaseLoad.getValue(1), ExtraLoad.getValue(1));
16098
16099 // Because vperm has a big-endian bias, we must reverse the order
16100 // of the input vectors and complement the permute control vector
16101 // when generating little endian code. We have already handled the
16102 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
16103 // and ExtraLoad here.
16104 SDValue Perm;
16105 if (isLittleEndian)
16106 Perm = BuildIntrinsicOp(IntrPerm,
16107 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
16108 else
16109 Perm = BuildIntrinsicOp(IntrPerm,
16110 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
16111
16112 if (VT != PermTy)
16113 Perm = Subtarget.hasAltivec()
16114 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
16115 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
16116 DAG.getTargetConstant(1, dl, MVT::i64));
16117 // second argument is 1 because this rounding
16118 // is always exact.
16119
16120 // The output of the permutation is our loaded result, the TokenFactor is
16121 // our new chain.
16122 DCI.CombineTo(N, Perm, TF);
16123 return SDValue(N, 0);
16124 }
16125 }
16126 break;
16127 case ISD::INTRINSIC_WO_CHAIN: {
16128 bool isLittleEndian = Subtarget.isLittleEndian();
16129 unsigned IID = N->getConstantOperandVal(0);
16130 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
16131 : Intrinsic::ppc_altivec_lvsl);
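// The lvsl/lvsr permute control depends only on the low four bits of the
// address. Illustrative consequence: lvsl(%p + 32) equals lvsl(%p) because
// 32 is a multiple of 16, so an existing node computed from an address that
// differs by an aligned amount can simply be reused below.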
16132 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
16133 SDValue Add = N->getOperand(1);
16134
16135 int Bits = 4 /* 16 byte alignment */;
16136
16137 if (DAG.MaskedValueIsZero(Add->getOperand(1),
16138 APInt::getAllOnes(Bits /* alignment */)
16139 .zext(Add.getScalarValueSizeInBits()))) {
16140 SDNode *BasePtr = Add->getOperand(0).getNode();
16141 for (SDNode *U : BasePtr->uses()) {
16142 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16143 U->getConstantOperandVal(0) == IID) {
16144 // We've found another LVSL/LVSR, and this address is an aligned
16145 // multiple of that one. The results will be the same, so use the
16146 // one we've just found instead.
16147
16148 return SDValue(U, 0);
16149 }
16150 }
16151 }
16152
16153 if (isa<ConstantSDNode>(Add->getOperand(1))) {
16154 SDNode *BasePtr = Add->getOperand(0).getNode();
16155 for (SDNode *U : BasePtr->uses()) {
16156 if (U->getOpcode() == ISD::ADD &&
16157 isa<ConstantSDNode>(U->getOperand(1)) &&
16158 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
16159 (1ULL << Bits) ==
16160 0) {
16161 SDNode *OtherAdd = U;
16162 for (SDNode *V : OtherAdd->uses()) {
16163 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16164 V->getConstantOperandVal(0) == IID) {
16165 return SDValue(V, 0);
16166 }
16167 }
16168 }
16169 }
16170 }
16171 }
16172
16173 // Combine vmaxsw/h/b(a, negation of a) into abs(a)
16174 // to expose the vabsduw/h/b opportunity to downstream combines.
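// For example (an illustrative IR sketch):
//   %neg = sub <4 x i32> zeroinitializer, %a
//   %max = call <4 x i32> @llvm.ppc.altivec.vmaxsw(<4 x i32> %neg,
//                                                  <4 x i32> %a)
// matches the (0-a, a) pattern below and is folded to abs(%a).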
16175 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
16176 (IID == Intrinsic::ppc_altivec_vmaxsw ||
16177 IID == Intrinsic::ppc_altivec_vmaxsh ||
16178 IID == Intrinsic::ppc_altivec_vmaxsb)) {
16179 SDValue V1 = N->getOperand(1);
16180 SDValue V2 = N->getOperand(2);
16181 if ((V1.getSimpleValueType() == MVT::v4i32 ||
16182 V1.getSimpleValueType() == MVT::v8i16 ||
16183 V1.getSimpleValueType() == MVT::v16i8) &&
16184 V1.getSimpleValueType() == V2.getSimpleValueType()) {
16185 // (0-a, a)
16186 if (V1.getOpcode() == ISD::SUB &&
16187 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
16188 V1.getOperand(1) == V2) {
16189 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
16190 }
16191 // (a, 0-a)
16192 if (V2.getOpcode() == ISD::SUB &&
16193 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
16194 V2.getOperand(1) == V1) {
16195 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16196 }
16197 // (x-y, y-x)
16198 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
16199 V1.getOperand(0) == V2.getOperand(1) &&
16200 V1.getOperand(1) == V2.getOperand(0)) {
16201 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
16202 }
16203 }
16204 }
16205 }
16206
16207 break;
16208 case ISD::INTRINSIC_W_CHAIN:
16209 switch (N->getConstantOperandVal(1)) {
16210 default:
16211 break;
16212 case Intrinsic::ppc_altivec_vsum4sbs:
16213 case Intrinsic::ppc_altivec_vsum4shs:
16214 case Intrinsic::ppc_altivec_vsum4ubs: {
16215 // These sum-across intrinsics only have a chain due to the side effect
16216 // that they may set the SAT bit. If we know the SAT bit will not be set
16217 // for some inputs, we can replace any uses of their chain with the
16218 // input chain.
16219 if (BuildVectorSDNode *BVN =
16220 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
16221 APInt APSplatBits, APSplatUndef;
16222 unsigned SplatBitSize;
16223 bool HasAnyUndefs;
16224 bool BVNIsConstantSplat = BVN->isConstantSplat(
16225 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
16226 !Subtarget.isLittleEndian());
16227 // If the constant splat vector is 0, the SAT bit will not be set.
16228 if (BVNIsConstantSplat && APSplatBits == 0)
16229 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
16230 }
16231 return SDValue();
16232 }
16233 case Intrinsic::ppc_vsx_lxvw4x:
16234 case Intrinsic::ppc_vsx_lxvd2x:
16235 // For little endian, VSX loads require generating lxvd2x/xxswapd.
16236 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
16237 if (Subtarget.needsSwapsForVSXMemOps())
16238 return expandVSXLoadForLE(N, DCI);
16239 break;
16240 }
16241 break;
16242 case ISD::INTRINSIC_VOID:
16243 // For little endian, VSX stores require generating xxswapd/stxvd2x.
16244 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
16245 if (Subtarget.needsSwapsForVSXMemOps()) {
16246 switch (N->getConstantOperandVal(1)) {
16247 default:
16248 break;
16249 case Intrinsic::ppc_vsx_stxvw4x:
16250 case Intrinsic::ppc_vsx_stxvd2x:
16251 return expandVSXStoreForLE(N, DCI);
16252 }
16253 }
16254 break;
16255 case ISD::BSWAP: {
16256 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
16257 // For subtargets without LDBRX, we can still do better than the default
16258 // expansion even for 64-bit BSWAP (LOAD).
16259 bool Is64BitBswapOn64BitTgt =
16260 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
16261 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
16262 N->getOperand(0).hasOneUse();
16263 if (IsSingleUseNormalLd &&
16264 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
16265 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
16266 SDValue Load = N->getOperand(0);
16267 LoadSDNode *LD = cast<LoadSDNode>(Load);
16268 // Create the byte-swapping load.
16269 SDValue Ops[] = {
16270 LD->getChain(), // Chain
16271 LD->getBasePtr(), // Ptr
16272 DAG.getValueType(N->getValueType(0)) // VT
16273 };
16274 SDValue BSLoad =
16275 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
16276 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
16277 MVT::i64 : MVT::i32, MVT::Other),
16278 Ops, LD->getMemoryVT(), LD->getMemOperand());
16279
16280 // If this is an i16 load, insert the truncate.
16281 SDValue ResVal = BSLoad;
16282 if (N->getValueType(0) == MVT::i16)
16283 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
16284
16285 // First, combine the bswap away. This makes the value produced by the
16286 // load dead.
16287 DCI.CombineTo(N, ResVal);
16288
16289 // Next, combine the load away, we give it a bogus result value but a real
16290 // chain result. The result value is dead because the bswap is dead.
16291 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
16292
16293 // Return N so it doesn't get rechecked!
16294 return SDValue(N, 0);
16295 }
16296 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
16297 // before legalization so that the BUILD_PAIR is handled correctly.
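// A sketch of that expansion (illustrative):
//   Lo = bswap(load i32 [%p])   Hi = bswap(load i32 [%p + 4])
//   Res = BUILD_PAIR(Hi, Lo)    // little endian; operands swap on BE
// with a TokenFactor of the two load chains replacing the original chain.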
16298 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
16299 !IsSingleUseNormalLd)
16300 return SDValue();
16301 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
16302
16303 // Can't split volatile or atomic loads.
16304 if (!LD->isSimple())
16305 return SDValue();
16306 SDValue BasePtr = LD->getBasePtr();
16307 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
16308 LD->getPointerInfo(), LD->getAlign());
16309 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
16310 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
16311 DAG.getIntPtrConstant(4, dl));
16312 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
16313 LD->getMemOperand(), 4, 4);
16314 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
16315 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
16316 SDValue Res;
16317 if (Subtarget.isLittleEndian())
16318 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
16319 else
16320 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
16321 SDValue TF =
16322 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
16323 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
16324 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
16325 return Res;
16326 }
16327 case PPCISD::VCMP:
16328 // If a VCMP_rec node already exists with exactly the same operands as this
16329 // node, use its result instead of this node (VCMP_rec computes both a CR6
16330 // and a normal output).
16331 //
16332 if (!N->getOperand(0).hasOneUse() &&
16333 !N->getOperand(1).hasOneUse() &&
16334 !N->getOperand(2).hasOneUse()) {
16335
16336 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
16337 SDNode *VCMPrecNode = nullptr;
16338
16339 SDNode *LHSN = N->getOperand(0).getNode();
16340 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
16341 UI != E; ++UI)
16342 if (UI->getOpcode() == PPCISD::VCMP_rec &&
16343 UI->getOperand(1) == N->getOperand(1) &&
16344 UI->getOperand(2) == N->getOperand(2) &&
16345 UI->getOperand(0) == N->getOperand(0)) {
16346 VCMPrecNode = *UI;
16347 break;
16348 }
16349
16350 // If there is no VCMP_rec node, or if the flag value has a single use,
16351 // don't transform this.
16352 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
16353 break;
16354
16355 // Look at the (necessarily single) use of the flag value. If it has a
16356 // chain, this transformation is more complex. Note that multiple things
16357 // could use the value result, which we should ignore.
16358 SDNode *FlagUser = nullptr;
16359 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
16360 FlagUser == nullptr; ++UI) {
16361 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
16362 SDNode *User = *UI;
16363 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
16364 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
16365 FlagUser = User;
16366 break;
16367 }
16368 }
16369 }
16370
16371 // If the user is an MFOCRF instruction, we know this is safe.
16372 // Otherwise, we give up for now.
16373 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
16374 return SDValue(VCMPrecNode, 0);
16375 }
16376 break;
16377 case ISD::BR_CC: {
16378 // If this is a branch on an altivec predicate comparison, lower this so
16379 // that we don't have to do an MFOCRF: instead, branch directly on CR6. This
16380 // lowering is done pre-legalize, because the legalizer lowers the predicate
16381 // compare down to code that is difficult to reassemble.
16382 // This code also handles branches that depend on the result of a store
16383 // conditional.
16384 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
16385 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
16386
16387 int CompareOpc;
16388 bool isDot;
16389
16390 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
16391 break;
16392
16393 // Since we are doing this pre-legalize, the RHS can be a constant of
16394 // arbitrary bitwidth which may cause issues when trying to get the value
16395 // from the underlying APInt.
16396 auto RHSAPInt = RHS->getAsAPIntVal();
16397 if (!RHSAPInt.isIntN(64))
16398 break;
16399
16400 unsigned Val = RHSAPInt.getZExtValue();
16401 auto isImpossibleCompare = [&]() {
16402 // If this is a comparison against something other than 0/1, then we know
16403 // that the condition is never/always true.
16404 if (Val != 0 && Val != 1) {
16405 if (CC == ISD::SETEQ) // Cond never true, remove branch.
16406 return N->getOperand(0);
16407 // Always !=, turn it into an unconditional branch.
16408 return DAG.getNode(ISD::BR, dl, MVT::Other,
16409 N->getOperand(0), N->getOperand(4));
16410 }
16411 return SDValue();
16412 };
16413 // Combine branches fed by store conditional instructions (st[bhwd]cx).
16414 unsigned StoreWidth = 0;
16415 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
16416 isStoreConditional(LHS, StoreWidth)) {
16417 if (SDValue Impossible = isImpossibleCompare())
16418 return Impossible;
16419 PPC::Predicate CompOpc;
16420 // eq 0 => ne
16421 // ne 0 => eq
16422 // eq 1 => eq
16423 // ne 1 => ne
16424 if (Val == 0)
16425 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
16426 else
16427 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
16428
16429 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
16430 DAG.getConstant(StoreWidth, dl, MVT::i32)};
16431 auto *MemNode = cast<MemSDNode>(LHS);
16432 SDValue ConstSt = DAG.getMemIntrinsicNode(
16433 PPCISD::STORE_COND, dl,
16434 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
16435 MemNode->getMemoryVT(), MemNode->getMemOperand());
16436
16437 SDValue InChain;
16438 // Unchain the branch from the original store conditional.
16439 if (N->getOperand(0) == LHS.getValue(1))
16440 InChain = LHS.getOperand(0);
16441 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
16442 SmallVector<SDValue, 4> InChains;
16443 SDValue InTF = N->getOperand(0);
16444 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
16445 if (InTF.getOperand(i) != LHS.getValue(1))
16446 InChains.push_back(InTF.getOperand(i));
16447 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
16448 }
16449
16450 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
16451 DAG.getConstant(CompOpc, dl, MVT::i32),
16452 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
16453 ConstSt.getValue(2));
16454 }
16455
16456 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
16457 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
16458 assert(isDot && "Can't compare against a vector result!");
16459
16460 if (SDValue Impossible = isImpossibleCompare())
16461 return Impossible;
16462
16463 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
16464 // Create the PPCISD altivec 'dot' comparison node.
16465 SDValue Ops[] = {
16466 LHS.getOperand(2), // LHS of compare
16467 LHS.getOperand(3), // RHS of compare
16468 DAG.getConstant(CompareOpc, dl, MVT::i32)
16469 };
16470 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
16471 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
16472
16473 // Unpack the result based on how the target uses it.
16474 PPC::Predicate CompOpc;
16475 switch (LHS.getConstantOperandVal(1)) {
16476 default: // Can't happen, don't crash on invalid number though.
16477 case 0: // Branch on the value of the EQ bit of CR6.
16478 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
16479 break;
16480 case 1: // Branch on the inverted value of the EQ bit of CR6.
16481 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
16482 break;
16483 case 2: // Branch on the value of the LT bit of CR6.
16484 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
16485 break;
16486 case 3: // Branch on the inverted value of the LT bit of CR6.
16487 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
16488 break;
16489 }
16490
16491 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
16492 DAG.getConstant(CompOpc, dl, MVT::i32),
16493 DAG.getRegister(PPC::CR6, MVT::i32),
16494 N->getOperand(4), CompNode.getValue(1));
16495 }
16496 break;
16497 }
16498 case ISD::BUILD_VECTOR:
16499 return DAGCombineBuildVector(N, DCI);
16500 }
16501
16502 return SDValue();
16503}
16504
16505SDValue
16506PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
16507 SelectionDAG &DAG,
16508 SmallVectorImpl<SDNode *> &Created) const {
16509 // fold (sdiv X, pow2)
16510 EVT VT = N->getValueType(0);
16511 if (VT == MVT::i64 && !Subtarget.isPPC64())
16512 return SDValue();
16513 if ((VT != MVT::i32 && VT != MVT::i64) ||
16514 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
16515 return SDValue();
16516
16517 SDLoc DL(N);
16518 SDValue N0 = N->getOperand(0);
16519
16520 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
16521 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
16522 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
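// Worked example (illustrative): for X sdiv -8 we get IsNegPow2 = true and
// Lg2 = 3, so we emit (sub 0, (sra_addze X, 3)), i.e. srawi/addze followed
// by a negate.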
16523
16524 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
16525 Created.push_back(Op.getNode());
16526
16527 if (IsNegPow2) {
16528 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
16529 Created.push_back(Op.getNode());
16530 }
16531
16532 return Op;
16533}
16534
16535//===----------------------------------------------------------------------===//
16536// Inline Assembly Support
16537//===----------------------------------------------------------------------===//
16538
16539void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
16540 KnownBits &Known,
16541 const APInt &DemandedElts,
16542 const SelectionDAG &DAG,
16543 unsigned Depth) const {
16544 Known.resetAll();
16545 switch (Op.getOpcode()) {
16546 default: break;
16547 case PPCISD::LBRX: {
16548 // lhbrx is known to have the top bits cleared out.
16549 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
16550 Known.Zero = 0xFFFF0000;
16551 break;
16552 }
16553 case ISD::INTRINSIC_WO_CHAIN: {
16554 switch (Op.getConstantOperandVal(0)) {
16555 default: break;
16556 case Intrinsic::ppc_altivec_vcmpbfp_p:
16557 case Intrinsic::ppc_altivec_vcmpeqfp_p:
16558 case Intrinsic::ppc_altivec_vcmpequb_p:
16559 case Intrinsic::ppc_altivec_vcmpequh_p:
16560 case Intrinsic::ppc_altivec_vcmpequw_p:
16561 case Intrinsic::ppc_altivec_vcmpequd_p:
16562 case Intrinsic::ppc_altivec_vcmpequq_p:
16563 case Intrinsic::ppc_altivec_vcmpgefp_p:
16564 case Intrinsic::ppc_altivec_vcmpgtfp_p:
16565 case Intrinsic::ppc_altivec_vcmpgtsb_p:
16566 case Intrinsic::ppc_altivec_vcmpgtsh_p:
16567 case Intrinsic::ppc_altivec_vcmpgtsw_p:
16568 case Intrinsic::ppc_altivec_vcmpgtsd_p:
16569 case Intrinsic::ppc_altivec_vcmpgtsq_p:
16570 case Intrinsic::ppc_altivec_vcmpgtub_p:
16571 case Intrinsic::ppc_altivec_vcmpgtuh_p:
16572 case Intrinsic::ppc_altivec_vcmpgtuw_p:
16573 case Intrinsic::ppc_altivec_vcmpgtud_p:
16574 case Intrinsic::ppc_altivec_vcmpgtuq_p:
16575 Known.Zero = ~1U; // All bits but the low one are known to be zero.
16576 break;
16577 }
16578 break;
16579 }
16580 case ISD::INTRINSIC_W_CHAIN: {
16581 switch (Op.getConstantOperandVal(1)) {
16582 default:
16583 break;
16584 case Intrinsic::ppc_load2r:
16585 // Top bits are cleared for load2r (which is the same as lhbrx).
16586 Known.Zero = 0xFFFF0000;
16587 break;
16588 }
16589 break;
16590 }
16591 }
16592}
16593
16594Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
16595 switch (Subtarget.getCPUDirective()) {
16596 default: break;
16597 case PPC::DIR_970:
16598 case PPC::DIR_PWR4:
16599 case PPC::DIR_PWR5:
16600 case PPC::DIR_PWR5X:
16601 case PPC::DIR_PWR6:
16602 case PPC::DIR_PWR6X:
16603 case PPC::DIR_PWR7:
16604 case PPC::DIR_PWR8:
16605 case PPC::DIR_PWR9:
16606 case PPC::DIR_PWR10:
16607 case PPC::DIR_PWR_FUTURE: {
16608 if (!ML)
16609 break;
16610
16611 if (!DisableInnermostLoopAlign32) {
16612 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
16613 // so that we can decrease cache misses and branch-prediction misses.
16614 // Actual alignment of the loop will depend on the hotness check and other
16615 // logic in alignBlocks.
16616 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
16617 return Align(32);
16618 }
16619
16620 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
16621
16622 // For small loops (between 5 and 8 instructions), align to a 32-byte
16623 // boundary so that the entire loop fits in one instruction-cache line.
16624 uint64_t LoopSize = 0;
16625 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
16626 for (const MachineInstr &J : **I) {
16627 LoopSize += TII->getInstSizeInBytes(J);
16628 if (LoopSize > 32)
16629 break;
16630 }
16631
16632 if (LoopSize > 16 && LoopSize <= 32)
16633 return Align(32);
16634
16635 break;
16636 }
16637 }
16638
16639 return TargetLowering::getPrefLoopAlignment(ML);
16640}
16641
16642/// getConstraintType - Given a constraint, return the type of
16643/// constraint it is for this target.
16644PPCTargetLowering::ConstraintType
16645PPCTargetLowering::getConstraintType(StringRef Constraint) const {
16646 if (Constraint.size() == 1) {
16647 switch (Constraint[0]) {
16648 default: break;
16649 case 'b':
16650 case 'r':
16651 case 'f':
16652 case 'd':
16653 case 'v':
16654 case 'y':
16655 return C_RegisterClass;
16656 case 'Z':
16657 // FIXME: While Z does indicate a memory constraint, it specifically
16658 // indicates an r+r address (used in conjunction with the 'y' modifier
16659 // in the replacement string). Currently, we're forcing the base
16660 // register to be r0 in the asm printer (which is interpreted as zero)
16661 // and forming the complete address in the second register. This is
16662 // suboptimal.
16663 return C_Memory;
16664 }
16665 } else if (Constraint == "wc") { // individual CR bits.
16666 return C_RegisterClass;
16667 } else if (Constraint == "wa" || Constraint == "wd" ||
16668 Constraint == "wf" || Constraint == "ws" ||
16669 Constraint == "wi" || Constraint == "ww") {
16670 return C_RegisterClass; // VSX registers.
16671 }
16672 return TargetLowering::getConstraintType(Constraint);
16673}
16674
16675/// Examine constraint type and operand type and determine a weight value.
16676/// This object must already have been set up with the operand type
16677/// and the current alternative constraint selected.
16678TargetLowering::ConstraintWeight
16679PPCTargetLowering::getSingleConstraintMatchWeight(
16680 AsmOperandInfo &info, const char *constraint) const {
16681 ConstraintWeight weight = CW_Invalid;
16682 Value *CallOperandVal = info.CallOperandVal;
16683 // If we don't have a value, we can't do a match,
16684 // but allow it at the lowest weight.
16685 if (!CallOperandVal)
16686 return CW_Default;
16687 Type *type = CallOperandVal->getType();
16688
16689 // Look at the constraint type.
16690 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
16691 return CW_Register; // an individual CR bit.
16692 else if ((StringRef(constraint) == "wa" ||
16693 StringRef(constraint) == "wd" ||
16694 StringRef(constraint) == "wf") &&
16695 type->isVectorTy())
16696 return CW_Register;
16697 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
16698 return CW_Register; // holds 64-bit integer data.
16699 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
16700 return CW_Register;
16701 else if (StringRef(constraint) == "ww" && type->isFloatTy())
16702 return CW_Register;
16703
16704 switch (*constraint) {
16705 default:
16706 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
16707 break;
16708 case 'b':
16709 if (type->isIntegerTy())
16710 weight = CW_Register;
16711 break;
16712 case 'f':
16713 if (type->isFloatTy())
16714 weight = CW_Register;
16715 break;
16716 case 'd':
16717 if (type->isDoubleTy())
16718 weight = CW_Register;
16719 break;
16720 case 'v':
16721 if (type->isVectorTy())
16722 weight = CW_Register;
16723 break;
16724 case 'y':
16725 weight = CW_Register;
16726 break;
16727 case 'Z':
16728 weight = CW_Memory;
16729 break;
16730 }
16731 return weight;
16732}
16733
16734std::pair<unsigned, const TargetRegisterClass *>
16735PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
16736 StringRef Constraint,
16737 MVT VT) const {
16738 if (Constraint.size() == 1) {
16739 // GCC RS6000 Constraint Letters
16740 switch (Constraint[0]) {
16741 case 'b': // R1-R31
16742 if (VT == MVT::i64 && Subtarget.isPPC64())
16743 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
16744 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
16745 case 'r': // R0-R31
16746 if (VT == MVT::i64 && Subtarget.isPPC64())
16747 return std::make_pair(0U, &PPC::G8RCRegClass);
16748 return std::make_pair(0U, &PPC::GPRCRegClass);
16749 // 'd' and 'f' constraints are both defined to be "the floating point
16750 // registers", where one is for 32-bit values and the other for 64-bit. We
16751 // don't care much about the distinction here, so give them the same classes.
16752 case 'd':
16753 case 'f':
16754 if (Subtarget.hasSPE()) {
16755 if (VT == MVT::f32 || VT == MVT::i32)
16756 return std::make_pair(0U, &PPC::GPRCRegClass);
16757 if (VT == MVT::f64 || VT == MVT::i64)
16758 return std::make_pair(0U, &PPC::SPERCRegClass);
16759 } else {
16760 if (VT == MVT::f32 || VT == MVT::i32)
16761 return std::make_pair(0U, &PPC::F4RCRegClass);
16762 if (VT == MVT::f64 || VT == MVT::i64)
16763 return std::make_pair(0U, &PPC::F8RCRegClass);
16764 }
16765 break;
16766 case 'v':
16767 if (Subtarget.hasAltivec() && VT.isVector())
16768 return std::make_pair(0U, &PPC::VRRCRegClass);
16769 else if (Subtarget.hasVSX())
16770 // Scalars in Altivec registers only make sense with VSX.
16771 return std::make_pair(0U, &PPC::VFRCRegClass);
16772 break;
16773 case 'y': // crrc
16774 return std::make_pair(0U, &PPC::CRRCRegClass);
16775 }
16776 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
16777 // An individual CR bit.
16778 return std::make_pair(0U, &PPC::CRBITRCRegClass);
16779 } else if ((Constraint == "wa" || Constraint == "wd" ||
16780 Constraint == "wf" || Constraint == "wi") &&
16781 Subtarget.hasVSX()) {
16782 // A VSX register for either a scalar (FP) or vector. There is no
16783 // support for single precision scalars on subtargets prior to Power8.
16784 if (VT.isVector())
16785 return std::make_pair(0U, &PPC::VSRCRegClass);
16786 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16787 return std::make_pair(0U, &PPC::VSSRCRegClass);
16788 return std::make_pair(0U, &PPC::VSFRCRegClass);
16789 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
16790 if (VT == MVT::f32 && Subtarget.hasP8Vector())
16791 return std::make_pair(0U, &PPC::VSSRCRegClass);
16792 else
16793 return std::make_pair(0U, &PPC::VSFRCRegClass);
16794 } else if (Constraint == "lr") {
16795 if (VT == MVT::i64)
16796 return std::make_pair(0U, &PPC::LR8RCRegClass);
16797 else
16798 return std::make_pair(0U, &PPC::LRRCRegClass);
16799 }
16800
16801 // Handle special cases of physical registers that are not properly handled
16802 // by the base class.
16803 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
16804 // If we name a VSX register, we can't defer to the base class because it
16805 // will not recognize the correct register (their names will be VSL{0-31}
16806 // and V{0-31} so they won't match). So we match them here.
16807 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
16808 int VSNum = atoi(Constraint.data() + 3);
16809 assert(VSNum >= 0 && VSNum <= 63 &&
16810 "Attempted to access a vsr out of range");
16811 if (VSNum < 32)
16812 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
16813 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
16814 }
16815
16816 // For float registers, we can't defer to the base class as it will match
16817 // the SPILLTOVSRRC class.
16818 if (Constraint.size() > 3 && Constraint[1] == 'f') {
16819 int RegNum = atoi(Constraint.data() + 2);
16820 if (RegNum > 31 || RegNum < 0)
16821 report_fatal_error("Invalid floating point register number");
16822 if (VT == MVT::f32 || VT == MVT::i32)
16823 return Subtarget.hasSPE()
16824 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
16825 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
16826 if (VT == MVT::f64 || VT == MVT::i64)
16827 return Subtarget.hasSPE()
16828 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
16829 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
16830 }
16831 }
16832
16833 std::pair<unsigned, const TargetRegisterClass *> R =
16834 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
16835
16835
16836 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
16837 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
16838 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
16839 // register.
16840 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
16841 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
16842 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
16843 PPC::GPRCRegClass.contains(R.first))
16844 return std::make_pair(TRI->getMatchingSuperReg(R.first,
16845 PPC::sub_32, &PPC::G8RCRegClass),
16846 &PPC::G8RCRegClass);
16847
16848 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
16849 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
16850 R.first = PPC::CR0;
16851 R.second = &PPC::CRRCRegClass;
16852 }
16853 // FIXME: This warning should ideally be emitted in the front end.
16854 const auto &TM = getTargetMachine();
16855 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
16856 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
16857 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
16858 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
16859 errs() << "warning: vector registers 20 to 31 are reserved in the "
16860 "default AIX AltiVec ABI and cannot be used\n";
16861 }
16862
16863 return R;
16864}
16865
16866/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
16867/// vector. If it is invalid, don't add anything to Ops.
16868void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
16869 StringRef Constraint,
16870 std::vector<SDValue> &Ops,
16871 SelectionDAG &DAG) const {
16872 SDValue Result;
16873
16874 // Only support length 1 constraints.
16875 if (Constraint.size() > 1)
16876 return;
16877
16878 char Letter = Constraint[0];
16879 switch (Letter) {
16880 default: break;
16881 case 'I':
16882 case 'J':
16883 case 'K':
16884 case 'L':
16885 case 'M':
16886 case 'N':
16887 case 'O':
16888 case 'P': {
16889 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
16890 if (!CST) return; // Must be an immediate to match.
16891 SDLoc dl(Op);
16892 int64_t Value = CST->getSExtValue();
16893 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
16894 // numbers are printed as such.
16895 switch (Letter) {
16896 default: llvm_unreachable("Unknown constraint letter!");
16897 case 'I': // "I" is a signed 16-bit constant.
16898 if (isInt<16>(Value))
16899 Result = DAG.getTargetConstant(Value, dl, TCVT);
16900 break;
16901 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
16902 if (isShiftedUInt<16, 16>(Value))
16903 Result = DAG.getTargetConstant(Value, dl, TCVT);
16904 break;
16905 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
16906 if (isShiftedInt<16, 16>(Value))
16907 Result = DAG.getTargetConstant(Value, dl, TCVT);
16908 break;
16909 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
16910 if (isUInt<16>(Value))
16911 Result = DAG.getTargetConstant(Value, dl, TCVT);
16912 break;
16913 case 'M': // "M" is a constant that is greater than 31.
16914 if (Value > 31)
16915 Result = DAG.getTargetConstant(Value, dl, TCVT);
16916 break;
16917 case 'N': // "N" is a positive constant that is an exact power of two.
16918 if (Value > 0 && isPowerOf2_64(Value))
16919 Result = DAG.getTargetConstant(Value, dl, TCVT);
16920 break;
16921 case 'O': // "O" is the constant zero.
16922 if (Value == 0)
16923 Result = DAG.getTargetConstant(Value, dl, TCVT);
16924 break;
16925 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
16926 if (isInt<16>(-Value))
16927 Result = DAG.getTargetConstant(Value, dl, TCVT);
16928 break;
16929 }
16930 break;
16931 }
16932 }
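// For example (an illustrative use): in
//   asm("addi %0,%1,%2" : "=r"(d) : "r"(s), "I"(16));
// the literal 16 satisfies "I" (a signed 16-bit constant) and is lowered to
// a target constant by the code above.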
16933
16934 if (Result.getNode()) {
16935 Ops.push_back(Result);
16936 return;
16937 }
16938
16939 // Handle standard constraint letters.
16940 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
16941}
16942
16943void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
16944 SmallVectorImpl<SDValue> &Ops,
16945 SelectionDAG &DAG) const {
16946 if (I.getNumOperands() <= 1)
16947 return;
16948 if (!isa<ConstantSDNode>(Ops[1].getNode()))
16949 return;
16950 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
16951 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
16952 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
16953 return;
16954
16955 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
16956 Ops.push_back(DAG.getMDNode(MDN));
16957}
16958
16959// isLegalAddressingMode - Return true if the addressing mode represented
16960// by AM is legal for this target, for a load/store of the specified type.
16961bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
16962 const AddrMode &AM, Type *Ty,
16963 unsigned AS,
16964 Instruction *I) const {
16965 // The vector-type r+i form has been supported since Power9, as the DQ form.
16966 // We don't check that the offset meets the DQ-form requirement
16967 // (offset % 16 == 0) because on PowerPC the immediate form is preferred and
16968 // the offset can be adjusted to use it later, in the PPCLoopInstrFormPrep
16969 // pass. Also, LSR validates each LSRUse using only its min and max offsets,
16970 // so we should be a little aggressive and accept the offsets in between.
16971 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
16972 return false;
16973
16974 // PPC allows a sign-extended 16-bit immediate field.
16975 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
16976 return false;
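// For example (illustrative offsets): a base register plus offset 32000 fits
// the signed 16-bit displacement and is legal, while an offset of 70000 is
// rejected and would have to be materialized in a separate register.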
16977
16978 // No global is ever allowed as a base.
16979 if (AM.BaseGV)
16980 return false;
16981
16982 // PPC only supports r+r addressing:
16983 switch (AM.Scale) {
16984 case 0: // "r+i" or just "i", depending on HasBaseReg.
16985 break;
16986 case 1:
16987 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
16988 return false;
16989 // Otherwise we have r+r or r+i.
16990 break;
16991 case 2:
16992 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
16993 return false;
16994 // Allow 2*r as r+r.
16995 break;
16996 default:
16997 // No other scales are supported.
16998 return false;
16999 }
17000
17001 return true;
17002}
17003
17004SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
17005 SelectionDAG &DAG) const {
17006 MachineFunction &MF = DAG.getMachineFunction();
17007 MachineFrameInfo &MFI = MF.getFrameInfo();
17008 MFI.setReturnAddressIsTaken(true);
17009
17010 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
17011 return SDValue();
17012
17013 SDLoc dl(Op);
17014 unsigned Depth = Op.getConstantOperandVal(0);
17015
17016 // Make sure the function does not optimize away the store of the RA to
17017 // the stack.
17018 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
17019 FuncInfo->setLRStoreRequired();
17020 bool isPPC64 = Subtarget.isPPC64();
17021 auto PtrVT = getPointerTy(MF.getDataLayout());
17022
17023 if (Depth > 0) {
17024 // The link register (return address) is saved in the caller's frame
17025 // not the callee's stack frame. So we must get the caller's frame
17026 // address and load the return address at the LR offset from there.
17027 SDValue FrameAddr =
17028 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17029 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
17030 SDValue Offset =
17031 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
17032 isPPC64 ? MVT::i64 : MVT::i32);
17033 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
17034 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
17035 MachinePointerInfo());
17036 }
17037
17038 // Just load the return address off the stack.
17039 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
17040 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
17041 MachinePointerInfo());
17042}
17043
17044SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
17045 SelectionDAG &DAG) const {
17046 SDLoc dl(Op);
17047 unsigned Depth = Op.getConstantOperandVal(0);
17048
17049 MachineFunction &MF = DAG.getMachineFunction();
17050 MachineFrameInfo &MFI = MF.getFrameInfo();
17051 MFI.setFrameAddressIsTaken(true);
17052
17053 EVT PtrVT = getPointerTy(MF.getDataLayout());
17054 bool isPPC64 = PtrVT == MVT::i64;
17055
17056 // Naked functions never have a frame pointer, and so we use r1. For all
17057 // other functions, this decision must be delayed until during PEI.
17058 unsigned FrameReg;
17059 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
17060 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
17061 else
17062 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
17063
17064 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
17065 PtrVT);
17066 while (Depth--)
17067 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
17068 FrameAddr, MachinePointerInfo());
17069 return FrameAddr;
17070}
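// For example (illustrative): a request such as
//   %fp = call ptr @llvm.frameaddress.p0(i32 2)
// becomes a CopyFromReg of the frame-pointer register followed by two
// chained loads, each dereferencing the back-chain slot at the start of the
// previous stack frame.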
17071
17072// FIXME? Maybe this could be a TableGen attribute on some registers and
17073// this table could be generated automatically from RegInfo.
17074Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
17075 const MachineFunction &MF) const {
17076 bool isPPC64 = Subtarget.isPPC64();
17077
17078 bool is64Bit = isPPC64 && VT == LLT::scalar(64);
17079 if (!is64Bit && VT != LLT::scalar(32))
17080 report_fatal_error("Invalid register global variable type");
17081
17082 Register Reg = StringSwitch<Register>(RegName)
17083 .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
17084 .Case("r2", isPPC64 ? Register() : PPC::R2)
17085 .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
17086 .Default(Register());
17087
17088 if (Reg)
17089 return Reg;
17090 report_fatal_error("Invalid register name global variable");
17091}
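// Illustrative usage (not from the LLVM source): a GNU C global register
// variable such as
//   register unsigned long tp asm("r13");
// reaches this hook through @llvm.read_register with the name "r13" and
// resolves to X13 in 64-bit mode or R13 in 32-bit mode; unknown names or
// mismatched types report a fatal error.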
17092
17093bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
17094 // The 32-bit SVR4 ABI accesses everything as got-indirect.
17095 if (Subtarget.is32BitELFABI())
17096 return true;
17097
17098 // AIX accesses everything indirectly through the TOC, which is similar to
17099 // the GOT.
17100 if (Subtarget.isAIXABI())
17101 return true;
17102
17103 CodeModel::Model CModel = getTargetMachine().getCodeModel();
17104 // If it is small or large code model, module locals are accessed
17105 // indirectly by loading their address from .toc/.got.
17106 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
17107 return true;
17108
17109 // JumpTable and BlockAddress are accessed as got-indirect.
17110 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
17111 return true;
17112
17113 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
17114 return Subtarget.isGVIndirectSymbol(G->getGlobal());
17115
17116 return false;
17117}
17118
17119 bool
17120PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
17121 // The PowerPC target isn't yet aware of offsets.
17122 return false;
17123}
17124
17125bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
17126 const CallInst &I,
17127 MachineFunction &MF,
17128 unsigned Intrinsic) const {
17129 switch (Intrinsic) {
17130 case Intrinsic::ppc_atomicrmw_xchg_i128:
17131 case Intrinsic::ppc_atomicrmw_add_i128:
17132 case Intrinsic::ppc_atomicrmw_sub_i128:
17133 case Intrinsic::ppc_atomicrmw_nand_i128:
17134 case Intrinsic::ppc_atomicrmw_and_i128:
17135 case Intrinsic::ppc_atomicrmw_or_i128:
17136 case Intrinsic::ppc_atomicrmw_xor_i128:
17137 case Intrinsic::ppc_cmpxchg_i128:
17138 Info.opc = ISD::INTRINSIC_W_CHAIN;
17139 Info.memVT = MVT::i128;
17140 Info.ptrVal = I.getArgOperand(0);
17141 Info.offset = 0;
17142 Info.align = Align(16);
17143 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
17144 MachineMemOperand::MOVolatile;
17145 return true;
17146 case Intrinsic::ppc_atomic_load_i128:
17147 Info.opc = ISD::INTRINSIC_W_CHAIN;
17148 Info.memVT = MVT::i128;
17149 Info.ptrVal = I.getArgOperand(0);
17150 Info.offset = 0;
17151 Info.align = Align(16);
17152 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
17153 return true;
17154 case Intrinsic::ppc_atomic_store_i128:
17155 Info.opc = ISD::INTRINSIC_VOID;
17156 Info.memVT = MVT::i128;
17157 Info.ptrVal = I.getArgOperand(2);
17158 Info.offset = 0;
17159 Info.align = Align(16);
17160 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17161 return true;
17162 case Intrinsic::ppc_altivec_lvx:
17163 case Intrinsic::ppc_altivec_lvxl:
17164 case Intrinsic::ppc_altivec_lvebx:
17165 case Intrinsic::ppc_altivec_lvehx:
17166 case Intrinsic::ppc_altivec_lvewx:
17167 case Intrinsic::ppc_vsx_lxvd2x:
17168 case Intrinsic::ppc_vsx_lxvw4x:
17169 case Intrinsic::ppc_vsx_lxvd2x_be:
17170 case Intrinsic::ppc_vsx_lxvw4x_be:
17171 case Intrinsic::ppc_vsx_lxvl:
17172 case Intrinsic::ppc_vsx_lxvll: {
17173 EVT VT;
17174 switch (Intrinsic) {
17175 case Intrinsic::ppc_altivec_lvebx:
17176 VT = MVT::i8;
17177 break;
17178 case Intrinsic::ppc_altivec_lvehx:
17179 VT = MVT::i16;
17180 break;
17181 case Intrinsic::ppc_altivec_lvewx:
17182 VT = MVT::i32;
17183 break;
17184 case Intrinsic::ppc_vsx_lxvd2x:
17185 case Intrinsic::ppc_vsx_lxvd2x_be:
17186 VT = MVT::v2f64;
17187 break;
17188 default:
17189 VT = MVT::v4i32;
17190 break;
17191 }
17192
17193 Info.opc = ISD::INTRINSIC_W_CHAIN;
17194 Info.memVT = VT;
17195 Info.ptrVal = I.getArgOperand(0);
17196 Info.offset = -VT.getStoreSize()+1;
17197 Info.size = 2*VT.getStoreSize()-1;
17198 Info.align = Align(1);
17199 Info.flags = MachineMemOperand::MOLoad;
17200 return true;
17201 }
17202 case Intrinsic::ppc_altivec_stvx:
17203 case Intrinsic::ppc_altivec_stvxl:
17204 case Intrinsic::ppc_altivec_stvebx:
17205 case Intrinsic::ppc_altivec_stvehx:
17206 case Intrinsic::ppc_altivec_stvewx:
17207 case Intrinsic::ppc_vsx_stxvd2x:
17208 case Intrinsic::ppc_vsx_stxvw4x:
17209 case Intrinsic::ppc_vsx_stxvd2x_be:
17210 case Intrinsic::ppc_vsx_stxvw4x_be:
17211 case Intrinsic::ppc_vsx_stxvl:
17212 case Intrinsic::ppc_vsx_stxvll: {
17213 EVT VT;
17214 switch (Intrinsic) {
17215 case Intrinsic::ppc_altivec_stvebx:
17216 VT = MVT::i8;
17217 break;
17218 case Intrinsic::ppc_altivec_stvehx:
17219 VT = MVT::i16;
17220 break;
17221 case Intrinsic::ppc_altivec_stvewx:
17222 VT = MVT::i32;
17223 break;
17224 case Intrinsic::ppc_vsx_stxvd2x:
17225 case Intrinsic::ppc_vsx_stxvd2x_be:
17226 VT = MVT::v2f64;
17227 break;
17228 default:
17229 VT = MVT::v4i32;
17230 break;
17231 }
17232
17233 Info.opc = ISD::INTRINSIC_VOID;
17234 Info.memVT = VT;
17235 Info.ptrVal = I.getArgOperand(1);
17236 Info.offset = -VT.getStoreSize()+1;
17237 Info.size = 2*VT.getStoreSize()-1;
17238 Info.align = Align(1);
17239 Info.flags = MachineMemOperand::MOStore;
17240 return true;
17241 }
17242 case Intrinsic::ppc_stdcx:
17243 case Intrinsic::ppc_stwcx:
17244 case Intrinsic::ppc_sthcx:
17245 case Intrinsic::ppc_stbcx: {
17246 EVT VT;
17247 auto Alignment = Align(8);
17248 switch (Intrinsic) {
17249 case Intrinsic::ppc_stdcx:
17250 VT = MVT::i64;
17251 break;
17252 case Intrinsic::ppc_stwcx:
17253 VT = MVT::i32;
17254 Alignment = Align(4);
17255 break;
17256 case Intrinsic::ppc_sthcx:
17257 VT = MVT::i16;
17258 Alignment = Align(2);
17259 break;
17260 case Intrinsic::ppc_stbcx:
17261 VT = MVT::i8;
17262 Alignment = Align(1);
17263 break;
17264 }
17265 Info.opc = ISD::INTRINSIC_W_CHAIN;
17266 Info.memVT = VT;
17267 Info.ptrVal = I.getArgOperand(0);
17268 Info.offset = 0;
17269 Info.align = Alignment;
17270 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
17271 return true;
17272 }
17273 default:
17274 break;
17275 }
17276
17277 return false;
17278}
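// A worked instance of the lvx/stvx bounds above (illustrative): lvx
// ignores the low four bits of the address, so a v4i32 access with store
// size 16 may touch bytes anywhere in [ptr - 15, ptr + 15]. That is why
// Info.offset is set to -VT.getStoreSize() + 1 == -15 and Info.size to
// 2 * VT.getStoreSize() - 1 == 31, with Align(1) as a conservative
// description for alias analysis.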
17279
17280/// It returns EVT::Other if the type should be determined using generic
17281/// target-independent logic.
17282EVT PPCTargetLowering::getOptimalMemOpType(
17283 const MemOp &Op, const AttributeList &FuncAttributes) const {
17284 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
17285 // We should use Altivec/VSX loads and stores when available. For unaligned
17286 // addresses, unaligned VSX loads are only fast starting with the P8.
17287 if (Subtarget.hasAltivec() && Op.size() >= 16) {
17288 if (Op.isMemset() && Subtarget.hasVSX()) {
17289 uint64_t TailSize = Op.size() % 16;
17290 // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
17291 // element if the vector element type matches the tail store. For tail sizes
17292 // 3/4 the tail store is i32, so v4i32 cannot be used and a legal type is needed.
17293 if (TailSize > 2 && TailSize <= 4) {
17294 return MVT::v8i16;
17295 }
17296 return MVT::v4i32;
17297 }
17298 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
17299 return MVT::v4i32;
17300 }
17301 }
17302
17303 if (Subtarget.isPPC64()) {
17304 return MVT::i64;
17305 }
17306
17307 return MVT::i32;
17308}
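// Worked example (illustrative): with VSX, a 24-byte memset has TailSize 8
// and returns v4i32, while a 20-byte memset has TailSize 4 and returns
// v8i16 so the element extractions used for the tail stores stay legal.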
17309
17310/// Returns true if it is beneficial to convert a load of a constant
17311/// to just the constant itself.
17312bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
17313 Type *Ty) const {
17314 assert(Ty->isIntegerTy());
17315
17316 unsigned BitSize = Ty->getPrimitiveSizeInBits();
17317 return !(BitSize == 0 || BitSize > 64);
17318}
17319
17320bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
17321 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
17322 return false;
17323 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
17324 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
17325 return NumBits1 == 64 && NumBits2 == 32;
17326}
17327
17328bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
17329 if (!VT1.isInteger() || !VT2.isInteger())
17330 return false;
17331 unsigned NumBits1 = VT1.getSizeInBits();
17332 unsigned NumBits2 = VT2.getSizeInBits();
17333 return NumBits1 == 64 && NumBits2 == 32;
17334}
17335
17336bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
17337 // Generally speaking, zexts are not free, but they are free when they can be
17338 // folded with other operations.
17339 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
17340 EVT MemVT = LD->getMemoryVT();
17341 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
17342 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
17343 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
17344 LD->getExtensionType() == ISD::ZEXTLOAD))
17345 return true;
17346 }
17347
17348 // FIXME: Add other cases...
17349 // - 32-bit shifts with a zext to i64
17350 // - zext after ctlz, bswap, etc.
17351 // - zext after and by a constant mask
17352
17353 return TargetLowering::isZExtFree(Val, VT2);
17354}
17355
17356bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
17357 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
17358 "invalid fpext types");
17359 // Extending to float128 is not free.
17360 if (DestVT == MVT::f128)
17361 return false;
17362 return true;
17363}
17364
17365bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
17366 return isInt<16>(Imm) || isUInt<16>(Imm);
17367}
17368
17369bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
17370 return isInt<16>(Imm) || isUInt<16>(Imm);
17371}
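// For example (illustrative): 32767 and 65535 are legal add/compare
// immediates (they fit the signed or unsigned 16-bit immediate patterns of
// the addi/cmpi/cmpli families), while 65536 fits neither and must be
// materialized into a register first.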
17372
17373bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
17374 MachineMemOperand::Flags,
17375 unsigned *Fast) const {
17376 if (DisablePPCUnaligned)
17377 return false;
17378
17379 // PowerPC supports unaligned memory access for simple non-vector types.
17380 // Although accessing unaligned addresses is not as efficient as accessing
17381 // aligned addresses, it is generally more efficient than manual expansion,
17382 // and generally only traps for software emulation when crossing page
17383 // boundaries.
17384
17385 if (!VT.isSimple())
17386 return false;
17387
17388 if (VT.isFloatingPoint() && !VT.isVector() &&
17389 !Subtarget.allowsUnalignedFPAccess())
17390 return false;
17391
17392 if (VT.getSimpleVT().isVector()) {
17393 if (Subtarget.hasVSX()) {
17394 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
17395 VT != MVT::v4f32 && VT != MVT::v4i32)
17396 return false;
17397 } else {
17398 return false;
17399 }
17400 }
17401
17402 if (VT == MVT::ppcf128)
17403 return false;
17404
17405 if (Fast)
17406 *Fast = 1;
17407
17408 return true;
17409}
17410
17411bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
17412 SDValue C) const {
17413 // Check integral scalar types.
17414 if (!VT.isScalarInteger())
17415 return false;
17416 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
17417 if (!ConstNode->getAPIntValue().isSignedIntN(64))
17418 return false;
17419 // This transformation will generate >= 2 operations. But the following
17420 // cases will generate <= 2 instructions during ISEL. So exclude them.
17421 // 1. If the constant multiplier fits 16 bits, it can be handled by one
17422 // HW instruction, ie. MULLI
17423 // 2. If the multiplier after shifted fits 16 bits, an extra shift
17424 // instruction is needed than case 1, ie. MULLI and RLDICR
17425 int64_t Imm = ConstNode->getSExtValue();
17426 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
17427 Imm >>= Shift;
17428 if (isInt<16>(Imm))
17429 return false;
17430 uint64_t UImm = static_cast<uint64_t>(Imm);
17431 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
17432 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
17433 return true;
17434 }
17435 return false;
17436}
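// Worked example (illustrative): for (mul x, 524288) the trailing zeros are
// shifted out first (Imm becomes 1, which fits 16 bits), so MULLI plus a
// shift already handles it and we return false. For (mul x, 524287),
// UImm + 1 == 2^19 is a power of two, so decomposing into
// (sub (shl x, 19), x) is reported as profitable.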
17437
17438bool PPCTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
17439 EVT VT) const {
17440 return Opc == ISD::LOAD && (VT == MVT::f64 || VT == MVT::i64) &&
17441 isOperationLegalOrCustom(Opc, MVT::i64);
17442}
17443
17444bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
17445 Type *Ty) const {
17446 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
17447 return false;
17448 switch (Ty->getScalarType()->getTypeID()) {
17449 case Type::FloatTyID:
17450 case Type::DoubleTyID:
17451 return true;
17452 case Type::FP128TyID:
17453 return Subtarget.hasP9Vector();
17454 default:
17455 return false;
17456 }
17457}
17458
17459// FIXME: add more patterns which are not profitable to hoist.
17460bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
17461 if (!I->hasOneUse())
17462 return true;
17463
17464 Instruction *User = I->user_back();
17465 assert(User && "A single use instruction with no uses.");
17466
17467 switch (I->getOpcode()) {
17468 case Instruction::FMul: {
17469 // Don't break FMA, PowerPC prefers FMA.
17470 if (User->getOpcode() != Instruction::FSub &&
17471 User->getOpcode() != Instruction::FAdd)
17472 return true;
17473
17474 const TargetOptions &Options = getTargetMachine().Options;
17475 const Function *F = I->getFunction();
17476 const DataLayout &DL = F->getParent()->getDataLayout();
17477 Type *Ty = User->getOperand(0)->getType();
17478
17479 return !(
17480 isFMAFasterThanFMulAndFAdd(*F, Ty) &&
17481 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
17482 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
17483 }
17484 case Instruction::Load: {
17485 // Don't break "store (load float*)" pattern, this pattern will be combined
17486 // to "store (load int32)" in later InstCombine pass. See function
17487 // combineLoadToOperationType. On PowerPC, loading a floating-point value takes more
17488 // cycles than loading a 32 bit integer.
17489 LoadInst *LI = cast<LoadInst>(I);
17490 // For the loads that combineLoadToOperationType does nothing, like
17491 // ordered load, it should be profitable to hoist them.
17492 // For swifterror load, it can only be used for pointer to pointer type, so
17493 // later type check should get rid of this case.
17494 if (!LI->isUnordered())
17495 return true;
17496
17497 if (User->getOpcode() != Instruction::Store)
17498 return true;
17499
17500 if (I->getType()->getTypeID() != Type::FloatTyID)
17501 return true;
17502
17503 return false;
17504 }
17505 default:
17506 return true;
17507 }
17508 return true;
17509}
17510
17511const MCPhysReg *
17512PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
17513 // LR is a callee-save register, but we must treat it as clobbered by any call
17514 // site. Hence we include LR in the scratch registers, which are in turn added
17515 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
17516 // to CTR, which is used by any indirect call.
17517 static const MCPhysReg ScratchRegs[] = {
17518 PPC::X12, PPC::LR8, PPC::CTR8, 0
17519 };
17520
17521 return ScratchRegs;
17522}
17523
17524Register PPCTargetLowering::getExceptionPointerRegister(
17525 const Constant *PersonalityFn) const {
17526 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
17527}
17528
17529Register PPCTargetLowering::getExceptionSelectorRegister(
17530 const Constant *PersonalityFn) const {
17531 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
17532}
17533
17534bool
17535PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
17536 EVT VT, unsigned DefinedValues) const {
17537 if (VT == MVT::v2i64)
17538 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
17539
17540 if (Subtarget.hasVSX())
17541 return true;
17542
17543 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
17544}
17545
17546Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
17547 if (DisableILPPref || Subtarget.enableMachineScheduler())
17548 return TargetLowering::getSchedulingPreference(N);
17549
17550 return Sched::ILP;
17551}
17552
17553// Create a fast isel object.
17554FastISel *
17555PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
17556 const TargetLibraryInfo *LibInfo) const {
17557 return PPC::createFastISel(FuncInfo, LibInfo);
17558}
17559
17560// 'Inverted' means the FMA opcode after negating one multiplicand.
17561// For example, (fma -a b c) = (fnmsub a b c)
17562static unsigned invertFMAOpcode(unsigned Opc) {
17563 switch (Opc) {
17564 default:
17565 llvm_unreachable("Invalid FMA opcode for PowerPC!");
17566 case ISD::FMA:
17567 return PPCISD::FNMSUB;
17568 case PPCISD::FNMSUB:
17569 return ISD::FMA;
17570 }
17571}
17572
17573SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
17574 bool LegalOps, bool OptForSize,
17575 NegatibleCost &Cost,
17576 unsigned Depth) const {
17577 if (Depth > SelectionDAG::MaxRecursionDepth)
17578 return SDValue();
17579
17580 unsigned Opc = Op.getOpcode();
17581 EVT VT = Op.getValueType();
17582 SDNodeFlags Flags = Op.getNode()->getFlags();
17583
17584 switch (Opc) {
17585 case PPCISD::FNMSUB:
17586 if (!Op.hasOneUse() || !isTypeLegal(VT))
17587 break;
17588
17589 const TargetOptions &Options = getTargetMachine().Options;
17590 SDValue N0 = Op.getOperand(0);
17591 SDValue N1 = Op.getOperand(1);
17592 SDValue N2 = Op.getOperand(2);
17593 SDLoc Loc(Op);
17594
17595 NegatibleCost N2Cost = NegatibleCost::Expensive;
17596 SDValue NegN2 =
17597 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
17598
17599 if (!NegN2)
17600 return SDValue();
17601
17602 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
17603 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
17604 // These transformations may change sign of zeroes. For example,
17605 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
17606 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
17607 // Try and choose the cheaper one to negate.
17608 NegatibleCost N0Cost = NegatibleCost::Expensive;
17609 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
17610 N0Cost, Depth + 1);
17611
17612 NegatibleCost N1Cost = NegatibleCost::Expensive;
17613 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
17614 N1Cost, Depth + 1);
17615
17616 if (NegN0 && N0Cost <= N1Cost) {
17617 Cost = std::min(N0Cost, N2Cost);
17618 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
17619 } else if (NegN1) {
17620 Cost = std::min(N1Cost, N2Cost);
17621 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
17622 }
17623 }
17624
17625 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
17626 if (isOperationLegal(ISD::FMA, VT)) {
17627 Cost = N2Cost;
17628 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
17629 }
17630
17631 break;
17632 }
17633
17634 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
17635 Cost, Depth);
17636}
17637
17638// Override to enable LOAD_STACK_GUARD lowering on Linux.
17639bool PPCTargetLowering::useLoadStackGuardNode() const {
17640 if (!Subtarget.isTargetLinux())
17641 return TargetLowering::useLoadStackGuardNode();
17642 return true;
17643}
17644
17645// Override to disable global variable loading on Linux and insert AIX canary
17646// word declaration.
17647void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
17648 if (Subtarget.isAIXABI()) {
17649 M.getOrInsertGlobal(AIXSSPCanaryWordName,
17650 PointerType::getUnqual(M.getContext()));
17651 return;
17652 }
17653 if (!Subtarget.isTargetLinux())
17654 return TargetLowering::insertSSPDeclarations(M);
17655}
17656
17657Value *PPCTargetLowering::getSDagStackGuard(const Module &M) const {
17658 if (Subtarget.isAIXABI())
17659 return M.getGlobalVariable(AIXSSPCanaryWordName);
17660 return TargetLowering::getSDagStackGuard(M);
17661}
17662
17663bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
17664 bool ForCodeSize) const {
17665 if (!VT.isSimple() || !Subtarget.hasVSX())
17666 return false;
17667
17668 switch(VT.getSimpleVT().SimpleTy) {
17669 default:
17670 // For FP types that are currently not supported by PPC backend, return
17671 // false. Examples: f16, f80.
17672 return false;
17673 case MVT::f32:
17674 case MVT::f64: {
17675 if (Subtarget.hasPrefixInstrs()) {
17676 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
17677 return true;
17678 }
17679 bool IsExact;
17680 APSInt IntResult(16, false);
17681 // The rounding mode doesn't really matter because we only care about floats
17682 // that can be converted to integers exactly.
17683 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
17684 // For exact values in the range [-16, 15] we can materialize the float.
17685 if (IsExact && IntResult <= 15 && IntResult >= -16)
17686 return true;
17687 return Imm.isZero();
17688 }
17689 case MVT::ppcf128:
17690 return Imm.isPosZero();
17691 }
17692}
17693
17694// For vector shift operation op, fold
17695// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
17696static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
17697 SelectionDAG &DAG) {
17698 SDValue N0 = N->getOperand(0);
17699 SDValue N1 = N->getOperand(1);
17700 EVT VT = N0.getValueType();
17701 unsigned OpSizeInBits = VT.getScalarSizeInBits();
17702 unsigned Opcode = N->getOpcode();
17703 unsigned TargetOpcode;
17704
17705 switch (Opcode) {
17706 default:
17707 llvm_unreachable("Unexpected shift operation");
17708 case ISD::SHL:
17709 TargetOpcode = PPCISD::SHL;
17710 break;
17711 case ISD::SRL:
17712 TargetOpcode = PPCISD::SRL;
17713 break;
17714 case ISD::SRA:
17715 TargetOpcode = PPCISD::SRA;
17716 break;
17717 }
17718
17719 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
17720 N1->getOpcode() == ISD::AND)
17721 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
17722 if (Mask->getZExtValue() == OpSizeInBits - 1)
17723 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
17724
17725 return SDValue();
17726}
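// For example (illustrative), for a v4i32 shift the DAG
//   (shl x, (and y, splat(31)))
// becomes (PPCISD::SHL x, y): the vector shift instructions already use the
// shift amounts modulo the element width, so the masking AND is redundant.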
17727
17728SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
17729 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17730 return Value;
17731
17732 SDValue N0 = N->getOperand(0);
17733 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
17734 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
17735 N0.getOpcode() != ISD::SIGN_EXTEND ||
17736 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
17737 N->getValueType(0) != MVT::i64)
17738 return SDValue();
17739
17740 // We can't save an operation here if the value is already extended, and
17741 // the existing shift is easier to combine.
17742 SDValue ExtsSrc = N0.getOperand(0);
17743 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
17744 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
17745 return SDValue();
17746
17747 SDLoc DL(N0);
17748 SDValue ShiftBy = SDValue(CN1, 0);
17749 // We want the shift amount to be i32 on the extswli, but the shift
17750 // amount could be i64.
17751 if (ShiftBy.getValueType() == MVT::i64)
17752 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
17753
17754 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
17755 ShiftBy);
17756}
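// For example (illustrative), on a 64-bit ISA 3.0 target
//   (shl (sign_extend i32 %a to i64), 4)
// is matched here into a single PPCISD::EXTSWSLI node, which selects to
// extswsli (extend sign word and shift left immediate) instead of a
// separate extsw + sldi pair.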
17757
17758SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
17759 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17760 return Value;
17761
17762 return SDValue();
17763}
17764
17765SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
17766 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
17767 return Value;
17768
17769 return SDValue();
17770}
17771
17772// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
17773// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
17774 // When C is zero, the expression (addi Z, -C) simplifies to Z.
17775// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
17776static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
17777 const PPCSubtarget &Subtarget) {
17778 if (!Subtarget.isPPC64())
17779 return SDValue();
17780
17781 SDValue LHS = N->getOperand(0);
17782 SDValue RHS = N->getOperand(1);
17783
17784 auto isZextOfCompareWithConstant = [](SDValue Op) {
17785 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
17786 Op.getValueType() != MVT::i64)
17787 return false;
17788
17789 SDValue Cmp = Op.getOperand(0);
17790 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
17791 Cmp.getOperand(0).getValueType() != MVT::i64)
17792 return false;
17793
17794 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
17795 int64_t NegConstant = 0 - Constant->getSExtValue();
17796 // Due to the limitations of the addi instruction,
17797 // -C is required to be [-32768, 32767].
17798 return isInt<16>(NegConstant);
17799 }
17800
17801 return false;
17802 };
17803
17804 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
17805 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
17806
17807 // If there is a pattern, canonicalize a zext operand to the RHS.
17808 if (LHSHasPattern && !RHSHasPattern)
17809 std::swap(LHS, RHS);
17810 else if (!LHSHasPattern && !RHSHasPattern)
17811 return SDValue();
17812
17813 SDLoc DL(N);
17814 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
17815 SDValue Cmp = RHS.getOperand(0);
17816 SDValue Z = Cmp.getOperand(0);
17817 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
17818 int64_t NegConstant = 0 - Constant->getSExtValue();
17819
17820 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
17821 default: break;
17822 case ISD::SETNE: {
17823 // when C == 0
17824 // --> addze X, (addic Z, -1).carry
17825 // /
17826 // add X, (zext(setne Z, C))--
17827 // \ when -32768 <= -C <= 32767 && C != 0
17828 // --> addze X, (addic (addi Z, -C), -1).carry
17829 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17830 DAG.getConstant(NegConstant, DL, MVT::i64));
17831 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17832 SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17833 AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17834 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17835 SDValue(Addc.getNode(), 1));
17836 }
17837 case ISD::SETEQ: {
17838 // when C == 0
17839 // --> addze X, (subfic Z, 0).carry
17840 // /
17841 // add X, (zext(sete Z, C))--
17842 // \ when -32768 <= -C <= 32767 && C != 0
17843 // --> addze X, (subfic (addi Z, -C), 0).carry
17844 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17845 DAG.getConstant(NegConstant, DL, MVT::i64));
17846 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17847 SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17848 DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17849 return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17850 SDValue(Subc.getNode(), 1));
17851 }
17852 }
17853
17854 return SDValue();
17855}
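// Concrete instance (illustrative) of the SETNE diagram above, with C = 42:
//   (add x, (zext (setne z, 42)))
// becomes roughly
//   addi t, z, -42 ; addic t, t, -1 ; addze res, x
// so the comparison is consumed through the carry bit instead of
// materializing a 0/1 value in a GPR.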
17856
17857// Transform
17858// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17859// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17860// In this case both C1 and C2 must be known constants.
17861// C1+C2 must fit into a 34 bit signed integer.
17862static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
17863 const PPCSubtarget &Subtarget) {
17864 if (!Subtarget.isUsingPCRelativeCalls())
17865 return SDValue();
17866
17867 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17868 // If we find that node try to cast the Global Address and the Constant.
17869 SDValue LHS = N->getOperand(0);
17870 SDValue RHS = N->getOperand(1);
17871
17872 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17873 std::swap(LHS, RHS);
17874
17875 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17876 return SDValue();
17877
17878 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17879 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
17880 ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
17881
17882 // Check that both casts succeeded.
17883 if (!GSDN || !ConstNode)
17884 return SDValue();
17885
17886 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
17887 SDLoc DL(GSDN);
17888
17889 // The signed int offset needs to fit in 34 bits.
17890 if (!isInt<34>(NewOffset))
17891 return SDValue();
17892
17893 // The new global address is a copy of the old global address except
17894 // that it has the updated Offset.
17895 SDValue GA =
17896 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
17897 NewOffset, GSDN->getTargetFlags());
17898 SDValue MatPCRel =
17899 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17900 return MatPCRel;
17901}
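// For example (illustrative), with PC-relative addressing available
//   (add (MAT_PCREL_ADDR @g + 8), 16)
// folds to (MAT_PCREL_ADDR @g + 24), which still materializes as a single
// PC-relative paddi because the combined offset fits in 34 signed bits.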
17902
17903SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
17904 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17905 return Value;
17906
17907 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17908 return Value;
17909
17910 return SDValue();
17911}
17912
17913// Detect TRUNCATE operations on bitcasts of float128 values.
17914 // What we are looking for here is the situation where we extract a subset
17915// of bits from a 128 bit float.
17916// This can be of two forms:
17917// 1) BITCAST of f128 feeding TRUNCATE
17918// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
17919// The reason this is required is because we do not have a legal i128 type
17920// and so we want to prevent having to store the f128 and then reload part
17921// of it.
17922SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
17923 DAGCombinerInfo &DCI) const {
17924 // If we are using CRBits then try that first.
17925 if (Subtarget.useCRBits()) {
17926 // Check if CRBits did anything and return that if it did.
17927 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
17928 return CRTruncValue;
17929 }
17930
17931 SDLoc dl(N);
17932 SDValue Op0 = N->getOperand(0);
17933
17934 // Looking for a truncate of i128 to i64.
17935 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
17936 return SDValue();
17937
17938 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
17939
17940 // SRL feeding TRUNCATE.
17941 if (Op0.getOpcode() == ISD::SRL) {
17942 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
17943 // The right shift has to be by 64 bits.
17944 if (!ConstNode || ConstNode->getZExtValue() != 64)
17945 return SDValue();
17946
17947 // Switch the element number to extract.
17948 EltToExtract = EltToExtract ? 0 : 1;
17949 // Update Op0 past the SRL.
17950 Op0 = Op0.getOperand(0);
17951 }
17952
17953 // BITCAST feeding a TRUNCATE possibly via SRL.
17954 if (Op0.getOpcode() == ISD::BITCAST &&
17955 Op0.getValueType() == MVT::i128 &&
17956 Op0.getOperand(0).getValueType() == MVT::f128) {
17957 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
17958 return DCI.DAG.getNode(
17959 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
17960 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
17961 }
17962 return SDValue();
17963}
17964
17965SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
17966 SelectionDAG &DAG = DCI.DAG;
17967
17968 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
17969 if (!ConstOpOrElement)
17970 return SDValue();
17971
17972 // An imul is usually smaller than the alternative sequence for legal type.
17973 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
17974 isOperationLegal(ISD::MUL, N->getValueType(0)))
17975 return SDValue();
17976
17977 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
17978 switch (this->Subtarget.getCPUDirective()) {
17979 default:
17980 // TODO: enhance the condition for subtarget before pwr8
17981 return false;
17982 case PPC::DIR_PWR8:
17983 // type mul add shl
17984 // scalar 4 1 1
17985 // vector 7 2 2
17986 return true;
17987 case PPC::DIR_PWR9:
17988 case PPC::DIR_PWR10:
17989 case PPC::DIR_PWR_FUTURE:
17990 // type mul add shl
17991 // scalar 5 2 2
17992 // vector 7 2 2
17993
17994 // The cycle ratios of the related operations are shown in the table above.
17995 // Because mul is 5 (scalar) / 7 (vector) while add/sub/shl are all 2 for both
17996 // scalar and vector types, 2-instruction patterns (add/sub + shl, cost 4)
17997 // are always profitable; but for 3-instruction patterns such as
17998 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl cost 6,
17999 // so we should only do it for vector types.
18000 return IsAddOne && IsNeg ? VT.isVector() : true;
18001 }
18002 };
18003
18004 EVT VT = N->getValueType(0);
18005 SDLoc DL(N);
18006
18007 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
18008 bool IsNeg = MulAmt.isNegative();
18009 APInt MulAmtAbs = MulAmt.abs();
18010
18011 if ((MulAmtAbs - 1).isPowerOf2()) {
18012 // (mul x, 2^N + 1) => (add (shl x, N), x)
18013 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
18014
18015 if (!IsProfitable(IsNeg, true, VT))
18016 return SDValue();
18017
18018 SDValue Op0 = N->getOperand(0);
18019 SDValue Op1 =
18020 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18021 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
18022 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
18023
18024 if (!IsNeg)
18025 return Res;
18026
18027 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
18028 } else if ((MulAmtAbs + 1).isPowerOf2()) {
18029 // (mul x, 2^N - 1) => (sub (shl x, N), x)
18030 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
18031
18032 if (!IsProfitable(IsNeg, false, VT))
18033 return SDValue();
18034
18035 SDValue Op0 = N->getOperand(0);
18036 SDValue Op1 =
18037 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
18038 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
18039
18040 if (!IsNeg)
18041 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
18042 else
18043 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
18044
18045 } else {
18046 return SDValue();
18047 }
18048}
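// Worked examples (illustrative), subject to the IsProfitable check above:
//   (mul x, 9)  -> (add (shl x, 3), x)         since 9 - 1 == 2^3
//   (mul x, -9) -> (sub 0, (add (shl x, 3), x))
//   (mul x, 7)  -> (sub (shl x, 3), x)         since 7 + 1 == 2^3
//   (mul x, -7) -> (sub x, (shl x, 3))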
18049
18050// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
18051// in combiner since we need to check SD flags and other subtarget features.
18052SDValue PPCTargetLowering::combineFMALike(SDNode *N,
18053 DAGCombinerInfo &DCI) const {
18054 SDValue N0 = N->getOperand(0);
18055 SDValue N1 = N->getOperand(1);
18056 SDValue N2 = N->getOperand(2);
18057 SDNodeFlags Flags = N->getFlags();
18058 EVT VT = N->getValueType(0);
18059 SelectionDAG &DAG = DCI.DAG;
18060 const TargetOptions &Options = getTargetMachine().Options;
18061 unsigned Opc = N->getOpcode();
18062 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
18063 bool LegalOps = !DCI.isBeforeLegalizeOps();
18064 SDLoc Loc(N);
18065
18066 if (!isOperationLegal(ISD::FMA, VT))
18067 return SDValue();
18068
18069 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
18070 // since (fnmsub a b c)=-0 while c-ab=+0.
18071 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
18072 return SDValue();
18073
18074 // (fma (fneg a) b c) => (fnmsub a b c)
18075 // (fnmsub (fneg a) b c) => (fma a b c)
18076 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
18077 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
18078
18079 // (fma a (fneg b) c) => (fnmsub a b c)
18080 // (fnmsub a (fneg b) c) => (fma a b c)
18081 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
18082 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
18083
18084 return SDValue();
18085}
18086
18087bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
18088 // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
18089 if (!Subtarget.is64BitELFABI())
18090 return false;
18091
18092 // If not a tail call then no need to proceed.
18093 if (!CI->isTailCall())
18094 return false;
18095
18096 // If sibling calls have been disabled and tail-calls aren't guaranteed
18097 // there is no reason to duplicate.
18098 auto &TM = getTargetMachine();
18099 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
18100 return false;
18101
18102 // Can't tail call a function called indirectly, or if it has variadic args.
18103 const Function *Callee = CI->getCalledFunction();
18104 if (!Callee || Callee->isVarArg())
18105 return false;
18106
18107 // Make sure the callee and caller calling conventions are eligible for tco.
18108 const Function *Caller = CI->getParent()->getParent();
18109 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
18110 CI->getCallingConv()))
18111 return false;
18112
18113 // If the function is local then we have a good chance at tail-calling it
18114 return getTargetMachine().shouldAssumeDSOLocal(Callee);
18115}
18116
18117bool PPCTargetLowering::
18118isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
18119 const Value *Mask = AndI.getOperand(1);
18120 // If the mask is suitable for andi. or andis. we should sink the and.
18121 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
18122 // Can't handle constants wider than 64-bits.
18123 if (CI->getBitWidth() > 64)
18124 return false;
18125 int64_t ConstVal = CI->getZExtValue();
18126 return isUInt<16>(ConstVal) ||
18127 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
18128 }
18129
18130 // For non-constant masks, we can always use the record-form and.
18131 return true;
18132}
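// For example (illustrative): masks such as 0xFFFF (fits andi.) or
// 0xFFFF0000 (fits andis.) return true, so the and sinks next to its
// icmp-with-zero and folds into one record-form instruction; 0x10001
// spans both halfwords and returns false.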
18133
18134/// getAddrModeForFlags - Based on the set of address flags, select the most
18135/// optimal instruction format to match by.
18136PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
18137 // This is not a node we should be handling here.
18138 if (Flags == PPC::MOF_None)
18139 return PPC::AM_None;
18140 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
18141 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
18142 if ((Flags & FlagSet) == FlagSet)
18143 return PPC::AM_DForm;
18144 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
18145 if ((Flags & FlagSet) == FlagSet)
18146 return PPC::AM_DSForm;
18147 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
18148 if ((Flags & FlagSet) == FlagSet)
18149 return PPC::AM_DQForm;
18150 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
18151 if ((Flags & FlagSet) == FlagSet)
18152 return PPC::AM_PrefixDForm;
18153 // If no other forms are selected, return an X-Form as it is the most
18154 // general addressing mode.
18155 return PPC::AM_XForm;
18156}
18157
18158/// Set alignment flags based on whether or not the Frame Index is aligned.
18159/// Utilized when computing flags for address computation when selecting
18160/// load and store instructions.
18161static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
18162 SelectionDAG &DAG) {
18163 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
18164 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
18165 if (!FI)
18166 return;
18167 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
18168 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
18169 // If this is (add $FI, $S16Imm), the alignment flags are already set
18170 // based on the immediate. We just need to clear the alignment flags
18171 // if the FI alignment is weaker.
18172 if ((FrameIndexAlign % 4) != 0)
18173 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
18174 if ((FrameIndexAlign % 16) != 0)
18175 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
18176 // If the address is a plain FrameIndex, set alignment flags based on
18177 // FI alignment.
18178 if (!IsAdd) {
18179 if ((FrameIndexAlign % 4) == 0)
18180 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18181 if ((FrameIndexAlign % 16) == 0)
18182 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18183 }
18184}
18185
18186/// Given a node, compute flags that are used for address computation when
18187/// selecting load and store instructions. The flags computed are stored in
18188 /// FlagSet. This function takes into account whether the node is a constant,
18189 /// an ADD, an OR, or something else, and computes the address flags accordingly.
18190static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
18191 SelectionDAG &DAG) {
18192 // Set the alignment flags for the node depending on if the node is
18193 // 4-byte or 16-byte aligned.
18194 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
18195 if ((Imm & 0x3) == 0)
18196 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
18197 if ((Imm & 0xf) == 0)
18198 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
18199 };
18200
18201 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
18202 // All 32-bit constants can be computed as LIS + Disp.
18203 const APInt &ConstImm = CN->getAPIntValue();
18204 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
18205 FlagSet |= PPC::MOF_AddrIsSImm32;
18206 SetAlignFlagsForImm(ConstImm.getZExtValue());
18207 setAlignFlagsForFI(N, FlagSet, DAG);
18208 }
18209 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
18210 FlagSet |= PPC::MOF_RPlusSImm34;
18211 else // Let constant materialization handle large constants.
18212 FlagSet |= PPC::MOF_NotAddNorCst;
18213 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
18214 // This address can be represented as an addition of:
18215 // - Register + Imm16 (possibly a multiple of 4/16)
18216 // - Register + Imm34
18217 // - Register + PPCISD::Lo
18218 // - Register + Register
18219 // In any case, we won't have to match this as Base + Zero.
18220 SDValue RHS = N.getOperand(1);
18221 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
18222 const APInt &ConstImm = CN->getAPIntValue();
18223 if (ConstImm.isSignedIntN(16)) {
18224 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
18225 SetAlignFlagsForImm(ConstImm.getZExtValue());
18226 setAlignFlagsForFI(N, FlagSet, DAG);
18227 }
18228 if (ConstImm.isSignedIntN(34))
18229 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
18230 else
18231 FlagSet |= PPC::MOF_RPlusR; // Register.
18232 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
18233 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
18234 else
18235 FlagSet |= PPC::MOF_RPlusR;
18236 } else { // The address computation is not a constant or an addition.
18237 setAlignFlagsForFI(N, FlagSet, DAG);
18238 FlagSet |= PPC::MOF_NotAddNorCst;
18239 }
18240}
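// For example (illustrative): N = (add r, 48) sets MOF_RPlusSImm16 plus
// both Mult4 and Mult16 alignment flags, keeping DS- and DQ-Forms viable,
// while N = (add r, 50) sets only MOF_RPlusSImm16, steering selection
// toward a plain D-Form (or X-Form for vectors).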
18241
18242static bool isPCRelNode(SDValue N) {
18243 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
18244 isValidPCRelNode<ConstantPoolSDNode>(N) ||
18245 isValidPCRelNode<GlobalAddressSDNode>(N) ||
18246 isValidPCRelNode<JumpTableSDNode>(N) ||
18247 isValidPCRelNode<BlockAddressSDNode>(N));
18248}
18249
18250 /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
18251/// the address flags of the load/store instruction that is to be matched.
18252unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
18253 SelectionDAG &DAG) const {
18254 unsigned FlagSet = PPC::MOF_None;
18255
18256 // Compute subtarget flags.
18257 if (!Subtarget.hasP9Vector())
18258 FlagSet |= PPC::MOF_SubtargetBeforeP9;
18259 else {
18260 FlagSet |= PPC::MOF_SubtargetP9;
18261 if (Subtarget.hasPrefixInstrs())
18262 FlagSet |= PPC::MOF_SubtargetP10;
18263 }
18264 if (Subtarget.hasSPE())
18265 FlagSet |= PPC::MOF_SubtargetSPE;
18266
18267 // Check if we have a PCRel node and return early.
18268 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
18269 return FlagSet;
18270
18271 // If the node is the paired load/store intrinsics, compute flags for
18272 // address computation and return early.
18273 unsigned ParentOp = Parent->getOpcode();
18274 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
18275 (ParentOp == ISD::INTRINSIC_VOID))) {
18276 unsigned ID = Parent->getConstantOperandVal(1);
18277 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
18278 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
18279 ? Parent->getOperand(2)
18280 : Parent->getOperand(3);
18281 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
18282 FlagSet |= PPC::MOF_Vector;
18283 return FlagSet;
18284 }
18285 }
18286
18287 // Mark this as something we don't want to handle here if it is atomic
18288 // or pre-increment instruction.
18289 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
18290 if (LSB->isIndexed())
18291 return PPC::MOF_None;
18292
18293 // Compute in-memory type flags. This is based on if there are scalars,
18294 // floats or vectors.
18295 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
18296 assert(MN && "Parent should be a MemSDNode!");
18297 EVT MemVT = MN->getMemoryVT();
18298 unsigned Size = MemVT.getSizeInBits();
18299 if (MemVT.isScalarInteger()) {
18300 assert(Size <= 128 &&
18301 "Not expecting scalar integers larger than 16 bytes!");
18302 if (Size < 32)
18303 FlagSet |= PPC::MOF_SubWordInt;
18304 else if (Size == 32)
18305 FlagSet |= PPC::MOF_WordInt;
18306 else
18307 FlagSet |= PPC::MOF_DoubleWordInt;
18308 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
18309 if (Size == 128)
18310 FlagSet |= PPC::MOF_Vector;
18311 else if (Size == 256) {
18312 assert(Subtarget.pairedVectorMemops() &&
18313 "256-bit vectors are only available when paired vector memops is "
18314 "enabled!");
18315 FlagSet |= PPC::MOF_Vector;
18316 } else
18317 llvm_unreachable("Not expecting illegal vectors!");
18318 } else { // Floating point type: can be scalar, f128 or vector types.
18319 if (Size == 32 || Size == 64)
18320 FlagSet |= PPC::MOF_ScalarFloat;
18321 else if (MemVT == MVT::f128 || MemVT.isVector())
18322 FlagSet |= PPC::MOF_Vector;
18323 else
18324 llvm_unreachable("Not expecting illegal scalar floats!");
18325 }
18326
18327 // Compute flags for address computation.
18328 computeFlagsForAddressComputation(N, FlagSet, DAG);
18329
18330 // Compute type extension flags.
18331 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
18332 switch (LN->getExtensionType()) {
18333 case ISD::SEXTLOAD:
18334 FlagSet |= PPC::MOF_SExt;
18335 break;
18336 case ISD::EXTLOAD:
18337 case ISD::ZEXTLOAD:
18338 FlagSet |= PPC::MOF_ZExt;
18339 break;
18340 case ISD::NON_EXTLOAD:
18341 FlagSet |= PPC::MOF_NoExt;
18342 break;
18343 }
18344 } else
18345 FlagSet |= PPC::MOF_NoExt;
18346
18347 // For integers, no extension is the same as zero extension.
18348 // We set the extension mode to zero extension so we don't have
18349 // to add separate entries in AddrModesMap for loads and stores.
18350 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
18351 FlagSet |= PPC::MOF_ZExt;
18352 FlagSet &= ~PPC::MOF_NoExt;
18353 }
18354
18355 // If we don't have prefixed instructions, 34-bit constants should be
18356 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
18357 bool IsNonP1034BitConst =
18358 ((PPC::MOF_RPlusSImm34 | PPC::MOF_SubtargetP10 | PPC::MOF_RPlusR) &
18359 FlagSet) == PPC::MOF_RPlusSImm34;
18360 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
18361 IsNonP1034BitConst)
18362 FlagSet |= PPC::MOF_NotAddNorCst;
18363
18364 return FlagSet;
18365}
18366
18367/// SelectForceXFormMode - Given the specified address, force it to be
18368/// represented as an indexed [r+r] operation (an XForm instruction).
18369PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
18370 SDValue &Base,
18371 SelectionDAG &DAG) const {
18372
18373 PPC::AddrMode Mode = PPC::AM_XForm;
18374 int16_t ForceXFormImm = 0;
18375 if (provablyDisjointOr(DAG, N) &&
18376 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
18377 Disp = N.getOperand(0);
18378 Base = N.getOperand(1);
18379 return Mode;
18380 }
18381
18382 // If the address is the result of an add, we will utilize the fact that the
18383 // address calculation includes an implicit add. However, we can reduce
18384 // register pressure if we do not materialize a constant just for use as the
18385 // index register. We only get rid of the add if it is not an add of a
18386 // value and a 16-bit signed constant and both have a single use.
18387 if (N.getOpcode() == ISD::ADD &&
18388 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
18389 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
18390 Disp = N.getOperand(0);
18391 Base = N.getOperand(1);
18392 return Mode;
18393 }
18394
18395 // Otherwise, use R0 as the base register.
18396 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18397 N.getValueType());
18398 Base = N;
18399
18400 return Mode;
18401}
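// For example (illustrative): for N = (add r5, 100000) the constant does
// not fit a signed 16-bit field, so the add itself is folded into the
// indexed access: Disp takes N.getOperand(0), Base takes the materialized
// constant, and the load/store is emitted in [r+r] X-Form.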
18402
18403bool PPCTargetLowering::splitValueIntoRegisterParts(
18404 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
18405 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
18406 EVT ValVT = Val.getValueType();
18407 // If we are splitting a scalar integer into f64 parts (i.e. so they
18408 // can be placed into VFRC registers), we need to zero extend and
18409 // bitcast the values. This will ensure the value is placed into a
18410 // VSR using direct moves or stack operations as needed.
18411 if (PartVT == MVT::f64 &&
18412 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
18413 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
18414 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
18415 Parts[0] = Val;
18416 return true;
18417 }
18418 return false;
18419}
18420
18421SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
18422 SelectionDAG &DAG) const {
18423 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18424 TargetLowering::CallLoweringInfo CLI(DAG);
18425 EVT RetVT = Op.getValueType();
18426 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
18427 SDValue Callee =
18428 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
18429 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetVT, false);
18430 TargetLowering::ArgListTy Args;
18431 TargetLowering::ArgListEntry Entry;
18432 for (const SDValue &N : Op->op_values()) {
18433 EVT ArgVT = N.getValueType();
18434 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
18435 Entry.Node = N;
18436 Entry.Ty = ArgTy;
18437 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, SignExtend);
18438 Entry.IsZExt = !Entry.IsSExt;
18439 Args.push_back(Entry);
18440 }
18441
18442 SDValue InChain = DAG.getEntryNode();
18443 SDValue TCChain = InChain;
18444 const Function &F = DAG.getMachineFunction().getFunction();
18445 bool isTailCall =
18446 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
18447 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
18448 if (isTailCall)
18449 InChain = TCChain;
18450 CLI.setDebugLoc(SDLoc(Op))
18451 .setChain(InChain)
18452 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
18453 .setTailCall(isTailCall)
18454 .setSExtResult(SignExtend)
18455 .setZExtResult(!SignExtend)
18456 .setIsPostTypeLegalization(true);
18457 return TLI.LowerCallTo(CLI).first;
18458}
18459
18460SDValue PPCTargetLowering::lowerLibCallBasedOnType(
18461 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
18462 SelectionDAG &DAG) const {
18463 if (Op.getValueType() == MVT::f32)
18464 return lowerToLibCall(LibCallFloatName, Op, DAG);
18465
18466 if (Op.getValueType() == MVT::f64)
18467 return lowerToLibCall(LibCallDoubleName, Op, DAG);
18468
18469 return SDValue();
18470}
18471
18472bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
18473 SDNodeFlags Flags = Op.getNode()->getFlags();
18474 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
18475 Flags.hasNoNaNs() && Flags.hasNoInfs();
18476}
18477
18478bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
18479 return Op.getNode()->getFlags().hasApproximateFuncs();
18480}
18481
18482bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
18483 return TM.getOptions().PPCGenScalarMASSEntries;
18484}
18485
18486SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
18487 const char *LibCallFloatName,
18488 const char *LibCallDoubleNameFinite,
18489 const char *LibCallFloatNameFinite,
18490 SDValue Op,
18491 SelectionDAG &DAG) const {
18492 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
18493 return SDValue();
18494
18495 if (!isLowringToMASSFiniteSafe(Op))
18496 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
18497 DAG);
18498
18499 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
18500 LibCallDoubleNameFinite, Op, DAG);
18501}
18502
18503SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
18504 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
18505 "__xl_powf_finite", Op, DAG);
18506}
18507
18508SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
18509 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
18510 "__xl_sinf_finite", Op, DAG);
18511}
18512
18513SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
18514 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
18515 "__xl_cosf_finite", Op, DAG);
18516}
18517
18518SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
18519 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
18520 "__xl_logf_finite", Op, DAG);
18521}
18522
18523SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
18524 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
18525 "__xl_log10f_finite", Op, DAG);
18526}
18527
18528SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
18529 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
18530 "__xl_expf_finite", Op, DAG);
18531}
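// Usage note (illustrative): these hooks only fire when scalar MASS
// conversion is enabled and the call has the afn fast-math flag, e.g.
//   %r = call afn double @llvm.pow.f64(double %x, double %y)
// lowers to __xl_pow, and with nnan, ninf and nsz also present it picks
// the __xl_pow_finite variant via lowerLibCallBase above.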
18532
18533// If we happen to match to an aligned D-Form, check if the Frame Index is
18534// adequately aligned. If it is not, reset the mode to match to X-Form.
18535static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
18536 PPC::AddrMode &Mode) {
18537 if (!isa<FrameIndexSDNode>(N))
18538 return;
18539 if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
18540 (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
18541 Mode = PPC::AM_XForm;
18542}
18543
18544 /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
18545/// compute the address flags of the node, get the optimal address mode based
18546/// on the flags, and set the Base and Disp based on the address mode.
18547PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
18548 SDValue N, SDValue &Disp,
18549 SDValue &Base,
18550 SelectionDAG &DAG,
18551 MaybeAlign Align) const {
18552 SDLoc DL(Parent);
18553
18554 // Compute the address flags.
18555 unsigned Flags = computeMOFlags(Parent, N, DAG);
18556
18557 // Get the optimal address mode based on the Flags.
18558 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
18559
18560 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
18561 // Select an X-Form load if it is not.
18562 setXFormForUnalignedFI(N, Flags, Mode);
18563
18564 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
18565 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
18566 assert(Subtarget.isUsingPCRelativeCalls() &&
18567 "Must be using PC-Relative calls when a valid PC-Relative node is "
18568 "present!");
18569 Mode = PPC::AM_PCRel;
18570 }
18571
18572 // Set Base and Disp accordingly depending on the address mode.
18573 switch (Mode) {
18574 case PPC::AM_DForm:
18575 case PPC::AM_DSForm:
18576 case PPC::AM_DQForm: {
18577 // This is a register plus a 16-bit immediate. The base will be the
18578 // register and the displacement will be the immediate unless it
18579 // isn't sufficiently aligned.
18580 if (Flags & PPC::MOF_RPlusSImm16) {
18581 SDValue Op0 = N.getOperand(0);
18582 SDValue Op1 = N.getOperand(1);
18583 int16_t Imm = Op1->getAsZExtVal();
18584 if (!Align || isAligned(*Align, Imm)) {
18585 Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
18586 Base = Op0;
18587 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
18588 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18589 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18590 }
18591 break;
18592 }
18593 }
18594 // This is a register plus the @lo relocation. The base is the register
18595 // and the displacement is the global address.
18596 else if (Flags & PPC::MOF_RPlusLo) {
18597 Disp = N.getOperand(1).getOperand(0); // The global address.
18598 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
18599 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
18600 Disp.getOpcode() == ISD::TargetConstantPool ||
18601 Disp.getOpcode() == ISD::TargetJumpTable);
18602 Base = N.getOperand(0);
18603 break;
18604 }
18605 // This is a constant address at most 32 bits. The base will be
18606 // zero or load-immediate-shifted and the displacement will be
18607 // the low 16 bits of the address.
18608 else if (Flags & PPC::MOF_AddrIsSImm32) {
18609 auto *CN = cast<ConstantSDNode>(N);
18610 EVT CNType = CN->getValueType(0);
18611 uint64_t CNImm = CN->getZExtValue();
18612 // If this address fits entirely in a 16-bit sext immediate field, codegen
18613 // this as "d, 0".
18614 int16_t Imm;
18615 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
18616 Disp = DAG.getTargetConstant(Imm, DL, CNType);
18617 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18618 CNType);
18619 break;
18620 }
18621 // Handle 32-bit sext immediate with LIS + Addr mode.
18622 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
18623 (!Align || isAligned(*Align, CNImm))) {
18624 int32_t Addr = (int32_t)CNImm;
18625 // Otherwise, break this down into LIS + Disp.
18626 Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
18627 Base =
18628 DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
18629 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
18630 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
18631 break;
18632 }
18633 }
18634 // Otherwise, the PPC::MOF_NotAddNorCst flag is set. Load/Store is non-foldable.
18635 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
18636 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
18637 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18638 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
18639 } else
18640 Base = N;
18641 break;
18642 }
18643 case PPC::AM_PrefixDForm: {
18644 int64_t Imm34 = 0;
18645 unsigned Opcode = N.getOpcode();
18646 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
18647 (isIntS34Immediate(N.getOperand(1), Imm34))) {
18648 // N is an ADD/OR node, and its operand is a 34-bit signed immediate.
18649 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18650 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
18651 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
18652 else
18653 Base = N.getOperand(0);
18654 } else if (isIntS34Immediate(N, Imm34)) {
18655 // The address is a 34-bit signed immediate.
18656 Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
18657 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
18658 }
18659 break;
18660 }
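  // Illustrative note (annotation; the mnemonics are examples, not taken
  // from this file): AM_PrefixDForm corresponds to prefixed instructions
  // such as pld/pstd, whose displacement field holds the full 34-bit
  // offset, e.g. conceptually pld r3, 123456789(r4) with Disp = 123456789
  // and Base = r4; when the whole address is a 34-bit immediate, Base is
  // ZERO8 and the displacement stands alone.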
18661 case PPC::AM_PCRel: {
18662 // When selecting PC-Relative instructions, "Base" is not utilized as
18663 // we select the address as [PC+imm].
18664 Disp = N;
18665 break;
18666 }
18667 case PPC::AM_None:
18668 break;
18669 default: { // By default, X-Form is always available to be selected.
18670 // When a frame index is not sufficiently aligned, we also match by X-Form.
18671 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
18672 Base = FI ? N : N.getOperand(1);
18673 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
18674 N.getValueType())
18675 : N.getOperand(0);
18676 break;
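    // Illustrative note (annotation): a register-plus-register address
    // ends up here and is selected as an indexed form such as lwzx/stdx,
    // with the two registers carried in Disp and Base; an insufficiently
    // aligned frame index also lands here, paired with ZERO/ZERO8.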
18677 }
18678 }
18679 return Mode;
18680}
18681
18682CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
18683 bool Return,
18684 bool IsVarArg) const {
18685 switch (CC) {
18686 case CallingConv::Cold:
18687 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
18688 default:
18689 return CC_PPC64_ELF;
18690 }
18691}
18692
18693bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
18694 return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
18695}
18696
18697TargetLowering::AtomicExpansionKind
18698PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
18699 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
18700 if (shouldInlineQuadwordAtomics() && Size == 128)
18701 return AtomicExpansionKind::MaskedIntrinsic;
18702
18703 switch (AI->getOperation()) {
18704 case AtomicRMWInst::UIncWrap:
18705 case AtomicRMWInst::UDecWrap:
18706 return AtomicExpansionKind::CmpXChg;
18707 default:
18708 return TargetLowering::shouldExpandAtomicRMWInIR(AI);
18709 }
18710
18711 llvm_unreachable("unreachable atomicrmw operation");
18712}
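// Illustrative IR (hypothetical input, annotation only): with quadword
// atomics available on a 64-bit subtarget, the AtomicExpand pass turns
//   %old = atomicrmw add ptr %p, i128 %v monotonic
// into the masked-intrinsic expansion handled by
// emitMaskedAtomicRMWIntrinsic below.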
18713
18714TargetLowering::AtomicExpansionKind
18715PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
18716 unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
18717 if (shouldInlineQuadwordAtomics() && Size == 128)
18718 return AtomicExpansionKind::MaskedIntrinsic;
18719 return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
18720}
18721
18722static Intrinsic::ID
18723getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
18724 switch (BinOp) {
18725 default:
18726 llvm_unreachable("Unexpected AtomicRMW BinOp");
18727 case AtomicRMWInst::Xchg:
18728 return Intrinsic::ppc_atomicrmw_xchg_i128;
18729 case AtomicRMWInst::Add:
18730 return Intrinsic::ppc_atomicrmw_add_i128;
18731 case AtomicRMWInst::Sub:
18732 return Intrinsic::ppc_atomicrmw_sub_i128;
18733 case AtomicRMWInst::And:
18734 return Intrinsic::ppc_atomicrmw_and_i128;
18735 case AtomicRMWInst::Or:
18736 return Intrinsic::ppc_atomicrmw_or_i128;
18737 case AtomicRMWInst::Xor:
18738 return Intrinsic::ppc_atomicrmw_xor_i128;
18739 case AtomicRMWInst::Nand:
18740 return Intrinsic::ppc_atomicrmw_nand_i128;
18741 }
18742}
18743
18744Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
18745 IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
18746 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
18747 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18748 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18749 Type *ValTy = Incr->getType();
18750 assert(ValTy->getPrimitiveSizeInBits() == 128);
18751 Function *RMW = Intrinsic::getDeclaration(
18752 M, getIntrinsicForAtomicRMWBinOp128(AI->getOperation()));
18753 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18754 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
18755 Value *IncrHi =
18756 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
18757 Value *LoHi = Builder.CreateCall(RMW, {AlignedAddr, IncrLo, IncrHi});
18758 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18759 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18760 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18761 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18762 return Builder.CreateOr(
18763 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18764}
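// Worked example (illustrative annotation): if the intrinsic returns the
// pair Lo = 0xFFFFFFFFFFFFFFFF and Hi = 0x1, the recombination above
// computes zext(Lo) | (zext(Hi) << 64) == 0x1FFFFFFFFFFFFFFFF as the
// i128 result.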
18765
18766Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
18767 IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
18768 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
18769 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
18770 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
18771 Type *ValTy = CmpVal->getType();
18772 assert(ValTy->getPrimitiveSizeInBits() == 128);
18773 Function *IntCmpXchg =
18774 Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
18775 Type *Int64Ty = Type::getInt64Ty(M->getContext());
18776 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
18777 Value *CmpHi =
18778 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
18779 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
18780 Value *NewHi =
18781 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
18782 emitLeadingFence(Builder, CI, Ord);
18783 Value *LoHi =
18784 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
18785 emitTrailingFence(Builder, CI, Ord);
18786 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
18787 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
18788 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
18789 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
18790 return Builder.CreateOr(
18791 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
18792}
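// Illustrative IR shape (hypothetical values, annotation only): a quadword
//   %r = cmpxchg ptr %p, i128 %cmp, i128 %new seq_cst seq_cst
// reaches this hook, which splits %cmp and %new into i64 lo/hi halves,
// calls llvm.ppc.cmpxchg.i128 between the leading and trailing fences,
// and reassembles the returned pair into the i128 result.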
unsigned const MachineRegisterInfo * MRI
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall)
#define Success
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
@ OP_COPY
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
Function Alias Analysis Results
Atomic ordering constants.
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Given that RA is a live propagate it s liveness to any other values it uses(according to Uses). void DeadArgumentEliminationPass
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Addr
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
#define RegName(no)
static LVOptions Options
Definition: LVOptions.cpp:25
lazy value info
static bool isConstantOrUndef(const SDValue Op)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
cl::opt< bool > ANDIGlueBug
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
const char LLVMTargetMachineRef TM
pre isel intrinsic Pre ISel Intrinsic Lowering
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
This defines the Use class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
Value * RHS
Value * LHS
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:5196
bool isDenormal() const
Definition: APFloat.h:1296
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1385
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:427
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
APInt abs() const
Get the absolute value.
Definition: APInt.h:1737
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:307
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:449
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1671
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:274
An arbitrary precision integer that knows its signedness.
Definition: APSInt.h:23
This class represents an incoming formal argument to a Function.
Definition: Argument.h:31
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:760
@ Add
*p = old + v
Definition: Instructions.h:764
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ Xor
*p = old ^ v
Definition: Instructions.h:774
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:800
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:804
@ Nand
*p = ~(old & v)
Definition: Instructions.h:770
BinOp getOperation() const
Definition: Instructions.h:845
This is an SDNode representing atomic operations.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:889
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
MachineFunction & getMachineFunction() const
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
int64_t AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1494
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1742
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:2228
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1800
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1662
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1735
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1668
unsigned arg_size() const
Definition: InstrTypes.h:1685
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:268
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:238
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:900
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:878
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:865
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:504
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:681
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:701
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:713
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:678
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:263
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition: Function.h:339
arg_iterator arg_begin()
Definition: Function.h:814
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:356
size_t arg_size() const
Definition: Function.h:847
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:206
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:214
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:675
const GlobalValue * getGlobal() const
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:563
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
void setThreadLocalMode(ThreadLocalMode Val)
Definition: GlobalValue.h:267
bool hasHiddenVisibility() const
Definition: GlobalValue.h:250
StringRef getSection() const
Definition: Globals.cpp:174
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:631
bool hasComdat() const
Definition: GlobalValue.h:241
Type * getValueType() const
Definition: GlobalValue.h:296
bool hasProtectedVisibility() const
Definition: GlobalValue.h:251
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2516
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1437
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:174
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1416
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2021
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2007
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1497
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2412
const BasicBlock * getParent() const
Definition: Instruction.h:152
bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:184
bool isUnordered() const
Definition: Instructions.h:274
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition: MCContext.h:81
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:397
Metadata node.
Definition: Metadata.h:1067
Machine Value Type.
SimpleValueType SimpleTy
@ INVALID_SIMPLE_VALUE_TYPE
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
An SDNode that represents everything that will be needed to construct a MachineInstr.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.h:293
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
static bool hasPCRelFlag(unsigned TF)
Definition: PPCInstrInfo.h:300
bool is32BitELFABI() const
Definition: PPCSubtarget.h:219
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:259
bool isAIXABI() const
Definition: PPCSubtarget.h:214
bool useSoftFloat() const
Definition: PPCSubtarget.h:174
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:142
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:202
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:253
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:271
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:145
bool isSVR4ABI() const
Definition: PPCSubtarget.h:215
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:134
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:206
bool isLittleEndian() const
Definition: PPCSubtarget.h:181
bool isTargetLinux() const
Definition: PPCSubtarget.h:212
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:277
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:289
bool is64BitELFABI() const
Definition: PPCSubtarget.h:218
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:155
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:295
bool enableMachineScheduler() const override
Scheduling customization.
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:152
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:265
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified addressed, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
bool shouldInlineQuadwordAtomics() const
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
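For orientation, hooks like isFMAFasterThanFMulAndFAdd above are plain overrides. A minimal sketch for a hypothetical target follows; MyTargetLowering is an assumed name and this is not the actual PPC implementation:

  // Hypothetical override: report FMA as profitable only for scalar
  // f32/f64, where this target is assumed to have fused multiply-add.
  bool MyTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                    EVT VT) const {
    if (!VT.isSimple())
      return false;
    switch (VT.getSimpleVT().SimpleTy) {
    case MVT::f32:
    case MVT::f64:
      return true;
    default:
      return false;
    }
  }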
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
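A brief usage sketch, assuming a Register R plus MachineRegisterInfo MRI and a TargetRegisterInfo *TRI are in scope in a machine pass:

  // Virtual registers are looked up in MachineRegisterInfo; physical
  // registers go through TargetRegisterInfo.
  const TargetRegisterClass *RC =
      R.isVirtual() ? MRI.getRegClass(R)
                    : TRI->getMinimalPhysRegClass(R);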
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
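A short sketch of the traversal idioms these accessors support, assuming an SDValue Op is in scope:

  SDNode *N = Op.getNode();
  if (N->getOpcode() == ISD::ADD && N->hasOneUse()) {
    // Operand 1 of a canonicalized ADD holds any constant.
    if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
      dbgs() << "add of constant " << C->getZExtValue() << '\n';
    for (SDNode *User : N->uses())
      User->dump(); // visit each node using any result of N
  }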
static SectionKind getMetadata()
Definition: SectionKind.h:188
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:721
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:473
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:477
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:447
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:731
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:827
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:471
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:726
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...

SDValue getRegister(unsigned Reg, EVT VT)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:472
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:772
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:675
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:767
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:468
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:798
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:844
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:484
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:738
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:553
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
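As a composition sketch, the builders above chain in the usual way; DAG, dl, operands X and Y, an integer VT, and a setcc result type CCVT are all assumed to be in scope inside a lowering routine:

  // Build: (x == 0) ? y : x + 1
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue Inc  = DAG.getNode(ISD::ADD, dl, VT, X,
                             DAG.getConstant(1, dl, VT));
  SDValue IsZ  = DAG.getSetCC(dl, CCVT, X, Zero, ISD::SETEQ);
  SDValue Res  = DAG.getSelect(dl, VT, IsZ, Y, Inc);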
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
void clear()
Definition: SmallSet.h:218
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
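A typical deduplication sketch; Worklist and visit are assumed names:

  // Stays on the stack for up to 16 distinct pointers; insert().second
  // is false when the pointer was already present.
  SmallPtrSet<const SDNode *, 16> Visited;
  for (const SDNode *N : Worklist)
    if (Visited.insert(N).second)
      visit(N); // hypothetical per-node action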
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
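A minimal usage sketch, mapping an inline-asm constraint letter (Constraint is an assumed StringRef) to an arbitrary code:

  // Falls through to Default when no Case matches.
  unsigned Kind = StringSwitch<unsigned>(Constraint)
                      .Case("r", 0)  // GPR
                      .Case("f", 1)  // FPR
                      .Case("v", 2)  // vector register
                      .Default(~0u);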
Class to represent struct types.
Definition: DerivedTypes.h:216
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
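The setters above are normally invoked from a target's TargetLowering constructor. A minimal sketch under assumed names (MyTarget::GPRRegClass and Subtarget are placeholders; this is not the full PPC setup):

  // Hypothetical constructor body fragment.
  addRegisterClass(MVT::i32, &MyTarget::GPRRegClass);   // assumed regclass
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);   // no hardware divrem pair
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); // handled in LowerOperation
  setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setMinFunctionAlignment(Align(4));
  setSchedulingPreference(Sched::ILP);
  computeRegisterProperties(Subtarget.getRegisterInfo());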
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
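Among the helpers above, makeLibCall is the usual escape hatch for operations legalized to runtime calls. A hedged sketch inside a lowering method, with Op and DAG assumed in scope:

  // Lower an unsupported f128 addition to a library call.
  TargetLowering::MakeLibCallOptions CallOptions;
  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
  std::pair<SDValue, SDValue> Call =
      makeLibCall(DAG, RTLIB::ADD_F128, Op.getValueType(), Ops,
                  CallOptions, SDLoc(Op));
  return Call.first; // Call.second carries the output chain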
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
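A small sketch of the codegen-model queries these support; GV and TM are assumed, and both emit helpers are hypothetical:

  // Pick an access strategy from the relocation model and DSO-locality.
  if (TM.getRelocationModel() == Reloc::PIC_ && !TM.shouldAssumeDSOLocal(GV))
    emitGOTAccess(GV);        // hypothetical: GOT-indirect access
  if (GV->isThreadLocal() &&
      TM.getTLSModel(GV) == TLSModel::GeneralDynamic)
    emitTLSGetAddrCall(GV);   // hypothetical: __tls_get_addr path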
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:154
bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
static Type * getVoidTy(LLVMContext &C)
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:302
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:157
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:246
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:137
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
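A classification sketch using these predicates, with V an assumed llvm::Value*:

  Type *Ty = V->getType();
  if (Ty->isVectorTy())
    Ty = Ty->getScalarType(); // reduce to the element type
  if (Ty->isFloatTy() || Ty->isDoubleTy()) {
    unsigned Bits = Ty->getPrimitiveSizeInBits().getFixedValue();
    (void)Bits; // 32 or 64 here
  }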
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:1126
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1122
@ TargetConstantPool
Definition: ISDOpcodes.h:168
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:147
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1155
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1241
@ STRICT_FCEIL
Definition: ISDOpcodes.h:426
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:979
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1031
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:543
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:688
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:913
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1199
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
Definition: ISDOpcodes.h:939
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:450
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ TargetExternalSymbol
Definition: ISDOpcodes.h:169
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:1047
@ TargetJumpTable
Definition: ISDOpcodes.h:167
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1221
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:988
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1077
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1056
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:507
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:349
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1237
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1151
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:430
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition: ISDOpcodes.h:978
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:424
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:425
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1248
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:971
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1041
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:674
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ STRICT_FROUND
Definition: ISDOpcodes.h:428
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:449
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:427
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:1097
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:443
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:465
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:442
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1182
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:470
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1208
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:1094
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:423
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:141
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1146
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1070
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:763
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:493
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:340
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1140
@ STRICT_FRINT
Definition: ISDOpcodes.h:422
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1320
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1205
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:165
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is an EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1556
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1472
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1523
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1503
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1562
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
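These predicates typically gate DAG combines. A sketch, with N an assumed SDNode* and tryFoldLoad a hypothetical combine step:

  // Only fold plain loads: no extension, no pre/post-increment.
  if (ISD::isNON_EXTLoad(N) && ISD::isUNINDEXEDLoad(N)) {
    auto *LD = cast<LoadSDNode>(N);
    if (!LD->isVolatile())
      tryFoldLoad(LD); // hypothetical
  }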
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1461
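A minimal sketch of declaring and calling an overloaded intrinsic through IRBuilder; M (a Module*), Builder, and an i64 Value *X are assumed:

  // llvm.ctlz is overloaded on its operand type; the second argument
  // states whether a zero input is poison.
  Function *Ctlz =
      Intrinsic::getDeclaration(M, Intrinsic::ctlz, {Builder.getInt64Ty()});
  Value *Leading = Builder.CreateCall(Ctlz, {X, Builder.getFalse()});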
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition: NVPTX.h:96
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition: PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition: PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition: PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition: PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition: PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition: PPC.h:200
@ MO_TPREL_HA
Definition: PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition: PPC.h:113
@ MO_TLS
Symbol for VK_PPC_TLS fixup attached to an ADD instruction.
Definition: PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition: PPC.h:140
@ MO_TPREL_LO
Definition: PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition: PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:160
@ MO_HA
Definition: PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:117
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting memory load instruction such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPRs to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ GeneralDynamic
Definition: CodeGen.h:46
Reg
All possible values of the reg field in the ModR/M byte.
@ XMC_PR
Program Code.
Definition: XCOFF.h:105
@ XTY_ER
External reference.
Definition: XCOFF.h:241
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
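A minimal usage sketch of this range wrapper (together with its llvm::any_of counterpart, listed further below); the container and predicates here are made-up examples:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

bool demoRangePredicates() {
  llvm::SmallVector<int, 4> Vals = {2, 4, 6, 8};
  bool AllEven = llvm::all_of(Vals, [](int V) { return V % 2 == 0; }); // true
  bool AnyBig = llvm::any_of(Vals, [](int V) { return V > 7; });       // true
  return AllEven && AnyBig;
}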
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:269
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
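A small sketch of how this helper combines with isPowerOf2_64 above: for a power of two, the count of trailing zeros is its base-2 logarithm.

#include "llvm/ADT/bit.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>

void demoBitHelpers() {
  assert(llvm::isPowerOf2_64(64));     // 64 == 1 << 6
  assert(!llvm::isPowerOf2_64(0));     // zero is explicitly excluded
  assert(llvm::countr_zero(64u) == 6); // trailing zeros give log2
}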
unsigned M1(unsigned Val)
Definition: VE.h:376
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Mod
The access may modify the value stored in memory.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests whether the value of the given node can be accurately represented as a sign ...
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ Mul
Product of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
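A short usage sketch, combining this with isAligned from above (the values are chosen arbitrarily for illustration):

#include "llvm/Support/Alignment.h"
#include <cassert>

void demoAlignment() {
  llvm::Align A(8);
  assert(llvm::alignTo(10, A) == 16); // round 10 up to the next multiple of 8
  assert(llvm::alignTo(16, A) == 16); // already-aligned values are unchanged
  assert(llvm::isAligned(A, 16));     // 16 is a multiple of 8
  assert(!llvm::isAligned(A, 10));    // 10 is not
}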
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
DWARFExpression::Operation Op
unsigned M0(unsigned Val)
Definition: VE.h:375
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
template <unsigned B> constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:436
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
template <unsigned B> constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:452
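A usage sketch for SignExtend32 and SignExtend64 (the template parameter B is the source bit width):

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

void demoSignExtend() {
  // 0x8000 has bit 15 set, so as a 16-bit value it is negative.
  assert(llvm::SignExtend32<16>(0x8000) == -32768);
  assert(llvm::SignExtend32<16>(0x1234) == 0x1234); // positive values unchanged
  assert(llvm::SignExtend64<32>(0x80000000ULL) == INT64_C(-2147483648));
}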
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
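Since isRunOfOnes is a static helper local to this file, here is a simplified re-implementation for illustration only; it ignores the wrapped-around runs (such as 0xF000000F) that the real helper also accepts for rlwinm-style masks.

#include "llvm/ADT/bit.h"
#include <cassert>
#include <cstdint>

// Illustrative only: true iff Val is one contiguous, non-wrapping run of 1s.
static bool isSingleRunOfOnes(uint32_t Val) {
  if (Val == 0)
    return false;
  Val >>= llvm::countr_zero(Val); // strip the zeros below the run
  return (Val & (Val + 1)) == 0;  // a run anchored at bit 0 is 2^k - 1
}

void demoRunOfOnes() {
  assert(isSingleRunOfOnes(0x0FF0));  // one contiguous run of ones
  assert(!isSingleRunOfOnes(0x0F0F)); // two separate runs
}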
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
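A hedged sketch combining the SDValue predicates above (isNullConstant, isAllOnesConstant, peekThroughBitcasts, isConstOrConstSplat) into one hypothetical helper; the function name is made up for illustration.

#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;

// Returns true if Op is (possibly a bitcast of) an all-zeros or all-ones
// value, also looking through splatted vector constants.
static bool isZeroOrAllOnes(SDValue Op) {
  Op = peekThroughBitcasts(Op);
  if (isNullConstant(Op) || isAllOnesConstant(Op))
    return true;
  if (ConstantSDNode *C = isConstOrConstSplat(Op))
    return C->isZero() || C->isAllOnes();
  return false;
}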
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:249
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:252
static constexpr roundingMode rmTowardZero
Definition: APFloat.h:234
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:141
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:438
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
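A short sketch exercising several of the EVT accessors above (assumes an LLVMContext is available; all the types used are standard MVTs):

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

void demoEVT(llvm::LLVMContext &Ctx) {
  llvm::EVT V4F32 = llvm::EVT::getVectorVT(Ctx, llvm::MVT::f32, 4);
  assert(V4F32.isVector() && V4F32.isFloatingPoint());
  assert(V4F32.getVectorNumElements() == 4);
  assert(V4F32.getVectorElementType() == llvm::MVT::f32);
  assert(V4F32.getSizeInBits() == 128);
  // Same shape, integer elements: v4f32 -> v4i32.
  llvm::EVT V4I32 = V4F32.changeVectorElementTypeToInteger();
  assert(V4I32.isInteger() && !V4I32.isScalarInteger());
  assert(llvm::EVT::getIntegerVT(Ctx, 32).isScalarInteger());
}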
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
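A small sketch of the KnownBits interface referenced above: an 8-bit tracker with hand-set facts, purely for illustration.

#include "llvm/Support/KnownBits.h"
#include <cassert>

void demoKnownBits() {
  llvm::KnownBits Known(8);    // 8 bits, initially nothing is known
  Known.Zero.setBits(0, 4);    // bits 0..3 are known to be zero
  Known.One.setBit(7);         // bit 7 is known to be one
  assert(!Known.isConstant()); // bits 4..6 are still unknown
  Known.Zero.setBits(4, 7);    // now every bit has a known value
  assert(Known.isConstant() && Known.getConstant() == 128);
  Known.resetAll();            // back to fully unknown
  assert(!Known.isConstant());
}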
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
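These setters form a chainable builder. A hedged sketch of the usual pattern inside a lowering hook follows; the wrapper function and its parameters (TLI, DAG, dl, Chain, Callee, RetTy, Args) are assumptions standing in for values that would already be in scope there.

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

// Hypothetical helper: emit a libcall through the chained builder.
static std::pair<SDValue, SDValue>
emitDemoLibCall(const TargetLowering &TLI, SelectionDAG &DAG, const SDLoc &dl,
                SDValue Chain, SDValue Callee, Type *RetTy,
                TargetLowering::ArgListTy Args) {
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(Chain)
      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
      .setSExtResult(); // the libcall result is sign-extended
  // Returns {return value, out-chain}.
  return TLI.LowerCallTo(CLI);
}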
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)