1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSubtarget.h"
24#include "PPCTargetMachine.h"
25#include "llvm/ADT/APFloat.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/APSInt.h"
28#include "llvm/ADT/ArrayRef.h"
29#include "llvm/ADT/DenseMap.h"
30#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/Statistic.h"
34#include "llvm/ADT/StringRef.h"
57#include "llvm/IR/CallingConv.h"
58#include "llvm/IR/Constant.h"
59#include "llvm/IR/Constants.h"
60#include "llvm/IR/DataLayout.h"
61#include "llvm/IR/DebugLoc.h"
63#include "llvm/IR/Function.h"
64#include "llvm/IR/GlobalValue.h"
65#include "llvm/IR/IRBuilder.h"
67#include "llvm/IR/Intrinsics.h"
68#include "llvm/IR/IntrinsicsPowerPC.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/Value.h"
73#include "llvm/MC/MCContext.h"
74#include "llvm/MC/MCExpr.h"
83#include "llvm/Support/Debug.h"
85#include "llvm/Support/Format.h"
91#include <algorithm>
92#include <cassert>
93#include <cstdint>
94#include <iterator>
95#include <list>
96#include <optional>
97#include <utility>
98#include <vector>
99
100using namespace llvm;
101
102#define DEBUG_TYPE "ppc-lowering"
103
105 "disable-p10-store-forward",
106 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
107 cl::init(false));
108
109static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
110cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
111
112static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
113cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
114
115static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
116cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
117
118static cl::opt<bool> DisableSCO("disable-ppc-sco",
119cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
120
121static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
122cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
123
124static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
125cl::desc("use absolute jump tables on ppc"), cl::Hidden);
126
127static cl::opt<bool>
128 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
129 cl::desc("disable vector permute decomposition"),
130 cl::init(true), cl::Hidden);
131
133 "disable-auto-paired-vec-st",
134 cl::desc("disable automatically generated 32byte paired vector stores"),
135 cl::init(true), cl::Hidden);
136
138 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
139 cl::desc("Set minimum number of entries to use a jump table on PPC"));
140
142 "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
143 cl::desc("Set minimum of largest number of comparisons to use bit test for "
144 "switch on PPC."));
145
147 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
148 cl::desc("max depth when checking alias info in GatherAllAliases()"));
149
151 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
152 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
153 "function to use initial-exec"));
154
155STATISTIC(NumTailCalls, "Number of tail calls");
156STATISTIC(NumSiblingCalls, "Number of sibling calls");
157STATISTIC(ShufflesHandledWithVPERM,
158 "Number of shuffles lowered to a VPERM or XXPERM");
159STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
160
161static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
162
163static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
164
165// A faster local-[exec|dynamic] TLS access sequence (enabled with the
166// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
167// variables; consistent with the IBM XL compiler, we apply a max size of
168// slightly under 32KB.
170
171// FIXME: Remove this once the bug has been fixed!
172extern cl::opt<bool> ANDIGlueBug;
173
174PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
175 const PPCSubtarget &STI)
176 : TargetLowering(TM), Subtarget(STI) {
177 // Initialize map that relates the PPC addressing modes to the computed flags
178 // of a load/store instruction. The map is used to determine the optimal
179 // addressing mode when selecting load and stores.
180 initializeAddrModeMap();
181 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
182 // arguments are at least 4/8 bytes aligned.
183 bool isPPC64 = Subtarget.isPPC64();
184 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
185 const MVT RegVT = Subtarget.getScalarIntVT();
186
187 // Set up the register classes.
188 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
189 if (!useSoftFloat()) {
190 if (hasSPE()) {
191 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
192 // EFPU2 APU only supports f32
193 if (!Subtarget.hasEFPU2())
194 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
195 } else {
196 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
197 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
198 }
199 }
200
203
204 // PowerPC uses addo_carry,subo_carry to propagate carry.
207
208 // On P10, the default lowering generates better code using the
209 // setbc instruction.
210 if (!Subtarget.hasP10Vector()) {
212 if (isPPC64)
214 }
215
216 // Match BITREVERSE to customized fast code sequence in the td file.
219
220 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
221 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
222
223 // Custom lower inline assembly to check for special registers.
224 setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
225 setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
226
227 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
228 for (MVT VT : MVT::integer_valuetypes()) {
231 }
232
233 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
234 setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
235
236 if (Subtarget.isISA3_0()) {
237 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
238 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
239 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
240 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
241 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
242 } else {
243 // No extending loads from f16 or HW conversions back and forth.
244 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
245 setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
246 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
247 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
248 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
249 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
250 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
251 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
252 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
253 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
254 }
255
256 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
257
258 // PowerPC has pre-inc loads and stores.
269 if (!Subtarget.hasSPE()) {
274 }
275
276 if (Subtarget.useCRBits()) {
278
279 if (isPPC64 || Subtarget.hasFPCVT()) {
284
285 setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
286 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
287 setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
288 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
289
294
295 setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote);
296 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
297 setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
298 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
299 } else {
304 }
305
306 // PowerPC does not support direct load/store of condition registers.
307 setOperationAction(ISD::LOAD, MVT::i1, Custom);
308 setOperationAction(ISD::STORE, MVT::i1, Custom);
309
310 // FIXME: Remove this once the ANDI glue bug is fixed:
311 if (ANDIGlueBug)
313
314 for (MVT VT : MVT::integer_valuetypes()) {
317 setTruncStoreAction(VT, MVT::i1, Expand);
318 }
319
320 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
321 }
322
323 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
324 // PPC (the libcall is not available).
329
330 // We do not currently implement these libm ops for PowerPC.
331 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
332 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
333 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
334 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
335 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
336 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
337
338 // PowerPC has no SREM/UREM instructions unless we are on P9
339 // On P9 we may use a hardware instruction to compute the remainder.
340 // When the result of both the remainder and the division is required it is
341 // more efficient to compute the remainder from the result of the division
342 // rather than use the remainder instruction. The instructions are legalized
343 // directly because the DivRemPairsPass performs the transformation at the IR
344 // level.
345 if (Subtarget.isISA3_0()) {
350 } else {
355 }
356
357 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
366
367 // Handle constrained floating-point operations of scalar.
368 // TODO: Handle SPE specific operation.
374
379
380 if (!Subtarget.hasSPE()) {
383 }
384
385 if (Subtarget.hasVSX()) {
388 }
389
390 if (Subtarget.hasFSQRT()) {
393 }
394
395 if (Subtarget.hasFPRND()) {
400
405 }
406
407 // We don't support sin/cos/sqrt/fmod/pow
408 setOperationAction(ISD::FSIN , MVT::f64, Expand);
409 setOperationAction(ISD::FCOS , MVT::f64, Expand);
410 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
412 setOperationAction(ISD::FPOW , MVT::f64, Expand);
413 setOperationAction(ISD::FSIN , MVT::f32, Expand);
414 setOperationAction(ISD::FCOS , MVT::f32, Expand);
415 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
417 setOperationAction(ISD::FPOW , MVT::f32, Expand);
418
419 // MASS transformation for LLVM intrinsics with replicating fast-math flag
420 // to be consistent with the PPCGenScalarMASSEntries pass.
421 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
422 setOperationAction(ISD::FSIN , MVT::f64, Custom);
423 setOperationAction(ISD::FCOS , MVT::f64, Custom);
424 setOperationAction(ISD::FPOW , MVT::f64, Custom);
425 setOperationAction(ISD::FLOG, MVT::f64, Custom);
426 setOperationAction(ISD::FLOG10, MVT::f64, Custom);
427 setOperationAction(ISD::FEXP, MVT::f64, Custom);
428 setOperationAction(ISD::FSIN , MVT::f32, Custom);
429 setOperationAction(ISD::FCOS , MVT::f32, Custom);
430 setOperationAction(ISD::FPOW , MVT::f32, Custom);
431 setOperationAction(ISD::FLOG, MVT::f32, Custom);
432 setOperationAction(ISD::FLOG10, MVT::f32, Custom);
433 setOperationAction(ISD::FEXP, MVT::f32, Custom);
434 }
435
436 if (Subtarget.hasSPE()) {
439 } else {
440 setOperationAction(ISD::FMA , MVT::f64, Legal);
441 setOperationAction(ISD::FMA , MVT::f32, Legal);
443 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
444 }
445
446 if (Subtarget.hasSPE())
447 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
448
449 // If we're enabling GP optimizations, use hardware square root
450 if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
451 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
452
453 if (!Subtarget.hasFSQRT() &&
454 !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
455 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
456
457 if (Subtarget.hasFCPSGN()) {
460 } else {
463 }
464
465 if (Subtarget.hasFPRND()) {
466 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
467 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
468 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
469 setOperationAction(ISD::FROUND, MVT::f64, Legal);
470
471 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
472 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
473 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
474 setOperationAction(ISD::FROUND, MVT::f32, Legal);
475 }
476
477 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
478 // instruction xxbrd to speed up scalar BSWAP64.
479 if (Subtarget.isISA3_1()) {
482 } else {
485 (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
486 }
487
488 // CTPOP or CTTZ were introduced in P8/P9 respectively
489 if (Subtarget.isISA3_0()) {
490 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
491 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
492 } else {
493 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
494 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
495 }
496
497 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
500 } else {
503 }
504
505 // PowerPC does not have ROTR
508
509 if (!Subtarget.useCRBits()) {
510 // PowerPC does not have Select
515 }
516
517 // PowerPC wants to turn select_cc of FP into fsel when possible.
520
521 // PowerPC wants to optimize integer setcc a bit
522 if (!Subtarget.useCRBits())
524
525 if (Subtarget.hasFPU()) {
529
533 }
534
535 // PowerPC does not have BRCOND which requires SetCC
536 if (!Subtarget.useCRBits())
537 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
538
539 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
540
541 if (Subtarget.hasSPE()) {
542 // SPE has built-in conversions
549
550 // SPE supports signaling compare of f32/f64.
553 } else {
554 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
557
558 // PowerPC does not have [U|S]INT_TO_FP
563 }
564
565 if (Subtarget.hasDirectMove() && isPPC64) {
566 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
567 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
568 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
569 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
570
579 } else {
580 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
581 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
582 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
583 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
584 }
585
586 // We cannot sextinreg(i1). Expand to shifts.
588
589 // Custom handling for PowerPC ucmp instruction
591 setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand);
592
593 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
594 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
595 // support continuation, user-level threading, and so on. As a result, no
596 // other SjLj exception interfaces are implemented; please don't build
597 // your own exception handling on top of them.
598 // LLVM/Clang supports zero-cost DWARF exception handling.
601
602 // We want to legalize GlobalAddress and ConstantPool nodes into the
603 // appropriate instructions to materialize the address.
614
615 // TRAP is legal.
616 setOperationAction(ISD::TRAP, MVT::Other, Legal);
617
618 // TRAMPOLINE is custom lowered.
619 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
620 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
621
622 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
623 setOperationAction(ISD::VASTART , MVT::Other, Custom);
624
625 if (Subtarget.is64BitELFABI()) {
626 // VAARG always uses double-word chunks, so promote anything smaller.
627 setOperationAction(ISD::VAARG, MVT::i1, Promote);
628 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
629 setOperationAction(ISD::VAARG, MVT::i8, Promote);
630 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
631 setOperationAction(ISD::VAARG, MVT::i16, Promote);
632 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
633 setOperationAction(ISD::VAARG, MVT::i32, Promote);
634 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
635 setOperationAction(ISD::VAARG, MVT::Other, Expand);
636 } else if (Subtarget.is32BitELFABI()) {
637 // VAARG is custom lowered with the 32-bit SVR4 ABI.
638 setOperationAction(ISD::VAARG, MVT::Other, Custom);
639 setOperationAction(ISD::VAARG, MVT::i64, Custom);
640 } else
641 setOperationAction(ISD::VAARG, MVT::Other, Expand);
642
643 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
644 if (Subtarget.is32BitELFABI())
645 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
646 else
647 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
648
649 // Use the default implementation.
650 setOperationAction(ISD::VAEND , MVT::Other, Expand);
651 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
652 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
653 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
654 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
655 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
656 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
659
660 // We want to custom lower some of our intrinsics.
666
667 // To handle counter-based loop conditions.
669
674
675 // Comparisons that require checking two conditions.
676 if (Subtarget.hasSPE()) {
681 }
694
697
698 if (Subtarget.has64BitSupport()) {
699 // They also have instructions for converting between i64 and fp.
708 // This is just the low 32 bits of a (signed) fp->i64 conversion.
709 // We cannot do this with Promote because i64 is not a legal type.
712
713 if (Subtarget.hasLFIWAX() || isPPC64) {
716 }
717 } else {
718 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
719 if (Subtarget.hasSPE()) {
722 } else {
725 }
726 }
727
728 // With the instructions enabled under FPCVT, we can do everything.
729 if (Subtarget.hasFPCVT()) {
730 if (Subtarget.has64BitSupport()) {
739 }
740
749 }
750
751 if (Subtarget.use64BitRegs()) {
752 // 64-bit PowerPC implementations can support i64 types directly
753 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
754 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
756 // 64-bit PowerPC wants to expand i128 shifts itself.
760 } else {
761 // 32-bit PowerPC wants to expand i64 shifts itself.
765 }
766
767 // PowerPC has better expansions for funnel shifts than the generic
768 // TargetLowering::expandFunnelShift.
769 if (Subtarget.has64BitSupport()) {
772 }
775
776 if (Subtarget.hasVSX()) {
777 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
778 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
779 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
780 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
783 }
784
785 if (Subtarget.hasAltivec()) {
786 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
791 }
792 // First set operation action for all vector types to expand. Then we
793 // will selectively turn on ones that can be effectively codegen'd.
795 // add/sub are legal for all supported vector VT's.
798
799 // For v2i64, these are only valid with P8Vector. This is corrected after
800 // the loop.
801 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
806 }
807 else {
812 }
813
814 if (Subtarget.hasVSX()) {
815 setOperationAction(ISD::FMAXNUM, VT, Legal);
816 setOperationAction(ISD::FMINNUM, VT, Legal);
817 }
818
819 // Vector instructions introduced in P8
820 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
823 }
824 else {
827 }
828
829 // Vector instructions introduced in P9
830 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
832 else
834
835 // We promote all shuffles to v16i8.
836 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
837 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
838
839 // We promote all non-typed operations to v4i32.
840 setOperationAction(ISD::AND , VT, Promote);
841 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
842 setOperationAction(ISD::OR , VT, Promote);
843 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
844 setOperationAction(ISD::XOR , VT, Promote);
845 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
846 setOperationAction(ISD::LOAD , VT, Promote);
847 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
848 setOperationAction(ISD::SELECT, VT, Promote);
849 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
851 setOperationAction(ISD::SELECT_CC, VT, Promote);
852 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
853 setOperationAction(ISD::STORE, VT, Promote);
854 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
855
856 // No other operations are legal.
864 setOperationAction(ISD::FNEG, VT, Expand);
865 setOperationAction(ISD::FSQRT, VT, Expand);
866 setOperationAction(ISD::FLOG, VT, Expand);
867 setOperationAction(ISD::FLOG10, VT, Expand);
868 setOperationAction(ISD::FLOG2, VT, Expand);
869 setOperationAction(ISD::FEXP, VT, Expand);
870 setOperationAction(ISD::FEXP2, VT, Expand);
871 setOperationAction(ISD::FSIN, VT, Expand);
872 setOperationAction(ISD::FCOS, VT, Expand);
873 setOperationAction(ISD::FABS, VT, Expand);
874 setOperationAction(ISD::FFLOOR, VT, Expand);
875 setOperationAction(ISD::FCEIL, VT, Expand);
876 setOperationAction(ISD::FTRUNC, VT, Expand);
877 setOperationAction(ISD::FRINT, VT, Expand);
878 setOperationAction(ISD::FLDEXP, VT, Expand);
879 setOperationAction(ISD::FNEARBYINT, VT, Expand);
890 setOperationAction(ISD::FPOW, VT, Expand);
895
896 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
897 setTruncStoreAction(VT, InnerVT, Expand);
900 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
901 }
902 }
904 if (!Subtarget.hasP8Vector()) {
905 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
906 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
907 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
908 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
909 }
910
911 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
912 // with merges, splats, etc.
914
915 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
916 // are cheap, so handle them before they get expanded to scalar.
922
923 setOperationAction(ISD::AND , MVT::v4i32, Legal);
924 setOperationAction(ISD::OR , MVT::v4i32, Legal);
925 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
926 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
928 Subtarget.useCRBits() ? Legal : Expand);
929 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
938 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
939 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
940 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
941 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
942
943 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
944 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
945 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
946 if (Subtarget.hasAltivec())
947 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
949 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
950 if (Subtarget.hasP8Altivec())
951 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
952
953 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
954 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
955 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
956 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
957
958 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
959 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
960
961 if (Subtarget.hasVSX()) {
962 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
963 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
965 }
966
967 if (Subtarget.hasP8Altivec())
968 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
969 else
970 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
971
972 if (Subtarget.isISA3_1()) {
973 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
974 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
975 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
976 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
977 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
978 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
979 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
980 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
981 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
982 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
983 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
984 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
985 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
986 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
987 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
988 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
989 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
990 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
991 }
992
993 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
994 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
995
998 // LE is P8+/64-bit so direct moves are supported and these operations
999 // are legal. The custom transformation requires 64-bit since we need a
1000 // pair of stores that will cover a 128-bit load for P10.
1001 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1005 }
1006
1011
1012 // Altivec does not contain unordered floating-point compare instructions
1013 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1014 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1015 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1016 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1017
1018 if (Subtarget.hasVSX()) {
1021 if (Subtarget.hasP8Vector()) {
1024 }
1025 if (Subtarget.hasDirectMove() && isPPC64) {
1034 }
1036
1037 // The nearbyint variants are not allowed to raise the inexact exception
1038 // so we can only code-gen them with fpexcept.ignore.
1041
1042 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1043 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1044 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1045 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
1046 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1047 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1048 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1049 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1050
1051 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
1052 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1053 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1054 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1055 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1056
1057 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1058 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1059
1060 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1061 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1062
1063 // Share the Altivec comparison restrictions.
1064 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1065 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1066 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1067 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1068
1069 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1070 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1071
1073
1074 if (Subtarget.hasP8Vector())
1075 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1076
1077 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1078
1079 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1080 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1081 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1082
1083 if (Subtarget.hasP8Altivec()) {
1084 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1085 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1086 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1087
1088 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1089 // SRL, but not for SRA because of the instructions available:
1090 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
1091 // doing.
1092 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1093 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1094 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1095
1096 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1097 }
1098 else {
1099 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1100 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1101 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1102
1103 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1104
1105 // VSX v2i64 only supports non-arithmetic operations.
1106 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1107 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1108 }
1109
1110 if (Subtarget.isISA3_1())
1111 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1112 else
1113 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1114
1115 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1116 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1117 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1118 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1119
1121
1130
1131 // Custom handling for partial vectors of integers converted to
1132 // floating point. We already have optimal handling for v2i32 through
1133 // the DAG combine, so those aren't necessary.
1150
1151 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1152 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1153 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1154 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1157
1160
1161 // Handle constrained floating-point operations of vector.
1162 // The predicate is `hasVSX` because Altivec instructions do not raise
1163 // exceptions but VSX vector instructions do.
1177
1191
1192 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1193 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1194
1195 for (MVT FPT : MVT::fp_valuetypes())
1196 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1197
1198 // Expand the SELECT to SELECT_CC
1200
1201 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1202 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1203
1204 // No implementation for these ops for PowerPC.
1205 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
1206 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1207 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1208 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1209 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1210 setOperationAction(ISD::FREM, MVT::f128, Expand);
1211 }
1212
1213 if (Subtarget.hasP8Altivec()) {
1214 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1215 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1216 }
1217
1218 if (Subtarget.hasP9Vector()) {
1221
1222 // Test data class instructions store results in CR bits.
1223 if (Subtarget.useCRBits()) {
1228 }
1229
1230 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1231 // SRL, but not for SRA because of the instructions available:
1232 // VS{RL} and VS{RL}O.
1233 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1234 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1235 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1236
1237 setOperationAction(ISD::FADD, MVT::f128, Legal);
1238 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1239 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1240 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1241 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1242
1243 setOperationAction(ISD::FMA, MVT::f128, Legal);
1250
1251 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1252 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1253 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1254 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1255 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1256 setOperationAction(ISD::FROUND, MVT::f128, Legal);
1257
1260 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1261
1262 // Handle constrained floating-point operations of fp128
1278 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1279 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1280 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1281 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1282 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1283 } else if (Subtarget.hasVSX()) {
1284 setOperationAction(ISD::LOAD, MVT::f128, Promote);
1285 setOperationAction(ISD::STORE, MVT::f128, Promote);
1286
1287 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1288 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1289
1290 // Set FADD/FSUB as libcalls to keep the legalizer from expanding the
1291 // fp_to_uint and int_to_fp.
1294
1295 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1296 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1297 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1298 setOperationAction(ISD::FABS, MVT::f128, Expand);
1299 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1300 setOperationAction(ISD::FMA, MVT::f128, Expand);
1302
1303 // Expand the fp_extend if the target type is fp128.
1304 setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
1306
1307 // Expand the fp_round if the source type is fp128.
1308 for (MVT VT : {MVT::f32, MVT::f64}) {
1311 }
1312
1316 setOperationAction(ISD::BR_CC, MVT::f128, Expand);
1317
1318 // Lower following f128 select_cc pattern:
1319 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1321
1322 // We need to handle f128 SELECT_CC with integer result type.
1324 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1325 }
1326
1327 if (Subtarget.hasP9Altivec()) {
1328 if (Subtarget.isISA3_1()) {
1333 } else {
1336 }
1344
1345 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1346 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1347 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1348 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1349 }
1350
1351 if (Subtarget.hasP10Vector()) {
1353 }
1354 }
1355
1356 if (Subtarget.pairedVectorMemops()) {
1357 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1358 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1359 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1360 }
1361 if (Subtarget.hasMMA()) {
1362 if (Subtarget.isISAFuture()) {
1363 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1364 addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1365 addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
1366 setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1367 setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1368 setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
1369 setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
1370 } else {
1371 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1372 }
1373 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1374 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1376 }
1377
1378 if (Subtarget.has64BitSupport())
1379 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1380
1381 if (Subtarget.isISA3_1())
1382 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1383
1384 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1385
1386 if (!isPPC64) {
1387 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1388 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1389 }
1390
1392 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
1393 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
1395 }
1396
1398
1399 if (Subtarget.hasAltivec()) {
1400 // Altivec instructions set fields to all zeros or all ones.
1402 }
1403
1406 else if (isPPC64)
1408 else
1410
1411 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1412
1413 // We have target-specific dag combine patterns for the following nodes:
1416 if (Subtarget.hasFPCVT())
1418 setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1419 if (Subtarget.useCRBits())
1420 setTargetDAGCombine(ISD::BRCOND);
1423
1425
1427
1428 if (Subtarget.useCRBits()) {
1430 }
1431
1432 // With 32 condition bits, we don't need to sink (and duplicate) compares
1433 // aggressively in CodeGenPrep.
1434 if (Subtarget.useCRBits()) {
1436 }
1437
1438 // TODO: The default entry number is set to 64. This stops most jump table
1439 // generation on PPC. But it is good for current PPC HW because the indirect
1440 // branch instruction mtctr to the jump table may lead to poor branch
1441 // prediction. Re-evaluate this value on future HW that can do better with mtctr.
1443
1444 // The default minimum of largest number in a BitTest cluster is 3.
1446
1448 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1449
1450 auto CPUDirective = Subtarget.getCPUDirective();
1451 switch (CPUDirective) {
1452 default: break;
1453 case PPC::DIR_970:
1454 case PPC::DIR_A2:
1455 case PPC::DIR_E500:
1456 case PPC::DIR_E500mc:
1457 case PPC::DIR_E5500:
1458 case PPC::DIR_PWR4:
1459 case PPC::DIR_PWR5:
1460 case PPC::DIR_PWR5X:
1461 case PPC::DIR_PWR6:
1462 case PPC::DIR_PWR6X:
1463 case PPC::DIR_PWR7:
1464 case PPC::DIR_PWR8:
1465 case PPC::DIR_PWR9:
1466 case PPC::DIR_PWR10:
1467 case PPC::DIR_PWR11:
1471 break;
1472 }
1473
1474 if (Subtarget.enableMachineScheduler())
1476 else
1478
1480
1481 // The Freescale cores do better with aggressive inlining of memcpy and
1482 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1483 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1484 MaxStoresPerMemset = 32;
1486 MaxStoresPerMemcpy = 32;
1490 } else if (CPUDirective == PPC::DIR_A2) {
1491 // The A2 also benefits from (very) aggressive inlining of memcpy and
1492 // friends. The overhead of the function call, even when warm, can be
1493 // over one hundred cycles.
1494 MaxStoresPerMemset = 128;
1495 MaxStoresPerMemcpy = 128;
1496 MaxStoresPerMemmove = 128;
1497 MaxLoadsPerMemcmp = 128;
1498 } else {
1501 }
1502
1503 // Enable generation of STXVP instructions by default for mcpu=future.
1504 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1505 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1506 DisableAutoPairedVecSt = false;
1507
1508 IsStrictFPEnabled = true;
1509
1510 // Let the subtarget (CPU) decide if a predictable select is more expensive
1511 // than the corresponding branch. This information is used in CGP to decide
1512 // when to convert selects into branches.
1513 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1514
1516}
1517
1518// *********************************** NOTE ************************************
1519// For selecting load and store instructions, the addressing modes are defined
1520// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1521// patterns to match the load and store instructions.
1522//
1523// The TD definitions for the addressing modes correspond to their respective
1524// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1525// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1526// address mode flags of a particular node. Afterwards, the computed address
1527// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1528// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1529// accordingly, based on the preferred addressing mode.
1530//
1531// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1532// MemOpFlags contains all the possible flags that can be used to compute the
1533// optimal addressing mode for load and store instructions.
1534// AddrMode contains all the possible load and store addressing modes available
1535// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1536//
1537// When adding new load and store instructions, it is possible that new address
1538// flags may need to be added into MemOpFlags, and a new addressing mode will
1539// need to be added to AddrMode. An entry for the new addressing mode (consisting
1540// of the minimal and main distinguishing address flags for the new load/store
1541// instructions) will need to be added into initializeAddrModeMap() below.
1542// Finally, when adding new addressing modes, getAddrModeForFlags() will also
1543// need to be updated to account for selecting the optimal addressing mode.
1544// *****************************************************************************
1545/// Initialize the map that relates the different addressing modes of the load
1546/// and store instructions to a set of flags. This ensures the load/store
1547/// instruction is correctly matched during instruction selection.
1548void PPCTargetLowering::initializeAddrModeMap() {
1549 AddrModesMap[PPC::AM_DForm] = {
1550 // LWZ, STW
1555 // LBZ, LHZ, STB, STH
1560 // LHA
1565 // LFS, LFD, STFS, STFD
1570 };
1571 AddrModesMap[PPC::AM_DSForm] = {
1572 // LWA
1576 // LD, STD
1580 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1584 };
1585 AddrModesMap[PPC::AM_DQForm] = {
1586 // LXV, STXV
1590 };
1591 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1593 // TODO: Add mapping for quadword load/store.
1594}
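// A minimal sketch of what registering an additional addressing mode would
// look like, following the NOTE above. PPC::AM_NewForm and PPC::MOF_NewFlag
// are hypothetical names used only to illustrate the shape of an entry; a
// real entry would list the flag combinations that uniquely distinguish the
// new load/store instructions:
//
//   AddrModesMap[PPC::AM_NewForm] = {
//       PPC::MOF_NewFlag | PPC::MOF_RPlusSImm34,
//   };
//
// getAddrModeForFlags() would then need a matching clause so that the flags
// produced by computeMOFlags() can map back to PPC::AM_NewForm.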
1595
1596/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1597/// the desired ByVal argument alignment.
1598static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1599 if (MaxAlign == MaxMaxAlign)
1600 return;
1601 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1602 if (MaxMaxAlign >= 32 &&
1603 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1604 MaxAlign = Align(32);
1605 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1606 MaxAlign < 16)
1607 MaxAlign = Align(16);
1608 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1609 Align EltAlign;
1610 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1611 if (EltAlign > MaxAlign)
1612 MaxAlign = EltAlign;
1613 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1614 for (auto *EltTy : STy->elements()) {
1615 Align EltAlign;
1616 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1617 if (EltAlign > MaxAlign)
1618 MaxAlign = EltAlign;
1619 if (MaxAlign == MaxMaxAlign)
1620 break;
1621 }
1622 }
1623}
1624
1625/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1626/// function arguments in the caller parameter area.
1627Align PPCTargetLowering::getByValTypeAlignment(Type *Ty,
1628 const DataLayout &DL) const {
1629 // 16byte and wider vectors are passed on 16byte boundary.
1630 // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1631 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1632 if (Subtarget.hasAltivec())
1633 getMaxByValAlign(Ty, Alignment, Align(16));
1634 return Alignment;
1635}
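// Worked example for the two functions above (informal): on a PPC64 target
// with Altivec, a by-value argument of type
//   struct S { <4 x i32> V; int X; };
// walks getMaxByValAlign() with MaxMaxAlign = 16; the 128-bit vector member
// raises MaxAlign to 16, so getByValTypeAlignment() returns Align(16). The
// same struct without the vector member keeps the default of Align(8) on
// PPC64 (Align(4) on PPC32).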
1636
1637bool PPCTargetLowering::useSoftFloat() const {
1638 return Subtarget.useSoftFloat();
1639}
1640
1641bool PPCTargetLowering::hasSPE() const {
1642 return Subtarget.hasSPE();
1643}
1644
1646 return VT.isScalarInteger();
1647}
1648
1650 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1651 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1652 return false;
1653
1654 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1655 if (VTy->getScalarType()->isIntegerTy()) {
1656 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1657 if (ElemSizeInBits == 32) {
1658 Index = Subtarget.isLittleEndian() ? 2 : 1;
1659 return true;
1660 }
1661 if (ElemSizeInBits == 64) {
1662 Index = Subtarget.isLittleEndian() ? 1 : 0;
1663 return true;
1664 }
1665 }
1666 }
1667 return false;
1668}
1669
1670const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1671 switch ((PPCISD::NodeType)Opcode) {
1672 case PPCISD::FIRST_NUMBER: break;
1673 case PPCISD::FSEL: return "PPCISD::FSEL";
1674 case PPCISD::XSMAXC: return "PPCISD::XSMAXC";
1675 case PPCISD::XSMINC: return "PPCISD::XSMINC";
1676 case PPCISD::FCFID: return "PPCISD::FCFID";
1677 case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1678 case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1679 case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1680 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1681 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1682 case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1683 case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1684 case PPCISD::FRE: return "PPCISD::FRE";
1685 case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1686 case PPCISD::FTSQRT:
1687 return "PPCISD::FTSQRT";
1688 case PPCISD::FSQRT:
1689 return "PPCISD::FSQRT";
1690 case PPCISD::STFIWX: return "PPCISD::STFIWX";
1691 case PPCISD::VPERM: return "PPCISD::VPERM";
1692 case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1693 case PPCISD::XXSPLTI_SP_TO_DP:
1694 return "PPCISD::XXSPLTI_SP_TO_DP";
1695 case PPCISD::XXSPLTI32DX:
1696 return "PPCISD::XXSPLTI32DX";
1697 case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1698 case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1699 case PPCISD::XXPERM:
1700 return "PPCISD::XXPERM";
1701 case PPCISD::VECSHL: return "PPCISD::VECSHL";
1702 case PPCISD::VSRQ:
1703 return "PPCISD::VSRQ";
1704 case PPCISD::CMPB: return "PPCISD::CMPB";
1705 case PPCISD::Hi: return "PPCISD::Hi";
1706 case PPCISD::Lo: return "PPCISD::Lo";
1707 case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1708 case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1709 case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1710 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1711 case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1712 case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1713 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1714 case PPCISD::SRL: return "PPCISD::SRL";
1715 case PPCISD::SRA: return "PPCISD::SRA";
1716 case PPCISD::SHL: return "PPCISD::SHL";
1717 case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1718 case PPCISD::CALL: return "PPCISD::CALL";
1719 case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1720 case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1721 case PPCISD::CALL_RM:
1722 return "PPCISD::CALL_RM";
1723 case PPCISD::CALL_NOP_RM:
1724 return "PPCISD::CALL_NOP_RM";
1725 case PPCISD::CALL_NOTOC_RM:
1726 return "PPCISD::CALL_NOTOC_RM";
1727 case PPCISD::MTCTR: return "PPCISD::MTCTR";
1728 case PPCISD::BCTRL: return "PPCISD::BCTRL";
1729 case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1730 case PPCISD::BCTRL_RM:
1731 return "PPCISD::BCTRL_RM";
1732 case PPCISD::BCTRL_LOAD_TOC_RM:
1733 return "PPCISD::BCTRL_LOAD_TOC_RM";
1734 case PPCISD::RET_GLUE: return "PPCISD::RET_GLUE";
1735 case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1736 case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1737 case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1738 case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1739 case PPCISD::MFVSR: return "PPCISD::MFVSR";
1740 case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1741 case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1742 case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1743 case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1744 case PPCISD::SCALAR_TO_VECTOR_PERMUTED:
1745 return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1746 case PPCISD::ANDI_rec_1_EQ_BIT:
1747 return "PPCISD::ANDI_rec_1_EQ_BIT";
1748 case PPCISD::ANDI_rec_1_GT_BIT:
1749 return "PPCISD::ANDI_rec_1_GT_BIT";
1750 case PPCISD::VCMP: return "PPCISD::VCMP";
1751 case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1752 case PPCISD::LBRX: return "PPCISD::LBRX";
1753 case PPCISD::STBRX: return "PPCISD::STBRX";
1754 case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1755 case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1756 case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1757 case PPCISD::STXSIX: return "PPCISD::STXSIX";
1758 case PPCISD::VEXTS: return "PPCISD::VEXTS";
1759 case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1760 case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1761 case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1762 case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1763 case PPCISD::ST_VSR_SCAL_INT:
1764 return "PPCISD::ST_VSR_SCAL_INT";
1765 case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1766 case PPCISD::BDNZ: return "PPCISD::BDNZ";
1767 case PPCISD::BDZ: return "PPCISD::BDZ";
1768 case PPCISD::MFFS: return "PPCISD::MFFS";
1769 case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1770 case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1771 case PPCISD::CR6SET: return "PPCISD::CR6SET";
1772 case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1773 case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1774 case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1775 case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1776 case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1777 case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1778 case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1779 case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1780 case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1781 case PPCISD::GET_TLS_MOD_AIX: return "PPCISD::GET_TLS_MOD_AIX";
1782 case PPCISD::GET_TPOINTER: return "PPCISD::GET_TPOINTER";
1783 case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1784 case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1785 case PPCISD::TLSLD_AIX: return "PPCISD::TLSLD_AIX";
1786 case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1787 case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1788 case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1789 case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1790 case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1791 case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1792 case PPCISD::PADDI_DTPREL:
1793 return "PPCISD::PADDI_DTPREL";
1794 case PPCISD::VADD_SPLAT:
1795 return "PPCISD::VADD_SPLAT";
1796 case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1797 case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1798 case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1799 case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1800 case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1801 case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1802 case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1803 case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1804 case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1805 case PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR:
1806 return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1807 case PPCISD::TLS_LOCAL_EXEC_MAT_ADDR:
1808 return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1809 case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1810 case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1811 case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1812 case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1813 case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1814 case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1815 case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1816 case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1817 case PPCISD::STRICT_FADDRTZ:
1818 return "PPCISD::STRICT_FADDRTZ";
1819 case PPCISD::STRICT_FCTIDZ:
1820 return "PPCISD::STRICT_FCTIDZ";
1821 case PPCISD::STRICT_FCTIWZ:
1822 return "PPCISD::STRICT_FCTIWZ";
1823 case PPCISD::STRICT_FCTIDUZ:
1824 return "PPCISD::STRICT_FCTIDUZ";
1825 case PPCISD::STRICT_FCTIWUZ:
1826 return "PPCISD::STRICT_FCTIWUZ";
1827 case PPCISD::STRICT_FCFID:
1828 return "PPCISD::STRICT_FCFID";
1829 case PPCISD::STRICT_FCFIDU:
1830 return "PPCISD::STRICT_FCFIDU";
1831 case PPCISD::STRICT_FCFIDS:
1832 return "PPCISD::STRICT_FCFIDS";
1833 case PPCISD::STRICT_FCFIDUS:
1834 return "PPCISD::STRICT_FCFIDUS";
1835 case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1836 case PPCISD::STORE_COND:
1837 return "PPCISD::STORE_COND";
1838 case PPCISD::SETBC:
1839 return "PPCISD::SETBC";
1840 case PPCISD::SETBCR:
1841 return "PPCISD::SETBCR";
1842 case PPCISD::ADDC:
1843 return "PPCISD::ADDC";
1844 case PPCISD::ADDE:
1845 return "PPCISD::ADDE";
1846 case PPCISD::SUBC:
1847 return "PPCISD::SUBC";
1848 case PPCISD::SUBE:
1849 return "PPCISD::SUBE";
1850 }
1851 return nullptr;
1852}
1853
1854EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
1855 EVT VT) const {
1856 if (!VT.isVector())
1857 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1858
1859 return VT.changeVectorElementTypeToInteger();
1860}
1861
1863 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1864 return true;
1865}
1866
1867//===----------------------------------------------------------------------===//
1868// Node matching predicates, for use by the tblgen matching code.
1869//===----------------------------------------------------------------------===//
1870
1871/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1872static bool isFloatingPointZero(SDValue Op) {
1873 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1874 return CFP->getValueAPF().isZero();
1875 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1876 // Maybe this has already been legalized into the constant pool?
1877 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1878 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1879 return CFP->getValueAPF().isZero();
1880 }
1881 return false;
1882}
1883
1884/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1885/// true if Op is undef or if it matches the specified value.
1886static bool isConstantOrUndef(int Op, int Val) {
1887 return Op < 0 || Op == Val;
1888}
1889
1890/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1891/// VPKUHUM instruction.
1892/// The ShuffleKind distinguishes between big-endian operations with
1893/// two different inputs (0), either-endian operations with two identical
1894/// inputs (1), and little-endian operations with two different inputs (2).
1895/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1896bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1897 SelectionDAG &DAG) {
1898 bool IsLE = DAG.getDataLayout().isLittleEndian();
1899 if (ShuffleKind == 0) {
1900 if (IsLE)
1901 return false;
1902 for (unsigned i = 0; i != 16; ++i)
1903 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1904 return false;
1905 } else if (ShuffleKind == 2) {
1906 if (!IsLE)
1907 return false;
1908 for (unsigned i = 0; i != 16; ++i)
1909 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1910 return false;
1911 } else if (ShuffleKind == 1) {
1912 unsigned j = IsLE ? 0 : 1;
1913 for (unsigned i = 0; i != 8; ++i)
1914 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1915 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1916 return false;
1917 }
1918 return true;
1919}
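// Informal example for the matcher above: on a big-endian target with two
// different inputs (ShuffleKind == 0), a vpkuhum-style shuffle keeps the
// low-order byte of each halfword, so the expected v16i8 mask is
//   <1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>
// (element i must be i*2+1, or undef). On little-endian with swapped inputs
// (ShuffleKind == 2) the expected mask is <0, 2, 4, ..., 30> instead.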
1920
1921/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1922/// VPKUWUM instruction.
1923/// The ShuffleKind distinguishes between big-endian operations with
1924/// two different inputs (0), either-endian operations with two identical
1925/// inputs (1), and little-endian operations with two different inputs (2).
1926/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1927bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1928 SelectionDAG &DAG) {
1929 bool IsLE = DAG.getDataLayout().isLittleEndian();
1930 if (ShuffleKind == 0) {
1931 if (IsLE)
1932 return false;
1933 for (unsigned i = 0; i != 16; i += 2)
1934 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1935 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1936 return false;
1937 } else if (ShuffleKind == 2) {
1938 if (!IsLE)
1939 return false;
1940 for (unsigned i = 0; i != 16; i += 2)
1941 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1942 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1943 return false;
1944 } else if (ShuffleKind == 1) {
1945 unsigned j = IsLE ? 0 : 2;
1946 for (unsigned i = 0; i != 8; i += 2)
1947 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1948 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1949 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1950 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1951 return false;
1952 }
1953 return true;
1954}
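// Informal example for the matcher above: for the big-endian two-input case
// (ShuffleKind == 0), a vpkuwum-style shuffle keeps the two low-order bytes
// of each word, so the expected v16i8 mask is
//   <2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31>
// with undefs allowed in any position.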
1955
1956/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1957/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1958/// current subtarget.
1959///
1960/// The ShuffleKind distinguishes between big-endian operations with
1961/// two different inputs (0), either-endian operations with two identical
1962/// inputs (1), and little-endian operations with two different inputs (2).
1963/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1964bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
1965 SelectionDAG &DAG) {
1966 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1967 if (!Subtarget.hasP8Vector())
1968 return false;
1969
1970 bool IsLE = DAG.getDataLayout().isLittleEndian();
1971 if (ShuffleKind == 0) {
1972 if (IsLE)
1973 return false;
1974 for (unsigned i = 0; i != 16; i += 4)
1975 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1976 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1977 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1978 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1979 return false;
1980 } else if (ShuffleKind == 2) {
1981 if (!IsLE)
1982 return false;
1983 for (unsigned i = 0; i != 16; i += 4)
1984 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1985 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1986 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1987 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1988 return false;
1989 } else if (ShuffleKind == 1) {
1990 unsigned j = IsLE ? 0 : 4;
1991 for (unsigned i = 0; i != 8; i += 4)
1992 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1993 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1994 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1995 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1996 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1997 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1998 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1999 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
2000 return false;
2001 }
2002 return true;
2003}
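// Informal example for the matcher above: vpkudum only exists with P8Vector,
// hence the early exit. In the big-endian two-input case (ShuffleKind == 0)
// it keeps the four low-order bytes of each doubleword, i.e. the mask
//   <4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31>.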
2004
2005/// isVMerge - Common function, used to match vmrg* shuffles.
2006///
2007static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
2008 unsigned LHSStart, unsigned RHSStart) {
2009 if (N->getValueType(0) != MVT::v16i8)
2010 return false;
2011 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
2012 "Unsupported merge size!");
2013
2014 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
2015 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
2016 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
2017 LHSStart+j+i*UnitSize) ||
2018 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
2019 RHSStart+j+i*UnitSize))
2020 return false;
2021 }
2022 return true;
2023}
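// Illustrative instance of the check above for UnitSize == 1 with the
// big-endian VMRGLB starts (LHSStart = 8, RHSStart = 24): the result
// interleaves byte i of the low half of the first input with byte i of the
// low half of the second input. A plain-array sketch, with the usual
// convention that -1 marks an undef mask element; the name is illustrative.
static bool sketchIsBigEndianVMRGLBMask(const int Mask[16]) {
  for (int i = 0; i != 8; ++i)
    if ((Mask[2 * i] >= 0 && Mask[2 * i] != 8 + i) ||
        (Mask[2 * i + 1] >= 0 && Mask[2 * i + 1] != 24 + i))
      return false;
  return true;
}
// The fully-defined accepted mask is {8,24, 9,25, 10,26, 11,27, 12,28, 13,29, 14,30, 15,31}.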
2024
2025/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
2026/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
2027/// The ShuffleKind distinguishes between big-endian merges with two
2028/// different inputs (0), either-endian merges with two identical inputs (1),
2029/// and little-endian merges with two different inputs (2). For the latter,
2030/// the input operands are swapped (see PPCInstrAltivec.td).
2031bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2032 unsigned ShuffleKind, SelectionDAG &DAG) {
2033 if (DAG.getDataLayout().isLittleEndian()) {
2034 if (ShuffleKind == 1) // unary
2035 return isVMerge(N, UnitSize, 0, 0);
2036 else if (ShuffleKind == 2) // swapped
2037 return isVMerge(N, UnitSize, 0, 16);
2038 else
2039 return false;
2040 } else {
2041 if (ShuffleKind == 1) // unary
2042 return isVMerge(N, UnitSize, 8, 8);
2043 else if (ShuffleKind == 0) // normal
2044 return isVMerge(N, UnitSize, 8, 24);
2045 else
2046 return false;
2047 }
2048}
2049
2050/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
2051/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
2052/// The ShuffleKind distinguishes between big-endian merges with two
2053/// different inputs (0), either-endian merges with two identical inputs (1),
2054/// and little-endian merges with two different inputs (2). For the latter,
2055/// the input operands are swapped (see PPCInstrAltivec.td).
2056bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
2057 unsigned ShuffleKind, SelectionDAG &DAG) {
2058 if (DAG.getDataLayout().isLittleEndian()) {
2059 if (ShuffleKind == 1) // unary
2060 return isVMerge(N, UnitSize, 8, 8);
2061 else if (ShuffleKind == 2) // swapped
2062 return isVMerge(N, UnitSize, 8, 24);
2063 else
2064 return false;
2065 } else {
2066 if (ShuffleKind == 1) // unary
2067 return isVMerge(N, UnitSize, 0, 0);
2068 else if (ShuffleKind == 0) // normal
2069 return isVMerge(N, UnitSize, 0, 16);
2070 else
2071 return false;
2072 }
2073}
2074
2075/**
2076 * Common function used to match vmrgew and vmrgow shuffles
2077 *
2078 * The indexOffset determines whether to look for even or odd words in
2079 * the shuffle mask. This is based on the endianness of the target
2080 * machine.
2081 * - Little Endian:
2082 * - Use offset of 0 to check for odd elements
2083 * - Use offset of 4 to check for even elements
2084 * - Big Endian:
2085 * - Use offset of 0 to check for even elements
2086 * - Use offset of 4 to check for odd elements
2087 * A detailed description of the vector element ordering for little endian and
2088 * big endian can be found at
2089 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
2090 * Targeting your applications - what little endian and big endian IBM XL C/C++
2091 * compiler differences mean to you
2092 *
2093 * The mask to the shuffle vector instruction specifies the indices of the
2094 * elements from the two input vectors to place in the result. The elements are
2095 * numbered in array-access order, starting with the first vector. These vectors
2096 * are always of type v16i8, thus each vector will contain 16 byte-sized
2097 * elements. More info on the shuffle vector can be found in the
2098 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2099 * Language Reference.
2100 *
2101 * The RHSStartValue indicates whether the same input vectors are used (unary)
2102 * or two different input vectors are used, based on the following:
2103 * - If the instruction uses the same vector for both inputs, the range of the
2104 * indices will be 0 to 15. In this case, the RHSStart value passed should
2105 * be 0.
2106 * - If the instruction has two different vectors then the range of the
2107 * indices will be 0 to 31. In this case, the RHSStart value passed should
2108 * be 16 (indices 0-15 specify elements in the first vector while indices 16
2109 * to 31 specify elements in the second vector).
2110 *
2111 * \param[in] N The shuffle vector SD Node to analyze
2112 * \param[in] IndexOffset Specifies whether to look for even or odd elements
2113 * \param[in] RHSStartValue Specifies the starting index for the righthand input
2114 * vector to the shuffle_vector instruction
2115 * \return true iff this shuffle vector represents an even or odd word merge
2116 */
2117static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2118 unsigned RHSStartValue) {
2119 if (N->getValueType(0) != MVT::v16i8)
2120 return false;
2121
2122 for (unsigned i = 0; i < 2; ++i)
2123 for (unsigned j = 0; j < 4; ++j)
2124 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2125 i*RHSStartValue+j+IndexOffset) ||
2126 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2127 i*RHSStartValue+j+IndexOffset+8))
2128 return false;
2129 return true;
2130}
2131
2132/**
2133 * Determine if the specified shuffle mask is suitable for the vmrgew or
2134 * vmrgow instructions.
2135 *
2136 * \param[in] N The shuffle vector SD Node to analyze
2137 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2138 * \param[in] ShuffleKind Identify the type of merge:
2139 * - 0 = big-endian merge with two different inputs;
2140 * - 1 = either-endian merge with two identical inputs;
2141 * - 2 = little-endian merge with two different inputs (inputs are swapped for
2142 * little-endian merges).
2143 * \param[in] DAG The current SelectionDAG
2144 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow instruction
2145 */
2146bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
2147 unsigned ShuffleKind, SelectionDAG &DAG) {
2148 if (DAG.getDataLayout().isLittleEndian()) {
2149 unsigned indexOffset = CheckEven ? 4 : 0;
2150 if (ShuffleKind == 1) // Unary
2151 return isVMerge(N, indexOffset, 0);
2152 else if (ShuffleKind == 2) // swapped
2153 return isVMerge(N, indexOffset, 16);
2154 else
2155 return false;
2156 }
2157 else {
2158 unsigned indexOffset = CheckEven ? 0 : 4;
2159 if (ShuffleKind == 1) // Unary
2160 return isVMerge(N, indexOffset, 0);
2161 else if (ShuffleKind == 0) // Normal
2162 return isVMerge(N, indexOffset, 16);
2163 else
2164 return false;
2165 }
2166 return false;
2167}
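// Worked example for the big-endian, two-input (ShuffleKind 0) even merge:
// the result words are A0, B0, A2, B2, which as a v16i8 mask is
// {0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27} -- exactly what
// isVMerge(N, /*IndexOffset=*/0, /*RHSStartValue=*/16) verifies. The odd
// merge uses IndexOffset 4 instead. A small sketch that builds this mask:
static void sketchBuildBigEndianVMRGEWMask(int Mask[16]) {
  const int SrcWord[4] = {0, 4, 2, 6}; // words 0-3 are input A, 4-7 are input B
  for (int w = 0; w != 4; ++w)
    for (int b = 0; b != 4; ++b)
      Mask[w * 4 + b] = SrcWord[w] * 4 + b;
}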
2168
2169/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2170/// amount, otherwise return -1.
2171/// The ShuffleKind distinguishes between big-endian operations with two
2172/// different inputs (0), either-endian operations with two identical inputs
2173/// (1), and little-endian operations with two different inputs (2). For the
2174/// latter, the input operands are swapped (see PPCInstrAltivec.td).
2175int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2176 SelectionDAG &DAG) {
2177 if (N->getValueType(0) != MVT::v16i8)
2178 return -1;
2179
2180 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2181
2182 // Find the first non-undef value in the shuffle mask.
2183 unsigned i;
2184 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2185 /*search*/;
2186
2187 if (i == 16) return -1; // all undef.
2188
2189 // Otherwise, check to see if the rest of the elements are consecutively
2190 // numbered from this value.
2191 unsigned ShiftAmt = SVOp->getMaskElt(i);
2192 if (ShiftAmt < i) return -1;
2193
2194 ShiftAmt -= i;
2195 bool isLE = DAG.getDataLayout().isLittleEndian();
2196
2197 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2198 // Check the rest of the elements to see if they are consecutive.
2199 for (++i; i != 16; ++i)
2200 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2201 return -1;
2202 } else if (ShuffleKind == 1) {
2203 // Check the rest of the elements to see if they are consecutive.
2204 for (++i; i != 16; ++i)
2205 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2206 return -1;
2207 } else
2208 return -1;
2209
2210 if (isLE)
2211 ShiftAmt = 16 - ShiftAmt;
2212
2213 return ShiftAmt;
2214}
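// Worked example of the shift computation above: the mask {3,4,5,...,18} is a
// contiguous 16-byte window starting at byte 3 of the concatenated inputs, so
// on big-endian the function reports a vsldoi shift of 3, and on little-endian
// (where the caller has swapped the inputs) it reports 16 - 3 = 13. A sketch
// of the ShuffleKind 0/2 path over a plain mask, assuming element 0 is defined:
static int sketchVSLDOIShiftAmount(const int Mask[16], bool IsLE) {
  int Shift = Mask[0];
  for (int i = 1; i != 16; ++i)
    if (Mask[i] >= 0 && Mask[i] != Shift + i)
      return -1;
  return IsLE ? 16 - Shift : Shift;
}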
2215
2216/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2217/// specifies a splat of a single element that is suitable for input to
2218/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
2219bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
2220 EVT VT = N->getValueType(0);
2221 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2222 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2223
2224 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2225 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2226
2227 // The consecutive indices need to specify an element, not part of two
2228 // different elements. So abandon ship early if this isn't the case.
2229 if (N->getMaskElt(0) % EltSize != 0)
2230 return false;
2231
2232 // This is a splat operation if each element of the permute is the same, and
2233 // if the value doesn't reference the second vector.
2234 unsigned ElementBase = N->getMaskElt(0);
2235
2236 // FIXME: Handle UNDEF elements too!
2237 if (ElementBase >= 16)
2238 return false;
2239
2240 // Check that the indices are consecutive, in the case of a multi-byte element
2241 // splatted with a v16i8 mask.
2242 for (unsigned i = 1; i != EltSize; ++i)
2243 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2244 return false;
2245
2246 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2247 // An UNDEF element is a sequence of UNDEF bytes.
2248 if (N->getMaskElt(i) < 0) {
2249 for (unsigned j = 1; j != EltSize; ++j)
2250 if (N->getMaskElt(i + j) >= 0)
2251 return false;
2252 } else
2253 for (unsigned j = 0; j != EltSize; ++j)
2254 if (N->getMaskElt(i + j) != N->getMaskElt(j))
2255 return false;
2256 }
2257 return true;
2258}
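// Illustrative case of the splat check above, ignoring the finer UNDEF
// handling: with EltSize == 4 the mask {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}
// replicates word 1 of the first input into every word of the result, which
// is what vspltw/xxspltw produce. A plain-array sketch:
static bool sketchIsWordSplatMask(const int Mask[16]) {
  if (Mask[0] < 0 || Mask[0] % 4 != 0 || Mask[0] >= 16)
    return false;
  for (int i = 0; i != 16; ++i)
    if (Mask[i] >= 0 && Mask[i] != Mask[0] + (i % 4))
      return false;
  return true;
}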
2259
2260/// Check that the mask is shuffling N byte elements. Within each N byte
2261/// element of the mask, the indices could be either in increasing or
2262/// decreasing order as long as they are consecutive.
2263/// \param[in] N the shuffle vector SD Node to analyze
2264/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
2265/// Word/DoubleWord/QuadWord).
2266/// \param[in] StepLen the delta between consecutive indices within an N byte
2267/// element; 1 if the mask is in increasing order, -1 if decreasing.
2268/// \return true iff the mask is shuffling N byte elements.
2269static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2270 int StepLen) {
2271 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2272 "Unexpected element width.");
2273 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2274
2275 unsigned NumOfElem = 16 / Width;
2276 unsigned MaskVal[16]; // Width is never greater than 16
2277 for (unsigned i = 0; i < NumOfElem; ++i) {
2278 MaskVal[0] = N->getMaskElt(i * Width);
2279 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2280 return false;
2281 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2282 return false;
2283 }
2284
2285 for (unsigned int j = 1; j < Width; ++j) {
2286 MaskVal[j] = N->getMaskElt(i * Width + j);
2287 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2288 return false;
2289 }
2290 }
2291 }
2292
2293 return true;
2294}
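// Example of what the Width/StepLen test accepts for Width == 4: with
// StepLen == 1 the mask {4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11} moves
// whole words, and with StepLen == -1 the bytes inside each word run
// backwards, e.g. {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12} (the word
// byte-reverse pattern). A plain-array sketch of the same check:
static bool sketchIsWordElemShuffleMask(const int Mask[16], int StepLen) {
  for (int i = 0; i != 16; i += 4) {
    if (StepLen == 1 ? (Mask[i] % 4 != 0) : ((Mask[i] + 1) % 4 != 0))
      return false;
    for (int j = 1; j != 4; ++j)
      if (Mask[i + j] != Mask[i + j - 1] + StepLen)
        return false;
  }
  return true;
}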
2295
2296bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2297 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2298 if (!isNByteElemShuffleMask(N, 4, 1))
2299 return false;
2300
2301 // Now we look at mask elements 0,4,8,12
2302 unsigned M0 = N->getMaskElt(0) / 4;
2303 unsigned M1 = N->getMaskElt(4) / 4;
2304 unsigned M2 = N->getMaskElt(8) / 4;
2305 unsigned M3 = N->getMaskElt(12) / 4;
2306 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2307 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2308
2309 // Below, let H and L be arbitrary elements of the shuffle mask
2310 // where H is in the range [4,7] and L is in the range [0,3].
2311 // H, 1, 2, 3 or L, 5, 6, 7
2312 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2313 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2314 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2315 InsertAtByte = IsLE ? 12 : 0;
2316 Swap = M0 < 4;
2317 return true;
2318 }
2319 // 0, H, 2, 3 or 4, L, 6, 7
2320 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2321 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2322 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2323 InsertAtByte = IsLE ? 8 : 4;
2324 Swap = M1 < 4;
2325 return true;
2326 }
2327 // 0, 1, H, 3 or 4, 5, L, 7
2328 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2329 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2330 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2331 InsertAtByte = IsLE ? 4 : 8;
2332 Swap = M2 < 4;
2333 return true;
2334 }
2335 // 0, 1, 2, H or 4, 5, 6, L
2336 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2337 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2338 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2339 InsertAtByte = IsLE ? 0 : 12;
2340 Swap = M3 < 4;
2341 return true;
2342 }
2343
2344 // If both vector operands for the shuffle are the same vector, the mask will
2345 // contain only elements from the first one and the second one will be undef.
2346 if (N->getOperand(1).isUndef()) {
2347 ShiftElts = 0;
2348 Swap = true;
2349 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2350 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2351 InsertAtByte = IsLE ? 12 : 0;
2352 return true;
2353 }
2354 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2355 InsertAtByte = IsLE ? 8 : 4;
2356 return true;
2357 }
2358 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2359 InsertAtByte = IsLE ? 4 : 8;
2360 return true;
2361 }
2362 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2363 InsertAtByte = IsLE ? 0 : 12;
2364 return true;
2365 }
2366 }
2367
2368 return false;
2369}
2370
2371bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2372 bool &Swap, bool IsLE) {
2373 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2374 // Ensure each byte index of the word is consecutive.
2375 if (!isNByteElemShuffleMask(N, 4, 1))
2376 return false;
2377
2378 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2379 unsigned M0 = N->getMaskElt(0) / 4;
2380 unsigned M1 = N->getMaskElt(4) / 4;
2381 unsigned M2 = N->getMaskElt(8) / 4;
2382 unsigned M3 = N->getMaskElt(12) / 4;
2383
2384 // If both vector operands for the shuffle are the same vector, the mask will
2385 // contain only elements from the first one and the second one will be undef.
2386 if (N->getOperand(1).isUndef()) {
2387 assert(M0 < 4 && "Indexing into an undef vector?");
2388 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2389 return false;
2390
2391 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2392 Swap = false;
2393 return true;
2394 }
2395
2396 // Ensure each word index of the ShuffleVector Mask is consecutive.
2397 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2398 return false;
2399
2400 if (IsLE) {
2401 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2402 // Input vectors don't need to be swapped if the leading element
2403 // of the result is one of the 3 left elements of the second vector
2404 // (or if there is no shift to be done at all).
2405 Swap = false;
2406 ShiftElts = (8 - M0) % 8;
2407 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2408 // Input vectors need to be swapped if the leading element
2409 // of the result is one of the 3 left elements of the first vector
2410 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2411 Swap = true;
2412 ShiftElts = (4 - M0) % 4;
2413 }
2414
2415 return true;
2416 } else { // BE
2417 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2418 // Input vectors don't need to be swapped if the leading element
2419 // of the result is one of the 4 elements of the first vector.
2420 Swap = false;
2421 ShiftElts = M0;
2422 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2423 // Input vectors need to be swapped if the leading element
2424 // of the result is one of the 4 elements of the right vector.
2425 Swap = true;
2426 ShiftElts = M0 - 4;
2427 }
2428
2429 return true;
2430 }
2431}
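// Worked example for the two-input case above: the word-level mask {1,2,3,4}
// selects four consecutive words starting at word 1 of the concatenation.
// On big-endian that is xxsldwi with ShiftElts = 1 and no swap; on
// little-endian the inputs are swapped and ShiftElts becomes (4 - 1) % 4 = 3.
// A sketch of the same decision, keyed only off the leading word index M0:
static unsigned sketchXXSLDWIShift(unsigned M0, bool IsLE, bool &Swap) {
  if (IsLE) {
    Swap = !(M0 == 0 || M0 >= 5);
    return Swap ? (4 - M0) % 4 : (8 - M0) % 8;
  }
  Swap = M0 >= 4;
  return Swap ? M0 - 4 : M0;
}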
2432
2433static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
2434 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2435
2436 if (!isNByteElemShuffleMask(N, Width, -1))
2437 return false;
2438
2439 for (int i = 0; i < 16; i += Width)
2440 if (N->getMaskElt(i) != i + Width - 1)
2441 return false;
2442
2443 return true;
2444}
2445
2446bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
2447 return isXXBRShuffleMaskHelper(N, 2);
2448}
2449
2450bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
2451 return isXXBRShuffleMaskHelper(N, 4);
2452}
2453
2454bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
2455 return isXXBRShuffleMaskHelper(N, 8);
2456}
2457
2458bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
2459 return isXXBRShuffleMaskHelper(N, 16);
2460}
2461
2462/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2463/// if the inputs to the instruction should be swapped and set \p DM to the
2464/// value for the immediate.
2465/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2466/// AND element 0 of the result comes from the first input (LE) or second input
2467/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2468/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2469/// mask.
2470bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
2471 bool &Swap, bool IsLE) {
2472 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2473
2474 // Ensure each byte index of the double word is consecutive.
2475 if (!isNByteElemShuffleMask(N, 8, 1))
2476 return false;
2477
2478 unsigned M0 = N->getMaskElt(0) / 8;
2479 unsigned M1 = N->getMaskElt(8) / 8;
2480 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2481
2482 // If both vector operands for the shuffle are the same vector, the mask will
2483 // contain only elements from the first one and the second one will be undef.
2484 if (N->getOperand(1).isUndef()) {
2485 if ((M0 | M1) < 2) {
2486 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2487 Swap = false;
2488 return true;
2489 } else
2490 return false;
2491 }
2492
2493 if (IsLE) {
2494 if (M0 > 1 && M1 < 2) {
2495 Swap = false;
2496 } else if (M0 < 2 && M1 > 1) {
2497 M0 = (M0 + 2) % 4;
2498 M1 = (M1 + 2) % 4;
2499 Swap = true;
2500 } else
2501 return false;
2502
2503 // Note: if control flow comes here that means Swap is already set above
2504 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2505 return true;
2506 } else { // BE
2507 if (M0 < 2 && M1 > 1) {
2508 Swap = false;
2509 } else if (M0 > 1 && M1 < 2) {
2510 M0 = (M0 + 2) % 4;
2511 M1 = (M1 + 2) % 4;
2512 Swap = true;
2513 } else
2514 return false;
2515
2516 // Note: if control flow comes here that means Swap is already set above
2517 DM = (M0 << 1) + (M1 & 1);
2518 return true;
2519 }
2520}
2521
2522
2523/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2524/// appropriate for PPC mnemonics (which have a big endian bias - namely
2525/// elements are counted from the left of the vector register).
2526unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2527 SelectionDAG &DAG) {
2528 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2529 assert(isSplatShuffleMask(SVOp, EltSize));
2530 EVT VT = SVOp->getValueType(0);
2531
2532 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2533 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2534 : SVOp->getMaskElt(0);
2535
2536 if (DAG.getDataLayout().isLittleEndian())
2537 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2538 else
2539 return SVOp->getMaskElt(0) / EltSize;
2540}
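// The little-endian correction above mirrors the index because PPC splat
// mnemonics number elements from the left (most-significant) end of the
// vector register. A standalone sketch of the same arithmetic:
static unsigned sketchSplatIdxForMnemonics(unsigned MaskElt0, unsigned EltSize,
                                           bool IsLE) {
  unsigned NumElts = 16 / EltSize;
  unsigned Idx = MaskElt0 / EltSize;
  // e.g. EltSize 4 with MaskElt0 == 8 (word 2) is vspltw index 2 on
  // big-endian but index 1 on little-endian.
  return IsLE ? NumElts - 1 - Idx : Idx;
}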
2541
2542/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2543/// by using a vspltis[bhw] instruction of the specified element size, return
2544/// the constant being splatted. The ByteSize field indicates the number of
2545/// bytes of each element [124] -> [bhw].
2546SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
2547 SDValue OpVal;
2548
2549 // If ByteSize of the splat is bigger than the element size of the
2550 // build_vector, then we have a case where we are checking for a splat where
2551 // multiple elements of the buildvector are folded together into a single
2552 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2553 unsigned EltSize = 16/N->getNumOperands();
2554 if (EltSize < ByteSize) {
2555 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2556 SDValue UniquedVals[4];
2557 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2558
2559 // See if all of the elements in the buildvector agree across.
2560 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2561 if (N->getOperand(i).isUndef()) continue;
2562 // If the element isn't a constant, bail fully out.
2563 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2564
2565 if (!UniquedVals[i&(Multiple-1)].getNode())
2566 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2567 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2568 return SDValue(); // no match.
2569 }
2570
2571 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2572 // either constant or undef values that are identical for each chunk. See
2573 // if these chunks can form into a larger vspltis*.
2574
2575 // Check to see if all of the leading entries are either 0 or -1. If
2576 // neither, then this won't fit into the immediate field.
2577 bool LeadingZero = true;
2578 bool LeadingOnes = true;
2579 for (unsigned i = 0; i != Multiple-1; ++i) {
2580 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2581
2582 LeadingZero &= isNullConstant(UniquedVals[i]);
2583 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2584 }
2585 // Finally, check the least significant entry.
2586 if (LeadingZero) {
2587 if (!UniquedVals[Multiple-1].getNode())
2588 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2589 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2590 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2591 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2592 }
2593 if (LeadingOnes) {
2594 if (!UniquedVals[Multiple-1].getNode())
2595 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2596 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2597 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2598 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2599 }
2600
2601 return SDValue();
2602 }
2603
2604 // Check to see if this buildvec has a single non-undef value in its elements.
2605 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2606 if (N->getOperand(i).isUndef()) continue;
2607 if (!OpVal.getNode())
2608 OpVal = N->getOperand(i);
2609 else if (OpVal != N->getOperand(i))
2610 return SDValue();
2611 }
2612
2613 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2614
2615 unsigned ValSizeInBytes = EltSize;
2616 uint64_t Value = 0;
2617 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2618 Value = CN->getZExtValue();
2619 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2620 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2621 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2622 }
2623
2624 // If the splat value is larger than the element value, then we can never do
2625 // this splat. The only case that we could fit the replicated bits into our
2626 // immediate field for would be zero, and we prefer to use vxor for it.
2627 if (ValSizeInBytes < ByteSize) return SDValue();
2628
2629 // If the element value is larger than the splat value, check if it consists
2630 // of a repeated bit pattern of size ByteSize.
2631 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2632 return SDValue();
2633
2634 // Properly sign extend the value.
2635 int MaskVal = SignExtend32(Value, ByteSize * 8);
2636
2637 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2638 if (MaskVal == 0) return SDValue();
2639
2640 // Finally, if this value fits in a 5 bit sext field, return it
2641 if (SignExtend32<5>(MaskVal) == MaskVal)
2642 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2643 return SDValue();
2644}
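// Sketch of the final checks above: an element value can be produced by a
// vspltis[bhw] of ByteSize bytes when it is a repetition of one
// ByteSize*8-bit chunk and that chunk sign-extends from a 5-bit immediate.
// Standalone version over plain integers (the real code uses APInt):
static bool sketchFitsVSPLTI(uint64_t Value, unsigned ValSizeInBytes,
                             unsigned ByteSize) {
  unsigned ChunkBits = ByteSize * 8; // ByteSize is 1, 2 or 4
  uint64_t ChunkMask = (1ULL << ChunkBits) - 1;
  uint64_t Chunk = Value & ChunkMask;
  for (unsigned Shift = 0; Shift < ValSizeInBytes * 8; Shift += ChunkBits)
    if (((Value >> Shift) & ChunkMask) != Chunk)
      return false; // not a repeated pattern of ChunkBits bits
  int64_t Simm = (int64_t)(Chunk << (64 - ChunkBits)) >> (64 - ChunkBits);
  return Simm != 0 && Simm >= -16 && Simm <= 15;
}
// e.g. halfwords of 0xFFFE match vspltish -2, and 0x0101 viewed with
// ByteSize == 1 matches vspltisb 1; an all-zero value is rejected because
// vxor is preferred for zero.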
2645
2646//===----------------------------------------------------------------------===//
2647// Addressing Mode Selection
2648//===----------------------------------------------------------------------===//
2649
2650/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2651/// or 64-bit immediate, and if the value can be accurately represented as a
2652/// sign extension from a 16-bit value. If so, this returns true and the
2653/// immediate.
2654bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2655 if (!isa<ConstantSDNode>(N))
2656 return false;
2657
2658 Imm = (int16_t)N->getAsZExtVal();
2659 if (N->getValueType(0) == MVT::i32)
2660 return Imm == (int32_t)N->getAsZExtVal();
2661 else
2662 return Imm == (int64_t)N->getAsZExtVal();
2663}
2664bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2665 return isIntS16Immediate(Op.getNode(), Imm);
2666}
2667
2668/// Used when computing address flags for selecting loads and stores.
2669/// If we have an OR, check if the LHS and RHS are provably disjoint.
2670/// An OR of two provably disjoint values is equivalent to an ADD.
2671/// Most PPC load/store instructions compute the effective address as a sum,
2672/// so doing this conversion is useful.
2673static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2674 if (N.getOpcode() != ISD::OR)
2675 return false;
2676 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2677 if (!LHSKnown.Zero.getBoolValue())
2678 return false;
2679 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2680 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2681}
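// Why a provably disjoint OR can be treated as an ADD: if no bit position is
// set in both operands there is never a carry, so a | b == a + b. The known-
// bits query above proves the "no common set bit" condition at compile time;
// on concrete values the same fact is just:
static bool sketchOrIsAdd(uint64_t A, uint64_t B) {
  // When A and B share no set bits, OR and ADD produce the same result.
  return (A & B) == 0; // then (A | B) == (A + B) is guaranteed
}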
2682
2683/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2684/// be represented as an indexed [r+r] operation.
2685bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
2686 SDValue &Index,
2687 SelectionDAG &DAG) const {
2688 for (SDNode *U : N->users()) {
2689 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2690 if (Memop->getMemoryVT() == MVT::f64) {
2691 Base = N.getOperand(0);
2692 Index = N.getOperand(1);
2693 return true;
2694 }
2695 }
2696 }
2697 return false;
2698}
2699
2700/// isIntS34Immediate - This method tests whether the given node's value can
2701/// be accurately represented as a sign extension from a 34-bit value. If so,
2702/// this returns true and the immediate.
2703bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2704 if (!isa<ConstantSDNode>(N))
2705 return false;
2706
2707 Imm = cast<ConstantSDNode>(N)->getSExtValue();
2708 return isInt<34>(Imm);
2709}
2710bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2711 return isIntS34Immediate(Op.getNode(), Imm);
2712}
2713
2714/// SelectAddressRegReg - Given the specified address, check to see if it
2715/// can be represented as an indexed [r+r] operation. Returns false if it
2716/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2717/// non-zero and N can be represented by a base register plus a signed 16-bit
2718/// displacement, make a more precise judgement by checking (displacement % \p
2719/// EncodingAlignment).
2720bool PPCTargetLowering::SelectAddressRegReg(
2721 SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2722 MaybeAlign EncodingAlignment) const {
2723 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2724 // a [pc+imm].
2725 if (SelectAddressPCRel(N, Base))
2726 return false;
2727
2728 int16_t Imm = 0;
2729 if (N.getOpcode() == ISD::ADD) {
2730 // Is there an SPE f64 load/store that cannot handle a 16-bit offset?
2731 // SPE load/store can only handle 8-bit offsets.
2732 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2733 return true;
2734 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2735 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2736 return false; // r+i
2737 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2738 return false; // r+i
2739
2740 Base = N.getOperand(0);
2741 Index = N.getOperand(1);
2742 return true;
2743 } else if (N.getOpcode() == ISD::OR) {
2744 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2745 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2746 return false; // r+i can fold it if we can.
2747
2748 // If this is an or of disjoint bitfields, we can codegen this as an add
2749 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2750 // disjoint.
2751 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2752
2753 if (LHSKnown.Zero.getBoolValue()) {
2754 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2755 // If all of the bits are known zero on the LHS or RHS, the add won't
2756 // carry.
2757 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2758 Base = N.getOperand(0);
2759 Index = N.getOperand(1);
2760 return true;
2761 }
2762 }
2763 }
2764
2765 return false;
2766}
2767
2768// If we happen to be doing an i64 load or store into a stack slot that has
2769// less than a 4-byte alignment, then the frame-index elimination may need to
2770// use an indexed load or store instruction (because the offset may not be a
2771// multiple of 4). The extra register needed to hold the offset comes from the
2772// register scavenger, and it is possible that the scavenger will need to use
2773// an emergency spill slot. As a result, we need to make sure that a spill slot
2774// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2775// stack slot.
2776static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2777 // FIXME: This does not handle the LWA case.
2778 if (VT != MVT::i64)
2779 return;
2780
2781 // NOTE: We'll exclude negative FIs here, which come from argument
2782 // lowering, because there are no known test cases triggering this problem
2783 // using packed structures (or similar). We can remove this exclusion if
2784 // we find such a test case. The reason why this is so test-case driven is
2785 // because this entire 'fixup' is only to prevent crashes (from the
2786 // register scavenger) on not-really-valid inputs. For example, if we have:
2787 // %a = alloca i1
2788 // %b = bitcast i1* %a to i64*
2789 // store i64 0, i64* %b
2790 // then the store should really be marked as 'align 1', but is not. If it
2791 // were marked as 'align 1' then the indexed form would have been
2792 // instruction-selected initially, and the problem this 'fixup' is preventing
2793 // won't happen regardless.
2794 if (FrameIdx < 0)
2795 return;
2796
2797 MachineFunction &MF = DAG.getMachineFunction();
2798 MachineFrameInfo &MFI = MF.getFrameInfo();
2799
2800 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2801 return;
2802
2803 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2804 FuncInfo->setHasNonRISpills();
2805}
2806
2807/// Returns true if the address N can be represented by a base register plus
2808/// a signed 16-bit displacement [r+imm], and if it is not better
2809/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2810/// displacements that are multiples of that value.
2811bool PPCTargetLowering::SelectAddressRegImm(
2812 SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2813 MaybeAlign EncodingAlignment) const {
2814 // FIXME dl should come from parent load or store, not from address
2815 SDLoc dl(N);
2816
2817 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2818 // a [pc+imm].
2819 if (SelectAddressPCRel(N, Base))
2820 return false;
2821
2822 // If this can be more profitably realized as r+r, fail.
2823 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2824 return false;
2825
2826 if (N.getOpcode() == ISD::ADD) {
2827 int16_t imm = 0;
2828 if (isIntS16Immediate(N.getOperand(1), imm) &&
2829 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2830 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2831 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2832 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2833 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2834 } else {
2835 Base = N.getOperand(0);
2836 }
2837 return true; // [r+i]
2838 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2839 // Match LOAD (ADD (X, Lo(G))).
2840 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2841 "Cannot handle constant offsets yet!");
2842 Disp = N.getOperand(1).getOperand(0); // The global address.
2843 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
2844 Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
2845 Disp.getOpcode() == ISD::TargetConstantPool ||
2846 Disp.getOpcode() == ISD::TargetJumpTable);
2847 Base = N.getOperand(0);
2848 return true; // [&g+r]
2849 }
2850 } else if (N.getOpcode() == ISD::OR) {
2851 int16_t imm = 0;
2852 if (isIntS16Immediate(N.getOperand(1), imm) &&
2853 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2854 // If this is an or of disjoint bitfields, we can codegen this as an add
2855 // (for better address arithmetic) if the LHS and RHS of the OR are
2856 // provably disjoint.
2857 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2858
2859 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2860 // If all of the bits are known zero on the LHS or RHS, the add won't
2861 // carry.
2862 if (FrameIndexSDNode *FI =
2863 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2864 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2865 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2866 } else {
2867 Base = N.getOperand(0);
2868 }
2869 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2870 return true;
2871 }
2872 }
2873 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2874 // Loading from a constant address.
2875
2876 // If this address fits entirely in a 16-bit sext immediate field, codegen
2877 // this as "d, 0"
2878 int16_t Imm;
2879 if (isIntS16Immediate(CN, Imm) &&
2880 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2881 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2882 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2883 CN->getValueType(0));
2884 return true;
2885 }
2886
2887 // Handle 32-bit sext immediates with LIS + addr mode.
2888 if ((CN->getValueType(0) == MVT::i32 ||
2889 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2890 (!EncodingAlignment ||
2891 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2892 int Addr = (int)CN->getZExtValue();
2893
2894 // Otherwise, break this down into an LIS + disp.
2895 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2896
2897 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2898 MVT::i32);
2899 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2900 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2901 return true;
2902 }
2903 }
2904
2905 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
2906 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
2907 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2908 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2909 } else
2910 Base = N;
2911 return true; // [r+0]
2912}
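// Worked example of the LIS + displacement split above: the D-form offset is
// the sign-extended low 16 bits of the address, and the LIS immediate is
// chosen so the pair reassembles the original value even when the low part is
// negative. For Addr = 0x12348000 this gives Lo = -0x8000 and Hi = 0x1235.
static void sketchSplitLisDisp(int32_t Addr, int32_t &Hi, int16_t &Lo) {
  Lo = (int16_t)Addr;      // sign-extended low 16 bits
  Hi = (Addr - Lo) >> 16;  // value materialized by LIS
  // Reassembly: (Hi << 16) + Lo == Addr.
}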
2913
2914/// Similar to the 16-bit case but for instructions that take a 34-bit
2915/// displacement field (prefixed loads/stores).
2916bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
2917 SDValue &Base,
2918 SelectionDAG &DAG) const {
2919 // Only on 64-bit targets.
2920 if (N.getValueType() != MVT::i64)
2921 return false;
2922
2923 SDLoc dl(N);
2924 int64_t Imm = 0;
2925
2926 if (N.getOpcode() == ISD::ADD) {
2927 if (!isIntS34Immediate(N.getOperand(1), Imm))
2928 return false;
2929 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2930 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2931 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2932 else
2933 Base = N.getOperand(0);
2934 return true;
2935 }
2936
2937 if (N.getOpcode() == ISD::OR) {
2938 if (!isIntS34Immediate(N.getOperand(1), Imm))
2939 return false;
2940 // If this is an or of disjoint bitfields, we can codegen this as an add
2941 // (for better address arithmetic) if the LHS and RHS of the OR are
2942 // provably disjoint.
2943 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2944 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2945 return false;
2946 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2947 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2948 else
2949 Base = N.getOperand(0);
2950 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2951 return true;
2952 }
2953
2954 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2955 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2956 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2957 return true;
2958 }
2959
2960 return false;
2961}
2962
2963/// SelectAddressRegRegOnly - Given the specified address, force it to be
2964/// represented as an indexed [r+r] operation.
2965bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
2966 SDValue &Index,
2967 SelectionDAG &DAG) const {
2968 // Check to see if we can easily represent this as an [r+r] address. This
2969 // will fail if it thinks that the address is more profitably represented as
2970 // reg+imm, e.g. where imm = 0.
2971 if (SelectAddressRegReg(N, Base, Index, DAG))
2972 return true;
2973
2974 // If the address is the result of an add, we will utilize the fact that the
2975 // address calculation includes an implicit add. However, we can reduce
2976 // register pressure if we do not materialize a constant just for use as the
2977 // index register. We only get rid of the add if it is not an add of a
2978 // value and a 16-bit signed constant and both have a single use.
2979 int16_t imm = 0;
2980 if (N.getOpcode() == ISD::ADD &&
2981 (!isIntS16Immediate(N.getOperand(1), imm) ||
2982 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2983 Base = N.getOperand(0);
2984 Index = N.getOperand(1);
2985 return true;
2986 }
2987
2988 // Otherwise, do it the hard way, using R0 as the base register.
2989 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2990 N.getValueType());
2991 Index = N;
2992 return true;
2993}
2994
2995template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2996 Ty *PCRelCand = dyn_cast<Ty>(N);
2997 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2998}
2999
3000/// Returns true if this address is a PC Relative address.
3001/// PC Relative addresses are either marked with the flag PPCII::MO_PCREL_FLAG
3002/// or have the node opcode PPCISD::MAT_PCREL_ADDR.
3003bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
3004 // This is a materialize PC Relative node. Always select this as PC Relative.
3005 Base = N;
3006 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
3007 return true;
3008 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
3009 isValidPCRelNode<GlobalAddressSDNode>(N) ||
3010 isValidPCRelNode<JumpTableSDNode>(N) ||
3011 isValidPCRelNode<BlockAddressSDNode>(N))
3012 return true;
3013 return false;
3014}
3015
3016/// Returns true if we should use a direct load into vector instruction
3017/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
3018static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
3019
3020 // If there are any uses other than scalar_to_vector, then we should
3021 // keep it as a scalar load -> direct move pattern to prevent multiple
3022 // loads.
3023 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
3024 if (!LD)
3025 return false;
3026
3027 EVT MemVT = LD->getMemoryVT();
3028 if (!MemVT.isSimple())
3029 return false;
3030 switch(MemVT.getSimpleVT().SimpleTy) {
3031 case MVT::i64:
3032 break;
3033 case MVT::i32:
3034 if (!ST.hasP8Vector())
3035 return false;
3036 break;
3037 case MVT::i16:
3038 case MVT::i8:
3039 if (!ST.hasP9Vector())
3040 return false;
3041 break;
3042 default:
3043 return false;
3044 }
3045
3046 SDValue LoadedVal(N, 0);
3047 if (!LoadedVal.hasOneUse())
3048 return false;
3049
3050 for (SDUse &Use : LD->uses())
3051 if (Use.getResNo() == 0 &&
3052 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
3053 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR_VL)
3054 return false;
3055
3056 return true;
3057}
3058
3059/// getPreIndexedAddressParts - returns true by value, base pointer and
3060/// offset pointer and addressing mode by reference if the node's address
3061/// can be legally represented as pre-indexed load / store address.
3062bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
3063 SDValue &Offset,
3064 ISD::MemIndexedMode &AM,
3065 SelectionDAG &DAG) const {
3066 if (DisablePPCPreinc) return false;
3067
3068 bool isLoad = true;
3069 SDValue Ptr;
3070 EVT VT;
3071 Align Alignment;
3072 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3073 Ptr = LD->getBasePtr();
3074 VT = LD->getMemoryVT();
3075 Alignment = LD->getAlign();
3076 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
3077 Ptr = ST->getBasePtr();
3078 VT = ST->getMemoryVT();
3079 Alignment = ST->getAlign();
3080 isLoad = false;
3081 } else
3082 return false;
3083
3084 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
3085 // instructions because we can fold these into a more efficient instruction
3086 // instead (such as LXSD).
3087 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
3088 return false;
3089 }
3090
3091 // PowerPC doesn't have preinc load/store instructions for vectors
3092 if (VT.isVector())
3093 return false;
3094
3095 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
3096 // Common code will reject creating a pre-inc form if the base pointer
3097 // is a frame index, or if N is a store and the base pointer is either
3098 // the same as or a predecessor of the value being stored. Check for
3099 // those situations here, and try with swapped Base/Offset instead.
3100 bool Swap = false;
3101
3102 if (isa<FrameIndexSDNode>(Base))
3103 Swap = true;
3104 else if (!isLoad) {
3105 SDValue Val = cast<StoreSDNode>(N)->getValue();
3106 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
3107 Swap = true;
3108 }
3109
3110 if (Swap)
3111 std::swap(Base, Offset);
3112
3113 AM = ISD::PRE_INC;
3114 return true;
3115 }
3116
3117 // LDU/STU can only handle immediates that are a multiple of 4.
3118 if (VT != MVT::i64) {
3119 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
3120 return false;
3121 } else {
3122 // LDU/STU need an address with at least 4-byte alignment.
3123 if (Alignment < Align(4))
3124 return false;
3125
3126 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
3127 return false;
3128 }
3129
3130 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
3131 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
3132 // sext i32 to i64 when addr mode is r+i.
3133 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
3134 LD->getExtensionType() == ISD::SEXTLOAD &&
3135 isa<ConstantSDNode>(Offset))
3136 return false;
3137 }
3138
3139 AM = ISD::PRE_INC;
3140 return true;
3141}
3142
3143//===----------------------------------------------------------------------===//
3144// LowerOperation implementation
3145//===----------------------------------------------------------------------===//
3146
3147/// Return true if we should reference labels using a PICBase, set the HiOpFlags
3148/// and LoOpFlags to the target MO flags.
3149static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3150 unsigned &HiOpFlags, unsigned &LoOpFlags,
3151 const GlobalValue *GV = nullptr) {
3152 HiOpFlags = PPCII::MO_HA;
3153 LoOpFlags = PPCII::MO_LO;
3154
3155 // Don't use the pic base if not in PIC relocation model.
3156 if (IsPIC) {
3157 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
3158 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
3159 }
3160}
3161
3162static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3163 SelectionDAG &DAG) {
3164 SDLoc DL(HiPart);
3165 EVT PtrVT = HiPart.getValueType();
3166 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3167
3168 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3169 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3170
3171 // With PIC, the first instruction is actually "GR+hi(&G)".
3172 if (isPIC)
3173 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3174 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3175
3176 // Generate non-pic code that has direct accesses to the constant pool.
3177 // The address of the global is just (hi(&g)+lo(&g)).
3178 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3179}
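// The Hi/Lo pair built above follows the @ha/@l relocation convention: the
// low 16 bits are treated as signed, so the "high adjusted" part rounds up
// whenever bit 15 of the address is set. A sketch of that arithmetic:
static void sketchHaLoSplit(uint32_t Addr, int32_t &Ha, int16_t &Lo) {
  Lo = (int16_t)(Addr & 0xFFFF);
  Ha = (int32_t)((Addr + 0x8000) >> 16); // rounds up exactly when Lo < 0
  // (Ha << 16) + Lo reproduces Addr (modulo 2^32).
}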
3180
3181static void setUsesTOCBasePtr(MachineFunction &MF) {
3182 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3183 FuncInfo->setUsesTOCBasePtr();
3184}
3185
3186static void setUsesTOCBasePtr(SelectionDAG &DAG) {
3187 setUsesTOCBasePtr(DAG.getMachineFunction());
3188}
3189
3190SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3191 SDValue GA) const {
3192 EVT VT = Subtarget.getScalarIntVT();
3193 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3194 : Subtarget.isAIXABI()
3195 ? DAG.getRegister(PPC::R2, VT)
3196 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3197 SDValue Ops[] = { GA, Reg };
3198 return DAG.getMemIntrinsicNode(
3199 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3200 MachinePointerInfo::getGOT(DAG.getMachineFunction()), std::nullopt,
3201 MachineMemOperand::MOLoad);
3202}
3203
3204SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3205 SelectionDAG &DAG) const {
3206 EVT PtrVT = Op.getValueType();
3207 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3208 const Constant *C = CP->getConstVal();
3209
3210 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3211 // The actual address of the GlobalValue is stored in the TOC.
3212 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3213 if (Subtarget.isUsingPCRelativeCalls()) {
3214 SDLoc DL(CP);
3215 EVT Ty = getPointerTy(DAG.getDataLayout());
3216 SDValue ConstPool = DAG.getTargetConstantPool(
3217 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3218 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3219 }
3220 setUsesTOCBasePtr(DAG);
3221 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3222 return getTOCEntry(DAG, SDLoc(CP), GA);
3223 }
3224
3225 unsigned MOHiFlag, MOLoFlag;
3226 bool IsPIC = isPositionIndependent();
3227 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3228
3229 if (IsPIC && Subtarget.isSVR4ABI()) {
3230 SDValue GA =
3231 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3232 return getTOCEntry(DAG, SDLoc(CP), GA);
3233 }
3234
3235 SDValue CPIHi =
3236 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3237 SDValue CPILo =
3238 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3239 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3240}
3241
3242// For 64-bit PowerPC, prefer the more compact relative encodings.
3243// This trades 32 bits per jump table entry for one or two instructions
3244// on the jump site.
3245unsigned PPCTargetLowering::getJumpTableEncoding() const {
3246 if (isJumpTableRelative())
3247 return MachineJumpTableInfo::EK_LabelDifference32;
3248
3249 return TargetLowering::getJumpTableEncoding();
3250}
3251
3252bool PPCTargetLowering::isJumpTableRelative() const {
3253 if (UseAbsoluteJumpTables)
3254 return false;
3255 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3256 return true;
3257 return TargetLowering::isJumpTableRelative();
3258}
3259
3260SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
3261 SelectionDAG &DAG) const {
3262 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3263 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3264
3265 switch (getTargetMachine().getCodeModel()) {
3266 case CodeModel::Small:
3267 case CodeModel::Medium:
3268 return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
3269 default:
3270 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3271 getPointerTy(DAG.getDataLayout()));
3272 }
3273}
3274
3275const MCExpr *
3276PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3277 unsigned JTI,
3278 MCContext &Ctx) const {
3279 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3280 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3281
3282 switch (getTargetMachine().getCodeModel()) {
3283 case CodeModel::Small:
3284 case CodeModel::Medium:
3285 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
3286 default:
3287 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3288 }
3289}
3290
3291SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3292 EVT PtrVT = Op.getValueType();
3293 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
3294
3295 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3296 if (Subtarget.isUsingPCRelativeCalls()) {
3297 SDLoc DL(JT);
3298 EVT Ty = getPointerTy(DAG.getDataLayout());
3299 SDValue GA =
3300 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3301 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3302 return MatAddr;
3303 }
3304
3305 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3306 // The actual address of the GlobalValue is stored in the TOC.
3307 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3308 setUsesTOCBasePtr(DAG);
3309 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3310 return getTOCEntry(DAG, SDLoc(JT), GA);
3311 }
3312
3313 unsigned MOHiFlag, MOLoFlag;
3314 bool IsPIC = isPositionIndependent();
3315 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3316
3317 if (IsPIC && Subtarget.isSVR4ABI()) {
3318 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3319 PPCII::MO_PIC_FLAG);
3320 return getTOCEntry(DAG, SDLoc(GA), GA);
3321 }
3322
3323 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3324 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3325 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3326}
3327
3328SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3329 SelectionDAG &DAG) const {
3330 EVT PtrVT = Op.getValueType();
3331 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3332 const BlockAddress *BA = BASDN->getBlockAddress();
3333
3334 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3335 if (Subtarget.isUsingPCRelativeCalls()) {
3336 SDLoc DL(BASDN);
3337 EVT Ty = getPointerTy(DAG.getDataLayout());
3338 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3339 PPCII::MO_PCREL_FLAG);
3340 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3341 return MatAddr;
3342 }
3343
3344 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3345 // The actual BlockAddress is stored in the TOC.
3346 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3347 setUsesTOCBasePtr(DAG);
3348 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3349 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3350 }
3351
3352 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3353 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3354 return getTOCEntry(
3355 DAG, SDLoc(BASDN),
3356 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3357
3358 unsigned MOHiFlag, MOLoFlag;
3359 bool IsPIC = isPositionIndependent();
3360 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3361 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3362 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3363 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3364}
3365
3366SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3367 SelectionDAG &DAG) const {
3368 if (Subtarget.isAIXABI())
3369 return LowerGlobalTLSAddressAIX(Op, DAG);
3370
3371 return LowerGlobalTLSAddressLinux(Op, DAG);
3372}
3373
3374/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3375/// and then apply the update.
3377 SelectionDAG &DAG,
3378 const TargetMachine &TM) {
3379 // Initialize TLS model opt setting lazily:
3380 // (1) Use initial-exec for single TLS var references within current function.
3381 // (2) Use local-dynamic for multiple TLS var references within current
3382 // function.
3383 PPCFunctionInfo *FuncInfo =
3384 DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
3385 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3386 SmallPtrSet<const GlobalValue *, 8> TLSGV;
3387 // Iterate over all instructions within current function, collect all TLS
3388 // global variables (global variables taken as the first parameter to
3389 // Intrinsic::threadlocal_address).
3390 const Function &Func = DAG.getMachineFunction().getFunction();
3391 for (const BasicBlock &BB : Func)
3392 for (const Instruction &I : BB)
3393 if (I.getOpcode() == Instruction::Call)
3394 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3395 if (Function *CF = CI->getCalledFunction())
3396 if (CF->isDeclaration() &&
3397 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3398 if (const GlobalValue *GV =
3399 dyn_cast<GlobalValue>(I.getOperand(0))) {
3400 TLSModel::Model GVModel = TM.getTLSModel(GV);
3401 if (GVModel == TLSModel::LocalDynamic)
3402 TLSGV.insert(GV);
3403 }
3404
3405 unsigned TLSGVCnt = TLSGV.size();
3406 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3407 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3408 FuncInfo->setAIXFuncUseTLSIEForLD();
3409 FuncInfo->setAIXFuncTLSModelOptInitDone();
3410 }
3411
3412 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3413 LLVM_DEBUG(
3414 dbgs() << DAG.getMachineFunction().getName()
3415 << " function is using the TLS-IE model for TLS-LD access.\n");
3416 Model = TLSModel::InitialExec;
3417 }
3418}
3419
3420SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3421 SelectionDAG &DAG) const {
3422 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3423
3424 if (DAG.getTarget().useEmulatedTLS())
3425 report_fatal_error("Emulated TLS is not yet supported on AIX");
3426
3427 SDLoc dl(GA);
3428 const GlobalValue *GV = GA->getGlobal();
3429 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3430 bool Is64Bit = Subtarget.isPPC64();
3431 TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
3432
3433 // Apply update to the TLS model.
3434 if (Subtarget.hasAIXShLibTLSModelOpt())
3436 updateForAIXShLibTLSModelOpt(Model, DAG, getTargetMachine());
3437 // TLS variables are accessed through TOC entries.
3438 // To support this, set the DAG to use the TOC base pointer.
3439 setUsesTOCBasePtr(DAG);
3440
3441 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3442
3443 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3444 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3445 bool HasAIXSmallTLSGlobalAttr = false;
3446 SDValue VariableOffsetTGA =
3447 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3448 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3449 SDValue TLSReg;
3450
3451 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3452 if (GVar->hasAttribute("aix-small-tls"))
3453 HasAIXSmallTLSGlobalAttr = true;
3454
3455 if (Is64Bit) {
3456 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3457 // involves a load of the variable offset (from the TOC), followed by an
3458 // add of the loaded variable offset to R13 (the thread pointer).
3459 // This code sequence looks like:
3460 // ld reg1,var[TC](2)
3461 // add reg2, reg1, r13 // r13 contains the thread pointer
3462 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3463
3464 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3465 // global variable attribute, produce a faster access sequence for
3466 // local-exec TLS variables where the offset from the TLS base is encoded
3467 // as an immediate operand.
3468 //
3469 // We only utilize the faster local-exec access sequence when the TLS
3470 // variable has a size within the policy limit. We treat types that are
3471 // not sized or are empty as being over the policy size limit.
3472 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3473 IsTLSLocalExecModel) {
3474 Type *GVType = GV->getValueType();
3475 if (GVType->isSized() && !GVType->isEmptyTy() &&
3476 GV->getDataLayout().getTypeAllocSize(GVType) <=
3478 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3479 }
3480 } else {
3481 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3482 // involves loading the variable offset from the TOC, generating a call to
3483 // .__get_tpointer to get the thread pointer (which will be in R3), and
3484 // adding the two together:
3485 // lwz reg1,var[TC](2)
3486 // bla .__get_tpointer
3487 // add reg2, reg1, r3
3488 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3489
3490 // We do not implement the 32-bit version of the faster access sequence
3491 // for local-exec that is controlled by the -maix-small-local-exec-tls
3492 // option, or the "aix-small-tls" global variable attribute.
3493 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3494 report_fatal_error("The small-local-exec TLS access sequence is "
3495 "currently only supported on AIX (64-bit mode).");
3496 }
3497 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3498 }
3499
3500 if (Model == TLSModel::LocalDynamic) {
3501 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3502
3503 // We do not implement the 32-bit version of the faster access sequence
3504 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3505 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3506 report_fatal_error("The small-local-dynamic TLS access sequence is "
3507 "currently only supported on AIX (64-bit mode).");
3508
3509 // For local-dynamic on AIX, we need to generate one TOC entry for each
3510 // variable offset, and a single module-handle TOC entry for the entire
3511 // file.
3512
3513 SDValue VariableOffsetTGA =
3514 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3515 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3516
3517 Module *M = DAG.getMachineFunction().getFunction().getParent();
3518 GlobalVariable *TLSGV =
3519 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3520 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3521 TLSGV->setThreadLocal(true);
3522 assert(TLSGV && "Not able to create GV for _$TLSML.");
3523 SDValue ModuleHandleTGA =
3524 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3525 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3526 SDValue ModuleHandle =
3527 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3528
3529 // With the -maix-small-local-dynamic-tls option, produce a faster access
3530 // sequence for local-dynamic TLS variables where the offset from the
3531 // module-handle is encoded as an immediate operand.
3532 //
3533 // We only utilize the faster local-dynamic access sequence when the TLS
3534 // variable has a size within the policy limit. We treat types that are
3535 // not sized or are empty as being over the policy size limit.
3536 if (HasAIXSmallLocalDynamicTLS) {
3537 Type *GVType = GV->getValueType();
3538 if (GVType->isSized() && !GVType->isEmptyTy() &&
3539 GV->getDataLayout().getTypeAllocSize(GVType) <=
3541 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3542 ModuleHandle);
3543 }
3544
3545 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3546 }
3547
3548 // If local-exec, initial-exec, or local-dynamic is not possible or not specified,
3549 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3550 // need to generate two TOC entries, one for the variable offset, one for the
3551 // region handle. The global address for the TOC entry of the region handle is
3552 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3553 // entry of the variable offset is created with MO_TLSGD_FLAG.
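// Exposition note: the PPCISD::TLSGD_AIX node built below is expanded later
// in the backend into a call to .__tls_get_addr that takes the region handle
// and the variable offset as its two arguments.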
3554 SDValue VariableOffsetTGA =
3555 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3556 SDValue RegionHandleTGA =
3557 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3558 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3559 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3560 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3561 RegionHandle);
3562}
3563
3564SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3565 SelectionDAG &DAG) const {
3566 // FIXME: TLS addresses currently use medium model code sequences,
3567 // which is the most useful form. Eventually support for small and
3568 // large models could be added if users need it, at the cost of
3569 // additional complexity.
3570 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3571 if (DAG.getTarget().useEmulatedTLS())
3572 return LowerToTLSEmulatedModel(GA, DAG);
3573
3574 SDLoc dl(GA);
3575 const GlobalValue *GV = GA->getGlobal();
3576 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3577 bool is64bit = Subtarget.isPPC64();
3578 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3579 PICLevel::Level picLevel = M->getPICLevel();
3580
3581 const TargetMachine &TM = getTargetMachine();
3582 TLSModel::Model Model = TM.getTLSModel(GV);
3583
3584 if (Model == TLSModel::LocalExec) {
3585 if (Subtarget.isUsingPCRelativeCalls()) {
3586 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3587 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3588 PPCII::MO_TPREL_PCREL_FLAG);
3589 SDValue MatAddr =
3590 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3591 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3592 }
3593
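// Otherwise fall back to the standard ELF local-exec form; the Hi/Lo nodes
// built below roughly correspond to:
//   addis reg, r13, var@tprel@ha
//   addi  reg, reg, var@tprel@l
// (r2 is used instead of r13 on 32-bit targets.)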
3594 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3595 PPCII::MO_TPREL_HA);
3596 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3597 PPCII::MO_TPREL_LO);
3598 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3599 : DAG.getRegister(PPC::R2, MVT::i32);
3600
3601 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3602 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3603 }
3604
3605 if (Model == TLSModel::InitialExec) {
3606 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3607 SDValue TGA = DAG.getTargetGlobalAddress(
3608 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3609 SDValue TGATLS = DAG.getTargetGlobalAddress(
3610 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3611 SDValue TPOffset;
3612 if (IsPCRel) {
3613 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3614 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3615 MachinePointerInfo());
3616 } else {
3617 SDValue GOTPtr;
3618 if (is64bit) {
3619 setUsesTOCBasePtr(DAG);
3620 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3621 GOTPtr =
3622 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3623 } else {
3624 if (!TM.isPositionIndependent())
3625 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3626 else if (picLevel == PICLevel::SmallPIC)
3627 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3628 else
3629 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3630 }
3631 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3632 }
3633 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3634 }
3635
3636 if (Model == TLSModel::GeneralDynamic) {
3637 if (Subtarget.isUsingPCRelativeCalls()) {
3638 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3640 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3641 }
3642
3643 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3644 SDValue GOTPtr;
3645 if (is64bit) {
3646 setUsesTOCBasePtr(DAG);
3647 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3648 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3649 GOTReg, TGA);
3650 } else {
3651 if (picLevel == PICLevel::SmallPIC)
3652 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3653 else
3654 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3655 }
3656 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3657 GOTPtr, TGA, TGA);
3658 }
3659
3660 if (Model == TLSModel::LocalDynamic) {
3661 if (Subtarget.isUsingPCRelativeCalls()) {
3662 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3663 PPCII::MO_GOT_TLSLD_PCREL_FLAG);
3664 SDValue MatPCRel =
3665 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3666 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3667 }
3668
3669 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3670 SDValue GOTPtr;
3671 if (is64bit) {
3672 setUsesTOCBasePtr(DAG);
3673 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3674 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3675 GOTReg, TGA);
3676 } else {
3677 if (picLevel == PICLevel::SmallPIC)
3678 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3679 else
3680 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3681 }
3682 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3683 PtrVT, GOTPtr, TGA, TGA);
3684 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3685 PtrVT, TLSAddr, TGA);
3686 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3687 }
3688
3689 llvm_unreachable("Unknown TLS model!");
3690}
3691
3692SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3693 SelectionDAG &DAG) const {
3694 EVT PtrVT = Op.getValueType();
3695 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3696 SDLoc DL(GSDN);
3697 const GlobalValue *GV = GSDN->getGlobal();
3698
3699 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3700 // The actual address of the GlobalValue is stored in the TOC.
3701 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3702 if (Subtarget.isUsingPCRelativeCalls()) {
3703 EVT Ty = getPointerTy(DAG.getDataLayout());
3704 if (isAccessedAsGotIndirect(Op)) {
3705 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3706 PPCII::MO_GOT_PCREL_FLAG);
3707 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3708 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3709 MachinePointerInfo());
3710 return Load;
3711 } else {
3712 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3713 PPCII::MO_PCREL_FLAG);
3714 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3715 }
3716 }
3717 setUsesTOCBasePtr(DAG);
3718 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3719 return getTOCEntry(DAG, DL, GA);
3720 }
3721
3722 unsigned MOHiFlag, MOLoFlag;
3723 bool IsPIC = isPositionIndependent();
3724 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3725
3726 if (IsPIC && Subtarget.isSVR4ABI()) {
3727 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3728 GSDN->getOffset(),
3729 PPCII::MO_PIC_FLAG);
3730 return getTOCEntry(DAG, DL, GA);
3731 }
3732
3733 SDValue GAHi =
3734 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3735 SDValue GALo =
3736 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3737
3738 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3739}
3740
3741SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3742 bool IsStrict = Op->isStrictFPOpcode();
3743 ISD::CondCode CC =
3744 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3745 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3746 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3747 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3748 EVT LHSVT = LHS.getValueType();
3749 SDLoc dl(Op);
3750
3751 // Soften the setcc with libcall if it is fp128.
3752 if (LHSVT == MVT::f128) {
3753 assert(!Subtarget.hasP9Vector() &&
3754 "SETCC for f128 is already legal under Power9!");
3755 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3756 Op->getOpcode() == ISD::STRICT_FSETCCS);
3757 if (RHS.getNode())
3758 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3759 DAG.getCondCode(CC));
3760 if (IsStrict)
3761 return DAG.getMergeValues({LHS, Chain}, dl);
3762 return LHS;
3763 }
3764
3765 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3766
3767 if (Op.getValueType() == MVT::v2i64) {
3768 // When the operands themselves are v2i64 values, we need to do something
3769 // special because VSX has no underlying comparison operations for these.
3770 if (LHS.getValueType() == MVT::v2i64) {
3771 // Equality can be handled by casting to the legal type for Altivec
3772 // comparisons, everything else needs to be expanded.
3773 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3774 return SDValue();
3775 SDValue SetCC32 = DAG.getSetCC(
3776 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3777 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3778 int ShuffV[] = {1, 0, 3, 2};
3779 SDValue Shuff =
3780 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3781 return DAG.getBitcast(MVT::v2i64,
3782 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3783 dl, MVT::v4i32, Shuff, SetCC32));
3784 }
3785
3786 // We handle most of these in the usual way.
3787 return Op;
3788 }
3789
3790 // If we're comparing for equality to zero, expose the fact that this is
3791 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3792 // fold the new nodes.
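// For example, for an i32 value x, (seteq x, 0) becomes (srl (ctlz x), 5):
// ctlz returns 32 only when x is zero, so the shift yields exactly 0 or 1.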
3793 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3794 return V;
3795
3796 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3797 // Leave comparisons against 0 and -1 alone for now, since they're usually
3798 // optimized. FIXME: revisit this when we can custom lower all setcc
3799 // optimizations.
3800 if (C->isAllOnes() || C->isZero())
3801 return SDValue();
3802 }
3803
3804 // If we have an integer seteq/setne, turn it into a compare against zero
3805 // by xor'ing the rhs with the lhs, which is faster than setting a
3806 // condition register, reading it back out, and masking the correct bit. The
3807 // normal approach here uses sub to do this instead of xor. Using xor exposes
3808 // the result to other bit-twiddling opportunities.
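// For example, (seteq a, b) becomes (seteq (xor a, b), 0).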
3809 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3810 EVT VT = Op.getValueType();
3811 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3812 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3813 }
3814 return SDValue();
3815}
3816
3817SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3818 SDNode *Node = Op.getNode();
3819 EVT VT = Node->getValueType(0);
3820 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3821 SDValue InChain = Node->getOperand(0);
3822 SDValue VAListPtr = Node->getOperand(1);
3823 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3824 SDLoc dl(Node);
3825
3826 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3827
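// In outline: read the gpr/fpr index bytes from the va_list, choose between
// the register save area and the overflow area based on that index, advance
// the chosen index (and the overflow pointer), then load the value from the
// selected address. The individual steps are commented below.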
3828 // gpr_index
3829 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3830 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3831 InChain = GprIndex.getValue(1);
3832
3833 if (VT == MVT::i64) {
3834 // Check if GprIndex is even
3835 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3836 DAG.getConstant(1, dl, MVT::i32));
3837 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3838 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3839 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3840 DAG.getConstant(1, dl, MVT::i32));
3841 // Align GprIndex to be even if it isn't
3842 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3843 GprIndex);
3844 }
3845
3846 // fpr index is 1 byte after gpr
3847 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3848 DAG.getConstant(1, dl, MVT::i32));
3849
3850 // fpr
3851 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3852 FprPtr, MachinePointerInfo(SV), MVT::i8);
3853 InChain = FprIndex.getValue(1);
3854
3855 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3856 DAG.getConstant(8, dl, MVT::i32));
3857
3858 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3859 DAG.getConstant(4, dl, MVT::i32));
3860
3861 // areas
3862 SDValue OverflowArea =
3863 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3864 InChain = OverflowArea.getValue(1);
3865
3866 SDValue RegSaveArea =
3867 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3868 InChain = RegSaveArea.getValue(1);
3869
3870 // select overflow_area if index >= 8 (no register slots remain)
3871 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3872 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3873
3874 // adjustment constant gpr_index * 4/8
3875 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3876 VT.isInteger() ? GprIndex : FprIndex,
3877 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3878 MVT::i32));
3879
3880 // OurReg = RegSaveArea + RegConstant
3881 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3882 RegConstant);
3883
3884 // Floating types are 32 bytes into RegSaveArea
3885 if (VT.isFloatingPoint())
3886 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3887 DAG.getConstant(32, dl, MVT::i32));
3888
3889 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3890 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3891 VT.isInteger() ? GprIndex : FprIndex,
3892 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3893 MVT::i32));
3894
3895 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3896 VT.isInteger() ? VAListPtr : FprPtr,
3897 MachinePointerInfo(SV), MVT::i8);
3898
3899 // determine if we should load from reg_save_area or overflow_area
3900 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3901
3902 // increase overflow_area by 4/8 if the gpr/fpr index is >= 8
3903 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3904 DAG.getConstant(VT.isInteger() ? 4 : 8,
3905 dl, MVT::i32));
3906
3907 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3908 OverflowAreaPlusN);
3909
3910 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3911 MachinePointerInfo(), MVT::i32);
3912
3913 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3914}
3915
3916SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3917 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3918
3919 // We have to copy the entire va_list struct:
3920 // 2*sizeof(char) + 2 bytes of alignment padding + 2*sizeof(char*) = 12 bytes
3921 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3922 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3923 false, true, /*CI=*/nullptr, std::nullopt,
3924 MachinePointerInfo(), MachinePointerInfo());
3925}
3926
3927SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3928 SelectionDAG &DAG) const {
3929 return Op.getOperand(0);
3930}
3931
3932SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3933 MachineFunction &MF = DAG.getMachineFunction();
3934 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3935
3936 assert((Op.getOpcode() == ISD::INLINEASM ||
3937 Op.getOpcode() == ISD::INLINEASM_BR) &&
3938 "Expecting Inline ASM node.");
3939
3940 // If an LR store is already known to be required then there is no point in
3941 // checking this ASM as well.
3942 if (MFI.isLRStoreRequired())
3943 return Op;
3944
3945 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3946 // type MVT::Glue. We want to ignore this last operand if that is the case.
3947 unsigned NumOps = Op.getNumOperands();
3948 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3949 --NumOps;
3950
3951 // Check all operands that may contain the LR.
3952 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3953 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3954 unsigned NumVals = Flags.getNumOperandRegisters();
3955 ++i; // Skip the ID value.
3956
3957 switch (Flags.getKind()) {
3958 default:
3959 llvm_unreachable("Bad flags!");
3960 case InlineAsm::Kind::RegUse:
3961 case InlineAsm::Kind::Imm:
3962 case InlineAsm::Kind::Mem:
3963 i += NumVals;
3964 break;
3965 case InlineAsm::Kind::Clobber:
3966 case InlineAsm::Kind::RegDef:
3967 case InlineAsm::Kind::RegDefEarlyClobber: {
3968 for (; NumVals; --NumVals, ++i) {
3969 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3970 if (Reg != PPC::LR && Reg != PPC::LR8)
3971 continue;
3972 MFI.setLRStoreRequired();
3973 return Op;
3974 }
3975 break;
3976 }
3977 }
3978 }
3979
3980 return Op;
3981}
3982
3983SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3984 SelectionDAG &DAG) const {
3985 SDValue Chain = Op.getOperand(0);
3986 SDValue Trmp = Op.getOperand(1); // trampoline
3987 SDValue FPtr = Op.getOperand(2); // nested function
3988 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3989 SDLoc dl(Op);
3990
3991 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3992
3993 if (Subtarget.isAIXABI()) {
3994 // On AIX we create a trampoline descriptor by combining the
3995 // entry point and TOC from the global descriptor (FPtr) with the
3996 // nest argument as the environment pointer.
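// Layout assumed below: entry point at offset 0, TOC pointer at offset
// PointerSize, environment (nest) pointer at offset 2 * PointerSize.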
3997 uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
3998 MaybeAlign PointerAlign(PointerSize);
3999 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
4000 ? (MachineMemOperand::MODereferenceable |
4001 MachineMemOperand::MOInvariant)
4002 : MachineMemOperand::MONone;
4003
4004 uint64_t TOCPointerOffset = 1 * PointerSize;
4005 uint64_t EnvPointerOffset = 2 * PointerSize;
4006 SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
4007 SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);
4008
4009 const Value *TrampolineAddr =
4010 cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4011 const Function *Func =
4012 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
4013
4014 SDValue OutChains[3];
4015
4016 // Copy the entry point address from the global descriptor to the
4017 // trampoline buffer.
4018 SDValue LoadEntryPoint =
4019 DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
4020 PointerAlign, MMOFlags);
4021 SDValue EPLoadChain = LoadEntryPoint.getValue(1);
4022 OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
4023 MachinePointerInfo(TrampolineAddr, 0));
4024
4025 // Copy the TOC pointer from the global descriptor to the trampoline
4026 // buffer.
4027 SDValue TOCFromDescriptorPtr =
4028 DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
4029 SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
4030 MachinePointerInfo(Func, TOCPointerOffset),
4031 PointerAlign, MMOFlags);
4032 SDValue TrampolineTOCPointer =
4033 DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
4034 SDValue TOCLoadChain = TOCReg.getValue(1);
4035 OutChains[1] =
4036 DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
4037 MachinePointerInfo(TrampolineAddr, TOCPointerOffset));
4038
4039 // Store the nest argument into the environment pointer in the trampoline
4040 // buffer.
4041 SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
4042 OutChains[2] =
4043 DAG.getStore(Chain, dl, Nest, EnvPointer,
4044 MachinePointerInfo(TrampolineAddr, EnvPointerOffset));
4045
4046 SDValue TokenFactor =
4047 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
4048 return TokenFactor;
4049 }
4050
4051 bool isPPC64 = (PtrVT == MVT::i64);
4052 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
4053
4054 TargetLowering::ArgListTy Args;
4055 Args.emplace_back(Trmp, IntPtrTy);
4056 // TrampSize == (isPPC64 ? 48 : 40);
4057 Args.emplace_back(
4058 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
4059 IntPtrTy);
4060 Args.emplace_back(FPtr, IntPtrTy);
4061 Args.emplace_back(Nest, IntPtrTy);
4062
4063 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
4064 TargetLowering::CallLoweringInfo CLI(DAG);
4065 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
4066 CallingConv::C, Type::getVoidTy(*DAG.getContext()),
4067 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
4068
4069 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4070 return CallResult.second;
4071}
4072
4073SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
4074 MachineFunction &MF = DAG.getMachineFunction();
4075 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4076 EVT PtrVT = getPointerTy(MF.getDataLayout());
4077
4078 SDLoc dl(Op);
4079
4080 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
4081 // vastart just stores the address of the VarArgsFrameIndex slot into the
4082 // memory location argument.
4083 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4084 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4085 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
4086 MachinePointerInfo(SV));
4087 }
4088
4089 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
4090 // We suppose the given va_list is already allocated.
4091 //
4092 // typedef struct {
4093 // char gpr; /* index into the array of 8 GPRs
4094 // * stored in the register save area
4095 // * gpr=0 corresponds to r3,
4096 // * gpr=1 to r4, etc.
4097 // */
4098 // char fpr; /* index into the array of 8 FPRs
4099 // * stored in the register save area
4100 // * fpr=0 corresponds to f1,
4101 // * fpr=1 to f2, etc.
4102 // */
4103 // char *overflow_arg_area;
4104 // /* location on stack that holds
4105 // * the next overflow argument
4106 // */
4107 // char *reg_save_area;
4108 // /* where r3:r10 and f1:f8 (if saved)
4109 // * are stored
4110 // */
4111 // } va_list[1];
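//
// The byte offsets used here and in LowerVAARG/LowerVACOPY are: gpr at 0,
// fpr at 1, overflow_arg_area at 4 and reg_save_area at 8, 12 bytes in total.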
4112
4113 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
4114 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
4115 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
4116 PtrVT);
4117 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
4118 PtrVT);
4119
4120 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
4121 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
4122
4123 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
4124 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
4125
4126 uint64_t FPROffset = 1;
4127 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
4128
4129 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4130
4131 // Store first byte : number of int regs
4132 SDValue firstStore =
4133 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
4134 MachinePointerInfo(SV), MVT::i8);
4135 uint64_t nextOffset = FPROffset;
4136 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
4137 ConstFPROffset);
4138
4139 // Store second byte : number of float regs
4140 SDValue secondStore =
4141 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
4142 MachinePointerInfo(SV, nextOffset), MVT::i8);
4143 nextOffset += StackOffset;
4144 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
4145
4146 // Store second word : arguments given on stack
4147 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
4148 MachinePointerInfo(SV, nextOffset));
4149 nextOffset += FrameOffset;
4150 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
4151
4152 // Store third word : arguments given in registers
4153 return DAG.getStore(thirdStore, dl, FR, nextPtr,
4154 MachinePointerInfo(SV, nextOffset));
4155}
4156
4157/// FPR - The set of FP registers that should be allocated for arguments
4158/// on Darwin and AIX.
4159static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
4160 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4161 PPC::F11, PPC::F12, PPC::F13};
4162
4163/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4164/// the stack.
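/// For example, with an 8-byte pointer an i32 argument still reserves a full
/// 8-byte slot, while a 12-byte byval argument reserves 16 bytes (array
/// members passed in consecutive registers are packed instead).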
4165static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4166 unsigned PtrByteSize) {
4167 unsigned ArgSize = ArgVT.getStoreSize();
4168 if (Flags.isByVal())
4169 ArgSize = Flags.getByValSize();
4170
4171 // Round up to multiples of the pointer size, except for array members,
4172 // which are always packed.
4173 if (!Flags.isInConsecutiveRegs())
4174 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4175
4176 return ArgSize;
4177}
4178
4179/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4180/// on the stack.
4181 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4182 ISD::ArgFlagsTy Flags,
4183 unsigned PtrByteSize) {
4184 Align Alignment(PtrByteSize);
4185
4186 // Altivec parameters are padded to a 16 byte boundary.
4187 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4188 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4189 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4190 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4191 Alignment = Align(16);
4192
4193 // ByVal parameters are aligned as requested.
4194 if (Flags.isByVal()) {
4195 auto BVAlign = Flags.getNonZeroByValAlign();
4196 if (BVAlign > PtrByteSize) {
4197 if (BVAlign.value() % PtrByteSize != 0)
4199 "ByVal alignment is not a multiple of the pointer size");
4200
4201 Alignment = BVAlign;
4202 }
4203 }
4204
4205 // Array members are always packed to their original alignment.
4206 if (Flags.isInConsecutiveRegs()) {
4207 // If the array member was split into multiple registers, the first
4208 // needs to be aligned to the size of the full type. (Except for
4209 // ppcf128, which is only aligned as its f64 components.)
4210 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4211 Alignment = Align(OrigVT.getStoreSize());
4212 else
4213 Alignment = Align(ArgVT.getStoreSize());
4214 }
4215
4216 return Alignment;
4217}
4218
4219/// CalculateStackSlotUsed - Return whether this argument will use its
4220/// stack slot (instead of being passed in registers). ArgOffset,
4221/// AvailableFPRs, and AvailableVRs must hold the current argument
4222/// position, and will be updated to account for this argument.
4223static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4224 unsigned PtrByteSize, unsigned LinkageSize,
4225 unsigned ParamAreaSize, unsigned &ArgOffset,
4226 unsigned &AvailableFPRs,
4227 unsigned &AvailableVRs) {
4228 bool UseMemory = false;
4229
4230 // Respect alignment of argument on the stack.
4231 Align Alignment =
4232 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4233 ArgOffset = alignTo(ArgOffset, Alignment);
4234 // If there's no space left in the argument save area, we must
4235 // use memory (this check also catches zero-sized arguments).
4236 if (ArgOffset >= LinkageSize + ParamAreaSize)
4237 UseMemory = true;
4238
4239 // Allocate argument on the stack.
4240 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4241 if (Flags.isInConsecutiveRegsLast())
4242 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4243 // If we overran the argument save area, we must use memory
4244 // (this check catches arguments passed partially in memory)
4245 if (ArgOffset > LinkageSize + ParamAreaSize)
4246 UseMemory = true;
4247
4248 // However, if the argument is actually passed in an FPR or a VR,
4249 // we don't use memory after all.
4250 if (!Flags.isByVal()) {
4251 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4252 if (AvailableFPRs > 0) {
4253 --AvailableFPRs;
4254 return false;
4255 }
4256 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4257 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4258 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4259 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4260 if (AvailableVRs > 0) {
4261 --AvailableVRs;
4262 return false;
4263 }
4264 }
4265
4266 return UseMemory;
4267}
4268
4269/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4270/// ensure minimum alignment required for target.
4271 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4272 unsigned NumBytes) {
4273 return alignTo(NumBytes, Lowering->getStackAlign());
4274}
4275
4276SDValue PPCTargetLowering::LowerFormalArguments(
4277 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4278 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4279 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4280 if (Subtarget.isAIXABI())
4281 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4282 InVals);
4283 if (Subtarget.is64BitELFABI())
4284 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4285 InVals);
4286 assert(Subtarget.is32BitELFABI());
4287 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4288 InVals);
4289}
4290
4291SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4292 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4293 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4294 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4295
4296 // 32-bit SVR4 ABI Stack Frame Layout:
4297 // +-----------------------------------+
4298 // +--> | Back chain |
4299 // | +-----------------------------------+
4300 // | | Floating-point register save area |
4301 // | +-----------------------------------+
4302 // | | General register save area |
4303 // | +-----------------------------------+
4304 // | | CR save word |
4305 // | +-----------------------------------+
4306 // | | VRSAVE save word |
4307 // | +-----------------------------------+
4308 // | | Alignment padding |
4309 // | +-----------------------------------+
4310 // | | Vector register save area |
4311 // | +-----------------------------------+
4312 // | | Local variable space |
4313 // | +-----------------------------------+
4314 // | | Parameter list area |
4315 // | +-----------------------------------+
4316 // | | LR save word |
4317 // | +-----------------------------------+
4318 // SP--> +--- | Back chain |
4319 // +-----------------------------------+
4320 //
4321 // Specifications:
4322 // System V Application Binary Interface PowerPC Processor Supplement
4323 // AltiVec Technology Programming Interface Manual
4324
4325 MachineFunction &MF = DAG.getMachineFunction();
4326 MachineFrameInfo &MFI = MF.getFrameInfo();
4327 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4328
4329 EVT PtrVT = getPointerTy(MF.getDataLayout());
4330 // Potential tail calls could cause overwriting of argument stack slots.
4331 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4332 (CallConv == CallingConv::Fast));
4333 const Align PtrAlign(4);
4334
4335 // Assign locations to all of the incoming arguments.
4336 SmallVector<CCValAssign, 16> ArgLocs;
4337 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4338 *DAG.getContext());
4339
4340 // Reserve space for the linkage area on the stack.
4341 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4342 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4343 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4344
4345 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4346 CCValAssign &VA = ArgLocs[i];
4347
4348 // Arguments stored in registers.
4349 if (VA.isRegLoc()) {
4350 const TargetRegisterClass *RC;
4351 EVT ValVT = VA.getValVT();
4352
4353 switch (ValVT.getSimpleVT().SimpleTy) {
4354 default:
4355 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4356 case MVT::i1:
4357 case MVT::i32:
4358 RC = &PPC::GPRCRegClass;
4359 break;
4360 case MVT::f32:
4361 if (Subtarget.hasP8Vector())
4362 RC = &PPC::VSSRCRegClass;
4363 else if (Subtarget.hasSPE())
4364 RC = &PPC::GPRCRegClass;
4365 else
4366 RC = &PPC::F4RCRegClass;
4367 break;
4368 case MVT::f64:
4369 if (Subtarget.hasVSX())
4370 RC = &PPC::VSFRCRegClass;
4371 else if (Subtarget.hasSPE())
4372 // SPE passes doubles in GPR pairs.
4373 RC = &PPC::GPRCRegClass;
4374 else
4375 RC = &PPC::F8RCRegClass;
4376 break;
4377 case MVT::v16i8:
4378 case MVT::v8i16:
4379 case MVT::v4i32:
4380 RC = &PPC::VRRCRegClass;
4381 break;
4382 case MVT::v4f32:
4383 RC = &PPC::VRRCRegClass;
4384 break;
4385 case MVT::v2f64:
4386 case MVT::v2i64:
4387 RC = &PPC::VRRCRegClass;
4388 break;
4389 }
4390
4391 SDValue ArgValue;
4392 // Transform the arguments stored in physical registers into
4393 // virtual ones.
4394 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4395 assert(i + 1 < e && "No second half of double precision argument");
4396 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4397 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4398 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4399 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4400 if (!Subtarget.isLittleEndian())
4401 std::swap (ArgValueLo, ArgValueHi);
4402 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4403 ArgValueHi);
4404 } else {
4405 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4406 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4407 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4408 if (ValVT == MVT::i1)
4409 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4410 }
4411
4412 InVals.push_back(ArgValue);
4413 } else {
4414 // Argument stored in memory.
4415 assert(VA.isMemLoc());
4416
4417 // Get the extended size of the argument type in stack
4418 unsigned ArgSize = VA.getLocVT().getStoreSize();
4419 // Get the actual size of the argument type
4420 unsigned ObjSize = VA.getValVT().getStoreSize();
4421 unsigned ArgOffset = VA.getLocMemOffset();
4422 // Stack objects in PPC32 are right justified.
4423 ArgOffset += ArgSize - ObjSize;
4424 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4425
4426 // Create load nodes to retrieve arguments from the stack.
4427 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4428 InVals.push_back(
4429 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4430 }
4431 }
4432
4433 // Assign locations to all of the incoming aggregate by value arguments.
4434 // Aggregates passed by value are stored in the local variable space of the
4435 // caller's stack frame, right above the parameter list area.
4436 SmallVector<CCValAssign, 16> ByValArgLocs;
4437 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4438 ByValArgLocs, *DAG.getContext());
4439
4440 // Reserve stack space for the allocations in CCInfo.
4441 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4442
4443 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4444
4445 // Area that is at least reserved in the caller of this function.
4446 unsigned MinReservedArea = CCByValInfo.getStackSize();
4447 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4448
4449 // Set the size that is at least reserved in caller of this function. Tail
4450 // call optimized function's reserved stack space needs to be aligned so that
4451 // taking the difference between two stack areas will result in an aligned
4452 // stack.
4453 MinReservedArea =
4454 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4455 FuncInfo->setMinReservedArea(MinReservedArea);
4456
4457 SmallVector<SDValue, 8> MemOps;
4458
4459 // If the function takes variable number of arguments, make a frame index for
4460 // the start of the first vararg value... for expansion of llvm.va_start.
4461 if (isVarArg) {
4462 static const MCPhysReg GPArgRegs[] = {
4463 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4464 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4465 };
4466 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4467
4468 static const MCPhysReg FPArgRegs[] = {
4469 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4470 PPC::F8
4471 };
4472 unsigned NumFPArgRegs = std::size(FPArgRegs);
4473
4474 if (useSoftFloat() || hasSPE())
4475 NumFPArgRegs = 0;
4476
4477 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4478 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4479
4480 // Make room for NumGPArgRegs and NumFPArgRegs.
4481 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4482 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4483
4484 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4485 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4486
4487 FuncInfo->setVarArgsFrameIndex(
4488 MFI.CreateStackObject(Depth, Align(8), false));
4489 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4490
4491 // The fixed integer arguments of a variadic function are stored to the
4492 // VarArgsFrameIndex on the stack so that they may be loaded by
4493 // dereferencing the result of va_next.
4494 for (MCPhysReg GPArgReg : GPArgRegs) {
4495 // Get an existing live-in vreg, or add a new one.
4496 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
4497 if (!VReg)
4498 VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);
4499
4500 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4501 SDValue Store =
4502 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4503 MemOps.push_back(Store);
4504 // Increment the address by four for the next argument to store
4505 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4506 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4507 }
4508
4509 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4510 // is set.
4511 // The double arguments are stored to the VarArgsFrameIndex
4512 // on the stack.
4513 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4514 // Get an existing live-in vreg, or add a new one.
4515 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4516 if (!VReg)
4517 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4518
4519 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4520 SDValue Store =
4521 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4522 MemOps.push_back(Store);
4523 // Increment the address by eight for the next argument to store
4524 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4525 PtrVT);
4526 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4527 }
4528 }
4529
4530 if (!MemOps.empty())
4531 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4532
4533 return Chain;
4534}
4535
4536// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4537// value to MVT::i64 and then truncate to the correct register size.
4538SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4539 EVT ObjectVT, SelectionDAG &DAG,
4540 SDValue ArgVal,
4541 const SDLoc &dl) const {
4542 if (Flags.isSExt())
4543 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4544 DAG.getValueType(ObjectVT));
4545 else if (Flags.isZExt())
4546 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4547 DAG.getValueType(ObjectVT));
4548
4549 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4550}
4551
4552SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4553 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4554 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4555 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4556 // TODO: add description of PPC stack frame format, or at least some docs.
4557 //
4558 bool isELFv2ABI = Subtarget.isELFv2ABI();
4559 bool isLittleEndian = Subtarget.isLittleEndian();
4560 MachineFunction &MF = DAG.getMachineFunction();
4561 MachineFrameInfo &MFI = MF.getFrameInfo();
4562 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4563
4564 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4565 "fastcc not supported on varargs functions");
4566
4567 EVT PtrVT = getPointerTy(MF.getDataLayout());
4568 // Potential tail calls could cause overwriting of argument stack slots.
4569 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4570 (CallConv == CallingConv::Fast));
4571 unsigned PtrByteSize = 8;
4572 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4573
4574 static const MCPhysReg GPR[] = {
4575 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4576 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4577 };
4578 static const MCPhysReg VR[] = {
4579 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4580 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4581 };
4582
4583 const unsigned Num_GPR_Regs = std::size(GPR);
4584 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4585 const unsigned Num_VR_Regs = std::size(VR);
4586
4587 // Do a first pass over the arguments to determine whether the ABI
4588 // guarantees that our caller has allocated the parameter save area
4589 // on its stack frame. In the ELFv1 ABI, this is always the case;
4590 // in the ELFv2 ABI, it is true if this is a vararg function or if
4591 // any parameter is located in a stack slot.
4592
4593 bool HasParameterArea = !isELFv2ABI || isVarArg;
4594 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4595 unsigned NumBytes = LinkageSize;
4596 unsigned AvailableFPRs = Num_FPR_Regs;
4597 unsigned AvailableVRs = Num_VR_Regs;
4598 for (const ISD::InputArg &In : Ins) {
4599 if (In.Flags.isNest())
4600 continue;
4601
4602 if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
4603 LinkageSize, ParamAreaSize, NumBytes,
4604 AvailableFPRs, AvailableVRs))
4605 HasParameterArea = true;
4606 }
4607
4608 // Add DAG nodes to load the arguments or copy them out of registers. On
4609 // entry to a function on PPC, the arguments start after the linkage area,
4610 // although the first ones are often in registers.
4611
4612 unsigned ArgOffset = LinkageSize;
4613 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4614 SmallVector<SDValue, 8> MemOps;
4615 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4616 unsigned CurArgIdx = 0;
4617 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4618 SDValue ArgVal;
4619 bool needsLoad = false;
4620 EVT ObjectVT = Ins[ArgNo].VT;
4621 EVT OrigVT = Ins[ArgNo].ArgVT;
4622 unsigned ObjSize = ObjectVT.getStoreSize();
4623 unsigned ArgSize = ObjSize;
4624 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4625 if (Ins[ArgNo].isOrigArg()) {
4626 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4627 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4628 }
4629 // We re-align the argument offset for each argument, except when using the
4630 // fast calling convention, when we need to make sure we do that only when
4631 // we'll actually use a stack slot.
4632 unsigned CurArgOffset;
4633 Align Alignment;
4634 auto ComputeArgOffset = [&]() {
4635 /* Respect alignment of argument on the stack. */
4636 Alignment =
4637 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4638 ArgOffset = alignTo(ArgOffset, Alignment);
4639 CurArgOffset = ArgOffset;
4640 };
4641
4642 if (CallConv != CallingConv::Fast) {
4643 ComputeArgOffset();
4644
4645 /* Compute GPR index associated with argument offset. */
4646 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4647 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4648 }
4649
4650 // FIXME the codegen can be much improved in some cases.
4651 // We do not have to keep everything in memory.
4652 if (Flags.isByVal()) {
4653 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4654
4655 if (CallConv == CallingConv::Fast)
4656 ComputeArgOffset();
4657
4658 // ObjSize is the true size; ArgSize is ObjSize rounded up to a multiple of the register size.
4659 ObjSize = Flags.getByValSize();
4660 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4661 // Empty aggregate parameters do not take up registers. Examples:
4662 // struct { } a;
4663 // union { } b;
4664 // int c[0];
4665 // etc. However, we have to provide a place-holder in InVals, so
4666 // pretend we have an 8-byte item at the current address for that
4667 // purpose.
4668 if (!ObjSize) {
4669 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4670 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4671 InVals.push_back(FIN);
4672 continue;
4673 }
4674
4675 // Create a stack object covering all stack doublewords occupied
4676 // by the argument. If the argument is (fully or partially) on
4677 // the stack, or if the argument is fully in registers but the
4678 // caller has allocated the parameter save anyway, we can refer
4679 // directly to the caller's stack frame. Otherwise, create a
4680 // local copy in our own frame.
4681 int FI;
4682 if (HasParameterArea ||
4683 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4684 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4685 else
4686 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4687 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4688
4689 // Handle aggregates smaller than 8 bytes.
4690 if (ObjSize < PtrByteSize) {
4691 // The value of the object is its address, which differs from the
4692 // address of the enclosing doubleword on big-endian systems.
4693 SDValue Arg = FIN;
4694 if (!isLittleEndian) {
4695 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4696 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4697 }
4698 InVals.push_back(Arg);
4699
4700 if (GPR_idx != Num_GPR_Regs) {
4701 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4702 FuncInfo->addLiveInAttr(VReg, Flags);
4703 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4704 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4705 SDValue Store =
4706 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4707 MachinePointerInfo(&*FuncArg), ObjType);
4708 MemOps.push_back(Store);
4709 }
4710 // Whether we copied from a register or not, advance the offset
4711 // into the parameter save area by a full doubleword.
4712 ArgOffset += PtrByteSize;
4713 continue;
4714 }
4715
4716 // The value of the object is its address, which is the address of
4717 // its first stack doubleword.
4718 InVals.push_back(FIN);
4719
4720 // Store whatever pieces of the object are in registers to memory.
4721 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4722 if (GPR_idx == Num_GPR_Regs)
4723 break;
4724
4725 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4726 FuncInfo->addLiveInAttr(VReg, Flags);
4727 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4728 SDValue Addr = FIN;
4729 if (j) {
4730 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4731 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4732 }
4733 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4734 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4735 SDValue Store =
4736 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4737 MachinePointerInfo(&*FuncArg, j), ObjType);
4738 MemOps.push_back(Store);
4739 ++GPR_idx;
4740 }
4741 ArgOffset += ArgSize;
4742 continue;
4743 }
4744
4745 switch (ObjectVT.getSimpleVT().SimpleTy) {
4746 default: llvm_unreachable("Unhandled argument type!");
4747 case MVT::i1:
4748 case MVT::i32:
4749 case MVT::i64:
4750 if (Flags.isNest()) {
4751 // The 'nest' parameter, if any, is passed in R11.
4752 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4753 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4754
4755 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4756 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4757
4758 break;
4759 }
4760
4761 // These can be scalar arguments or elements of an integer array type
4762 // passed directly. Clang may use those instead of "byval" aggregate
4763 // types to avoid forcing arguments to memory unnecessarily.
4764 if (GPR_idx != Num_GPR_Regs) {
4765 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4766 FuncInfo->addLiveInAttr(VReg, Flags);
4767 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4768
4769 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4770 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4771 // value to MVT::i64 and then truncate to the correct register size.
4772 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4773 } else {
4774 if (CallConv == CallingConv::Fast)
4775 ComputeArgOffset();
4776
4777 needsLoad = true;
4778 ArgSize = PtrByteSize;
4779 }
4780 if (CallConv != CallingConv::Fast || needsLoad)
4781 ArgOffset += 8;
4782 break;
4783
4784 case MVT::f32:
4785 case MVT::f64:
4786 // These can be scalar arguments or elements of a float array type
4787 // passed directly. The latter are used to implement ELFv2 homogenous
4788 // float aggregates.
4789 if (FPR_idx != Num_FPR_Regs) {
4790 unsigned VReg;
4791
4792 if (ObjectVT == MVT::f32)
4793 VReg = MF.addLiveIn(FPR[FPR_idx],
4794 Subtarget.hasP8Vector()
4795 ? &PPC::VSSRCRegClass
4796 : &PPC::F4RCRegClass);
4797 else
4798 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4799 ? &PPC::VSFRCRegClass
4800 : &PPC::F8RCRegClass);
4801
4802 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4803 ++FPR_idx;
4804 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4805 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4806 // once we support fp <-> gpr moves.
4807
4808 // This can only ever happen in the presence of f32 array types,
4809 // since otherwise we never run out of FPRs before running out
4810 // of GPRs.
4811 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4812 FuncInfo->addLiveInAttr(VReg, Flags);
4813 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4814
4815 if (ObjectVT == MVT::f32) {
4816 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4817 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4818 DAG.getConstant(32, dl, MVT::i32));
4819 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4820 }
4821
4822 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4823 } else {
4824 if (CallConv == CallingConv::Fast)
4825 ComputeArgOffset();
4826
4827 needsLoad = true;
4828 }
4829
4830 // When passing an array of floats, the array occupies consecutive
4831 // space in the argument area; only round up to the next doubleword
4832 // at the end of the array. Otherwise, each float takes 8 bytes.
4833 if (CallConv != CallingConv::Fast || needsLoad) {
4834 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4835 ArgOffset += ArgSize;
4836 if (Flags.isInConsecutiveRegsLast())
4837 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4838 }
4839 break;
4840 case MVT::v4f32:
4841 case MVT::v4i32:
4842 case MVT::v8i16:
4843 case MVT::v16i8:
4844 case MVT::v2f64:
4845 case MVT::v2i64:
4846 case MVT::v1i128:
4847 case MVT::f128:
4848 // These can be scalar arguments or elements of a vector array type
4849 // passed directly. The latter are used to implement ELFv2 homogenous
4850 // vector aggregates.
4851 if (VR_idx != Num_VR_Regs) {
4852 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4853 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4854 ++VR_idx;
4855 } else {
4856 if (CallConv == CallingConv::Fast)
4857 ComputeArgOffset();
4858 needsLoad = true;
4859 }
4860 if (CallConv != CallingConv::Fast || needsLoad)
4861 ArgOffset += 16;
4862 break;
4863 }
4864
4865 // We need to load the argument to a virtual register if we determined
4866 // above that we ran out of physical registers of the appropriate type.
4867 if (needsLoad) {
4868 if (ObjSize < ArgSize && !isLittleEndian)
4869 CurArgOffset += ArgSize - ObjSize;
4870 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4871 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4872 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4873 }
4874
4875 InVals.push_back(ArgVal);
4876 }
4877
4878 // Area that is at least reserved in the caller of this function.
4879 unsigned MinReservedArea;
4880 if (HasParameterArea)
4881 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4882 else
4883 MinReservedArea = LinkageSize;
4884
4885 // Set the size that is at least reserved in caller of this function. Tail
4886 // call optimized functions' reserved stack space needs to be aligned so that
4887 // taking the difference between two stack areas will result in an aligned
4888 // stack.
4889 MinReservedArea =
4890 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4891 FuncInfo->setMinReservedArea(MinReservedArea);
4892
4893 // If the function takes variable number of arguments, make a frame index for
4894 // the start of the first vararg value... for expansion of llvm.va_start.
4895 // As the ELFv2 ABI spec notes:
4896 // C programs that are intended to be *portable* across different compilers
4897 // and architectures must use the header file <stdarg.h> to deal with variable
4898 // argument lists.
4899 if (isVarArg && MFI.hasVAStart()) {
4900 int Depth = ArgOffset;
4901
4902 FuncInfo->setVarArgsFrameIndex(
4903 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4904 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4905
4906 // If this function is vararg, store any remaining integer argument regs
4907 // to their spots on the stack so that they may be loaded by dereferencing
4908 // the result of va_next.
4909 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4910 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4911 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4912 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4913 SDValue Store =
4914 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4915 MemOps.push_back(Store);
4916 // Increment the address by PtrByteSize (8) for the next argument to store
4917 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4918 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4919 }
4920 }
4921
4922 if (!MemOps.empty())
4923 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4924
4925 return Chain;
4926}
4927
4928/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4929/// adjusted to accommodate the arguments for the tailcall.
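/// For example, if the caller reserved 64 bytes but the tail call's arguments
/// need 96 bytes, the returned SPDiff is -32 and the stack must be grown
/// accordingly.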
4930static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4931 unsigned ParamSize) {
4932
4933 if (!isTailCall) return 0;
4934
4935 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
4936 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4937 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4938 // Remember only if the new adjustment is bigger.
4939 if (SPDiff < FI->getTailCallSPDelta())
4940 FI->setTailCallSPDelta(SPDiff);
4941
4942 return SPDiff;
4943}
4944
4945static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4946
4947static bool callsShareTOCBase(const Function *Caller,
4948 const GlobalValue *CalleeGV,
4949 const TargetMachine &TM) {
4950 // It does not make sense to call callsShareTOCBase() with a caller that
4951 // is PC Relative since PC Relative callers do not have a TOC.
4952#ifndef NDEBUG
4953 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4954 assert(!STICaller->isUsingPCRelativeCalls() &&
4955 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4956#endif
4957
4958 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4959 // don't have enough information to determine if the caller and callee share
4960 // the same TOC base, so we have to pessimistically assume they don't for
4961 // correctness.
4962 if (!CalleeGV)
4963 return false;
4964
4965 // If the callee is preemptable, then the static linker will use a plt-stub
4966 // which saves the toc to the stack, and needs a nop after the call
4967 // instruction to convert to a toc-restore.
4968 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4969 return false;
4970
4971 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4972 // We may need a TOC restore in the situation where the caller requires a
4973 // valid TOC but the callee is PC Relative and does not.
4974 const Function *F = dyn_cast<Function>(CalleeGV);
4975 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4976
4977 // If we have an Alias we can try to get the function from there.
4978 if (Alias) {
4979 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4980 F = dyn_cast<Function>(GlobalObj);
4981 }
4982
4983 // If we still have no valid function pointer we do not have enough
4984 // information to determine if the callee uses PC Relative calls so we must
4985 // assume that it does.
4986 if (!F)
4987 return false;
4988
4989 // If the callee uses PC Relative we cannot guarantee that the callee won't
4990 // clobber the TOC of the caller and so we must assume that the two
4991 // functions do not share a TOC base.
4992 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4993 if (STICallee->isUsingPCRelativeCalls())
4994 return false;
4995
4996 // If the GV is not a strong definition then we need to assume it can be
4997 // replaced by another function at link time. The function that replaces
4998 // it may not share the same TOC as the caller since the callee may be
4999 // replaced by a PC Relative version of the same function.
5000 if (!CalleeGV->isStrongDefinitionForLinker())
5001 return false;
5002
5003 // The medium and large code models are expected to provide a sufficiently
5004 // large TOC to satisfy all data addressing needs of a module with a
5005 // single TOC.
5006 if (CodeModel::Medium == TM.getCodeModel() ||
5007 CodeModel::Large == TM.getCodeModel())
5008 return true;
5009
5010 // Any explicitly-specified sections and section prefixes must also match.
5011 // Also, if we're using -ffunction-sections, then each function is always in
5012 // a different section (the same is true for COMDAT functions).
5013 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
5014 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
5015 return false;
5016 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
5017 if (F->getSectionPrefix() != Caller->getSectionPrefix())
5018 return false;
5019 }
5020
5021 return true;
5022}
5023
5024static bool
5025needStackSlotPassParameters(const PPCSubtarget &Subtarget,
5026                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
5027 assert(Subtarget.is64BitELFABI());
5028
5029 const unsigned PtrByteSize = 8;
5030 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5031
5032 static const MCPhysReg GPR[] = {
5033 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5034 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5035 };
5036 static const MCPhysReg VR[] = {
5037 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5038 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5039 };
5040
5041 const unsigned NumGPRs = std::size(GPR);
5042 const unsigned NumFPRs = 13;
5043 const unsigned NumVRs = std::size(VR);
5044 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5045
5046 unsigned NumBytes = LinkageSize;
5047 unsigned AvailableFPRs = NumFPRs;
5048 unsigned AvailableVRs = NumVRs;
5049
5050 for (const ISD::OutputArg& Param : Outs) {
5051 if (Param.Flags.isNest()) continue;
5052
5053 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
5054 LinkageSize, ParamAreaSize, NumBytes,
5055 AvailableFPRs, AvailableVRs))
5056 return true;
5057 }
5058 return false;
5059}
5060
5061static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
5062 if (CB.arg_size() != CallerFn->arg_size())
5063 return false;
5064
5065 auto CalleeArgIter = CB.arg_begin();
5066 auto CalleeArgEnd = CB.arg_end();
5067 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
5068
5069 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
5070 const Value* CalleeArg = *CalleeArgIter;
5071 const Value* CallerArg = &(*CallerArgIter);
5072 if (CalleeArg == CallerArg)
5073 continue;
5074
5075 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
5076 // tail call @callee([4 x i64] undef, [4 x i64] %b)
5077 // }
5078 // 1st argument of callee is undef and has the same type as caller.
5079 if (CalleeArg->getType() == CallerArg->getType() &&
5080 isa<UndefValue>(CalleeArg))
5081 continue;
5082
5083 return false;
5084 }
5085
5086 return true;
5087}
5088
5089// Returns true if TCO is possible between the callers and callees
5090// calling conventions.
5091static bool
5092areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
5093                                    CallingConv::ID CalleeCC) {
5094 // Tail calls are possible with fastcc and ccc.
5095 auto isTailCallableCC = [] (CallingConv::ID CC){
5096 return CC == CallingConv::C || CC == CallingConv::Fast;
5097 };
5098 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
5099 return false;
5100
5101 // We can safely tail call both fastcc and ccc callees from a c calling
5102 // convention caller. If the caller is fastcc, we may have less stack space
5103 // than a non-fastcc caller with the same signature so disable tail-calls in
5104 // that case.
5105 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
5106}
5107
5108bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
5109 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5110 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5111    const SmallVectorImpl<ISD::OutputArg> &Outs,
5112    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5113 bool isCalleeExternalSymbol) const {
5114 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
5115
5116 if (DisableSCO && !TailCallOpt) return false;
5117
5118 // Variadic argument functions are not supported.
5119 if (isVarArg) return false;
5120
5121 // Check that the calling conventions are compatible for tco.
5122 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
5123 return false;
5124
5125  // A caller that has any byval parameter is not supported.
5126 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5127 return false;
5128
5129  // A callee that has any byval parameter is not supported either.
5130  // Note: This is a quick workaround, because in some cases, e.g.
5131 // caller's stack size > callee's stack size, we are still able to apply
5132 // sibling call optimization. For example, gcc is able to do SCO for caller1
5133 // in the following example, but not for caller2.
5134 // struct test {
5135 // long int a;
5136 // char ary[56];
5137 // } gTest;
5138 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
5139 // b->a = v.a;
5140 // return 0;
5141 // }
5142 // void caller1(struct test a, struct test c, struct test *b) {
5143 // callee(gTest, b); }
5144 // void caller2(struct test *b) { callee(gTest, b); }
5145 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
5146 return false;
5147
5148 // If callee and caller use different calling conventions, we cannot pass
5149 // parameters on stack since offsets for the parameter area may be different.
5150 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
5151 return false;
5152
5153 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
5154 // the caller and callee share the same TOC for TCO/SCO. If the caller and
5155 // callee potentially have different TOC bases then we cannot tail call since
5156 // we need to restore the TOC pointer after the call.
5157 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
5158 // We cannot guarantee this for indirect calls or calls to external functions.
5159 // When PC-Relative addressing is used, the concept of the TOC is no longer
5160 // applicable so this check is not required.
5161 // Check first for indirect calls.
5162 if (!Subtarget.isUsingPCRelativeCalls() &&
5163 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5164 return false;
5165
5166 // Check if we share the TOC base.
5167 if (!Subtarget.isUsingPCRelativeCalls() &&
5168 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5169 return false;
5170
5171 // TCO allows altering callee ABI, so we don't have to check further.
5172 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5173 return true;
5174
5175 if (DisableSCO) return false;
5176
5177  // If the callee uses the same argument list as the caller, we can apply SCO
5178  // in this case. If not, we need to check whether the callee needs stack for
5179  // passing arguments.
5180 // PC Relative tail calls may not have a CallBase.
5181 // If there is no CallBase we cannot verify if we have the same argument
5182 // list so assume that we don't have the same argument list.
5183 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5184 needStackSlotPassParameters(Subtarget, Outs))
5185 return false;
5186 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5187 return false;
5188
5189 return true;
5190}
5191
5192/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5193/// for tail call optimization. Targets which want to do tail call
5194/// optimization should implement this function.
5195bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5196 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5197 CallingConv::ID CallerCC, bool isVarArg,
5198 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5199 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5200 return false;
5201
5202 // Variable argument functions are not supported.
5203 if (isVarArg)
5204 return false;
5205
5206 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5207 // Functions containing by val parameters are not supported.
5208 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5209 return false;
5210
5211 // Non-PIC/GOT tail calls are supported.
5212 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5213 return true;
5214
5215 // At the moment we can only do local tail calls (in same module, hidden
5216 // or protected) if we are generating PIC.
5217 if (CalleeGV)
5218 return CalleeGV->hasHiddenVisibility() ||
5219 CalleeGV->hasProtectedVisibility();
5220 }
5221
5222 return false;
5223}
5224
5225/// isBLACompatibleAddress - Return the immediate to use if the specified
5226/// 32-bit value is representable in the immediate field of a BxA instruction.
5227static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5228  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5229  if (!C) return nullptr;
5230
5231 int Addr = C->getZExtValue();
5232 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5233 SignExtend32<26>(Addr) != Addr)
5234 return nullptr; // Top 6 bits have to be sext of immediate.
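  // A BxA form branch encodes a 24-bit LI field that is shifted left by two
  // and sign-extended, so only 4-byte-aligned targets within a signed 26-bit
  // range can be reached directly; anything else must go through a register.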
5235
5236  return DAG
5237      .getConstant(
5238          (int)C->getZExtValue() >> 2, SDLoc(Op),
5239          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5240      .getNode();
5241}
5242
5243namespace {
5244
5245struct TailCallArgumentInfo {
5246 SDValue Arg;
5247 SDValue FrameIdxOp;
5248 int FrameIdx = 0;
5249
5250 TailCallArgumentInfo() = default;
5251};
5252
5253} // end anonymous namespace
5254
5255/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5256static void StoreTailCallArgumentsToStackSlot(
5257    SelectionDAG &DAG, SDValue Chain,
5258 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5259 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5260 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5261 SDValue Arg = TailCallArgs[i].Arg;
5262 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5263 int FI = TailCallArgs[i].FrameIdx;
5264 // Store relative to framepointer.
5265 MemOpChains.push_back(DAG.getStore(
5266 Chain, dl, Arg, FIN,
5267        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5268  }
5269}
5270
5271/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5272/// the appropriate stack slot for the tail call optimized function call.
5273static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5274                                             SDValue OldRetAddr, SDValue OldFP,
5275 int SPDiff, const SDLoc &dl) {
5276 if (SPDiff) {
5277 // Calculate the new stack slot for the return address.
5278    MachineFunction &MF = DAG.getMachineFunction();
5279    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5280 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5281 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5282 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5283 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5284 NewRetAddrLoc, true);
5285 SDValue NewRetAddrFrIdx =
5286 DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5287 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5288 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5289 }
5290 return Chain;
5291}
5292
5293/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5294/// the position of the argument.
5295static void CalculateTailCallArgDest(
5296    SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5297 int SPDiff, unsigned ArgOffset,
5298 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5299 int Offset = ArgOffset + SPDiff;
5300 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5301 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5302 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5303 SDValue FIN = DAG.getFrameIndex(FI, VT);
5304 TailCallArgumentInfo Info;
5305 Info.Arg = Arg;
5306 Info.FrameIdxOp = FIN;
5307 Info.FrameIdx = FI;
5308 TailCallArguments.push_back(Info);
5309}
5310
5311/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
5312/// address stack slots. Returns the chain as result and the loaded values in
5313/// LROpOut/FPOpOut. Used when tail calling.
5314SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5315 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5316 SDValue &FPOpOut, const SDLoc &dl) const {
5317 if (SPDiff) {
5318 // Load the LR and FP stack slot for later adjusting.
5319 LROpOut = getReturnAddrFrameIndex(DAG);
5320 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5321 MachinePointerInfo());
5322 Chain = SDValue(LROpOut.getNode(), 1);
5323 }
5324 return Chain;
5325}
5326
5327/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5328/// by "Src" to address "Dst" of size "Size". Alignment information is
5329/// specified by the specific parameter attribute. The copy will be passed as
5330/// a byval function parameter.
5331/// Sometimes what we are copying is the end of a larger object, the part that
5332/// does not fit in registers.
5333static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
5334                                         SDValue Chain, ISD::ArgFlagsTy Flags,
5335 SelectionDAG &DAG, const SDLoc &dl) {
5336 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5337 return DAG.getMemcpy(
5338 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5339 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5340}
5341
5342/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5343/// tail calls.
5344static void LowerMemOpCallTo(
5345    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5346 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5347 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5348 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5349  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5350  if (!isTailCall) {
5351 if (isVector) {
5352 SDValue StackPtr;
5353 if (isPPC64)
5354 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5355 else
5356 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5357 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5358 DAG.getConstant(ArgOffset, dl, PtrVT));
5359 }
5360 MemOpChains.push_back(
5361 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5362 // Calculate and remember argument location.
5363 } else
5364 CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5365 TailCallArguments);
5366}
5367
5368static void
5369PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5370                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5371 SDValue FPOp,
5372 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5373 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5374 // might overwrite each other in case of tail call optimization.
5375 SmallVector<SDValue, 8> MemOpChains2;
5376 // Do not flag preceding copytoreg stuff together with the following stuff.
5377 InGlue = SDValue();
5378 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5379 MemOpChains2, dl);
5380 if (!MemOpChains2.empty())
5381 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5382
5383 // Store the return address to the appropriate stack slot.
5384 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5385
5386 // Emit callseq_end just before tailcall node.
5387 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5388 InGlue = Chain.getValue(1);
5389}
5390
5391// Is this global address that of a function that can be called by name? (as
5392// opposed to something that must hold a descriptor for an indirect call).
5393static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5394 if (GV) {
5395 if (GV->isThreadLocal())
5396 return false;
5397
5398 return GV->getValueType()->isFunctionTy();
5399 }
5400
5401 return false;
5402}
5403
5404SDValue PPCTargetLowering::LowerCallResult(
5405 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5406 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5407 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5408  SmallVector<CCValAssign, 16> RVLocs;
5409  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5410 *DAG.getContext());
5411
5412 CCRetInfo.AnalyzeCallResult(
5413 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5414          ? RetCC_PPC_Cold
5415          : RetCC_PPC);
5416
5417 // Copy all of the result registers out of their specified physreg.
5418 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5419 CCValAssign &VA = RVLocs[i];
5420 assert(VA.isRegLoc() && "Can only return in registers!");
5421
5422 SDValue Val;
5423
5424 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5425 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5426 InGlue);
5427 Chain = Lo.getValue(1);
5428 InGlue = Lo.getValue(2);
5429 VA = RVLocs[++i]; // skip ahead to next loc
5430 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5431 InGlue);
5432 Chain = Hi.getValue(1);
5433 InGlue = Hi.getValue(2);
5434 if (!Subtarget.isLittleEndian())
5435 std::swap (Lo, Hi);
5436 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5437 } else {
5438 Val = DAG.getCopyFromReg(Chain, dl,
5439 VA.getLocReg(), VA.getLocVT(), InGlue);
5440 Chain = Val.getValue(1);
5441 InGlue = Val.getValue(2);
5442 }
5443
5444 switch (VA.getLocInfo()) {
5445 default: llvm_unreachable("Unknown loc info!");
5446 case CCValAssign::Full: break;
5447 case CCValAssign::AExt:
5448 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5449 break;
5450 case CCValAssign::ZExt:
5451 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5452 DAG.getValueType(VA.getValVT()));
5453 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5454 break;
5455 case CCValAssign::SExt:
5456 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5457 DAG.getValueType(VA.getValVT()));
5458 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5459 break;
5460 }
5461
5462 InVals.push_back(Val);
5463 }
5464
5465 return Chain;
5466}
5467
5468static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5469 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5470 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5471 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5472
5473 // PatchPoint calls are not indirect.
5474 if (isPatchPoint)
5475 return false;
5476
5477  if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5478    return false;
5479
5480  // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5481  // because the immediate function pointer points to a descriptor instead of
5482 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5483 // pointer immediate points to the global entry point, while the BLA would
5484 // need to jump to the local entry point (see rL211174).
5485 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5486 isBLACompatibleAddress(Callee, DAG))
5487 return false;
5488
5489 return true;
5490}
5491
5492// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5493static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5494 return Subtarget.isAIXABI() ||
5495 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5496}
5497
5498static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5499                              const Function &Caller, const SDValue &Callee,
5500 const PPCSubtarget &Subtarget,
5501 const TargetMachine &TM,
5502 bool IsStrictFPCall = false) {
5503 if (CFlags.IsTailCall)
5504 return PPCISD::TC_RETURN;
5505
5506 unsigned RetOpc = 0;
5507 // This is a call through a function pointer.
5508 if (CFlags.IsIndirect) {
5509    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5510 // indirect calls. The save of the caller's TOC pointer to the stack will be
5511 // inserted into the DAG as part of call lowering. The restore of the TOC
5512 // pointer is modeled by using a pseudo instruction for the call opcode that
5513 // represents the 2 instruction sequence of an indirect branch and link,
5514 // immediately followed by a load of the TOC pointer from the stack save
5515 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5516 // as it is not saved or used.
5517    RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5518                                                 : PPCISD::BCTRL;
5519 } else if (Subtarget.isUsingPCRelativeCalls()) {
5520 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5521 RetOpc = PPCISD::CALL_NOTOC;
5522 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5523    // The ABIs that maintain a TOC pointer across calls need to have a nop
5524 // immediately following the call instruction if the caller and callee may
5525 // have different TOC bases. At link time if the linker determines the calls
5526 // may not share a TOC base, the call is redirected to a trampoline inserted
5527    // by the linker. The trampoline will (among other things) save the caller's
5528 // TOC pointer at an ABI designated offset in the linkage area and the
5529 // linker will rewrite the nop to be a load of the TOC pointer from the
5530 // linkage area into gpr2.
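    // The resulting call site is a "bl callee" immediately followed by a nop;
    // the linker may rewrite that nop into a load of the saved TOC pointer,
    // e.g. "ld 2, <TOC save offset>(1)" (the offset is ABI-dependent).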
5531 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5532 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5533 RetOpc =
5534 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5535 } else
5536 RetOpc = PPCISD::CALL;
5537 if (IsStrictFPCall) {
5538 switch (RetOpc) {
5539 default:
5540 llvm_unreachable("Unknown call opcode");
5541    case PPCISD::BCTRL_LOAD_TOC:
5542      RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5543      break;
5544 case PPCISD::BCTRL:
5545 RetOpc = PPCISD::BCTRL_RM;
5546 break;
5547 case PPCISD::CALL_NOTOC:
5548 RetOpc = PPCISD::CALL_NOTOC_RM;
5549 break;
5550 case PPCISD::CALL:
5551 RetOpc = PPCISD::CALL_RM;
5552 break;
5553 case PPCISD::CALL_NOP:
5554 RetOpc = PPCISD::CALL_NOP_RM;
5555 break;
5556 }
5557 }
5558 return RetOpc;
5559}
5560
5561static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5562 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5563 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5564 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5565 return SDValue(Dest, 0);
5566
5567 // Returns true if the callee is local, and false otherwise.
5568 auto isLocalCallee = [&]() {
5569    const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5570    const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5571
5572 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5573           !isa_and_nonnull<GlobalIFunc>(GV);
5574  };
5575
5576 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5577 // a static relocation model causes some versions of GNU LD (2.17.50, at
5578 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5579 // built with secure-PLT.
5580 bool UsePlt =
5581 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5582      Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5583
5584 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5585 const TargetMachine &TM = Subtarget.getTargetMachine();
5586 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5587 auto *S =
5588 static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5589
5590    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5591    return DAG.getMCSymbol(S, PtrVT);
5592 };
5593
5594 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5595 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5596 if (isFunctionGlobalAddress(GV)) {
5597 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5598
5599 if (Subtarget.isAIXABI()) {
5600 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5601 return getAIXFuncEntryPointSymbolSDNode(GV);
5602 }
5603 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5604 UsePlt ? PPCII::MO_PLT : 0);
5605 }
5606
5607  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5608    const char *SymName = S->getSymbol();
5609 if (Subtarget.isAIXABI()) {
5610 // If there exists a user-declared function whose name is the same as the
5611 // ExternalSymbol's, then we pick up the user-declared version.
5612      const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5613      if (const Function *F =
5614 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5615 return getAIXFuncEntryPointSymbolSDNode(F);
5616
5617 // On AIX, direct function calls reference the symbol for the function's
5618 // entry point, which is named by prepending a "." before the function's
5619 // C-linkage name. A Qualname is returned here because an external
5620 // function entry point is a csect with XTY_ER property.
5621 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5622 auto &Context = DAG.getMachineFunction().getContext();
5623 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5624 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5625          XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5626      return Sec->getQualNameSymbol();
5627 };
5628
5629 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5630 }
5631 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5632 UsePlt ? PPCII::MO_PLT : 0);
5633 }
5634
5635 // No transformation needed.
5636 assert(Callee.getNode() && "What no callee?");
5637 return Callee;
5638}
5639
5640static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5641  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5642 "Expected a CALLSEQ_STARTSDNode.");
5643
5644 // The last operand is the chain, except when the node has glue. If the node
5645 // has glue, then the last operand is the glue, and the chain is the second
5646 // last operand.
5647 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5648 if (LastValue.getValueType() != MVT::Glue)
5649 return LastValue;
5650
5651 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5652}
5653
5654// Creates the node that moves a function's address into the count register
5655// to prepare for an indirect call instruction.
5656static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5657 SDValue &Glue, SDValue &Chain,
5658 const SDLoc &dl) {
5659 SDValue MTCTROps[] = {Chain, Callee, Glue};
5660 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5661 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5662 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5663 // The glue is the second value produced.
5664 Glue = Chain.getValue(1);
5665}
5666
5667static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5668                                          SDValue &Glue, SDValue &Chain,
5669 SDValue CallSeqStart,
5670 const CallBase *CB, const SDLoc &dl,
5671 bool hasNest,
5672 const PPCSubtarget &Subtarget) {
5673 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5674 // entry point, but to the function descriptor (the function entry point
5675 // address is part of the function descriptor though).
5676 // The function descriptor is a three doubleword structure with the
5677 // following fields: function entry point, TOC base address and
5678 // environment pointer.
5679 // Thus for a call through a function pointer, the following actions need
5680 // to be performed:
5681 // 1. Save the TOC of the caller in the TOC save area of its stack
5682 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5683 // 2. Load the address of the function entry point from the function
5684 // descriptor.
5685 // 3. Load the TOC of the callee from the function descriptor into r2.
5686 // 4. Load the environment pointer from the function descriptor into
5687 // r11.
5688 // 5. Branch to the function entry point address.
5689 // 6. On return of the callee, the TOC of the caller needs to be
5690 // restored (this is done in FinishCall()).
5691 //
5692 // The loads are scheduled at the beginning of the call sequence, and the
5693 // register copies are flagged together to ensure that no other
5694 // operations can be scheduled in between. E.g. without flagging the
5695 // copies together, a TOC access in the caller could be scheduled between
5696 // the assignment of the callee TOC and the branch to the callee, which leads
5697 // to incorrect code.
5698
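  // As a rough sketch (register numbers are illustrative and the offsets shown
  // assume the 64-bit descriptor layout), the resulting sequence looks like:
  //   ld 12, 0(ptr)              ; function entry point from the descriptor
  //   ld 2, 8(ptr)               ; callee TOC anchor
  //   ld 11, 16(ptr)             ; environment pointer
  //   mtctr 12
  //   bctrl
  //   ld 2, <TOC save offset>(1) ; caller TOC restored after the call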
5699 // Start by loading the function address from the descriptor.
5700 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5701  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5702                      ? (MachineMemOperand::MODereferenceable |
5703                         MachineMemOperand::MOInvariant)
5704                      : MachineMemOperand::MONone;
5705
5706 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5707
5708 // Registers used in building the DAG.
5709 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5710 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5711
5712 // Offsets of descriptor members.
5713 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5714 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5715
5716 const MVT RegVT = Subtarget.getScalarIntVT();
5717 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5718
5719 // One load for the functions entry point address.
5720 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5721 Alignment, MMOFlags);
5722
5723 // One for loading the TOC anchor for the module that contains the called
5724 // function.
5725 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5726 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5727 SDValue TOCPtr =
5728 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5729 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5730
5731 // One for loading the environment pointer.
5732 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5733 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5734 SDValue LoadEnvPtr =
5735 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5736 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5737
5738
5739 // Then copy the newly loaded TOC anchor to the TOC pointer.
5740 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5741 Chain = TOCVal.getValue(0);
5742 Glue = TOCVal.getValue(1);
5743
5744 // If the function call has an explicit 'nest' parameter, it takes the
5745 // place of the environment pointer.
5746 assert((!hasNest || !Subtarget.isAIXABI()) &&
5747 "Nest parameter is not supported on AIX.");
5748 if (!hasNest) {
5749 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5750 Chain = EnvVal.getValue(0);
5751 Glue = EnvVal.getValue(1);
5752 }
5753
5754 // The rest of the indirect call sequence is the same as the non-descriptor
5755 // DAG.
5756 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5757}
5758
5759static void
5760buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5761                  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5762 SelectionDAG &DAG,
5763 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5764 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5765 const PPCSubtarget &Subtarget) {
5766 const bool IsPPC64 = Subtarget.isPPC64();
5767 // MVT for a general purpose register.
5768 const MVT RegVT = Subtarget.getScalarIntVT();
5769
5770 // First operand is always the chain.
5771 Ops.push_back(Chain);
5772
5773 // If it's a direct call pass the callee as the second operand.
5774 if (!CFlags.IsIndirect)
5775 Ops.push_back(Callee);
5776 else {
5777 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5778
5779 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5780 // on the stack (this would have been done in `LowerCall_64SVR4` or
5781 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5782 // represents both the indirect branch and a load that restores the TOC
5783 // pointer from the linkage area. The operand for the TOC restore is an add
5784 // of the TOC save offset to the stack pointer. This must be the second
5785 // operand: after the chain input but before any other variadic arguments.
5786 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5787 // saved or used.
5788 if (isTOCSaveRestoreRequired(Subtarget)) {
5789 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5790
5791 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5792 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5793 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5794 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5795 Ops.push_back(AddTOC);
5796 }
5797
5798 // Add the register used for the environment pointer.
5799 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5800 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5801 RegVT));
5802
5803
5804 // Add CTR register as callee so a bctr can be emitted later.
5805 if (CFlags.IsTailCall)
5806 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5807 }
5808
5809 // If this is a tail call add stack pointer delta.
5810 if (CFlags.IsTailCall)
5811 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5812
5813 // Add argument registers to the end of the list so that they are known live
5814 // into the call.
5815 for (const auto &[Reg, N] : RegsToPass)
5816 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
5817
5818 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5819 // no way to mark dependencies as implicit here.
5820 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5821 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5822 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5823 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5824
5825 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5826 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5827 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5828
5829 // Add a register mask operand representing the call-preserved registers.
5830 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5831 const uint32_t *Mask =
5832 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5833 assert(Mask && "Missing call preserved mask for calling convention");
5834 Ops.push_back(DAG.getRegisterMask(Mask));
5835
5836 // If the glue is valid, it is the last operand.
5837 if (Glue.getNode())
5838 Ops.push_back(Glue);
5839}
5840
5841SDValue PPCTargetLowering::FinishCall(
5842 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5843 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5844 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5845 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5846 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5847
5848 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5849 Subtarget.isAIXABI())
5850 setUsesTOCBasePtr(DAG);
5851
5852 unsigned CallOpc =
5853 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5854 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5855
5856 if (!CFlags.IsIndirect)
5857 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5858 else if (Subtarget.usesFunctionDescriptors())
5859 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5860 dl, CFlags.HasNest, Subtarget);
5861 else
5862 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5863
5864 // Build the operand list for the call instruction.
5865  SmallVector<SDValue, 8> Ops;
5866  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5867 SPDiff, Subtarget);
5868
5869 // Emit tail call.
5870 if (CFlags.IsTailCall) {
5871    // Indirect tail calls when using PC Relative calls do not have the same
5872 // constraints.
5873 assert(((Callee.getOpcode() == ISD::Register &&
5874 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5875 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5876 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5877 isa<ConstantSDNode>(Callee) ||
5878 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5879 "Expecting a global address, external symbol, absolute value, "
5880 "register or an indirect tail call when PC Relative calls are "
5881 "used.");
5882 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5883 assert(CallOpc == PPCISD::TC_RETURN &&
5884 "Unexpected call opcode for a tail call.");
5885    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5886    SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5887 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5888 return Ret;
5889 }
5890
5891 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5892 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5893 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5894 Glue = Chain.getValue(1);
5895
5896 // When performing tail call optimization the callee pops its arguments off
5897 // the stack. Account for this here so these bytes can be pushed back on in
5898 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5899 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5900                             getTargetMachine().Options.GuaranteedTailCallOpt)
5901                            ? NumBytes
5902 : 0;
5903
5904 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5905 Glue = Chain.getValue(1);
5906
5907 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5908 DAG, InVals);
5909}
5910
5911bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5912  CallingConv::ID CalleeCC = CB->getCallingConv();
5913 const Function *CallerFunc = CB->getCaller();
5914 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5915 const Function *CalleeFunc = CB->getCalledFunction();
5916 if (!CalleeFunc)
5917 return false;
5918 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5919
5920  SmallVector<ISD::OutputArg, 2> Outs;
5921  SmallVector<ISD::InputArg, 2> Ins;
5922
5923 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5924 CalleeFunc->getAttributes(), Outs, *this,
5925 CalleeFunc->getDataLayout());
5926
5927 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5928 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5929 false /*isCalleeExternalSymbol*/);
5930}
5931
5932bool PPCTargetLowering::isEligibleForTCO(
5933 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5934 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5935    const SmallVectorImpl<ISD::OutputArg> &Outs,
5936    const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5937 bool isCalleeExternalSymbol) const {
5938 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5939 return false;
5940
5941 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5942 return IsEligibleForTailCallOptimization_64SVR4(
5943 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5944 isCalleeExternalSymbol);
5945 else
5946 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5947 isVarArg, Ins);
5948}
5949
5950SDValue
5951PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5952 SmallVectorImpl<SDValue> &InVals) const {
5953 SelectionDAG &DAG = CLI.DAG;
5954 SDLoc &dl = CLI.DL;
5955  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5956  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5957  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5958  SDValue Chain = CLI.Chain;
5959 SDValue Callee = CLI.Callee;
5960 bool &isTailCall = CLI.IsTailCall;
5961 CallingConv::ID CallConv = CLI.CallConv;
5962 bool isVarArg = CLI.IsVarArg;
5963 bool isPatchPoint = CLI.IsPatchPoint;
5964 const CallBase *CB = CLI.CB;
5965
5966 if (isTailCall) {
5967    MachineFunction &MF = DAG.getMachineFunction();
5968    CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5969 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5970 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5971 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5972
5973 isTailCall =
5974 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5975 &(MF.getFunction()), IsCalleeExternalSymbol);
5976 if (isTailCall) {
5977 ++NumTailCalls;
5978 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5979 ++NumSiblingCalls;
5980
5981 // PC Relative calls no longer guarantee that the callee is a Global
5982 // Address Node. The callee could be an indirect tail call in which
5983 // case the SDValue for the callee could be a load (to load the address
5984 // of a function pointer) or it may be a register copy (to move the
5985 // address of the callee from a function parameter into a virtual
5986      // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5987 assert((Subtarget.isUsingPCRelativeCalls() ||
5988 isa<GlobalAddressSDNode>(Callee)) &&
5989 "Callee should be an llvm::Function object.");
5990
5991 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5992 << "\nTCO callee: ");
5993 LLVM_DEBUG(Callee.dump());
5994 }
5995 }
5996
5997 if (!isTailCall && CB && CB->isMustTailCall())
5998 report_fatal_error("failed to perform tail call elimination on a call "
5999 "site marked musttail");
6000
6001 // When long calls (i.e. indirect calls) are always used, calls are always
6002 // made via function pointer. If we have a function name, first translate it
6003 // into a pointer.
6004 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
6005 !isTailCall)
6006 Callee = LowerGlobalAddress(Callee, DAG);
6007
6008 CallFlags CFlags(
6009 CallConv, isTailCall, isVarArg, isPatchPoint,
6010 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
6011 // hasNest
6012 Subtarget.is64BitELFABI() &&
6013 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
6014 CLI.NoMerge);
6015
6016 if (Subtarget.isAIXABI())
6017 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
6018 InVals, CB);
6019
6020 assert(Subtarget.isSVR4ABI());
6021 if (Subtarget.isPPC64())
6022 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
6023 InVals, CB);
6024 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
6025 InVals, CB);
6026}
6027
6028SDValue PPCTargetLowering::LowerCall_32SVR4(
6029 SDValue Chain, SDValue Callee, CallFlags CFlags,
6030    const SmallVectorImpl<ISD::OutputArg> &Outs,
6031    const SmallVectorImpl<SDValue> &OutVals,
6032 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6033    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6034    const CallBase *CB) const {
6035 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
6036 // of the 32-bit SVR4 ABI stack frame layout.
6037
6038 const CallingConv::ID CallConv = CFlags.CallConv;
6039 const bool IsVarArg = CFlags.IsVarArg;
6040 const bool IsTailCall = CFlags.IsTailCall;
6041
6042 assert((CallConv == CallingConv::C ||
6043 CallConv == CallingConv::Cold ||
6044 CallConv == CallingConv::Fast) && "Unknown calling convention!");
6045
6046 const Align PtrAlign(4);
6047
6048 MachineFunction &MF = DAG.getMachineFunction();
6049
6050  // Mark this function as potentially containing a tail call. As a consequence
6051  // the frame pointer will be used for dynamic allocas and for restoring the
6052  // caller's stack pointer in this function's epilogue. This is done because
6053  // the tail-called function might overwrite the value in this function's (MF)
6054  // stack pointer stack slot 0(SP).
6055 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
6056 CallConv == CallingConv::Fast)
6057 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6058
6059 // Count how many bytes are to be pushed on the stack, including the linkage
6060 // area, parameter list area and the part of the local variable space which
6061 // contains copies of aggregates which are passed by value.
6062
6063 // Assign locations to all of the outgoing arguments.
6064  SmallVector<CCValAssign, 16> ArgLocs;
6065  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
6066
6067 // Reserve space for the linkage area on the stack.
6068 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
6069 PtrAlign);
6070
6071 if (IsVarArg) {
6072 // Handle fixed and variable vector arguments differently.
6073 // Fixed vector arguments go into registers as long as registers are
6074 // available. Variable vector arguments always go into memory.
6075 unsigned NumArgs = Outs.size();
6076
6077 for (unsigned i = 0; i != NumArgs; ++i) {
6078 MVT ArgVT = Outs[i].VT;
6079 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
6080 bool Result;
6081
6082 if (!ArgFlags.isVarArg()) {
6083 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
6084 Outs[i].OrigTy, CCInfo);
6085 } else {
6086        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
6087                                      ArgFlags, Outs[i].OrigTy, CCInfo);
6088 }
6089
6090 if (Result) {
6091#ifndef NDEBUG
6092 errs() << "Call operand #" << i << " has unhandled type "
6093 << ArgVT << "\n";
6094#endif
6095 llvm_unreachable(nullptr);
6096 }
6097 }
6098 } else {
6099 // All arguments are treated the same.
6100 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
6101 }
6102
6103 // Assign locations to all of the outgoing aggregate by value arguments.
6104 SmallVector<CCValAssign, 16> ByValArgLocs;
6105 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
6106
6107 // Reserve stack space for the allocations in CCInfo.
6108 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
6109
6110 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
6111
6112 // Size of the linkage area, parameter list area and the part of the local
6113 // space variable where copies of aggregates which are passed by value are
6114 // stored.
6115 unsigned NumBytes = CCByValInfo.getStackSize();
6116
6117 // Calculate by how many bytes the stack has to be adjusted in case of tail
6118 // call optimization.
6119 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
6120
6121 // Adjust the stack pointer for the new arguments...
6122 // These operations are automatically eliminated by the prolog/epilog pass
6123 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6124 SDValue CallSeqStart = Chain;
6125
6126  // Load the return address and frame pointer so they can be moved somewhere else
6127 // later.
6128 SDValue LROp, FPOp;
6129 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6130
6131 // Set up a copy of the stack pointer for use loading and storing any
6132 // arguments that may not fit in the registers available for argument
6133 // passing.
6134 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
6135
6137 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6138 SmallVector<SDValue, 8> MemOpChains;
6139
6140 bool seenFloatArg = false;
6141 // Walk the register/memloc assignments, inserting copies/loads.
6142 // i - Tracks the index into the list of registers allocated for the call
6143 // RealArgIdx - Tracks the index into the list of actual function arguments
6144 // j - Tracks the index into the list of byval arguments
6145 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
6146 i != e;
6147 ++i, ++RealArgIdx) {
6148 CCValAssign &VA = ArgLocs[i];
6149 SDValue Arg = OutVals[RealArgIdx];
6150 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
6151
6152 if (Flags.isByVal()) {
6153 // Argument is an aggregate which is passed by value, thus we need to
6154 // create a copy of it in the local variable space of the current stack
6155 // frame (which is the stack frame of the caller) and pass the address of
6156 // this copy to the callee.
6157 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
6158 CCValAssign &ByValVA = ByValArgLocs[j++];
6159 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
6160
6161 // Memory reserved in the local variable space of the callers stack frame.
6162 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6163
6164 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6165 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6166 StackPtr, PtrOff);
6167
6168 // Create a copy of the argument in the local area of the current
6169 // stack frame.
6170 SDValue MemcpyCall =
6171 CreateCopyOfByValArgument(Arg, PtrOff,
6172 CallSeqStart.getNode()->getOperand(0),
6173 Flags, DAG, dl);
6174
6175 // This must go outside the CALLSEQ_START..END.
6176 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6177 SDLoc(MemcpyCall));
6178 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6179 NewCallSeqStart.getNode());
6180 Chain = CallSeqStart = NewCallSeqStart;
6181
6182 // Pass the address of the aggregate copy on the stack either in a
6183 // physical register or in the parameter list area of the current stack
6184 // frame to the callee.
6185 Arg = PtrOff;
6186 }
6187
6188 // When useCRBits() is true, there can be i1 arguments.
6189 // It is because getRegisterType(MVT::i1) => MVT::i1,
6190 // and for other integer types getRegisterType() => MVT::i32.
6191 // Extend i1 and ensure callee will get i32.
6192 if (Arg.getValueType() == MVT::i1)
6193 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6194 dl, MVT::i32, Arg);
6195
6196 if (VA.isRegLoc()) {
6197 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6198 // Put argument in a physical register.
6199 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6200 bool IsLE = Subtarget.isLittleEndian();
6201 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6202 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6203 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6204 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6205 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6206 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6207 SVal.getValue(0)));
6208 } else
6209 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6210 } else {
6211 // Put argument in the parameter list area of the current stack frame.
6212 assert(VA.isMemLoc());
6213 unsigned LocMemOffset = VA.getLocMemOffset();
6214
6215 if (!IsTailCall) {
6216 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6217 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6218 StackPtr, PtrOff);
6219
6220 MemOpChains.push_back(
6221 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6222 } else {
6223 // Calculate and remember argument location.
6224 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6225 TailCallArguments);
6226 }
6227 }
6228 }
6229
6230 if (!MemOpChains.empty())
6231 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6232
6233 // Build a sequence of copy-to-reg nodes chained together with token chain
6234 // and flag operands which copy the outgoing args into the appropriate regs.
6235 SDValue InGlue;
6236 for (const auto &[Reg, N] : RegsToPass) {
6237 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6238 InGlue = Chain.getValue(1);
6239 }
6240
6241 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6242 // registers.
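  // In the 32-bit SVR4 ABI, CR bit 6 tells a varargs callee whether any
  // floating-point arguments were passed in registers, so its va_arg handling
  // only spills the FPR save area when it is actually needed.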
6243 if (IsVarArg) {
6244 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6245 SDValue Ops[] = { Chain, InGlue };
6246
6247 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6248 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6249
6250 InGlue = Chain.getValue(1);
6251 }
6252
6253 if (IsTailCall)
6254 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6255 TailCallArguments);
6256
6257 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6258 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6259}
6260
6261// Copy an argument into memory, being careful to do this outside the
6262// call sequence for the call to which the argument belongs.
6263SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6264 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6265 SelectionDAG &DAG, const SDLoc &dl) const {
6266 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6267 CallSeqStart.getNode()->getOperand(0),
6268 Flags, DAG, dl);
6269 // The MEMCPY must go outside the CALLSEQ_START..END.
6270 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6271 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6272 SDLoc(MemcpyCall));
6273 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6274 NewCallSeqStart.getNode());
6275 return NewCallSeqStart;
6276}
6277
6278SDValue PPCTargetLowering::LowerCall_64SVR4(
6279 SDValue Chain, SDValue Callee, CallFlags CFlags,
6280    const SmallVectorImpl<ISD::OutputArg> &Outs,
6281    const SmallVectorImpl<SDValue> &OutVals,
6282 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6283    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6284    const CallBase *CB) const {
6285 bool isELFv2ABI = Subtarget.isELFv2ABI();
6286 bool isLittleEndian = Subtarget.isLittleEndian();
6287 unsigned NumOps = Outs.size();
6288 bool IsSibCall = false;
6289 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6290
6291 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6292 unsigned PtrByteSize = 8;
6293
6294 MachineFunction &MF = DAG.getMachineFunction();
6295
6296 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6297 IsSibCall = true;
6298
6299  // Mark this function as potentially containing a tail call. As a consequence
6300  // the frame pointer will be used for dynamic allocas and for restoring the
6301  // caller's stack pointer in this function's epilogue. This is done because
6302  // the tail-called function might overwrite the value in this function's (MF)
6303  // stack pointer stack slot 0(SP).
6304 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6305 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6306
6307 assert(!(IsFastCall && CFlags.IsVarArg) &&
6308 "fastcc not supported on varargs functions");
6309
6310 // Count how many bytes are to be pushed on the stack, including the linkage
6311 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6312 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6313 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6314 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6315 unsigned NumBytes = LinkageSize;
6316 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6317
6318 static const MCPhysReg GPR[] = {
6319 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6320 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6321 };
6322 static const MCPhysReg VR[] = {
6323 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6324 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6325 };
6326
6327 const unsigned NumGPRs = std::size(GPR);
6328 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6329 const unsigned NumVRs = std::size(VR);
6330
6331 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6332 // can be passed to the callee in registers.
6333 // For the fast calling convention, there is another check below.
6334 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6335 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6336 if (!HasParameterArea) {
6337 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6338 unsigned AvailableFPRs = NumFPRs;
6339 unsigned AvailableVRs = NumVRs;
6340 unsigned NumBytesTmp = NumBytes;
6341 for (unsigned i = 0; i != NumOps; ++i) {
6342 if (Outs[i].Flags.isNest()) continue;
6343 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6344 PtrByteSize, LinkageSize, ParamAreaSize,
6345 NumBytesTmp, AvailableFPRs, AvailableVRs))
6346 HasParameterArea = true;
6347 }
6348 }
6349
6350 // When using the fast calling convention, we don't provide backing for
6351 // arguments that will be in registers.
6352 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6353
6354 // Avoid allocating parameter area for fastcc functions if all the arguments
6355 // can be passed in the registers.
6356 if (IsFastCall)
6357 HasParameterArea = false;
6358
6359 // Add up all the space actually used.
6360 for (unsigned i = 0; i != NumOps; ++i) {
6361 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6362 EVT ArgVT = Outs[i].VT;
6363 EVT OrigVT = Outs[i].ArgVT;
6364
6365 if (Flags.isNest())
6366 continue;
6367
6368 if (IsFastCall) {
6369 if (Flags.isByVal()) {
6370 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6371 if (NumGPRsUsed > NumGPRs)
6372 HasParameterArea = true;
6373 } else {
6374 switch (ArgVT.getSimpleVT().SimpleTy) {
6375 default: llvm_unreachable("Unexpected ValueType for argument!");
6376 case MVT::i1:
6377 case MVT::i32:
6378 case MVT::i64:
6379 if (++NumGPRsUsed <= NumGPRs)
6380 continue;
6381 break;
6382 case MVT::v4i32:
6383 case MVT::v8i16:
6384 case MVT::v16i8:
6385 case MVT::v2f64:
6386 case MVT::v2i64:
6387 case MVT::v1i128:
6388 case MVT::f128:
6389 if (++NumVRsUsed <= NumVRs)
6390 continue;
6391 break;
6392 case MVT::v4f32:
6393 if (++NumVRsUsed <= NumVRs)
6394 continue;
6395 break;
6396 case MVT::f32:
6397 case MVT::f64:
6398 if (++NumFPRsUsed <= NumFPRs)
6399 continue;
6400 break;
6401 }
6402 HasParameterArea = true;
6403 }
6404 }
6405
6406 /* Respect alignment of argument on the stack. */
6407    auto Alignment =
6408        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6409    NumBytes = alignTo(NumBytes, Alignment);
6410
6411 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6412 if (Flags.isInConsecutiveRegsLast())
6413 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6414 }
6415
6416 unsigned NumBytesActuallyUsed = NumBytes;
6417
6418 // In the old ELFv1 ABI,
6419 // the prolog code of the callee may store up to 8 GPR argument registers to
6420  // the stack, allowing va_start to index over them in memory if it is varargs.
6421 // Because we cannot tell if this is needed on the caller side, we have to
6422 // conservatively assume that it is needed. As such, make sure we have at
6423 // least enough stack space for the caller to store the 8 GPRs.
6424 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6425 // really requires memory operands, e.g. a vararg function.
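  // For example, with the 48-byte ELFv1 linkage area this reserves at least
  // 48 + 8 * 8 = 112 bytes, even for calls passing fewer than eight
  // pointer-sized arguments.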
6426 if (HasParameterArea)
6427 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6428 else
6429 NumBytes = LinkageSize;
6430
6431 // Tail call needs the stack to be aligned.
6432 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6433 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6434
6435 int SPDiff = 0;
6436
6437 // Calculate by how many bytes the stack has to be adjusted in case of tail
6438 // call optimization.
6439 if (!IsSibCall)
6440 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6441
6442 // To protect arguments on the stack from being clobbered in a tail call,
6443 // force all the loads to happen before doing any other lowering.
6444 if (CFlags.IsTailCall)
6445 Chain = DAG.getStackArgumentTokenFactor(Chain);
6446
6447 // Adjust the stack pointer for the new arguments...
6448 // These operations are automatically eliminated by the prolog/epilog pass
6449 if (!IsSibCall)
6450 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6451 SDValue CallSeqStart = Chain;
6452
6453 // Load the return address and frame pointer so they can be moved somewhere
6454 // else later.
6455 SDValue LROp, FPOp;
6456 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6457
6458 // Set up a copy of the stack pointer for use in loading and storing any
6459 // arguments that may not fit in the registers available for argument
6460 // passing.
6461 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6462
6463 // Figure out which arguments are going to go in registers, and which in
6464 // memory. Also, if this is a vararg function, floating point operations
6465 // must be stored to our stack, and loaded into integer regs as well, if
6466 // any integer regs are available for argument passing.
6467 unsigned ArgOffset = LinkageSize;
6468
6469 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6470 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6471
6472 SmallVector<SDValue, 8> MemOpChains;
6473 for (unsigned i = 0; i != NumOps; ++i) {
6474 SDValue Arg = OutVals[i];
6475 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6476 EVT ArgVT = Outs[i].VT;
6477 EVT OrigVT = Outs[i].ArgVT;
6478
6479 // PtrOff will be used to store the current argument to the stack if a
6480 // register cannot be found for it.
6481 SDValue PtrOff;
6482
6483 // We re-align the argument offset for each argument, except when using the
6484 // fast calling convention, in which case we only do so when we will actually
6485 // use a stack slot.
6486 auto ComputePtrOff = [&]() {
6487 /* Respect alignment of argument on the stack. */
6488 auto Alignment =
6489 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6490 ArgOffset = alignTo(ArgOffset, Alignment);
6491
6492 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6493
6494 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6495 };
6496
6497 if (!IsFastCall) {
6498 ComputePtrOff();
6499
6500 /* Compute GPR index associated with argument offset. */
6501 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6502 GPR_idx = std::min(GPR_idx, NumGPRs);
6503 }
6504
6505 // Promote integers to 64-bit values.
6506 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6507 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6508 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6509 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6510 }
6511
6512 // FIXME memcpy is used way more than necessary. Correctness first.
6513 // Note: "by value" is code for passing a structure by value, not
6514 // basic types.
6515 if (Flags.isByVal()) {
6516 // Note: Size includes alignment padding, so
6517 // struct x { short a; char b; }
6518 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6519 // These are the proper values we need for right-justifying the
6520 // aggregate in a parameter register.
6521 unsigned Size = Flags.getByValSize();
6522
6523 // An empty aggregate parameter takes up no storage and no
6524 // registers.
6525 if (Size == 0)
6526 continue;
6527
6528 if (IsFastCall)
6529 ComputePtrOff();
6530
6531 // All aggregates smaller than 8 bytes must be passed right-justified.
6532 if (Size==1 || Size==2 || Size==4) {
6533 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6534 if (GPR_idx != NumGPRs) {
6535 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6536 MachinePointerInfo(), VT);
6537 MemOpChains.push_back(Load.getValue(1));
6538 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6539
6540 ArgOffset += PtrByteSize;
6541 continue;
6542 }
6543 }
6544
6545 if (GPR_idx == NumGPRs && Size < 8) {
6546 SDValue AddPtr = PtrOff;
6547 if (!isLittleEndian) {
6548 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6549 PtrOff.getValueType());
6550 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6551 }
6552 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6553 CallSeqStart,
6554 Flags, DAG, dl);
6555 ArgOffset += PtrByteSize;
6556 continue;
6557 }
6558 // Copy the object to the parameter save area if it cannot be entirely passed
6559 // in registers.
6560 // FIXME: we only need to copy the parts which need to be passed in
6561 // parameter save area. For the parts passed by registers, we don't need
6562 // to copy them to the stack although we need to allocate space for them
6563 // in parameter save area.
6564 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6565 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6566 CallSeqStart,
6567 Flags, DAG, dl);
6568
6569 // When a register is available, pass a small aggregate right-justified.
6570 if (Size < 8 && GPR_idx != NumGPRs) {
6571 // The easiest way to get this right-justified in a register
6572 // is to copy the structure into the rightmost portion of a
6573 // local variable slot, then load the whole slot into the
6574 // register.
6575 // FIXME: The memcpy seems to produce pretty awful code for
6576 // small aggregates, particularly for packed ones.
6577 // FIXME: It would be preferable to use the slot in the
6578 // parameter save area instead of a new local variable.
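// For example, on a big-endian target a 3-byte aggregate is copied to
// PtrOff + (8 - 3) = PtrOff + 5, so the 8-byte load below leaves it
// right-justified in the low-order bytes of the GPR.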
6579 SDValue AddPtr = PtrOff;
6580 if (!isLittleEndian) {
6581 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6582 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6583 }
6584 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6585 CallSeqStart,
6586 Flags, DAG, dl);
6587
6588 // Load the slot into the register.
6589 SDValue Load =
6590 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6591 MemOpChains.push_back(Load.getValue(1));
6592 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6593
6594 // Done with this argument.
6595 ArgOffset += PtrByteSize;
6596 continue;
6597 }
6598
6599 // For aggregates larger than PtrByteSize, copy the pieces of the
6600 // object that fit into registers from the parameter save area.
6601 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6602 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6603 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6604 if (GPR_idx != NumGPRs) {
6605 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6606 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6607 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6608 MachinePointerInfo(), ObjType);
6609
6610 MemOpChains.push_back(Load.getValue(1));
6611 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6612 ArgOffset += PtrByteSize;
6613 } else {
6614 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6615 break;
6616 }
6617 }
6618 continue;
6619 }
6620
6621 switch (Arg.getSimpleValueType().SimpleTy) {
6622 default: llvm_unreachable("Unexpected ValueType for argument!");
6623 case MVT::i1:
6624 case MVT::i32:
6625 case MVT::i64:
6626 if (Flags.isNest()) {
6627 // The 'nest' parameter, if any, is passed in R11.
6628 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6629 break;
6630 }
6631
6632 // These can be scalar arguments or elements of an integer array type
6633 // passed directly. Clang may use those instead of "byval" aggregate
6634 // types to avoid forcing arguments to memory unnecessarily.
6635 if (GPR_idx != NumGPRs) {
6636 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6637 } else {
6638 if (IsFastCall)
6639 ComputePtrOff();
6640
6641 assert(HasParameterArea &&
6642 "Parameter area must exist to pass an argument in memory.");
6643 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6644 true, CFlags.IsTailCall, false, MemOpChains,
6645 TailCallArguments, dl);
6646 if (IsFastCall)
6647 ArgOffset += PtrByteSize;
6648 }
6649 if (!IsFastCall)
6650 ArgOffset += PtrByteSize;
6651 break;
6652 case MVT::f32:
6653 case MVT::f64: {
6654 // These can be scalar arguments or elements of a float array type
6655 // passed directly. The latter are used to implement ELFv2 homogenous
6656 // float aggregates.
6657
6658 // Named arguments go into FPRs first, and once they overflow, the
6659 // remaining arguments go into GPRs and then the parameter save area.
6660 // Unnamed arguments for vararg functions always go to GPRs and
6661 // then the parameter save area. For now, put all arguments to vararg
6662 // routines always in both locations (FPR *and* GPR or stack slot).
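// For example, an ELFv2 homogeneous aggregate of three floats consumes 4
// bytes of argument area per element (see the ArgOffset update below); only
// the final element rounds ArgOffset up to the next doubleword, and when such
// elements must also travel in GPRs, adjacent pairs are packed into a single
// GPR via BUILD_PAIR.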
6663 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6664 bool NeededLoad = false;
6665
6666 // First load the argument into the next available FPR.
6667 if (FPR_idx != NumFPRs)
6668 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6669
6670 // Next, load the argument into GPR or stack slot if needed.
6671 if (!NeedGPROrStack)
6672 ;
6673 else if (GPR_idx != NumGPRs && !IsFastCall) {
6674 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6675 // once we support fp <-> gpr moves.
6676
6677 // In the non-vararg case, this can only ever happen in the
6678 // presence of f32 array types, since otherwise we never run
6679 // out of FPRs before running out of GPRs.
6680 SDValue ArgVal;
6681
6682 // Double values are always passed in a single GPR.
6683 if (Arg.getValueType() != MVT::f32) {
6684 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6685
6686 // Non-array float values are extended and passed in a GPR.
6687 } else if (!Flags.isInConsecutiveRegs()) {
6688 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6689 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6690
6691 // If we have an array of floats, we collect every odd element
6692 // together with its predecessor into one GPR.
6693 } else if (ArgOffset % PtrByteSize != 0) {
6694 SDValue Lo, Hi;
6695 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6696 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6697 if (!isLittleEndian)
6698 std::swap(Lo, Hi);
6699 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6700
6701 // The final element, if even, goes into the first half of a GPR.
6702 } else if (Flags.isInConsecutiveRegsLast()) {
6703 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6704 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6705 if (!isLittleEndian)
6706 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6707 DAG.getConstant(32, dl, MVT::i32));
6708
6709 // Non-final even elements are skipped; they will be handled
6710 // together with the subsequent argument on the next go-around.
6711 } else
6712 ArgVal = SDValue();
6713
6714 if (ArgVal.getNode())
6715 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6716 } else {
6717 if (IsFastCall)
6718 ComputePtrOff();
6719
6720 // Single-precision floating-point values are mapped to the
6721 // second (rightmost) word of the stack doubleword.
6722 if (Arg.getValueType() == MVT::f32 &&
6723 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6724 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6725 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6726 }
6727
6728 assert(HasParameterArea &&
6729 "Parameter area must exist to pass an argument in memory.");
6730 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6731 true, CFlags.IsTailCall, false, MemOpChains,
6732 TailCallArguments, dl);
6733
6734 NeededLoad = true;
6735 }
6736 // When passing an array of floats, the array occupies consecutive
6737 // space in the argument area; only round up to the next doubleword
6738 // at the end of the array. Otherwise, each float takes 8 bytes.
6739 if (!IsFastCall || NeededLoad) {
6740 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6741 Flags.isInConsecutiveRegs()) ? 4 : 8;
6742 if (Flags.isInConsecutiveRegsLast())
6743 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6744 }
6745 break;
6746 }
6747 case MVT::v4f32:
6748 case MVT::v4i32:
6749 case MVT::v8i16:
6750 case MVT::v16i8:
6751 case MVT::v2f64:
6752 case MVT::v2i64:
6753 case MVT::v1i128:
6754 case MVT::f128:
6755 // These can be scalar arguments or elements of a vector array type
6756 // passed directly. The latter are used to implement ELFv2 homogenous
6757 // vector aggregates.
6758
6759 // For a varargs call, named arguments go into VRs or on the stack as
6760 // usual; unnamed arguments always go to the stack or the corresponding
6761 // GPRs when within range. For now, we always put the value in both
6762 // locations (or even all three).
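// Concretely: the 16-byte value is stored to its parameter-area slot,
// re-loaded into the next VR if one remains, and re-loaded in pointer-sized
// pieces into any remaining GPRs below.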
6763 if (CFlags.IsVarArg) {
6764 assert(HasParameterArea &&
6765 "Parameter area must exist if we have a varargs call.");
6766 // We could elide this store in the case where the object fits
6767 // entirely in R registers. Maybe later.
6768 SDValue Store =
6769 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6770 MemOpChains.push_back(Store);
6771 if (VR_idx != NumVRs) {
6772 SDValue Load =
6773 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6774 MemOpChains.push_back(Load.getValue(1));
6775 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6776 }
6777 ArgOffset += 16;
6778 for (unsigned i=0; i<16; i+=PtrByteSize) {
6779 if (GPR_idx == NumGPRs)
6780 break;
6781 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6782 DAG.getConstant(i, dl, PtrVT));
6783 SDValue Load =
6784 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6785 MemOpChains.push_back(Load.getValue(1));
6786 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6787 }
6788 break;
6789 }
6790
6791 // Non-varargs Altivec params go into VRs or on the stack.
6792 if (VR_idx != NumVRs) {
6793 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6794 } else {
6795 if (IsFastCall)
6796 ComputePtrOff();
6797
6798 assert(HasParameterArea &&
6799 "Parameter area must exist to pass an argument in memory.");
6800 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6801 true, CFlags.IsTailCall, true, MemOpChains,
6802 TailCallArguments, dl);
6803 if (IsFastCall)
6804 ArgOffset += 16;
6805 }
6806
6807 if (!IsFastCall)
6808 ArgOffset += 16;
6809 break;
6810 }
6811 }
6812
6813 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6814 "mismatch in size of parameter area");
6815 (void)NumBytesActuallyUsed;
6816
6817 if (!MemOpChains.empty())
6818 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6819
6820 // Check if this is an indirect call (MTCTR/BCTRL).
6821 // See prepareDescriptorIndirectCall and buildCallOperands for more
6822 // information about calls through function pointers in the 64-bit SVR4 ABI.
6823 if (CFlags.IsIndirect) {
6824 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6825 // caller in the TOC save area.
6826 if (isTOCSaveRestoreRequired(Subtarget)) {
6827 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6828 // Load r2 into a virtual register and store it to the TOC save area.
6829 setUsesTOCBasePtr(DAG);
6830 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6831 // TOC save area offset.
6832 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6833 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6834 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6835 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6836 MachinePointerInfo::getStack(
6837 DAG.getMachineFunction(), TOCSaveOffset));
6838 }
6839 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6840 // This does not mean the MTCTR instruction must use R12; it's easier
6841 // to model this as an extra parameter, so do that.
6842 if (isELFv2ABI && !CFlags.IsPatchPoint)
6843 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6844 }
6845
6846 // Build a sequence of copy-to-reg nodes chained together with token chain
6847 // and flag operands which copy the outgoing args into the appropriate regs.
6848 SDValue InGlue;
6849 for (const auto &[Reg, N] : RegsToPass) {
6850 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6851 InGlue = Chain.getValue(1);
6852 }
6853
6854 if (CFlags.IsTailCall && !IsSibCall)
6855 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6856 TailCallArguments);
6857
6858 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6859 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6860}
6861
6862// Returns true when the shadow of a general purpose argument register
6863// in the parameter save area is aligned to at least 'RequiredAlign'.
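// The alignment follows from each register's fixed offset within the PSA;
// e.g. on 64-bit AIX the PSA begins after the 48-byte linkage area, so X3
// shadows offset 48 (16-byte aligned) while X4 shadows offset 56 (only
// 8-byte aligned).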
6864static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6865 assert(RequiredAlign.value() <= 16 &&
6866 "Required alignment greater than stack alignment.");
6867 switch (Reg) {
6868 default:
6869 report_fatal_error("called on invalid register.");
6870 case PPC::R5:
6871 case PPC::R9:
6872 case PPC::X3:
6873 case PPC::X5:
6874 case PPC::X7:
6875 case PPC::X9:
6876 // These registers are 16-byte aligned, which is the strictest alignment
6877 // we can support.
6878 return true;
6879 case PPC::R3:
6880 case PPC::R7:
6881 case PPC::X4:
6882 case PPC::X6:
6883 case PPC::X8:
6884 case PPC::X10:
6885 // The shadow of these registers in the PSA is 8 byte aligned.
6886 return RequiredAlign <= 8;
6887 case PPC::R4:
6888 case PPC::R6:
6889 case PPC::R8:
6890 case PPC::R10:
6891 return RequiredAlign <= 4;
6892 }
6893}
6894
6895static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6896 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6897 Type *OrigTy, CCState &State) {
6898 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6899 State.getMachineFunction().getSubtarget());
6900 const bool IsPPC64 = Subtarget.isPPC64();
6901 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6902 const Align PtrAlign(PtrSize);
6903 const Align StackAlign(16);
6904 const MVT RegVT = Subtarget.getScalarIntVT();
6905
6906 if (ValVT == MVT::f128)
6907 report_fatal_error("f128 is unimplemented on AIX.");
6908
6909 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6910 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6911 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6912 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6913 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6914 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6915
6916 static const MCPhysReg VR[] = {// Vector registers.
6917 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6918 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6919 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6920
6921 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6922
6923 if (ArgFlags.isNest()) {
6924 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6925 if (!EnvReg)
6926 report_fatal_error("More then one nest argument.");
6927 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6928 return false;
6929 }
6930
6931 if (ArgFlags.isByVal()) {
6932 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6933 if (ByValAlign > StackAlign)
6934 report_fatal_error("Pass-by-value arguments with alignment greater than "
6935 "16 are not supported.");
6936
6937 const unsigned ByValSize = ArgFlags.getByValSize();
6938 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6939
6940 // An empty aggregate parameter takes up no storage and no registers,
6941 // but needs a MemLoc for a stack slot for the formal arguments side.
6942 if (ByValSize == 0) {
6943 State.addLoc(CCValAssign::getMem(ValNo, ValVT,
6944 State.getStackSize(), RegVT, LocInfo));
6945 return false;
6946 }
6947
6948 // Shadow allocate any registers that are not properly aligned.
6949 unsigned NextReg = State.getFirstUnallocated(GPRs);
6950 while (NextReg != GPRs.size() &&
6951 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6952 // Shadow allocate the next register since its alignment is not strict enough.
6953 MCRegister Reg = State.AllocateReg(GPRs);
6954 // Allocate the stack space shadowed by said register.
6955 State.AllocateStack(PtrSize, PtrAlign);
6956 assert(Reg && "Alocating register unexpectedly failed.");
6957 (void)Reg;
6958 NextReg = State.getFirstUnallocated(GPRs);
6959 }
6960
6961 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6962 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6963 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6964 if (MCRegister Reg = State.AllocateReg(GPRs))
6965 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6966 else {
6967 State.addLoc(
6968 CCValAssign::getMem(ValNo, ValVT, Offset, RegVT,
6969 LocInfo));
6970 break;
6971 }
6972 }
6973 return false;
6974 }
6975
6976 // Arguments always reserve parameter save area.
6977 switch (ValVT.SimpleTy) {
6978 default:
6979 report_fatal_error("Unhandled value type for argument.");
6980 case MVT::i64:
6981 // i64 arguments should have been split to i32 for PPC32.
6982 assert(IsPPC64 && "PPC32 should have split i64 values.");
6983 [[fallthrough]];
6984 case MVT::i1:
6985 case MVT::i32: {
6986 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6987 // AIX integer arguments are always passed in register width.
6988 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6989 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6990 : CCValAssign::LocInfo::ZExt;
6991 if (MCRegister Reg = State.AllocateReg(GPRs))
6992 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6993 else
6994 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6995
6996 return false;
6997 }
6998 case MVT::f32:
6999 case MVT::f64: {
7000 // Parameter save area (PSA) is reserved even if the float passes in fpr.
7001 const unsigned StoreSize = LocVT.getStoreSize();
7002 // Floats are always 4-byte aligned in the PSA on AIX.
7003 // This includes f64 in 64-bit mode for ABI compatibility.
7004 const unsigned Offset =
7005 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
7006 MCRegister FReg = State.AllocateReg(FPR);
7007 if (FReg)
7008 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
7009
7010 // Reserve and initialize GPRs or initialize the PSA as required.
7011 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
7012 if (MCRegister Reg = State.AllocateReg(GPRs)) {
7013 assert(FReg && "An FPR should be available when a GPR is reserved.");
7014 if (State.isVarArg()) {
7015 // Successfully reserved GPRs are only initialized for vararg calls.
7016 // Custom handling is required for:
7017 // f64 in PPC32 needs to be split into 2 GPRs.
7018 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
7019 State.addLoc(
7020 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7021 }
7022 } else {
7023 // If there are insufficient GPRs, the PSA needs to be initialized.
7024 // Initialization occurs even if an FPR was initialized for
7025 // compatibility with the AIX XL compiler. The full memory for the
7026 // argument will be initialized even if a prior word is saved in GPR.
7027 // A custom memLoc is used when the argument also passes in FPR so
7028 // that the callee handling can skip over it easily.
7029 State.addLoc(
7030 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
7031 LocInfo)
7032 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7033 break;
7034 }
7035 }
7036
7037 return false;
7038 }
7039 case MVT::v4f32:
7040 case MVT::v4i32:
7041 case MVT::v8i16:
7042 case MVT::v16i8:
7043 case MVT::v2i64:
7044 case MVT::v2f64:
7045 case MVT::v1i128: {
7046 const unsigned VecSize = 16;
7047 const Align VecAlign(VecSize);
7048
7049 if (!State.isVarArg()) {
7050 // If there are vector registers remaining we don't consume any stack
7051 // space.
7052 if (MCRegister VReg = State.AllocateReg(VR)) {
7053 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7054 return false;
7055 }
7056 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
7057 // might be allocated in the portion of the PSA that is shadowed by the
7058 // GPRs.
7059 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7060 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7061 return false;
7062 }
7063
7064 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
7065 // Burn any underaligned registers and their shadowed stack space until
7066 // we reach the required alignment.
7067 while (NextRegIndex != GPRs.size() &&
7068 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
7069 // Shadow allocate register and its stack shadow.
7070 MCRegister Reg = State.AllocateReg(GPRs);
7071 State.AllocateStack(PtrSize, PtrAlign);
7072 assert(Reg && "Allocating register unexpectedly failed.");
7073 (void)Reg;
7074 NextRegIndex = State.getFirstUnallocated(GPRs);
7075 }
7076
7077 // Vectors that are passed as fixed arguments are handled differently.
7078 // They are passed in VRs if any are available (unlike arguments passed
7079 // through the ellipsis) and they shadow GPRs (unlike fixed arguments to
7080 // non-vararg functions).
7081 if (!ArgFlags.isVarArg()) {
7082 if (MCRegister VReg = State.AllocateReg(VR)) {
7083 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
7084 // Shadow allocate GPRs and stack space even though we pass in a VR.
7085 for (unsigned I = 0; I != VecSize; I += PtrSize)
7086 State.AllocateReg(GPRs);
7087 State.AllocateStack(VecSize, VecAlign);
7088 return false;
7089 }
7090 // No vector registers remain so pass on the stack.
7091 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7092 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7093 return false;
7094 }
7095
7096 // If all GPRs are consumed then we pass the argument fully on the stack.
7097 if (NextRegIndex == GPRs.size()) {
7098 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7099 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7100 return false;
7101 }
7102
7103 // Corner case for 32-bit codegen. We have 2 registers to pass the first
7104 // half of the argument, and then need to pass the remaining half on the
7105 // stack.
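// For example, with R3..R8 already allocated, R9 and R10 carry the first 8
// bytes while the caller also stores the full 16 bytes to the PSA slot, from
// which the callee reloads the value.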
7106 if (GPRs[NextRegIndex] == PPC::R9) {
7107 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7108 State.addLoc(
7109 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7110
7111 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
7112 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
7113 assert(FirstReg && SecondReg &&
7114 "Allocating R9 or R10 unexpectedly failed.");
7115 State.addLoc(
7116 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
7117 State.addLoc(
7118 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
7119 return false;
7120 }
7121
7122 // We have enough GPRs to fully pass the vector argument, and we have
7123 // already consumed any underaligned registers. Start with the custom
7124 // MemLoc and then the custom RegLocs.
7125 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
7126 State.addLoc(
7127 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
7128 for (unsigned I = 0; I != VecSize; I += PtrSize) {
7129 const MCRegister Reg = State.AllocateReg(GPRs);
7130 assert(Reg && "Failed to allocated register for vararg vector argument");
7131 State.addLoc(
7132 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
7133 }
7134 return false;
7135 }
7136 }
7137 return true;
7138}
7139
7140// So far, this function is only used by LowerFormalArguments_AIX()
7141 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
7142 bool IsPPC64,
7143 bool HasP8Vector,
7144 bool HasVSX) {
7145 assert((IsPPC64 || SVT != MVT::i64) &&
7146 "i64 should have been split for 32-bit codegen.");
7147
7148 switch (SVT) {
7149 default:
7150 report_fatal_error("Unexpected value type for formal argument");
7151 case MVT::i1:
7152 case MVT::i32:
7153 case MVT::i64:
7154 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7155 case MVT::f32:
7156 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
7157 case MVT::f64:
7158 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
7159 case MVT::v4f32:
7160 case MVT::v4i32:
7161 case MVT::v8i16:
7162 case MVT::v16i8:
7163 case MVT::v2i64:
7164 case MVT::v2f64:
7165 case MVT::v1i128:
7166 return &PPC::VRRCRegClass;
7167 }
7168}
7169
7171 SelectionDAG &DAG, SDValue ArgValue,
7172 MVT LocVT, const SDLoc &dl) {
7173 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7174 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7175
7176 if (Flags.isSExt())
7177 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7178 DAG.getValueType(ValVT));
7179 else if (Flags.isZExt())
7180 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7181 DAG.getValueType(ValVT));
7182
7183 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7184}
7185
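// Maps a GPR argument register to the offset of its shadow slot in the
// caller's parameter save area. For example, with the 64-bit AIX linkage area
// of 48 bytes, X4 maps to 48 + 8 * 1 = 56.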
7186static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7187 const unsigned LASize = FL->getLinkageSize();
7188
7189 if (PPC::GPRCRegClass.contains(Reg)) {
7190 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7191 "Reg must be a valid argument register!");
7192 return LASize + 4 * (Reg - PPC::R3);
7193 }
7194
7195 if (PPC::G8RCRegClass.contains(Reg)) {
7196 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7197 "Reg must be a valid argument register!");
7198 return LASize + 8 * (Reg - PPC::X3);
7199 }
7200
7201 llvm_unreachable("Only general purpose registers expected.");
7202}
7203
7204// AIX ABI Stack Frame Layout:
7205//
7206// Low Memory +--------------------------------------------+
7207// SP +---> | Back chain | ---+
7208// | +--------------------------------------------+ |
7209// | | Saved Condition Register | |
7210// | +--------------------------------------------+ |
7211// | | Saved Linkage Register | |
7212// | +--------------------------------------------+ | Linkage Area
7213// | | Reserved for compilers | |
7214// | +--------------------------------------------+ |
7215// | | Reserved for binders | |
7216// | +--------------------------------------------+ |
7217// | | Saved TOC pointer | ---+
7218// | +--------------------------------------------+
7219// | | Parameter save area |
7220// | +--------------------------------------------+
7221// | | Alloca space |
7222// | +--------------------------------------------+
7223// | | Local variable space |
7224// | +--------------------------------------------+
7225// | | Float/int conversion temporary |
7226// | +--------------------------------------------+
7227// | | Save area for AltiVec registers |
7228// | +--------------------------------------------+
7229// | | AltiVec alignment padding |
7230// | +--------------------------------------------+
7231// | | Save area for VRSAVE register |
7232// | +--------------------------------------------+
7233// | | Save area for General Purpose registers |
7234// | +--------------------------------------------+
7235// | | Save area for Floating Point registers |
7236// | +--------------------------------------------+
7237// +---- | Back chain |
7238// High Memory +--------------------------------------------+
7239//
7240// Specifications:
7241// AIX 7.2 Assembler Language Reference
7242// Subroutine linkage convention
7243
7244SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7245 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7246 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7247 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7248
7249 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7250 CallConv == CallingConv::Fast) &&
7251 "Unexpected calling convention!");
7252
7253 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7254 report_fatal_error("Tail call support is unimplemented on AIX.");
7255
7256 if (useSoftFloat())
7257 report_fatal_error("Soft float support is unimplemented on AIX.");
7258
7259 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7260
7261 const bool IsPPC64 = Subtarget.isPPC64();
7262 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7263
7264 // Assign locations to all of the incoming arguments.
7266 MachineFunction &MF = DAG.getMachineFunction();
7267 MachineFrameInfo &MFI = MF.getFrameInfo();
7268 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7269 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7270
7271 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7272 // Reserve space for the linkage area on the stack.
7273 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7274 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7275 uint64_t SaveStackPos = CCInfo.getStackSize();
7276 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7277 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7278
7279 SmallVector<SDValue, 8> MemOps;
7280
7281 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7282 CCValAssign &VA = ArgLocs[I++];
7283 MVT LocVT = VA.getLocVT();
7284 MVT ValVT = VA.getValVT();
7285 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7286
7287 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7288 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7289 // For compatibility with the AIX XL compiler, the float args in the
7290 // parameter save area are initialized even if the argument is available
7291 // in register. The caller is required to initialize both the register
7292 // and memory, however, the callee can choose to expect it in either.
7293 // The memloc is dismissed here because the argument is retrieved from
7294 // the register.
7295 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7296 continue;
7297
7298 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7299 const TargetRegisterClass *RegClass = getRegClassForSVT(
7300 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7301 // On PPC64, the debugger assumes extended 8-byte values are stored from GPRs.
7302 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7303 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7304 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7305 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7306 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7307 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7308 MachinePointerInfo(), Align(PtrByteSize));
7309 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7310 MemOps.push_back(StoreReg);
7311 }
7312
7313 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7314 unsigned StoreSize =
7315 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7316 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7317 }
7318
7319 auto HandleMemLoc = [&]() {
7320 const unsigned LocSize = LocVT.getStoreSize();
7321 const unsigned ValSize = ValVT.getStoreSize();
7322 assert((ValSize <= LocSize) &&
7323 "Object size is larger than size of MemLoc");
7324 int CurArgOffset = VA.getLocMemOffset();
7325 // Objects are right-justified because AIX is big-endian.
7326 if (LocSize > ValSize)
7327 CurArgOffset += LocSize - ValSize;
7328 // Potential tail calls could cause overwriting of argument stack slots.
7329 const bool IsImmutable =
7330 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7331 (CallConv == CallingConv::Fast));
7332 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7333 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7334 SDValue ArgValue =
7335 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7336
7337 // While the ABI specifies the argument type is (sign or zero) extended
7338 // out to register width, not all code is compliant. We truncate and
7339 // re-extend to be more forgiving of these callers when the argument type
7340 // is smaller than register width.
7341 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7342 ValVT.isInteger() &&
7343 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7344 // It is possible to have either real integer values
7345 // or integers that were not originally integers.
7346 // In the latter case, these could have come from structs,
7347 // and these integers would not have an extend on the parameter.
7348 // Since these types of integers do not have an extend specified
7349 // in the first place, the type of extend that we do should not matter.
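// For example, if the caller passed an i8 without extending it to register
// width, the value loaded here is truncated back to i8 and then re-extended
// (per ArgSignExt), so the callee still observes a properly extended value.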
7350 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7351 ? MVT::i8
7352 : ArgVT;
7353 SDValue ArgValueTrunc =
7354 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7355 SDValue ArgValueExt =
7356 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7357 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7358 InVals.push_back(ArgValueExt);
7359 } else {
7360 InVals.push_back(ArgValue);
7361 }
7362 };
7363
7364 // Vector arguments to vararg functions are passed both on the stack and
7365 // in any available GPRs. Load the value from the stack and add the GPRs
7366 // as live ins.
7367 if (VA.isMemLoc() && VA.needsCustom()) {
7368 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7369 assert(isVarArg && "Only use custom memloc for vararg.");
7370 // Record the ValNo of the custom MemLoc so we can compare it to the ValNo
7371 // of the matching custom RegLocs.
7372 const unsigned OriginalValNo = VA.getValNo();
7373 (void)OriginalValNo;
7374
7375 auto HandleCustomVecRegLoc = [&]() {
7376 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7377 "Missing custom RegLoc.");
7378 VA = ArgLocs[I++];
7379 assert(VA.getValVT().isVector() &&
7380 "Unexpected Val type for custom RegLoc.");
7381 assert(VA.getValNo() == OriginalValNo &&
7382 "ValNo mismatch between custom MemLoc and RegLoc.");
7383 MVT::SimpleValueType SVT = VA.getValVT().SimpleTy;
7384 MF.addLiveIn(VA.getLocReg(),
7385 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7386 Subtarget.hasVSX()));
7387 };
7388
7389 HandleMemLoc();
7390 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7391 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7392 // R10.
7393 HandleCustomVecRegLoc();
7394 HandleCustomVecRegLoc();
7395
7396 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7397 // we passed the vector in R5, R6, R7 and R8.
7398 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7399 assert(!IsPPC64 &&
7400 "Only 2 custom RegLocs expected for 64-bit codegen.");
7401 HandleCustomVecRegLoc();
7402 HandleCustomVecRegLoc();
7403 }
7404
7405 continue;
7406 }
7407
7408 if (VA.isRegLoc()) {
7409 if (VA.getValVT().isScalarInteger())
7411 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7412 switch (VA.getValVT().SimpleTy) {
7413 default:
7414 report_fatal_error("Unhandled value type for argument.");
7415 case MVT::f32:
7417 break;
7418 case MVT::f64:
7420 break;
7421 }
7422 } else if (VA.getValVT().isVector()) {
7423 switch (VA.getValVT().SimpleTy) {
7424 default:
7425 report_fatal_error("Unhandled value type for argument.");
7426 case MVT::v16i8:
7428 break;
7429 case MVT::v8i16:
7431 break;
7432 case MVT::v4i32:
7433 case MVT::v2i64:
7434 case MVT::v1i128:
7436 break;
7437 case MVT::v4f32:
7438 case MVT::v2f64:
7440 break;
7441 }
7442 }
7443 }
7444
7445 if (Flags.isByVal() && VA.isMemLoc()) {
7446 const unsigned Size =
7447 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7448 PtrByteSize);
7449 const int FI = MF.getFrameInfo().CreateFixedObject(
7450 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7451 /* IsAliased */ true);
7452 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7453 InVals.push_back(FIN);
7454
7455 continue;
7456 }
7457
7458 if (Flags.isByVal()) {
7459 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7460
7461 const MCPhysReg ArgReg = VA.getLocReg();
7462 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7463
7464 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7465 const int FI = MF.getFrameInfo().CreateFixedObject(
7466 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7467 /* IsAliased */ true);
7468 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7469 InVals.push_back(FIN);
7470
7471 // Add live ins for all the RegLocs for the same ByVal.
7472 const TargetRegisterClass *RegClass =
7473 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7474
7475 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7476 unsigned Offset) {
7477 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7478 // Since the caller's side has left-justified the aggregate in the
7479 // register, we can simply store the entire register into the stack
7480 // slot.
7481 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7482 // The store to the fixedstack object is needed because accessing a
7483 // field of the ByVal will use a gep and load. Ideally we will optimize
7484 // to extracting the value from the register directly, and elide the
7485 // stores when the argument's address is not taken, but that will need to
7486 // be future work.
7487 SDValue Store = DAG.getStore(
7488 CopyFrom.getValue(1), dl, CopyFrom,
7489 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7490 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7491
7492 MemOps.push_back(Store);
7493 };
7494
7495 unsigned Offset = 0;
7496 HandleRegLoc(VA.getLocReg(), Offset);
7497 Offset += PtrByteSize;
7498 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7499 Offset += PtrByteSize) {
7500 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7501 "RegLocs should be for ByVal argument.");
7502
7503 const CCValAssign RL = ArgLocs[I++];
7504 HandleRegLoc(RL.getLocReg(), Offset);
7505 }
7506
7507
7508 if (Offset != StackSize) {
7509 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7510 "Expected MemLoc for remaining bytes.");
7511 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7512 // Consume the MemLoc. The InVal has already been emitted, so nothing
7513 // more needs to be done.
7514 ++I;
7515 }
7516
7517 continue;
7518 }
7519
7520 if (VA.isRegLoc() && !VA.needsCustom()) {
7521 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7522 Register VReg =
7523 MF.addLiveIn(VA.getLocReg(),
7524 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7525 Subtarget.hasVSX()));
7526 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7527 if (ValVT.isScalarInteger() &&
7528 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7529 ArgValue =
7530 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7531 }
7532 InVals.push_back(ArgValue);
7533 continue;
7534 }
7535 if (VA.isMemLoc()) {
7536 HandleMemLoc();
7537 continue;
7538 }
7539 }
7540
7541 // On AIX a minimum of 8 words is saved to the parameter save area.
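// That is 32 bytes in 32-bit mode and 64 bytes in 64-bit mode.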
7542 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7543 // Area that is at least reserved in the caller of this function.
7544 unsigned CallerReservedArea = std::max<unsigned>(
7545 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7546
7547 // Set the size that is at least reserved in caller of this function. Tail
7548 // call optimized function's reserved stack space needs to be aligned so
7549 // that taking the difference between two stack areas will result in an
7550 // aligned stack.
7551 CallerReservedArea =
7552 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7553 FuncInfo->setMinReservedArea(CallerReservedArea);
7554
7555 if (isVarArg) {
7556 FuncInfo->setVarArgsFrameIndex(
7557 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(), true));
7558 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
7559
7560 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7561 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7562
7563 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7564 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7565 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7566
7567 // The fixed integer arguments of a variadic function are stored to the
7568 // VarArgsFrameIndex on the stack so that they may be loaded by
7569 // dereferencing the result of va_next.
7570 for (unsigned GPRIndex =
7571 (CCInfo.getStackSize() - LinkageSize) / PtrByteSize;
7572 GPRIndex < NumGPArgRegs; ++GPRIndex) {
7573
7574 const Register VReg =
7575 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7576 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7577
7578 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7579 SDValue Store =
7580 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
7581 MemOps.push_back(Store);
7582 // Increment the address for the next argument to store.
7583 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7584 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7585 }
7586 }
7587
7588 if (!MemOps.empty())
7589 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7590
7591 return Chain;
7592}
7593
7594SDValue PPCTargetLowering::LowerCall_AIX(
7595 SDValue Chain, SDValue Callee, CallFlags CFlags,
7596 const SmallVectorImpl<ISD::OutputArg> &Outs,
7597 const SmallVectorImpl<SDValue> &OutVals,
7598 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7599 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7600 const CallBase *CB) const {
7601 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7602 // AIX ABI stack frame layout.
7603
7604 assert((CFlags.CallConv == CallingConv::C ||
7605 CFlags.CallConv == CallingConv::Cold ||
7606 CFlags.CallConv == CallingConv::Fast) &&
7607 "Unexpected calling convention!");
7608
7609 if (CFlags.IsPatchPoint)
7610 report_fatal_error("This call type is unimplemented on AIX.");
7611
7612 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7613
7614 MachineFunction &MF = DAG.getMachineFunction();
7615 SmallVector<CCValAssign, 16> ArgLocs;
7616 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7617 *DAG.getContext());
7618
7619 // Reserve space for the linkage save area (LSA) on the stack.
7620 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7621 // [SP][CR][LR][2 x reserved][TOC].
7622 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7623 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7624 const bool IsPPC64 = Subtarget.isPPC64();
7625 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7626 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7627 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7628 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7629
7630 // The prolog code of the callee may store up to 8 GPR argument registers to
7631 // the stack, allowing va_start to index over them in memory if the callee
7632 // is variadic.
7633 // Because we cannot tell if this is needed on the caller side, we have to
7634 // conservatively assume that it is needed. As such, make sure we have at
7635 // least enough stack space for the caller to store the 8 GPRs.
7636 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7637 const unsigned NumBytes = std::max<unsigned>(
7638 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7639
7640 // Adjust the stack pointer for the new arguments...
7641 // These operations are automatically eliminated by the prolog/epilog pass.
7642 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7643 SDValue CallSeqStart = Chain;
7644
7645 SmallVector<std::pair<MCRegister, SDValue>, 8> RegsToPass;
7646 SmallVector<SDValue, 8> MemOpChains;
7647
7648 // Set up a copy of the stack pointer for loading and storing any
7649 // arguments that may not fit in the registers available for argument
7650 // passing.
7651 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7652 : DAG.getRegister(PPC::R1, MVT::i32);
7653
7654 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7655 const unsigned ValNo = ArgLocs[I].getValNo();
7656 SDValue Arg = OutVals[ValNo];
7657 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7658
7659 if (Flags.isByVal()) {
7660 const unsigned ByValSize = Flags.getByValSize();
7661
7662 // Nothing to do for zero-sized ByVals on the caller side.
7663 if (!ByValSize) {
7664 ++I;
7665 continue;
7666 }
7667
7668 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7669 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7670 (LoadOffset != 0)
7671 ? DAG.getObjectPtrOffset(
7672 dl, Arg, TypeSize::getFixed(LoadOffset))
7673 : Arg,
7674 MachinePointerInfo(), VT);
7675 };
7676
7677 unsigned LoadOffset = 0;
7678
7679 // Initialize registers that are fully occupied by the by-val argument.
7680 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7681 SDValue Load = GetLoad(PtrVT, LoadOffset);
7682 MemOpChains.push_back(Load.getValue(1));
7683 LoadOffset += PtrByteSize;
7684 const CCValAssign &ByValVA = ArgLocs[I++];
7685 assert(ByValVA.getValNo() == ValNo &&
7686 "Unexpected location for pass-by-value argument.");
7687 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7688 }
7689
7690 if (LoadOffset == ByValSize)
7691 continue;
7692
7693 // There must be one more loc to handle the remainder.
7694 assert(ArgLocs[I].getValNo() == ValNo &&
7695 "Expected additional location for by-value argument.");
7696
7697 if (ArgLocs[I].isMemLoc()) {
7698 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7699 const CCValAssign &ByValVA = ArgLocs[I++];
7700 ISD::ArgFlagsTy MemcpyFlags = Flags;
7701 // Only memcpy the bytes that don't pass in registers.
7702 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7703 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7704 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7705 dl, Arg, TypeSize::getFixed(LoadOffset))
7706 : Arg,
7707 DAG.getObjectPtrOffset(
7708 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7709 CallSeqStart, MemcpyFlags, DAG, dl);
7710 continue;
7711 }
7712
7713 // Initialize the final register residue.
7714 // Any residue that occupies the final by-val arg register must be
7715 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7716 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7717 // 2 and 1 byte loads.
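// Worked example for a 7-byte residue on PPC64: a 4-byte load shifted left
// by 32, a 2-byte load shifted left by 16 and a 1-byte load shifted left by 8
// are OR'ed together, leaving the 7 bytes left-justified in the register with
// the low byte zero.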
7718 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7719 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7720 "Unexpected register residue for by-value argument.");
7721 SDValue ResidueVal;
7722 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7723 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7724 const MVT VT =
7725 N == 1 ? MVT::i8
7726 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7727 SDValue Load = GetLoad(VT, LoadOffset);
7728 MemOpChains.push_back(Load.getValue(1));
7729 LoadOffset += N;
7730 Bytes += N;
7731
7732 // By-val arguments are passed left-justified in registers.
7733 // Every load here needs to be shifted, otherwise a full register load
7734 // should have been used.
7735 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7736 "Unexpected load emitted during handling of pass-by-value "
7737 "argument.");
7738 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7739 EVT ShiftAmountTy =
7740 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7741 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7742 SDValue ShiftedLoad =
7743 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7744 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7745 ShiftedLoad)
7746 : ShiftedLoad;
7747 }
7748
7749 const CCValAssign &ByValVA = ArgLocs[I++];
7750 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7751 continue;
7752 }
7753
7754 CCValAssign &VA = ArgLocs[I++];
7755 const MVT LocVT = VA.getLocVT();
7756 const MVT ValVT = VA.getValVT();
7757
7758 switch (VA.getLocInfo()) {
7759 default:
7760 report_fatal_error("Unexpected argument extension type.");
7761 case CCValAssign::Full:
7762 break;
7763 case CCValAssign::ZExt:
7764 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7765 break;
7766 case CCValAssign::SExt:
7767 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7768 break;
7769 }
7770
7771 if (VA.isRegLoc() && !VA.needsCustom()) {
7772 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7773 continue;
7774 }
7775
7776 // Vector arguments passed to VarArg functions need custom handling when
7777 // they are passed (at least partially) in GPRs.
7778 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7779 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7780 // Store value to its stack slot.
7781 SDValue PtrOff =
7782 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7783 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7784 SDValue Store =
7785 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7786 MemOpChains.push_back(Store);
7787 const unsigned OriginalValNo = VA.getValNo();
7788 // Then load the GPRs from the stack
7789 unsigned LoadOffset = 0;
7790 auto HandleCustomVecRegLoc = [&]() {
7791 assert(I != E && "Unexpected end of CCvalAssigns.");
7792 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7793 "Expected custom RegLoc.");
7794 CCValAssign RegVA = ArgLocs[I++];
7795 assert(RegVA.getValNo() == OriginalValNo &&
7796 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7797 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7798 DAG.getConstant(LoadOffset, dl, PtrVT));
7799 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7800 MemOpChains.push_back(Load.getValue(1));
7801 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7802 LoadOffset += PtrByteSize;
7803 };
7804
7805 // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7806 // 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7807 // R10.
7808 HandleCustomVecRegLoc();
7809 HandleCustomVecRegLoc();
7810
7811 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7812 ArgLocs[I].getValNo() == OriginalValNo) {
7813 assert(!IsPPC64 &&
7814 "Only 2 custom RegLocs expected for 64-bit codegen.");
7815 HandleCustomVecRegLoc();
7816 HandleCustomVecRegLoc();
7817 }
7818
7819 continue;
7820 }
7821
7822 if (VA.isMemLoc()) {
7823 SDValue PtrOff =
7824 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7825 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7826 MemOpChains.push_back(
7827 DAG.getStore(Chain, dl, Arg, PtrOff,
7829 Subtarget.getFrameLowering()->getStackAlign()));
7830
7831 continue;
7832 }
7833
7834 if (!ValVT.isFloatingPoint())
7836 "Unexpected register handling for calling convention.");
7837
7838 // Custom handling is used for GPR initializations for vararg float
7839 // arguments.
7840 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7841 LocVT.isInteger() &&
7842 "Custom register handling only expected for VarArg.");
7843
7844 SDValue ArgAsInt =
7845 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7846
7847 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7848 // f32 in 32-bit GPR
7849 // f64 in 64-bit GPR
7850 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7851 else if (Arg.getValueType().getFixedSizeInBits() <
7852 LocVT.getFixedSizeInBits())
7853 // f32 in 64-bit GPR.
7854 RegsToPass.push_back(std::make_pair(
7855 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7856 else {
7857 // f64 in two 32-bit GPRs
7858 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7859 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7860 "Unexpected custom register for argument!");
7861 CCValAssign &GPR1 = VA;
7862 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7863 DAG.getConstant(32, dl, MVT::i8));
7864 RegsToPass.push_back(std::make_pair(
7865 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7866
7867 if (I != E) {
7868 // If only 1 GPR was available, there will only be one custom GPR and
7869 // the argument will also pass in memory.
7870 CCValAssign &PeekArg = ArgLocs[I];
7871 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7872 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7873 CCValAssign &GPR2 = ArgLocs[I++];
7874 RegsToPass.push_back(std::make_pair(
7875 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7876 }
7877 }
7878 }
7879 }
7880
7881 if (!MemOpChains.empty())
7882 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7883
7884 // For indirect calls, we need to save the TOC base to the stack for
7885 // restoration after the call.
7886 if (CFlags.IsIndirect) {
7887 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7888 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7889 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7890 const MVT PtrVT = Subtarget.getScalarIntVT();
7891 const unsigned TOCSaveOffset =
7892 Subtarget.getFrameLowering()->getTOCSaveOffset();
7893
7894 setUsesTOCBasePtr(DAG);
7895 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7896 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7897 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7898 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7899 Chain = DAG.getStore(
7900 Val.getValue(1), dl, Val, AddPtr,
7901 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7902 }
7903
7904 // Build a sequence of copy-to-reg nodes chained together with token chain
7905 // and flag operands which copy the outgoing args into the appropriate regs.
7906 SDValue InGlue;
7907 for (auto Reg : RegsToPass) {
7908 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7909 InGlue = Chain.getValue(1);
7910 }
7911
7912 const int SPDiff = 0;
7913 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7914 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7915}
7916
7917bool
7918PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7919 MachineFunction &MF, bool isVarArg,
7920 const SmallVectorImpl<ISD::OutputArg> &Outs,
7921 LLVMContext &Context,
7922 const Type *RetTy) const {
7923 SmallVector<CCValAssign, 16> RVLocs;
7924 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7925 return CCInfo.CheckReturn(
7926 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7927 ? RetCC_PPC_Cold
7928 : RetCC_PPC);
7929}
7930
7931SDValue
7932PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7933 bool isVarArg,
7934 const SmallVectorImpl<ISD::OutputArg> &Outs,
7935 const SmallVectorImpl<SDValue> &OutVals,
7936 const SDLoc &dl, SelectionDAG &DAG) const {
7937 SmallVector<CCValAssign, 16> RVLocs;
7938 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7939 *DAG.getContext());
7940 CCInfo.AnalyzeReturn(Outs,
7941 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7942 ? RetCC_PPC_Cold
7943 : RetCC_PPC);
7944
7945 SDValue Glue;
7946 SmallVector<SDValue, 4> RetOps(1, Chain);
7947
7948 // Copy the result values into the output registers.
7949 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7950 CCValAssign &VA = RVLocs[i];
7951 assert(VA.isRegLoc() && "Can only return in registers!");
7952
7953 SDValue Arg = OutVals[RealResIdx];
7954
7955 switch (VA.getLocInfo()) {
7956 default: llvm_unreachable("Unknown loc info!");
7957 case CCValAssign::Full: break;
7958 case CCValAssign::AExt:
7959 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7960 break;
7961 case CCValAssign::ZExt:
7962 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7963 break;
7964 case CCValAssign::SExt:
7965 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7966 break;
7967 }
7968 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7969 bool isLittleEndian = Subtarget.isLittleEndian();
7970 // Legalize ret f64 -> ret 2 x i32.
7971 SDValue SVal =
7972 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7973 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7974 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7975 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7976 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7977 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7978 Glue = Chain.getValue(1);
7979 VA = RVLocs[++i]; // skip ahead to next loc
7980 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7981 } else
7982 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7983 Glue = Chain.getValue(1);
7984 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7985 }
7986
7987 RetOps[0] = Chain; // Update chain.
7988
7989 // Add the glue if we have it.
7990 if (Glue.getNode())
7991 RetOps.push_back(Glue);
7992
7993 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7994}
7995
7996SDValue
7997PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7998 SelectionDAG &DAG) const {
7999 SDLoc dl(Op);
8000
8001 // Get the correct type for integers.
8002 EVT IntVT = Op.getValueType();
8003
8004 // Get the inputs.
8005 SDValue Chain = Op.getOperand(0);
8006 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8007 // Build a DYNAREAOFFSET node.
8008 SDValue Ops[2] = {Chain, FPSIdx};
8009 SDVTList VTs = DAG.getVTList(IntVT);
8010 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
8011}
8012
8013SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
8014 SelectionDAG &DAG) const {
8015 // When we pop the dynamic allocation we need to restore the SP link.
8016 SDLoc dl(Op);
8017
8018 // Get the correct type for pointers.
8019 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8020
8021 // Construct the stack pointer operand.
8022 bool isPPC64 = Subtarget.isPPC64();
8023 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
8024 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
8025
8026 // Get the operands for the STACKRESTORE.
8027 SDValue Chain = Op.getOperand(0);
8028 SDValue SaveSP = Op.getOperand(1);
8029
8030 // Load the old link SP.
8031 SDValue LoadLinkSP =
8032 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
8033
8034 // Restore the stack pointer.
8035 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
8036
8037 // Store the old link SP.
8038 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
8039}
8040
8041SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
8042 MachineFunction &MF = DAG.getMachineFunction();
8043 bool isPPC64 = Subtarget.isPPC64();
8044 EVT PtrVT = getPointerTy(MF.getDataLayout());
8045
8046 // Get the current return address save index. This index is used primarily
8047 // when lowering RETURNADDR.
8048 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8049 int RASI = FI->getReturnAddrSaveIndex();
8050
8051 // If the return address save index hasn't been defined yet.
8052 if (!RASI) {
8053 // Find out the fixed offset of the return address (LR) save area.
8054 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
8055 // Allocate the frame index for the return address save area.
8056 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
8057 // Save the result.
8058 FI->setReturnAddrSaveIndex(RASI);
8059 }
8060 return DAG.getFrameIndex(RASI, PtrVT);
8061}
8062
8063SDValue
8064PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
8065 MachineFunction &MF = DAG.getMachineFunction();
8066 bool isPPC64 = Subtarget.isPPC64();
8067 EVT PtrVT = getPointerTy(MF.getDataLayout());
8068
8069 // Get current frame pointer save index. The users of this index will be
8070 // primarily DYNALLOC instructions.
8071 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
8072 int FPSI = FI->getFramePointerSaveIndex();
8073
8074 // If the frame pointer save index hasn't been defined yet.
8075 if (!FPSI) {
8076 // Find out the fixed offset of the frame pointer save area.
8077 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
8078 // Allocate the frame index for the frame pointer save area.
8079 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
8080 // Save the result.
8081 FI->setFramePointerSaveIndex(FPSI);
8082 }
8083 return DAG.getFrameIndex(FPSI, PtrVT);
8084}
8085
8086SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
8087 SelectionDAG &DAG) const {
8088 MachineFunction &MF = DAG.getMachineFunction();
8089 // Get the inputs.
8090 SDValue Chain = Op.getOperand(0);
8091 SDValue Size = Op.getOperand(1);
8092 SDLoc dl(Op);
8093
8094 // Get the correct type for pointers.
8095 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8096 // Negate the size.
8097 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
8098 DAG.getConstant(0, dl, PtrVT), Size);
8099 // Construct a node for the frame pointer save index.
8100 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
8101 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
8102 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
8103 if (hasInlineStackProbe(MF))
8104 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
8105 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
8106}
8107
8108SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
8109 SelectionDAG &DAG) const {
8110 MachineFunction &MF = DAG.getMachineFunction();
8111
8112 bool isPPC64 = Subtarget.isPPC64();
8113 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8114
8115 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
8116 return DAG.getFrameIndex(FI, PtrVT);
8117}
8118
8119SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
8120 SelectionDAG &DAG) const {
8121 SDLoc DL(Op);
8122 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
8123 DAG.getVTList(MVT::i32, MVT::Other),
8124 Op.getOperand(0), Op.getOperand(1));
8125}
8126
8127SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
8128 SelectionDAG &DAG) const {
8129 SDLoc DL(Op);
8130 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
8131 Op.getOperand(0), Op.getOperand(1));
8132}
8133
8134SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
8135 if (Op.getValueType().isVector())
8136 return LowerVectorLoad(Op, DAG);
8137
8138 assert(Op.getValueType() == MVT::i1 &&
8139 "Custom lowering only for i1 loads");
8140
8141 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8142
8143 SDLoc dl(Op);
8144 LoadSDNode *LD = cast<LoadSDNode>(Op);
8145
8146 SDValue Chain = LD->getChain();
8147 SDValue BasePtr = LD->getBasePtr();
8148 MachineMemOperand *MMO = LD->getMemOperand();
8149
8150 SDValue NewLD =
8151 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8152 BasePtr, MVT::i8, MMO);
8153 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8154
8155 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8156 return DAG.getMergeValues(Ops, dl);
8157}
8158
8159SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8160 if (Op.getOperand(1).getValueType().isVector())
8161 return LowerVectorStore(Op, DAG);
8162
8163 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8164 "Custom lowering only for i1 stores");
8165
8166 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8167
8168 SDLoc dl(Op);
8169 StoreSDNode *ST = cast<StoreSDNode>(Op);
8170
8171 SDValue Chain = ST->getChain();
8172 SDValue BasePtr = ST->getBasePtr();
8173 SDValue Value = ST->getValue();
8174 MachineMemOperand *MMO = ST->getMemOperand();
8175
8176 Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8177 Value);
8178 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8179}
8180
8181// FIXME: Remove this once the ANDI glue bug is fixed:
8182SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8183 assert(Op.getValueType() == MVT::i1 &&
8184 "Custom lowering only for i1 results");
8185
8186 SDLoc DL(Op);
8187 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8188}
8189
8190SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8191 SelectionDAG &DAG) const {
8192
8193 // Implements a vector truncate that fits in a vector register as a shuffle.
8194 // We want to legalize vector truncates down to where the source fits in
8195 // a vector register (and target is therefore smaller than vector register
8196 // size). At that point legalization will try to custom lower the sub-legal
8197 // result and get here - where we can contain the truncate as a single target
8198 // operation.
8199
8200 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8201 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8202 //
8203 // We will implement it for big-endian ordering as this (where u denotes
8204 // an undefined element):
8205 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8206 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8207 //
8208 // The same operation in little-endian ordering will be:
8209 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8210 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
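// As a concrete illustration: truncating v8i16 to v8i8 bitcasts the 128-bit
// source to v16i8 and, on little-endian, keeps bytes {0, 2, 4, 6, 8, 10, 12, 14};
// on big-endian it keeps bytes {1, 3, 5, 7, 9, 11, 13, 15}. The remaining lanes
// of the wide result are left undefined.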
8211
8212 EVT TrgVT = Op.getValueType();
8213 assert(TrgVT.isVector() && "Vector type expected.");
8214 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8215 EVT EltVT = TrgVT.getVectorElementType();
8216 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8217 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8219 return SDValue();
8220
8221 SDValue N1 = Op.getOperand(0);
8222 EVT SrcVT = N1.getValueType();
8223 unsigned SrcSize = SrcVT.getSizeInBits();
8224 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8227 return SDValue();
8228 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8229 return SDValue();
8230
8231 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8232 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8233
8234 SDLoc DL(Op);
8235 SDValue Op1, Op2;
8236 if (SrcSize == 256) {
8237 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8238 EVT SplitVT =
8239 SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
8240 unsigned SplitNumElts = SplitVT.getVectorNumElements();
8241 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8242 DAG.getConstant(0, DL, VecIdxTy));
8243 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8244 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8245 }
8246 else {
8247 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8248 Op2 = DAG.getUNDEF(WideVT);
8249 }
8250
8251 // First list the elements we want to keep.
8252 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8253 SmallVector<int, 16> ShuffV;
8254 if (Subtarget.isLittleEndian())
8255 for (unsigned i = 0; i < TrgNumElts; ++i)
8256 ShuffV.push_back(i * SizeMult);
8257 else
8258 for (unsigned i = 1; i <= TrgNumElts; ++i)
8259 ShuffV.push_back(i * SizeMult - 1);
8260
8261 // Populate the remaining elements with undefs.
8262 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8263 // ShuffV.push_back(i + WideNumElts);
8264 ShuffV.push_back(WideNumElts + 1);
8265
8266 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8267 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8268 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8269}
8270
8271 /// LowerSELECT_CC - Lower floating-point select_cc's into the fsel
8272 /// instruction when possible.
8273SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8274 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8275 EVT ResVT = Op.getValueType();
8276 EVT CmpVT = Op.getOperand(0).getValueType();
8277 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8278 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8279 SDLoc dl(Op);
8280
8281 // Without power9-vector, we don't have a native instruction for f128 comparison.
8282 // The following transformation to a libcall is needed for setcc:
8283 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8284 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8285 SDValue Z = DAG.getSetCC(
8286 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8287 LHS, RHS, CC);
8288 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8289 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8290 }
8291
8292 // Not FP, or using SPE? Not a fsel.
8293 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8294 Subtarget.hasSPE())
8295 return Op;
8296
8297 SDNodeFlags Flags = Op.getNode()->getFlags();
8298
8299 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8300 // presence of infinities.
8301 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8302 switch (CC) {
8303 default:
8304 break;
8305 case ISD::SETOGT:
8306 case ISD::SETGT:
8307 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8308 case ISD::SETOLT:
8309 case ISD::SETLT:
8310 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8311 }
8312 }
8313
8314 // We might be able to do better than this under some circumstances, but in
8315 // general, fsel-based lowering of select is a finite-math-only optimization.
8316 // For more information, see section F.3 of the 2.06 ISA specification.
8317 // With ISA 3.0
8318 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8319 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8320 ResVT == MVT::f128)
8321 return Op;
8322
8323 // If the RHS of the comparison is a 0.0, we don't need to do the
8324 // subtraction at all.
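// For example, with RHS == 0.0:
//   select_cc LHS, 0.0, TV, FV, SETGE  ->  fsel(LHS, TV, FV)
//   select_cc LHS, 0.0, TV, FV, SETLT  ->  fsel(LHS, FV, TV)
// since fsel returns its second operand when the first is >= 0.0 and its
// third operand otherwise.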
8325 SDValue Sel1;
8326 if (isFloatingPointZero(RHS))
8327 switch (CC) {
8328 default: break; // SETUO etc aren't handled by fsel.
8329 case ISD::SETNE:
8330 std::swap(TV, FV);
8331 [[fallthrough]];
8332 case ISD::SETEQ:
8333 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8334 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8335 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8336 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8337 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8338 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8339 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8340 case ISD::SETULT:
8341 case ISD::SETLT:
8342 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8343 [[fallthrough]];
8344 case ISD::SETOGE:
8345 case ISD::SETGE:
8346 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8347 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8348 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8349 case ISD::SETUGT:
8350 case ISD::SETGT:
8351 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8352 [[fallthrough]];
8353 case ISD::SETOLE:
8354 case ISD::SETLE:
8355 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8356 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8357 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8358 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8359 }
8360
8361 SDValue Cmp;
8362 switch (CC) {
8363 default: break; // SETUO etc aren't handled by fsel.
8364 case ISD::SETNE:
8365 std::swap(TV, FV);
8366 [[fallthrough]];
8367 case ISD::SETEQ:
8368 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8369 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8370 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8371 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8372 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8373 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8374 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8375 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8376 case ISD::SETULT:
8377 case ISD::SETLT:
8378 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8379 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8380 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8381 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8382 case ISD::SETOGE:
8383 case ISD::SETGE:
8384 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8385 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8386 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8387 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8388 case ISD::SETUGT:
8389 case ISD::SETGT:
8390 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8391 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8392 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8393 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8394 case ISD::SETOLE:
8395 case ISD::SETLE:
8396 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8397 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8398 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8399 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8400 }
8401 return Op;
8402}
8403
8404static unsigned getPPCStrictOpcode(unsigned Opc) {
8405 switch (Opc) {
8406 default:
8407 llvm_unreachable("No strict version of this opcode!");
8408 case PPCISD::FCTIDZ:
8409 return PPCISD::STRICT_FCTIDZ;
8410 case PPCISD::FCTIWZ:
8411 return PPCISD::STRICT_FCTIWZ;
8412 case PPCISD::FCTIDUZ:
8413 return PPCISD::STRICT_FCTIDUZ;
8414 case PPCISD::FCTIWUZ:
8415 return PPCISD::STRICT_FCTIWUZ;
8416 case PPCISD::FCFID:
8417 return PPCISD::STRICT_FCFID;
8418 case PPCISD::FCFIDU:
8419 return PPCISD::STRICT_FCFIDU;
8420 case PPCISD::FCFIDS:
8421 return PPCISD::STRICT_FCFIDS;
8422 case PPCISD::FCFIDUS:
8423 return PPCISD::STRICT_FCFIDUS;
8424 }
8425}
8426
8427 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8428 const PPCSubtarget &Subtarget) {
8429 SDLoc dl(Op);
8430 bool IsStrict = Op->isStrictFPOpcode();
8431 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8432 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8433
8434 // TODO: Any other flags to propagate?
8435 SDNodeFlags Flags;
8436 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8437
8438 // For strict nodes, source is the second operand.
8439 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8440 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8441 MVT DestTy = Op.getSimpleValueType();
8442 assert(Src.getValueType().isFloatingPoint() &&
8443 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8444 DestTy == MVT::i64) &&
8445 "Invalid FP_TO_INT types");
8446 if (Src.getValueType() == MVT::f32) {
8447 if (IsStrict) {
8448 Src =
8449 DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8450 DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8451 Chain = Src.getValue(1);
8452 } else
8453 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8454 }
8455 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8456 DestTy = Subtarget.getScalarIntVT();
8457 unsigned Opc = ISD::DELETED_NODE;
8458 switch (DestTy.SimpleTy) {
8459 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8460 case MVT::i32:
8461 Opc = IsSigned ? PPCISD::FCTIWZ
8462 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8463 break;
8464 case MVT::i64:
8465 assert((IsSigned || Subtarget.hasFPCVT()) &&
8466 "i64 FP_TO_UINT is supported only with FPCVT");
8467 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8468 }
8469 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8470 SDValue Conv;
8471 if (IsStrict) {
8472 Opc = getPPCStrictOpcode(Opc);
8473 Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8474 Flags);
8475 } else {
8476 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8477 }
8478 return Conv;
8479}
8480
8481void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8482 SelectionDAG &DAG,
8483 const SDLoc &dl) const {
8484 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8485 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8486 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8487 bool IsStrict = Op->isStrictFPOpcode();
8488
8489 // Convert the FP value to an int value through memory.
8490 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8491 (IsSigned || Subtarget.hasFPCVT());
8492 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8493 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8494 MachinePointerInfo MPI =
8495 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8496
8497 // Emit a store to the stack slot.
8498 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8499 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8500 if (i32Stack) {
8501 MachineFunction &MF = DAG.getMachineFunction();
8502 Alignment = Align(4);
8503 MachineMemOperand *MMO =
8504 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8505 SDValue Ops[] = { Chain, Tmp, FIPtr };
8506 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8507 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8508 } else
8509 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8510
8511 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8512 // add in a bias on big endian.
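// (The i32 payload of the fct[id]wz result occupies the low-order word of the
// 8-byte f64 stack slot, which sits at byte offset 4 on big-endian systems.)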
8513 if (Op.getValueType() == MVT::i32 && !i32Stack &&
8514 !Subtarget.isLittleEndian()) {
8515 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8516 DAG.getConstant(4, dl, FIPtr.getValueType()));
8517 MPI = MPI.getWithOffset(4);
8518 }
8519
8520 RLI.Chain = Chain;
8521 RLI.Ptr = FIPtr;
8522 RLI.MPI = MPI;
8523 RLI.Alignment = Alignment;
8524}
8525
8526/// Custom lowers floating point to integer conversions to use
8527/// the direct move instructions available in ISA 2.07 to avoid the
8528/// need for load/store combinations.
8529SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8530 SelectionDAG &DAG,
8531 const SDLoc &dl) const {
8532 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8533 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8534 if (Op->isStrictFPOpcode())
8535 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8536 else
8537 return Mov;
8538}
8539
8540SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8541 const SDLoc &dl) const {
8542 bool IsStrict = Op->isStrictFPOpcode();
8543 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8544 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8545 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8546 EVT SrcVT = Src.getValueType();
8547 EVT DstVT = Op.getValueType();
8548
8549 // FP to INT conversions are legal for f128.
8550 if (SrcVT == MVT::f128)
8551 return Subtarget.hasP9Vector() ? Op : SDValue();
8552
8553 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8554 // PPC (the libcall is not available).
8555 if (SrcVT == MVT::ppcf128) {
8556 if (DstVT == MVT::i32) {
8557 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8558 // set other fast-math flags to FP operations in both strict and
8559 // non-strict cases. (FP_TO_SINT, FSUB)
8560 SDNodeFlags Flags;
8561 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8562
8563 if (IsSigned) {
8564 SDValue Lo, Hi;
8565 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8566
8567 // Add the two halves of the long double in round-to-zero mode, and use
8568 // a smaller FP_TO_SINT.
8569 if (IsStrict) {
8570 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8571 DAG.getVTList(MVT::f64, MVT::Other),
8572 {Op.getOperand(0), Lo, Hi}, Flags);
8573 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8574 DAG.getVTList(MVT::i32, MVT::Other),
8575 {Res.getValue(1), Res}, Flags);
8576 } else {
8577 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8578 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8579 }
8580 } else {
8581 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8582 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8583 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8584 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8585 if (IsStrict) {
8586 // Sel = Src < 0x80000000
8587 // FltOfs = select Sel, 0.0, 0x80000000
8588 // IntOfs = select Sel, 0, 0x80000000
8589 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8590 SDValue Chain = Op.getOperand(0);
8591 EVT SetCCVT =
8592 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8593 EVT DstSetCCVT =
8594 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8595 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8596 Chain, true);
8597 Chain = Sel.getValue(1);
8598
8599 SDValue FltOfs = DAG.getSelect(
8600 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8601 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8602
8603 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8604 DAG.getVTList(SrcVT, MVT::Other),
8605 {Chain, Src, FltOfs}, Flags);
8606 Chain = Val.getValue(1);
8607 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8608 DAG.getVTList(DstVT, MVT::Other),
8609 {Chain, Val}, Flags);
8610 Chain = SInt.getValue(1);
8611 SDValue IntOfs = DAG.getSelect(
8612 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8613 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8614 return DAG.getMergeValues({Result, Chain}, dl);
8615 } else {
8616 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8617 // FIXME: generated code sucks.
8618 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8619 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8620 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8621 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8622 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8623 }
8624 }
8625 }
8626
8627 return SDValue();
8628 }
8629
8630 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8631 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8632
8633 ReuseLoadInfo RLI;
8634 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8635
8636 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8637 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8638}
8639
8640// We're trying to insert a regular store, S, and then a load, L. If the
8641// incoming value, O, is a load, we might just be able to have our load use the
8642// address used by O. However, we don't know if anything else will store to
8643// that address before we can load from it. To prevent this situation, we need
8644// to insert our load, L, into the chain as a peer of O. To do this, we give L
8645// the same chain operand as O, we create a token factor from the chain results
8646// of O and L, and we replace all uses of O's chain result with that token
8647// factor (this last part is handled by makeEquivalentMemoryOrdering).
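// Schematically (an illustrative restatement of the above):
//   before:  Ch -> O -> (users of O's chain result)
//   after:   Ch -> O,  Ch -> L,  TokenFactor(O.chain, L.chain) -> (those users)
// so every operation that was ordered after O's chain result is now ordered
// after L as well.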
8648bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8649 ReuseLoadInfo &RLI,
8650 SelectionDAG &DAG,
8651 ISD::LoadExtType ET) const {
8652 // Conservatively skip reusing for constrained FP nodes.
8653 if (Op->isStrictFPOpcode())
8654 return false;
8655
8656 SDLoc dl(Op);
8657 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8658 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8659 if (ET == ISD::NON_EXTLOAD &&
8660 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8661 isOperationLegalOrCustom(Op.getOpcode(),
8662 Op.getOperand(0).getValueType())) {
8663
8664 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8665 return true;
8666 }
8667
8668 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8669 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8670 LD->isNonTemporal())
8671 return false;
8672 if (LD->getMemoryVT() != MemVT)
8673 return false;
8674
8675 // If the result of the load is an illegal type, then we can't build a
8676 // valid chain for reuse since the legalised loads and token factor node that
8677 // ties the legalised loads together uses a different output chain than the
8678 // illegal load.
8679 if (!isTypeLegal(LD->getValueType(0)))
8680 return false;
8681
8682 RLI.Ptr = LD->getBasePtr();
8683 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8684 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8685 "Non-pre-inc AM on PPC?");
8686 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8687 LD->getOffset());
8688 }
8689
8690 RLI.Chain = LD->getChain();
8691 RLI.MPI = LD->getPointerInfo();
8692 RLI.IsDereferenceable = LD->isDereferenceable();
8693 RLI.IsInvariant = LD->isInvariant();
8694 RLI.Alignment = LD->getAlign();
8695 RLI.AAInfo = LD->getAAInfo();
8696 RLI.Ranges = LD->getRanges();
8697
8698 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8699 return true;
8700}
8701
8702 /// Analyze the profitability of a direct move:
8703 /// prefer a float load over an int load plus a direct move
8704 /// when there is no integer use of the int load.
8705bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8706 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8707 if (Origin->getOpcode() != ISD::LOAD)
8708 return true;
8709
8710 // If there is no LXSIBZX/LXSIHZX, like Power8,
8711 // prefer direct move if the memory size is 1 or 2 bytes.
8712 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8713 if (!Subtarget.hasP9Vector() &&
8714 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8715 return true;
8716
8717 for (SDUse &Use : Origin->uses()) {
8718
8719 // Only look at the users of the loaded value.
8720 if (Use.getResNo() != 0)
8721 continue;
8722
8723 SDNode *User = Use.getUser();
8724 if (User->getOpcode() != ISD::SINT_TO_FP &&
8725 User->getOpcode() != ISD::UINT_TO_FP &&
8726 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8727 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8728 return true;
8729 }
8730
8731 return false;
8732}
8733
8734 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8735 const PPCSubtarget &Subtarget,
8736 SDValue Chain = SDValue()) {
8737 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8738 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8739 SDLoc dl(Op);
8740
8741 // TODO: Any other flags to propagate?
8742 SDNodeFlags Flags;
8743 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8744
8745 // If we have FCFIDS, then use it when converting to single-precision.
8746 // Otherwise, convert to double-precision and then round.
8747 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8748 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8749 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8750 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8751 if (Op->isStrictFPOpcode()) {
8752 if (!Chain)
8753 Chain = Op.getOperand(0);
8754 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8755 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8756 } else
8757 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8758}
8759
8760/// Custom lowers integer to floating point conversions to use
8761/// the direct move instructions available in ISA 2.07 to avoid the
8762/// need for load/store combinations.
8763SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8764 SelectionDAG &DAG,
8765 const SDLoc &dl) const {
8766 assert((Op.getValueType() == MVT::f32 ||
8767 Op.getValueType() == MVT::f64) &&
8768 "Invalid floating point type as target of conversion");
8769 assert(Subtarget.hasFPCVT() &&
8770 "Int to FP conversions with direct moves require FPCVT");
8771 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8772 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8773 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8774 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8775 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8776 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8777 return convertIntToFP(Op, Mov, DAG, Subtarget);
8778}
8779
8780static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8781
8782 EVT VecVT = Vec.getValueType();
8783 assert(VecVT.isVector() && "Expected a vector type.");
8784 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8785
8786 EVT EltVT = VecVT.getVectorElementType();
8787 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8788 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8789
8790 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8791 SmallVector<SDValue, 16> Ops(NumConcat);
8792 Ops[0] = Vec;
8793 SDValue UndefVec = DAG.getUNDEF(VecVT);
8794 for (unsigned i = 1; i < NumConcat; ++i)
8795 Ops[i] = UndefVec;
8796
8797 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8798}
8799
8800SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8801 const SDLoc &dl) const {
8802 bool IsStrict = Op->isStrictFPOpcode();
8803 unsigned Opc = Op.getOpcode();
8804 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8807 "Unexpected conversion type");
8808 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8809 "Supports conversions to v2f64/v4f32 only.");
8810
8811 // TODO: Any other flags to propagate?
8812 SDNodeFlags Flags;
8813 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8814
8815 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8816 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8817
8818 SDValue Wide = widenVec(DAG, Src, dl);
8819 EVT WideVT = Wide.getValueType();
8820 unsigned WideNumElts = WideVT.getVectorNumElements();
8821 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8822
8823 SmallVector<int, 16> ShuffV;
8824 for (unsigned i = 0; i < WideNumElts; ++i)
8825 ShuffV.push_back(i + WideNumElts);
8826
8827 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8828 int SaveElts = FourEltRes ? 4 : 2;
8829 if (Subtarget.isLittleEndian())
8830 for (int i = 0; i < SaveElts; i++)
8831 ShuffV[i * Stride] = i;
8832 else
8833 for (int i = 1; i <= SaveElts; i++)
8834 ShuffV[i * Stride - 1] = i - 1;
8835
8836 SDValue ShuffleSrc2 =
8837 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8838 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8839
8840 SDValue Extend;
8841 if (SignedConv) {
8842 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8843 EVT ExtVT = Src.getValueType();
8844 if (Subtarget.hasP9Altivec())
8845 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8846 IntermediateVT.getVectorNumElements());
8847
8848 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8849 DAG.getValueType(ExtVT));
8850 } else
8851 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8852
8853 if (IsStrict)
8854 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8855 {Op.getOperand(0), Extend}, Flags);
8856
8857 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8858}
8859
8860SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8861 SelectionDAG &DAG) const {
8862 SDLoc dl(Op);
8863 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8864 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8865 bool IsStrict = Op->isStrictFPOpcode();
8866 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8867 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8868
8869 // TODO: Any other flags to propagate?
8870 SDNodeFlags Flags;
8871 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8872
8873 EVT InVT = Src.getValueType();
8874 EVT OutVT = Op.getValueType();
8875 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8876 isOperationCustom(Op.getOpcode(), InVT))
8877 return LowerINT_TO_FPVector(Op, DAG, dl);
8878
8879 // Conversions to f128 are legal.
8880 if (Op.getValueType() == MVT::f128)
8881 return Subtarget.hasP9Vector() ? Op : SDValue();
8882
8883 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8884 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8885 return SDValue();
8886
8887 if (Src.getValueType() == MVT::i1) {
8888 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8889 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8890 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8891 if (IsStrict)
8892 return DAG.getMergeValues({Sel, Chain}, dl);
8893 else
8894 return Sel;
8895 }
8896
8897 // If we have direct moves, we can do the entire conversion and skip the
8898 // store/load; however, without FPCVT we can't do most conversions.
8899 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8900 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8901 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8902
8903 assert((IsSigned || Subtarget.hasFPCVT()) &&
8904 "UINT_TO_FP is supported only with FPCVT");
8905
8906 if (Src.getValueType() == MVT::i64) {
8907 SDValue SINT = Src;
8908 // When converting to single-precision, we actually need to convert
8909 // to double-precision first and then round to single-precision.
8910 // To avoid double-rounding effects during that operation, we have
8911 // to prepare the input operand. Bits that might be truncated when
8912 // converting to double-precision are replaced by a bit that won't
8913 // be lost at this stage, but is below the single-precision rounding
8914 // position.
8915 //
8916 // However, if afn is in effect, accept double
8917 // rounding to avoid the extra overhead.
8918 // FIXME: Currently INT_TO_FP can't support fast math flags because
8919 // of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
8920 // false.
8921 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
8922 !Op->getFlags().hasApproximateFuncs()) {
8923
8924 // Twiddle input to make sure the low 11 bits are zero. (If this
8925 // is the case, we are guaranteed the value will fit into the 53 bit
8926 // mantissa of an IEEE double-precision value without rounding.)
8927 // If any of those low 11 bits were not zero originally, make sure
8928 // bit 12 (value 2048) is set instead, so that the final rounding
8929 // to single-precision gets the correct result.
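// Worked example: SINT = 0x1003 has nonzero low bits, so
//   (0x1003 & 2047) + 2047 = 0x802;  0x802 | 0x1003 = 0x1803;  & -2048 = 0x1800,
// i.e. the low 11 bits are cleared and the bit with value 2048 is set.
// SINT = 0x1000 already has zero low bits and passes through unchanged.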
8930 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8931 SINT, DAG.getConstant(2047, dl, MVT::i64));
8932 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8933 Round, DAG.getConstant(2047, dl, MVT::i64));
8934 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8935 Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
8936 DAG.getSignedConstant(-2048, dl, MVT::i64));
8937
8938 // However, we cannot use that value unconditionally: if the magnitude
8939 // of the input value is small, the bit-twiddling we did above might
8940 // end up visibly changing the output. Fortunately, in that case, we
8941 // don't need to twiddle bits since the original input will convert
8942 // exactly to double-precision floating-point already. Therefore,
8943 // construct a conditional to use the original value if the top 11
8944 // bits are all sign-bit copies, and use the rounded value computed
8945 // above otherwise.
8946 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8947 SINT, DAG.getConstant(53, dl, MVT::i32));
8948 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8949 Cond, DAG.getConstant(1, dl, MVT::i64));
8950 Cond = DAG.getSetCC(
8951 dl,
8952 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8953 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8954
8955 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8956 }
8957
8958 ReuseLoadInfo RLI;
8959 SDValue Bits;
8960
8961 MachineFunction &MF = DAG.getMachineFunction();
8962 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8963 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8964 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8965 if (RLI.ResChain)
8966 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8967 } else if (Subtarget.hasLFIWAX() &&
8968 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8969 MachineMemOperand *MMO =
8970 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8971 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8972 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8973 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8974 DAG.getVTList(MVT::f64, MVT::Other),
8975 Ops, MVT::i32, MMO);
8976 if (RLI.ResChain)
8977 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8978 } else if (Subtarget.hasFPCVT() &&
8979 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8980 MachineMemOperand *MMO =
8981 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8982 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8983 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8984 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8985 DAG.getVTList(MVT::f64, MVT::Other),
8986 Ops, MVT::i32, MMO);
8987 if (RLI.ResChain)
8988 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8989 } else if (((Subtarget.hasLFIWAX() &&
8990 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8991 (Subtarget.hasFPCVT() &&
8992 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8993 SINT.getOperand(0).getValueType() == MVT::i32) {
8994 MachineFrameInfo &MFI = MF.getFrameInfo();
8995 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8996
8997 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8998 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8999
9000 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
9001 MachinePointerInfo::getFixedStack(
9002 DAG.getMachineFunction(), FrameIdx));
9003 Chain = Store;
9004
9005 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9006 "Expected an i32 store");
9007
9008 RLI.Ptr = FIdx;
9009 RLI.Chain = Chain;
9010 RLI.MPI =
9011 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9012 RLI.Alignment = Align(4);
9013
9014 MachineMemOperand *MMO =
9015 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9016 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9017 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9018 Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
9019 PPCISD::LFIWZX : PPCISD::LFIWAX,
9020 dl, DAG.getVTList(MVT::f64, MVT::Other),
9021 Ops, MVT::i32, MMO);
9022 Chain = Bits.getValue(1);
9023 } else
9024 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
9025
9026 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
9027 if (IsStrict)
9028 Chain = FP.getValue(1);
9029
9030 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9031 if (IsStrict)
9032 FP = DAG.getNode(
9033 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9034 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
9035 Flags);
9036 else
9037 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9038 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9039 }
9040 return FP;
9041 }
9042
9043 assert(Src.getValueType() == MVT::i32 &&
9044 "Unhandled INT_TO_FP type in custom expander!");
9045 // Since we only generate this in 64-bit mode, we can take advantage of
9046 // 64-bit registers. In particular, sign extend the input value into the
9047 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
9048 // then lfd it and fcfid it.
9049 MachineFunction &MF = DAG.getMachineFunction();
9050 MachineFrameInfo &MFI = MF.getFrameInfo();
9051 EVT PtrVT = getPointerTy(MF.getDataLayout());
9052
9053 SDValue Ld;
9054 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
9055 ReuseLoadInfo RLI;
9056 bool ReusingLoad;
9057 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
9058 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
9059 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9060
9061 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
9062 MachinePointerInfo::getFixedStack(
9063 DAG.getMachineFunction(), FrameIdx));
9064 Chain = Store;
9065
9066 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
9067 "Expected an i32 store");
9068
9069 RLI.Ptr = FIdx;
9070 RLI.Chain = Chain;
9071 RLI.MPI =
9072 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
9073 RLI.Alignment = Align(4);
9074 }
9075
9076 MachineMemOperand *MMO =
9077 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
9078 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
9079 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
9080 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
9081 DAG.getVTList(MVT::f64, MVT::Other), Ops,
9082 MVT::i32, MMO);
9083 Chain = Ld.getValue(1);
9084 if (ReusingLoad && RLI.ResChain) {
9085 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
9086 }
9087 } else {
9088 assert(Subtarget.isPPC64() &&
9089 "i32->FP without LFIWAX supported only on PPC64");
9090
9091 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
9092 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
9093
9094 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
9095
9096 // STD the extended value into the stack slot.
9097 SDValue Store = DAG.getStore(
9098 Chain, dl, Ext64, FIdx,
9099 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9100 Chain = Store;
9101
9102 // Load the value as a double.
9103 Ld = DAG.getLoad(
9104 MVT::f64, dl, Chain, FIdx,
9105 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
9106 Chain = Ld.getValue(1);
9107 }
9108
9109 // FCFID it and return it.
9110 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
9111 if (IsStrict)
9112 Chain = FP.getValue(1);
9113 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
9114 if (IsStrict)
9115 FP = DAG.getNode(
9116 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
9117 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
9118 else
9119 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
9120 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
9121 }
9122 return FP;
9123}
9124
9125SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
9126 SelectionDAG &DAG) const {
9127 SDLoc Dl(Op);
9128 MachineFunction &MF = DAG.getMachineFunction();
9129 EVT PtrVT = getPointerTy(MF.getDataLayout());
9130 SDValue Chain = Op.getOperand(0);
9131
9132 // If the requested mode is a constant, just use the simpler mtfsb/mffscrni.
9133 if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
9134 uint64_t Mode = CVal->getZExtValue();
9135 assert(Mode < 4 && "Unsupported rounding mode!");
9136 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
9137 if (Subtarget.isISA3_0())
9138 return SDValue(
9139 DAG.getMachineNode(
9140 PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9141 {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9142 1);
9143 SDNode *SetHi = DAG.getMachineNode(
9144 (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9145 {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9146 SDNode *SetLo = DAG.getMachineNode(
9147 (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9148 {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9149 return SDValue(SetLo, 0);
9150 }
9151
9152 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
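// Concretely, the LLVM rounding modes map to the PPC RN field as:
//   0 (toward zero) -> 1,  1 (to nearest) -> 0,
//   2 (toward +inf) -> 2,  3 (toward -inf) -> 3.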
9153 SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9154 SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9155 DAG.getConstant(3, Dl, MVT::i32));
9156 SDValue DstFlag = DAG.getNode(
9157 ISD::XOR, Dl, MVT::i32, SrcFlag,
9158 DAG.getNode(ISD::AND, Dl, MVT::i32,
9159 DAG.getNOT(Dl,
9160 DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9161 MVT::i32),
9162 One));
9163 // For Power9 (ISA 3.0) there is a faster mffscrn, so we don't need to read the FPSCR here.
9164 SDValue MFFS;
9165 if (!Subtarget.isISA3_0()) {
9166 MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9167 Chain = MFFS.getValue(1);
9168 }
9169 SDValue NewFPSCR;
9170 if (Subtarget.isPPC64()) {
9171 if (Subtarget.isISA3_0()) {
9172 NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9173 } else {
9174 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9175 SDNode *InsertRN = DAG.getMachineNode(
9176 PPC::RLDIMI, Dl, MVT::i64,
9177 {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9178 DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9179 DAG.getTargetConstant(0, Dl, MVT::i32),
9180 DAG.getTargetConstant(62, Dl, MVT::i32)});
9181 NewFPSCR = SDValue(InsertRN, 0);
9182 }
9183 NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9184 } else {
9185 // In 32-bit mode, store f64, load and update the lower half.
9186 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9187 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9188 SDValue Addr = Subtarget.isLittleEndian()
9189 ? StackSlot
9190 : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9191 DAG.getConstant(4, Dl, PtrVT));
9192 if (Subtarget.isISA3_0()) {
9193 Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9194 } else {
9195 Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9196 SDValue Tmp =
9197 DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9198 Chain = Tmp.getValue(1);
9199 Tmp = SDValue(DAG.getMachineNode(
9200 PPC::RLWIMI, Dl, MVT::i32,
9201 {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9202 DAG.getTargetConstant(30, Dl, MVT::i32),
9203 DAG.getTargetConstant(31, Dl, MVT::i32)}),
9204 0);
9205 Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9206 }
9207 NewFPSCR =
9208 DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9209 Chain = NewFPSCR.getValue(1);
9210 }
9211 if (Subtarget.isISA3_0())
9212 return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9213 {NewFPSCR, Chain}),
9214 1);
9215 SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9216 SDNode *MTFSF = DAG.getMachineNode(
9217 PPC::MTFSF, Dl, MVT::Other,
9218 {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9219 return SDValue(MTFSF, 0);
9220}
9221
9222SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9223 SelectionDAG &DAG) const {
9224 SDLoc dl(Op);
9225 /*
9226 The rounding mode is in bits 30:31 of the FPSCR, and has the following
9227 settings:
9228 00 Round to nearest
9229 01 Round to 0
9230 10 Round to +inf
9231 11 Round to -inf
9232
9233 GET_ROUNDING, on the other hand, expects the following:
9234 -1 Undefined
9235 0 Round to 0
9236 1 Round to nearest
9237 2 Round to +inf
9238 3 Round to -inf
9239
9240 To perform the conversion, we do:
9241 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9242 */
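// Checking all four RN values against the expression above:
//   RN=00 -> 1 (to nearest),  RN=01 -> 0 (toward zero),
//   RN=10 -> 2 (toward +inf), RN=11 -> 3 (toward -inf).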
9243
9244 MachineFunction &MF = DAG.getMachineFunction();
9245 EVT VT = Op.getValueType();
9246 EVT PtrVT = getPointerTy(MF.getDataLayout());
9247
9248 // Save FP Control Word to register
9249 SDValue Chain = Op.getOperand(0);
9250 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9251 Chain = MFFS.getValue(1);
9252
9253 SDValue CWD;
9254 if (isTypeLegal(MVT::i64)) {
9255 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9256 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9257 } else {
9258 // Save FP register to stack slot
9259 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9260 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9261 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9262
9263 // Load FP Control Word from low 32 bits of stack slot.
9265 "Stack slot adjustment is valid only on big endian subtargets!");
9266 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9267 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9268 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9269 Chain = CWD.getValue(1);
9270 }
9271
9272 // Transform as necessary
9273 SDValue CWD1 =
9274 DAG.getNode(ISD::AND, dl, MVT::i32,
9275 CWD, DAG.getConstant(3, dl, MVT::i32));
9276 SDValue CWD2 =
9277 DAG.getNode(ISD::SRL, dl, MVT::i32,
9278 DAG.getNode(ISD::AND, dl, MVT::i32,
9279 DAG.getNode(ISD::XOR, dl, MVT::i32,
9280 CWD, DAG.getConstant(3, dl, MVT::i32)),
9281 DAG.getConstant(3, dl, MVT::i32)),
9282 DAG.getConstant(1, dl, MVT::i32));
9283
9284 SDValue RetVal =
9285 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9286
9287 RetVal =
9288 DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9289 dl, VT, RetVal);
9290
9291 return DAG.getMergeValues({RetVal, Chain}, dl);
9292}
9293
9294SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9295 EVT VT = Op.getValueType();
9296 uint64_t BitWidth = VT.getSizeInBits();
9297 SDLoc dl(Op);
9298 assert(Op.getNumOperands() == 3 &&
9299 VT == Op.getOperand(1).getValueType() &&
9300 "Unexpected SHL!");
9301
9302 // Expand into a bunch of logical ops. Note that these ops
9303 // depend on the PPC behavior for oversized shift amounts.
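// Illustrative 32-bit example: for Amt = 40, the Hi << Amt and Lo >> (32 - Amt)
// terms use out-of-range shift amounts and produce 0, while Tmp5 = Amt - 32 = 8,
// so OutHi = Lo << 8 and OutLo = Lo << 40 = 0, which is the correct result for
// a 64-bit left shift by 40.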
9304 SDValue Lo = Op.getOperand(0);
9305 SDValue Hi = Op.getOperand(1);
9306 SDValue Amt = Op.getOperand(2);
9307 EVT AmtVT = Amt.getValueType();
9308
9309 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9310 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9311 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9312 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9313 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9314 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9315 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9316 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9317 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9318 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9319 SDValue OutOps[] = { OutLo, OutHi };
9320 return DAG.getMergeValues(OutOps, dl);
9321}
9322
9323SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9324 EVT VT = Op.getValueType();
9325 SDLoc dl(Op);
9326 uint64_t BitWidth = VT.getSizeInBits();
9327 assert(Op.getNumOperands() == 3 &&
9328 VT == Op.getOperand(1).getValueType() &&
9329 "Unexpected SRL!");
9330
9331 // Expand into a bunch of logical ops. Note that these ops
9332 // depend on the PPC behavior for oversized shift amounts.
9333 SDValue Lo = Op.getOperand(0);
9334 SDValue Hi = Op.getOperand(1);
9335 SDValue Amt = Op.getOperand(2);
9336 EVT AmtVT = Amt.getValueType();
9337
9338 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9339 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9340 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9341 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9342 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9343 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9344 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9345 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9346 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9347 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9348 SDValue OutOps[] = { OutLo, OutHi };
9349 return DAG.getMergeValues(OutOps, dl);
9350}
9351
9352SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9353 SDLoc dl(Op);
9354 EVT VT = Op.getValueType();
9355 uint64_t BitWidth = VT.getSizeInBits();
9356 assert(Op.getNumOperands() == 3 &&
9357 VT == Op.getOperand(1).getValueType() &&
9358 "Unexpected SRA!");
9359
9360 // Expand into a bunch of logical ops, followed by a select_cc.
9361 SDValue Lo = Op.getOperand(0);
9362 SDValue Hi = Op.getOperand(1);
9363 SDValue Amt = Op.getOperand(2);
9364 EVT AmtVT = Amt.getValueType();
9365
9366 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9367 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9368 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9369 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9370 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9371 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9372 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9373 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9374 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9375 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9376 Tmp4, Tmp6, ISD::SETLE);
9377 SDValue OutOps[] = { OutLo, OutHi };
9378 return DAG.getMergeValues(OutOps, dl);
9379}
9380
9381SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9382 SelectionDAG &DAG) const {
9383 SDLoc dl(Op);
9384 EVT VT = Op.getValueType();
9385 unsigned BitWidth = VT.getSizeInBits();
9386
9387 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9388 SDValue X = Op.getOperand(0);
9389 SDValue Y = Op.getOperand(1);
9390 SDValue Z = Op.getOperand(2);
9391 EVT AmtVT = Z.getValueType();
9392
9393 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9394 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9395 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9396 // on PowerPC shift by BW being well defined.
9397 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9398 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9399 SDValue SubZ =
9400 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9401 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9402 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9403 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9404}
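// Editorial illustration (not part of the upstream file): the same funnel
// shift expansion in scalar form for 64-bit values. As in the DAG code above,
// it relies on a right shift by exactly the bit width producing 0 (the Z == 0
// case, where SubZ == 64). The name is illustrative only.
static inline uint64_t ModelFSHL64(uint64_t X, uint64_t Y, unsigned Z) {
  Z &= 63;                                     // Z % BW
  unsigned SubZ = 64 - Z;                      // in [1, 64]
  uint64_t HiPart = X << Z;                    // Z is in [0, 63] here
  uint64_t LoPart = SubZ < 64 ? Y >> SubZ : 0; // SubZ == 64 only when Z == 0
  return HiPart | LoPart;                      // == fshl(X, Y, Z)
}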
9405
9406//===----------------------------------------------------------------------===//
9407// Vector related lowering.
9408//
9409
9410/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9411/// element size of SplatSize. Cast the result to VT.
9412static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9413 SelectionDAG &DAG, const SDLoc &dl) {
9414 static const MVT VTys[] = { // canonical VT to use for each size.
9415 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9416 };
9417
9418 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9419
9420 // For a splat with all ones, turn it into vspltisb 0xFF to canonicalize it.
9421 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9422 SplatSize = 1;
9423 Val = 0xFF;
9424 }
9425
9426 EVT CanonicalVT = VTys[SplatSize-1];
9427
9428 // Build a canonical splat for this value.
9429 // Explicitly truncate APInt here, as this API is used with a mix of
9430 // signed and unsigned values.
9431 return DAG.getBitcast(
9432 ReqVT,
9433 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9434}
9435
9436/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9437/// specified intrinsic ID.
9438static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9439 const SDLoc &dl, EVT DestVT = MVT::Other) {
9440 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9441 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9442 DAG.getConstant(IID, dl, MVT::i32), Op);
9443}
9444
9445/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9446/// specified intrinsic ID.
9447static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9448 SelectionDAG &DAG, const SDLoc &dl,
9449 EVT DestVT = MVT::Other) {
9450 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9451 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9452 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9453}
9454
9455/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9456/// specified intrinsic ID.
9457static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9458 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9459 EVT DestVT = MVT::Other) {
9460 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9461 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9462 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9463}
9464
9465/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9466/// amount. The result has the specified value type.
9467static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9468 SelectionDAG &DAG, const SDLoc &dl) {
9469 // Force LHS/RHS to be the right type.
9470 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9471 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9472
9473 int Ops[16];
9474 for (unsigned i = 0; i != 16; ++i)
9475 Ops[i] = i + Amt;
9476 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9477 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9478}
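// Editorial illustration (not part of the upstream file): the shuffle built
// above selects byte (i + Amt) of the 32-byte concatenation [LHS | RHS] for
// result byte i, which is what vsldoi does with its 4-bit shift count. A plain
// byte-array model; names are illustrative only.
static inline void ModelVSLDOI(const uint8_t LHS[16], const uint8_t RHS[16],
                               unsigned Amt, uint8_t Out[16]) {
  for (unsigned i = 0; i != 16; ++i) {
    unsigned Src = i + Amt; // Amt is expected to be in [0, 16]
    Out[i] = Src < 16 ? LHS[Src] : RHS[Src - 16];
  }
}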
9479
9480/// Do we have an efficient pattern in a .td file for this node?
9481///
9482/// \param V - pointer to the BuildVectorSDNode being matched
9483/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9484///
9485/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9486/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9487/// the opposite is true (expansion is beneficial) are:
9488/// - The node builds a vector out of integers that are not 32 or 64-bits
9489/// - The node builds a vector out of constants
9490/// - The node is a "load-and-splat"
9491/// In all other cases, we will choose to keep the BUILD_VECTOR.
9492static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9493 bool HasDirectMove,
9494 bool HasP8Vector) {
9495 EVT VecVT = V->getValueType(0);
9496 bool RightType = VecVT == MVT::v2f64 ||
9497 (HasP8Vector && VecVT == MVT::v4f32) ||
9498 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9499 if (!RightType)
9500 return false;
9501
9502 bool IsSplat = true;
9503 bool IsLoad = false;
9504 SDValue Op0 = V->getOperand(0);
9505
9506 // This function is called in a block that confirms the node is not a constant
9507 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9508 // different constants.
9509 if (V->isConstant())
9510 return false;
9511 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9512 if (V->getOperand(i).isUndef())
9513 return false;
9514 // We want to expand nodes that represent load-and-splat even if the
9515 // loaded value is a floating point truncation or conversion to int.
9516 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9517 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9518 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9519 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9520 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9521 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9522 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9523 IsLoad = true;
9524 // If the operands are different or the input is not a load and has more
9525 // uses than just this BV node, then it isn't a splat.
9526 if (V->getOperand(i) != Op0 ||
9527 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9528 IsSplat = false;
9529 }
9530 return !(IsSplat && IsLoad);
9531}
9532
9533// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9534SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9535
9536 SDLoc dl(Op);
9537 SDValue Op0 = Op->getOperand(0);
9538
9539 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9540 (Op.getValueType() != MVT::f128))
9541 return SDValue();
9542
9543 SDValue Lo = Op0.getOperand(0);
9544 SDValue Hi = Op0.getOperand(1);
9545 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9546 return SDValue();
9547
9548 if (!Subtarget.isLittleEndian())
9549 std::swap(Lo, Hi);
9550
9551 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9552}
9553
9554static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9555 const SDValue *InputLoad = &Op;
9556 while (InputLoad->getOpcode() == ISD::BITCAST)
9557 InputLoad = &InputLoad->getOperand(0);
9558 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9559 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9560 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9561 InputLoad = &InputLoad->getOperand(0);
9562 }
9563 if (InputLoad->getOpcode() != ISD::LOAD)
9564 return nullptr;
9565 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9566 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9567}
9568
9569// Convert the argument APFloat to a single precision APFloat if there is no
9570// loss in information during the conversion to single precision APFloat and the
9571// resulting number is not a denormal number. Return true if successful.
9572bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9573 APFloat APFloatToConvert = ArgAPFloat;
9574 bool LosesInfo = true;
9575 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9576 &LosesInfo);
9577 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9578 if (Success)
9579 ArgAPFloat = APFloatToConvert;
9580 return Success;
9581}
9582
9583// Bitcast the argument APInt to a double and convert it to a single precision
9584// APFloat, bitcast the APFloat to an APInt and assign it to the original
9585// argument if there is no loss in information during the conversion from
9586// double to single precision APFloat and the resulting number is not a denormal
9587// number. Return true if successful.
9588bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9589 double DpValue = ArgAPInt.bitsToDouble();
9590 APFloat APFloatDp(DpValue);
9591 bool Success = convertToNonDenormSingle(APFloatDp);
9592 if (Success)
9593 ArgAPInt = APFloatDp.bitcastToAPInt();
9594 return Success;
9595}
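// Editorial illustration (not part of the upstream file): the property the two
// convertToNonDenormSingle overloads above test, phrased with plain C++ types:
// "does this double round-trip through a non-denormal float?". Assumes <cmath>
// is available; the name is illustrative only.
static inline bool ModelFitsNonDenormSingle(double D) {
  float AsSingle = static_cast<float>(D);
  bool LosesInfo = static_cast<double>(AsSingle) != D;
  bool IsDenormal = std::fpclassify(AsSingle) == FP_SUBNORMAL;
  return !LosesInfo && !IsDenormal;
}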
9596
9597// Nondestructive check for convertToNonDenormSingle.
9598bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9599 // Only convert if it loses info, since XXSPLTIDP should
9600 // handle the other case.
9601 APFloat APFloatToConvert = ArgAPFloat;
9602 bool LosesInfo = true;
9603 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9604 &LosesInfo);
9605
9606 return (!LosesInfo && !APFloatToConvert.isDenormal());
9607}
9608
9609static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9610 unsigned &Opcode) {
9611 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9612 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9613 return false;
9614
9615 EVT Ty = Op->getValueType(0);
9616 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9617 // as we cannot handle extending loads for these types.
9618 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9619 ISD::isNON_EXTLoad(InputNode))
9620 return true;
9621
9622 EVT MemVT = InputNode->getMemoryVT();
9623 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9624 // memory VT is the same vector element VT type.
9625 // The loads feeding into the v8i16 and v16i8 types will be extending because
9626 // scalar i8/i16 are not legal types.
9627 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9628 (MemVT == Ty.getVectorElementType()))
9629 return true;
9630
9631 if (Ty == MVT::v2i64) {
9632 // Check the extend type, when the input type is i32, and the output vector
9633 // type is v2i64.
9634 if (MemVT == MVT::i32) {
9635 if (ISD::isZEXTLoad(InputNode))
9636 Opcode = PPCISD::ZEXT_LD_SPLAT;
9637 if (ISD::isSEXTLoad(InputNode))
9638 Opcode = PPCISD::SEXT_LD_SPLAT;
9639 }
9640 return true;
9641 }
9642 return false;
9643}
9644
9645static bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
9646 bool IsLittleEndian) {
9647 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9648
9649 BitMask.clearAllBits();
9650 EVT VT = BVN.getValueType(0);
9651 unsigned VTSize = VT.getSizeInBits();
9652 APInt ConstValue(VTSize, 0);
9653
9654 unsigned EltWidth = VT.getScalarSizeInBits();
9655
9656 unsigned BitPos = 0;
9657 for (auto OpVal : BVN.op_values()) {
9658 auto *CN = dyn_cast<ConstantSDNode>(OpVal);
9659
9660 if (!CN)
9661 return false;
9662 // The elements in a vector register are ordered in reverse byte order
9663 // between little-endian and big-endian modes.
9664 ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9665 IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9666 BitPos += EltWidth;
9667 }
9668
9669 for (unsigned J = 0; J < 16; ++J) {
9670 APInt ExtractValue = ConstValue.extractBits(8, J * 8);
9671 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9672 return false;
9673 if (ExtractValue == 0xFF)
9674 BitMask.setBit(J);
9675 }
9676 return true;
9677}
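// Editorial illustration (not part of the upstream file): the same test as
// isValidMtVsrBmi on the 16 bytes of the assembled constant (byte 0 = bits
// 0-7 of ConstValue). Every byte must be 0x00 or 0xFF, and bit J of the
// returned mask is set for each 0xFF byte, which is the immediate mtvsrbmi
// expects. Names are illustrative only.
static inline bool ModelMtVsrBmiMask(const uint8_t Bytes[16], uint16_t &Mask) {
  Mask = 0;
  for (unsigned J = 0; J != 16; ++J) {
    if (Bytes[J] != 0x00 && Bytes[J] != 0xFF)
      return false;
    if (Bytes[J] == 0xFF)
      Mask |= uint16_t(1) << J;
  }
  return true;
}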
9678
9679// If this is a case we can't handle, return null and let the default
9680// expansion code take care of it. If we CAN select this case, and if it
9681// selects to a single instruction, return Op. Otherwise, if we can codegen
9682// this case more efficiently than a constant pool load, lower it to the
9683// sequence of ops that should be used.
9684SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9685 SelectionDAG &DAG) const {
9686 SDLoc dl(Op);
9687 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9688 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9689
9690 if (Subtarget.hasP10Vector()) {
9691 APInt BitMask(32, 0);
9692 // If the value of the vector is all zeros or all ones,
9693 // we do not convert it to MTVSRBMI.
9694 // The xxleqv instruction sets a vector with all ones.
9695 // The xxlxor instruction sets a vector with all zeros.
9696 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9697 BitMask != 0 && BitMask != 0xffff) {
9698 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9699 MachineSDNode *MSDNode =
9700 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9701 SDValue SDV = SDValue(MSDNode, 0);
9702 EVT DVT = BVN->getValueType(0);
9703 EVT SVT = SDV.getValueType();
9704 if (SVT != DVT) {
9705 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9706 }
9707 return SDV;
9708 }
9709 // Recognize build vector patterns to emit VSX vector instructions
9710 // instead of loading value from memory.
9711 if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9712 return VecPat;
9713 }
9714 // Check if this is a splat of a constant value.
9715 APInt APSplatBits, APSplatUndef;
9716 unsigned SplatBitSize;
9717 bool HasAnyUndefs;
9718 bool BVNIsConstantSplat =
9719 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9720 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9721
9722 // If it is a splat of a double, check if we can shrink it to a 32 bit
9723 // non-denormal float which when converted back to double gives us the same
9724 // double. This is to exploit the XXSPLTIDP instruction.
9725 // If we lose precision, we use XXSPLTI32DX.
9726 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9727 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9728 // Check the type first to short-circuit so we don't modify APSplatBits if
9729 // this block isn't executed.
9730 if ((Op->getValueType(0) == MVT::v2f64) &&
9731 convertToNonDenormSingle(APSplatBits)) {
9732 SDValue SplatNode = DAG.getNode(
9733 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9734 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9735 return DAG.getBitcast(Op.getValueType(), SplatNode);
9736 } else {
9737 // We may lose precision, so we have to use XXSPLTI32DX.
9738
9739 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9740 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9741 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9742
9743 if (!Hi || !Lo)
9744 // If either half is 0, then we should generate XXLXOR to set it to 0.
9745 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9746
9747 if (Hi)
9748 SplatNode = DAG.getNode(
9749 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9750 DAG.getTargetConstant(0, dl, MVT::i32),
9751 DAG.getTargetConstant(Hi, dl, MVT::i32));
9752
9753 if (Lo)
9754 SplatNode =
9755 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9756 DAG.getTargetConstant(1, dl, MVT::i32),
9757 DAG.getTargetConstant(Lo, dl, MVT::i32));
9758
9759 return DAG.getBitcast(Op.getValueType(), SplatNode);
9760 }
9761 }
9762
9763 bool IsSplat64 = false;
9764 uint64_t SplatBits = 0;
9765 int32_t SextVal = 0;
9766 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9767 SplatBits = APSplatBits.getZExtValue();
9768 if (SplatBitSize <= 32) {
9769 SextVal = SignExtend32(SplatBits, SplatBitSize);
9770 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9771 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9772 bool P9Vector = Subtarget.hasP9Vector();
9773 int32_t Hi = P9Vector ? 127 : 15;
9774 int32_t Lo = P9Vector ? -128 : -16;
9775 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9776 SextVal = static_cast<int32_t>(SplatBits);
9777 }
9778 }
9779
9780 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9781 unsigned NewOpcode = PPCISD::LD_SPLAT;
9782
9783 // Handle load-and-splat patterns as we have instructions that will do this
9784 // in one go.
9785 if (DAG.isSplatValue(Op, true) &&
9786 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9787 const SDValue *InputLoad = &Op.getOperand(0);
9788 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9789
9790 // If the input load is an extending load, it will be an i32 -> i64
9791 // extending load and isValidSplatLoad() will update NewOpcode.
9792 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9793 unsigned ElementSize =
9794 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9795
9796 assert(((ElementSize == 2 * MemorySize)
9797 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9798 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9799 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9800 "Unmatched element size and opcode!\n");
9801
9802 // To check for a single use of this load, we have to check for vector
9803 // width (128 bits) / ElementSize uses (since each operand of the
9804 // BUILD_VECTOR is a separate use of the value).
9805 unsigned NumUsesOfInputLD = 128 / ElementSize;
9806 for (SDValue BVInOp : Op->ops())
9807 if (BVInOp.isUndef())
9808 NumUsesOfInputLD--;
9809
9810 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9811 // The cases below would also arise for "lfiwzx/lfiwax + LE target + index
9812 // 1", "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9813 // 15", but isValidSplatLoad() currently only returns true when the
9814 // splat value is at index 0, so we will not get into trouble for
9815 // these cases.
9816 //
9817 // case 1 - lfiwzx/lfiwax
9818 // 1.1: load result is i32 and is sign/zero extend to i64;
9819 // 1.2: build a v2i64 vector type with above loaded value;
9820 // 1.3: the vector has only one value at index 0, others are all undef;
9821 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9822 if (NumUsesOfInputLD == 1 &&
9823 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9824 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9825 Subtarget.hasLFIWAX()))
9826 return SDValue();
9827
9828 // case 2 - lxvr[hb]x
9829 // 2.1: load result is at most i16;
9830 // 2.2: build a vector with above loaded value;
9831 // 2.3: the vector has only one value at index 0, others are all undef;
9832 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9833 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9834 Subtarget.isISA3_1() && ElementSize <= 16)
9835 return SDValue();
9836
9837 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9838 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9839 Subtarget.hasVSX()) {
9840 SDValue Ops[] = {
9841 LD->getChain(), // Chain
9842 LD->getBasePtr(), // Ptr
9843 DAG.getValueType(Op.getValueType()) // VT
9844 };
9845 SDValue LdSplt = DAG.getMemIntrinsicNode(
9846 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9847 LD->getMemoryVT(), LD->getMemOperand());
9848 // Replace all uses of the output chain of the original load with the
9849 // output chain of the new load.
9850 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9851 LdSplt.getValue(1));
9852 return LdSplt;
9853 }
9854 }
9855
9856 // In 64-bit mode, BUILD_VECTOR nodes that are not constant splats of up to
9857 // 32 bits can be lowered to VSX instructions under certain conditions.
9858 // Without VSX, there is no pattern more efficient than expanding the node.
9859 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9860 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9861 Subtarget.hasP8Vector()))
9862 return Op;
9863 return SDValue();
9864 }
9865
9866 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9867 unsigned SplatSize = SplatBitSize / 8;
9868
9869 // First, handle single instruction cases.
9870
9871 // All zeros?
9872 if (SplatBits == 0) {
9873 // Canonicalize all zero vectors to be v4i32.
9874 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9875 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9876 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9877 }
9878 return Op;
9879 }
9880
9881 // We have XXSPLTIW for constant splats four bytes wide.
9882 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9883 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9884 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9885 // turned into a 4-byte splat of 0xABABABAB.
9886 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9887 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9888 Op.getValueType(), DAG, dl);
9889
9890 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9891 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9892 dl);
9893
9894 // We have XXSPLTIB for constant splats one byte wide.
9895 if (Subtarget.hasP9Vector() && SplatSize == 1)
9896 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9897 dl);
9898
9899 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9900 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9901 if (SextVal >= -16 && SextVal <= 15) {
9902 // SplatSize may be 1, 2, 4, or 8. For size 8, splat as a word (size 4) and
9903 // then sign-extend the words to doublewords with vupklsw.
9904 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9905 SDValue Res =
9906 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9907 if (SplatSize != 8)
9908 return Res;
9909 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
9910 }
9911
9912 // Two instruction sequences.
9913
9914 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9915 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
9916 SmallVector<SDValue, 16> Ops(16, C);
9917 SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9918 unsigned IID;
9919 EVT VT;
9920 switch (SplatSize) {
9921 default:
9922 llvm_unreachable("Unexpected type for vector constant.");
9923 case 2:
9924 IID = Intrinsic::ppc_altivec_vupklsb;
9925 VT = MVT::v8i16;
9926 break;
9927 case 4:
9928 IID = Intrinsic::ppc_altivec_vextsb2w;
9929 VT = MVT::v4i32;
9930 break;
9931 case 8:
9932 IID = Intrinsic::ppc_altivec_vextsb2d;
9933 VT = MVT::v2i64;
9934 break;
9935 }
9936 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl, VT);
9937 return DAG.getBitcast(Op->getValueType(0), Extend);
9938 }
9939 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9940
9941 // If this value is in the range [-32,30] and is even, use:
9942 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9943 // If this value is in the range [17,31] and is odd, use:
9944 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9945 // If this value is in the range [-31,-17] and is odd, use:
9946 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9947 // Note the last two are three-instruction sequences.
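 // Editorial worked examples (not part of the upstream comment):
 //   SextVal = -20 (even): VSPLTI[bhw](-10) + VSPLTI[bhw](-10) = -20 (2 ops)
 //   SextVal = 27 (odd, [17,31]): VSPLTI[bhw](11) - VSPLTI[bhw](-16) = 27
 //   SextVal = -27 (odd, [-31,-17]): VSPLTI[bhw](-11) + VSPLTI[bhw](-16) = -27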
9948 if (SextVal >= -32 && SextVal <= 31) {
9949 // To avoid having these optimizations undone by constant folding,
9950 // we convert to a pseudo that will be expanded later into one of
9951 // the above forms.
9952 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9953 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9954 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9955 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9956 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9957 if (VT == Op.getValueType())
9958 return RetVal;
9959 else
9960 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9961 }
9962
9963 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9964 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9965 // for fneg/fabs.
9966 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9967 // Make a splat of all ones (vspltisw -1):
9968 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9969
9970 // Make the VSLW intrinsic, computing 0x8000_0000.
9971 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9972 OnesV, DAG, dl);
9973
9974 // xor by OnesV to invert it.
9975 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9976 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9977 }
9978
9979 // Check to see if this is a wide variety of vsplti*, binop self cases.
9980 static const signed char SplatCsts[] = {
9981 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9982 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9983 };
9984
9985 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9986 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9987 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9988 int i = SplatCsts[idx];
9989
9990 // Figure out what shift amount will be used by altivec if shifted by i in
9991 // this splat size.
9992 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9993
9994 // vsplti + shl self.
9995 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9996 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9997 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9998 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9999 Intrinsic::ppc_altivec_vslw
10000 };
10001 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
10002 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
10003 }
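 // Editorial worked example: a v16i8 splat of 64 that reaches this loop
 // (e.g. when the earlier P9-only path is unavailable) matches here at i == 4,
 // since TypeShiftAmt == 4 and 4 << 4 == 64, and is emitted as vspltisb(4)
 // followed by a vslb of that value by itself.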
10004
10005 // vsplti + srl self.
10006 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
10007 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
10008 static const unsigned IIDs[] = { // Intrinsic to use for each size.
10009 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
10010 Intrinsic::ppc_altivec_vsrw
10011 };
10012 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
10013 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
10014 }
10015
10016 // vsplti + rol self.
10017 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
10018 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
10019 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
10020 static const unsigned IIDs[] = { // Intrinsic to use for each size.
10021 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
10022 Intrinsic::ppc_altivec_vrlw
10023 };
10024 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
10025 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
10026 }
10027
10028 // t = vsplti c, result = vsldoi t, t, 1
10029 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
10030 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10031 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
10032 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10033 }
10034 // t = vsplti c, result = vsldoi t, t, 2
10035 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
10036 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10037 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
10038 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10039 }
10040 // t = vsplti c, result = vsldoi t, t, 3
10041 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
10042 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
10043 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
10044 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
10045 }
10046 }
10047
10048 return SDValue();
10049}
10050
10051/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
10052/// the specified operations to build the shuffle.
10053static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
10054 SDValue RHS, SelectionDAG &DAG,
10055 const SDLoc &dl) {
10056 unsigned OpNum = (PFEntry >> 26) & 0x0F;
10057 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
10058 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
10059
10060 enum {
10061 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
10062 OP_VMRGHW,
10063 OP_VMRGLW,
10064 OP_VSPLTISW0,
10065 OP_VSPLTISW1,
10066 OP_VSPLTISW2,
10067 OP_VSPLTISW3,
10068 OP_VSLDOI4,
10069 OP_VSLDOI8,
10070 OP_VSLDOI12
10071 };
10072
10073 if (OpNum == OP_COPY) {
10074 if (LHSID == (1*9+2)*9+3) return LHS;
10075 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
10076 return RHS;
10077 }
10078
10079 SDValue OpLHS, OpRHS;
10080 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
10081 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
10082
10083 int ShufIdxs[16];
10084 switch (OpNum) {
10085 default: llvm_unreachable("Unknown i32 permute!");
10086 case OP_VMRGHW:
10087 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
10088 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
10089 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
10090 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
10091 break;
10092 case OP_VMRGLW:
10093 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
10094 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
10095 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
10096 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
10097 break;
10098 case OP_VSPLTISW0:
10099 for (unsigned i = 0; i != 16; ++i)
10100 ShufIdxs[i] = (i&3)+0;
10101 break;
10102 case OP_VSPLTISW1:
10103 for (unsigned i = 0; i != 16; ++i)
10104 ShufIdxs[i] = (i&3)+4;
10105 break;
10106 case OP_VSPLTISW2:
10107 for (unsigned i = 0; i != 16; ++i)
10108 ShufIdxs[i] = (i&3)+8;
10109 break;
10110 case OP_VSPLTISW3:
10111 for (unsigned i = 0; i != 16; ++i)
10112 ShufIdxs[i] = (i&3)+12;
10113 break;
10114 case OP_VSLDOI4:
10115 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
10116 case OP_VSLDOI8:
10117 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
10118 case OP_VSLDOI12:
10119 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
10120 }
10121 EVT VT = OpLHS.getValueType();
10122 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
10123 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
10124 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
10125 return DAG.getNode(ISD::BITCAST, dl, VT, T);
10126}
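// Editorial illustration (not part of the upstream file): the field layout of
// a PerfectShuffleTable entry as decoded above and in LowerVECTOR_SHUFFLE --
// bits [31:30] hold the cost, bits [29:26] the operation, and two 13-bit
// fields hold the table indices of the left and right sub-shuffles. Names are
// illustrative only.
struct ModelPFEntryFields {
  unsigned Cost, OpNum, LHSID, RHSID;
};
static inline ModelPFEntryFields ModelDecodePFEntry(unsigned PFEntry) {
  ModelPFEntryFields F;
  F.Cost = PFEntry >> 30;
  F.OpNum = (PFEntry >> 26) & 0x0F;
  F.LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
  F.RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
  return F;
}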
10127
10128/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
10129/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
10130/// SDValue.
10131SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
10132 SelectionDAG &DAG) const {
10133 const unsigned BytesInVector = 16;
10134 bool IsLE = Subtarget.isLittleEndian();
10135 SDLoc dl(N);
10136 SDValue V1 = N->getOperand(0);
10137 SDValue V2 = N->getOperand(1);
10138 unsigned ShiftElts = 0, InsertAtByte = 0;
10139 bool Swap = false;
10140
10141 // Shifts required to get the byte we want at element 7.
10142 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
10143 0, 15, 14, 13, 12, 11, 10, 9};
10144 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
10145 1, 2, 3, 4, 5, 6, 7, 8};
10146
10147 ArrayRef<int> Mask = N->getMask();
10148 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
10149
10150 // For each mask element, find out if we're just inserting something
10151 // from V2 into V1 or vice versa.
10152 // Possible permutations inserting an element from V2 into V1:
10153 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10154 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10155 // ...
10156 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10157 // Inserting from V1 into V2 will be similar, except mask range will be
10158 // [16,31].
10159
10160 bool FoundCandidate = false;
10161 // If both vector operands for the shuffle are the same vector, the mask
10162 // will contain only elements from the first one and the second one will be
10163 // undef.
10164 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10165 // Go through the mask of half-words to find an element that's being moved
10166 // from one vector to the other.
10167 for (unsigned i = 0; i < BytesInVector; ++i) {
10168 unsigned CurrentElement = Mask[i];
10169 // If 2nd operand is undefined, we should only look for element 7 in the
10170 // Mask.
10171 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10172 continue;
10173
10174 bool OtherElementsInOrder = true;
10175 // Examine the other elements in the Mask to see if they're in original
10176 // order.
10177 for (unsigned j = 0; j < BytesInVector; ++j) {
10178 if (j == i)
10179 continue;
10180 // If CurrentElement is from V1 [0,15], we expect the rest of the Mask to be
10181 // from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
10182 // in which case we assume we're always picking from the 1st operand.
10183 int MaskOffset =
10184 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10185 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10186 OtherElementsInOrder = false;
10187 break;
10188 }
10189 }
10190 // If other elements are in original order, we record the number of shifts
10191 // we need to get the element we want into element 7. Also record which byte
10192 // in the vector we should insert into.
10193 if (OtherElementsInOrder) {
10194 // If 2nd operand is undefined, we assume no shifts and no swapping.
10195 if (V2.isUndef()) {
10196 ShiftElts = 0;
10197 Swap = false;
10198 } else {
10199 // Only need the last 4 bits for the shift because operands will be swapped if CurrentElement is >= 2^4.
10200 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10201 : BigEndianShifts[CurrentElement & 0xF];
10202 Swap = CurrentElement < BytesInVector;
10203 }
10204 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10205 FoundCandidate = true;
10206 break;
10207 }
10208 }
10209
10210 if (!FoundCandidate)
10211 return SDValue();
10212
10213 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10214 // optionally with VECSHL if shift is required.
10215 if (Swap)
10216 std::swap(V1, V2);
10217 if (V2.isUndef())
10218 V2 = V1;
10219 if (ShiftElts) {
10220 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10221 DAG.getConstant(ShiftElts, dl, MVT::i32));
10222 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10223 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10224 }
10225 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10226 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10227}
10228
10229/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10230/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10231/// SDValue.
10232SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10233 SelectionDAG &DAG) const {
10234 const unsigned NumHalfWords = 8;
10235 const unsigned BytesInVector = NumHalfWords * 2;
10236 // Check that the shuffle is on half-words.
10237 if (!isNByteElemShuffleMask(N, 2, 1))
10238 return SDValue();
10239
10240 bool IsLE = Subtarget.isLittleEndian();
10241 SDLoc dl(N);
10242 SDValue V1 = N->getOperand(0);
10243 SDValue V2 = N->getOperand(1);
10244 unsigned ShiftElts = 0, InsertAtByte = 0;
10245 bool Swap = false;
10246
10247 // Shifts required to get the half-word we want at element 3.
10248 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10249 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10250
10251 uint32_t Mask = 0;
10252 uint32_t OriginalOrderLow = 0x1234567;
10253 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10254 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10255 // 32-bit space, only need 4-bit nibbles per element.
10256 for (unsigned i = 0; i < NumHalfWords; ++i) {
10257 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10258 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10259 }
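 // Editorial note: for the identity shuffle on half-words this packs Mask to
 // 0x01234567 (== OriginalOrderLow); a shuffle that only inserts V2's
 // half-word 5 (mask element 13) into slot 2 packs to 0x01D34567.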
10260
10261 // For each mask element, find out if we're just inserting something
10262 // from V2 into V1 or vice versa. Possible permutations inserting an element
10263 // from V2 into V1:
10264 // X, 1, 2, 3, 4, 5, 6, 7
10265 // 0, X, 2, 3, 4, 5, 6, 7
10266 // 0, 1, X, 3, 4, 5, 6, 7
10267 // 0, 1, 2, X, 4, 5, 6, 7
10268 // 0, 1, 2, 3, X, 5, 6, 7
10269 // 0, 1, 2, 3, 4, X, 6, 7
10270 // 0, 1, 2, 3, 4, 5, X, 7
10271 // 0, 1, 2, 3, 4, 5, 6, X
10272 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10273
10274 bool FoundCandidate = false;
10275 // Go through the mask of half-words to find an element that's being moved
10276 // from one vector to the other.
10277 for (unsigned i = 0; i < NumHalfWords; ++i) {
10278 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10279 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10280 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10281 uint32_t TargetOrder = 0x0;
10282
10283 // If both vector operands for the shuffle are the same vector, the mask
10284 // will contain only elements from the first one and the second one will be
10285 // undef.
10286 if (V2.isUndef()) {
10287 ShiftElts = 0;
10288 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10289 TargetOrder = OriginalOrderLow;
10290 Swap = false;
10291 // Skip if not the correct element or mask of other elements don't equal
10292 // to our expected order.
10293 if (MaskOneElt == VINSERTHSrcElem &&
10294 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10295 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10296 FoundCandidate = true;
10297 break;
10298 }
10299 } else { // If both operands are defined.
10300 // Target order is [8,15] if the current mask is between [0,7].
10301 TargetOrder =
10302 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10303 // Skip if mask of other elements don't equal our expected order.
10304 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10305 // We only need the last 3 bits for the number of shifts.
10306 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10307 : BigEndianShifts[MaskOneElt & 0x7];
10308 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10309 Swap = MaskOneElt < NumHalfWords;
10310 FoundCandidate = true;
10311 break;
10312 }
10313 }
10314 }
10315
10316 if (!FoundCandidate)
10317 return SDValue();
10318
10319 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10320 // optionally with VECSHL if shift is required.
10321 if (Swap)
10322 std::swap(V1, V2);
10323 if (V2.isUndef())
10324 V2 = V1;
10325 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10326 if (ShiftElts) {
10327 // Double ShiftElts because we're left shifting on v16i8 type.
10328 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10329 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10330 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10331 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10332 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10333 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10334 }
10335 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10336 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10337 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10338 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10339}
10340
10341/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10342/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10343/// return the default SDValue.
10344SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10345 SelectionDAG &DAG) const {
10346 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10347 // to v16i8. Peek through the bitcasts to get the actual operands.
10348 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10349 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10350
10351 auto ShuffleMask = SVN->getMask();
10352 SDValue VecShuffle(SVN, 0);
10353 SDLoc DL(SVN);
10354
10355 // Check that we have a four byte shuffle.
10356 if (!isNByteElemShuffleMask(SVN, 4, 1))
10357 return SDValue();
10358
10359 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10360 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10361 std::swap(LHS, RHS);
10362 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10363 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10364 if (!CommutedSV)
10365 return SDValue();
10366 ShuffleMask = CommutedSV->getMask();
10367 }
10368
10369 // Ensure that the RHS is a vector of constants.
10370 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10371 if (!BVN)
10372 return SDValue();
10373
10374 // Check if RHS is a splat of 4-bytes (or smaller).
10375 APInt APSplatValue, APSplatUndef;
10376 unsigned SplatBitSize;
10377 bool HasAnyUndefs;
10378 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10379 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10380 SplatBitSize > 32)
10381 return SDValue();
10382
10383 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10384 // The instruction splats a constant C into two words of the source vector
10385 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10386 // Thus we check that the shuffle mask is the equivalent of
10387 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10388 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10389 // within each word are consecutive, so we only need to check the first byte.
10390 SDValue Index;
10391 bool IsLE = Subtarget.isLittleEndian();
10392 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10393 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10394 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10395 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10396 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10397 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10398 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10399 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10400 else
10401 return SDValue();
10402
10403 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10404 // for XXSPLTI32DX.
10405 unsigned SplatVal = APSplatValue.getZExtValue();
10406 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10407 SplatVal |= (SplatVal << SplatBitSize);
10408
10409 SDValue SplatNode = DAG.getNode(
10410 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10411 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10412 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10413}
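// Editorial illustration (not part of the upstream file): the widening loop
// above replicates a narrow splat constant up to 32 bits, e.g. an 8-bit splat
// of 0xAB becomes 0xABAB and then 0xABABABAB. The name is illustrative only.
static inline uint32_t ModelWidenSplatTo32(uint32_t SplatVal,
                                           unsigned SplatBitSize) {
  for (; SplatBitSize < 32; SplatBitSize <<= 1)
    SplatVal |= SplatVal << SplatBitSize;
  return SplatVal;
}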
10414
10415/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10416/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10417/// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
10418/// i.e (or (shl x, C1), (srl x, 128-C1)).
10419SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10420 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10421 assert(Op.getValueType() == MVT::v1i128 &&
10422 "Only set v1i128 as custom, other type shouldn't reach here!");
10423 SDLoc dl(Op);
10424 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10425 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10426 unsigned SHLAmt = N1.getConstantOperandVal(0);
10427 if (SHLAmt % 8 == 0) {
10428 std::array<int, 16> Mask;
10429 std::iota(Mask.begin(), Mask.end(), 0);
10430 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10431 if (SDValue Shuffle =
10432 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10433 DAG.getUNDEF(MVT::v16i8), Mask))
10434 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10435 }
10436 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10437 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10438 DAG.getConstant(SHLAmt, dl, MVT::i32));
10439 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10440 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10441 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10442 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10443}
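// Editorial illustration (not part of the upstream file): the scalar rotation
// used above for shift amounts that are not byte multiples, written with the
// GCC/Clang unsigned __int128 extension; valid for 0 < C < 128. When C is a
// byte multiple, the same rotation is expressed as the v16i8 shuffle built
// above instead. The name is illustrative only.
static inline unsigned __int128 ModelROTL128(unsigned __int128 X, unsigned C) {
  return (X << C) | (X >> (128 - C));
}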
10444
10445/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10446/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10447/// return the code it can be lowered into. Worst case, it can always be
10448/// lowered into a vperm.
10449SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10450 SelectionDAG &DAG) const {
10451 SDLoc dl(Op);
10452 SDValue V1 = Op.getOperand(0);
10453 SDValue V2 = Op.getOperand(1);
10454 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10455
10456 // Any nodes that were combined in the target-independent combiner prior
10457 // to vector legalization will not be sent to the target combine. Try to
10458 // combine it here.
10459 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10460 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10461 return NewShuffle;
10462 Op = NewShuffle;
10463 SVOp = cast<ShuffleVectorSDNode>(Op);
10464 V1 = Op.getOperand(0);
10465 V2 = Op.getOperand(1);
10466 }
10467 EVT VT = Op.getValueType();
10468 bool isLittleEndian = Subtarget.isLittleEndian();
10469
10470 unsigned ShiftElts, InsertAtByte;
10471 bool Swap = false;
10472
10473 // If this is a load-and-splat, we can do that with a single instruction
10474 // in some cases. However if the load has multiple uses, we don't want to
10475 // combine it because that will just produce multiple loads.
10476 bool IsPermutedLoad = false;
10477 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10478 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10479 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10480 InputLoad->hasOneUse()) {
10481 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10482 int SplatIdx =
10483 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10484
10485 // The splat index for permuted loads will be in the left half of the vector
10486 // which is strictly wider than the loaded value by 8 bytes. So we need to
10487 // adjust the splat index to point to the correct address in memory.
10488 if (IsPermutedLoad) {
10489 assert((isLittleEndian || IsFourByte) &&
10490 "Unexpected size for permuted load on big endian target");
10491 SplatIdx += IsFourByte ? 2 : 1;
10492 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10493 "Splat of a value outside of the loaded memory");
10494 }
10495
10496 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10497 // For 4-byte load-and-splat, we need Power9.
10498 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10499 uint64_t Offset = 0;
10500 if (IsFourByte)
10501 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10502 else
10503 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10504
10505 // If the width of the load is the same as the width of the splat,
10506 // loading with an offset would load the wrong memory.
10507 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10508 Offset = 0;
10509
10510 SDValue BasePtr = LD->getBasePtr();
10511 if (Offset != 0)
10512 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10513 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10514 SDValue Ops[] = {
10515 LD->getChain(), // Chain
10516 BasePtr, // BasePtr
10517 DAG.getValueType(Op.getValueType()) // VT
10518 };
10519 SDVTList VTL =
10520 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10521 SDValue LdSplt =
10522 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10523 Ops, LD->getMemoryVT(), LD->getMemOperand());
10524 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10525 if (LdSplt.getValueType() != SVOp->getValueType(0))
10526 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10527 return LdSplt;
10528 }
10529 }
10530
10531 // All v2i64 and v2f64 shuffles are legal
10532 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10533 return Op;
10534
10535 if (Subtarget.hasP9Vector() &&
10536 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10537 isLittleEndian)) {
10538 if (V2.isUndef())
10539 V2 = V1;
10540 else if (Swap)
10541 std::swap(V1, V2);
10542 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10543 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10544 if (ShiftElts) {
10545 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10546 DAG.getConstant(ShiftElts, dl, MVT::i32));
10547 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10548 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10549 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10550 }
10551 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10552 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10553 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10554 }
10555
10556 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10557 SDValue SplatInsertNode;
10558 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10559 return SplatInsertNode;
10560 }
10561
10562 if (Subtarget.hasP9Altivec()) {
10563 SDValue NewISDNode;
10564 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10565 return NewISDNode;
10566
10567 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10568 return NewISDNode;
10569 }
10570
10571 if (Subtarget.hasVSX() &&
10572 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10573 if (Swap)
10574 std::swap(V1, V2);
10575 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10576 SDValue Conv2 =
10577 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10578
10579 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10580 DAG.getConstant(ShiftElts, dl, MVT::i32));
10581 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10582 }
10583
10584 if (Subtarget.hasVSX() &&
10585 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10586 if (Swap)
10587 std::swap(V1, V2);
10588 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10589 SDValue Conv2 =
10590 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10591
10592 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10593 DAG.getConstant(ShiftElts, dl, MVT::i32));
10594 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10595 }
10596
10597 if (Subtarget.hasP9Vector()) {
10598 if (PPC::isXXBRHShuffleMask(SVOp)) {
10599 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10600 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10601 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10602 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10603 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10604 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10605 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10606 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10607 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10608 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10609 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10610 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10611 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10612 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10613 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10614 }
10615 }
10616
10617 if (Subtarget.hasVSX()) {
10618 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10619 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10620
10621 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10622 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10623 DAG.getConstant(SplatIdx, dl, MVT::i32));
10624 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10625 }
10626
10627 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10628 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10629 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10630 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10631 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10632 }
10633 }
10634
10635 // Cases that are handled by instructions that take permute immediates
10636 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10637 // selected by the instruction selector.
10638 if (V2.isUndef()) {
10639 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10640 PPC::isSplatShuffleMask(SVOp, 2) ||
10641 PPC::isSplatShuffleMask(SVOp, 4) ||
10642 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10643 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10644 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10645 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10646 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10647 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10648 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10649 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10650 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10651 (Subtarget.hasP8Altivec() && (
10652 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10653 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10654 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10655 return Op;
10656 }
10657 }
10658
10659 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10660 // and produce a fixed permutation. If any of these match, do not lower to
10661 // VPERM.
10662 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10663 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10664 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10665 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10666 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10667 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10668 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10669 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10670 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10671 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10672 (Subtarget.hasP8Altivec() && (
10673 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10674 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10675 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10676 return Op;
10677
10678 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10679 // perfect shuffle table to emit an optimal matching sequence.
10680 ArrayRef<int> PermMask = SVOp->getMask();
10681
10682 if (!DisablePerfectShuffle && !isLittleEndian) {
10683 unsigned PFIndexes[4];
10684 bool isFourElementShuffle = true;
10685 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10686 ++i) { // Element number
10687 unsigned EltNo = 8; // Start out undef.
10688 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10689 if (PermMask[i * 4 + j] < 0)
10690 continue; // Undef, ignore it.
10691
10692 unsigned ByteSource = PermMask[i * 4 + j];
10693 if ((ByteSource & 3) != j) {
10694 isFourElementShuffle = false;
10695 break;
10696 }
10697
10698 if (EltNo == 8) {
10699 EltNo = ByteSource / 4;
10700 } else if (EltNo != ByteSource / 4) {
10701 isFourElementShuffle = false;
10702 break;
10703 }
10704 }
10705 PFIndexes[i] = EltNo;
10706 }
10707
10708 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10709 // perfect shuffle vector to determine if it is cost effective to do this as
10710 // discrete instructions, or whether we should use a vperm.
10711 // For now, we skip this for little endian until such time as we have a
10712 // little-endian perfect shuffle table.
10713 if (isFourElementShuffle) {
10714 // Compute the index in the perfect shuffle table.
10715 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10716 PFIndexes[2] * 9 + PFIndexes[3];
10717
10718 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10719 unsigned Cost = (PFEntry >> 30);
10720
10721 // Determining when to avoid vperm is tricky. Many things affect the cost
10722 // of vperm, particularly how many times the perm mask needs to be
10723 // computed. For example, if the perm mask can be hoisted out of a loop or
10724 // is already used (perhaps because there are multiple permutes with the
10725 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10726 // permute mask out of the loop requires an extra register.
10727 //
10728 // As a compromise, we only emit discrete instructions if the shuffle can
10729 // be generated in 3 or fewer operations. When we have loop information
10730 // available, if this block is within a loop, we should avoid using vperm
10731 // for 3-operation perms and use a constant pool load instead.
10732 if (Cost < 3)
10733 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10734 }
10735 }
10736
10737 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10738 // vector that will get spilled to the constant pool.
10739 if (V2.isUndef()) V2 = V1;
10740
10741 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10742}
10743
10744SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10745 ArrayRef<int> PermMask, EVT VT,
10746 SDValue V1, SDValue V2) const {
10747 unsigned Opcode = PPCISD::VPERM;
10748 EVT ValType = V1.getValueType();
10749 SDLoc dl(Op);
10750 bool NeedSwap = false;
10751 bool isLittleEndian = Subtarget.isLittleEndian();
10752 bool isPPC64 = Subtarget.isPPC64();
10753
10754 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10755 (V1->hasOneUse() || V2->hasOneUse())) {
10756 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10757 "XXPERM instead\n");
10758 Opcode = PPCISD::XXPERM;
10759
10760 // The second input to XXPERM is also an output so if the second input has
10761 // multiple uses then copying is necessary, as a result we want the
10762 // single-use operand to be used as the second input to prevent copying.
10763 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10764 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10765 std::swap(V1, V2);
10766 NeedSwap = !NeedSwap;
10767 }
10768 }
10769
10770 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10771 // that it is in input element units, not in bytes. Convert now.
10772
10773 // For little endian, the order of the input vectors is reversed, and
10774 // the permutation mask is complemented with respect to 31. This is
10775 // necessary to produce proper semantics with the big-endian-based vperm
10776 // instruction.
10777 EVT EltVT = V1.getValueType().getVectorElementType();
10778 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10779
10780 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10781 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10782
10783 /*
10784 Vectors will be appended like so: [ V1 | v2 ]
10785 XXSWAPD on V1:
10786 [ A | B | C | D ] -> [ C | D | A | B ]
10787 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10788 i.e. index of A, B += 8, and index of C, D -= 8.
10789 XXSWAPD on V2:
10790 [ E | F | G | H ] -> [ G | H | E | F ]
10791 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10792 i.e. index of E, F += 8, index of G, H -= 8
10793 Swap V1 and V2:
10794 [ V1 | V2 ] -> [ V2 | V1 ]
10795 0-15 16-31 0-15 16-31
10796 i.e. index of V1 += 16, index of V2 -= 16
10797 */
10798
10799 SmallVector<SDValue, 16> ResultMask;
10800 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10801 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10802
10803 if (V1HasXXSWAPD) {
10804 if (SrcElt < 8)
10805 SrcElt += 8;
10806 else if (SrcElt < 16)
10807 SrcElt -= 8;
10808 }
10809 if (V2HasXXSWAPD) {
10810 if (SrcElt > 23)
10811 SrcElt -= 8;
10812 else if (SrcElt > 15)
10813 SrcElt += 8;
10814 }
10815 if (NeedSwap) {
10816 if (SrcElt < 16)
10817 SrcElt += 16;
10818 else
10819 SrcElt -= 16;
10820 }
10821 for (unsigned j = 0; j != BytesPerElement; ++j)
10822 if (isLittleEndian)
10823 ResultMask.push_back(
10824 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10825 else
10826 ResultMask.push_back(
10827 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10828 }
10829
10830 if (V1HasXXSWAPD) {
10831 dl = SDLoc(V1->getOperand(0));
10832 V1 = V1->getOperand(0)->getOperand(1);
10833 }
10834 if (V2HasXXSWAPD) {
10835 dl = SDLoc(V2->getOperand(0));
10836 V2 = V2->getOperand(0)->getOperand(1);
10837 }
10838
10839 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10840 if (ValType != MVT::v2f64)
10841 V1 = DAG.getBitcast(MVT::v2f64, V1);
10842 if (V2.getValueType() != MVT::v2f64)
10843 V2 = DAG.getBitcast(MVT::v2f64, V2);
10844 }
10845
10846 ShufflesHandledWithVPERM++;
10847 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10848 LLVM_DEBUG({
10849 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10850 if (Opcode == PPCISD::XXPERM) {
10851 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10852 } else {
10853 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10854 }
10855 SVOp->dump();
10856 dbgs() << "With the following permute control vector:\n";
10857 VPermMask.dump();
10858 });
10859
10860 if (Opcode == PPCISD::XXPERM)
10861 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10862
10863 // Only need to place items backwards in LE,
10864 // the mask was properly calculated.
10865 if (isLittleEndian)
10866 std::swap(V1, V2);
10867
10868 SDValue VPERMNode =
10869 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10870
10871 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10872 return VPERMNode;
10873}
10874
10875/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10876 /// vector comparison. If it is, return true and fill in CompareOpc/isDot with
10877/// information about the intrinsic.
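10877 /// The CompareOpc values filled in below are the extended-opcode fields of the
10877 /// matching vcmp*/xvcmp* instructions; isDot marks the record-form ("dot")
10877 /// predicate variants, which also set CR6.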
10878static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10879 bool &isDot, const PPCSubtarget &Subtarget) {
10880 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10881 CompareOpc = -1;
10882 isDot = false;
10883 switch (IntrinsicID) {
10884 default:
10885 return false;
10886 // Comparison predicates.
10887 case Intrinsic::ppc_altivec_vcmpbfp_p:
10888 CompareOpc = 966;
10889 isDot = true;
10890 break;
10891 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10892 CompareOpc = 198;
10893 isDot = true;
10894 break;
10895 case Intrinsic::ppc_altivec_vcmpequb_p:
10896 CompareOpc = 6;
10897 isDot = true;
10898 break;
10899 case Intrinsic::ppc_altivec_vcmpequh_p:
10900 CompareOpc = 70;
10901 isDot = true;
10902 break;
10903 case Intrinsic::ppc_altivec_vcmpequw_p:
10904 CompareOpc = 134;
10905 isDot = true;
10906 break;
10907 case Intrinsic::ppc_altivec_vcmpequd_p:
10908 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10909 CompareOpc = 199;
10910 isDot = true;
10911 } else
10912 return false;
10913 break;
10914 case Intrinsic::ppc_altivec_vcmpneb_p:
10915 case Intrinsic::ppc_altivec_vcmpneh_p:
10916 case Intrinsic::ppc_altivec_vcmpnew_p:
10917 case Intrinsic::ppc_altivec_vcmpnezb_p:
10918 case Intrinsic::ppc_altivec_vcmpnezh_p:
10919 case Intrinsic::ppc_altivec_vcmpnezw_p:
10920 if (Subtarget.hasP9Altivec()) {
10921 switch (IntrinsicID) {
10922 default:
10923 llvm_unreachable("Unknown comparison intrinsic.");
10924 case Intrinsic::ppc_altivec_vcmpneb_p:
10925 CompareOpc = 7;
10926 break;
10927 case Intrinsic::ppc_altivec_vcmpneh_p:
10928 CompareOpc = 71;
10929 break;
10930 case Intrinsic::ppc_altivec_vcmpnew_p:
10931 CompareOpc = 135;
10932 break;
10933 case Intrinsic::ppc_altivec_vcmpnezb_p:
10934 CompareOpc = 263;
10935 break;
10936 case Intrinsic::ppc_altivec_vcmpnezh_p:
10937 CompareOpc = 327;
10938 break;
10939 case Intrinsic::ppc_altivec_vcmpnezw_p:
10940 CompareOpc = 391;
10941 break;
10942 }
10943 isDot = true;
10944 } else
10945 return false;
10946 break;
10947 case Intrinsic::ppc_altivec_vcmpgefp_p:
10948 CompareOpc = 454;
10949 isDot = true;
10950 break;
10951 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10952 CompareOpc = 710;
10953 isDot = true;
10954 break;
10955 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10956 CompareOpc = 774;
10957 isDot = true;
10958 break;
10959 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10960 CompareOpc = 838;
10961 isDot = true;
10962 break;
10963 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10964 CompareOpc = 902;
10965 isDot = true;
10966 break;
10967 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10968 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10969 CompareOpc = 967;
10970 isDot = true;
10971 } else
10972 return false;
10973 break;
10974 case Intrinsic::ppc_altivec_vcmpgtub_p:
10975 CompareOpc = 518;
10976 isDot = true;
10977 break;
10978 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10979 CompareOpc = 582;
10980 isDot = true;
10981 break;
10982 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10983 CompareOpc = 646;
10984 isDot = true;
10985 break;
10986 case Intrinsic::ppc_altivec_vcmpgtud_p:
10987 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10988 CompareOpc = 711;
10989 isDot = true;
10990 } else
10991 return false;
10992 break;
10993
10994 case Intrinsic::ppc_altivec_vcmpequq:
10995 case Intrinsic::ppc_altivec_vcmpgtsq:
10996 case Intrinsic::ppc_altivec_vcmpgtuq:
10997 if (!Subtarget.isISA3_1())
10998 return false;
10999 switch (IntrinsicID) {
11000 default:
11001 llvm_unreachable("Unknown comparison intrinsic.");
11002 case Intrinsic::ppc_altivec_vcmpequq:
11003 CompareOpc = 455;
11004 break;
11005 case Intrinsic::ppc_altivec_vcmpgtsq:
11006 CompareOpc = 903;
11007 break;
11008 case Intrinsic::ppc_altivec_vcmpgtuq:
11009 CompareOpc = 647;
11010 break;
11011 }
11012 break;
11013
11014 // VSX predicate comparisons use the same infrastructure
11015 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
11016 case Intrinsic::ppc_vsx_xvcmpgedp_p:
11017 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
11018 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
11019 case Intrinsic::ppc_vsx_xvcmpgesp_p:
11020 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
11021 if (Subtarget.hasVSX()) {
11022 switch (IntrinsicID) {
11023 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
11024 CompareOpc = 99;
11025 break;
11026 case Intrinsic::ppc_vsx_xvcmpgedp_p:
11027 CompareOpc = 115;
11028 break;
11029 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
11030 CompareOpc = 107;
11031 break;
11032 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
11033 CompareOpc = 67;
11034 break;
11035 case Intrinsic::ppc_vsx_xvcmpgesp_p:
11036 CompareOpc = 83;
11037 break;
11038 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
11039 CompareOpc = 75;
11040 break;
11041 }
11042 isDot = true;
11043 } else
11044 return false;
11045 break;
11046
11047 // Normal Comparisons.
11048 case Intrinsic::ppc_altivec_vcmpbfp:
11049 CompareOpc = 966;
11050 break;
11051 case Intrinsic::ppc_altivec_vcmpeqfp:
11052 CompareOpc = 198;
11053 break;
11054 case Intrinsic::ppc_altivec_vcmpequb:
11055 CompareOpc = 6;
11056 break;
11057 case Intrinsic::ppc_altivec_vcmpequh:
11058 CompareOpc = 70;
11059 break;
11060 case Intrinsic::ppc_altivec_vcmpequw:
11061 CompareOpc = 134;
11062 break;
11063 case Intrinsic::ppc_altivec_vcmpequd:
11064 if (Subtarget.hasP8Altivec())
11065 CompareOpc = 199;
11066 else
11067 return false;
11068 break;
11069 case Intrinsic::ppc_altivec_vcmpneb:
11070 case Intrinsic::ppc_altivec_vcmpneh:
11071 case Intrinsic::ppc_altivec_vcmpnew:
11072 case Intrinsic::ppc_altivec_vcmpnezb:
11073 case Intrinsic::ppc_altivec_vcmpnezh:
11074 case Intrinsic::ppc_altivec_vcmpnezw:
11075 if (Subtarget.hasP9Altivec())
11076 switch (IntrinsicID) {
11077 default:
11078 llvm_unreachable("Unknown comparison intrinsic.");
11079 case Intrinsic::ppc_altivec_vcmpneb:
11080 CompareOpc = 7;
11081 break;
11082 case Intrinsic::ppc_altivec_vcmpneh:
11083 CompareOpc = 71;
11084 break;
11085 case Intrinsic::ppc_altivec_vcmpnew:
11086 CompareOpc = 135;
11087 break;
11088 case Intrinsic::ppc_altivec_vcmpnezb:
11089 CompareOpc = 263;
11090 break;
11091 case Intrinsic::ppc_altivec_vcmpnezh:
11092 CompareOpc = 327;
11093 break;
11094 case Intrinsic::ppc_altivec_vcmpnezw:
11095 CompareOpc = 391;
11096 break;
11097 }
11098 else
11099 return false;
11100 break;
11101 case Intrinsic::ppc_altivec_vcmpgefp:
11102 CompareOpc = 454;
11103 break;
11104 case Intrinsic::ppc_altivec_vcmpgtfp:
11105 CompareOpc = 710;
11106 break;
11107 case Intrinsic::ppc_altivec_vcmpgtsb:
11108 CompareOpc = 774;
11109 break;
11110 case Intrinsic::ppc_altivec_vcmpgtsh:
11111 CompareOpc = 838;
11112 break;
11113 case Intrinsic::ppc_altivec_vcmpgtsw:
11114 CompareOpc = 902;
11115 break;
11116 case Intrinsic::ppc_altivec_vcmpgtsd:
11117 if (Subtarget.hasP8Altivec())
11118 CompareOpc = 967;
11119 else
11120 return false;
11121 break;
11122 case Intrinsic::ppc_altivec_vcmpgtub:
11123 CompareOpc = 518;
11124 break;
11125 case Intrinsic::ppc_altivec_vcmpgtuh:
11126 CompareOpc = 582;
11127 break;
11128 case Intrinsic::ppc_altivec_vcmpgtuw:
11129 CompareOpc = 646;
11130 break;
11131 case Intrinsic::ppc_altivec_vcmpgtud:
11132 if (Subtarget.hasP8Altivec())
11133 CompareOpc = 711;
11134 else
11135 return false;
11136 break;
11137 case Intrinsic::ppc_altivec_vcmpequq_p:
11138 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11139 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11140 if (!Subtarget.isISA3_1())
11141 return false;
11142 switch (IntrinsicID) {
11143 default:
11144 llvm_unreachable("Unknown comparison intrinsic.");
11145 case Intrinsic::ppc_altivec_vcmpequq_p:
11146 CompareOpc = 455;
11147 break;
11148 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11149 CompareOpc = 903;
11150 break;
11151 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11152 CompareOpc = 647;
11153 break;
11154 }
11155 isDot = true;
11156 break;
11157 }
11158 return true;
11159}
11160
11161/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11162/// lower, do it, otherwise return null.
11163SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11164 SelectionDAG &DAG) const {
11165 unsigned IntrinsicID = Op.getConstantOperandVal(0);
11166
11167 SDLoc dl(Op);
11168
11169 switch (IntrinsicID) {
11170 case Intrinsic::thread_pointer:
11171 // Reads the thread pointer register, used for __builtin_thread_pointer.
11172 if (Subtarget.isPPC64())
11173 return DAG.getRegister(PPC::X13, MVT::i64);
11174 return DAG.getRegister(PPC::R2, MVT::i32);
11175
11176 case Intrinsic::ppc_rldimi: {
11177 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11178 SDValue Src = Op.getOperand(1);
11179 APInt Mask = Op.getConstantOperandAPInt(4);
11180 if (Mask.isZero())
11181 return Op.getOperand(2);
11182 if (Mask.isAllOnes())
11183 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11184 uint64_t SH = Op.getConstantOperandVal(3);
11185 unsigned MB = 0, ME = 0;
11186 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11187 report_fatal_error("invalid rldimi mask!");
11188 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11189 if (ME < 63 - SH) {
11190 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11191 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11192 } else if (ME > 63 - SH) {
11193 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11194 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11195 }
11196 return SDValue(
11197 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11198 {Op.getOperand(2), Src,
11199 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11200 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11201 0);
11202 }
11203
11204 case Intrinsic::ppc_rlwimi: {
11205 APInt Mask = Op.getConstantOperandAPInt(4);
11206 if (Mask.isZero())
11207 return Op.getOperand(2);
11208 if (Mask.isAllOnes())
11209 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11210 Op.getOperand(3));
11211 unsigned MB = 0, ME = 0;
11212 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11213 report_fatal_error("invalid rlwimi mask!");
11214 return SDValue(DAG.getMachineNode(
11215 PPC::RLWIMI, dl, MVT::i32,
11216 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11217 DAG.getTargetConstant(MB, dl, MVT::i32),
11218 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11219 0);
11220 }
11221
11222 case Intrinsic::ppc_rlwnm: {
11223 if (Op.getConstantOperandVal(3) == 0)
11224 return DAG.getConstant(0, dl, MVT::i32);
11225 unsigned MB = 0, ME = 0;
11226 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11227 report_fatal_error("invalid rlwnm mask!");
11228 return SDValue(
11229 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11230 {Op.getOperand(1), Op.getOperand(2),
11231 DAG.getTargetConstant(MB, dl, MVT::i32),
11232 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11233 0);
11234 }
11235
11236 case Intrinsic::ppc_mma_disassemble_acc: {
11237 if (Subtarget.isISAFuture()) {
11238 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11239 SDValue WideVec =
11240 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11241 Op.getOperand(1)),
11242 0);
11243       SmallVector<SDValue, 4> RetOps;
11244       SDValue Value = SDValue(WideVec.getNode(), 0);
11245 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11246
11247 SDValue Extract;
11248 Extract = DAG.getNode(
11249 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11250 Subtarget.isLittleEndian() ? Value2 : Value,
11251 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11252 dl, getPointerTy(DAG.getDataLayout())));
11253 RetOps.push_back(Extract);
11254 Extract = DAG.getNode(
11255 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11256 Subtarget.isLittleEndian() ? Value2 : Value,
11257 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11258 dl, getPointerTy(DAG.getDataLayout())));
11259 RetOps.push_back(Extract);
11260 Extract = DAG.getNode(
11261 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11262 Subtarget.isLittleEndian() ? Value : Value2,
11263 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11264 dl, getPointerTy(DAG.getDataLayout())));
11265 RetOps.push_back(Extract);
11266 Extract = DAG.getNode(
11267 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11268 Subtarget.isLittleEndian() ? Value : Value2,
11269 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11270 dl, getPointerTy(DAG.getDataLayout())));
11271 RetOps.push_back(Extract);
11272 return DAG.getMergeValues(RetOps, dl);
11273 }
11274 [[fallthrough]];
11275 }
11276 case Intrinsic::ppc_vsx_disassemble_pair: {
11277 int NumVecs = 2;
11278 SDValue WideVec = Op.getOperand(1);
11279 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11280 NumVecs = 4;
11281 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11282 }
11283     SmallVector<SDValue, 4> RetOps;
11284     for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11285 SDValue Extract = DAG.getNode(
11286 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11287 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11288 : VecNo,
11289 dl, getPointerTy(DAG.getDataLayout())));
11290 RetOps.push_back(Extract);
11291 }
11292 return DAG.getMergeValues(RetOps, dl);
11293 }
11294
11295 case Intrinsic::ppc_mma_build_dmr: {
11296     SmallVector<SDValue, 8> Pairs;
11297     SmallVector<SDValue, 8> Chains;
11298     for (int i = 1; i < 9; i += 2) {
11299 SDValue Hi = Op.getOperand(i);
11300 SDValue Lo = Op.getOperand(i + 1);
11301 if (Hi->getOpcode() == ISD::LOAD)
11302 Chains.push_back(Hi.getValue(1));
11303 if (Lo->getOpcode() == ISD::LOAD)
11304 Chains.push_back(Lo.getValue(1));
11305 Pairs.push_back(
11306 DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
11307 }
11308 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11309 SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
11310 return DAG.getMergeValues({Value, TF}, dl);
11311 }
11312
11313 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11314 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11315 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11316 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11317 "Specify P of 0 or 1 for lower or upper 512 bytes");
11318 unsigned HiLo = Idx->getSExtValue();
11319 unsigned Opcode;
11320 unsigned Subx;
11321 if (HiLo == 0) {
11322 Opcode = PPC::DMXXEXTFDMR512;
11323 Subx = PPC::sub_wacc_lo;
11324 } else {
11325 Opcode = PPC::DMXXEXTFDMR512_HI;
11326 Subx = PPC::sub_wacc_hi;
11327 }
11328 SDValue Subreg(
11329 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11330 Op.getOperand(1),
11331 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11332 0);
11333 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11334 return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
11335 }
11336
11337 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11338 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11339 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11340 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11341 "Specify a dmr row pair 0-3");
11342 unsigned IdxVal = Idx->getSExtValue();
11343 unsigned Subx;
11344 switch (IdxVal) {
11345 case 0:
11346 Subx = PPC::sub_dmrrowp0;
11347 break;
11348 case 1:
11349 Subx = PPC::sub_dmrrowp1;
11350 break;
11351 case 2:
11352 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11353 break;
11354 case 3:
11355 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11356 break;
11357 }
11358 SDValue Subreg(
11359 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
11360 Op.getOperand(1),
11361 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11362 0);
11363 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11364 return SDValue(
11365 DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
11366 0);
11367 }
11368
11369 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11370 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11371 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
11372 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11373 "Specify P of 0 or 1 for lower or upper 512 bytes");
11374 unsigned HiLo = Idx->getSExtValue();
11375 unsigned Opcode;
11376 unsigned Subx;
11377 if (HiLo == 0) {
11378 Opcode = PPC::DMXXINSTDMR512;
11379 Subx = PPC::sub_wacc_lo;
11380 } else {
11381 Opcode = PPC::DMXXINSTDMR512_HI;
11382 Subx = PPC::sub_wacc_hi;
11383 }
11384 SDValue Ops[] = {Op.getOperand(2), Op.getOperand(3)};
11385 SDValue Wacc = SDValue(DAG.getMachineNode(Opcode, dl, MVT::v512i1, Ops), 0);
11386 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11387 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11388 Op.getOperand(1), Wacc, SubReg),
11389 0);
11390 }
11391
11392 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11393 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11394 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
11395 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11396 "Specify a dmr row pair 0-3");
11397 unsigned IdxVal = Idx->getSExtValue();
11398 unsigned Subx;
11399 switch (IdxVal) {
11400 case 0:
11401 Subx = PPC::sub_dmrrowp0;
11402 break;
11403 case 1:
11404 Subx = PPC::sub_dmrrowp1;
11405 break;
11406 case 2:
11407 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11408 break;
11409 case 3:
11410 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11411 break;
11412 }
11413 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11414 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11415 SDValue Ops[] = {Op.getOperand(2), P};
11416 SDValue DMRRowp = SDValue(
11417 DAG.getMachineNode(PPC::DMXXINSTDMR256, dl, MVT::v256i1, Ops), 0);
11418 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11419 Op.getOperand(1), DMRRowp, SubReg),
11420 0);
11421 }
11422
11423 case Intrinsic::ppc_mma_xxmfacc:
11424 case Intrinsic::ppc_mma_xxmtacc: {
11425 // Allow pre-isa-future subtargets to lower as normal.
11426 if (!Subtarget.isISAFuture())
11427 return SDValue();
11428     // The intrinsics for xxmtacc and xxmfacc take one argument of
11429     // type v512i1. For future CPUs the corresponding wacc instruction
11430     // dmxx[inst|extf]dmr512 is always generated for type v512i1, which
11431     // removes the need to produce the xxm[t|f]acc.
11432 SDValue WideVec = Op.getOperand(1);
11433 DAG.ReplaceAllUsesWith(Op, WideVec);
11434 return SDValue();
11435 }
11436
11437 case Intrinsic::ppc_unpack_longdouble: {
11438 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11439 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11440 "Argument of long double unpack must be 0 or 1!");
11441 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11442 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11443 Idx->getValueType(0)));
11444 }
11445
11446 case Intrinsic::ppc_compare_exp_lt:
11447 case Intrinsic::ppc_compare_exp_gt:
11448 case Intrinsic::ppc_compare_exp_eq:
11449 case Intrinsic::ppc_compare_exp_uo: {
11450 unsigned Pred;
11451 switch (IntrinsicID) {
11452 case Intrinsic::ppc_compare_exp_lt:
11453 Pred = PPC::PRED_LT;
11454 break;
11455 case Intrinsic::ppc_compare_exp_gt:
11456 Pred = PPC::PRED_GT;
11457 break;
11458 case Intrinsic::ppc_compare_exp_eq:
11459 Pred = PPC::PRED_EQ;
11460 break;
11461 case Intrinsic::ppc_compare_exp_uo:
11462 Pred = PPC::PRED_UN;
11463 break;
11464 }
11465 return SDValue(
11466 DAG.getMachineNode(
11467 PPC::SELECT_CC_I4, dl, MVT::i32,
11468 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11469 Op.getOperand(1), Op.getOperand(2)),
11470 0),
11471 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11472 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11473 0);
11474 }
11475 case Intrinsic::ppc_test_data_class: {
11476 EVT OpVT = Op.getOperand(1).getValueType();
11477 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11478 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11479 : PPC::XSTSTDCSP);
11480 return SDValue(
11481 DAG.getMachineNode(
11482 PPC::SELECT_CC_I4, dl, MVT::i32,
11483 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11484 Op.getOperand(1)),
11485 0),
11486 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11487 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11488 0);
11489 }
11490 case Intrinsic::ppc_fnmsub: {
11491 EVT VT = Op.getOperand(1).getValueType();
11492 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11493 return DAG.getNode(
11494 ISD::FNEG, dl, VT,
11495 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11496 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11497 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11498 Op.getOperand(2), Op.getOperand(3));
11499 }
11500 case Intrinsic::ppc_convert_f128_to_ppcf128:
11501 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11502 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11503 ? RTLIB::CONVERT_PPCF128_F128
11504 : RTLIB::CONVERT_F128_PPCF128;
11505 MakeLibCallOptions CallOptions;
11506 std::pair<SDValue, SDValue> Result =
11507 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11508 dl, SDValue());
11509 return Result.first;
11510 }
11511 case Intrinsic::ppc_maxfe:
11512 case Intrinsic::ppc_maxfl:
11513 case Intrinsic::ppc_maxfs:
11514 case Intrinsic::ppc_minfe:
11515 case Intrinsic::ppc_minfl:
11516 case Intrinsic::ppc_minfs: {
11517 EVT VT = Op.getValueType();
11518 assert(
11519 all_of(Op->ops().drop_front(4),
11520 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11521 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11522 (void)VT;
11523     ISD::CondCode CC = ISD::SETGT;
11524     if (IntrinsicID == Intrinsic::ppc_minfe ||
11525 IntrinsicID == Intrinsic::ppc_minfl ||
11526 IntrinsicID == Intrinsic::ppc_minfs)
11527 CC = ISD::SETLT;
11528 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11529 SDValue Res = Op.getOperand(I);
11530 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11531 Res =
11532 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11533 }
11534 return Res;
11535 }
11536 }
11537
11538 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11539 // opcode number of the comparison.
11540 int CompareOpc;
11541 bool isDot;
11542 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11543 return SDValue(); // Don't custom lower most intrinsics.
11544
11545 // If this is a non-dot comparison, make the VCMP node and we are done.
11546 if (!isDot) {
11547 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11548 Op.getOperand(1), Op.getOperand(2),
11549 DAG.getConstant(CompareOpc, dl, MVT::i32));
11550 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11551 }
11552
11553 // Create the PPCISD altivec 'dot' comparison node.
11554 SDValue Ops[] = {
11555 Op.getOperand(2), // LHS
11556 Op.getOperand(3), // RHS
11557 DAG.getConstant(CompareOpc, dl, MVT::i32)
11558 };
11559 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11560 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11561
11562 // Unpack the result based on how the target uses it.
11563 unsigned BitNo; // Bit # of CR6.
11564 bool InvertBit; // Invert result?
11565 unsigned Bitx;
11566 unsigned SetOp;
11567 switch (Op.getConstantOperandVal(1)) {
11568 default: // Can't happen, don't crash on invalid number though.
11569 case 0: // Return the value of the EQ bit of CR6.
11570 BitNo = 0;
11571 InvertBit = false;
11572 Bitx = PPC::sub_eq;
11573 SetOp = PPCISD::SETBC;
11574 break;
11575 case 1: // Return the inverted value of the EQ bit of CR6.
11576 BitNo = 0;
11577 InvertBit = true;
11578 Bitx = PPC::sub_eq;
11579 SetOp = PPCISD::SETBCR;
11580 break;
11581 case 2: // Return the value of the LT bit of CR6.
11582 BitNo = 2;
11583 InvertBit = false;
11584 Bitx = PPC::sub_lt;
11585 SetOp = PPCISD::SETBC;
11586 break;
11587 case 3: // Return the inverted value of the LT bit of CR6.
11588 BitNo = 2;
11589 InvertBit = true;
11590 Bitx = PPC::sub_lt;
11591 SetOp = PPCISD::SETBCR;
11592 break;
11593 }
11594
11595 SDValue GlueOp = CompNode.getValue(1);
11596 if (Subtarget.isISA3_1()) {
11597 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11598 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11599 SDValue CRBit =
11600 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11601 CR6Reg, SubRegIdx, GlueOp),
11602 0);
11603 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11604 }
11605
11606 // Now that we have the comparison, emit a copy from the CR to a GPR.
11607 // This is flagged to the above dot comparison.
11608 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11609 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11610
11611 // Shift the bit into the low position.
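  // (CR6 sits in bits 7..4 of the MFOCRF result, LT/GT/EQ/SO from high to low,
  // so BitNo == 0 (EQ) shifts right by 5 and BitNo == 2 (LT) shifts by 7.)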
11612 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11613 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11614 // Isolate the bit.
11615 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11616 DAG.getConstant(1, dl, MVT::i32));
11617
11618 // If we are supposed to, toggle the bit.
11619 if (InvertBit)
11620 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11621 DAG.getConstant(1, dl, MVT::i32));
11622 return Flags;
11623}
11624
11625SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11626 SelectionDAG &DAG) const {
11627 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11628 // the beginning of the argument list.
11629 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11630 SDLoc DL(Op);
11631 switch (Op.getConstantOperandVal(ArgStart)) {
11632 case Intrinsic::ppc_cfence: {
11633 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11634 SDValue Val = Op.getOperand(ArgStart + 1);
11635 EVT Ty = Val.getValueType();
11636 if (Ty == MVT::i128) {
11637 // FIXME: Testing one of two paired registers is sufficient to guarantee
11638 // ordering?
11639 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11640 }
11641 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11642 return SDValue(
11643 DAG.getMachineNode(
11644 Opcode, DL, MVT::Other,
11645 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11646 Op.getOperand(0)),
11647 0);
11648 }
11649 case Intrinsic::ppc_mma_disassemble_dmr: {
11650 return DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2),
11651 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11652 }
11653 default:
11654 break;
11655 }
11656 return SDValue();
11657}
11658
11659// Lower scalar BSWAP64 to xxbrd.
11660SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11661 SDLoc dl(Op);
11662 if (!Subtarget.isPPC64())
11663 return Op;
11664 // MTVSRDD
11665 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11666 Op.getOperand(0));
11667 // XXBRD
11668 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11669 // MFVSRD
11670 int VectorIndex = 0;
11671 if (Subtarget.isLittleEndian())
11672 VectorIndex = 1;
11673 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11674 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11675 return Op;
11676}
11677
11678// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11679// compared to a value that is atomically loaded (atomic loads zero-extend).
11680SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11681 SelectionDAG &DAG) const {
11682 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11683 "Expecting an atomic compare-and-swap here.");
11684 SDLoc dl(Op);
11685 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11686 EVT MemVT = AtomicNode->getMemoryVT();
11687 if (MemVT.getSizeInBits() >= 32)
11688 return Op;
11689
11690 SDValue CmpOp = Op.getOperand(2);
11691 // If this is already correctly zero-extended, leave it alone.
11692 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11693 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11694 return Op;
11695
11696 // Clear the high bits of the compare operand.
11697 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
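  // e.g. 0xFF for an i8 cmpxchg and 0xFFFF for an i16 cmpxchg.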
11698 SDValue NewCmpOp =
11699 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11700 DAG.getConstant(MaskVal, dl, MVT::i32));
11701
11702 // Replace the existing compare operand with the properly zero-extended one.
11703   SmallVector<SDValue, 4> Ops;
11704   for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11705 Ops.push_back(AtomicNode->getOperand(i));
11706 Ops[2] = NewCmpOp;
11707 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11708 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11709 auto NodeTy =
11710 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11711 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11712}
11713
11714SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11715 SelectionDAG &DAG) const {
11716 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11717 EVT MemVT = N->getMemoryVT();
11718 assert(MemVT.getSimpleVT() == MVT::i128 &&
11719 "Expect quadword atomic operations");
11720 SDLoc dl(N);
11721 unsigned Opc = N->getOpcode();
11722 switch (Opc) {
11723 case ISD::ATOMIC_LOAD: {
11724     // Lower a quadword atomic load to int_ppc_atomic_load_i128, which will be
11725     // lowered to PPC instructions by the pattern-matching instruction selector.
11726 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11727     SmallVector<SDValue> Ops{
11728         N->getOperand(0),
11729 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11730 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11731 Ops.push_back(N->getOperand(I));
11732 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11733 Ops, MemVT, N->getMemOperand());
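    // Reassemble the two i64 halves returned by the intrinsic into a single
    // i128 value: Val = ValLo | (ValHi << 64).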
11734 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11735 SDValue ValHi =
11736 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11737 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11738 DAG.getConstant(64, dl, MVT::i32));
11739 SDValue Val =
11740 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11741 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11742 {Val, LoadedVal.getValue(2)});
11743 }
11744 case ISD::ATOMIC_STORE: {
11745     // Lower a quadword atomic store to int_ppc_atomic_store_i128, which will be
11746     // lowered to PPC instructions by the pattern-matching instruction selector.
11747 SDVTList Tys = DAG.getVTList(MVT::Other);
11748     SmallVector<SDValue> Ops{
11749         N->getOperand(0),
11750 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11751 SDValue Val = N->getOperand(1);
11752 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11753 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11754 DAG.getConstant(64, dl, MVT::i32));
11755 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11756 Ops.push_back(ValLo);
11757 Ops.push_back(ValHi);
11758 Ops.push_back(N->getOperand(2));
11759 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11760 N->getMemOperand());
11761 }
11762 default:
11763 llvm_unreachable("Unexpected atomic opcode");
11764 }
11765}
11766
11767 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11768                                 SelectionDAG &DAG,
11769 const PPCSubtarget &Subtarget) {
11770 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11771
11772 enum DataClassMask {
11773 DC_NAN = 1 << 6,
11774 DC_NEG_INF = 1 << 4,
11775 DC_POS_INF = 1 << 5,
11776 DC_NEG_ZERO = 1 << 2,
11777 DC_POS_ZERO = 1 << 3,
11778 DC_NEG_SUBNORM = 1,
11779 DC_POS_SUBNORM = 1 << 1,
11780 };
11781
11782 EVT VT = Op.getValueType();
11783
11784 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11785 : VT == MVT::f64 ? PPC::XSTSTDCDP
11786 : PPC::XSTSTDCSP;
11787
11788 if (Mask == fcAllFlags)
11789 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11790 if (Mask == 0)
11791 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11792
11793 // When it's cheaper or necessary to test reverse flags.
11794 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11795 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11796 return DAG.getNOT(Dl, Rev, MVT::i1);
11797 }
11798
11799 // Power doesn't support testing whether a value is 'normal'. Test the rest
11800 // first, and test if it's 'not not-normal' with expected sign.
11801 if (Mask & fcNormal) {
11802 SDValue Rev(DAG.getMachineNode(
11803 TestOp, Dl, MVT::i32,
11804 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11805 DC_NEG_ZERO | DC_POS_ZERO |
11806 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11807 Dl, MVT::i32),
11808 Op),
11809 0);
11810     // The sign is stored in CR bit 0, the result in CR bit 2.
11811 SDValue Sign(
11812 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11813 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11814 0);
11815 SDValue Normal(DAG.getNOT(
11816 Dl,
11817         SDValue(DAG.getMachineNode(
11818                     TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11819 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11820 0),
11821 MVT::i1));
11822 if (Mask & fcPosNormal)
11823 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11824 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11825 if (Mask == fcPosNormal || Mask == fcNegNormal)
11826 return Result;
11827
11828 return DAG.getNode(
11829 ISD::OR, Dl, MVT::i1,
11830 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11831 }
11832
11833 // The instruction doesn't differentiate between signaling or quiet NaN. Test
11834 // the rest first, and test if it 'is NaN and is signaling/quiet'.
11835 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11836 bool IsQuiet = Mask & fcQNan;
11837 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11838
11839     // Quietness is determined by the most significant bit of the fraction field.
11840 uint64_t QuietMask = 0;
11841 SDValue HighWord;
11842 if (VT == MVT::f128) {
11843 HighWord = DAG.getNode(
11844 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11845 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11846 QuietMask = 0x8000;
11847 } else if (VT == MVT::f64) {
11848 if (Subtarget.isPPC64()) {
11849 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11850 DAG.getBitcast(MVT::i64, Op),
11851 DAG.getConstant(1, Dl, MVT::i32));
11852 } else {
11853 SDValue Vec = DAG.getBitcast(
11854 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11855 HighWord = DAG.getNode(
11856 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11857 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11858 }
11859 QuietMask = 0x80000;
11860 } else if (VT == MVT::f32) {
11861 HighWord = DAG.getBitcast(MVT::i32, Op);
11862 QuietMask = 0x400000;
11863 }
11864 SDValue NanRes = DAG.getSetCC(
11865 Dl, MVT::i1,
11866 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11867 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11868 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11869 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11870 if (Mask == fcQNan || Mask == fcSNan)
11871 return NanRes;
11872
11873 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11874 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11875 NanRes);
11876 }
11877
11878 unsigned NativeMask = 0;
11879 if ((Mask & fcNan) == fcNan)
11880 NativeMask |= DC_NAN;
11881 if (Mask & fcNegInf)
11882 NativeMask |= DC_NEG_INF;
11883 if (Mask & fcPosInf)
11884 NativeMask |= DC_POS_INF;
11885 if (Mask & fcNegZero)
11886 NativeMask |= DC_NEG_ZERO;
11887 if (Mask & fcPosZero)
11888 NativeMask |= DC_POS_ZERO;
11889 if (Mask & fcNegSubnormal)
11890 NativeMask |= DC_NEG_SUBNORM;
11891 if (Mask & fcPosSubnormal)
11892 NativeMask |= DC_POS_SUBNORM;
11893 return SDValue(
11894 DAG.getMachineNode(
11895 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11896           SDValue(DAG.getMachineNode(
11897                       TestOp, Dl, MVT::i32,
11898 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11899 0),
11900 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11901 0);
11902}
11903
11904SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11905 SelectionDAG &DAG) const {
11906 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11907 SDValue LHS = Op.getOperand(0);
11908 uint64_t RHSC = Op.getConstantOperandVal(1);
11909 SDLoc Dl(Op);
11910 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11911 if (LHS.getValueType() == MVT::ppcf128) {
11912 // The higher part determines the value class.
11913 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11914 DAG.getConstant(1, Dl, MVT::i32));
11915 }
11916
11917 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11918}
11919
11920SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11921 SelectionDAG &DAG) const {
11922 SDLoc dl(Op);
11923
11924 MachineFunction &MF = DAG.getMachineFunction();
11925 SDValue Op0 = Op.getOperand(0);
11926 EVT ValVT = Op0.getValueType();
11927 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11928 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11929 int64_t IntVal = Op.getConstantOperandVal(0);
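    // Values in [-16, 15] fit the 5-bit signed immediate of the Altivec
    // splat-immediate instructions (vspltisb/vspltish/vspltisw).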
11930 if (IntVal >= -16 && IntVal <= 15)
11931 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11932 dl);
11933 }
11934
11935 ReuseLoadInfo RLI;
11936 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11937 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11938 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11939 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11940
11941 MachineMemOperand *MMO =
11942         MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
11943                                 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11944 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11945     SDValue Bits = DAG.getMemIntrinsicNode(
11946         PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11947 MVT::i32, MMO);
11948 if (RLI.ResChain)
11949 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
11950 return Bits.getValue(0);
11951 }
11952
11953 // Create a stack slot that is 16-byte aligned.
11954 MachineFrameInfo &MFI = MF.getFrameInfo();
11955 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11956 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11957 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11958
11959 SDValue Val = Op0;
11960 // P10 hardware store forwarding requires that a single store contains all
11961 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11962 // to avoid load hit store on P10 when running binaries compiled for older
11963 // processors by generating two mergeable scalar stores to forward with the
11964 // vector load.
11965 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11966 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11967 ValVT.getSizeInBits() <= 64) {
11968 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11969 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11970 SDValue ShiftBy = DAG.getConstant(
11971 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11972 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11973 SDValue Plus8 =
11974 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11975 SDValue Store2 =
11976 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11977 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11978 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11979 MachinePointerInfo());
11980 }
11981
11982 // Store the input value into Value#0 of the stack slot.
11983 SDValue Store =
11984 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11985 // Load it out.
11986 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11987}
11988
11989SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11990 SelectionDAG &DAG) const {
11991 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11992 "Should only be called for ISD::INSERT_VECTOR_ELT");
11993
11994 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11995
11996 EVT VT = Op.getValueType();
11997 SDLoc dl(Op);
11998 SDValue V1 = Op.getOperand(0);
11999 SDValue V2 = Op.getOperand(1);
12000
12001 if (VT == MVT::v2f64 && C)
12002 return Op;
12003
12004 if (Subtarget.hasP9Vector()) {
12005 // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
12006 // because on P10, it allows this specific insert_vector_elt load pattern to
12007 // utilize the refactored load and store infrastructure in order to exploit
12008 // prefixed loads.
12009 // On targets with inexpensive direct moves (Power9 and up), a
12010 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
12011 // load since a single precision load will involve conversion to double
12012 // precision on the load followed by another conversion to single precision.
12013 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
12014 (isa<LoadSDNode>(V2))) {
12015 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
12016 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
12017 SDValue InsVecElt =
12018 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
12019 BitcastLoad, Op.getOperand(2));
12020 return DAG.getBitcast(MVT::v4f32, InsVecElt);
12021 }
12022 }
12023
12024 if (Subtarget.isISA3_1()) {
12025 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
12026 return SDValue();
12027 // On P10, we have legal lowering for constant and variable indices for
12028 // all vectors.
12029 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
12030 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
12031 return Op;
12032 }
12033
12034 // Before P10, we have legal lowering for constant indices but not for
12035 // variable ones.
12036 if (!C)
12037 return SDValue();
12038
12039 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
12040 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
12041 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
12042 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
12043 unsigned InsertAtElement = C->getZExtValue();
12044 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
12045 if (Subtarget.isLittleEndian()) {
12046 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
12047 }
12048 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
12049 DAG.getConstant(InsertAtByte, dl, MVT::i32));
12050 }
12051 return Op;
12052}
12053
12054SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
12055 SelectionDAG &DAG) const {
12056 SDLoc dl(Op);
12057 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12058 SDValue LoadChain = LN->getChain();
12059 SDValue BasePtr = LN->getBasePtr();
12060 EVT VT = Op.getValueType();
12061 bool IsV1024i1 = VT == MVT::v1024i1;
12062 bool IsV2048i1 = VT == MVT::v2048i1;
12063
12064 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12065 // Dense Math dmr pair registers, respectively.
12066 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12067 (void)IsV2048i1;
12068 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12069 "Dense Math support required.");
12070 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12071
12072   SmallVector<SDValue, 8> Loads;
12073   SmallVector<SDValue, 8> LoadChains;
12074
12075 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
12076 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
12077 MachineMemOperand *MMO = LN->getMemOperand();
12078 unsigned NumVecs = VT.getSizeInBits() / 256;
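  // Each lxvp covers 32 bytes, so a v1024i1 dmr takes four loads and a
  // v2048i1 dmr pair takes eight.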
12079 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12080 MachineMemOperand *NewMMO =
12081 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12082 if (Idx > 0) {
12083 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12084 DAG.getConstant(32, dl, BasePtr.getValueType()));
12085 LoadOps[2] = BasePtr;
12086 }
12087     SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
12088                                          DAG.getVTList(MVT::v256i1, MVT::Other),
12089 LoadOps, MVT::v256i1, NewMMO);
12090 LoadChains.push_back(Ld.getValue(1));
12091 Loads.push_back(Ld);
12092 }
12093
12094 if (Subtarget.isLittleEndian()) {
12095 std::reverse(Loads.begin(), Loads.end());
12096 std::reverse(LoadChains.begin(), LoadChains.end());
12097 }
12098
12099 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12100 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Loads[0],
12101 Loads[1]),
12102 0);
12103 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12104 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12105 Loads[2], Loads[3]),
12106 0);
12107 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12108 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12109 const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
12110
12111 SDValue Value =
12112 SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
12113
12114 if (IsV1024i1) {
12115 return DAG.getMergeValues({Value, TF}, dl);
12116 }
12117
12118 // Handle Loads for V2048i1 which represents a dmr pair.
12119 SDValue DmrPValue;
12120 SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1,
12121 Loads[4], Loads[5]),
12122 0);
12123 SDValue Dmr1Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12124 Loads[6], Loads[7]),
12125 0);
12126 const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
12127 SDValue Dmr1Value = SDValue(
12128 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Dmr1Ops), 0);
12129
12130 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12131 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12132
12133 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12134 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12135
12136 DmrPValue = SDValue(
12137 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12138
12139 return DAG.getMergeValues({DmrPValue, TF}, dl);
12140}
12141
12142SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12143 const SDLoc &dl,
12144 SelectionDAG &DAG) const {
12145 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Pairs[0],
12146 Pairs[1]),
12147 0);
12148 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12149 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12150 Pairs[2], Pairs[3]),
12151 0);
12152 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12153 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12154
12155 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12156 {RC, Lo, LoSub, Hi, HiSub}),
12157 0);
12158}
12159
12160SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12161 SelectionDAG &DAG) const {
12162 SDLoc dl(Op);
12163 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12164 SDValue LoadChain = LN->getChain();
12165 SDValue BasePtr = LN->getBasePtr();
12166 EVT VT = Op.getValueType();
12167
12168 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12169 return LowerDMFVectorLoad(Op, DAG);
12170
12171 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12172 return Op;
12173
12174 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12175 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
12176 // 2 or 4 vsx registers.
12177 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12178 "Type unsupported without MMA");
12179 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12180 "Type unsupported without paired vector support");
12181 Align Alignment = LN->getAlign();
12182   SmallVector<SDValue, 4> Loads;
12183   SmallVector<SDValue, 4> LoadChains;
12184 unsigned NumVecs = VT.getSizeInBits() / 128;
12185 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12186 SDValue Load =
12187 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12188 LN->getPointerInfo().getWithOffset(Idx * 16),
12189 commonAlignment(Alignment, Idx * 16),
12190 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12191 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12192 DAG.getConstant(16, dl, BasePtr.getValueType()));
12193 Loads.push_back(Load);
12194 LoadChains.push_back(Load.getValue(1));
12195 }
12196 if (Subtarget.isLittleEndian()) {
12197 std::reverse(Loads.begin(), Loads.end());
12198 std::reverse(LoadChains.begin(), LoadChains.end());
12199 }
12200 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12201 SDValue Value =
12202 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12203 dl, VT, Loads);
12204 SDValue RetOps[] = {Value, TF};
12205 return DAG.getMergeValues(RetOps, dl);
12206}
12207
12208SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12209 SelectionDAG &DAG) const {
12210
12211 SDLoc dl(Op);
12212 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12213 SDValue StoreChain = SN->getChain();
12214 SDValue BasePtr = SN->getBasePtr();
12215   SmallVector<SDValue, 4> Values;
12216   SmallVector<SDValue, 4> Stores;
12217   EVT VT = SN->getValue().getValueType();
12218 bool IsV1024i1 = VT == MVT::v1024i1;
12219 bool IsV2048i1 = VT == MVT::v2048i1;
12220
12221 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12222 // Dense Math dmr pair registers, respectively.
12223 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12224 (void)IsV2048i1;
12225 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12226 "Dense Math support required.");
12227 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12228
12229 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12230 if (IsV1024i1) {
12231     SDValue Lo(DAG.getMachineNode(
12232                    TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12233 Op.getOperand(1),
12234 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12235 0);
12236     SDValue Hi(DAG.getMachineNode(
12237                    TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12238 Op.getOperand(1),
12239 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12240 0);
12241 MachineSDNode *ExtNode =
12242 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12243 Values.push_back(SDValue(ExtNode, 0));
12244 Values.push_back(SDValue(ExtNode, 1));
12245 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12246 Values.push_back(SDValue(ExtNode, 0));
12247 Values.push_back(SDValue(ExtNode, 1));
12248 } else {
12249 // This corresponds to v2048i1 which represents a dmr pair.
12250 SDValue Dmr0(
12251 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12252 Op.getOperand(1),
12253 DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12254 0);
12255
12256 SDValue Dmr1(
12257 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12258 Op.getOperand(1),
12259 DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12260 0);
12261
12262 SDValue Dmr0Lo(DAG.getMachineNode(
12263 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12264 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12265 0);
12266
12267 SDValue Dmr0Hi(DAG.getMachineNode(
12268 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12269 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12270 0);
12271
12272 SDValue Dmr1Lo(DAG.getMachineNode(
12273 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12274 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12275 0);
12276
12277 SDValue Dmr1Hi(DAG.getMachineNode(
12278 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12279 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12280 0);
12281
12282 MachineSDNode *ExtNode =
12283 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12284 Values.push_back(SDValue(ExtNode, 0));
12285 Values.push_back(SDValue(ExtNode, 1));
12286 ExtNode =
12287 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12288 Values.push_back(SDValue(ExtNode, 0));
12289 Values.push_back(SDValue(ExtNode, 1));
12290 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12291 Values.push_back(SDValue(ExtNode, 0));
12292 Values.push_back(SDValue(ExtNode, 1));
12293 ExtNode =
12294 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12295 Values.push_back(SDValue(ExtNode, 0));
12296 Values.push_back(SDValue(ExtNode, 1));
12297 }
12298
12299 if (Subtarget.isLittleEndian())
12300 std::reverse(Values.begin(), Values.end());
12301
12302 SDVTList Tys = DAG.getVTList(MVT::Other);
12303   SmallVector<SDValue, 4> Ops{
12304       StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
12305 Values[0], BasePtr};
12306 MachineMemOperand *MMO = SN->getMemOperand();
12307 unsigned NumVecs = VT.getSizeInBits() / 256;
12308 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12309 MachineMemOperand *NewMMO =
12310 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12311 if (Idx > 0) {
12312 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12313 DAG.getConstant(32, dl, BasePtr.getValueType()));
12314 Ops[3] = BasePtr;
12315 }
12316 Ops[2] = Values[Idx];
12317     SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
12318                                          MVT::v256i1, NewMMO);
12319 Stores.push_back(St);
12320 }
12321
12322 SDValue TF = DAG.getTokenFactor(dl, Stores);
12323 return TF;
12324}
12325
12326SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12327 SelectionDAG &DAG) const {
12328 SDLoc dl(Op);
12329 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12330 SDValue StoreChain = SN->getChain();
12331 SDValue BasePtr = SN->getBasePtr();
12332 SDValue Value = SN->getValue();
12333 SDValue Value2 = SN->getValue();
12334 EVT StoreVT = Value.getValueType();
12335
12336 if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
12337 return LowerDMFVectorStore(Op, DAG);
12338
12339 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12340 return Op;
12341
12342 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12343 // Here we create 2 or 4 v16i8 stores to store the underlying registers of
12344 // the pair or accumulator individually.
12345 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
12346 "Type unsupported without MMA");
12347 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12348 "Type unsupported without paired vector support");
12349 Align Alignment = SN->getAlign();
12350 SmallVector<SDValue, 4> Stores;
12351 unsigned NumVecs = 2;
12352 if (StoreVT == MVT::v512i1) {
12353 if (Subtarget.isISAFuture()) {
12354 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12355 MachineSDNode *ExtNode = DAG.getMachineNode(
12356 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
12357
12358 Value = SDValue(ExtNode, 0);
12359 Value2 = SDValue(ExtNode, 1);
12360 } else
12361 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
12362 NumVecs = 4;
12363 }
12364 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12365 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
12366 SDValue Elt;
12367 if (Subtarget.isISAFuture()) {
12368 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
12369 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
12370 Idx > 1 ? Value2 : Value,
12371 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12372 } else
12373 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
12374 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12375
12376 SDValue Store =
12377 DAG.getStore(StoreChain, dl, Elt, BasePtr,
12378 SN->getPointerInfo().getWithOffset(Idx * 16),
12379 commonAlignment(Alignment, Idx * 16),
12380 SN->getMemOperand()->getFlags(), SN->getAAInfo());
12381 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12382 DAG.getConstant(16, dl, BasePtr.getValueType()));
12383 Stores.push_back(Store);
12384 }
12385 SDValue TF = DAG.getTokenFactor(dl, Stores);
12386 return TF;
12387}
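// Illustration of the decomposition above (editor's sketch, not verbatim from
// the source): storing a v512i1 accumulator to address P emits four v16i8
// stores at P, P+16, P+32 and P+48; on little-endian subtargets the register
// index (VecNum) is walked in reverse to match the expected in-memory order.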
12388
12389SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12390 SDLoc dl(Op);
12391 if (Op.getValueType() == MVT::v4i32) {
12392 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12393
12394 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
12395 // +16 as shift amt.
12396 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
12397 SDValue RHSSwap = // = vrlw RHS, 16
12398 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
12399
12400 // Shrinkify inputs to v8i16.
12401 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
12402 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
12403 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
12404
12405 // Low parts multiplied together, generating 32-bit results (we ignore the
12406 // top parts).
12407 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
12408 LHS, RHS, DAG, dl, MVT::v4i32);
12409
12410 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
12411 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
12412 // Shift the high parts up 16 bits.
12413 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
12414 Neg16, DAG, dl);
12415 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
12416 } else if (Op.getValueType() == MVT::v16i8) {
12417 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12418 bool isLittleEndian = Subtarget.isLittleEndian();
12419
12420 // Multiply the even 8-bit parts, producing 16-bit sums.
12421 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
12422 LHS, RHS, DAG, dl, MVT::v8i16);
12423 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
12424
12425 // Multiply the odd 8-bit parts, producing 16-bit sums.
12426 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
12427 LHS, RHS, DAG, dl, MVT::v8i16);
12428 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
12429
12430 // Merge the results together. Because vmuleub and vmuloub are
12431 // instructions with a big-endian bias, we must reverse the
12432 // element numbering and reverse the meaning of "odd" and "even"
12433 // when generating little endian code.
12434 int Ops[16];
12435 for (unsigned i = 0; i != 8; ++i) {
12436 if (isLittleEndian) {
12437 Ops[i*2 ] = 2*i;
12438 Ops[i*2+1] = 2*i+16;
12439 } else {
12440 Ops[i*2 ] = 2*i+1;
12441 Ops[i*2+1] = 2*i+1+16;
12442 }
12443 }
12444 if (isLittleEndian)
12445 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
12446 else
12447 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
12448 } else {
12449 llvm_unreachable("Unknown mul to lower!");
12450 }
12451}
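// Note on the v4i32 path above (illustrative): writing each 32-bit element as
// a = a_hi*2^16 + a_lo and b = b_hi*2^16 + b_lo, the product modulo 2^32 is
//   a*b = a_lo*b_lo + ((a_hi*b_lo + a_lo*b_hi) << 16).
// vmulouh produces the a_lo*b_lo terms, vmsumuhm with the rotated RHS produces
// the two cross terms, and vslw shifts them into the high half before the add.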
12452
12453SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12454 bool IsStrict = Op->isStrictFPOpcode();
12455 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12456 !Subtarget.hasP9Vector())
12457 return SDValue();
12458
12459 return Op;
12460}
12461
12462 // Custom lowering for fpext v2f32 to v2f64
12463SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12464
12465 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12466 "Should only be called for ISD::FP_EXTEND");
12467
12468 // FIXME: handle extends from half precision float vectors on P9.
12469 // We only want to custom lower an extend from v2f32 to v2f64.
12470 if (Op.getValueType() != MVT::v2f64 ||
12471 Op.getOperand(0).getValueType() != MVT::v2f32)
12472 return SDValue();
12473
12474 SDLoc dl(Op);
12475 SDValue Op0 = Op.getOperand(0);
12476
12477 switch (Op0.getOpcode()) {
12478 default:
12479 return SDValue();
12480 case ISD::EXTRACT_SUBVECTOR: {
12481 assert(Op0.getNumOperands() == 2 &&
12482 isa<ConstantSDNode>(Op0->getOperand(1)) &&
12483 "Node should have 2 operands with second one being a constant!");
12484
12485 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
12486 return SDValue();
12487
12488 // Custom lowering is only done for the high or low doubleword.
12489 int Idx = Op0.getConstantOperandVal(1);
12490 if (Idx % 2 != 0)
12491 return SDValue();
12492
12493 // Since input is v4f32, at this point Idx is either 0 or 2.
12494 // Shift to get the doubleword position we want.
12495 int DWord = Idx >> 1;
12496
12497 // High and low word positions are different on little endian.
12498 if (Subtarget.isLittleEndian())
12499 DWord ^= 0x1;
12500
12501 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
12502 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
12503 }
12504 case ISD::FADD:
12505 case ISD::FMUL:
12506 case ISD::FSUB: {
12507 SDValue NewLoad[2];
12508 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12509 // Ensure both inputs are loads.
12510 SDValue LdOp = Op0.getOperand(i);
12511 if (LdOp.getOpcode() != ISD::LOAD)
12512 return SDValue();
12513 // Generate new load node.
12514 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
12515 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12516 NewLoad[i] = DAG.getMemIntrinsicNode(
12517 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12518 LD->getMemoryVT(), LD->getMemOperand());
12519 }
12520 SDValue NewOp =
12521 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
12522 NewLoad[1], Op0.getNode()->getFlags());
12523 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12524 DAG.getConstant(0, dl, MVT::i32));
12525 }
12526 case ISD::LOAD: {
12527 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12528 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12529 SDValue NewLd = DAG.getMemIntrinsicNode(
12530 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12531 LD->getMemoryVT(), LD->getMemOperand());
12532 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12533 DAG.getConstant(0, dl, MVT::i32));
12534 }
12535 }
12536 llvm_unreachable("ERROR:Should return for all cases within swtich.");
12537}
12538
12539 static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value,
12540 SelectionDAG &DAG,
12541 const PPCSubtarget &STI) {
12542 SDLoc DL(Value);
12543 if (STI.useCRBits())
12544 Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
12545 DAG.getConstant(1, DL, SumType),
12546 DAG.getConstant(0, DL, SumType));
12547 else
12548 Value = DAG.getZExtOrTrunc(Value, DL, SumType);
12549 SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
12550 Value, DAG.getAllOnesConstant(DL, SumType));
12551 return Sum.getValue(1);
12552}
12553
12554 static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag,
12555 EVT CarryType, SelectionDAG &DAG,
12556 const PPCSubtarget &STI) {
12557 SDLoc DL(Flag);
12558 SDValue Zero = DAG.getConstant(0, DL, SumType);
12559 SDValue Carry = DAG.getNode(
12560 PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
12561 if (STI.useCRBits())
12562 return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
12563 return DAG.getZExtOrTrunc(Carry, DL, CarryType);
12564}
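// Note on the two helpers above (illustrative): they round-trip between a
// boolean carry value and the CA flag. Value -> flag uses ADDC(value, -1),
// which carries out exactly when value != 0; flag -> value uses
// ADDE(0, 0, flag), which materializes CA as 0 or 1.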
12565
12566SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12567
12568 SDLoc DL(Op);
12569 SDNode *N = Op.getNode();
12570 EVT VT = N->getValueType(0);
12571 EVT CarryType = N->getValueType(1);
12572 unsigned Opc = N->getOpcode();
12573 bool IsAdd = Opc == ISD::UADDO;
12574 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12575 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12576 N->getOperand(0), N->getOperand(1));
12577 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12578 DAG, Subtarget);
12579 if (!IsAdd)
12580 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12581 DAG.getConstant(1UL, DL, CarryType));
12582 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12583}
12584
12585SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12586 SelectionDAG &DAG) const {
12587 SDLoc DL(Op);
12588 SDNode *N = Op.getNode();
12589 unsigned Opc = N->getOpcode();
12590 EVT VT = N->getValueType(0);
12591 EVT CarryType = N->getValueType(1);
12592 SDValue CarryOp = N->getOperand(2);
12593 bool IsAdd = Opc == ISD::UADDO_CARRY;
12594 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12595 if (!IsAdd)
12596 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12597 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12598 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12599 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12600 Op.getOperand(0), Op.getOperand(1), CarryOp);
12601 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12602 Subtarget);
12603 if (!IsAdd)
12604 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12605 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12606 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12607}
12608
12609SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12610
12611 SDLoc dl(Op);
12612 SDValue LHS = Op.getOperand(0);
12613 SDValue RHS = Op.getOperand(1);
12614 EVT VT = Op.getNode()->getValueType(0);
12615
12616 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12617
12618 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12619 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12620
12621 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12622
12623 SDValue Overflow =
12624 DAG.getNode(ISD::SRL, dl, VT, And,
12625 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12626
12627 SDValue OverflowTrunc =
12628 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12629
12630 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12631}
12632
12633// Lower unsigned 3-way compare producing -1/0/1.
12634SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12635 SDLoc DL(Op);
12636 SDValue A = DAG.getFreeze(Op.getOperand(0));
12637 SDValue B = DAG.getFreeze(Op.getOperand(1));
12638 EVT OpVT = A.getValueType(); // operand type
12639 EVT ResVT = Op.getValueType(); // result type
12640
12641 // First compute diff = A - B (will become subf).
12642 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
12643
12644 // Generate B - A using SUBC to capture carry.
12645 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
12646 SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
12647 SDValue CA0 = SubC.getValue(1);
12648
12649 // t2 = A - B + CA0 using SUBE.
12650 SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
12651 SDValue CA1 = SubE1.getValue(1);
12652
12653 // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
12654 SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
12655
12656 // Extract the first result and truncate to the result type if needed.
12657 return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
12658}
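// Worked example for the sequence above (illustrative, 32-bit operands;
// subtraction is x + ~y + carry-in, CA is the carry out):
//   A=5, B=9: Diff=-4; CA0=1 (B>=A); SubE1 = 5+~9+1 = -4, CA1=0;
//             Res = -4+~(-4)+0 = -1.
//   A=B=7:    Diff=0;  CA0=1; SubE1 = 7+~7+1 = 0, CA1=1; Res = 0+~0+1 = 0.
//   A=9, B=5: Diff=4;  CA0=0 (borrow); SubE1 = 9+~5+0 = 3, CA1=1;
//             Res = 4+~3+1 = 1.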
12659
12660/// LowerOperation - Provide custom lowering hooks for some operations.
12661///
12663 switch (Op.getOpcode()) {
12664 default:
12665 llvm_unreachable("Wasn't expecting to be able to lower this!");
12666 case ISD::FPOW: return lowerPow(Op, DAG);
12667 case ISD::FSIN: return lowerSin(Op, DAG);
12668 case ISD::FCOS: return lowerCos(Op, DAG);
12669 case ISD::FLOG: return lowerLog(Op, DAG);
12670 case ISD::FLOG10: return lowerLog10(Op, DAG);
12671 case ISD::FEXP: return lowerExp(Op, DAG);
12672 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12673 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12674 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12675 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12676 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12677 case ISD::STRICT_FSETCC:
12678 case ISD::STRICT_FSETCCS:
12679 case ISD::SETCC: return LowerSETCC(Op, DAG);
12680 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12681 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12682 case ISD::SSUBO:
12683 return LowerSSUBO(Op, DAG);
12684
12685 case ISD::INLINEASM:
12686 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12687 // Variable argument lowering.
12688 case ISD::VASTART: return LowerVASTART(Op, DAG);
12689 case ISD::VAARG: return LowerVAARG(Op, DAG);
12690 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12691
12692 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12693 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12694 case ISD::GET_DYNAMIC_AREA_OFFSET:
12695 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12696
12697 // Exception handling lowering.
12698 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12699 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12700 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12701
12702 case ISD::LOAD: return LowerLOAD(Op, DAG);
12703 case ISD::STORE: return LowerSTORE(Op, DAG);
12704 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12705 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12706 case ISD::STRICT_FP_TO_UINT:
12707 case ISD::STRICT_FP_TO_SINT:
12708 case ISD::FP_TO_UINT:
12709 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12710 case ISD::STRICT_UINT_TO_FP:
12711 case ISD::STRICT_SINT_TO_FP:
12712 case ISD::UINT_TO_FP:
12713 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12714 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12715 case ISD::SET_ROUNDING:
12716 return LowerSET_ROUNDING(Op, DAG);
12717
12718 // Lower 64-bit shifts.
12719 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12720 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12721 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12722
12723 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12724 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12725
12726 // Vector-related lowering.
12727 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12728 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12729 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12730 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12731 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12732 case ISD::MUL: return LowerMUL(Op, DAG);
12733 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12734 case ISD::STRICT_FP_ROUND:
12735 case ISD::FP_ROUND:
12736 return LowerFP_ROUND(Op, DAG);
12737 case ISD::ROTL: return LowerROTL(Op, DAG);
12738
12739 // For counter-based loop handling.
12740 case ISD::INTRINSIC_W_CHAIN: return SDValue();
12741
12742 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12743
12744 // Frame & Return address.
12745 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12746 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12747
12748 case ISD::INTRINSIC_VOID:
12749 return LowerINTRINSIC_VOID(Op, DAG);
12750 case ISD::BSWAP:
12751 return LowerBSWAP(Op, DAG);
12752 case ISD::ATOMIC_CMP_SWAP:
12753 return LowerATOMIC_CMP_SWAP(Op, DAG);
12754 case ISD::ATOMIC_STORE:
12755 return LowerATOMIC_LOAD_STORE(Op, DAG);
12756 case ISD::IS_FPCLASS:
12757 return LowerIS_FPCLASS(Op, DAG);
12758 case ISD::UADDO:
12759 case ISD::USUBO:
12760 return LowerADDSUBO(Op, DAG);
12761 case ISD::UADDO_CARRY:
12762 case ISD::USUBO_CARRY:
12763 return LowerADDSUBO_CARRY(Op, DAG);
12764 case ISD::UCMP:
12765 return LowerUCMP(Op, DAG);
12766 case ISD::STRICT_LRINT:
12767 case ISD::STRICT_LLRINT:
12768 case ISD::STRICT_LROUND:
12769 case ISD::STRICT_LLROUND:
12771 if (Op->getFlags().hasNoFPExcept())
12772 return Op;
12773 return SDValue();
12774 }
12775}
12776
12777 void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12778 SmallVectorImpl<SDValue> &Results,
12779 SelectionDAG &DAG) const {
12780 SDLoc dl(N);
12781 switch (N->getOpcode()) {
12782 default:
12783 llvm_unreachable("Do not know how to custom type legalize this operation!");
12784 case ISD::ATOMIC_LOAD: {
12785 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12786 Results.push_back(Res);
12787 Results.push_back(Res.getValue(1));
12788 break;
12789 }
12790 case ISD::READCYCLECOUNTER: {
12791 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12792 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12793
12794 Results.push_back(
12795 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12796 Results.push_back(RTB.getValue(2));
12797 break;
12798 }
12799 case ISD::INTRINSIC_W_CHAIN: {
12800 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12801 break;
12802
12803 assert(N->getValueType(0) == MVT::i1 &&
12804 "Unexpected result type for CTR decrement intrinsic");
12805 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12806 N->getValueType(0));
12807 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12808 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12809 N->getOperand(1));
12810
12811 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12812 Results.push_back(NewInt.getValue(1));
12813 break;
12814 }
12815 case ISD::INTRINSIC_WO_CHAIN: {
12816 switch (N->getConstantOperandVal(0)) {
12817 case Intrinsic::ppc_pack_longdouble:
12818 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12819 N->getOperand(2), N->getOperand(1)));
12820 break;
12821 case Intrinsic::ppc_maxfe:
12822 case Intrinsic::ppc_minfe:
12823 case Intrinsic::ppc_fnmsub:
12824 case Intrinsic::ppc_convert_f128_to_ppcf128:
12825 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12826 break;
12827 }
12828 break;
12829 }
12830 case ISD::VAARG: {
12831 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12832 return;
12833
12834 EVT VT = N->getValueType(0);
12835
12836 if (VT == MVT::i64) {
12837 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12838
12839 Results.push_back(NewNode);
12840 Results.push_back(NewNode.getValue(1));
12841 }
12842 return;
12843 }
12844 case ISD::STRICT_FP_TO_SINT:
12845 case ISD::STRICT_FP_TO_UINT:
12846 case ISD::FP_TO_SINT:
12847 case ISD::FP_TO_UINT: {
12848 // LowerFP_TO_INT() can only handle f32 and f64.
12849 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12850 MVT::ppcf128)
12851 return;
12852 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12853 Results.push_back(LoweredValue);
12854 if (N->isStrictFPOpcode())
12855 Results.push_back(LoweredValue.getValue(1));
12856 return;
12857 }
12858 case ISD::TRUNCATE: {
12859 if (!N->getValueType(0).isVector())
12860 return;
12861 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12862 if (Lowered)
12863 Results.push_back(Lowered);
12864 return;
12865 }
12866 case ISD::SCALAR_TO_VECTOR: {
12867 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12868 if (Lowered)
12869 Results.push_back(Lowered);
12870 return;
12871 }
12872 case ISD::FSHL:
12873 case ISD::FSHR:
12874 // Don't handle funnel shifts here.
12875 return;
12876 case ISD::BITCAST:
12877 // Don't handle bitcast here.
12878 return;
12879 case ISD::FP_EXTEND:
12880 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12881 if (Lowered)
12882 Results.push_back(Lowered);
12883 return;
12884 }
12885}
12886
12887//===----------------------------------------------------------------------===//
12888// Other Lowering Code
12889//===----------------------------------------------------------------------===//
12890
12891 static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12892 return Builder.CreateIntrinsic(Id, {});
12893}
12894
12895 Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
12896 Value *Addr,
12897 AtomicOrdering Ord) const {
12898 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12899
12900 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12901 "Only 8/16/32/64-bit atomic loads supported");
12902 Intrinsic::ID IntID;
12903 switch (SZ) {
12904 default:
12905 llvm_unreachable("Unexpected PrimitiveSize");
12906 case 8:
12907 IntID = Intrinsic::ppc_lbarx;
12908 assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12909 break;
12910 case 16:
12911 IntID = Intrinsic::ppc_lharx;
12912 assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12913 break;
12914 case 32:
12915 IntID = Intrinsic::ppc_lwarx;
12916 break;
12917 case 64:
12918 IntID = Intrinsic::ppc_ldarx;
12919 break;
12920 }
12921 Value *Call =
12922 Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
12923
12924 return Builder.CreateTruncOrBitCast(Call, ValueTy);
12925}
12926
12927// Perform a store-conditional operation to Addr. Return the status of the
12928 // store. This should be 0 if the store succeeded, non-zero otherwise.
12929 Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
12930 Value *Val, Value *Addr,
12931 AtomicOrdering Ord) const {
12932 Type *Ty = Val->getType();
12933 unsigned SZ = Ty->getPrimitiveSizeInBits();
12934
12935 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12936 "Only 8/16/32/64-bit atomic loads supported");
12937 Intrinsic::ID IntID;
12938 switch (SZ) {
12939 default:
12940 llvm_unreachable("Unexpected PrimitiveSize");
12941 case 8:
12942 IntID = Intrinsic::ppc_stbcx;
12943 assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12944 break;
12945 case 16:
12946 IntID = Intrinsic::ppc_sthcx;
12947 assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
12948 break;
12949 case 32:
12950 IntID = Intrinsic::ppc_stwcx;
12951 break;
12952 case 64:
12953 IntID = Intrinsic::ppc_stdcx;
12954 break;
12955 }
12956
12957 if (SZ == 8 || SZ == 16)
12958 Val = Builder.CreateZExt(Val, Builder.getInt32Ty());
12959
12960 Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
12961 /*FMFSource=*/nullptr, "stcx");
12962 return Builder.CreateXor(Call, Builder.getInt32(1));
12963}
12964
12965 // The mappings for emitLeading/TrailingFence are taken from
12966 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12967 Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12968 Instruction *Inst,
12969 AtomicOrdering Ord) const {
12970 if (Ord == AtomicOrdering::SequentiallyConsistent)
12971 return callIntrinsic(Builder, Intrinsic::ppc_sync);
12972 if (isReleaseOrStronger(Ord))
12973 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12974 return nullptr;
12975}
12976
12977 Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12978 Instruction *Inst,
12979 AtomicOrdering Ord) const {
12980 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12981 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12982 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12983 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12984 if (isa<LoadInst>(Inst))
12985 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
12986 {Inst});
12987 // FIXME: Can use isync for rmw operation.
12988 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12989 }
12990 return nullptr;
12991}
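// Summary of the mapping implemented above (illustrative): seq_cst operations
// get a leading sync and weaker release operations a leading lwsync; acquire
// (or stronger) loads get a trailing ppc_cfence, while read-modify-write
// operations that include an atomic load get a trailing lwsync.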
12992
12993 MachineBasicBlock *
12994 PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
12995 unsigned AtomicSize,
12996 unsigned BinOpcode,
12997 unsigned CmpOpcode,
12998 unsigned CmpPred) const {
12999 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13000 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13001
13002 auto LoadMnemonic = PPC::LDARX;
13003 auto StoreMnemonic = PPC::STDCX;
13004 switch (AtomicSize) {
13005 default:
13006 llvm_unreachable("Unexpected size of atomic entity");
13007 case 1:
13008 LoadMnemonic = PPC::LBARX;
13009 StoreMnemonic = PPC::STBCX;
13010 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
13011 break;
13012 case 2:
13013 LoadMnemonic = PPC::LHARX;
13014 StoreMnemonic = PPC::STHCX;
13015 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
13016 break;
13017 case 4:
13018 LoadMnemonic = PPC::LWARX;
13019 StoreMnemonic = PPC::STWCX;
13020 break;
13021 case 8:
13022 LoadMnemonic = PPC::LDARX;
13023 StoreMnemonic = PPC::STDCX;
13024 break;
13025 }
13026
13027 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13028 MachineFunction *F = BB->getParent();
13029 MachineFunction::iterator It = ++BB->getIterator();
13030
13031 Register dest = MI.getOperand(0).getReg();
13032 Register ptrA = MI.getOperand(1).getReg();
13033 Register ptrB = MI.getOperand(2).getReg();
13034 Register incr = MI.getOperand(3).getReg();
13035 DebugLoc dl = MI.getDebugLoc();
13036
13037 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13038 MachineBasicBlock *loop2MBB =
13039 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13040 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13041 F->insert(It, loopMBB);
13042 if (CmpOpcode)
13043 F->insert(It, loop2MBB);
13044 F->insert(It, exitMBB);
13045 exitMBB->splice(exitMBB->begin(), BB,
13046 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13047 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13048
13049 MachineRegisterInfo &RegInfo = F->getRegInfo();
13050 Register TmpReg = (!BinOpcode) ? incr :
13051 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
13052 : &PPC::GPRCRegClass);
13053
13054 // thisMBB:
13055 // ...
13056 // fallthrough --> loopMBB
13057 BB->addSuccessor(loopMBB);
13058
13059 // loopMBB:
13060 // l[wd]arx dest, ptr
13061 // add r0, dest, incr
13062 // st[wd]cx. r0, ptr
13063 // bne- loopMBB
13064 // fallthrough --> exitMBB
13065
13066 // For max/min...
13067 // loopMBB:
13068 // l[wd]arx dest, ptr
13069 // cmpl?[wd] dest, incr
13070 // bgt exitMBB
13071 // loop2MBB:
13072 // st[wd]cx. dest, ptr
13073 // bne- loopMBB
13074 // fallthrough --> exitMBB
13075
13076 BB = loopMBB;
13077 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
13078 .addReg(ptrA).addReg(ptrB);
13079 if (BinOpcode)
13080 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
13081 if (CmpOpcode) {
13082 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13083 // Signed comparisons of byte or halfword values must be sign-extended.
13084 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
13085 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13086 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
13087 ExtReg).addReg(dest);
13088 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
13089 } else
13090 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
13091
13092 BuildMI(BB, dl, TII->get(PPC::BCC))
13093 .addImm(CmpPred)
13094 .addReg(CrReg)
13095 .addMBB(exitMBB);
13096 BB->addSuccessor(loop2MBB);
13097 BB->addSuccessor(exitMBB);
13098 BB = loop2MBB;
13099 }
13100 BuildMI(BB, dl, TII->get(StoreMnemonic))
13101 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
13102 BuildMI(BB, dl, TII->get(PPC::BCC))
13103 .addImm(PPC::PRED_NE)
13104 .addReg(PPC::CR0)
13105 .addMBB(loopMBB);
13106 BB->addSuccessor(loopMBB);
13107 BB->addSuccessor(exitMBB);
13108
13109 // exitMBB:
13110 // ...
13111 BB = exitMBB;
13112 return BB;
13113}
13114
13115 static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
13116 switch(MI.getOpcode()) {
13117 default:
13118 return false;
13119 case PPC::COPY:
13120 return TII->isSignExtended(MI.getOperand(1).getReg(),
13121 &MI.getMF()->getRegInfo());
13122 case PPC::LHA:
13123 case PPC::LHA8:
13124 case PPC::LHAU:
13125 case PPC::LHAU8:
13126 case PPC::LHAUX:
13127 case PPC::LHAUX8:
13128 case PPC::LHAX:
13129 case PPC::LHAX8:
13130 case PPC::LWA:
13131 case PPC::LWAUX:
13132 case PPC::LWAX:
13133 case PPC::LWAX_32:
13134 case PPC::LWA_32:
13135 case PPC::PLHA:
13136 case PPC::PLHA8:
13137 case PPC::PLHA8pc:
13138 case PPC::PLHApc:
13139 case PPC::PLWA:
13140 case PPC::PLWA8:
13141 case PPC::PLWA8pc:
13142 case PPC::PLWApc:
13143 case PPC::EXTSB:
13144 case PPC::EXTSB8:
13145 case PPC::EXTSB8_32_64:
13146 case PPC::EXTSB8_rec:
13147 case PPC::EXTSB_rec:
13148 case PPC::EXTSH:
13149 case PPC::EXTSH8:
13150 case PPC::EXTSH8_32_64:
13151 case PPC::EXTSH8_rec:
13152 case PPC::EXTSH_rec:
13153 case PPC::EXTSW:
13154 case PPC::EXTSWSLI:
13155 case PPC::EXTSWSLI_32_64:
13156 case PPC::EXTSWSLI_32_64_rec:
13157 case PPC::EXTSWSLI_rec:
13158 case PPC::EXTSW_32:
13159 case PPC::EXTSW_32_64:
13160 case PPC::EXTSW_32_64_rec:
13161 case PPC::EXTSW_rec:
13162 case PPC::SRAW:
13163 case PPC::SRAWI:
13164 case PPC::SRAWI_rec:
13165 case PPC::SRAW_rec:
13166 return true;
13167 }
13168 return false;
13169}
13170
13171 MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
13172 MachineInstr &MI, MachineBasicBlock *BB,
13173 bool is8bit, // operation
13174 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
13175 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13176 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13177
13178 // If this is a signed comparison and the value being compared is not known
13179 // to be sign extended, sign extend it here.
13180 DebugLoc dl = MI.getDebugLoc();
13181 MachineFunction *F = BB->getParent();
13182 MachineRegisterInfo &RegInfo = F->getRegInfo();
13183 Register incr = MI.getOperand(3).getReg();
13184 bool IsSignExtended =
13185 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
13186
13187 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
13188 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13189 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13190 .addReg(MI.getOperand(3).getReg());
13191 MI.getOperand(3).setReg(ValueReg);
13192 incr = ValueReg;
13193 }
13194 // If we support part-word atomic mnemonics, just use them
13195 if (Subtarget.hasPartwordAtomics())
13196 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
13197 CmpPred);
13198
13199 // In 64 bit mode we have to use 64 bits for addresses, even though the
13200 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
13201 // registers without caring whether they're 32 or 64, but here we're
13202 // doing actual arithmetic on the addresses.
13203 bool is64bit = Subtarget.isPPC64();
13204 bool isLittleEndian = Subtarget.isLittleEndian();
13205 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13206
13207 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13208 MachineFunction::iterator It = ++BB->getIterator();
13209
13210 Register dest = MI.getOperand(0).getReg();
13211 Register ptrA = MI.getOperand(1).getReg();
13212 Register ptrB = MI.getOperand(2).getReg();
13213
13214 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13215 MachineBasicBlock *loop2MBB =
13216 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13217 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13218 F->insert(It, loopMBB);
13219 if (CmpOpcode)
13220 F->insert(It, loop2MBB);
13221 F->insert(It, exitMBB);
13222 exitMBB->splice(exitMBB->begin(), BB,
13223 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13224 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13225
13226 const TargetRegisterClass *RC =
13227 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13228 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13229
13230 Register PtrReg = RegInfo.createVirtualRegister(RC);
13231 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13232 Register ShiftReg =
13233 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13234 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13235 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13236 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13237 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13238 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13239 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13240 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13241 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13242 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13243 Register Ptr1Reg;
13244 Register TmpReg =
13245 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13246
13247 // thisMBB:
13248 // ...
13249 // fallthrough --> loopMBB
13250 BB->addSuccessor(loopMBB);
13251
13252 // The 4-byte load must be aligned, while a char or short may be
13253 // anywhere in the word. Hence all this nasty bookkeeping code.
13254 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13255 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13256 // xori shift, shift1, 24 [16]
13257 // rlwinm ptr, ptr1, 0, 0, 29
13258 // slw incr2, incr, shift
13259 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13260 // slw mask, mask2, shift
13261 // loopMBB:
13262 // lwarx tmpDest, ptr
13263 // add tmp, tmpDest, incr2
13264 // andc tmp2, tmpDest, mask
13265 // and tmp3, tmp, mask
13266 // or tmp4, tmp3, tmp2
13267 // stwcx. tmp4, ptr
13268 // bne- loopMBB
13269 // fallthrough --> exitMBB
13270 // srw SrwDest, tmpDest, shift
13271 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
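// Example of the shift bookkeeping above (illustrative): for a byte whose
// address has (addr & 3) == 1, shift1 = (addr & 3) * 8 = 8. Little-endian
// uses shift = 8 directly; big-endian XORs with 24 to get shift = 16, since
// byte 1 of a big-endian word occupies bits 23..16 of the loaded value.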
13272 if (ptrA != ZeroReg) {
13273 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13274 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13275 .addReg(ptrA)
13276 .addReg(ptrB);
13277 } else {
13278 Ptr1Reg = ptrB;
13279 }
13280 // We need to use a 32-bit subregister here to avoid a register class
13281 // mismatch in 64-bit mode.
13282 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13283 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13284 .addImm(3)
13285 .addImm(27)
13286 .addImm(is8bit ? 28 : 27);
13287 if (!isLittleEndian)
13288 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13289 .addReg(Shift1Reg)
13290 .addImm(is8bit ? 24 : 16);
13291 if (is64bit)
13292 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13293 .addReg(Ptr1Reg)
13294 .addImm(0)
13295 .addImm(61);
13296 else
13297 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13298 .addReg(Ptr1Reg)
13299 .addImm(0)
13300 .addImm(0)
13301 .addImm(29);
13302 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
13303 if (is8bit)
13304 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13305 else {
13306 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13307 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13308 .addReg(Mask3Reg)
13309 .addImm(65535);
13310 }
13311 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13312 .addReg(Mask2Reg)
13313 .addReg(ShiftReg);
13314
13315 BB = loopMBB;
13316 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13317 .addReg(ZeroReg)
13318 .addReg(PtrReg);
13319 if (BinOpcode)
13320 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13321 .addReg(Incr2Reg)
13322 .addReg(TmpDestReg);
13323 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13324 .addReg(TmpDestReg)
13325 .addReg(MaskReg);
13326 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13327 if (CmpOpcode) {
13328 // For unsigned comparisons, we can directly compare the shifted values.
13329 // For signed comparisons we shift and sign extend.
13330 Register SReg = RegInfo.createVirtualRegister(GPRC);
13331 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13332 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13333 .addReg(TmpDestReg)
13334 .addReg(MaskReg);
13335 unsigned ValueReg = SReg;
13336 unsigned CmpReg = Incr2Reg;
13337 if (CmpOpcode == PPC::CMPW) {
13338 ValueReg = RegInfo.createVirtualRegister(GPRC);
13339 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13340 .addReg(SReg)
13341 .addReg(ShiftReg);
13342 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13343 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13344 .addReg(ValueReg);
13345 ValueReg = ValueSReg;
13346 CmpReg = incr;
13347 }
13348 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
13349 BuildMI(BB, dl, TII->get(PPC::BCC))
13350 .addImm(CmpPred)
13351 .addReg(CrReg)
13352 .addMBB(exitMBB);
13353 BB->addSuccessor(loop2MBB);
13354 BB->addSuccessor(exitMBB);
13355 BB = loop2MBB;
13356 }
13357 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13358 BuildMI(BB, dl, TII->get(PPC::STWCX))
13359 .addReg(Tmp4Reg)
13360 .addReg(ZeroReg)
13361 .addReg(PtrReg);
13362 BuildMI(BB, dl, TII->get(PPC::BCC))
13363 .addImm(PPC::PRED_NE)
13364 .addReg(PPC::CR0)
13365 .addMBB(loopMBB);
13366 BB->addSuccessor(loopMBB);
13367 BB->addSuccessor(exitMBB);
13368
13369 // exitMBB:
13370 // ...
13371 BB = exitMBB;
13372 // Since the shift amount is not a constant, we need to clear
13373 // the upper bits with a separate RLWINM.
13374 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13375 .addReg(SrwDestReg)
13376 .addImm(0)
13377 .addImm(is8bit ? 24 : 16)
13378 .addImm(31);
13379 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13380 .addReg(TmpDestReg)
13381 .addReg(ShiftReg);
13382 return BB;
13383}
13384
13385 MachineBasicBlock *
13386 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
13387 MachineBasicBlock *MBB) const {
13388 DebugLoc DL = MI.getDebugLoc();
13389 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13390 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13391
13392 MachineFunction *MF = MBB->getParent();
13393 MachineRegisterInfo &MRI = MF->getRegInfo();
13394
13395 const BasicBlock *BB = MBB->getBasicBlock();
13396 MachineFunction::iterator I = ++MBB->getIterator();
13397
13398 Register DstReg = MI.getOperand(0).getReg();
13399 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13400 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13401 Register mainDstReg = MRI.createVirtualRegister(RC);
13402 Register restoreDstReg = MRI.createVirtualRegister(RC);
13403
13404 MVT PVT = getPointerTy(MF->getDataLayout());
13405 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13406 "Invalid Pointer Size!");
13407 // For v = setjmp(buf), we generate
13408 //
13409 // thisMBB:
13410 // SjLjSetup mainMBB
13411 // bl mainMBB
13412 // v_restore = 1
13413 // b sinkMBB
13414 //
13415 // mainMBB:
13416 // buf[LabelOffset] = LR
13417 // v_main = 0
13418 //
13419 // sinkMBB:
13420 // v = phi(main, restore)
13421 //
13422
13423 MachineBasicBlock *thisMBB = MBB;
13424 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13425 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13426 MF->insert(I, mainMBB);
13427 MF->insert(I, sinkMBB);
13428
13429 MachineInstrBuilder MIB;
13430
13431 // Transfer the remainder of BB and its successor edges to sinkMBB.
13432 sinkMBB->splice(sinkMBB->begin(), MBB,
13433 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13434 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
13435
13436 // Note that the structure of the jmp_buf used here is not compatible
13437 // with that used by libc, and is not designed to be. Specifically, it
13438 // stores only those 'reserved' registers that LLVM does not otherwise
13439 // understand how to spill. Also, by convention, by the time this
13440 // intrinsic is called, Clang has already stored the frame address in the
13441 // first slot of the buffer and stack address in the third. Following the
13442 // X86 target code, we'll store the jump address in the second slot. We also
13443 // need to save the TOC pointer (R2) to handle jumps between shared
13444 // libraries, and that will be stored in the fourth slot. The thread
13445 // identifier (R13) is not affected.
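// Buffer layout implied by the offsets below (illustrative, in units of the
// pointer size): slot 0 = frame address (stored by Clang), slot 1 = jump
// address (LabelOffset), slot 2 = stack address (read back by longjmp),
// slot 3 = TOC pointer (TOCOffset), slot 4 = base pointer (BPOffset).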
13446
13447 // thisMBB:
13448 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13449 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13450 const int64_t BPOffset = 4 * PVT.getStoreSize();
13451
13452 // Prepare the IP (the address longjmp will return to) in a register.
13453 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13454 Register LabelReg = MRI.createVirtualRegister(PtrRC);
13455 Register BufReg = MI.getOperand(1).getReg();
13456
13457 if (Subtarget.is64BitELFABI()) {
13458 setUsesTOCBasePtr(*MBB->getParent());
13459 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13460 .addReg(PPC::X2)
13461 .addImm(TOCOffset)
13462 .addReg(BufReg)
13463 .cloneMemRefs(MI);
13464 }
13465
13466 // Naked functions never have a base pointer, and so we use r1. For all
13467 // other functions, this decision must be deferred until PEI.
13468 unsigned BaseReg;
13469 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13470 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13471 else
13472 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13473
13474 MIB = BuildMI(*thisMBB, MI, DL,
13475 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13476 .addReg(BaseReg)
13477 .addImm(BPOffset)
13478 .addReg(BufReg)
13479 .cloneMemRefs(MI);
13480
13481 // Setup
13482 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13483 MIB.addRegMask(TRI->getNoPreservedMask());
13484
13485 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13486
13487 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13488 .addMBB(mainMBB);
13489 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13490
13491 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13492 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13493
13494 // mainMBB:
13495 // mainDstReg = 0
13496 MIB =
13497 BuildMI(mainMBB, DL,
13498 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13499
13500 // Store IP
13501 if (Subtarget.isPPC64()) {
13502 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13503 .addReg(LabelReg)
13504 .addImm(LabelOffset)
13505 .addReg(BufReg);
13506 } else {
13507 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13508 .addReg(LabelReg)
13509 .addImm(LabelOffset)
13510 .addReg(BufReg);
13511 }
13512 MIB.cloneMemRefs(MI);
13513
13514 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13515 mainMBB->addSuccessor(sinkMBB);
13516
13517 // sinkMBB:
13518 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13519 TII->get(PPC::PHI), DstReg)
13520 .addReg(mainDstReg).addMBB(mainMBB)
13521 .addReg(restoreDstReg).addMBB(thisMBB);
13522
13523 MI.eraseFromParent();
13524 return sinkMBB;
13525}
13526
13527 MachineBasicBlock *
13528 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
13529 MachineBasicBlock *MBB) const {
13530 DebugLoc DL = MI.getDebugLoc();
13531 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13532
13533 MachineFunction *MF = MBB->getParent();
13534 MachineRegisterInfo &MRI = MF->getRegInfo();
13535
13536 MVT PVT = getPointerTy(MF->getDataLayout());
13537 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13538 "Invalid Pointer Size!");
13539
13540 const TargetRegisterClass *RC =
13541 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13542 Register Tmp = MRI.createVirtualRegister(RC);
13543 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13544 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13545 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13546 unsigned BP =
13547 (PVT == MVT::i64)
13548 ? PPC::X30
13549 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13550 : PPC::R30);
13551
13553
13554 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13555 const int64_t SPOffset = 2 * PVT.getStoreSize();
13556 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13557 const int64_t BPOffset = 4 * PVT.getStoreSize();
13558
13559 Register BufReg = MI.getOperand(0).getReg();
13560
13561 // Reload FP (the jumped-to function may not have had a
13562 // frame pointer, and if so, then its r31 will be restored
13563 // as necessary).
13564 if (PVT == MVT::i64) {
13565 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13566 .addImm(0)
13567 .addReg(BufReg);
13568 } else {
13569 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13570 .addImm(0)
13571 .addReg(BufReg);
13572 }
13573 MIB.cloneMemRefs(MI);
13574
13575 // Reload IP
13576 if (PVT == MVT::i64) {
13577 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13578 .addImm(LabelOffset)
13579 .addReg(BufReg);
13580 } else {
13581 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13582 .addImm(LabelOffset)
13583 .addReg(BufReg);
13584 }
13585 MIB.cloneMemRefs(MI);
13586
13587 // Reload SP
13588 if (PVT == MVT::i64) {
13589 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13590 .addImm(SPOffset)
13591 .addReg(BufReg);
13592 } else {
13593 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13594 .addImm(SPOffset)
13595 .addReg(BufReg);
13596 }
13597 MIB.cloneMemRefs(MI);
13598
13599 // Reload BP
13600 if (PVT == MVT::i64) {
13601 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13602 .addImm(BPOffset)
13603 .addReg(BufReg);
13604 } else {
13605 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13606 .addImm(BPOffset)
13607 .addReg(BufReg);
13608 }
13609 MIB.cloneMemRefs(MI);
13610
13611 // Reload TOC
13612 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13613 setUsesTOCBasePtr(*MBB->getParent());
13614 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13615 .addImm(TOCOffset)
13616 .addReg(BufReg)
13617 .cloneMemRefs(MI);
13618 }
13619
13620 // Jump
13621 BuildMI(*MBB, MI, DL,
13622 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13623 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13624
13625 MI.eraseFromParent();
13626 return MBB;
13627}
13628
13629 bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
13630 // If the function specifically requests inline stack probes, emit them.
13631 if (MF.getFunction().hasFnAttribute("probe-stack"))
13632 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13633 "inline-asm";
13634 return false;
13635}
13636
13637 unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
13638 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13639 unsigned StackAlign = TFI->getStackAlignment();
13640 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13641 "Unexpected stack alignment");
13642 // The default stack probe size is 4096 if the function has no
13643 // stack-probe-size attribute.
13644 const Function &Fn = MF.getFunction();
13645 unsigned StackProbeSize =
13646 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13647 // Round down to the stack alignment.
13648 StackProbeSize &= ~(StackAlign - 1);
13649 return StackProbeSize ? StackProbeSize : StackAlign;
13650}
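// Example of the rounding above (illustrative): with a 16-byte stack
// alignment, "stack-probe-size"=4001 rounds down to 4000, while a value of 8
// rounds down to 0 and the function falls back to returning the alignment.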
13651
13652 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
13653 // into three phases. In the first phase, it uses the pseudo instruction
13654 // PREPARE_PROBED_ALLOCA to get the future results of the actual FramePointer
13655 // and FinalStackPtr. In the second phase, it generates a loop that probes the
13656 // blocks. Finally, it uses the pseudo instruction DYNAREAOFFSET to get the
13657 // future result of MaxCallFrameSize so it can compute the correct data area pointer.
13658 MachineBasicBlock *
13659 PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13660 MachineBasicBlock *MBB) const {
13661 const bool isPPC64 = Subtarget.isPPC64();
13662 MachineFunction *MF = MBB->getParent();
13663 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13664 DebugLoc DL = MI.getDebugLoc();
13665 const unsigned ProbeSize = getStackProbeSize(*MF);
13666 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13667 MachineRegisterInfo &MRI = MF->getRegInfo();
13668 // The CFG of the probing loop looks like:
13669 // +-----+
13670 // | MBB |
13671 // +--+--+
13672 // |
13673 // +----v----+
13674 // +--->+ TestMBB +---+
13675 // | +----+----+ |
13676 // | | |
13677 // | +-----v----+ |
13678 // +---+ BlockMBB | |
13679 // +----------+ |
13680 // |
13681 // +---------+ |
13682 // | TailMBB +<--+
13683 // +---------+
13684 // In MBB, calculate previous frame pointer and final stack pointer.
13685 // In TestMBB, test whether SP equals the final stack pointer; if so, jump to
13686 // TailMBB. In BlockMBB, update SP and touch the block, then jump back to TestMBB.
13687 // TailMBB is spliced via \p MI.
13688 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13689 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13690 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13691
13692 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13693 MF->insert(MBBIter, TestMBB);
13694 MF->insert(MBBIter, BlockMBB);
13695 MF->insert(MBBIter, TailMBB);
13696
13697 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13698 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13699
13700 Register DstReg = MI.getOperand(0).getReg();
13701 Register NegSizeReg = MI.getOperand(1).getReg();
13702 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13703 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13704 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13705 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13706
13707 // Since the value of NegSizeReg might be realigned during prologue/epilogue
13708 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
13709 // actual FramePointer and NegSize.
13710 unsigned ProbeOpc;
13711 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13712 ProbeOpc =
13713 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13714 else
13715 // Use the _NEGSIZE_SAME_REG variant when NegSizeReg has only one use (the
13716 // current MI, which will be replaced by PREPARE_PROBED_ALLOCA); this lets
13717 // ActualNegSizeReg and NegSizeReg be allocated to the same physical register,
13718 // avoiding a redundant copy.
13719 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13720 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13721 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13722 .addDef(ActualNegSizeReg)
13723 .addReg(NegSizeReg)
13724 .add(MI.getOperand(2))
13725 .add(MI.getOperand(3));
13726
13727 // Calculate the final stack pointer, which equals SP + ActualNegSize.
13728 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13729 FinalStackPtr)
13730 .addReg(SPReg)
13731 .addReg(ActualNegSizeReg);
13732
13733 // Materialize a scratch register for update.
13734 int64_t NegProbeSize = -(int64_t)ProbeSize;
13735 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13736 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13737 if (!isInt<16>(NegProbeSize)) {
13738 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13739 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13740 .addImm(NegProbeSize >> 16);
13741 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13742 ScratchReg)
13743 .addReg(TempReg)
13744 .addImm(NegProbeSize & 0xFFFF);
13745 } else
13746 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13747 .addImm(NegProbeSize);
13748
13749 {
13750 // Probing leading residual part.
13751 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13752 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13753 .addReg(ActualNegSizeReg)
13754 .addReg(ScratchReg);
13755 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13756 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13757 .addReg(Div)
13758 .addReg(ScratchReg);
13759 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13760 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13761 .addReg(Mul)
13762 .addReg(ActualNegSizeReg);
13763 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13764 .addReg(FramePointer)
13765 .addReg(SPReg)
13766 .addReg(NegMod);
13767 }
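// Numeric example of the residual probe above (illustrative): with
// ProbeSize = 4096 and a 10000-byte allocation, ActualNegSize = -10000 and
// ScratchReg = -4096, so Div = 2, Mul = -8192 and NegMod = -1808; the
// stdux/stwux probes the leading 1808 bytes and the loop below then probes
// the remaining two full 4096-byte blocks.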
13768
13769 {
13770 // Remaining part should be multiple of ProbeSize.
13771 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13772 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13773 .addReg(SPReg)
13774 .addReg(FinalStackPtr);
13775 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13776 .addImm(PPC::PRED_EQ)
13777 .addReg(CmpResult)
13778 .addMBB(TailMBB);
13779 TestMBB->addSuccessor(BlockMBB);
13780 TestMBB->addSuccessor(TailMBB);
13781 }
13782
13783 {
13784 // Touch the block.
13785 // |P...|P...|P...
13786 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13787 .addReg(FramePointer)
13788 .addReg(SPReg)
13789 .addReg(ScratchReg);
13790 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13791 BlockMBB->addSuccessor(TestMBB);
13792 }
13793
13794 // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion;
13795 // use the DYNAREAOFFSET pseudo instruction to get the future result.
13796 Register MaxCallFrameSizeReg =
13797 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13798 BuildMI(TailMBB, DL,
13799 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13800 MaxCallFrameSizeReg)
13801 .add(MI.getOperand(2))
13802 .add(MI.getOperand(3));
13803 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13804 .addReg(SPReg)
13805 .addReg(MaxCallFrameSizeReg);
13806
13807 // Splice instructions after MI to TailMBB.
13808 TailMBB->splice(TailMBB->end(), MBB,
13809 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13810 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
13811 MBB->addSuccessor(TestMBB);
13812
13813 // Delete the pseudo instruction.
13814 MI.eraseFromParent();
13815
13816 ++NumDynamicAllocaProbed;
13817 return TailMBB;
13818}
13819
13820 static bool IsSelectCC(MachineInstr &MI) {
13821 switch (MI.getOpcode()) {
13822 case PPC::SELECT_CC_I4:
13823 case PPC::SELECT_CC_I8:
13824 case PPC::SELECT_CC_F4:
13825 case PPC::SELECT_CC_F8:
13826 case PPC::SELECT_CC_F16:
13827 case PPC::SELECT_CC_VRRC:
13828 case PPC::SELECT_CC_VSFRC:
13829 case PPC::SELECT_CC_VSSRC:
13830 case PPC::SELECT_CC_VSRC:
13831 case PPC::SELECT_CC_SPE4:
13832 case PPC::SELECT_CC_SPE:
13833 return true;
13834 default:
13835 return false;
13836 }
13837}
13838
13839static bool IsSelect(MachineInstr &MI) {
13840 switch (MI.getOpcode()) {
13841 case PPC::SELECT_I4:
13842 case PPC::SELECT_I8:
13843 case PPC::SELECT_F4:
13844 case PPC::SELECT_F8:
13845 case PPC::SELECT_F16:
13846 case PPC::SELECT_SPE:
13847 case PPC::SELECT_SPE4:
13848 case PPC::SELECT_VRRC:
13849 case PPC::SELECT_VSFRC:
13850 case PPC::SELECT_VSSRC:
13851 case PPC::SELECT_VSRC:
13852 return true;
13853 default:
13854 return false;
13855 }
13856}
13857
13858 MachineBasicBlock *
13859 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13860 MachineBasicBlock *BB) const {
13861 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13862 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13863 if (Subtarget.is64BitELFABI() &&
13864 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13865 !Subtarget.isUsingPCRelativeCalls()) {
13866 // Call lowering should have added an r2 operand to indicate a dependence
13867 // on the TOC base pointer value. It can't, however, because there is no
13868 // way to mark the dependence as implicit there, and so the stackmap code
13869 // will confuse it with a regular operand. Instead, add the dependence
13870 // here.
13871 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13872 }
13873
13874 return emitPatchPoint(MI, BB);
13875 }
13876
13877 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13878 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13879 return emitEHSjLjSetJmp(MI, BB);
13880 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13881 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13882 return emitEHSjLjLongJmp(MI, BB);
13883 }
13884
13885 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13886
13887 // To "insert" these instructions we actually have to insert their
13888 // control-flow patterns.
13889 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13890 MachineFunction::iterator It = ++BB->getIterator();
13891
13892 MachineFunction *F = BB->getParent();
13893 MachineRegisterInfo &MRI = F->getRegInfo();
13894
13895 if (Subtarget.hasISEL() &&
13896 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13897 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13898 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13899 SmallVector<MachineOperand, 2> Cond;
13900 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13901 MI.getOpcode() == PPC::SELECT_CC_I8)
13902 Cond.push_back(MI.getOperand(4));
13903 else
13904 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
13905 Cond.push_back(MI.getOperand(1));
13906
13907 DebugLoc dl = MI.getDebugLoc();
13908 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13909 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13910 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13911 // The incoming instruction knows the destination vreg to set, the
13912 // condition code register to branch on, the true/false values to
13913 // select between, and a branch opcode to use.
13914
13915 // thisMBB:
13916 // ...
13917 // TrueVal = ...
13918 // cmpTY ccX, r1, r2
13919 // bCC sinkMBB
13920 // fallthrough --> copy0MBB
13921 MachineBasicBlock *thisMBB = BB;
13922 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13923 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13924 DebugLoc dl = MI.getDebugLoc();
13925 F->insert(It, copy0MBB);
13926 F->insert(It, sinkMBB);
13927
13928 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
13929 copy0MBB->addLiveIn(PPC::CARRY);
13930 sinkMBB->addLiveIn(PPC::CARRY);
13931 }
13932
13933 // Set the call frame size on entry to the new basic blocks.
13934 // See https://reviews.llvm.org/D156113.
13935 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13936 copy0MBB->setCallFrameSize(CallFrameSize);
13937 sinkMBB->setCallFrameSize(CallFrameSize);
13938
13939 // Transfer the remainder of BB and its successor edges to sinkMBB.
13940 sinkMBB->splice(sinkMBB->begin(), BB,
13941 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13942 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13943
13944 // Next, add the true and fallthrough blocks as its successors.
13945 BB->addSuccessor(copy0MBB);
13946 BB->addSuccessor(sinkMBB);
13947
13948 if (IsSelect(MI)) {
13949 BuildMI(BB, dl, TII->get(PPC::BC))
13950 .addReg(MI.getOperand(1).getReg())
13951 .addMBB(sinkMBB);
13952 } else {
13953 unsigned SelectPred = MI.getOperand(4).getImm();
13954 BuildMI(BB, dl, TII->get(PPC::BCC))
13955 .addImm(SelectPred)
13956 .addReg(MI.getOperand(1).getReg())
13957 .addMBB(sinkMBB);
13958 }
13959
13960 // copy0MBB:
13961 // %FalseValue = ...
13962 // # fallthrough to sinkMBB
13963 BB = copy0MBB;
13964
13965 // Update machine-CFG edges
13966 BB->addSuccessor(sinkMBB);
13967
13968 // sinkMBB:
13969 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13970 // ...
13971 BB = sinkMBB;
13972 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13973 .addReg(MI.getOperand(3).getReg())
13974 .addMBB(copy0MBB)
13975 .addReg(MI.getOperand(2).getReg())
13976 .addMBB(thisMBB);
13977 } else if (MI.getOpcode() == PPC::ReadTB) {
13978 // To read the 64-bit time-base register on a 32-bit target, we read the
13979 // two halves. Should the counter have wrapped while it was being read, we
13980 // need to try again.
13981 // ...
13982 // readLoop:
13983 // mfspr Rx,TBU # load from TBU
13984 // mfspr Ry,TB # load from TB
13985 // mfspr Rz,TBU # load from TBU
13986 // cmpw crX,Rx,Rz # check if 'old'='new'
13987 // bne readLoop # branch if they're not equal
13988 // ...
13989
13990 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13991 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13992 DebugLoc dl = MI.getDebugLoc();
13993 F->insert(It, readMBB);
13994 F->insert(It, sinkMBB);
13995
13996 // Transfer the remainder of BB and its successor edges to sinkMBB.
13997 sinkMBB->splice(sinkMBB->begin(), BB,
13998 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13999 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
14000
14001 BB->addSuccessor(readMBB);
14002 BB = readMBB;
14003
14004 MachineRegisterInfo &RegInfo = F->getRegInfo();
14005 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
14006 Register LoReg = MI.getOperand(0).getReg();
14007 Register HiReg = MI.getOperand(1).getReg();
14008
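// SPR 269 is the upper half of the time base (TBU) and SPR 268 is the lower
// half (TBL); reading TBU twice around the TBL read detects a carry into the
// upper half while the pair was being read.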
14009 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
14010 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
14011 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
14012
14013 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14014
14015 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
14016 .addReg(HiReg)
14017 .addReg(ReadAgainReg);
14018 BuildMI(BB, dl, TII->get(PPC::BCC))
14019 .addImm(PPC::PRED_NE)
14020 .addReg(CmpReg)
14021 .addMBB(readMBB);
14022
14023 BB->addSuccessor(readMBB);
14024 BB->addSuccessor(sinkMBB);
14025 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
14026 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
14027 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
14028 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
14029 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
14030 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
14031 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
14032 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
14033
14034 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
14035 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
14036 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
14037 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
14038 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
14039 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
14040 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
14041 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
14042
14043 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
14044 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
14045 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
14046 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
14047 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
14048 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
14049 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
14050 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
14051
14052 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
14053 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
14054 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
14055 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
14056 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
14057 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
14058 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
14059 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
14060
14061 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
14062 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
14063 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
14064 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
14065 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
14066 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
14067 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
14068 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
14069
14070 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
14071 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
14072 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
14073 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
14074 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
14075 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
14076 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
14077 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
14078
14079 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
14080 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
14081 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
14082 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
14083 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
14084 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
14085 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
14086 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
14087
14088 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
14089 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
14090 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
14091 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
14092 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
14093 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
14094 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
14095 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
14096
14097 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
14098 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
14099 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
14100 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
14101 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
14102 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
14103 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
14104 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
14105
14106 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
14107 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
14108 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
14109 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
14110 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
14111 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
14112 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
14113 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
14114
14115 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
14116 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
14117 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
14118 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
14119 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
14120 BB = EmitAtomicBinary(MI, BB, 4, 0);
14121 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
14122 BB = EmitAtomicBinary(MI, BB, 8, 0);
14123 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14124 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14125 (Subtarget.hasPartwordAtomics() &&
14126 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
14127 (Subtarget.hasPartwordAtomics() &&
14128 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
14129 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14130
14131 auto LoadMnemonic = PPC::LDARX;
14132 auto StoreMnemonic = PPC::STDCX;
14133 switch (MI.getOpcode()) {
14134 default:
14135 llvm_unreachable("Compare and swap of unknown size");
14136 case PPC::ATOMIC_CMP_SWAP_I8:
14137 LoadMnemonic = PPC::LBARX;
14138 StoreMnemonic = PPC::STBCX;
14139 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
14140 break;
14141 case PPC::ATOMIC_CMP_SWAP_I16:
14142 LoadMnemonic = PPC::LHARX;
14143 StoreMnemonic = PPC::STHCX;
14144 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
14145 break;
14146 case PPC::ATOMIC_CMP_SWAP_I32:
14147 LoadMnemonic = PPC::LWARX;
14148 StoreMnemonic = PPC::STWCX;
14149 break;
14150 case PPC::ATOMIC_CMP_SWAP_I64:
14151 LoadMnemonic = PPC::LDARX;
14152 StoreMnemonic = PPC::STDCX;
14153 break;
14154 }
14155 MachineRegisterInfo &RegInfo = F->getRegInfo();
14156 Register dest = MI.getOperand(0).getReg();
14157 Register ptrA = MI.getOperand(1).getReg();
14158 Register ptrB = MI.getOperand(2).getReg();
14159 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14160 Register oldval = MI.getOperand(3).getReg();
14161 Register newval = MI.getOperand(4).getReg();
14162 DebugLoc dl = MI.getDebugLoc();
14163
14164 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14165 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14166 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14167 F->insert(It, loop1MBB);
14168 F->insert(It, loop2MBB);
14169 F->insert(It, exitMBB);
14170 exitMBB->splice(exitMBB->begin(), BB,
14171 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14172 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14173
14174 // thisMBB:
14175 // ...
14176 // fallthrough --> loopMBB
14177 BB->addSuccessor(loop1MBB);
14178
14179 // loop1MBB:
14180 // l[bhwd]arx dest, ptr
14181 // cmp[wd] dest, oldval
14182 // bne- exitBB
14183 // loop2MBB:
14184 // st[bhwd]cx. newval, ptr
14185 // bne- loopMBB
14186 // b exitBB
14187 // exitBB:
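// Note: the st[bhwd]cx. succeeds only if the reservation established by the
// earlier l[bhwd]arx is still held; on failure it clears CR0[EQ] and the
// following bne- sends control back to loop1MBB to retry.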
14188 BB = loop1MBB;
14189 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14190 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14191 .addReg(dest)
14192 .addReg(oldval);
14193 BuildMI(BB, dl, TII->get(PPC::BCC))
14194 .addImm(PPC::PRED_NE)
14195 .addReg(CrReg)
14196 .addMBB(exitMBB);
14197 BB->addSuccessor(loop2MBB);
14198 BB->addSuccessor(exitMBB);
14199
14200 BB = loop2MBB;
14201 BuildMI(BB, dl, TII->get(StoreMnemonic))
14202 .addReg(newval)
14203 .addReg(ptrA)
14204 .addReg(ptrB);
14205 BuildMI(BB, dl, TII->get(PPC::BCC))
14206 .addImm(PPC::PRED_NE)
14207 .addReg(PPC::CR0)
14208 .addMBB(loop1MBB);
14209 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14210 BB->addSuccessor(loop1MBB);
14211 BB->addSuccessor(exitMBB);
14212
14213 // exitMBB:
14214 // ...
14215 BB = exitMBB;
14216 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14217 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14218 // We must use 64-bit registers for addresses when targeting 64-bit,
14219 // since we're actually doing arithmetic on them. Other registers
14220 // can be 32-bit.
14221 bool is64bit = Subtarget.isPPC64();
14222 bool isLittleEndian = Subtarget.isLittleEndian();
14223 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14224
14225 Register dest = MI.getOperand(0).getReg();
14226 Register ptrA = MI.getOperand(1).getReg();
14227 Register ptrB = MI.getOperand(2).getReg();
14228 Register oldval = MI.getOperand(3).getReg();
14229 Register newval = MI.getOperand(4).getReg();
14230 DebugLoc dl = MI.getDebugLoc();
14231
14232 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14233 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14234 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14235 F->insert(It, loop1MBB);
14236 F->insert(It, loop2MBB);
14237 F->insert(It, exitMBB);
14238 exitMBB->splice(exitMBB->begin(), BB,
14239 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14240 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14241
14242 MachineRegisterInfo &RegInfo = F->getRegInfo();
14243 const TargetRegisterClass *RC =
14244 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14245 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14246
14247 Register PtrReg = RegInfo.createVirtualRegister(RC);
14248 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
14249 Register ShiftReg =
14250 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
14251 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
14252 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
14253 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
14254 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
14255 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
14256 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
14257 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
14258 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
14259 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
14260 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
14261 Register Ptr1Reg;
14262 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
14263 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14264 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14265 // thisMBB:
14266 // ...
14267 // fallthrough --> loopMBB
14268 BB->addSuccessor(loop1MBB);
14269
14270 // The 4-byte load must be aligned, while a char or short may be
14271 // anywhere in the word. Hence all this nasty bookkeeping code.
14272 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14273 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14274 // xori shift, shift1, 24 [16]
14275 // rlwinm ptr, ptr1, 0, 0, 29
14276 // slw newval2, newval, shift
14277 // slw oldval2, oldval, shift
14278 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14279 // slw mask, mask2, shift
14280 // and newval3, newval2, mask
14281 // and oldval3, oldval2, mask
14282 // loop1MBB:
14283 // lwarx tmpDest, ptr
14284 // and tmp, tmpDest, mask
14285 // cmpw tmp, oldval3
14286 // bne- exitBB
14287 // loop2MBB:
14288 // andc tmp2, tmpDest, mask
14289 // or tmp4, tmp2, newval3
14290 // stwcx. tmp4, ptr
14291 // bne- loop1MBB
14292 // b exitBB
14293 // exitBB:
14294 // srw dest, tmpDest, shift
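// For example, on a little-endian target with an i8 operand at byte offset 1
// within its aligned word, shift1 (and thus shift) is 8, so the 0xff mask and
// the shifted old/new values line up with bits 8..15 of the word; on
// big-endian the xori by 24 converts the byte offset into the equivalent
// shift from the least-significant end.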
14295 if (ptrA != ZeroReg) {
14296 Ptr1Reg = RegInfo.createVirtualRegister(RC);
14297 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14298 .addReg(ptrA)
14299 .addReg(ptrB);
14300 } else {
14301 Ptr1Reg = ptrB;
14302 }
14303
14304 // We need to use a 32-bit subregister here to avoid a register class
14305 // mismatch in 64-bit mode.
14306 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14307 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
14308 .addImm(3)
14309 .addImm(27)
14310 .addImm(is8bit ? 28 : 27);
14311 if (!isLittleEndian)
14312 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14313 .addReg(Shift1Reg)
14314 .addImm(is8bit ? 24 : 16);
14315 if (is64bit)
14316 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14317 .addReg(Ptr1Reg)
14318 .addImm(0)
14319 .addImm(61);
14320 else
14321 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14322 .addReg(Ptr1Reg)
14323 .addImm(0)
14324 .addImm(0)
14325 .addImm(29);
14326 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14327 .addReg(newval)
14328 .addReg(ShiftReg);
14329 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14330 .addReg(oldval)
14331 .addReg(ShiftReg);
14332 if (is8bit)
14333 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14334 else {
14335 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14336 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14337 .addReg(Mask3Reg)
14338 .addImm(65535);
14339 }
14340 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14341 .addReg(Mask2Reg)
14342 .addReg(ShiftReg);
14343 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14344 .addReg(NewVal2Reg)
14345 .addReg(MaskReg);
14346 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14347 .addReg(OldVal2Reg)
14348 .addReg(MaskReg);
14349
14350 BB = loop1MBB;
14351 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14352 .addReg(ZeroReg)
14353 .addReg(PtrReg);
14354 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14355 .addReg(TmpDestReg)
14356 .addReg(MaskReg);
14357 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
14358 .addReg(TmpReg)
14359 .addReg(OldVal3Reg);
14360 BuildMI(BB, dl, TII->get(PPC::BCC))
14361 .addImm(PPC::PRED_NE)
14362 .addReg(CrReg)
14363 .addMBB(exitMBB);
14364 BB->addSuccessor(loop2MBB);
14365 BB->addSuccessor(exitMBB);
14366
14367 BB = loop2MBB;
14368 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14369 .addReg(TmpDestReg)
14370 .addReg(MaskReg);
14371 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14372 .addReg(Tmp2Reg)
14373 .addReg(NewVal3Reg);
14374 BuildMI(BB, dl, TII->get(PPC::STWCX))
14375 .addReg(Tmp4Reg)
14376 .addReg(ZeroReg)
14377 .addReg(PtrReg);
14378 BuildMI(BB, dl, TII->get(PPC::BCC))
14379 .addImm(PPC::PRED_NE)
14380 .addReg(PPC::CR0)
14381 .addMBB(loop1MBB);
14382 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14383 BB->addSuccessor(loop1MBB);
14384 BB->addSuccessor(exitMBB);
14385
14386 // exitMBB:
14387 // ...
14388 BB = exitMBB;
14389 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14390 .addReg(TmpReg)
14391 .addReg(ShiftReg);
14392 } else if (MI.getOpcode() == PPC::FADDrtz) {
14393 // This pseudo performs an FADD with rounding mode temporarily forced
14394 // to round-to-zero. We emit this via custom inserter since the FPSCR
14395 // is not modeled at the SelectionDAG level.
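// The expansion below is: mffs to save the FPSCR, mtfsb1 31 / mtfsb0 30 to
// force the RN field to round-toward-zero, the fadd itself, and finally a
// restore of the rounding-mode field from the saved FPSCR value.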
14396 Register Dest = MI.getOperand(0).getReg();
14397 Register Src1 = MI.getOperand(1).getReg();
14398 Register Src2 = MI.getOperand(2).getReg();
14399 DebugLoc dl = MI.getDebugLoc();
14400
14401 MachineRegisterInfo &RegInfo = F->getRegInfo();
14402 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14403
14404 // Save FPSCR value.
14405 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14406
14407 // Set rounding mode to round-to-zero.
14408 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14409 .addImm(31)
14410 .addReg(PPC::RM, RegState::ImplicitDefine);
14411
14412 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14413 .addImm(30)
14414 .addReg(PPC::RM, RegState::ImplicitDefine);
14415
14416 // Perform addition.
14417 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14418 .addReg(Src1)
14419 .addReg(Src2);
14420 if (MI.getFlag(MachineInstr::NoFPExcept))
14421 MIB.setFlag(MachineInstr::NoFPExcept);
14422
14423 // Restore FPSCR value.
14424 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
14425 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14426 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14427 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14428 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14429 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14430 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14431 ? PPC::ANDI8_rec
14432 : PPC::ANDI_rec;
14433 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14434 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14435
14436 MachineRegisterInfo &RegInfo = F->getRegInfo();
14437 Register Dest = RegInfo.createVirtualRegister(
14438 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14439
14440 DebugLoc Dl = MI.getDebugLoc();
14441 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14442 .addReg(MI.getOperand(1).getReg())
14443 .addImm(1);
14444 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14445 MI.getOperand(0).getReg())
14446 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14447 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14448 DebugLoc Dl = MI.getDebugLoc();
14449 MachineRegisterInfo &RegInfo = F->getRegInfo();
14450 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14451 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14452 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14453 MI.getOperand(0).getReg())
14454 .addReg(CRReg);
14455 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14456 DebugLoc Dl = MI.getDebugLoc();
14457 unsigned Imm = MI.getOperand(1).getImm();
14458 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14459 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14460 MI.getOperand(0).getReg())
14461 .addReg(PPC::CR0EQ);
14462 } else if (MI.getOpcode() == PPC::SETRNDi) {
14463 DebugLoc dl = MI.getDebugLoc();
14464 Register OldFPSCRReg = MI.getOperand(0).getReg();
14465
14466 // Save FPSCR value.
14467 if (MRI.use_empty(OldFPSCRReg))
14468 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14469 else
14470 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14471
14472 // The floating-point rounding mode is in bits 62:63 of the FPSCR, and has
14473 // the following settings:
14474 // 00 Round to nearest
14475 // 01 Round to 0
14476 // 10 Round to +inf
14477 // 11 Round to -inf
14478
14479 // When the operand is an immediate, use its two least significant bits to
14480 // set bits 62:63 of the FPSCR.
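// For example, an immediate of 1 executes mtfsb1 31 and mtfsb0 30, leaving
// the rounding-mode field at 0b01 (round to 0).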
14481 unsigned Mode = MI.getOperand(1).getImm();
14482 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14483 .addImm(31)
14484 .addReg(PPC::RM, RegState::ImplicitDefine);
14485
14486 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14487 .addImm(30)
14488 .addReg(PPC::RM, RegState::ImplicitDefine);
14489 } else if (MI.getOpcode() == PPC::SETRND) {
14490 DebugLoc dl = MI.getDebugLoc();
14491
14492 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
14493 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14494 // If the target doesn't have DirectMove, we should use the stack to do the
14495 // conversion, because the target doesn't have the instructions like mtvsrd
14496 // or mfvsrd to do this conversion directly.
14497 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14498 if (Subtarget.hasDirectMove()) {
14499 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14500 .addReg(SrcReg);
14501 } else {
14502 // Use stack to do the register copy.
14503 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14504 MachineRegisterInfo &RegInfo = F->getRegInfo();
14505 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14506 if (RC == &PPC::F8RCRegClass) {
14507 // Copy register from F8RCRegClass to G8RCRegClass.
14508 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14509 "Unsupported RegClass.");
14510
14511 StoreOp = PPC::STFD;
14512 LoadOp = PPC::LD;
14513 } else {
14514 // Copy register from G8RCRegClass to F8RCRegClass.
14515 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14516 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14517 "Unsupported RegClass.");
14518 }
14519
14520 MachineFrameInfo &MFI = F->getFrameInfo();
14521 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14522
14523 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14524 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14525 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
14526 MFI.getObjectAlign(FrameIdx));
14527
14528 // Store the SrcReg into the stack.
14529 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14530 .addReg(SrcReg)
14531 .addImm(0)
14532 .addFrameIndex(FrameIdx)
14533 .addMemOperand(MMOStore);
14534
14535 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14536 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14537 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
14538 MFI.getObjectAlign(FrameIdx));
14539
14540 // Load from the stack where SrcReg is stored, and save to DestReg,
14541 // so we have done the RegClass conversion from RegClass::SrcReg to
14542 // RegClass::DestReg.
14543 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14544 .addImm(0)
14545 .addFrameIndex(FrameIdx)
14546 .addMemOperand(MMOLoad);
14547 }
14548 };
14549
14550 Register OldFPSCRReg = MI.getOperand(0).getReg();
14551
14552 // Save FPSCR value.
14553 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14554
14555 // When the operand is a gprc register, use its two least significant bits
14556 // and the mtfsf instruction to set bits 62:63 of the FPSCR.
14557 //
14558 // copy OldFPSCRTmpReg, OldFPSCRReg
14559 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14560 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14561 // copy NewFPSCRReg, NewFPSCRTmpReg
14562 // mtfsf 255, NewFPSCRReg
14563 MachineOperand SrcOp = MI.getOperand(1);
14564 MachineRegisterInfo &RegInfo = F->getRegInfo();
14565 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14566
14567 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14568
14569 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14570 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14571
14572 // The first operand of INSERT_SUBREG should be a register that has
14573 // subregisters; since we only care about its register class, we can use an
14574 // IMPLICIT_DEF register.
14575 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14576 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14577 .addReg(ImDefReg)
14578 .add(SrcOp)
14579 .addImm(1);
14580
14581 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14582 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14583 .addReg(OldFPSCRTmpReg)
14584 .addReg(ExtSrcReg)
14585 .addImm(0)
14586 .addImm(62);
14587
14588 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14589 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14590
14591 // The mask 255 means that bits 32:63 of NewFPSCRReg are put into bits 32:63
14592 // of the FPSCR.
14593 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14594 .addImm(255)
14595 .addReg(NewFPSCRReg)
14596 .addImm(0)
14597 .addImm(0);
14598 } else if (MI.getOpcode() == PPC::SETFLM) {
14599 DebugLoc Dl = MI.getDebugLoc();
14600
14601 // Result of setflm is previous FPSCR content, so we need to save it first.
14602 Register OldFPSCRReg = MI.getOperand(0).getReg();
14603 if (MRI.use_empty(OldFPSCRReg))
14604 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14605 else
14606 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14607
14608 // Put bits 32:63 of NewFPSCRReg into the FPSCR.
14609 Register NewFPSCRReg = MI.getOperand(1).getReg();
14610 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14611 .addImm(255)
14612 .addReg(NewFPSCRReg)
14613 .addImm(0)
14614 .addImm(0);
14615 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14616 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14617 return emitProbedAlloca(MI, BB);
14618 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14619 DebugLoc DL = MI.getDebugLoc();
14620 Register Src = MI.getOperand(2).getReg();
14621 Register Lo = MI.getOperand(0).getReg();
14622 Register Hi = MI.getOperand(1).getReg();
14623 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14624 .addDef(Lo)
14625 .addUse(Src, 0, PPC::sub_gp8_x1);
14626 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14627 .addDef(Hi)
14628 .addUse(Src, 0, PPC::sub_gp8_x0);
14629 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14630 MI.getOpcode() == PPC::STQX_PSEUDO) {
14631 DebugLoc DL = MI.getDebugLoc();
14632 // Ptr holds the sum of RA and RB and is used as the ptr_rc_no_r0 part
14633 // of LQ/STQ's memory operand, so it has to be in the
14634 // g8rc_and_g8rc_nox0 register class.
14635 Register Ptr =
14636 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14637 Register Val = MI.getOperand(0).getReg();
14638 Register RA = MI.getOperand(1).getReg();
14639 Register RB = MI.getOperand(2).getReg();
14640 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
14641 BuildMI(*BB, MI, DL,
14642 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14643 : TII->get(PPC::STQ))
14644 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
14645 .addImm(0)
14646 .addReg(Ptr);
14647 } else {
14648 llvm_unreachable("Unexpected instr type to insert");
14649 }
14650
14651 MI.eraseFromParent(); // The pseudo instruction is gone now.
14652 return BB;
14653}
14654
14655//===----------------------------------------------------------------------===//
14656// Target Optimization Hooks
14657//===----------------------------------------------------------------------===//
14658
14659static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14660 // For the estimates, convergence is quadratic, so we essentially double the
14661 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14662 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14663 // this is 2^-14. An IEEE float has 23 fraction bits and a double has 52.
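// For example, starting from 2^-14, one refinement step reaches roughly
// 2^-28, which already covers f32; f64 needs a second step to pass 2^-52.
// Starting from 2^-5, three steps (2^-10, 2^-20, 2^-40) are needed for f32
// and four for f64.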
14664 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14665 if (VT.getScalarType() == MVT::f64)
14666 RefinementSteps++;
14667 return RefinementSteps;
14668}
14669
14670SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14671 const DenormalMode &Mode) const {
14672 // We only have VSX Vector Test for software Square Root.
14673 EVT VT = Op.getValueType();
14674 if (!isTypeLegal(MVT::i1) ||
14675 (VT != MVT::f64 &&
14676 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14677 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
14678
14679 SDLoc DL(Op);
14680 // The output register of FTSQRT is a CR field.
14681 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
14682 // ftsqrt BF,FRB
14683 // Let e_b be the unbiased exponent of the double-precision
14684 // floating-point operand in register FRB.
14685 // fe_flag is set to 1 if either of the following conditions occurs.
14686 // - The double-precision floating-point operand in register FRB is a zero,
14687 // a NaN, an infinity, or a negative value.
14688 // - e_b is less than or equal to -970.
14689 // Otherwise fe_flag is set to 0.
14690 // Both VSX and non-VSX versions would set the EQ bit in the CR if the number is
14691 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14692 // exponent is less than -970)
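// The EQ bit of that CR field is extracted below as an i1; when it is set,
// the value is not suitable for the Newton-Raphson estimate sequence and the
// caller selects the special-case result instead.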
14693 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14694 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14695 FTSQRT, SRIdxVal),
14696 0);
14697}
14698
14699SDValue
14700PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14701 SelectionDAG &DAG) const {
14702 // We only have VSX Vector Square Root.
14703 EVT VT = Op.getValueType();
14704 if (VT != MVT::f64 &&
14705 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14706 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
14707
14708 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14709}
14710
14711SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14712 int Enabled, int &RefinementSteps,
14713 bool &UseOneConstNR,
14714 bool Reciprocal) const {
14715 EVT VT = Operand.getValueType();
14716 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14717 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14718 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14719 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14720 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14721 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14722
14723 // The Newton-Raphson computation with a single constant does not provide
14724 // enough accuracy on some CPUs.
14725 UseOneConstNR = !Subtarget.needsTwoConstNR();
14726 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14727 }
14728 return SDValue();
14729}
14730
14731SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14732 int Enabled,
14733 int &RefinementSteps) const {
14734 EVT VT = Operand.getValueType();
14735 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14736 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14737 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14738 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14739 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14740 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14741 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14742 }
14743 return SDValue();
14744}
14745
14746unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14747 // Note: This functionality is used only when arcp is enabled, and
14748 // on cores with reciprocal estimates (which are used when arcp is
14749 // enabled for division), this functionality is redundant with the default
14750 // combiner logic (once the division -> reciprocal/multiply transformation
14751 // has taken place). As a result, this matters more for older cores than for
14752 // newer ones.
14753
14754 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14755 // reciprocal if there are two or more FDIVs (for embedded cores with only
14756 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
14757 switch (Subtarget.getCPUDirective()) {
14758 default:
14759 return 3;
14760 case PPC::DIR_440:
14761 case PPC::DIR_A2:
14762 case PPC::DIR_E500:
14763 case PPC::DIR_E500mc:
14764 case PPC::DIR_E5500:
14765 return 2;
14766 }
14767}
14768
14769// isConsecutiveLSLoc needs to work even if all adds have not yet been
14770// collapsed, and so we need to look through chains of them.
14771static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14772 int64_t& Offset, SelectionDAG &DAG) {
14773 if (DAG.isBaseWithConstantOffset(Loc)) {
14774 Base = Loc.getOperand(0);
14775 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14776
14777 // The base might itself be a base plus an offset, and if so, accumulate
14778 // that as well.
14779 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
14780 }
14781}
14782
14783static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14784 unsigned Bytes, int Dist,
14785 SelectionDAG &DAG) {
14786 if (VT.getSizeInBits() / 8 != Bytes)
14787 return false;
14788
14789 SDValue BaseLoc = Base->getBasePtr();
14790 if (Loc.getOpcode() == ISD::FrameIndex) {
14791 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14792 return false;
14793 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14794 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
14795 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
14796 int FS = MFI.getObjectSize(FI);
14797 int BFS = MFI.getObjectSize(BFI);
14798 if (FS != BFS || FS != (int)Bytes) return false;
14799 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
14800 }
14801
14802 SDValue Base1 = Loc, Base2 = BaseLoc;
14803 int64_t Offset1 = 0, Offset2 = 0;
14804 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
14805 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
14806 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14807 return true;
14808
14809 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14810 const GlobalValue *GV1 = nullptr;
14811 const GlobalValue *GV2 = nullptr;
14812 Offset1 = 0;
14813 Offset2 = 0;
14814 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
14815 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
14816 if (isGA1 && isGA2 && GV1 == GV2)
14817 return Offset1 == (Offset2 + Dist*Bytes);
14818 return false;
14819}
14820
14821// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14822// not enforce equality of the chain operands.
14823static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
14824 unsigned Bytes, int Dist,
14825 SelectionDAG &DAG) {
14826 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
14827 EVT VT = LS->getMemoryVT();
14828 SDValue Loc = LS->getBasePtr();
14829 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14830 }
14831
14832 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14833 EVT VT;
14834 switch (N->getConstantOperandVal(1)) {
14835 default: return false;
14836 case Intrinsic::ppc_altivec_lvx:
14837 case Intrinsic::ppc_altivec_lvxl:
14838 case Intrinsic::ppc_vsx_lxvw4x:
14839 case Intrinsic::ppc_vsx_lxvw4x_be:
14840 VT = MVT::v4i32;
14841 break;
14842 case Intrinsic::ppc_vsx_lxvd2x:
14843 case Intrinsic::ppc_vsx_lxvd2x_be:
14844 VT = MVT::v2f64;
14845 break;
14846 case Intrinsic::ppc_altivec_lvebx:
14847 VT = MVT::i8;
14848 break;
14849 case Intrinsic::ppc_altivec_lvehx:
14850 VT = MVT::i16;
14851 break;
14852 case Intrinsic::ppc_altivec_lvewx:
14853 VT = MVT::i32;
14854 break;
14855 }
14856
14857 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
14858 }
14859
14860 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14861 EVT VT;
14862 switch (N->getConstantOperandVal(1)) {
14863 default: return false;
14864 case Intrinsic::ppc_altivec_stvx:
14865 case Intrinsic::ppc_altivec_stvxl:
14866 case Intrinsic::ppc_vsx_stxvw4x:
14867 VT = MVT::v4i32;
14868 break;
14869 case Intrinsic::ppc_vsx_stxvd2x:
14870 VT = MVT::v2f64;
14871 break;
14872 case Intrinsic::ppc_vsx_stxvw4x_be:
14873 VT = MVT::v4i32;
14874 break;
14875 case Intrinsic::ppc_vsx_stxvd2x_be:
14876 VT = MVT::v2f64;
14877 break;
14878 case Intrinsic::ppc_altivec_stvebx:
14879 VT = MVT::i8;
14880 break;
14881 case Intrinsic::ppc_altivec_stvehx:
14882 VT = MVT::i16;
14883 break;
14884 case Intrinsic::ppc_altivec_stvewx:
14885 VT = MVT::i32;
14886 break;
14887 }
14888
14889 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
14890 }
14891
14892 return false;
14893}
14894
14895// Return true if there is a nearby consecutive load to the one provided
14896// (regardless of alignment). We search up and down the chain, looking through
14897// token factors and other loads (but nothing else). As a result, a true result
14898// indicates that it is safe to create a new consecutive load adjacent to the
14899// load provided.
14900static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
14901 SDValue Chain = LD->getChain();
14902 EVT VT = LD->getMemoryVT();
14903
14904 SmallPtrSet<SDNode *, 16> LoadRoots;
14905 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
14906 SmallPtrSet<SDNode *, 16> Visited;
14907
14908 // First, search up the chain, branching to follow all token-factor operands.
14910 // If we find a consecutive load, then we're done; otherwise, record all
14910 // nodes just above the top-level loads and token factors.
14911 while (!Queue.empty()) {
14912 SDNode *ChainNext = Queue.pop_back_val();
14913 if (!Visited.insert(ChainNext).second)
14914 continue;
14915
14916 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14917 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14918 return true;
14919
14920 if (!Visited.count(ChainLD->getChain().getNode()))
14921 Queue.push_back(ChainLD->getChain().getNode());
14922 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14923 for (const SDUse &O : ChainNext->ops())
14924 if (!Visited.count(O.getNode()))
14925 Queue.push_back(O.getNode());
14926 } else
14927 LoadRoots.insert(ChainNext);
14928 }
14929
14930 // Second, search down the chain, starting from the top-level nodes recorded
14931 // in the first phase. These top-level nodes are the nodes just above all
14932 // loads and token factors. Starting with their uses, recursively look through
14933 // all loads (just the chain uses) and token factors to find a consecutive
14934 // load.
14935 Visited.clear();
14936 Queue.clear();
14937
14938 for (SDNode *I : LoadRoots) {
14939 Queue.push_back(I);
14940
14941 while (!Queue.empty()) {
14942 SDNode *LoadRoot = Queue.pop_back_val();
14943 if (!Visited.insert(LoadRoot).second)
14944 continue;
14945
14946 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
14947 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14948 return true;
14949
14950 for (SDNode *U : LoadRoot->users())
14951 if (((isa<MemSDNode>(U) &&
14952 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
14953 U->getOpcode() == ISD::TokenFactor) &&
14954 !Visited.count(U))
14955 Queue.push_back(U);
14956 }
14957 }
14958
14959 return false;
14960}
14961
14962/// This function is called when we have proved that a SETCC node can be replaced
14963/// by subtraction (and other supporting instructions) so that the result of
14964/// comparison is kept in a GPR instead of a CR. This function is purely for
14965/// codegen purposes and has some flags to guide the codegen process.
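/// For example, for (setult x, y) on operands narrower than i64, both values
/// are zero-extended to i64, sub = x - y is computed, and srl sub, 63 leaves 1
/// in the low bit exactly when x < y (unsigned). The Complement flag xors the
/// result with 1 (turning ult into uge), and Swap exchanges the operands first
/// (turning ult into ugt).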
14966static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14967 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14968 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14969
14970 // Zero extend the operands to the largest legal integer. Originally, they
14971 // must be of a strictly smaller size.
14972 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14973 DAG.getConstant(Size, DL, MVT::i32));
14974 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14975 DAG.getConstant(Size, DL, MVT::i32));
14976
14977 // Swap if needed. Depends on the condition code.
14978 if (Swap)
14979 std::swap(Op0, Op1);
14980
14981 // Subtract extended integers.
14982 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14983
14984 // Move the sign bit to the least significant position and zero out the rest.
14985 // Now the least significant bit carries the result of original comparison.
14986 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14987 DAG.getConstant(Size - 1, DL, MVT::i32));
14988 auto Final = Shifted;
14989
14990 // Complement the result if needed. Based on the condition code.
14991 if (Complement)
14992 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14993 DAG.getConstant(1, DL, MVT::i64));
14994
14995 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14996}
14997
14998SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14999 DAGCombinerInfo &DCI) const {
15000 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
15001
15002 SelectionDAG &DAG = DCI.DAG;
15003 SDLoc DL(N);
15004
15005 // Size of integers being compared has a critical role in the following
15006 // analysis, so we prefer to do this when all types are legal.
15007 if (!DCI.isAfterLegalizeDAG())
15008 return SDValue();
15009
15010 // If all users of SETCC extend its value to a legal integer type
15011 // then we replace SETCC with a subtraction
15012 for (const SDNode *U : N->users())
15013 if (U->getOpcode() != ISD::ZERO_EXTEND)
15014 return SDValue();
15015
15016 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15017 auto OpSize = N->getOperand(0).getValueSizeInBits();
15018
15018
15019 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
15020
15021 if (OpSize < Size) {
15022 switch (CC) {
15023 default: break;
15024 case ISD::SETULT:
15025 return generateEquivalentSub(N, Size, false, false, DL, DAG);
15026 case ISD::SETULE:
15027 return generateEquivalentSub(N, Size, true, true, DL, DAG);
15028 case ISD::SETUGT:
15029 return generateEquivalentSub(N, Size, false, true, DL, DAG);
15030 case ISD::SETUGE:
15031 return generateEquivalentSub(N, Size, true, false, DL, DAG);
15032 }
15033 }
15034
15035 return SDValue();
15036}
15037
15038SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
15039 DAGCombinerInfo &DCI) const {
15040 SelectionDAG &DAG = DCI.DAG;
15041 SDLoc dl(N);
15042
15043 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
15044 // If we're tracking CR bits, we need to be careful that we don't have:
15045 // trunc(binary-ops(zext(x), zext(y)))
15046 // or
15047 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
15048 // such that we're unnecessarily moving things into GPRs when it would be
15049 // better to keep them in CR bits.
15050
15051 // Note that trunc here can be an actual i1 trunc, or can be the effective
15052 // truncation that comes from a setcc or select_cc.
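// For example, trunc(and(zext(a), zext(b))) with i1 inputs a and b can be
// computed as and(a, b) directly on CR bits, with no move to a GPR at all.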
15053 if (N->getOpcode() == ISD::TRUNCATE &&
15054 N->getValueType(0) != MVT::i1)
15055 return SDValue();
15056
15057 if (N->getOperand(0).getValueType() != MVT::i32 &&
15058 N->getOperand(0).getValueType() != MVT::i64)
15059 return SDValue();
15060
15061 if (N->getOpcode() == ISD::SETCC ||
15062 N->getOpcode() == ISD::SELECT_CC) {
15063 // If we're looking at a comparison, then we need to make sure that the
15064 // high bits (all except for the first) don't affect the result.
15065 ISD::CondCode CC =
15066 cast<CondCodeSDNode>(N->getOperand(
15067 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
15068 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
15069
15070 if (ISD::isSignedIntSetCC(CC)) {
15071 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
15072 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
15073 return SDValue();
15074 } else if (ISD::isUnsignedIntSetCC(CC)) {
15075 if (!DAG.MaskedValueIsZero(N->getOperand(0),
15076 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
15077 !DAG.MaskedValueIsZero(N->getOperand(1),
15078 APInt::getHighBitsSet(OpBits, OpBits-1)))
15079 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15080 : SDValue());
15081 } else {
15082 // This is neither a signed nor an unsigned comparison, just make sure
15083 // that the high bits are equal.
15084 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
15085 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
15086
15087 // We don't really care about what is known about the first bit (if
15088 // anything), so pretend that it is known zero for both to ensure they can
15089 // be compared as constants.
15090 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
15091 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
15092
15093 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
15094 Op1Known.getConstant() != Op2Known.getConstant())
15095 return SDValue();
15096 }
15097 }
15098
15099 // We now know that the higher-order bits are irrelevant, we just need to
15100 // make sure that all of the intermediate operations are bit operations, and
15101 // all inputs are extensions.
15102 if (N->getOperand(0).getOpcode() != ISD::AND &&
15103 N->getOperand(0).getOpcode() != ISD::OR &&
15104 N->getOperand(0).getOpcode() != ISD::XOR &&
15105 N->getOperand(0).getOpcode() != ISD::SELECT &&
15106 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
15107 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
15108 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
15109 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
15110 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
15111 return SDValue();
15112
15113 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
15114 N->getOperand(1).getOpcode() != ISD::AND &&
15115 N->getOperand(1).getOpcode() != ISD::OR &&
15116 N->getOperand(1).getOpcode() != ISD::XOR &&
15117 N->getOperand(1).getOpcode() != ISD::SELECT &&
15118 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
15119 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
15120 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
15121 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
15122 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
15123 return SDValue();
15124
15125 SmallVector<SDValue, 4> Inputs;
15126 SmallVector<SDValue, 8> BinOps, PromOps;
15127 SmallPtrSet<SDNode *, 16> Visited;
15128
15129 for (unsigned i = 0; i < 2; ++i) {
15130 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15131 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15132 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15133 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15134 isa<ConstantSDNode>(N->getOperand(i)))
15135 Inputs.push_back(N->getOperand(i));
15136 else
15137 BinOps.push_back(N->getOperand(i));
15138
15139 if (N->getOpcode() == ISD::TRUNCATE)
15140 break;
15141 }
15142
15143 // Visit all inputs, collect all binary operations (and, or, xor and
15144 // select) that are all fed by extensions.
15145 while (!BinOps.empty()) {
15146 SDValue BinOp = BinOps.pop_back_val();
15147
15148 if (!Visited.insert(BinOp.getNode()).second)
15149 continue;
15150
15151 PromOps.push_back(BinOp);
15152
15153 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15154 // The condition of the select is not promoted.
15155 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15156 continue;
15157 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15158 continue;
15159
15160 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15161 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15162 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15163 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15164 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15165 Inputs.push_back(BinOp.getOperand(i));
15166 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15167 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15168 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15169 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15170 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
15171 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15172 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15173 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15174 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15175 BinOps.push_back(BinOp.getOperand(i));
15176 } else {
15177 // We have an input that is not an extension or another binary
15178 // operation; we'll abort this transformation.
15179 return SDValue();
15180 }
15181 }
15182 }
15183
15184 // Make sure that this is a self-contained cluster of operations (which
15185 // is not quite the same thing as saying that everything has only one
15186 // use).
15187 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15188 if (isa<ConstantSDNode>(Inputs[i]))
15189 continue;
15190
15191 for (const SDNode *User : Inputs[i].getNode()->users()) {
15192 if (User != N && !Visited.count(User))
15193 return SDValue();
15194
15195 // Make sure that we're not going to promote the non-output-value
15196 // operand(s) or SELECT or SELECT_CC.
15197 // FIXME: Although we could sometimes handle this, and it does occur in
15198 // practice that one of the condition inputs to the select is also one of
15199 // the outputs, we currently can't deal with this.
15200 if (User->getOpcode() == ISD::SELECT) {
15201 if (User->getOperand(0) == Inputs[i])
15202 return SDValue();
15203 } else if (User->getOpcode() == ISD::SELECT_CC) {
15204 if (User->getOperand(0) == Inputs[i] ||
15205 User->getOperand(1) == Inputs[i])
15206 return SDValue();
15207 }
15208 }
15209 }
15210
15211 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15212 for (const SDNode *User : PromOps[i].getNode()->users()) {
15213 if (User != N && !Visited.count(User))
15214 return SDValue();
15215
15216 // Make sure that we're not going to promote the non-output-value
15217 // operand(s) or SELECT or SELECT_CC.
15218 // FIXME: Although we could sometimes handle this, and it does occur in
15219 // practice that one of the condition inputs to the select is also one of
15220 // the outputs, we currently can't deal with this.
15221 if (User->getOpcode() == ISD::SELECT) {
15222 if (User->getOperand(0) == PromOps[i])
15223 return SDValue();
15224 } else if (User->getOpcode() == ISD::SELECT_CC) {
15225 if (User->getOperand(0) == PromOps[i] ||
15226 User->getOperand(1) == PromOps[i])
15227 return SDValue();
15228 }
15229 }
15230 }
15231
15232 // Replace all inputs with the extension operand.
15233 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15234 // Constants may have users outside the cluster of to-be-promoted nodes,
15235 // and so we need to replace those as we do the promotions.
15236 if (isa<ConstantSDNode>(Inputs[i]))
15237 continue;
15238 else
15239 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
15240 }
15241
15242 std::list<HandleSDNode> PromOpHandles;
15243 for (auto &PromOp : PromOps)
15244 PromOpHandles.emplace_back(PromOp);
15245
15246 // Replace all operations (these are all the same, but have a different
15247 // (i1) return type). DAG.getNode will validate that the types of
15248 // a binary operator match, so go through the list in reverse so that
15249 // we've likely promoted both operands first. Any intermediate truncations or
15250 // extensions disappear.
15251 while (!PromOpHandles.empty()) {
15252 SDValue PromOp = PromOpHandles.back().getValue();
15253 PromOpHandles.pop_back();
15254
15255 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15256 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15257 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15258 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15259 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
15260 PromOp.getOperand(0).getValueType() != MVT::i1) {
15261 // The operand is not yet ready (see comment below).
15262 PromOpHandles.emplace_front(PromOp);
15263 continue;
15264 }
15265
15266 SDValue RepValue = PromOp.getOperand(0);
15267 if (isa<ConstantSDNode>(RepValue))
15268 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
15269
15270 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
15271 continue;
15272 }
15273
15274 unsigned C;
15275 switch (PromOp.getOpcode()) {
15276 default: C = 0; break;
15277 case ISD::SELECT: C = 1; break;
15278 case ISD::SELECT_CC: C = 2; break;
15279 }
15280
15281 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15282 PromOp.getOperand(C).getValueType() != MVT::i1) ||
15283 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15284 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
15285 // The to-be-promoted operands of this node have not yet been
15286 // promoted (this should be rare because we're going through the
15287 // list backward, but if one of the operands has several users in
15288 // this cluster of to-be-promoted nodes, it is possible).
15289 PromOpHandles.emplace_front(PromOp);
15290 continue;
15291 }
15292
15293 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15294
15295 // If there are any constant inputs, make sure they're replaced now.
15296 for (unsigned i = 0; i < 2; ++i)
15297 if (isa<ConstantSDNode>(Ops[C+i]))
15298 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
15299
15300 DAG.ReplaceAllUsesOfValueWith(PromOp,
15301 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
15302 }
15303
15304 // Now we're left with the initial truncation itself.
15305 if (N->getOpcode() == ISD::TRUNCATE)
15306 return N->getOperand(0);
15307
15308 // Otherwise, this is a comparison. The operands to be compared have just
15309 // changed type (to i1), but everything else is the same.
15310 return SDValue(N, 0);
15311}
15312
15313SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15314 DAGCombinerInfo &DCI) const {
15315 SelectionDAG &DAG = DCI.DAG;
15316 SDLoc dl(N);
15317
15318 // If we're tracking CR bits, we need to be careful that we don't have:
15319 // zext(binary-ops(trunc(x), trunc(y)))
15320 // or
15321 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
15322 // such that we're unnecessarily moving things into CR bits that can more
15323 // efficiently stay in GPRs. Note that if we're not certain that the high
15324 // bits are set as required by the final extension, we still may need to do
15325 // some masking to get the proper behavior.
15326
15327 // This same functionality is important on PPC64 when dealing with
15328 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
15329 // the return values of functions. Because it is so similar, it is handled
15330 // here as well.
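// For example, zext(and(trunc(x), trunc(y))) with i32/i64 inputs x and y can
// be computed as and(x, y) directly in a GPR; a final mask is only needed if
// the high bits are not already in the state the outer extension requires.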
15331
15332 if (N->getValueType(0) != MVT::i32 &&
15333 N->getValueType(0) != MVT::i64)
15334 return SDValue();
15335
15336 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
15337 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15338 return SDValue();
15339
15340 if (N->getOperand(0).getOpcode() != ISD::AND &&
15341 N->getOperand(0).getOpcode() != ISD::OR &&
15342 N->getOperand(0).getOpcode() != ISD::XOR &&
15343 N->getOperand(0).getOpcode() != ISD::SELECT &&
15344 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
15345 return SDValue();
15346
15346
15347 SmallVector<SDValue, 4> Inputs;
15348 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
15349 SmallPtrSet<SDNode *, 16> Visited;
15350
15351 // Visit all inputs, collect all binary operations (and, or, xor and
15352 // select) that are all fed by truncations.
15353 while (!BinOps.empty()) {
15354 SDValue BinOp = BinOps.pop_back_val();
15355
15356 if (!Visited.insert(BinOp.getNode()).second)
15357 continue;
15358
15359 PromOps.push_back(BinOp);
15360
15361 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15362 // The condition of the select is not promoted.
15363 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15364 continue;
15365 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15366 continue;
15367
15368 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15369 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15370 Inputs.push_back(BinOp.getOperand(i));
15371 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15372 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15373 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15374 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15375 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15376 BinOps.push_back(BinOp.getOperand(i));
15377 } else {
15378 // We have an input that is not a truncation or another binary
15379 // operation; we'll abort this transformation.
15380 return SDValue();
15381 }
15382 }
15383 }
15384
15385 // For each SELECT/SELECT_CC, the operands that must be truncated when the
15386 // node is promoted because they are actually part of the to-be-promoted set.
15387 DenseMap<SDNode *, EVT> SelectTruncOp[2];
15388
15389 // Make sure that this is a self-contained cluster of operations (which
15390 // is not quite the same thing as saying that everything has only one
15391 // use).
15392 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15393 if (isa<ConstantSDNode>(Inputs[i]))
15394 continue;
15395
15396 for (SDNode *User : Inputs[i].getNode()->users()) {
15397 if (User != N && !Visited.count(User))
15398 return SDValue();
15399
15400 // If we're going to promote the non-output-value operand(s) of SELECT or
15401 // SELECT_CC, record them for truncation.
15402 if (User->getOpcode() == ISD::SELECT) {
15403 if (User->getOperand(0) == Inputs[i])
15404 SelectTruncOp[0].insert(std::make_pair(User,
15405 User->getOperand(0).getValueType()));
15406 } else if (User->getOpcode() == ISD::SELECT_CC) {
15407 if (User->getOperand(0) == Inputs[i])
15408 SelectTruncOp[0].insert(std::make_pair(User,
15409 User->getOperand(0).getValueType()));
15410 if (User->getOperand(1) == Inputs[i])
15411 SelectTruncOp[1].insert(std::make_pair(User,
15412 User->getOperand(1).getValueType()));
15413 }
15414 }
15415 }
15416
15417 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15418 for (SDNode *User : PromOps[i].getNode()->users()) {
15419 if (User != N && !Visited.count(User))
15420 return SDValue();
15421
15422 // If we're going to promote the non-output-value operand(s) of SELECT or
15423 // SELECT_CC, record them for truncation.
15424 if (User->getOpcode() == ISD::SELECT) {
15425 if (User->getOperand(0) == PromOps[i])
15426 SelectTruncOp[0].insert(std::make_pair(User,
15427 User->getOperand(0).getValueType()));
15428 } else if (User->getOpcode() == ISD::SELECT_CC) {
15429 if (User->getOperand(0) == PromOps[i])
15430 SelectTruncOp[0].insert(std::make_pair(User,
15431 User->getOperand(0).getValueType()));
15432 if (User->getOperand(1) == PromOps[i])
15433 SelectTruncOp[1].insert(std::make_pair(User,
15434 User->getOperand(1).getValueType()));
15435 }
15436 }
15437 }
15438
15439 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
15440 bool ReallyNeedsExt = false;
15441 if (N->getOpcode() != ISD::ANY_EXTEND) {
15442 // If the inputs are not all already sign/zero extended, then we'll
15443 // still need to do that at the end.
15444 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15445 if (isa<ConstantSDNode>(Inputs[i]))
15446 continue;
15447
15448 unsigned OpBits =
15449 Inputs[i].getOperand(0).getValueSizeInBits();
15450 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15451
15452 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15453 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
15454 APInt::getHighBitsSet(OpBits,
15455 OpBits-PromBits))) ||
15456 (N->getOpcode() == ISD::SIGN_EXTEND &&
15457 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
15458 (OpBits-(PromBits-1)))) {
15459 ReallyNeedsExt = true;
15460 break;
15461 }
15462 }
15463 }
15464
15465 // Convert PromOps to handles before doing any RAUW operations, as these
15466 // may CSE with existing nodes, deleting the originals.
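// A HandleSDNode keeps a use of its operand, so even if a replacement below
// CSEs one of these nodes away, the handle is updated to point at the
// surviving value rather than a deleted node.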
15467 std::list<HandleSDNode> PromOpHandles;
15468 for (auto &PromOp : PromOps)
15469 PromOpHandles.emplace_back(PromOp);
15470
15471 // Replace all inputs, either with the truncation operand, or a
15472 // truncation or extension to the final output type.
15473 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15474 // Constant inputs need to be replaced with the to-be-promoted nodes that
15475 // use them because they might have users outside of the cluster of
15476 // promoted nodes.
15477 if (isa<ConstantSDNode>(Inputs[i]))
15478 continue;
15479
15480 SDValue InSrc = Inputs[i].getOperand(0);
15481 if (Inputs[i].getValueType() == N->getValueType(0))
15482 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
15483 else if (N->getOpcode() == ISD::SIGN_EXTEND)
15484 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15485 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
15486 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15487 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15488 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
15489 else
15490 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15491 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
15492 }
15493
15494 // Replace all operations (these are all the same, but have a different
15495 // (promoted) return type). DAG.getNode will validate that the types of
15496 // a binary operator match, so go through the list in reverse so that
15497 // we've likely promoted both operands first.
15498 while (!PromOpHandles.empty()) {
15499 SDValue PromOp = PromOpHandles.back().getValue();
15500 PromOpHandles.pop_back();
15501
15502 unsigned C;
15503 switch (PromOp.getOpcode()) {
15504 default: C = 0; break;
15505 case ISD::SELECT: C = 1; break;
15506 case ISD::SELECT_CC: C = 2; break;
15507 }
15508
15509 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15510 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
15511 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15512 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
15513 // The to-be-promoted operands of this node have not yet been
15514 // promoted (this should be rare because we're going through the
15515 // list backward, but if one of the operands has several users in
15516 // this cluster of to-be-promoted nodes, it is possible).
15517 PromOpHandles.emplace_front(PromOp);
15518 continue;
15519 }
15520
15521 // For SELECT and SELECT_CC nodes, we do a similar check for any
15522 // to-be-promoted comparison inputs.
15523 if (PromOp.getOpcode() == ISD::SELECT ||
15524 PromOp.getOpcode() == ISD::SELECT_CC) {
15525 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
15526 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
15527 (SelectTruncOp[1].count(PromOp.getNode()) &&
15528 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
15529 PromOpHandles.emplace_front(PromOp);
15530 continue;
15531 }
15532 }
15533
15534 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15535
15536 // If this node has constant inputs, then they'll need to be promoted here.
15537 for (unsigned i = 0; i < 2; ++i) {
15538 if (!isa<ConstantSDNode>(Ops[C+i]))
15539 continue;
15540 if (Ops[C+i].getValueType() == N->getValueType(0))
15541 continue;
15542
15543 if (N->getOpcode() == ISD::SIGN_EXTEND)
15544 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15545 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15546 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15547 else
15548 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15549 }
15550
15551 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15552 // truncate them again to the original value type.
15553 if (PromOp.getOpcode() == ISD::SELECT ||
15554 PromOp.getOpcode() == ISD::SELECT_CC) {
15555 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
15556 if (SI0 != SelectTruncOp[0].end())
15557 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
15558 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
15559 if (SI1 != SelectTruncOp[1].end())
15560 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
15561 }
15562
15563 DAG.ReplaceAllUsesOfValueWith(PromOp,
15564 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
15565 }
15566
15567 // Now we're left with the initial extension itself.
15568 if (!ReallyNeedsExt)
15569 return N->getOperand(0);
15570
15571 // To zero extend, just mask off everything except for the first bit (in the
15572 // i1 case).
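// For example, when the promoted type is i1 and the result is i32, this is
// simply (and %x, 1).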
15573 if (N->getOpcode() == ISD::ZERO_EXTEND)
15574 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
15575 DAG.getConstant(APInt::getLowBitsSet(
15576 N->getValueSizeInBits(0), PromBits),
15577 dl, N->getValueType(0)));
15578
15579 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15580 "Invalid extension type");
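// Otherwise, sign extend by shifting the promoted bits up to the MSB and
// arithmetic-shifting them back down; for i1 -> i32 this is
// (sra (shl %x, 31), 31).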
15581 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
15582 SDValue ShiftCst =
15583 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
15584 return DAG.getNode(
15585 ISD::SRA, dl, N->getValueType(0),
15586 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
15587 ShiftCst);
15588}
15589
15590SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15591 DAGCombinerInfo &DCI) const {
15592 assert(N->getOpcode() == ISD::SETCC &&
15593 "Should be called with a SETCC node");
15594
15595 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15596 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
15597 SDValue LHS = N->getOperand(0);
15598 SDValue RHS = N->getOperand(1);
15599
15600 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15601 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
15602 LHS.hasOneUse())
15603 std::swap(LHS, RHS);
15604
15605 // x == 0-y --> x+y == 0
15606 // x != 0-y --> x+y != 0
15607 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
15608 RHS.hasOneUse()) {
15609 SDLoc DL(N);
15610 SelectionDAG &DAG = DCI.DAG;
15611 EVT VT = N->getValueType(0);
15612 EVT OpVT = LHS.getValueType();
15613 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
15614 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
15615 }
15616 }
15617
15618 return DAGCombineTruncBoolExt(N, DCI);
15619}
15620
15621// Is this an extending load from an f32 to an f64?
15622static bool isFPExtLoad(SDValue Op) {
15623 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
15624 return LD->getExtensionType() == ISD::EXTLOAD &&
15625 Op.getValueType() == MVT::f64;
15626 return false;
15627}
15628
15629 /// Reduces the number of fp-to-int conversions when building a vector.
15630///
15631/// If this vector is built out of floating to integer conversions,
15632/// transform it to a vector built out of floating point values followed by a
15633/// single floating to integer conversion of the vector.
15634/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15635/// becomes (fptosi (build_vector ($A, $B, ...)))
15636SDValue PPCTargetLowering::
15637combineElementTruncationToVectorTruncation(SDNode *N,
15638 DAGCombinerInfo &DCI) const {
15639 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15640 "Should be called with a BUILD_VECTOR node");
15641
15642 SelectionDAG &DAG = DCI.DAG;
15643 SDLoc dl(N);
15644
15645 SDValue FirstInput = N->getOperand(0);
15646 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
15647 "The input operand must be an fp-to-int conversion.");
15648
15649 // This combine happens after legalization so the fp_to_[su]i nodes are
15650 // already converted to PPCISD nodes.
15651 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
15652 if (FirstConversion == PPCISD::FCTIDZ ||
15653 FirstConversion == PPCISD::FCTIDUZ ||
15654 FirstConversion == PPCISD::FCTIWZ ||
15655 FirstConversion == PPCISD::FCTIWUZ) {
15656 bool IsSplat = true;
15657 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
15658 FirstConversion == PPCISD::FCTIWUZ;
15659 EVT SrcVT = FirstInput.getOperand(0).getValueType();
15660 SmallVector<SDValue, 4> Ops;
15661 EVT TargetVT = N->getValueType(0);
15662 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15663 SDValue NextOp = N->getOperand(i);
15664 if (NextOp.getOpcode() != PPCISD::MFVSR)
15665 return SDValue();
15666 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
15667 if (NextConversion != FirstConversion)
15668 return SDValue();
15669 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
15670 // This is not valid if the input was originally double precision. It is
15671 // also not profitable to do unless this is an extending load in which
15672 // case doing this combine will allow us to combine consecutive loads.
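// (Rounding an f64 that came from an f32 extending load back to f32 is
// exact, so the FP_ROUND inserted below cannot change the value.)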
15673 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
15674 return SDValue();
15675 if (N->getOperand(i) != FirstInput)
15676 IsSplat = false;
15677 }
15678
15679 // If this is a splat, we leave it as-is since there will be only a single
15680 // fp-to-int conversion followed by a splat of the integer. This is better
15681 // for 32-bit and smaller ints and neutral for 64-bit ints.
15682 if (IsSplat)
15683 return SDValue();
15684
15685 // Now that we know we have the right type of node, get its operands
15686 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15687 SDValue In = N->getOperand(i).getOperand(0);
15688 if (Is32Bit) {
15689 // For 32-bit values, we need to add an FP_ROUND node (if we made it
15690 // here, we know that all inputs are extending loads so this is safe).
15691 if (In.isUndef())
15692 Ops.push_back(DAG.getUNDEF(SrcVT));
15693 else {
15694 SDValue Trunc =
15695 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
15696 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
15697 Ops.push_back(Trunc);
15698 }
15699 } else
15700 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
15701 }
15702
15703 unsigned Opcode;
15704 if (FirstConversion == PPCISD::FCTIDZ ||
15705 FirstConversion == PPCISD::FCTIWZ)
15706 Opcode = ISD::FP_TO_SINT;
15707 else
15708 Opcode = ISD::FP_TO_UINT;
15709
15710 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15711 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
15712 return DAG.getNode(Opcode, dl, TargetVT, BV);
15713 }
15714 return SDValue();
15715}
15716
15717 // The LXVKQ instruction loads a VSX vector with a special quadword value
15718// based on an immediate value. This helper method returns the details of the
15719// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
15720// to help generate the LXVKQ instruction and the subsequent shift instruction
15721// required to match the original build vector pattern.
15722
15723// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
15724using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
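// For example, the MSB-set quadword maps to {16, 0} (a bare LXVKQ), while a
// quadword equal to 1 maps to {16, 127} (LXVKQ followed by a 127-bit right
// shift).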
15725
15726static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
15727
15728 // LXVKQ instruction loads the Quadword value:
15729 // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
15730 static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
15731 static const uint32_t Uim = 16;
15732
15733 // Check for direct LXVKQ match (no shift needed)
15734 if (FullVal == BasePattern)
15735 return std::make_tuple(Uim, uint8_t{0});
15736
15737 // Check if FullValue is 1 (the result of the base pattern >> 127)
15738 if (FullVal == APInt(128, 1))
15739 return std::make_tuple(Uim, uint8_t{127});
15740
15741 return std::nullopt;
15742}
15743
15744/// Combine vector loads to a single load (using lxvkq) or splat with shift of a
15745 /// constant (xxspltib + vsrq) by recognizing patterns in the build vector.
15746 /// The LXVKQ instruction loads a VSX vector with a special quadword value based
15747 /// on an immediate value. If UIM=0b10000 then LXVKQ loads VSR[32×TX+T] with value
15748/// 0x8000_0000_0000_0000_0000_0000_0000_0000.
15749/// This can be used to inline the build vector constants that have the
15750/// following patterns:
15751///
15752/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
15753/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
15754 /// The MSB pattern can be loaded directly using LXVKQ while the LSB is loaded using a
15755/// combination of splatting and right shift instructions.
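/// For example, on a little endian subtarget the v2i64 build vector
/// (build_vector 0, 0x8000000000000000) assembles to the MSB-set quadword and
/// can be emitted as a single lxvkq.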
15756
15757SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
15758 SelectionDAG &DAG) const {
15759
15760 assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
15761 "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
15762
15763 // This transformation is only supported if we are loading either a byte,
15764 // halfword, word, or doubleword.
15765 EVT VT = Op.getValueType();
15766 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
15767 VT == MVT::v2i64))
15768 return SDValue();
15769
15770 LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
15771 << VT.getEVTString() << "): ";
15772 Op->dump());
15773
15774 unsigned NumElems = VT.getVectorNumElements();
15775 unsigned ElemBits = VT.getScalarSizeInBits();
15776
15777 bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
15778
15779 // Check for non-constant operands in the build vector.
15780 for (const SDValue &Operand : Op.getNode()->op_values()) {
15781 if (!isa<ConstantSDNode>(Operand))
15782 return SDValue();
15783 }
15784
15785 // Assemble build vector operands as a 128-bit register value
15786 // We need to reconstruct what the 128-bit register pattern would be
15787 // that produces this vector when interpreted with the current endianness
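// For example, for v4i32 element 0 occupies bits [0, 32) of the image on
// little endian and bits [96, 128) on big endian.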
15788 APInt FullVal = APInt::getZero(128);
15789
15790 for (unsigned Index = 0; Index < NumElems; ++Index) {
15791 auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
15792
15793 // Get element value as raw bits (zero-extended)
15794 uint64_t ElemValue = C->getZExtValue();
15795
15796 // Mask to element size to ensure we only get the relevant bits
15797 if (ElemBits < 64)
15798 ElemValue &= ((1ULL << ElemBits) - 1);
15799
15800 // Calculate bit position for this element in the 128-bit register
15801 unsigned BitPos =
15802 (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
15803
15804 // Create APInt for the element value and shift it to correct position
15805 APInt ElemAPInt(128, ElemValue);
15806 ElemAPInt <<= BitPos;
15807
15808 // Place the element value at the correct bit position
15809 FullVal |= ElemAPInt;
15810 }
15811
15812 if (FullVal.isZero() || FullVal.isAllOnes())
15813 return SDValue();
15814
15815 if (auto UIMOpt = getPatternInfo(FullVal)) {
15816 const auto &[Uim, ShiftAmount] = *UIMOpt;
15817 SDLoc Dl(Op);
15818
15819 // Generate LXVKQ instruction if the shift amount is zero.
15820 if (ShiftAmount == 0) {
15821 SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
15822 SDValue LxvkqInstr =
15823 SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
15824 LLVM_DEBUG(llvm::dbgs()
15825 << "combineBVLoadsSpecialValue: Instruction Emitted ";
15826 LxvkqInstr.dump());
15827 return LxvkqInstr;
15828 }
15829
15830 assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
15831
15832 // The right shifted pattern can be constructed using a combination of
15833 // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
15834 // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
15835 // value 255.
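// Splatting 0xFF makes byte 15 equal to 255, whose low 7 bits encode the
// required shift amount of 127.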
15836 SDValue ShiftAmountVec =
15837 SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
15838 DAG.getTargetConstant(255, Dl, MVT::i32)),
15839 0);
15840 // Generate appropriate right shift instruction
15841 SDValue ShiftVec = SDValue(
15842 DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
15843 0);
15844 LLVM_DEBUG(llvm::dbgs()
15845 << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
15846 ShiftVec.dump());
15847 return ShiftVec;
15848 }
15849 // No patterns matched for build vectors.
15850 return SDValue();
15851}
15852
15853/// Reduce the number of loads when building a vector.
15854///
15855/// Building a vector out of multiple loads can be converted to a load
15856/// of the vector type if the loads are consecutive. If the loads are
15857/// consecutive but in descending order, a shuffle is added at the end
15858/// to reorder the vector.
15859 static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
15860 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15861 "Should be called with a BUILD_VECTOR node");
15862
15863 SDLoc dl(N);
15864
15865 // Return early for non-byte-sized types, as they can't be consecutive.
15866 if (!N->getValueType(0).getVectorElementType().isByteSized())
15867 return SDValue();
15868
15869 bool InputsAreConsecutiveLoads = true;
15870 bool InputsAreReverseConsecutive = true;
15871 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
15872 SDValue FirstInput = N->getOperand(0);
15873 bool IsRoundOfExtLoad = false;
15874 LoadSDNode *FirstLoad = nullptr;
15875
15876 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
15877 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
15878 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
15879 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
15880 }
15881 // Not a build vector of (possibly fp_rounded) loads.
15882 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
15883 N->getNumOperands() == 1)
15884 return SDValue();
15885
15886 if (!IsRoundOfExtLoad)
15887 FirstLoad = cast<LoadSDNode>(FirstInput);
15888
15889 SmallVector<LoadSDNode *, 4> InputLoads;
15890 InputLoads.push_back(FirstLoad);
15891 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
15892 // If any inputs are fp_round(extload), they all must be.
15893 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
15894 return SDValue();
15895
15896 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
15897 N->getOperand(i);
15898 if (NextInput.getOpcode() != ISD::LOAD)
15899 return SDValue();
15900
15901 SDValue PreviousInput =
15902 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
15903 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
15904 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
15905
15906 // If any inputs are fp_round(extload), they all must be.
15907 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
15908 return SDValue();
15909
15910 // We only care about regular loads. The PPC-specific load intrinsics
15911 // will not lead to a merge opportunity.
15912 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
15913 InputsAreConsecutiveLoads = false;
15914 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
15915 InputsAreReverseConsecutive = false;
15916
15917 // Exit early if the loads are neither consecutive nor reverse consecutive.
15918 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
15919 return SDValue();
15920 InputLoads.push_back(LD2);
15921 }
15922
15923 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
15924 "The loads cannot be both consecutive and reverse consecutive.");
15925
15926 SDValue WideLoad;
15927 SDValue ReturnSDVal;
15928 if (InputsAreConsecutiveLoads) {
15929 assert(FirstLoad && "Input needs to be a LoadSDNode.");
15930 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
15931 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15932 FirstLoad->getAlign());
15933 ReturnSDVal = WideLoad;
15934 } else if (InputsAreReverseConsecutive) {
15935 LoadSDNode *LastLoad = InputLoads.back();
15936 assert(LastLoad && "Input needs to be a LoadSDNode.");
15937 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
15938 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
15939 LastLoad->getAlign());
15940 SmallVector<int, 16> Ops;
15941 for (int i = N->getNumOperands() - 1; i >= 0; i--)
15942 Ops.push_back(i);
15943
15944 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
15945 DAG.getUNDEF(N->getValueType(0)), Ops);
15946 } else
15947 return SDValue();
15948
15949 for (auto *LD : InputLoads)
15950 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
15951 return ReturnSDVal;
15952}
15953
15954// This function adds the required vector_shuffle needed to get
15955// the elements of the vector extract in the correct position
15956// as specified by the CorrectElems encoding.
15957 static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
15958 SDValue Input, uint64_t Elems,
15959 uint64_t CorrectElems) {
15960 SDLoc dl(N);
15961
15962 unsigned NumElems = Input.getValueType().getVectorNumElements();
15963 SmallVector<int, 16> ShuffleMask(NumElems, -1);
15964
15965 // Knowing the element indices being extracted from the original
15966 // vector and the order in which they're being inserted, just put
15967 // them at element indices required for the instruction.
15968 for (unsigned i = 0; i < N->getNumOperands(); i++) {
15969 if (DAG.getDataLayout().isLittleEndian())
15970 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
15971 else
15972 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
15973 CorrectElems = CorrectElems >> 8;
15974 Elems = Elems >> 8;
15975 }
15976
15977 SDValue Shuffle =
15978 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
15979 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
15980
15981 EVT VT = N->getValueType(0);
15982 SDValue Conv = DAG.getBitcast(VT, Shuffle);
15983
15984 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
15985 Input.getValueType().getVectorElementType(),
15986 VT.getVectorNumElements());
15987 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
15988 DAG.getValueType(ExtVT));
15989}
15990
15991// Look for build vector patterns where input operands come from sign
15992// extended vector_extract elements of specific indices. If the correct indices
15993// aren't used, add a vector shuffle to fix up the indices and create
15994// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
15995// during instruction selection.
15996 static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
15997 // This array encodes the indices that the vector sign extend instructions
15998 // extract from when extending from one type to another for both BE and LE.
15999 // The right nibble of each byte corresponds to the LE indices,
16000 // and the left nibble of each byte corresponds to the BE indices.
16001 // For example: 0x3074B8FC byte->word
16002 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
16003 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
16004 // For example: 0x000070F8 byte->double word
16005 // For LE: the allowed indices are: 0x0,0x8
16006 // For BE: the allowed indices are: 0x7,0xF
16007 uint64_t TargetElems[] = {
16008 0x3074B8FC, // b->w
16009 0x000070F8, // b->d
16010 0x10325476, // h->w
16011 0x00003074, // h->d
16012 0x00001032, // w->d
16013 };
16014
16015 uint64_t Elems = 0;
16016 int Index;
16017 SDValue Input;
16018
16019 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
16020 if (!Op)
16021 return false;
16022 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
16023 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
16024 return false;
16025
16026 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
16027 // of the right width.
16028 SDValue Extract = Op.getOperand(0);
16029 if (Extract.getOpcode() == ISD::ANY_EXTEND)
16030 Extract = Extract.getOperand(0);
16031 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16032 return false;
16033
16034 ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
16035 if (!ExtOp)
16036 return false;
16037
16038 Index = ExtOp->getZExtValue();
16039 if (Input && Input != Extract.getOperand(0))
16040 return false;
16041
16042 if (!Input)
16043 Input = Extract.getOperand(0);
16044
16045 Elems = Elems << 8;
16046 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
16047 Elems |= Index;
16048
16049 return true;
16050 };
16051
16052 // If the build vector operands aren't sign extended vector extracts
16053 // of the same input vector, then return.
16054 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16055 if (!isSExtOfVecExtract(N->getOperand(i))) {
16056 return SDValue();
16057 }
16058 }
16059
16060 // If the vector extract indices are not correct, add the appropriate
16061 // vector_shuffle.
16062 int TgtElemArrayIdx;
16063 int InputSize = Input.getValueType().getScalarSizeInBits();
16064 int OutputSize = N->getValueType(0).getScalarSizeInBits();
16065 if (InputSize + OutputSize == 40)
16066 TgtElemArrayIdx = 0;
16067 else if (InputSize + OutputSize == 72)
16068 TgtElemArrayIdx = 1;
16069 else if (InputSize + OutputSize == 48)
16070 TgtElemArrayIdx = 2;
16071 else if (InputSize + OutputSize == 80)
16072 TgtElemArrayIdx = 3;
16073 else if (InputSize + OutputSize == 96)
16074 TgtElemArrayIdx = 4;
16075 else
16076 return SDValue();
16077
16078 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
16079 CorrectElems = DAG.getDataLayout().isLittleEndian()
16080 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
16081 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
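// Only the nibbles for the current endianness are meaningful: Elems gathered
// the extract indices into the same nibble positions above, so a direct
// comparison tells us whether a fixup shuffle is required.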
16082 if (Elems != CorrectElems) {
16083 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
16084 }
16085
16086 // Regular lowering will catch cases where a shuffle is not needed.
16087 return SDValue();
16088}
16089
16090// Look for the pattern of a load from a narrow width to i128, feeding
16091// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
16092// (LXVRZX). This node represents a zero extending load that will be matched
16093// to the Load VSX Vector Rightmost instructions.
16094 static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
16095 SDLoc DL(N);
16096
16097 // This combine is only eligible for a BUILD_VECTOR of v1i128.
16098 if (N->getValueType(0) != MVT::v1i128)
16099 return SDValue();
16100
16101 SDValue Operand = N->getOperand(0);
16102 // Proceed with the transformation if the operand to the BUILD_VECTOR
16103 // is a load instruction.
16104 if (Operand.getOpcode() != ISD::LOAD)
16105 return SDValue();
16106
16107 auto *LD = cast<LoadSDNode>(Operand);
16108 EVT MemoryType = LD->getMemoryVT();
16109
16110 // This transformation is only valid if we are loading either a byte,
16111 // halfword, word, or doubleword.
16112 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
16113 MemoryType == MVT::i32 || MemoryType == MVT::i64;
16114
16115 // Ensure that the load from the narrow width is being zero extended to i128.
16116 if (!ValidLDType ||
16117 (LD->getExtensionType() != ISD::ZEXTLOAD &&
16118 LD->getExtensionType() != ISD::EXTLOAD))
16119 return SDValue();
16120
16121 SDValue LoadOps[] = {
16122 LD->getChain(), LD->getBasePtr(),
16123 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
16124
16125 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
16126 DAG.getVTList(MVT::v1i128, MVT::Other),
16127 LoadOps, MemoryType, LD->getMemOperand());
16128}
16129
16130SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
16131 DAGCombinerInfo &DCI) const {
16132 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16133 "Should be called with a BUILD_VECTOR node");
16134
16135 SelectionDAG &DAG = DCI.DAG;
16136 SDLoc dl(N);
16137
16138 if (!Subtarget.hasVSX())
16139 return SDValue();
16140
16141 // The target independent DAG combiner will leave a build_vector of
16142 // float-to-int conversions intact. We can generate MUCH better code for
16143 // a float-to-int conversion of a vector of floats.
16144 SDValue FirstInput = N->getOperand(0);
16145 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
16146 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
16147 if (Reduced)
16148 return Reduced;
16149 }
16150
16151 // If we're building a vector out of consecutive loads, just load that
16152 // vector type.
16153 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
16154 if (Reduced)
16155 return Reduced;
16156
16157 // If we're building a vector out of extended elements from another vector
16158 // we have P9 vector integer extend instructions. The code assumes legal
16159 // input types (i.e. it can't handle things like v4i16) so do not run before
16160 // legalization.
16161 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
16162 Reduced = combineBVOfVecSExt(N, DAG);
16163 if (Reduced)
16164 return Reduced;
16165 }
16166
16167 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
16168 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
16169 // is a load from <valid narrow width> to i128.
16170 if (Subtarget.isISA3_1()) {
16171 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
16172 if (BVOfZLoad)
16173 return BVOfZLoad;
16174 }
16175
16176 if (N->getValueType(0) != MVT::v2f64)
16177 return SDValue();
16178
16179 // Looking for:
16180 // (build_vector ([su]int_to_fp (extractelt 0)), ([su]int_to_fp (extractelt 1)))
16181 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
16182 FirstInput.getOpcode() != ISD::UINT_TO_FP)
16183 return SDValue();
16184 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
16185 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
16186 return SDValue();
16187 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
16188 return SDValue();
16189
16190 SDValue Ext1 = FirstInput.getOperand(0);
16191 SDValue Ext2 = N->getOperand(1).getOperand(0);
16192 if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16193 Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16194 return SDValue();
16195
16196 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16197 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16198 if (!Ext1Op || !Ext2Op)
16199 return SDValue();
16200 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16201 Ext1.getOperand(0) != Ext2.getOperand(0))
16202 return SDValue();
16203
16204 int FirstElem = Ext1Op->getZExtValue();
16205 int SecondElem = Ext2Op->getZExtValue();
16206 int SubvecIdx;
16207 if (FirstElem == 0 && SecondElem == 1)
16208 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16209 else if (FirstElem == 2 && SecondElem == 3)
16210 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16211 else
16212 return SDValue();
16213
16214 SDValue SrcVec = Ext1.getOperand(0);
16215 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16216 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16217 return DAG.getNode(NodeType, dl, MVT::v2f64,
16218 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16219}
16220
16221SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
16222 DAGCombinerInfo &DCI) const {
16223 assert((N->getOpcode() == ISD::SINT_TO_FP ||
16224 N->getOpcode() == ISD::UINT_TO_FP) &&
16225 "Need an int -> FP conversion node here");
16226
16227 if (useSoftFloat() || !Subtarget.has64BitSupport())
16228 return SDValue();
16229
16230 SelectionDAG &DAG = DCI.DAG;
16231 SDLoc dl(N);
16232 SDValue Op(N, 0);
16233
16234 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
16235 // from the hardware.
16236 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
16237 return SDValue();
16238 if (!Op.getOperand(0).getValueType().isSimple())
16239 return SDValue();
16240 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
16241 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
16242 return SDValue();
16243
16244 SDValue FirstOperand(Op.getOperand(0));
16245 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
16246 (FirstOperand.getValueType() == MVT::i8 ||
16247 FirstOperand.getValueType() == MVT::i16);
16248 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
16249 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
16250 bool DstDouble = Op.getValueType() == MVT::f64;
16251 unsigned ConvOp = Signed ?
16252 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
16253 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
16254 SDValue WidthConst =
16255 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
16256 dl, false);
16257 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
16258 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
16259 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
16260 DAG.getVTList(MVT::f64, MVT::Other),
16261 Ops, MVT::i8, LDN->getMemOperand());
16262 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
16263
16264 // For signed conversion, we need to sign-extend the value in the VSR
16265 if (Signed) {
16266 SDValue ExtOps[] = { Ld, WidthConst };
16267 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
16268 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
16269 } else
16270 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
16271 }
16272
16273
16274 // For i32 intermediate values, unfortunately, the conversion functions
16275 // leave the upper 32 bits of the value undefined. Within the set of
16276 // scalar instructions, we have no method for zero- or sign-extending the
16277 // value. Thus, we cannot handle i32 intermediate values here.
16278 if (Op.getOperand(0).getValueType() == MVT::i32)
16279 return SDValue();
16280
16281 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
16282 "UINT_TO_FP is supported only with FPCVT");
16283
16284 // If we have FCFIDS, then use it when converting to single-precision.
16285 // Otherwise, convert to double-precision and then round.
16286 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16287 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
16288 : PPCISD::FCFIDS)
16289 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
16290 : PPCISD::FCFID);
16291 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16292 ? MVT::f32
16293 : MVT::f64;
16294
16295 // If we're converting from a float, to an int, and back to a float again,
16296 // then we don't need the store/load pair at all.
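// For example, (f64 (sint_to_fp (fp_to_sint f64 %x))) can be emitted as an
// fctidz feeding an fcfid, with the value staying in a floating-point
// register throughout.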
16297 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
16298 Subtarget.hasFPCVT()) ||
16299 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
16300 SDValue Src = Op.getOperand(0).getOperand(0);
16301 if (Src.getValueType() == MVT::f32) {
16302 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
16303 DCI.AddToWorklist(Src.getNode());
16304 } else if (Src.getValueType() != MVT::f64) {
16305 // Make sure that we don't pick up a ppc_fp128 source value.
16306 return SDValue();
16307 }
16308
16309 unsigned FCTOp =
16310 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
16311 PPCISD::FCTIDUZ;
16312
16313 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
16314 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
16315
16316 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
16317 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
16318 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
16319 DCI.AddToWorklist(FP.getNode());
16320 }
16321
16322 return FP;
16323 }
16324
16325 return SDValue();
16326}
16327
16328// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16329// builtins) into loads with swaps.
16330 SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
16331 DAGCombinerInfo &DCI) const {
16332 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
16333 // load combines.
16334 if (DCI.isBeforeLegalizeOps())
16335 return SDValue();
16336
16337 SelectionDAG &DAG = DCI.DAG;
16338 SDLoc dl(N);
16339 SDValue Chain;
16340 SDValue Base;
16341 MachineMemOperand *MMO;
16342
16343 switch (N->getOpcode()) {
16344 default:
16345 llvm_unreachable("Unexpected opcode for little endian VSX load");
16346 case ISD::LOAD: {
16347 LoadSDNode *LD = cast<LoadSDNode>(N);
16348 Chain = LD->getChain();
16349 Base = LD->getBasePtr();
16350 MMO = LD->getMemOperand();
16351 // If the MMO suggests this isn't a load of a full vector, leave
16352 // things alone. For a built-in, we have to make the change for
16353 // correctness, so if there is a size problem that will be a bug.
16354 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16355 return SDValue();
16356 break;
16357 }
16358 case ISD::INTRINSIC_W_CHAIN: {
16359 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16360 Chain = Intrin->getChain();
16361 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16362 // us what we want. Get operand 2 instead.
16363 Base = Intrin->getOperand(2);
16364 MMO = Intrin->getMemOperand();
16365 break;
16366 }
16367 }
16368
16369 MVT VecTy = N->getValueType(0).getSimpleVT();
16370
16371 SDValue LoadOps[] = { Chain, Base };
16372 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
16373 DAG.getVTList(MVT::v2f64, MVT::Other),
16374 LoadOps, MVT::v2f64, MMO);
16375
16376 DCI.AddToWorklist(Load.getNode());
16377 Chain = Load.getValue(1);
16378 SDValue Swap = DAG.getNode(
16379 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
16380 DCI.AddToWorklist(Swap.getNode());
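// On little endian, lxvd2x loads the two doublewords in big-endian element
// order, so the xxswapd above restores the expected order; when possible the
// PPCVSXSwapRemoval pass later eliminates redundant swap pairs.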
16381
16382 // Add a bitcast if the resulting load type doesn't match v2f64.
16383 if (VecTy != MVT::v2f64) {
16384 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
16385 DCI.AddToWorklist(N.getNode());
16386 // Package {bitcast value, swap's chain} to match Load's shape.
16387 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
16388 N, Swap.getValue(1));
16389 }
16390
16391 return Swap;
16392}
16393
16394// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16395// builtins) into stores with swaps.
16396 SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
16397 DAGCombinerInfo &DCI) const {
16398 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16399 // store combines.
16400 if (DCI.isBeforeLegalizeOps())
16401 return SDValue();
16402
16403 SelectionDAG &DAG = DCI.DAG;
16404 SDLoc dl(N);
16405 SDValue Chain;
16406 SDValue Base;
16407 unsigned SrcOpnd;
16408 MachineMemOperand *MMO;
16409
16410 switch (N->getOpcode()) {
16411 default:
16412 llvm_unreachable("Unexpected opcode for little endian VSX store");
16413 case ISD::STORE: {
16414 StoreSDNode *ST = cast<StoreSDNode>(N);
16415 Chain = ST->getChain();
16416 Base = ST->getBasePtr();
16417 MMO = ST->getMemOperand();
16418 SrcOpnd = 1;
16419 // If the MMO suggests this isn't a store of a full vector, leave
16420 // things alone. For a built-in, we have to make the change for
16421 // correctness, so if there is a size problem that will be a bug.
16422 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16423 return SDValue();
16424 break;
16425 }
16426 case ISD::INTRINSIC_VOID: {
16427 MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16428 Chain = Intrin->getChain();
16429 // Intrin->getBasePtr() oddly does not get what we want.
16430 Base = Intrin->getOperand(3);
16431 MMO = Intrin->getMemOperand();
16432 SrcOpnd = 2;
16433 break;
16434 }
16435 }
16436
16437 SDValue Src = N->getOperand(SrcOpnd);
16438 MVT VecTy = Src.getValueType().getSimpleVT();
16439
16440 // All stores are done as v2f64 and possible bit cast.
16441 if (VecTy != MVT::v2f64) {
16442 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
16443 DCI.AddToWorklist(Src.getNode());
16444 }
16445
16446 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
16447 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
16448 DCI.AddToWorklist(Swap.getNode());
16449 Chain = Swap.getValue(1);
16450 SDValue StoreOps[] = { Chain, Swap, Base };
16451 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
16452 DAG.getVTList(MVT::Other),
16453 StoreOps, VecTy, MMO);
16454 DCI.AddToWorklist(Store.getNode());
16455 return Store;
16456}
16457
16458// Handle DAG combine for STORE (FP_TO_INT F).
16459SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16460 DAGCombinerInfo &DCI) const {
16461 SelectionDAG &DAG = DCI.DAG;
16462 SDLoc dl(N);
16463 unsigned Opcode = N->getOperand(1).getOpcode();
16464 (void)Opcode;
16465 bool Strict = N->getOperand(1)->isStrictFPOpcode();
16466
16467 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16468 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
16469 && "Not a FP_TO_INT Instruction!");
16470
16471 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
16472 EVT Op1VT = N->getOperand(1).getValueType();
16473 EVT ResVT = Val.getValueType();
16474
16475 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
16476 return SDValue();
16477
16478 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
16479 bool ValidTypeForStoreFltAsInt =
16480 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
16481 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
16482
16483 // TODO: Lower conversion from f128 on all VSX targets
16484 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
16485 return SDValue();
16486
16487 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
16488 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
16489 return SDValue();
16490
16491 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
16492
16493 // Set number of bytes being converted.
16494 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
16495 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
16496 DAG.getIntPtrConstant(ByteSize, dl, false),
16497 DAG.getValueType(Op1VT)};
16498
16499 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
16500 DAG.getVTList(MVT::Other), Ops,
16501 cast<StoreSDNode>(N)->getMemoryVT(),
16502 cast<StoreSDNode>(N)->getMemOperand());
16503
16504 return Val;
16505}
16506
16507static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16508 // Check that the source of the element keeps flipping
16509 // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
16510 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16511 for (int i = 1, e = Mask.size(); i < e; i++) {
16512 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16513 return false;
16514 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16515 return false;
16516 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16517 }
16518 return true;
16519}
16520
16521static bool isSplatBV(SDValue Op) {
16522 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16523 return false;
16524 SDValue FirstOp;
16525
16526 // Find first non-undef input.
16527 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16528 FirstOp = Op.getOperand(i);
16529 if (!FirstOp.isUndef())
16530 break;
16531 }
16532
16533 // All inputs are undef or the same as the first non-undef input.
16534 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16535 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16536 return false;
16537 return true;
16538}
16539
16540 static SDValue isScalarToVec(SDValue Op) {
16541 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16542 return Op;
16543 if (Op.getOpcode() != ISD::BITCAST)
16544 return SDValue();
16545 Op = Op.getOperand(0);
16546 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16547 return Op;
16548 return SDValue();
16549}
16550
16551// Fix up the shuffle mask to account for the fact that the result of
16552// scalar_to_vector is not in lane zero. This just takes all values in
16553// the ranges specified by the min/max indices and adds the number of
16554// elements required to ensure each element comes from the respective
16555// position in the valid lane.
16556// On little endian, that's just the corresponding element in the other
16557// half of the vector. On big endian, it is in the same half but right
16558// justified rather than left justified in that half.
16559 static void fixupShuffleMaskForPermutedSToV(
16560 SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16561 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16562 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16563 int LHSEltFixup =
16564 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16565 int RHSEltFixup =
16566 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16567 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
16568 int Idx = ShuffV[I];
16569 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16570 ShuffV[I] += LHSEltFixup;
16571 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16572 ShuffV[I] += RHSEltFixup;
16573 }
16574}
16575
16576// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
16577// the original is:
16578// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
16579// In such a case, just change the shuffle mask to extract the element
16580// from the permuted index.
16581 static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
16582 const PPCSubtarget &Subtarget) {
16583 SDLoc dl(OrigSToV);
16584 EVT VT = OrigSToV.getValueType();
16585 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16586 "Expecting a SCALAR_TO_VECTOR here");
16587 SDValue Input = OrigSToV.getOperand(0);
16588
16589 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16590 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
16591 SDValue OrigVector = Input.getOperand(0);
16592
16593 // Can't handle non-const element indices or different vector types
16594 // for the input to the extract and the output of the scalar_to_vector.
16595 if (Idx && VT == OrigVector.getValueType()) {
16596 unsigned NumElts = VT.getVectorNumElements();
16597 assert(
16598 NumElts > 1 &&
16599 "Cannot produce a permuted scalar_to_vector for one element vector");
16600 SmallVector<int, 16> NewMask(NumElts, -1);
16601 unsigned ResultInElt = NumElts / 2;
16602 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
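// For example, for a v4i32 result the value lands in element 2 on little
// endian and element 1 on big endian, so the mask pulls the extracted
// element into that lane.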
16603 NewMask[ResultInElt] = Idx->getZExtValue();
16604 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
16605 }
16606 }
16607 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
16608 OrigSToV.getOperand(0));
16609}
16610
16611 static bool isShuffleMaskInRange(const SmallVector<int, 16> &ShuffV,
16612 int HalfVec, int LHSLastElementDefined,
16613 int RHSLastElementDefined) {
16614 for (int Index : ShuffV) {
16615 if (Index < 0) // Skip explicitly undefined mask indices.
16616 continue;
16617 // Handle first input vector of the vector_shuffle.
16618 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
16619 (Index > LHSLastElementDefined))
16620 return false;
16621 // Handle second input vector of the vector_shuffle.
16622 if ((RHSLastElementDefined >= 0) &&
16623 (Index > HalfVec + RHSLastElementDefined))
16624 return false;
16625 }
16626 return true;
16627}
16628
16629 static SDValue generateSToVPermutedForVecShuffle(
16630 int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
16631 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
16632 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
16633 EVT VecShuffOperandType = VecShuffOperand.getValueType();
16634 // Set up the values for the shuffle vector fixup.
16635 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
16636 // The last element depends on if the input comes from the LHS or RHS.
16637 //
16638 // For example:
16639 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
16640 //
16641 // For the LHS: The last element that comes from the LHS is actually 0, not 3
16642 // because elements 1 and higher of a scalar_to_vector are undefined.
16643 // For the RHS: The last element that comes from the RHS is actually 5, not 7
16644 // because elements 1 and higher of a scalar_to_vector are undefined.
16645 // It is also not 4 because the original scalar_to_vector is wider and
16646 // actually contains two i32 elements.
16647 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
16648 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
16649 : FirstElt;
16650 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
16651 if (SToVPermuted.getValueType() != VecShuffOperandType)
16652 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
16653 return SToVPermuted;
16654}
16655
16656// On little endian subtargets, combine shuffles such as:
16657// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
16658// into:
16659// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
16660// because the latter can be matched to a single instruction merge.
16661// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
16662// to put the value into element zero. Adjust the shuffle mask so that the
16663// vector can remain in permuted form (to prevent a swap prior to a shuffle).
16664// On big endian targets, this is still useful for SCALAR_TO_VECTOR
16665// nodes with elements smaller than doubleword because all the ways
16666// of getting scalar data into a vector register put the value in the
16667// rightmost element of the left half of the vector.
16668SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
16669 SelectionDAG &DAG) const {
16670 SDValue LHS = SVN->getOperand(0);
16671 SDValue RHS = SVN->getOperand(1);
16672 auto Mask = SVN->getMask();
16673 int NumElts = LHS.getValueType().getVectorNumElements();
16674 SDValue Res(SVN, 0);
16675 SDLoc dl(SVN);
16676 bool IsLittleEndian = Subtarget.isLittleEndian();
16677
16678 // On big endian targets this is only useful for subtargets with direct moves.
16679 // On little endian targets it would be useful for all subtargets with VSX.
16680 // However adding special handling for LE subtargets without direct moves
16681 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
16682 // which includes direct moves.
16683 if (!Subtarget.hasDirectMove())
16684 return Res;
16685
16686 // If this is not a shuffle of a shuffle and the first element comes from
16687 // the second vector, canonicalize to the commuted form. This will make it
16688 // more likely to match one of the single instruction patterns.
16689 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
16690 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
16691 std::swap(LHS, RHS);
16692 Res = DAG.getCommutedVectorShuffle(*SVN);
16693 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16694 }
16695
16696 // Adjust the shuffle mask if either input vector comes from a
16697 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
16698 // form (to prevent the need for a swap).
16699 SmallVector<int, 16> ShuffV(Mask);
16700 SDValue SToVLHS = isScalarToVec(LHS);
16701 SDValue SToVRHS = isScalarToVec(RHS);
16702 if (SToVLHS || SToVRHS) {
16703 EVT VT = SVN->getValueType(0);
16704 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
16705 int ShuffleNumElts = ShuffV.size();
16706 int HalfVec = ShuffleNumElts / 2;
16707 // The width of the "valid lane" (i.e. the lane that contains the value that
16708 // is vectorized) needs to be expressed in terms of the number of elements
16709 // of the shuffle. It is thereby the ratio of the values before and after
16710 // any bitcast, which will be set later on if the LHS or RHS are
16711 // SCALAR_TO_VECTOR nodes.
16712 unsigned LHSNumValidElts = HalfVec;
16713 unsigned RHSNumValidElts = HalfVec;
16714
16715 // Initially assume that neither input is permuted. These will be adjusted
16716 // accordingly if either input is. Note, that -1 means that all elements
16717 // are undefined.
16718 int LHSFirstElt = 0;
16719 int RHSFirstElt = ShuffleNumElts;
16720 int LHSLastElt = -1;
16721 int RHSLastElt = -1;
16722
16723 // Get the permuted scalar to vector nodes for the source(s) that come from
16724 // ISD::SCALAR_TO_VECTOR.
16725 // On big endian systems, this only makes sense for element sizes smaller
16726 // than 64 bits since for 64-bit elements, all instructions already put
16727 // the value into element zero. Since scalar size of LHS and RHS may differ
16728 // after isScalarToVec, this should be checked using their own sizes.
16729 int LHSScalarSize = 0;
16730 int RHSScalarSize = 0;
16731 if (SToVLHS) {
16732 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
16733 if (!IsLittleEndian && LHSScalarSize >= 64)
16734 return Res;
16735 }
16736 if (SToVRHS) {
16737 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
16738 if (!IsLittleEndian && RHSScalarSize >= 64)
16739 return Res;
16740 }
16741 if (LHSScalarSize != 0)
16742 LHS = generateSToVPermutedForVecShuffle(
16743 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
16744 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
16745 if (RHSScalarSize != 0)
16746 RHS = generateSToVPermutedForVecShuffle(
16747 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
16748 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
16749
16750 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
16751 return Res;
16752
16753 // Fix up the shuffle mask to reflect where the desired element actually is.
16754 // The minimum and maximum indices that correspond to element zero for both
16755 // the LHS and RHS are computed and will control which shuffle mask entries
16756 // are to be changed. For example, if the RHS is permuted, any shuffle mask
16757 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
16758 fixupShuffleMaskForPermutedSToV(
16759 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
16760 LHSNumValidElts, RHSNumValidElts, Subtarget);
16761 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16762
16763 // We may have simplified away the shuffle. We won't be able to do anything
16764 // further with it here.
16765 if (!isa<ShuffleVectorSDNode>(Res))
16766 return Res;
16767 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16768 }
16769
16770 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
16771 // The common case after we commuted the shuffle is that the RHS is a splat
16772 // and we have elements coming in from the splat at indices that are not
16773 // conducive to using a merge.
16774 // Example:
16775 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
16776 if (!isSplatBV(TheSplat))
16777 return Res;
16778
16779 // We are looking for a mask such that all even elements are from
16780 // one vector and all odd elements from the other.
16781 if (!isAlternatingShuffMask(Mask, NumElts))
16782 return Res;
16783
16784 // Adjust the mask so we are pulling in the same index from the splat
16785 // as the index from the interesting vector in consecutive elements.
16786 if (IsLittleEndian) {
16787 // Example (even elements from first vector):
16788 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
16789 if (Mask[0] < NumElts)
16790 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16791 if (ShuffV[i] < 0)
16792 continue;
16793 // If element from non-splat is undef, pick first element from splat.
16794 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
16795 }
16796 // Example (odd elements from first vector):
16797 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
16798 else
16799 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16800 if (ShuffV[i] < 0)
16801 continue;
16802 // If element from non-splat is undef, pick first element from splat.
16803 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
16804 }
16805 } else {
16806 // Example (even elements from first vector):
16807 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
16808 if (Mask[0] < NumElts)
16809 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16810 if (ShuffV[i] < 0)
16811 continue;
16812 // If element from non-splat is undef, pick first element from splat.
16813 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
16814 }
16815 // Example (odd elements from first vector):
16816 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
16817 else
16818 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16819 if (ShuffV[i] < 0)
16820 continue;
16821 // If element from non-splat is undef, pick first element from splat.
16822 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
16823 }
16824 }
16825
16826 // If the RHS has undefs, we need to remove them since we may have created
16827 // a shuffle that adds those instead of the splat value.
16828 SDValue SplatVal =
16829 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
16830 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
16831
16832 if (IsLittleEndian)
16833 RHS = TheSplat;
16834 else
16835 LHS = TheSplat;
16836 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16837}
16838
16839SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
16840 LSBaseSDNode *LSBase,
16841 DAGCombinerInfo &DCI) const {
16842 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
16843 "Not a reverse memop pattern!");
16844
16845 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
16846 auto Mask = SVN->getMask();
16847 int i = 0;
16848 auto I = Mask.rbegin();
16849 auto E = Mask.rend();
16850
16851 for (; I != E; ++I) {
16852 if (*I != i)
16853 return false;
16854 i++;
16855 }
16856 return true;
16857 };
16858
16859 SelectionDAG &DAG = DCI.DAG;
16860 EVT VT = SVN->getValueType(0);
16861
16862 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
16863 return SDValue();
16864
16865 // Before Power9, the PPCVSXSwapRemoval pass adjusts the element order instead.
16866 // See the comment in PPCVSXSwapRemoval.cpp.
16867 // This combine conflicts with that optimization, so we don't do it there.
16868 if (!Subtarget.hasP9Vector())
16869 return SDValue();
16870
16871 if (!IsElementReverse(SVN))
16872 return SDValue();
16873
16874 if (LSBase->getOpcode() == ISD::LOAD) {
16875 // If result value 0 of the load has any user other than the
16876 // shufflevector instruction, it is not profitable to replace the
16877 // shufflevector with a reverse load.
16878 for (SDUse &Use : LSBase->uses())
16879 if (Use.getResNo() == 0 &&
16880 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
16881 return SDValue();
16882
16883 SDLoc dl(LSBase);
16884 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
16885 return DAG.getMemIntrinsicNode(
16886 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
16887 LSBase->getMemoryVT(), LSBase->getMemOperand());
16888 }
16889
16890 if (LSBase->getOpcode() == ISD::STORE) {
16891 // If there are other uses of the shuffle, the swap cannot be avoided.
16892 // Forcing the use of an X-Form (since swapped stores only have
16893 // X-Forms) without removing the swap is unprofitable.
16894 if (!SVN->hasOneUse())
16895 return SDValue();
16896
16897 SDLoc dl(LSBase);
16898 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
16899 LSBase->getBasePtr()};
16900 return DAG.getMemIntrinsicNode(
16901 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
16902 LSBase->getMemoryVT(), LSBase->getMemOperand());
16903 }
16904
16905 llvm_unreachable("Expected a load or store node here");
16906}
16907
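// Map a PowerPC store-conditional intrinsic (stbcx./sthcx./stwcx./stdcx.) to
// the width in bytes of the store it performs; returns false for any other
// intrinsic.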
16908static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
16909 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
16910 if (IntrinsicID == Intrinsic::ppc_stdcx)
16911 StoreWidth = 8;
16912 else if (IntrinsicID == Intrinsic::ppc_stwcx)
16913 StoreWidth = 4;
16914 else if (IntrinsicID == Intrinsic::ppc_sthcx)
16915 StoreWidth = 2;
16916 else if (IntrinsicID == Intrinsic::ppc_stbcx)
16917 StoreWidth = 1;
16918 else
16919 return false;
16920 return true;
16921}
16922
16923 static SDValue DAGCombineAddc(SDNode *N,
16924                               PPCTargetLowering::DAGCombinerInfo &DCI) {
16925 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
16926 // (ADDC (ADDE 0, 0, C), -1) -> C
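// In other words: ADDE 0, 0, C materializes the carry bit C as the value 0
// or 1, and adding -1 to that value produces a carry-out of exactly C
// (1 + (-1) carries, 0 + (-1) does not), so the carry result of this ADDC
// is C itself.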
16927 SDValue LHS = N->getOperand(0);
16928 SDValue RHS = N->getOperand(1);
16929 if (LHS->getOpcode() == PPCISD::ADDE &&
16930 isNullConstant(LHS->getOperand(0)) &&
16931 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
16932 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
16933 }
16934 }
16935 return SDValue();
16936}
16937
16938 SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
16939                                              DAGCombinerInfo &DCI) const {
16940 SelectionDAG &DAG = DCI.DAG;
16941 SDLoc dl(N);
16942 switch (N->getOpcode()) {
16943 default: break;
16944 case ISD::ADD:
16945 return combineADD(N, DCI);
16946 case ISD::AND: {
16947 // We don't want (and (zext (shift...)), C) if C fits in the width of the
16948 // original input as that will prevent us from selecting optimal rotates.
16949 // This only matters if the input to the extend is i32 widened to i64.
16950 SDValue Op1 = N->getOperand(0);
16951 SDValue Op2 = N->getOperand(1);
16952 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
16953 Op1.getOpcode() != ISD::ANY_EXTEND) ||
16954 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
16955 Op1.getOperand(0).getValueType() != MVT::i32)
16956 break;
16957 SDValue NarrowOp = Op1.getOperand(0);
16958 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
16959 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
16960 break;
16961
16962 uint64_t Imm = Op2->getAsZExtVal();
16963 // Make sure that the constant is narrow enough to fit in the narrow type.
16964 if (!isUInt<32>(Imm))
16965 break;
16966 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
16967 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
16968 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
16969 }
16970 case ISD::SHL:
16971 return combineSHL(N, DCI);
16972 case ISD::SRA:
16973 return combineSRA(N, DCI);
16974 case ISD::SRL:
16975 return combineSRL(N, DCI);
16976 case ISD::MUL:
16977 return combineMUL(N, DCI);
16978 case ISD::FMA:
16979 case PPCISD::FNMSUB:
16980 return combineFMALike(N, DCI);
16981 case PPCISD::SHL:
16982 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
16983 return N->getOperand(0);
16984 break;
16985 case PPCISD::SRL:
16986 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
16987 return N->getOperand(0);
16988 break;
16989 case PPCISD::SRA:
16990 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
16991 if (C->isZero() || // 0 >>s V -> 0.
16992 C->isAllOnes()) // -1 >>s V -> -1.
16993 return N->getOperand(0);
16994 }
16995 break;
16996 case ISD::SIGN_EXTEND:
16997 case ISD::ZERO_EXTEND:
16998 case ISD::ANY_EXTEND:
16999 return DAGCombineExtBoolTrunc(N, DCI);
17000 case ISD::TRUNCATE:
17001 return combineTRUNCATE(N, DCI);
17002 case ISD::SETCC:
17003 if (SDValue CSCC = combineSetCC(N, DCI))
17004 return CSCC;
17005 [[fallthrough]];
17006 case ISD::SELECT_CC:
17007 return DAGCombineTruncBoolExt(N, DCI);
17008 case ISD::SINT_TO_FP:
17009 case ISD::UINT_TO_FP:
17010 return combineFPToIntToFP(N, DCI);
17011 case ISD::VECTOR_SHUFFLE:
17012 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
17013 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
17014 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
17015 }
17016 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
17017 case ISD::STORE: {
17018
17019 EVT Op1VT = N->getOperand(1).getValueType();
17020 unsigned Opcode = N->getOperand(1).getOpcode();
17021
17022 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17023 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
17024 SDValue Val = combineStoreFPToInt(N, DCI);
17025 if (Val)
17026 return Val;
17027 }
17028
17029 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17030 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
17031 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
17032 if (Val)
17033 return Val;
17034 }
17035
17036 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
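// For example, (store (bswap i32 %x), %ptr) can be emitted as a single
// stwbrx rather than materializing the byte-reversed value and storing it
// with a normal stw.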
17037 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
17038 N->getOperand(1).getNode()->hasOneUse() &&
17039 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
17040 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17041
17042 // STBRX can only handle simple types, and it makes no sense to store fewer
17043 // than two bytes in byte-reversed order.
17044 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
17045 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
17046 break;
17047
17048 SDValue BSwapOp = N->getOperand(1).getOperand(0);
17049 // Do an any-extend to 32-bits if this is a half-word input.
17050 if (BSwapOp.getValueType() == MVT::i16)
17051 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
17052
17053 // If the type of the BSWAP operand is wider than the stored memory width,
17054 // it needs to be shifted right before the STBRX.
17055 if (Op1VT.bitsGT(mVT)) {
17056 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17057 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
17058 DAG.getConstant(Shift, dl, MVT::i32));
17059 // Need to truncate if this is a bswap of i64 stored as i32/i16.
17060 if (Op1VT == MVT::i64)
17061 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
17062 }
17063
17064 SDValue Ops[] = {
17065 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
17066 };
17067 return
17068 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
17069 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
17070 cast<StoreSDNode>(N)->getMemOperand());
17071 }
17072
17073 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17074 // So it can increase the chance of CSE constant construction.
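// For example, a function that stores both an i32 0 and an i64 0 can then
// share a single zero-materializing instruction for both stores.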
17075 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17076 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
17077 // Need to sign-extend to 64 bits to handle negative values.
17078 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
17079 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
17080 MemVT.getSizeInBits());
17081 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
17082
17083 auto *ST = cast<StoreSDNode>(N);
17084 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
17085 ST->getBasePtr(), ST->getOffset(), MemVT,
17086 ST->getMemOperand(), ST->getAddressingMode(),
17087 /*IsTruncating=*/true);
17088 // Note we use CombineTo here to prevent DAGCombiner from visiting the new
17089 // store, which would otherwise change the constant by removing non-demanded bits.
17090 return ST->isUnindexed()
17091 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
17092 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
17093 }
17094
17095 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17096 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17097 if (Op1VT.isSimple()) {
17098 MVT StoreVT = Op1VT.getSimpleVT();
17099 if (Subtarget.needsSwapsForVSXMemOps() &&
17100 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
17101 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
17102 return expandVSXStoreForLE(N, DCI);
17103 }
17104 break;
17105 }
17106 case ISD::LOAD: {
17107 LoadSDNode *LD = cast<LoadSDNode>(N);
17108 EVT VT = LD->getValueType(0);
17109
17110 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17111 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17112 if (VT.isSimple()) {
17113 MVT LoadVT = VT.getSimpleVT();
17114 if (Subtarget.needsSwapsForVSXMemOps() &&
17115 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
17116 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
17117 return expandVSXLoadForLE(N, DCI);
17118 }
17119
17120 // We sometimes end up with a 64-bit integer load, from which we extract
17121 // two single-precision floating-point numbers. This happens with
17122 // std::complex<float>, and other similar structures, because of the way we
17123 // canonicalize structure copies. However, if we lack direct moves,
17124 // then the final bitcasts from the extracted integer values to the
17125 // floating-point numbers turn into store/load pairs. Even with direct moves,
17126 // just loading the two floating-point numbers is likely better.
17127 auto ReplaceTwoFloatLoad = [&]() {
17128 if (VT != MVT::i64)
17129 return false;
17130
17131 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
17132 LD->isVolatile())
17133 return false;
17134
17135 // We're looking for a sequence like this:
17136 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17137 // t16: i64 = srl t13, Constant:i32<32>
17138 // t17: i32 = truncate t16
17139 // t18: f32 = bitcast t17
17140 // t19: i32 = truncate t13
17141 // t20: f32 = bitcast t19
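// When this pattern is found, it is rewritten as two adjacent f32 loads
// (at offsets 0 and 4 from the base pointer) that replace the two bitcasts,
// and the chain is rethreaded so the original i64 load and the
// shift/truncate nodes become dead.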
17142
17143 if (!LD->hasNUsesOfValue(2, 0))
17144 return false;
17145
17146 auto UI = LD->user_begin();
17147 while (UI.getUse().getResNo() != 0) ++UI;
17148 SDNode *Trunc = *UI++;
17149 while (UI.getUse().getResNo() != 0) ++UI;
17150 SDNode *RightShift = *UI;
17151 if (Trunc->getOpcode() != ISD::TRUNCATE)
17152 std::swap(Trunc, RightShift);
17153
17154 if (Trunc->getOpcode() != ISD::TRUNCATE ||
17155 Trunc->getValueType(0) != MVT::i32 ||
17156 !Trunc->hasOneUse())
17157 return false;
17158 if (RightShift->getOpcode() != ISD::SRL ||
17159 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
17160 RightShift->getConstantOperandVal(1) != 32 ||
17161 !RightShift->hasOneUse())
17162 return false;
17163
17164 SDNode *Trunc2 = *RightShift->user_begin();
17165 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
17166 Trunc2->getValueType(0) != MVT::i32 ||
17167 !Trunc2->hasOneUse())
17168 return false;
17169
17170 SDNode *Bitcast = *Trunc->user_begin();
17171 SDNode *Bitcast2 = *Trunc2->user_begin();
17172
17173 if (Bitcast->getOpcode() != ISD::BITCAST ||
17174 Bitcast->getValueType(0) != MVT::f32)
17175 return false;
17176 if (Bitcast2->getOpcode() != ISD::BITCAST ||
17177 Bitcast2->getValueType(0) != MVT::f32)
17178 return false;
17179
17180 if (Subtarget.isLittleEndian())
17181 std::swap(Bitcast, Bitcast2);
17182
17183 // Bitcast has the second float (in memory-layout order) and Bitcast2
17184 // has the first one.
17185
17186 SDValue BasePtr = LD->getBasePtr();
17187 if (LD->isIndexed()) {
17188 assert(LD->getAddressingMode() == ISD::PRE_INC &&
17189 "Non-pre-inc AM on PPC?");
17190 BasePtr =
17191 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17192 LD->getOffset());
17193 }
17194
17195 auto MMOFlags =
17196 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17197 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
17198 LD->getPointerInfo(), LD->getAlign(),
17199 MMOFlags, LD->getAAInfo());
17200 SDValue AddPtr =
17201 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
17202 BasePtr, DAG.getIntPtrConstant(4, dl));
17203 SDValue FloatLoad2 = DAG.getLoad(
17204 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
17205 LD->getPointerInfo().getWithOffset(4),
17206 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
17207
17208 if (LD->isIndexed()) {
17209 // Note that DAGCombine should re-form any pre-increment load(s) from
17210 // what is produced here if that makes sense.
17211 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
17212 }
17213
17214 DCI.CombineTo(Bitcast2, FloatLoad);
17215 DCI.CombineTo(Bitcast, FloatLoad2);
17216
17217 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
17218 SDValue(FloatLoad2.getNode(), 1));
17219 return true;
17220 };
17221
17222 if (ReplaceTwoFloatLoad())
17223 return SDValue(N, 0);
17224
17225 EVT MemVT = LD->getMemoryVT();
17226 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
17227 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17228 if (LD->isUnindexed() && VT.isVector() &&
17229 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17230 // P8 and later hardware should just use LOAD.
17231 !Subtarget.hasP8Vector() &&
17232 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17233 VT == MVT::v4f32))) &&
17234 LD->getAlign() < ABIAlignment) {
17235 // This is a type-legal unaligned Altivec load.
17236 SDValue Chain = LD->getChain();
17237 SDValue Ptr = LD->getBasePtr();
17238 bool isLittleEndian = Subtarget.isLittleEndian();
17239
17240 // This implements the loading of unaligned vectors as described in
17241 // the venerable Apple Velocity Engine overview. Specifically:
17242 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
17243 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17244 //
17245 // The general idea is to expand a sequence of one or more unaligned
17246 // loads into an alignment-based permutation-control instruction (lvsl
17247 // or lvsr), a series of regular vector loads (which always truncate
17248 // their input address to an aligned address), and a series of
17249 // permutations. The results of these permutations are the requested
17250 // loaded values. The trick is that the last "extra" load is not taken
17251 // from the address you might suspect (sizeof(vector) bytes after the
17252 // last requested load), but rather sizeof(vector) - 1 bytes after the
17253 // last requested vector. The point of this is to avoid a page fault if
17254 // the base address happened to be aligned. This works because if the
17255 // base address is aligned, then adding less than a full vector length
17256 // will cause the last vector in the sequence to be (re)loaded.
17257 // Otherwise, the next vector will be fetched from the address you would
17258 // expect.
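// As a rough illustration for a 16-byte vector at an unaligned address A:
// lvsl(A) produces the permute control, one lvx reads the aligned block
// containing A, a second lvx reads the following aligned block, and a
// vperm selects the 16 requested bytes out of the 32 that were loaded.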
17259
17260 // We might be able to reuse the permutation generation from
17261 // a different base address offset from this one by an aligned amount.
17262 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17263 // optimization later.
17264 Intrinsic::ID Intr, IntrLD, IntrPerm;
17265 MVT PermCntlTy, PermTy, LDTy;
17266 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17267 : Intrinsic::ppc_altivec_lvsl;
17268 IntrLD = Intrinsic::ppc_altivec_lvx;
17269 IntrPerm = Intrinsic::ppc_altivec_vperm;
17270 PermCntlTy = MVT::v16i8;
17271 PermTy = MVT::v4i32;
17272 LDTy = MVT::v4i32;
17273
17274 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
17275
17276 // Create the new MMO for the new base load. It is like the original MMO,
17277 // but represents an area in memory almost twice the vector size centered
17278 // on the original address. If the address is unaligned, we might start
17279 // reading up to (sizeof(vector)-1) bytes below the address of the
17280 // original unaligned load.
17281 MachineFunction &MF = DAG.getMachineFunction();
17282 MachineMemOperand *BaseMMO =
17283 MF.getMachineMemOperand(LD->getMemOperand(),
17284 -(int64_t)MemVT.getStoreSize()+1,
17285 2*MemVT.getStoreSize()-1);
17286
17287 // Create the new base load.
17288 SDValue LDXIntID =
17289 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
17290 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
17291 SDValue BaseLoad =
17292 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
17293 DAG.getVTList(PermTy, MVT::Other),
17294 BaseLoadOps, LDTy, BaseMMO);
17295
17296 // Note that the value of IncOffset (which is provided to the next
17297 // load's pointer info offset value, and thus used to calculate the
17298 // alignment), and the value of IncValue (which is actually used to
17299 // increment the pointer value) are different! This is because we
17300 // require the next load to appear to be aligned, even though it
17301 // is actually offset from the base pointer by a lesser amount.
17302 int IncOffset = VT.getSizeInBits() / 8;
17303 int IncValue = IncOffset;
17304
17305 // Walk (both up and down) the chain looking for another load at the real
17306 // (aligned) offset (the alignment of the other load does not matter in
17307 // this case). If found, then do not use the offset reduction trick, as
17308 // that will prevent the loads from being later combined (as they would
17309 // otherwise be duplicates).
17310 if (!findConsecutiveLoad(LD, DAG))
17311 --IncValue;
17312
17313 SDValue Increment =
17314 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
17315 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
17316
17317 MachineMemOperand *ExtraMMO =
17318 MF.getMachineMemOperand(LD->getMemOperand(),
17319 1, 2*MemVT.getStoreSize()-1);
17320 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
17321 SDValue ExtraLoad =
17322 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
17323 DAG.getVTList(PermTy, MVT::Other),
17324 ExtraLoadOps, LDTy, ExtraMMO);
17325
17326 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17327 BaseLoad.getValue(1), ExtraLoad.getValue(1));
17328
17329 // Because vperm has a big-endian bias, we must reverse the order
17330 // of the input vectors and complement the permute control vector
17331 // when generating little endian code. We have already handled the
17332 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
17333 // and ExtraLoad here.
17334 SDValue Perm;
17335 if (isLittleEndian)
17336 Perm = BuildIntrinsicOp(IntrPerm,
17337 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
17338 else
17339 Perm = BuildIntrinsicOp(IntrPerm,
17340 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
17341
17342 if (VT != PermTy)
17343 Perm = Subtarget.hasAltivec()
17344 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
17345 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
17346 DAG.getTargetConstant(1, dl, MVT::i64));
17347 // second argument is 1 because this rounding
17348 // is always exact.
17349
17350 // The output of the permutation is our loaded result, the TokenFactor is
17351 // our new chain.
17352 DCI.CombineTo(N, Perm, TF);
17353 return SDValue(N, 0);
17354 }
17355 }
17356 break;
17357 case ISD::INTRINSIC_WO_CHAIN: {
17358 bool isLittleEndian = Subtarget.isLittleEndian();
17359 unsigned IID = N->getConstantOperandVal(0);
17360 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17361 : Intrinsic::ppc_altivec_lvsl);
17362 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
17363 SDValue Add = N->getOperand(1);
17364
17365 int Bits = 4 /* 16 byte alignment */;
17366
17367 if (DAG.MaskedValueIsZero(Add->getOperand(1),
17368 APInt::getAllOnes(Bits /* alignment */)
17369 .zext(Add.getScalarValueSizeInBits()))) {
17370 SDNode *BasePtr = Add->getOperand(0).getNode();
17371 for (SDNode *U : BasePtr->users()) {
17372 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17373 U->getConstantOperandVal(0) == IID) {
17374 // We've found another LVSL/LVSR, and this address is an aligned
17375 // multiple of that one. The results will be the same, so use the
17376 // one we've just found instead.
17377
17378 return SDValue(U, 0);
17379 }
17380 }
17381 }
17382
17383 if (isa<ConstantSDNode>(Add->getOperand(1))) {
17384 SDNode *BasePtr = Add->getOperand(0).getNode();
17385 for (SDNode *U : BasePtr->users()) {
17386 if (U->getOpcode() == ISD::ADD &&
17387 isa<ConstantSDNode>(U->getOperand(1)) &&
17388 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
17389 (1ULL << Bits) ==
17390 0) {
17391 SDNode *OtherAdd = U;
17392 for (SDNode *V : OtherAdd->users()) {
17393 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17394 V->getConstantOperandVal(0) == IID) {
17395 return SDValue(V, 0);
17396 }
17397 }
17398 }
17399 }
17400 }
17401 }
17402
17403 // Combine vmaxsw/h/b(a, negation of a) into abs(a).
17404 // This exposes the vabsduw/h/b opportunity to downstream combines.
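// For example, vmaxsw(sub(0, a), a) computes max(-x, x) = |x| in each lane,
// which is exactly ISD::ABS (including the INT_MIN lane, where both inputs
// and the result are INT_MIN).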
17405 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17406 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17407 IID == Intrinsic::ppc_altivec_vmaxsh ||
17408 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17409 SDValue V1 = N->getOperand(1);
17410 SDValue V2 = N->getOperand(2);
17411 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17412 V1.getSimpleValueType() == MVT::v8i16 ||
17413 V1.getSimpleValueType() == MVT::v16i8) &&
17414 V1.getSimpleValueType() == V2.getSimpleValueType()) {
17415 // (0-a, a)
17416 if (V1.getOpcode() == ISD::SUB &&
17417 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
17418 V1.getOperand(1) == V2) {
17419 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
17420 }
17421 // (a, 0-a)
17422 if (V2.getOpcode() == ISD::SUB &&
17423 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
17424 V2.getOperand(1) == V1) {
17425 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17426 }
17427 // (x-y, y-x)
17428 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17429 V1.getOperand(0) == V2.getOperand(1) &&
17430 V1.getOperand(1) == V2.getOperand(0)) {
17431 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17432 }
17433 }
17434 }
17435 }
17436
17437 break;
17438 case ISD::INTRINSIC_W_CHAIN:
17439 switch (N->getConstantOperandVal(1)) {
17440 default:
17441 break;
17442 case Intrinsic::ppc_altivec_vsum4sbs:
17443 case Intrinsic::ppc_altivec_vsum4shs:
17444 case Intrinsic::ppc_altivec_vsum4ubs: {
17445 // These sum-across intrinsics only have a chain due to the side effect
17446 // that they may set the SAT bit. If we know the SAT bit will not be set
17447 // for some inputs, we can replace any uses of their chain with the
17448 // input chain.
17449 if (BuildVectorSDNode *BVN =
17450 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
17451 APInt APSplatBits, APSplatUndef;
17452 unsigned SplatBitSize;
17453 bool HasAnyUndefs;
17454 bool BVNIsConstantSplat = BVN->isConstantSplat(
17455 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
17456 !Subtarget.isLittleEndian());
17457 // If the constant splat vector is 0, the SAT bit will not be set.
17458 if (BVNIsConstantSplat && APSplatBits == 0)
17459 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
17460 }
17461 return SDValue();
17462 }
17463 case Intrinsic::ppc_vsx_lxvw4x:
17464 case Intrinsic::ppc_vsx_lxvd2x:
17465 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17466 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17467 if (Subtarget.needsSwapsForVSXMemOps())
17468 return expandVSXLoadForLE(N, DCI);
17469 break;
17470 }
17471 break;
17472 case ISD::INTRINSIC_VOID:
17473 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17474 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17475 if (Subtarget.needsSwapsForVSXMemOps()) {
17476 switch (N->getConstantOperandVal(1)) {
17477 default:
17478 break;
17479 case Intrinsic::ppc_vsx_stxvw4x:
17480 case Intrinsic::ppc_vsx_stxvd2x:
17481 return expandVSXStoreForLE(N, DCI);
17482 }
17483 }
17484 break;
17485 case ISD::BSWAP: {
17486 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17487 // For subtargets without LDBRX, we can still do better than the default
17488 // expansion even for 64-bit BSWAP (LOAD).
17489 bool Is64BitBswapOn64BitTgt =
17490 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
17491 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
17492 N->getOperand(0).hasOneUse();
17493 if (IsSingleUseNormalLd &&
17494 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
17495 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17496 SDValue Load = N->getOperand(0);
17497 LoadSDNode *LD = cast<LoadSDNode>(Load);
17498 // Create the byte-swapping load.
17499 SDValue Ops[] = {
17500 LD->getChain(), // Chain
17501 LD->getBasePtr(), // Ptr
17502 DAG.getValueType(N->getValueType(0)) // VT
17503 };
17504 SDValue BSLoad =
17505 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
17506 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
17507 MVT::i64 : MVT::i32, MVT::Other),
17508 Ops, LD->getMemoryVT(), LD->getMemOperand());
17509
17510 // If this is an i16 load, insert the truncate.
17511 SDValue ResVal = BSLoad;
17512 if (N->getValueType(0) == MVT::i16)
17513 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
17514
17515 // First, combine the bswap away. This makes the value produced by the
17516 // load dead.
17517 DCI.CombineTo(N, ResVal);
17518
17519 // Next, combine the load away, we give it a bogus result value but a real
17520 // chain result. The result value is dead because the bswap is dead.
17521 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
17522
17523 // Return N so it doesn't get rechecked!
17524 return SDValue(N, 0);
17525 }
17526 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
17527 // before legalization so that the BUILD_PAIR is handled correctly.
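// bswap of an i64 reverses the bytes within each 32-bit half and also
// exchanges the two halves; the two byte-reversed i32 loads handle the
// former, and the BUILD_PAIR operand order below handles the latter.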
17528 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
17529 !IsSingleUseNormalLd)
17530 return SDValue();
17531 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
17532
17533 // Can't split volatile or atomic loads.
17534 if (!LD->isSimple())
17535 return SDValue();
17536 SDValue BasePtr = LD->getBasePtr();
17537 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
17538 LD->getPointerInfo(), LD->getAlign());
17539 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
17540 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17541 DAG.getIntPtrConstant(4, dl));
17542 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
17543 LD->getMemOperand(), 4, 4);
17544 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
17545 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
17546 SDValue Res;
17547 if (Subtarget.isLittleEndian())
17548 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
17549 else
17550 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
17551 SDValue TF =
17552 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17553 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
17554 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
17555 return Res;
17556 }
17557 case PPCISD::VCMP:
17558 // If a VCMP_rec node already exists with exactly the same operands as this
17559 // node, use its result instead of this node (VCMP_rec computes both a CR6
17560 // and a normal output).
17561 //
17562 if (!N->getOperand(0).hasOneUse() &&
17563 !N->getOperand(1).hasOneUse() &&
17564 !N->getOperand(2).hasOneUse()) {
17565
17566 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
17567 SDNode *VCMPrecNode = nullptr;
17568
17569 SDNode *LHSN = N->getOperand(0).getNode();
17570 for (SDNode *User : LHSN->users())
17571 if (User->getOpcode() == PPCISD::VCMP_rec &&
17572 User->getOperand(1) == N->getOperand(1) &&
17573 User->getOperand(2) == N->getOperand(2) &&
17574 User->getOperand(0) == N->getOperand(0)) {
17575 VCMPrecNode = User;
17576 break;
17577 }
17578
17579 // If there is no VCMP_rec node, or if the flag value has a single use,
17580 // don't transform this.
17581 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
17582 break;
17583
17584 // Look at the (necessarily single) use of the flag value. If it has a
17585 // chain, this transformation is more complex. Note that multiple things
17586 // could use the value result, which we should ignore.
17587 SDNode *FlagUser = nullptr;
17588 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
17589 FlagUser == nullptr; ++UI) {
17590 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
17591 SDNode *User = UI->getUser();
17592 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
17593 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
17594 FlagUser = User;
17595 break;
17596 }
17597 }
17598 }
17599
17600 // If the user is a MFOCRF instruction, we know this is safe.
17601 // Otherwise we give up for right now.
17602 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
17603 return SDValue(VCMPrecNode, 0);
17604 }
17605 break;
17606 case ISD::BR_CC: {
17607 // If this is a branch on an altivec predicate comparison, lower this so
17608 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
17609 // lowering is done pre-legalize, because the legalizer lowers the predicate
17610 // compare down to code that is difficult to reassemble.
17611 // This code also handles branches that depend on the result of a store
17612 // conditional.
17613 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
17614 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
17615
17616 int CompareOpc;
17617 bool isDot;
17618
17619 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
17620 break;
17621
17622 // Since we are doing this pre-legalize, the RHS can be a constant of
17623 // arbitrary bitwidth which may cause issues when trying to get the value
17624 // from the underlying APInt.
17625 auto RHSAPInt = RHS->getAsAPIntVal();
17626 if (!RHSAPInt.isIntN(64))
17627 break;
17628
17629 unsigned Val = RHSAPInt.getZExtValue();
17630 auto isImpossibleCompare = [&]() {
17631 // If this is a comparison against something other than 0/1, then we know
17632 // that the condition is never/always true.
17633 if (Val != 0 && Val != 1) {
17634 if (CC == ISD::SETEQ) // Cond never true, remove branch.
17635 return N->getOperand(0);
17636 // Always !=, turn it into an unconditional branch.
17637 return DAG.getNode(ISD::BR, dl, MVT::Other,
17638 N->getOperand(0), N->getOperand(4));
17639 }
17640 return SDValue();
17641 };
17642 // Combine branches fed by store conditional instructions (st[bhwd]cx).
17643 unsigned StoreWidth = 0;
17644 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
17645 isStoreConditional(LHS, StoreWidth)) {
17646 if (SDValue Impossible = isImpossibleCompare())
17647 return Impossible;
17648 PPC::Predicate CompOpc;
17649 // eq 0 => ne
17650 // ne 0 => eq
17651 // eq 1 => eq
17652 // ne 1 => ne
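// The intrinsic's i32 result is nonzero exactly when the store conditional
// succeeded (CR0.EQ set), so a compare against 0 flips the predicate while a
// compare against 1 preserves it.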
17653 if (Val == 0)
17654 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
17655 else
17656 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
17657
17658 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
17659 DAG.getConstant(StoreWidth, dl, MVT::i32)};
17660 auto *MemNode = cast<MemSDNode>(LHS);
17661 SDValue ConstSt = DAG.getMemIntrinsicNode(
17663 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
17664 MemNode->getMemoryVT(), MemNode->getMemOperand());
17665
17666 SDValue InChain;
17667 // Unchain the branch from the original store conditional.
17668 if (N->getOperand(0) == LHS.getValue(1))
17669 InChain = LHS.getOperand(0);
17670 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
17671 SmallVector<SDValue, 4> InChains;
17672 SDValue InTF = N->getOperand(0);
17673 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
17674 if (InTF.getOperand(i) != LHS.getValue(1))
17675 InChains.push_back(InTF.getOperand(i));
17676 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
17677 }
17678
17679 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
17680 DAG.getConstant(CompOpc, dl, MVT::i32),
17681 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
17682 ConstSt.getValue(2));
17683 }
17684
17685 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17686 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
17687 assert(isDot && "Can't compare against a vector result!");
17688
17689 if (SDValue Impossible = isImpossibleCompare())
17690 return Impossible;
17691
17692 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
17693 // Create the PPCISD altivec 'dot' comparison node.
17694 SDValue Ops[] = {
17695 LHS.getOperand(2), // LHS of compare
17696 LHS.getOperand(3), // RHS of compare
17697 DAG.getConstant(CompareOpc, dl, MVT::i32)
17698 };
17699 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
17700 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
17701
17702 // Unpack the result based on how the target uses it.
17703 PPC::Predicate CompOpc;
17704 switch (LHS.getConstantOperandVal(1)) {
17705 default: // Can't happen, don't crash on invalid number though.
17706 case 0: // Branch on the value of the EQ bit of CR6.
17707 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
17708 break;
17709 case 1: // Branch on the inverted value of the EQ bit of CR6.
17710 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
17711 break;
17712 case 2: // Branch on the value of the LT bit of CR6.
17713 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
17714 break;
17715 case 3: // Branch on the inverted value of the LT bit of CR6.
17716 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
17717 break;
17718 }
17719
17720 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
17721 DAG.getConstant(CompOpc, dl, MVT::i32),
17722 DAG.getRegister(PPC::CR6, MVT::i32),
17723 N->getOperand(4), CompNode.getValue(1));
17724 }
17725 break;
17726 }
17727 case ISD::BUILD_VECTOR:
17728 return DAGCombineBuildVector(N, DCI);
17729 case PPCISD::ADDC:
17730 return DAGCombineAddc(N, DCI);
17731 }
17732
17733 return SDValue();
17734}
17735
17736SDValue
17737 PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
17738                                  SelectionDAG &DAG,
17739 SmallVectorImpl<SDNode *> &Created) const {
17740 // fold (sdiv X, pow2)
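// For a power of two 2^k this is emitted as an arithmetic shift right by k
// followed by addze: the shift alone rounds toward negative infinity, and
// adding the carry (set when a negative value had nonzero bits shifted out)
// corrects the quotient toward zero. For a negated power of two, the quotient
// is additionally negated below.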
17741 EVT VT = N->getValueType(0);
17742 if (VT == MVT::i64 && !Subtarget.isPPC64())
17743 return SDValue();
17744 if ((VT != MVT::i32 && VT != MVT::i64) ||
17745 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
17746 return SDValue();
17747
17748 SDLoc DL(N);
17749 SDValue N0 = N->getOperand(0);
17750
17751 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
17752 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
17753 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
17754
17755 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
17756 Created.push_back(Op.getNode());
17757
17758 if (IsNegPow2) {
17759 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
17760 Created.push_back(Op.getNode());
17761 }
17762
17763 return Op;
17764}
17765
17766//===----------------------------------------------------------------------===//
17767// Inline Assembly Support
17768//===----------------------------------------------------------------------===//
17769
17770 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17771                                                       KnownBits &Known,
17772 const APInt &DemandedElts,
17773 const SelectionDAG &DAG,
17774 unsigned Depth) const {
17775 Known.resetAll();
17776 switch (Op.getOpcode()) {
17777 default: break;
17778 case PPCISD::LBRX: {
17779 // lhbrx is known to have the top bits cleared out.
17780 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
17781 Known.Zero = 0xFFFF0000;
17782 break;
17783 }
17784 case PPCISD::ADDE: {
17785 if (Op.getResNo() == 0) {
17786 // (0|1), _ = ADDE 0, 0, CARRY
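// The first result is just the incoming carry, so it can only be 0 or 1 and
// every bit above bit 0 is known to be zero.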
17787 SDValue LHS = Op.getOperand(0);
17788 SDValue RHS = Op.getOperand(1);
17789 if (isNullConstant(LHS) && isNullConstant(RHS))
17790 Known.Zero = ~1ULL;
17791 }
17792 break;
17793 }
17794 case ISD::INTRINSIC_WO_CHAIN: {
17795 switch (Op.getConstantOperandVal(0)) {
17796 default: break;
17797 case Intrinsic::ppc_altivec_vcmpbfp_p:
17798 case Intrinsic::ppc_altivec_vcmpeqfp_p:
17799 case Intrinsic::ppc_altivec_vcmpequb_p:
17800 case Intrinsic::ppc_altivec_vcmpequh_p:
17801 case Intrinsic::ppc_altivec_vcmpequw_p:
17802 case Intrinsic::ppc_altivec_vcmpequd_p:
17803 case Intrinsic::ppc_altivec_vcmpequq_p:
17804 case Intrinsic::ppc_altivec_vcmpgefp_p:
17805 case Intrinsic::ppc_altivec_vcmpgtfp_p:
17806 case Intrinsic::ppc_altivec_vcmpgtsb_p:
17807 case Intrinsic::ppc_altivec_vcmpgtsh_p:
17808 case Intrinsic::ppc_altivec_vcmpgtsw_p:
17809 case Intrinsic::ppc_altivec_vcmpgtsd_p:
17810 case Intrinsic::ppc_altivec_vcmpgtsq_p:
17811 case Intrinsic::ppc_altivec_vcmpgtub_p:
17812 case Intrinsic::ppc_altivec_vcmpgtuh_p:
17813 case Intrinsic::ppc_altivec_vcmpgtuw_p:
17814 case Intrinsic::ppc_altivec_vcmpgtud_p:
17815 case Intrinsic::ppc_altivec_vcmpgtuq_p:
17816 Known.Zero = ~1U; // All bits but the low one are known to be zero.
17817 break;
17818 }
17819 break;
17820 }
17821 case ISD::INTRINSIC_W_CHAIN: {
17822 switch (Op.getConstantOperandVal(1)) {
17823 default:
17824 break;
17825 case Intrinsic::ppc_load2r:
17826 // Top bits are cleared for load2r (which is the same as lhbrx).
17827 Known.Zero = 0xFFFF0000;
17828 break;
17829 }
17830 break;
17831 }
17832 }
17833}
17834
17835 Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
17836 switch (Subtarget.getCPUDirective()) {
17837 default: break;
17838 case PPC::DIR_970:
17839 case PPC::DIR_PWR4:
17840 case PPC::DIR_PWR5:
17841 case PPC::DIR_PWR5X:
17842 case PPC::DIR_PWR6:
17843 case PPC::DIR_PWR6X:
17844 case PPC::DIR_PWR7:
17845 case PPC::DIR_PWR8:
17846 case PPC::DIR_PWR9:
17847 case PPC::DIR_PWR10:
17848 case PPC::DIR_PWR11:
17849 case PPC::DIR_PWR_FUTURE: {
17850 if (!ML)
17851 break;
17852
17853 if (!DisableInnermostLoopAlign32) {
17854 // If the nested loop is an innermost loop, prefer a 32-byte alignment
17855 // so that we can reduce cache misses and branch-prediction misses.
17856 // Actual alignment of the loop will depend on the hotness check and other
17857 // logic in alignBlocks.
17858 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
17859 return Align(32);
17860 }
17861
17862 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
17863
17864 // For small loops (between 5 and 8 instructions), align to a 32-byte
17865 // boundary so that the entire loop fits in one instruction-cache line.
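// For example, a 7-instruction (28-byte) loop aligned to 32 bytes is fetched
// as a single 32-byte block instead of straddling two.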
17866 uint64_t LoopSize = 0;
17867 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
17868 for (const MachineInstr &J : **I) {
17869 LoopSize += TII->getInstSizeInBytes(J);
17870 if (LoopSize > 32)
17871 break;
17872 }
17873
17874 if (LoopSize > 16 && LoopSize <= 32)
17875 return Align(32);
17876
17877 break;
17878 }
17879 }
17880
17882}
17883
17884/// getConstraintType - Given a constraint, return the type of
17885/// constraint it is for this target.
17886 PPCTargetLowering::ConstraintType
17887 PPCTargetLowering::getConstraintType(StringRef Constraint) const {
17888 if (Constraint.size() == 1) {
17889 switch (Constraint[0]) {
17890 default: break;
17891 case 'b':
17892 case 'r':
17893 case 'f':
17894 case 'd':
17895 case 'v':
17896 case 'y':
17897 return C_RegisterClass;
17898 case 'Z':
17899 // FIXME: While Z does indicate a memory constraint, it specifically
17900 // indicates an r+r address (used in conjunction with the 'y' modifier
17901 // in the replacement string). Currently, we're forcing the base
17902 // register to be r0 in the asm printer (which is interpreted as zero)
17903 // and forming the complete address in the second register. This is
17904 // suboptimal.
17905 return C_Memory;
17906 }
17907 } else if (Constraint == "wc") { // individual CR bits.
17908 return C_RegisterClass;
17909 } else if (Constraint == "wa" || Constraint == "wd" ||
17910 Constraint == "wf" || Constraint == "ws" ||
17911 Constraint == "wi" || Constraint == "ww") {
17912 return C_RegisterClass; // VSX registers.
17913 }
17914 return TargetLowering::getConstraintType(Constraint);
17915}
17916
17917/// Examine constraint type and operand type and determine a weight value.
17918/// This object must already have been set up with the operand type
17919/// and the current alternative constraint selected.
17920 TargetLowering::ConstraintWeight
17921 PPCTargetLowering::getSingleConstraintMatchWeight(
17922     AsmOperandInfo &info, const char *constraint) const {
17923 ConstraintWeight weight = CW_Invalid;
17924 Value *CallOperandVal = info.CallOperandVal;
17925 // If we don't have a value, we can't do a match,
17926 // but allow it at the lowest weight.
17927 if (!CallOperandVal)
17928 return CW_Default;
17929 Type *type = CallOperandVal->getType();
17930
17931 // Look at the constraint type.
17932 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
17933 return CW_Register; // an individual CR bit.
17934 else if ((StringRef(constraint) == "wa" ||
17935 StringRef(constraint) == "wd" ||
17936 StringRef(constraint) == "wf") &&
17937 type->isVectorTy())
17938 return CW_Register;
17939 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
17940 return CW_Register; // holds 64-bit integer data.
17941 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
17942 return CW_Register;
17943 else if (StringRef(constraint) == "ww" && type->isFloatTy())
17944 return CW_Register;
17945
17946 switch (*constraint) {
17947 default:
17948 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
17949 break;
17950 case 'b':
17951 if (type->isIntegerTy())
17952 weight = CW_Register;
17953 break;
17954 case 'f':
17955 if (type->isFloatTy())
17956 weight = CW_Register;
17957 break;
17958 case 'd':
17959 if (type->isDoubleTy())
17960 weight = CW_Register;
17961 break;
17962 case 'v':
17963 if (type->isVectorTy())
17964 weight = CW_Register;
17965 break;
17966 case 'y':
17967 weight = CW_Register;
17968 break;
17969 case 'Z':
17970 weight = CW_Memory;
17971 break;
17972 }
17973 return weight;
17974}
17975
17976std::pair<unsigned, const TargetRegisterClass *>
17977 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
17978                                                 StringRef Constraint,
17979 MVT VT) const {
17980 if (Constraint.size() == 1) {
17981 // GCC RS6000 Constraint Letters
17982 switch (Constraint[0]) {
17983 case 'b': // R1-R31
17984 if (VT == MVT::i64 && Subtarget.isPPC64())
17985 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
17986 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
17987 case 'r': // R0-R31
17988 if (VT == MVT::i64 && Subtarget.isPPC64())
17989 return std::make_pair(0U, &PPC::G8RCRegClass);
17990 return std::make_pair(0U, &PPC::GPRCRegClass);
17991 // 'd' and 'f' constraints are both defined to be "the floating point
17992 // registers", where one is for 32-bit and the other for 64-bit. We don't
17993 // really care overly much here so just give them all the same reg classes.
17994 case 'd':
17995 case 'f':
17996 if (Subtarget.hasSPE()) {
17997 if (VT == MVT::f32 || VT == MVT::i32)
17998 return std::make_pair(0U, &PPC::GPRCRegClass);
17999 if (VT == MVT::f64 || VT == MVT::i64)
18000 return std::make_pair(0U, &PPC::SPERCRegClass);
18001 } else {
18002 if (VT == MVT::f32 || VT == MVT::i32)
18003 return std::make_pair(0U, &PPC::F4RCRegClass);
18004 if (VT == MVT::f64 || VT == MVT::i64)
18005 return std::make_pair(0U, &PPC::F8RCRegClass);
18006 }
18007 break;
18008 case 'v':
18009 if (Subtarget.hasAltivec() && VT.isVector())
18010 return std::make_pair(0U, &PPC::VRRCRegClass);
18011 else if (Subtarget.hasVSX())
18012 // Scalars in Altivec registers only make sense with VSX.
18013 return std::make_pair(0U, &PPC::VFRCRegClass);
18014 break;
18015 case 'y': // crrc
18016 return std::make_pair(0U, &PPC::CRRCRegClass);
18017 }
18018 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
18019 // An individual CR bit.
18020 return std::make_pair(0U, &PPC::CRBITRCRegClass);
18021 } else if ((Constraint == "wa" || Constraint == "wd" ||
18022 Constraint == "wf" || Constraint == "wi") &&
18023 Subtarget.hasVSX()) {
18024 // A VSX register for either a scalar (FP) or vector. There is no
18025 // support for single precision scalars on subtargets prior to Power8.
18026 if (VT.isVector())
18027 return std::make_pair(0U, &PPC::VSRCRegClass);
18028 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18029 return std::make_pair(0U, &PPC::VSSRCRegClass);
18030 return std::make_pair(0U, &PPC::VSFRCRegClass);
18031 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
18032 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18033 return std::make_pair(0U, &PPC::VSSRCRegClass);
18034 else
18035 return std::make_pair(0U, &PPC::VSFRCRegClass);
18036 } else if (Constraint == "lr") {
18037 if (VT == MVT::i64)
18038 return std::make_pair(0U, &PPC::LR8RCRegClass);
18039 else
18040 return std::make_pair(0U, &PPC::LRRCRegClass);
18041 }
18042
18043 // Handle special cases of physical registers that are not properly handled
18044 // by the base class.
18045 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
18046 // If we name a VSX register, we can't defer to the base class because it
18047 // will not recognize the correct register (their names will be VSL{0-31}
18048 // and V{0-31} so they won't match). So we match them here.
18049 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
18050 int VSNum = atoi(Constraint.data() + 3);
18051 assert(VSNum >= 0 && VSNum <= 63 &&
18052 "Attempted to access a vsr out of range");
18053 if (VSNum < 32)
18054 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
18055 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
18056 }
18057
18058 // For float registers, we can't defer to the base class as it will match
18059 // the SPILLTOVSRRC class.
18060 if (Constraint.size() > 3 && Constraint[1] == 'f') {
18061 int RegNum = atoi(Constraint.data() + 2);
18062 if (RegNum > 31 || RegNum < 0)
18063 report_fatal_error("Invalid floating point register number");
18064 if (VT == MVT::f32 || VT == MVT::i32)
18065 return Subtarget.hasSPE()
18066 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
18067 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
18068 if (VT == MVT::f64 || VT == MVT::i64)
18069 return Subtarget.hasSPE()
18070 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
18071 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
18072 }
18073 }
18074
18075 std::pair<unsigned, const TargetRegisterClass *> R =
18076 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
18077 
18078 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
18079 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
18080 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
18081 // register.
18082 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
18083 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
18084 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
18085 PPC::GPRCRegClass.contains(R.first))
18086 return std::make_pair(TRI->getMatchingSuperReg(R.first,
18087 PPC::sub_32, &PPC::G8RCRegClass),
18088 &PPC::G8RCRegClass);
18089
18090 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
18091 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
18092 R.first = PPC::CR0;
18093 R.second = &PPC::CRRCRegClass;
18094 }
18095 // FIXME: This warning should ideally be emitted in the front end.
18096 const auto &TM = getTargetMachine();
18097 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
18098 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
18099 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
18100 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
18101 errs() << "warning: vector registers 20 to 31 are reserved in the "
18102 "default AIX AltiVec ABI and cannot be used\n";
18103 }
18104
18105 return R;
18106}
18107
18108/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
18109/// vector. If it is invalid, don't add anything to Ops.
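/// For example, for the "I" constraint (a signed 16-bit constant), an operand
/// value of 42 is added to Ops as a 64-bit target constant, while a value of
/// 100000 does not match and Ops is left untouched.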
18110 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
18111                                                      StringRef Constraint,
18112 std::vector<SDValue> &Ops,
18113 SelectionDAG &DAG) const {
18114 SDValue Result;
18115
18116 // Only support length 1 constraints.
18117 if (Constraint.size() > 1)
18118 return;
18119
18120 char Letter = Constraint[0];
18121 switch (Letter) {
18122 default: break;
18123 case 'I':
18124 case 'J':
18125 case 'K':
18126 case 'L':
18127 case 'M':
18128 case 'N':
18129 case 'O':
18130 case 'P': {
18131 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
18132 if (!CST) return; // Must be an immediate to match.
18133 SDLoc dl(Op);
18134 int64_t Value = CST->getSExtValue();
18135 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
18136 // numbers are printed as such.
18137 switch (Letter) {
18138 default: llvm_unreachable("Unknown constraint letter!");
18139 case 'I': // "I" is a signed 16-bit constant.
18140 if (isInt<16>(Value))
18141 Result = DAG.getTargetConstant(Value, dl, TCVT);
18142 break;
18143 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
18144 if (isShiftedUInt<16, 16>(Value))
18145 Result = DAG.getTargetConstant(Value, dl, TCVT);
18146 break;
18147 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
18148 if (isShiftedInt<16, 16>(Value))
18149 Result = DAG.getTargetConstant(Value, dl, TCVT);
18150 break;
18151 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
18152 if (isUInt<16>(Value))
18153 Result = DAG.getTargetConstant(Value, dl, TCVT);
18154 break;
18155 case 'M': // "M" is a constant that is greater than 31.
18156 if (Value > 31)
18157 Result = DAG.getTargetConstant(Value, dl, TCVT);
18158 break;
18159 case 'N': // "N" is a positive constant that is an exact power of two.
18160 if (Value > 0 && isPowerOf2_64(Value))
18161 Result = DAG.getTargetConstant(Value, dl, TCVT);
18162 break;
18163 case 'O': // "O" is the constant zero.
18164 if (Value == 0)
18165 Result = DAG.getTargetConstant(Value, dl, TCVT);
18166 break;
18167 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
18168 if (isInt<16>(-Value))
18169 Result = DAG.getTargetConstant(Value, dl, TCVT);
18170 break;
18171 }
18172 break;
18173 }
18174 }
18175
18176 if (Result.getNode()) {
18177 Ops.push_back(Result);
18178 return;
18179 }
18180
18181 // Handle standard constraint letters.
18183}
18184
18185 void PPCTargetLowering::CollectTargetIntrinsicOperands(
18186     const CallInst &I, SmallVectorImpl<SDValue> &Ops,
18187     SelectionDAG &DAG) const {
18188 if (I.getNumOperands() <= 1)
18189 return;
18190 if (!isa<ConstantSDNode>(Ops[1].getNode()))
18191 return;
18192 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
18193 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18194 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18195 return;
18196
18197 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
18198 Ops.push_back(DAG.getMDNode(MDN));
18199}
18200
18201// isLegalAddressingMode - Return true if the addressing mode represented
18202// by AM is legal for this target, for a load/store of the specified type.
18203 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
18204                                               const AddrMode &AM, Type *Ty,
18205 unsigned AS,
18206 Instruction *I) const {
18207 // The vector-type r+i form has been supported since Power9 as the DQ form. We
18208 // don't check that the offset meets the DQ-form requirement (off % 16 == 0),
18209 // because on PowerPC the immediate form is preferred, and the offset can be
18210 // adjusted to use it later in the PPCLoopInstrFormPrep pass. Also, LSR uses
18211 // the min and max offsets of an LSRUse to check for a legal addressing mode,
18212 // so we should be a little aggressive and accept the other offsets of that LSRUse.
18213 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
18214 return false;
18215
18216 // PPC allows a sign-extended 16-bit immediate field.
18217 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
18218 return false;
18219
18220 // No global is ever allowed as a base.
18221 if (AM.BaseGV)
18222 return false;
18223
18224 // PPC only supports r+r and r+i addressing.
18225 switch (AM.Scale) {
18226 case 0: // "r+i" or just "i", depending on HasBaseReg.
18227 break;
18228 case 1:
18229 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18230 return false;
18231 // Otherwise we have r+r or r+i.
18232 break;
18233 case 2:
18234 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
18235 return false;
18236 // Allow 2*r as r+r.
18237 break;
18238 default:
18239 // No other scales are supported.
18240 return false;
18241 }
18242
18243 return true;
18244}
18245
18246SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
18247 SelectionDAG &DAG) const {
18248 MachineFunction &MF = DAG.getMachineFunction();
18249 MachineFrameInfo &MFI = MF.getFrameInfo();
18250 MFI.setReturnAddressIsTaken(true);
18251
18252 SDLoc dl(Op);
18253 unsigned Depth = Op.getConstantOperandVal(0);
18254
18255 // Make sure the function does not optimize away the store of the RA to
18256 // the stack.
18257 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
18258 FuncInfo->setLRStoreRequired();
18259 auto PtrVT = getPointerTy(MF.getDataLayout());
18260
18261 if (Depth > 0) {
18262 // The link register (return address) is saved in the caller's frame
18263 // not the callee's stack frame. So we must get the caller's frame
18264 // address and load the return address at the LR offset from there.
18265 SDValue FrameAddr =
18266 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18267 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
18268 SDValue Offset =
18269 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
18270 Subtarget.getScalarIntVT());
18271 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
18272 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
18273 MachinePointerInfo());
18274 }
18275
18276 // Just load the return address off the stack.
18277 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
18278 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
18280}
18281
18282SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
18283 SelectionDAG &DAG) const {
18284 SDLoc dl(Op);
18285 unsigned Depth = Op.getConstantOperandVal(0);
18286
18287 MachineFunction &MF = DAG.getMachineFunction();
18288 MachineFrameInfo &MFI = MF.getFrameInfo();
18289 MFI.setFrameAddressIsTaken(true);
18290
18291 EVT PtrVT = getPointerTy(MF.getDataLayout());
18292 bool isPPC64 = PtrVT == MVT::i64;
18293
18294 // Naked functions never have a frame pointer, and so we use r1. For all
18295 // other functions, this decision must be delayed until during PEI.
18296 unsigned FrameReg;
18297 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
18298 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
18299 else
18300 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
18301
18302 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
18303 PtrVT);
18304 while (Depth--)
18305 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18306 FrameAddr, MachinePointerInfo());
18307 return FrameAddr;
18308}
18309
18310#define GET_REGISTER_MATCHER
18311#include "PPCGenAsmMatcher.inc"
18312
18313 Register PPCTargetLowering::getRegisterByName(const char *RegName, LLT VT,
18314                                               const MachineFunction &MF) const {
18315 bool IsPPC64 = Subtarget.isPPC64();
18316
18317 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
18318 if (!Is64Bit && VT != LLT::scalar(32))
18319 report_fatal_error("Invalid register global variable type");
18320
18321  Register Reg = MatchRegisterName(RegName);
18322  if (!Reg)
18323 return Reg;
18324
18325 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
18326 // Need followup investigation as to why.
18327 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
18328 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
18329 StringRef(RegName) + "\"."));
18330
18331 // Convert GPR to GP8R register for 64bit.
18332 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
18333 Reg = Reg.id() - PPC::R0 + PPC::X0;
18334
18335 return Reg;
18336}
18337
18338bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
18339  // The 32-bit SVR4 ABI accesses everything as got-indirect.
18340 if (Subtarget.is32BitELFABI())
18341 return true;
18342
18343 // AIX accesses everything indirectly through the TOC, which is similar to
18344 // the GOT.
18345 if (Subtarget.isAIXABI())
18346 return true;
18347
18348  CodeModel::Model CModel = getTargetMachine().getCodeModel();
18349  // If it is small or large code model, module locals are accessed
18350 // indirectly by loading their address from .toc/.got.
18351 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
18352 return true;
18353
18354  // JumpTable and BlockAddress are accessed as got-indirect.
18355  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
18356    return true;
18357
18358  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
18359    return Subtarget.isGVIndirectSymbol(G->getGlobal());
18360
18361 return false;
18362}
18363
18364bool
18365PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
18366  // The PowerPC target isn't yet aware of offsets.
18367 return false;
18368}
18369
18370bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
18371                                           const CallInst &I,
18372 MachineFunction &MF,
18373 unsigned Intrinsic) const {
18374 switch (Intrinsic) {
18375 case Intrinsic::ppc_atomicrmw_xchg_i128:
18376 case Intrinsic::ppc_atomicrmw_add_i128:
18377 case Intrinsic::ppc_atomicrmw_sub_i128:
18378 case Intrinsic::ppc_atomicrmw_nand_i128:
18379 case Intrinsic::ppc_atomicrmw_and_i128:
18380 case Intrinsic::ppc_atomicrmw_or_i128:
18381 case Intrinsic::ppc_atomicrmw_xor_i128:
18382 case Intrinsic::ppc_cmpxchg_i128:
18383 Info.opc = ISD::INTRINSIC_W_CHAIN;
18384 Info.memVT = MVT::i128;
18385 Info.ptrVal = I.getArgOperand(0);
18386 Info.offset = 0;
18387 Info.align = Align(16);
18388    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
18389                 MachineMemOperand::MOVolatile;
18390    return true;
18391 case Intrinsic::ppc_atomic_load_i128:
18392 Info.opc = ISD::INTRINSIC_W_CHAIN;
18393 Info.memVT = MVT::i128;
18394 Info.ptrVal = I.getArgOperand(0);
18395 Info.offset = 0;
18396 Info.align = Align(16);
18397    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
18398    return true;
18399 case Intrinsic::ppc_atomic_store_i128:
18400 Info.opc = ISD::INTRINSIC_VOID;
18401 Info.memVT = MVT::i128;
18402 Info.ptrVal = I.getArgOperand(2);
18403 Info.offset = 0;
18404 Info.align = Align(16);
18405    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18406    return true;
18407 case Intrinsic::ppc_altivec_lvx:
18408 case Intrinsic::ppc_altivec_lvxl:
18409 case Intrinsic::ppc_altivec_lvebx:
18410 case Intrinsic::ppc_altivec_lvehx:
18411 case Intrinsic::ppc_altivec_lvewx:
18412 case Intrinsic::ppc_vsx_lxvd2x:
18413 case Intrinsic::ppc_vsx_lxvw4x:
18414 case Intrinsic::ppc_vsx_lxvd2x_be:
18415 case Intrinsic::ppc_vsx_lxvw4x_be:
18416 case Intrinsic::ppc_vsx_lxvl:
18417 case Intrinsic::ppc_vsx_lxvll: {
18418 EVT VT;
18419 switch (Intrinsic) {
18420 case Intrinsic::ppc_altivec_lvebx:
18421 VT = MVT::i8;
18422 break;
18423 case Intrinsic::ppc_altivec_lvehx:
18424 VT = MVT::i16;
18425 break;
18426 case Intrinsic::ppc_altivec_lvewx:
18427 VT = MVT::i32;
18428 break;
18429 case Intrinsic::ppc_vsx_lxvd2x:
18430 case Intrinsic::ppc_vsx_lxvd2x_be:
18431 VT = MVT::v2f64;
18432 break;
18433 default:
18434 VT = MVT::v4i32;
18435 break;
18436 }
18437
18438 Info.opc = ISD::INTRINSIC_W_CHAIN;
18439 Info.memVT = VT;
18440 Info.ptrVal = I.getArgOperand(0);
18441 Info.offset = -VT.getStoreSize()+1;
18442 Info.size = 2*VT.getStoreSize()-1;
18443 Info.align = Align(1);
18444 Info.flags = MachineMemOperand::MOLoad;
18445 return true;
18446 }
18447 case Intrinsic::ppc_altivec_stvx:
18448 case Intrinsic::ppc_altivec_stvxl:
18449 case Intrinsic::ppc_altivec_stvebx:
18450 case Intrinsic::ppc_altivec_stvehx:
18451 case Intrinsic::ppc_altivec_stvewx:
18452 case Intrinsic::ppc_vsx_stxvd2x:
18453 case Intrinsic::ppc_vsx_stxvw4x:
18454 case Intrinsic::ppc_vsx_stxvd2x_be:
18455 case Intrinsic::ppc_vsx_stxvw4x_be:
18456 case Intrinsic::ppc_vsx_stxvl:
18457 case Intrinsic::ppc_vsx_stxvll: {
18458 EVT VT;
18459 switch (Intrinsic) {
18460 case Intrinsic::ppc_altivec_stvebx:
18461 VT = MVT::i8;
18462 break;
18463 case Intrinsic::ppc_altivec_stvehx:
18464 VT = MVT::i16;
18465 break;
18466 case Intrinsic::ppc_altivec_stvewx:
18467 VT = MVT::i32;
18468 break;
18469 case Intrinsic::ppc_vsx_stxvd2x:
18470 case Intrinsic::ppc_vsx_stxvd2x_be:
18471 VT = MVT::v2f64;
18472 break;
18473 default:
18474 VT = MVT::v4i32;
18475 break;
18476 }
18477
18478 Info.opc = ISD::INTRINSIC_VOID;
18479 Info.memVT = VT;
18480 Info.ptrVal = I.getArgOperand(1);
18481 Info.offset = -VT.getStoreSize()+1;
18482 Info.size = 2*VT.getStoreSize()-1;
18483 Info.align = Align(1);
18484 Info.flags = MachineMemOperand::MOStore;
18485 return true;
18486 }
18487 case Intrinsic::ppc_stdcx:
18488 case Intrinsic::ppc_stwcx:
18489 case Intrinsic::ppc_sthcx:
18490 case Intrinsic::ppc_stbcx: {
18491 EVT VT;
18492 auto Alignment = Align(8);
18493 switch (Intrinsic) {
18494 case Intrinsic::ppc_stdcx:
18495 VT = MVT::i64;
18496 break;
18497 case Intrinsic::ppc_stwcx:
18498 VT = MVT::i32;
18499 Alignment = Align(4);
18500 break;
18501 case Intrinsic::ppc_sthcx:
18502 VT = MVT::i16;
18503 Alignment = Align(2);
18504 break;
18505 case Intrinsic::ppc_stbcx:
18506 VT = MVT::i8;
18507 Alignment = Align(1);
18508 break;
18509 }
18510 Info.opc = ISD::INTRINSIC_W_CHAIN;
18511 Info.memVT = VT;
18512 Info.ptrVal = I.getArgOperand(0);
18513 Info.offset = 0;
18514 Info.align = Alignment;
18515    Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18516    return true;
18517 }
18518 default:
18519 break;
18520 }
18521
18522 return false;
18523}
18524
18525/// It returns EVT::Other if the type should be determined using generic
18526/// target-independent logic.
18527EVT PPCTargetLowering::getOptimalMemOpType(
18528    LLVMContext &Context, const MemOp &Op,
18529 const AttributeList &FuncAttributes) const {
18530 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
18531 // We should use Altivec/VSX loads and stores when available. For unaligned
18532 // addresses, unaligned VSX loads are only fast starting with the P8.
18533 if (Subtarget.hasAltivec() && Op.size() >= 16) {
18534 if (Op.isMemset() && Subtarget.hasVSX()) {
18535 uint64_t TailSize = Op.size() % 16;
18536        // For memset lowering, EXTRACT_VECTOR_ELT tries to return a constant
18537        // element if the vector element type matches the tail store. For tail
18538        // sizes 3/4 the tail store is i32, so v4i32 cannot be used; use v8i16.
18539 if (TailSize > 2 && TailSize <= 4) {
18540 return MVT::v8i16;
18541 }
18542 return MVT::v4i32;
18543 }
18544 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
18545 return MVT::v4i32;
18546 }
18547 }
18548
18549 if (Subtarget.isPPC64()) {
18550 return MVT::i64;
18551 }
18552
18553 return MVT::i32;
18554}
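// Illustrative examples of the selection above: a 32-byte copy is expanded
// with v4i32 chunks when the access is 16-byte aligned (Altivec) or when
// unaligned vector accesses are fast (P8Vector); a memset whose size leaves a
// 3- or 4-byte tail uses v8i16 so the tail store can reuse a vector element;
// otherwise the expansion falls back to i64 (64-bit) or i32 (32-bit).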
18555
18556/// Returns true if it is beneficial to convert a load of a constant
18557/// to just the constant itself.
18558bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
18559                                                          Type *Ty) const {
18560 assert(Ty->isIntegerTy());
18561
18562 unsigned BitSize = Ty->getPrimitiveSizeInBits();
18563 return !(BitSize == 0 || BitSize > 64);
18564}
18565
18566bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
18567  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
18568 return false;
18569 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
18570 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
18571 return NumBits1 == 64 && NumBits2 == 32;
18572}
18573
18574bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
18575  if (!VT1.isInteger() || !VT2.isInteger())
18576 return false;
18577 unsigned NumBits1 = VT1.getSizeInBits();
18578 unsigned NumBits2 = VT2.getSizeInBits();
18579 return NumBits1 == 64 && NumBits2 == 32;
18580}
18581
18582bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
18583  // Generally speaking, zexts are not free, but they are free when they can be
18584 // folded with other operations.
18585 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
18586 EVT MemVT = LD->getMemoryVT();
18587 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
18588 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
18589 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
18590 LD->getExtensionType() == ISD::ZEXTLOAD))
18591 return true;
18592 }
18593
18594 // FIXME: Add other cases...
18595 // - 32-bit shifts with a zext to i64
18596 // - zext after ctlz, bswap, etc.
18597 // - zext after and by a constant mask
18598
18599 return TargetLowering::isZExtFree(Val, VT2);
18600}
18601
18602bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
18603 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
18604 "invalid fpext types");
18605 // Extending to float128 is not free.
18606 if (DestVT == MVT::f128)
18607 return false;
18608 return true;
18609}
18610
18611bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
18612  return isInt<16>(Imm) || isUInt<16>(Imm);
18613}
18614
18615bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
18616  return isInt<16>(Imm) || isUInt<16>(Imm);
18617}
18618
18619bool PPCTargetLowering::allowsMisalignedMemoryAccesses(
18620    EVT VT, unsigned, Align, MachineMemOperand::Flags,
18621    unsigned *Fast) const {
18622  if (DisablePPCUnaligned)
18623    return false;
18624
18625 // PowerPC supports unaligned memory access for simple non-vector types.
18626 // Although accessing unaligned addresses is not as efficient as accessing
18627 // aligned addresses, it is generally more efficient than manual expansion,
18628 // and generally only traps for software emulation when crossing page
18629 // boundaries.
18630
18631 if (!VT.isSimple())
18632 return false;
18633
18634 if (VT.isFloatingPoint() && !VT.isVector() &&
18635 !Subtarget.allowsUnalignedFPAccess())
18636 return false;
18637
18638 if (VT.getSimpleVT().isVector()) {
18639 if (Subtarget.hasVSX()) {
18640 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
18641 VT != MVT::v4f32 && VT != MVT::v4i32)
18642 return false;
18643 } else {
18644 return false;
18645 }
18646 }
18647
18648 if (VT == MVT::ppcf128)
18649 return false;
18650
18651 if (Fast)
18652 *Fast = 1;
18653
18654 return true;
18655}
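// For example (per the checks above): a misaligned i32 or f64 access is
// allowed (FP only when the subtarget allows unaligned FP access), a
// misaligned v4i32/v2f64 access is allowed only with VSX, and ppcf128 is
// never reported as a fast misaligned access.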
18656
18657bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
18658                                               SDValue C) const {
18659 // Check integral scalar types.
18660 if (!VT.isScalarInteger())
18661 return false;
18662 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
18663 if (!ConstNode->getAPIntValue().isSignedIntN(64))
18664 return false;
18665 // This transformation will generate >= 2 operations. But the following
18666 // cases will generate <= 2 instructions during ISEL. So exclude them.
18667 // 1. If the constant multiplier fits 16 bits, it can be handled by one
18668 // HW instruction, ie. MULLI
18669    //    2. If the multiplier, after shifting out trailing zeros, fits in 16 bits,
18670    //       only one extra shift is needed compared to case 1, ie. MULLI and RLDICR
18671 int64_t Imm = ConstNode->getSExtValue();
18672 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
18673 Imm >>= Shift;
18674 if (isInt<16>(Imm))
18675 return false;
18676 uint64_t UImm = static_cast<uint64_t>(Imm);
18677 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
18678 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
18679 return true;
18680 }
18681 return false;
18682}
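// Example of the profitability check above: multiplying by 5 fits in a single
// MULLI, so it is not decomposed here; multiplying by 0x20001 (2^17 + 1) does
// not fit MULLI even after stripping trailing zeros, but is within one of a
// power of two, so decomposing it into shift+add is reported as beneficial.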
18683
18684bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
18685                                                   EVT VT) const {
18686  return isFMAFasterThanFMulAndFAdd(
18687      MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
18688}
18689
18690bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
18691                                                   Type *Ty) const {
18692 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
18693 return false;
18694 switch (Ty->getScalarType()->getTypeID()) {
18695 case Type::FloatTyID:
18696 case Type::DoubleTyID:
18697 return true;
18698 case Type::FP128TyID:
18699 return Subtarget.hasP9Vector();
18700 default:
18701 return false;
18702 }
18703}
18704
18705// FIXME: add more patterns which are not profitable to hoist.
18706bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
18707  if (!I->hasOneUse())
18708 return true;
18709
18710 Instruction *User = I->user_back();
18711 assert(User && "A single use instruction with no uses.");
18712
18713 switch (I->getOpcode()) {
18714 case Instruction::FMul: {
18715 // Don't break FMA, PowerPC prefers FMA.
18716 if (User->getOpcode() != Instruction::FSub &&
18717 User->getOpcode() != Instruction::FAdd)
18718 return true;
18719
18720    const TargetOptions &Options = getTargetMachine().Options;
18721    const Function *F = I->getFunction();
18722 const DataLayout &DL = F->getDataLayout();
18723 Type *Ty = User->getOperand(0)->getType();
18724 bool AllowContract = I->getFastMathFlags().allowContract() &&
18725 User->getFastMathFlags().allowContract();
18726
18727 return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
18728             isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
18729             (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
18730 }
18731 case Instruction::Load: {
18732 // Don't break "store (load float*)" pattern, this pattern will be combined
18733 // to "store (load int32)" in later InstCombine pass. See function
18734    // combineLoadToOperationType. On PowerPC, loading a floating-point value
18735    // takes more cycles than loading a 32-bit integer.
18736 LoadInst *LI = cast<LoadInst>(I);
18737 // For the loads that combineLoadToOperationType does nothing, like
18738 // ordered load, it should be profitable to hoist them.
18739 // For swifterror load, it can only be used for pointer to pointer type, so
18740 // later type check should get rid of this case.
18741 if (!LI->isUnordered())
18742 return true;
18743
18744 if (User->getOpcode() != Instruction::Store)
18745 return true;
18746
18747 if (I->getType()->getTypeID() != Type::FloatTyID)
18748 return true;
18749
18750 return false;
18751 }
18752 default:
18753 return true;
18754 }
18755 return true;
18756}
18757
18758const MCPhysReg *
18759PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
18760  // LR is a callee-save register, but we must treat it as clobbered by any call
18761 // site. Hence we include LR in the scratch registers, which are in turn added
18762 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
18763 // to CTR, which is used by any indirect call.
18764 static const MCPhysReg ScratchRegs[] = {
18765 PPC::X12, PPC::LR8, PPC::CTR8, 0
18766 };
18767
18768 return ScratchRegs;
18769}
18770
18771Register PPCTargetLowering::getExceptionPointerRegister(
18772    const Constant *PersonalityFn) const {
18773 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
18774}
18775
18776Register PPCTargetLowering::getExceptionSelectorRegister(
18777    const Constant *PersonalityFn) const {
18778 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
18779}
18780
18781bool
18782PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
18783  EVT VT , unsigned DefinedValues) const {
18784 if (VT == MVT::v2i64)
18785 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
18786
18787 if (Subtarget.hasVSX())
18788 return true;
18789
18790  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
18791}
18792
18793Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
18794  if (DisableILPPref || Subtarget.enableMachineScheduler())
18795    return TargetLowering::getSchedulingPreference(N);
18796
18797 return Sched::ILP;
18798}
18799
18800// Create a fast isel object.
18801FastISel *
18802PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
18803                                  const TargetLibraryInfo *LibInfo) const {
18804 return PPC::createFastISel(FuncInfo, LibInfo);
18805}
18806
18807// 'Inverted' means the FMA opcode after negating one multiplicand.
18808// For example, (fma -a b c) = (fnmsub a b c)
18809static unsigned invertFMAOpcode(unsigned Opc) {
18810 switch (Opc) {
18811 default:
18812 llvm_unreachable("Invalid FMA opcode for PowerPC!");
18813 case ISD::FMA:
18814 return PPCISD::FNMSUB;
18815 case PPCISD::FNMSUB:
18816 return ISD::FMA;
18817 }
18818}
18819
18820SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
18821                                                bool LegalOps, bool OptForSize,
18822                                                NegatibleCost &Cost,
18823                                                unsigned Depth) const {
18824  if (Depth > SelectionDAG::MaxRecursionDepth)
18825    return SDValue();
18826
18827 unsigned Opc = Op.getOpcode();
18828 EVT VT = Op.getValueType();
18829 SDNodeFlags Flags = Op.getNode()->getFlags();
18830
18831 switch (Opc) {
18832 case PPCISD::FNMSUB:
18833 if (!Op.hasOneUse() || !isTypeLegal(VT))
18834 break;
18835
18836    const TargetOptions &Options = getTargetMachine().Options;
18837    SDValue N0 = Op.getOperand(0);
18838 SDValue N1 = Op.getOperand(1);
18839 SDValue N2 = Op.getOperand(2);
18840 SDLoc Loc(Op);
18841
18842    NegatibleCost N2Cost = NegatibleCost::Expensive;
18843    SDValue NegN2 =
18844 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
18845
18846 if (!NegN2)
18847 return SDValue();
18848
18849 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
18850 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
18851 // These transformations may change sign of zeroes. For example,
18852 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
18853 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
18854 // Try and choose the cheaper one to negate.
18855      NegatibleCost N0Cost = NegatibleCost::Expensive;
18856      SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
18857 N0Cost, Depth + 1);
18858
18859      NegatibleCost N1Cost = NegatibleCost::Expensive;
18860      SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
18861 N1Cost, Depth + 1);
18862
18863 if (NegN0 && N0Cost <= N1Cost) {
18864 Cost = std::min(N0Cost, N2Cost);
18865 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
18866 } else if (NegN1) {
18867 Cost = std::min(N1Cost, N2Cost);
18868 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
18869 }
18870 }
18871
18872 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
18873 if (isOperationLegal(ISD::FMA, VT)) {
18874 Cost = N2Cost;
18875 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
18876 }
18877
18878 break;
18879 }
18880
18881 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
18882 Cost, Depth);
18883}
18884
18885// Override to enable LOAD_STACK_GUARD lowering on Linux.
18886bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
18887  if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
18888    return true;
18889  return TargetLowering::useLoadStackGuardNode(M);
18890}
18891
18892bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
18893                                     bool ForCodeSize) const {
18894 if (!VT.isSimple() || !Subtarget.hasVSX())
18895 return false;
18896
18897 switch(VT.getSimpleVT().SimpleTy) {
18898 default:
18899 // For FP types that are currently not supported by PPC backend, return
18900 // false. Examples: f16, f80.
18901 return false;
18902 case MVT::f32:
18903 case MVT::f64: {
18904 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
18905      // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
18906 return true;
18907 }
18908 bool IsExact;
18909 APSInt IntResult(16, false);
18910 // The rounding mode doesn't really matter because we only care about floats
18911 // that can be converted to integers exactly.
18912 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
18913 // For exact values in the range [-16, 15] we can materialize the float.
18914 if (IsExact && IntResult <= 15 && IntResult >= -16)
18915 return true;
18916 return Imm.isZero();
18917 }
18918 case MVT::ppcf128:
18919 return Imm.isPosZero();
18920 }
18921}
18922
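// Example for isFPImmLegal above: with VSX but without Power10 prefixed
// instructions, 3.0 converts exactly to the integer 3 (within [-16, 15]) and
// is treated as legal, while 3.5 is not exact and 100.0 is out of range, so
// both would be loaded from the constant pool instead.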
18923// For vector shift operation op, fold
18924// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
18925static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
18926                                  SelectionDAG &DAG) {
18927 SDValue N0 = N->getOperand(0);
18928 SDValue N1 = N->getOperand(1);
18929 EVT VT = N0.getValueType();
18930 unsigned OpSizeInBits = VT.getScalarSizeInBits();
18931 unsigned Opcode = N->getOpcode();
18932 unsigned TargetOpcode;
18933
18934 switch (Opcode) {
18935 default:
18936 llvm_unreachable("Unexpected shift operation");
18937  case ISD::SHL:
18938    TargetOpcode = PPCISD::SHL;
18939    break;
18940  case ISD::SRL:
18941    TargetOpcode = PPCISD::SRL;
18942    break;
18943  case ISD::SRA:
18944    TargetOpcode = PPCISD::SRA;
18945    break;
18946 }
18947
18948 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
18949 N1->getOpcode() == ISD::AND)
18950 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
18951 if (Mask->getZExtValue() == OpSizeInBits - 1)
18952 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
18953
18954 return SDValue();
18955}
18956
18957SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
18958 DAGCombinerInfo &DCI) const {
18959 EVT VT = N->getValueType(0);
18960 assert(VT.isVector() && "Vector type expected.");
18961
18962 unsigned Opc = N->getOpcode();
18963 assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
18964 "Unexpected opcode.");
18965
18966 if (!isOperationLegal(Opc, VT))
18967 return SDValue();
18968
18969 EVT EltTy = VT.getScalarType();
18970 unsigned EltBits = EltTy.getSizeInBits();
18971 if (EltTy != MVT::i64 && EltTy != MVT::i32)
18972 return SDValue();
18973
18974 SDValue N1 = N->getOperand(1);
18975 uint64_t SplatBits = 0;
18976 bool AddSplatCase = false;
18977 unsigned OpcN1 = N1.getOpcode();
18978 if (OpcN1 == PPCISD::VADD_SPLAT &&
18980 AddSplatCase = true;
18981 SplatBits = N1.getConstantOperandVal(0);
18982 }
18983
18984 if (!AddSplatCase) {
18985 if (OpcN1 != ISD::BUILD_VECTOR)
18986 return SDValue();
18987
18988 unsigned SplatBitSize;
18989 bool HasAnyUndefs;
18990 APInt APSplatBits, APSplatUndef;
18991 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
18992 bool BVNIsConstantSplat =
18993 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
18994 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
18995 if (!BVNIsConstantSplat || SplatBitSize != EltBits)
18996 return SDValue();
18997 SplatBits = APSplatBits.getZExtValue();
18998 }
18999
19000 SDLoc DL(N);
19001 SDValue N0 = N->getOperand(0);
19002 // PPC vector shifts by word/double look at only the low 5/6 bits of the
19003 // shift vector, which means the max value is 31/63. A shift vector of all
19004 // 1s will be truncated to 31/63, which is useful as vspltiw is limited to
19005 // -16 to 15 range.
19006 if (SplatBits == (EltBits - 1)) {
19007 unsigned NewOpc;
19008 switch (Opc) {
19009 case ISD::SHL:
19010 NewOpc = PPCISD::SHL;
19011 break;
19012 case ISD::SRL:
19013 NewOpc = PPCISD::SRL;
19014 break;
19015 case ISD::SRA:
19016 NewOpc = PPCISD::SRA;
19017 break;
19018 }
19019 SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
19020 return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
19021 }
19022
19023 if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
19024 return SDValue();
19025
19026 // For 64-bit there is no splat immediate so we want to catch shift by 1 here
19027 // before the BUILD_VECTOR is replaced by a load.
19028 if (EltTy != MVT::i64 || SplatBits != 1)
19029 return SDValue();
19030
19031 return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
19032}
19033
19034SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
19035 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19036 return Value;
19037
19038 if (N->getValueType(0).isVector())
19039 return combineVectorShift(N, DCI);
19040
19041 SDValue N0 = N->getOperand(0);
19042 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
19043 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
19044 N0.getOpcode() != ISD::SIGN_EXTEND ||
19045 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
19046 N->getValueType(0) != MVT::i64)
19047 return SDValue();
19048
19049 // We can't save an operation here if the value is already extended, and
19050 // the existing shift is easier to combine.
19051 SDValue ExtsSrc = N0.getOperand(0);
19052 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
19053 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
19054 return SDValue();
19055
19056 SDLoc DL(N0);
19057 SDValue ShiftBy = SDValue(CN1, 0);
19058 // We want the shift amount to be i32 on the extswli, but the shift could
19059 // have an i64.
19060 if (ShiftBy.getValueType() == MVT::i64)
19061 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
19062
19063 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
19064 ShiftBy);
19065}
19066
19067SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
19068 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19069 return Value;
19070
19071 if (N->getValueType(0).isVector())
19072 return combineVectorShift(N, DCI);
19073
19074 return SDValue();
19075}
19076
19077SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
19078 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19079 return Value;
19080
19081 if (N->getValueType(0).isVector())
19082 return combineVectorShift(N, DCI);
19083
19084 return SDValue();
19085}
19086
19087// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
19088// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
19089// When C is zero, the equation (addi Z, -C) can be simplified to Z
19090// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
19091static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
19092                                 const PPCSubtarget &Subtarget) {
19093 if (!Subtarget.isPPC64())
19094 return SDValue();
19095
19096 SDValue LHS = N->getOperand(0);
19097 SDValue RHS = N->getOperand(1);
19098
19099 auto isZextOfCompareWithConstant = [](SDValue Op) {
19100 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
19101 Op.getValueType() != MVT::i64)
19102 return false;
19103
19104 SDValue Cmp = Op.getOperand(0);
19105 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
19106 Cmp.getOperand(0).getValueType() != MVT::i64)
19107 return false;
19108
19109 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
19110 int64_t NegConstant = 0 - Constant->getSExtValue();
19111 // Due to the limitations of the addi instruction,
19112 // -C is required to be [-32768, 32767].
19113 return isInt<16>(NegConstant);
19114 }
19115
19116 return false;
19117 };
19118
19119 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
19120 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
19121
19122 // If there is a pattern, canonicalize a zext operand to the RHS.
19123 if (LHSHasPattern && !RHSHasPattern)
19124 std::swap(LHS, RHS);
19125 else if (!LHSHasPattern && !RHSHasPattern)
19126 return SDValue();
19127
19128 SDLoc DL(N);
19129 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
19130 SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
19131 SDValue Cmp = RHS.getOperand(0);
19132 SDValue Z = Cmp.getOperand(0);
19133 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
19134 int64_t NegConstant = 0 - Constant->getSExtValue();
19135
19136 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
19137 default: break;
19138 case ISD::SETNE: {
19139 // when C == 0
19140 // --> addze X, (addic Z, -1).carry
19141 // /
19142 // add X, (zext(setne Z, C))--
19143 // \ when -32768 <= -C <= 32767 && C != 0
19144 // --> addze X, (addic (addi Z, -C), -1).carry
19145 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19146 DAG.getConstant(NegConstant, DL, MVT::i64));
19147 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19148 SDValue Addc =
19149 DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19150 AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
19151 DAG.getConstant(0, DL, CarryType));
19152 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19153 DAG.getConstant(0, DL, MVT::i64),
19154 SDValue(Addc.getNode(), 1));
19155 }
19156 case ISD::SETEQ: {
19157 // when C == 0
19158 // --> addze X, (subfic Z, 0).carry
19159 // /
19160 // add X, (zext(sete Z, C))--
19161 // \ when -32768 <= -C <= 32767 && C != 0
19162 // --> addze X, (subfic (addi Z, -C), 0).carry
19163 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19164 DAG.getConstant(NegConstant, DL, MVT::i64));
19165 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19166 SDValue Subc =
19167 DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19168 DAG.getConstant(0, DL, MVT::i64), AddOrZ,
19169 DAG.getConstant(0, DL, CarryType));
19170 SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
19171 DAG.getConstant(1UL, DL, CarryType));
19172 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19173 DAG.getConstant(0, DL, MVT::i64), Invert);
19174 }
19175 }
19176
19177 return SDValue();
19178}
19179
19180// Transform
19181// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
19182// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
19183// In this case both C1 and C2 must be known constants.
19184// C1+C2 must fit into a 34 bit signed integer.
19185static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
19186                                          const PPCSubtarget &Subtarget) {
19187 if (!Subtarget.isUsingPCRelativeCalls())
19188 return SDValue();
19189
19190 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
19191 // If we find that node try to cast the Global Address and the Constant.
19192 SDValue LHS = N->getOperand(0);
19193 SDValue RHS = N->getOperand(1);
19194
19195 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19196 std::swap(LHS, RHS);
19197
19198 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19199 return SDValue();
19200
19201 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19202  GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
19203  ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(RHS);
19204
19205 // Check that both casts succeeded.
19206 if (!GSDN || !ConstNode)
19207 return SDValue();
19208
19209 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19210 SDLoc DL(GSDN);
19211
19212 // The signed int offset needs to fit in 34 bits.
19213 if (!isInt<34>(NewOffset))
19214 return SDValue();
19215
19216 // The new global address is a copy of the old global address except
19217 // that it has the updated Offset.
19218 SDValue GA =
19219 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
19220 NewOffset, GSDN->getTargetFlags());
19221 SDValue MatPCRel =
19222 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
19223 return MatPCRel;
19224}
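// For example, (add (MAT_PCREL_ADDR @g + 8), 16) folds to
// (MAT_PCREL_ADDR @g + 24), provided the combined offset (24 here) still
// fits in the 34-bit signed displacement of the prefixed instruction.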
19225
19226SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
19227 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
19228 return Value;
19229
19230 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
19231 return Value;
19232
19233 return SDValue();
19234}
19235
19236// Detect TRUNCATE operations on bitcasts of float128 values.
19237// What we are looking for here is the situation where we extract a subset
19238// of bits from a 128-bit float.
19239// This can be of two forms:
19240// 1) BITCAST of f128 feeding TRUNCATE
19241// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
19242// The reason this is required is because we do not have a legal i128 type
19243// and so we want to prevent having to store the f128 and then reload part
19244// of it.
19245SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
19246 DAGCombinerInfo &DCI) const {
19247 // If we are using CRBits then try that first.
19248 if (Subtarget.useCRBits()) {
19249 // Check if CRBits did anything and return that if it did.
19250 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
19251 return CRTruncValue;
19252 }
19253
19254 SDLoc dl(N);
19255 SDValue Op0 = N->getOperand(0);
19256
19257 // Looking for a truncate of i128 to i64.
19258 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
19259 return SDValue();
19260
19261 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
19262
19263 // SRL feeding TRUNCATE.
19264 if (Op0.getOpcode() == ISD::SRL) {
19265 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
19266 // The right shift has to be by 64 bits.
19267 if (!ConstNode || ConstNode->getZExtValue() != 64)
19268 return SDValue();
19269
19270 // Switch the element number to extract.
19271 EltToExtract = EltToExtract ? 0 : 1;
19272 // Update Op0 past the SRL.
19273 Op0 = Op0.getOperand(0);
19274 }
19275
19276 // BITCAST feeding a TRUNCATE possibly via SRL.
19277 if (Op0.getOpcode() == ISD::BITCAST &&
19278 Op0.getValueType() == MVT::i128 &&
19279 Op0.getOperand(0).getValueType() == MVT::f128) {
19280 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
19281 return DCI.DAG.getNode(
19282 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
19283 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
19284 }
19285 return SDValue();
19286}
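// For example, on little-endian targets (trunc (bitcast f128 %x to i128) to
// i64) becomes (extract_vector_elt (bitcast %x to v2i64), 0), and the form
// shifted right by 64 extracts element 1 instead, avoiding a store and
// reload of the f128 value.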
19287
19288SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
19289 SelectionDAG &DAG = DCI.DAG;
19290
19291 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
19292 if (!ConstOpOrElement)
19293 return SDValue();
19294
19295 // An imul is usually smaller than the alternative sequence for legal type.
19296  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
19297      isOperationLegal(ISD::MUL, N->getValueType(0)))
19298 return SDValue();
19299
19300 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
19301 switch (this->Subtarget.getCPUDirective()) {
19302 default:
19303 // TODO: enhance the condition for subtarget before pwr8
19304 return false;
19305 case PPC::DIR_PWR8:
19306 // type mul add shl
19307 // scalar 4 1 1
19308 // vector 7 2 2
19309 return true;
19310 case PPC::DIR_PWR9:
19311 case PPC::DIR_PWR10:
19312 case PPC::DIR_PWR11:
19313  case PPC::DIR_PWR_FUTURE:
19314      //  type        mul     add    shl
19315 // scalar 5 2 2
19316 // vector 7 2 2
19317
19318      // The cycle ratios of the related operations are shown in the table
19319      // above. Because mul is 5 (scalar) / 7 (vector) cycles and add/sub/shl
19320      // are all 2 for both scalar and vector types, two-instruction patterns
19321      // (add/sub + shl, cost 4) are always profitable; but the three-instruction
19322      // pattern (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6 (sub + add
19323      // + shl), so we should only do it for vector types.
19324 return IsAddOne && IsNeg ? VT.isVector() : true;
19325 }
19326 };
19327
19328 EVT VT = N->getValueType(0);
19329 SDLoc DL(N);
19330
19331 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
19332 bool IsNeg = MulAmt.isNegative();
19333 APInt MulAmtAbs = MulAmt.abs();
19334
19335 if ((MulAmtAbs - 1).isPowerOf2()) {
19336 // (mul x, 2^N + 1) => (add (shl x, N), x)
19337 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
19338
19339 if (!IsProfitable(IsNeg, true, VT))
19340 return SDValue();
19341
19342 SDValue Op0 = N->getOperand(0);
19343 SDValue Op1 =
19344 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19345 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
19346 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
19347
19348 if (!IsNeg)
19349 return Res;
19350
19351 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
19352 } else if ((MulAmtAbs + 1).isPowerOf2()) {
19353 // (mul x, 2^N - 1) => (sub (shl x, N), x)
19354 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
19355
19356 if (!IsProfitable(IsNeg, false, VT))
19357 return SDValue();
19358
19359 SDValue Op0 = N->getOperand(0);
19360 SDValue Op1 =
19361 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19362 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
19363
19364 if (!IsNeg)
19365 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
19366 else
19367 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
19368
19369 } else {
19370 return SDValue();
19371 }
19372}
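// Worked example of the patterns above: (mul x, 9) has MulAmtAbs - 1 == 8, a
// power of two, so it becomes (add (shl x, 3), x); (mul x, -9) additionally
// subtracts that result from zero; (mul x, 7) matches MulAmtAbs + 1 == 8 and
// becomes (sub (shl x, 3), x). Whether these fire depends on the IsProfitable
// check for the current CPU directive.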
19373
19374// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
19375// in combiner since we need to check SD flags and other subtarget features.
19376SDValue PPCTargetLowering::combineFMALike(SDNode *N,
19377 DAGCombinerInfo &DCI) const {
19378 SDValue N0 = N->getOperand(0);
19379 SDValue N1 = N->getOperand(1);
19380 SDValue N2 = N->getOperand(2);
19381 SDNodeFlags Flags = N->getFlags();
19382 EVT VT = N->getValueType(0);
19383 SelectionDAG &DAG = DCI.DAG;
19384 const TargetOptions &Options = getTargetMachine().Options;
19385 unsigned Opc = N->getOpcode();
19386  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
19387  bool LegalOps = !DCI.isBeforeLegalizeOps();
19388 SDLoc Loc(N);
19389
19390 if (!isOperationLegal(ISD::FMA, VT))
19391 return SDValue();
19392
19393 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
19394 // since (fnmsub a b c)=-0 while c-ab=+0.
19395 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
19396 return SDValue();
19397
19398 // (fma (fneg a) b c) => (fnmsub a b c)
19399 // (fnmsub (fneg a) b c) => (fma a b c)
19400 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
19401 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
19402
19403 // (fma a (fneg b) c) => (fnmsub a b c)
19404 // (fnmsub a (fneg b) c) => (fma a b c)
19405 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
19406 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
19407
19408 return SDValue();
19409}
19410
19411bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19412 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
19413 if (!Subtarget.is64BitELFABI())
19414 return false;
19415
19416 // If not a tail call then no need to proceed.
19417 if (!CI->isTailCall())
19418 return false;
19419
19420 // If sibling calls have been disabled and tail-calls aren't guaranteed
19421 // there is no reason to duplicate.
19422 auto &TM = getTargetMachine();
19423 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19424 return false;
19425
19426 // Can't tail call a function called indirectly, or if it has variadic args.
19427 const Function *Callee = CI->getCalledFunction();
19428 if (!Callee || Callee->isVarArg())
19429 return false;
19430
19431 // Make sure the callee and caller calling conventions are eligible for tco.
19432 const Function *Caller = CI->getParent()->getParent();
19433 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
19434 CI->getCallingConv()))
19435 return false;
19436
19437 // If the function is local then we have a good chance at tail-calling it
19438 return getTargetMachine().shouldAssumeDSOLocal(Callee);
19439}
19440
19441bool PPCTargetLowering::
19442isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19443 const Value *Mask = AndI.getOperand(1);
19444 // If the mask is suitable for andi. or andis. we should sink the and.
19445 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
19446 // Can't handle constants wider than 64-bits.
19447 if (CI->getBitWidth() > 64)
19448 return false;
19449 int64_t ConstVal = CI->getZExtValue();
19450 return isUInt<16>(ConstVal) ||
19451 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
19452 }
19453
19454 // For non-constant masks, we can always use the record-form and.
19455 return true;
19456}
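// Examples of masks accepted above: 0x0000FFFF (fits andi.) and 0xFFFF0000
// (fits andis.) are reported as beneficial to fold with the compare, while a
// constant like 0x00FF00FF spans both halfwords and is not.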
19457
19458/// getAddrModeForFlags - Based on the set of address flags, select the most
19459/// optimal instruction format to match by.
19460PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
19461 // This is not a node we should be handling here.
19462 if (Flags == PPC::MOF_None)
19463 return PPC::AM_None;
19464 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
19465 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
19466 if ((Flags & FlagSet) == FlagSet)
19467 return PPC::AM_DForm;
19468 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
19469 if ((Flags & FlagSet) == FlagSet)
19470 return PPC::AM_DSForm;
19471 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
19472 if ((Flags & FlagSet) == FlagSet)
19473 return PPC::AM_DQForm;
19474 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
19475 if ((Flags & FlagSet) == FlagSet)
19476 return PPC::AM_PrefixDForm;
19477 // If no other forms are selected, return an X-Form as it is the most
19478 // general addressing mode.
19479 return PPC::AM_XForm;
19480}
19481
19482/// Set alignment flags based on whether or not the Frame Index is aligned.
19483/// Utilized when computing flags for address computation when selecting
19484/// load and store instructions.
19485static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
19486 SelectionDAG &DAG) {
19487 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
19488 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
19489 if (!FI)
19490 return;
19491  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
19492  unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
19493 // If this is (add $FI, $S16Imm), the alignment flags are already set
19494 // based on the immediate. We just need to clear the alignment flags
19495 // if the FI alignment is weaker.
19496 if ((FrameIndexAlign % 4) != 0)
19497 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
19498 if ((FrameIndexAlign % 16) != 0)
19499 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
19500 // If the address is a plain FrameIndex, set alignment flags based on
19501 // FI alignment.
19502 if (!IsAdd) {
19503 if ((FrameIndexAlign % 4) == 0)
19504 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19505 if ((FrameIndexAlign % 16) == 0)
19506 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19507 }
19508}
19509
19510/// Given a node, compute flags that are used for address computation when
19511/// selecting load and store instructions. The flags computed are stored in
19512/// FlagSet. This function takes into account whether the node is a constant,
19513/// an ADD or an OR, or something else, and computes the address flags accordingly.
19514static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
19515 SelectionDAG &DAG) {
19516 // Set the alignment flags for the node depending on if the node is
19517 // 4-byte or 16-byte aligned.
19518 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
19519 if ((Imm & 0x3) == 0)
19520 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19521 if ((Imm & 0xf) == 0)
19522 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19523 };
19524
19525  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
19526    // All 32-bit constants can be computed as LIS + Disp.
19527 const APInt &ConstImm = CN->getAPIntValue();
19528 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
19529 FlagSet |= PPC::MOF_AddrIsSImm32;
19530 SetAlignFlagsForImm(ConstImm.getZExtValue());
19531 setAlignFlagsForFI(N, FlagSet, DAG);
19532 }
19533 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
19534 FlagSet |= PPC::MOF_RPlusSImm34;
19535 else // Let constant materialization handle large constants.
19536 FlagSet |= PPC::MOF_NotAddNorCst;
19537 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
19538 // This address can be represented as an addition of:
19539 // - Register + Imm16 (possibly a multiple of 4/16)
19540 // - Register + Imm34
19541 // - Register + PPCISD::Lo
19542 // - Register + Register
19543 // In any case, we won't have to match this as Base + Zero.
19544 SDValue RHS = N.getOperand(1);
19545    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
19546      const APInt &ConstImm = CN->getAPIntValue();
19547 if (ConstImm.isSignedIntN(16)) {
19548 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
19549 SetAlignFlagsForImm(ConstImm.getZExtValue());
19550 setAlignFlagsForFI(N, FlagSet, DAG);
19551 }
19552 if (ConstImm.isSignedIntN(34))
19553 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
19554 else
19555 FlagSet |= PPC::MOF_RPlusR; // Register.
19556 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
19557 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
19558 else
19559 FlagSet |= PPC::MOF_RPlusR;
19560 } else { // The address computation is not a constant or an addition.
19561 setAlignFlagsForFI(N, FlagSet, DAG);
19562 FlagSet |= PPC::MOF_NotAddNorCst;
19563 }
19564}
19565
19573
19574/// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
19575/// the address flags of the load/store instruction that is to be matched.
19576unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
19577 SelectionDAG &DAG) const {
19578 unsigned FlagSet = PPC::MOF_None;
19579
19580 // Compute subtarget flags.
19581 if (!Subtarget.hasP9Vector())
19582 FlagSet |= PPC::MOF_SubtargetBeforeP9;
19583 else
19584 FlagSet |= PPC::MOF_SubtargetP9;
19585
19586 if (Subtarget.hasPrefixInstrs())
19587 FlagSet |= PPC::MOF_SubtargetP10;
19588
19589 if (Subtarget.hasSPE())
19590 FlagSet |= PPC::MOF_SubtargetSPE;
19591
19592 // Check if we have a PCRel node and return early.
19593 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
19594 return FlagSet;
19595
19596 // If the node is the paired load/store intrinsics, compute flags for
19597 // address computation and return early.
19598 unsigned ParentOp = Parent->getOpcode();
19599 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
19600 (ParentOp == ISD::INTRINSIC_VOID))) {
19601 unsigned ID = Parent->getConstantOperandVal(1);
19602 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
19603 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
19604 ? Parent->getOperand(2)
19605 : Parent->getOperand(3);
19606 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
19607 FlagSet |= PPC::MOF_Vector;
19608 return FlagSet;
19609 }
19610 }
19611
19612 // Mark this as something we don't want to handle here if it is atomic
19613 // or pre-increment instruction.
19614 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
19615 if (LSB->isIndexed())
19616 return PPC::MOF_None;
19617
19618 // Compute in-memory type flags. This is based on if there are scalars,
19619 // floats or vectors.
19620 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
19621 assert(MN && "Parent should be a MemSDNode!");
19622 EVT MemVT = MN->getMemoryVT();
19623 unsigned Size = MemVT.getSizeInBits();
19624 if (MemVT.isScalarInteger()) {
19625 assert(Size <= 128 &&
19626 "Not expecting scalar integers larger than 16 bytes!");
19627 if (Size < 32)
19628 FlagSet |= PPC::MOF_SubWordInt;
19629 else if (Size == 32)
19630 FlagSet |= PPC::MOF_WordInt;
19631 else
19632 FlagSet |= PPC::MOF_DoubleWordInt;
19633 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
19634 if (Size == 128)
19635 FlagSet |= PPC::MOF_Vector;
19636 else if (Size == 256) {
19637 assert(Subtarget.pairedVectorMemops() &&
19638 "256-bit vectors are only available when paired vector memops is "
19639 "enabled!");
19640 FlagSet |= PPC::MOF_Vector;
19641 } else
19642 llvm_unreachable("Not expecting illegal vectors!");
19643 } else { // Floating point type: can be scalar, f128 or vector types.
19644 if (Size == 32 || Size == 64)
19645 FlagSet |= PPC::MOF_ScalarFloat;
19646 else if (MemVT == MVT::f128 || MemVT.isVector())
19647 FlagSet |= PPC::MOF_Vector;
19648 else
19649 llvm_unreachable("Not expecting illegal scalar floats!");
19650 }
19651
19652 // Compute flags for address computation.
19653 computeFlagsForAddressComputation(N, FlagSet, DAG);
19654
19655 // Compute type extension flags.
19656 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
19657 switch (LN->getExtensionType()) {
19658 case ISD::SEXTLOAD:
19659 FlagSet |= PPC::MOF_SExt;
19660 break;
19661 case ISD::EXTLOAD:
19662 case ISD::ZEXTLOAD:
19663 FlagSet |= PPC::MOF_ZExt;
19664 break;
19665 case ISD::NON_EXTLOAD:
19666 FlagSet |= PPC::MOF_NoExt;
19667 break;
19668 }
19669 } else
19670 FlagSet |= PPC::MOF_NoExt;
19671
19672 // For integers, no extension is the same as zero extension.
19673 // We set the extension mode to zero extension so we don't have
19674 // to add separate entries in AddrModesMap for loads and stores.
19675 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
19676 FlagSet |= PPC::MOF_ZExt;
19677 FlagSet &= ~PPC::MOF_NoExt;
19678 }
19679
19680 // If we don't have prefixed instructions, 34-bit constants should be
19681 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
19682 bool IsNonP1034BitConst =
19683      ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
19684       FlagSet) == PPC::MOF_RPlusSImm34;
19685 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
19686 IsNonP1034BitConst)
19687 FlagSet |= PPC::MOF_NotAddNorCst;
19688
19689 return FlagSet;
19690}
19691
19692/// SelectForceXFormMode - Given the specified address, force it to be
19693/// represented as an indexed [r+r] operation (an XForm instruction).
19694PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
19695                                                      SDValue &Base,
19696 SelectionDAG &DAG) const {
19697
19698  PPC::AddrMode Mode = PPC::AM_XForm;
19699  int16_t ForceXFormImm = 0;
19700 if (provablyDisjointOr(DAG, N) &&
19701 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
19702 Disp = N.getOperand(0);
19703 Base = N.getOperand(1);
19704 return Mode;
19705 }
19706
19707 // If the address is the result of an add, we will utilize the fact that the
19708 // address calculation includes an implicit add. However, we can reduce
19709 // register pressure if we do not materialize a constant just for use as the
19710 // index register. We only get rid of the add if it is not an add of a
19711 // value and a 16-bit signed constant and both have a single use.
19712 if (N.getOpcode() == ISD::ADD &&
19713 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
19714 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
19715 Disp = N.getOperand(0);
19716 Base = N.getOperand(1);
19717 return Mode;
19718 }
19719
19720 // Otherwise, use R0 as the base register.
19721 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19722 N.getValueType());
19723 Base = N;
19724
19725 return Mode;
19726}
19727
19728bool PPCTargetLowering::splitValueIntoRegisterParts(
19729    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
19730 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
19731 EVT ValVT = Val.getValueType();
19732 // If we are splitting a scalar integer into f64 parts (i.e. so they
19733 // can be placed into VFRC registers), we need to zero extend and
19734 // bitcast the values. This will ensure the value is placed into a
19735 // VSR using direct moves or stack operations as needed.
19736 if (PartVT == MVT::f64 &&
19737 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
19738 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
19739 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
19740 Parts[0] = Val;
19741 return true;
19742 }
19743 return false;
19744}
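// For instance, an i32 value destined for an f64 part (a VFRC register) is
// zero-extended to i64 and bitcast to f64, so the integer bit pattern is
// moved into the floating-point register unchanged rather than being
// converted to a floating-point value.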
19745
19746SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
19747 SelectionDAG &DAG) const {
19748 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19749  TargetLowering::CallLoweringInfo CLI(DAG);
19750  EVT RetVT = Op.getValueType();
19751 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
19752 SDValue Callee =
19753 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
19754 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
19755  TargetLowering::ArgListTy Args;
19756  for (const SDValue &N : Op->op_values()) {
19757 EVT ArgVT = N.getValueType();
19758 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
19759 TargetLowering::ArgListEntry Entry(N, ArgTy);
19760 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
19761 Entry.IsZExt = !Entry.IsSExt;
19762 Args.push_back(Entry);
19763 }
19764
19765 SDValue InChain = DAG.getEntryNode();
19766 SDValue TCChain = InChain;
19767 const Function &F = DAG.getMachineFunction().getFunction();
19768 bool isTailCall =
19769 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
19770 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
19771 if (isTailCall)
19772 InChain = TCChain;
19773 CLI.setDebugLoc(SDLoc(Op))
19774 .setChain(InChain)
19775 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
19776 .setTailCall(isTailCall)
19777 .setSExtResult(SignExtend)
19778 .setZExtResult(!SignExtend)
19779      .setIsPostTypeLegalization(true);
19780  return TLI.LowerCallTo(CLI).first;
19781}
19782
19783SDValue PPCTargetLowering::lowerLibCallBasedOnType(
19784 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
19785 SelectionDAG &DAG) const {
19786 if (Op.getValueType() == MVT::f32)
19787 return lowerToLibCall(LibCallFloatName, Op, DAG);
19788
19789 if (Op.getValueType() == MVT::f64)
19790 return lowerToLibCall(LibCallDoubleName, Op, DAG);
19791
19792 return SDValue();
19793}
19794
19795bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
19796 SDNodeFlags Flags = Op.getNode()->getFlags();
19797 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
19798 Flags.hasNoNaNs() && Flags.hasNoInfs();
19799}
19800
19801bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
19802 return Op.getNode()->getFlags().hasApproximateFuncs();
19803}
19804
19805bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
19806  return TM.Options.PPCGenScalarMASSEntries;
19807}
19808
19809SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
19810 const char *LibCallFloatName,
19811 const char *LibCallDoubleNameFinite,
19812 const char *LibCallFloatNameFinite,
19813 SDValue Op,
19814 SelectionDAG &DAG) const {
19815 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
19816 return SDValue();
19817
19818 if (!isLowringToMASSFiniteSafe(Op))
19819 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
19820 DAG);
19821
19822 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
19823 LibCallDoubleNameFinite, Op, DAG);
19824}
19825
19826SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
19827 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
19828 "__xl_powf_finite", Op, DAG);
19829}
19830
19831SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
19832 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
19833 "__xl_sinf_finite", Op, DAG);
19834}
19835
19836SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
19837 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
19838 "__xl_cosf_finite", Op, DAG);
19839}
19840
19841SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
19842 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
19843 "__xl_logf_finite", Op, DAG);
19844}
19845
19846SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
19847 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
19848 "__xl_log10f_finite", Op, DAG);
19849}
19850
19851SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
19852 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
19853 "__xl_expf_finite", Op, DAG);
19854}
19855
19856// If we happen to match to an aligned D-Form, check if the Frame Index is
19857// adequately aligned. If it is not, reset the mode to match to X-Form.
19858static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
19859                                   PPC::AddrMode &Mode) {
19860  if (!isa<FrameIndexSDNode>(N))
19861    return;
19862  if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
19863      (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
19864    Mode = PPC::AM_XForm;
19865}
19866
19867/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
19868/// compute the address flags of the node, get the optimal address mode based
19869/// on the flags, and set the Base and Disp based on the address mode.
19870PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
19871                                                       SDValue N, SDValue &Disp,
19872 SDValue &Base,
19873 SelectionDAG &DAG,
19874 MaybeAlign Align) const {
19875 SDLoc DL(Parent);
19876
19877 // Compute the address flags.
19878 unsigned Flags = computeMOFlags(Parent, N, DAG);
19879
19880 // Get the optimal address mode based on the Flags.
19881 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
19882
19883 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
19884 // Select an X-Form load if it is not.
19885 setXFormForUnalignedFI(N, Flags, Mode);
19886
19887 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
19888 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
19889 assert(Subtarget.isUsingPCRelativeCalls() &&
19890 "Must be using PC-Relative calls when a valid PC-Relative node is "
19891 "present!");
19892 Mode = PPC::AM_PCRel;
19893 }
19894
19895 // Set Base and Disp accordingly depending on the address mode.
19896 switch (Mode) {
19897 case PPC::AM_DForm:
19898 case PPC::AM_DSForm:
19899 case PPC::AM_DQForm: {
19900 // This is a register plus a 16-bit immediate. The base will be the
19901 // register and the displacement will be the immediate unless it
19902 // isn't sufficiently aligned.
19903 if (Flags & PPC::MOF_RPlusSImm16) {
19904 SDValue Op0 = N.getOperand(0);
19905 SDValue Op1 = N.getOperand(1);
19906 int16_t Imm = Op1->getAsZExtVal();
19907 if (!Align || isAligned(*Align, Imm)) {
19908 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
19909 Base = Op0;
19910        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
19911          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19912 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19913 }
19914 break;
19915 }
19916 }
19917 // This is a register plus the @lo relocation. The base is the register
19918 // and the displacement is the global address.
19919 else if (Flags & PPC::MOF_RPlusLo) {
19920 Disp = N.getOperand(1).getOperand(0); // The global address.
19921      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
19922             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
19923             Disp.getOpcode() == ISD::TargetConstantPool ||
19924             Disp.getOpcode() == ISD::TargetJumpTable);
19925      Base = N.getOperand(0);
19926 break;
19927 }
19928 // This is a constant address at most 32 bits. The base will be
19929 // zero or load-immediate-shifted and the displacement will be
19930 // the low 16 bits of the address.
19931 else if (Flags & PPC::MOF_AddrIsSImm32) {
19932 auto *CN = cast<ConstantSDNode>(N);
19933 EVT CNType = CN->getValueType(0);
19934 uint64_t CNImm = CN->getZExtValue();
19935 // If this address fits entirely in a 16-bit sext immediate field, codegen
19936 // this as "d, 0".
19937 int16_t Imm;
19938 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
19939 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
19940 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19941 CNType);
19942 break;
19943 }
19944 // Handle 32-bit sext immediate with LIS + Addr mode.
19945 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
19946 (!Align || isAligned(*Align, CNImm))) {
19947 int32_t Addr = (int32_t)CNImm;
19948 // Otherwise, break this down into LIS + Disp.
19949 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
19950 Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
19951 MVT::i32);
19952 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
19953 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
19954 break;
19955 }
19956 }
19957 // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
19958 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
19959    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
19960      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19961 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
19962 } else
19963 Base = N;
19964 break;
19965 }
19966 case PPC::AM_PrefixDForm: {
19967 int64_t Imm34 = 0;
19968 unsigned Opcode = N.getOpcode();
19969 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
19970 (isIntS34Immediate(N.getOperand(1), Imm34))) {
19971      // N is an Add/OR Node, and its operand is a 34-bit signed immediate.
19972 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19973 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
19974 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
19975 else
19976 Base = N.getOperand(0);
19977 } else if (isIntS34Immediate(N, Imm34)) {
19978 // The address is a 34-bit signed immediate.
19979 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
19980 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
19981 }
19982 break;
19983 }
19984 case PPC::AM_PCRel: {
19985 // When selecting PC-Relative instructions, "Base" is not utilized as
19986 // we select the address as [PC+imm].
19987 Disp = N;
19988 break;
19989 }
19990 case PPC::AM_None:
19991 break;
19992 default: { // By default, X-Form is always available to be selected.
19993     // When a frame index is not aligned, we also match by XForm.
19994     FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
19995     Base = FI ? N : N.getOperand(1);
19996 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19997 N.getValueType())
19998 : N.getOperand(0);
19999 break;
20000 }
20001 }
20002 return Mode;
20003}
20004
20005 CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
20006                                                  bool Return,
20007                                                  bool IsVarArg) const {
20008 switch (CC) {
20009 case CallingConv::Cold:
20010 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
20011 default:
20012 return CC_PPC64_ELF;
20013 }
20014}
20015
20016 bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
20017   return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
20018 }
20019
20020 TargetLowering::AtomicExpansionKind
20021 PPCTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
20022   unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20023   if (shouldInlineQuadwordAtomics() && Size == 128)
20024     return AtomicExpansionKind::MaskedIntrinsic;
20025 
20026   switch (AI->getOperation()) {
20027   case AtomicRMWInst::UIncWrap:
20028   case AtomicRMWInst::UDecWrap:
20029   case AtomicRMWInst::USubCond:
20030   case AtomicRMWInst::USubSat:
20031     return AtomicExpansionKind::CmpXChg;
20032   default:
20033     return TargetLowering::shouldExpandAtomicRMWInIR(AI);
20034   }
20035 
20036   llvm_unreachable("unreachable atomicrmw operation");
20037 }
20038
20039 TargetLowering::AtomicExpansionKind
20040 PPCTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
20041   unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
20042   if (shouldInlineQuadwordAtomics() && Size == 128)
20043     return AtomicExpansionKind::MaskedIntrinsic;
20044   return TargetLowering::shouldExpandAtomicCmpXchgInIR(AI);
20045 }
20046 
20047static Intrinsic::ID
20048 getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
20049   switch (BinOp) {
20050 default:
20051 llvm_unreachable("Unexpected AtomicRMW BinOp");
20052   case AtomicRMWInst::Xchg:
20053     return Intrinsic::ppc_atomicrmw_xchg_i128;
20054 case AtomicRMWInst::Add:
20055 return Intrinsic::ppc_atomicrmw_add_i128;
20056 case AtomicRMWInst::Sub:
20057 return Intrinsic::ppc_atomicrmw_sub_i128;
20058 case AtomicRMWInst::And:
20059 return Intrinsic::ppc_atomicrmw_and_i128;
20060 case AtomicRMWInst::Or:
20061 return Intrinsic::ppc_atomicrmw_or_i128;
20062 case AtomicRMWInst::Xor:
20063 return Intrinsic::ppc_atomicrmw_xor_i128;
20064   case AtomicRMWInst::Nand:
20065     return Intrinsic::ppc_atomicrmw_nand_i128;
20066 }
20067}
20068
20069 Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
20070     IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20071 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20072 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20073 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20074 Type *ValTy = Incr->getType();
20075 assert(ValTy->getPrimitiveSizeInBits() == 128);
20076 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20077 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
20078 Value *IncrHi =
20079 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
20080 Value *LoHi = Builder.CreateIntrinsic(
20081       getIntrinsicForAtomicRMWBinOp128(AI->getOperation()), {},
20082       {AlignedAddr, IncrLo, IncrHi});
20083 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20084 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20085 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20086 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20087 return Builder.CreateOr(
20088 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20089}
20090
20091 Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20092     IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20093 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20094 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20095 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20096 Type *ValTy = CmpVal->getType();
20097 assert(ValTy->getPrimitiveSizeInBits() == 128);
20098 Function *IntCmpXchg =
20099 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
20100 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20101 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
20102 Value *CmpHi =
20103 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
20104 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
20105 Value *NewHi =
20106 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
20107 emitLeadingFence(Builder, CI, Ord);
20108 Value *LoHi =
20109 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
20110 emitTrailingFence(Builder, CI, Ord);
20111 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20112 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20113 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20114 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20115 return Builder.CreateOr(
20116 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20117}
20118
20119 bool PPCTargetLowering::hasMultipleConditionRegisters(EVT VT) const {
20120   return Subtarget.useCRBits();
20121}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
return SDValue()
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
#define G(x, y, z)
Definition MD5.cpp:56
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static bool isConstantOrUndef(const SDValue Op)
#define P(N)
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static SDValue DAGCombineAddc(SDNode *N, llvm::PPCTargetLowering::DAGCombinerInfo &DCI)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
std::tuple< uint32_t, uint8_t > LXVKQPattern
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, EVT CarryType, SelectionDAG &DAG, const PPCSubtarget &STI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static cl::opt< unsigned > PPCMinimumBitTestCmps("ppc-min-bit-test-cmps", cl::init(3), cl::Hidden, cl::desc("Set minimum of largest number of comparisons to use bit test for " "switch on PPC."))
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static std::optional< LXVKQPattern > getPatternInfo(const APInt &FullVal)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, bool IsLittleEndian)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
if(PassOpts->AAPipeline)
pre isel intrinsic Pre ISel Intrinsic Lowering
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:472
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
static bool Enabled
Definition Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
bool isDenormal() const
Definition APFloat.h:1432
APInt bitcastToAPInt() const
Definition APFloat.h:1335
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:234
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1406
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:449
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1540
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1330
APInt abs() const
Get the absolute value.
Definition APInt.h:1795
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:371
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:329
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1396
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:471
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1722
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:306
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:296
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:200
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:143
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const BlockAddress * getBlockAddress() const
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:277
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:207
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
A debug info location.
Definition DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:233
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:363
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:762
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:774
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
arg_iterator arg_begin()
Definition Function.h:866
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
size_t arg_size() const
Definition Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
const Argument * const_arg_iterator
Definition Function.h:73
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:227
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:636
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
void setThreadLocalMode(ThreadLocalMode Val)
bool hasHiddenVisibility() const
LLVM_ABI StringRef getSection() const
Definition Globals.cpp:191
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:132
bool hasComdat() const
Type * getValueType() const
bool hasProtectedVisibility() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1078
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
static bool hasPCRelFlag(unsigned TF)
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
MVT getScalarIntVT() const
bool isAIXABI() const
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
const PPCRegisterInfo * getRegisterInfo() const override
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified addressed, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified addressed, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified addressed, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
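As a rough illustration of how this helper is typically used, here is a minimal sketch; it assumes a SelectionDAG &DAG, a TargetLowering reference TLI, an SDLoc dl, and two i32 operands LHS/RHS are already in scope inside a lowering routine.
    // Compare two i32 values for equality, producing the target's boolean type.
    EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32);
    SDValue IsEq = DAG.getSetCC(dl, CCVT, LHS, RHS, ISD::SETEQ);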
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
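A minimal sketch of combining a constant node with ordinary node construction, assuming DAG, dl and an existing i64 SDValue N are in scope.
    // Add the immediate 42 to an existing i64 value N.
    SDValue Imm = DAG.getConstant(42, dl, MVT::i64);
    SDValue Sum = DAG.getNode(ISD::ADD, dl, MVT::i64, N, Imm);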
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
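A hedged sketch of selecting between two values based on a comparison, assuming LHS, RHS, TrueVal and FalseVal are i32 SDValues and DAG/dl are in scope.
    // Equivalent to: (LHS < RHS) ? TrueVal : FalseVal, using a signed compare.
    SDValue Res = DAG.getSelectCC(dl, LHS, RHS, TrueVal, FalseVal, ISD::SETLT);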
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
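A small sketch of how a DAG combine might query known bits before folding; Op is an SDValue assumed to be in scope, and the 16-bit threshold is purely illustrative.
    // Skip the fold unless the low 16 bits of Op are known to be zero.
    KnownBits Known = DAG.computeKnownBits(Op);
    if (Known.countMinTrailingZeros() >= 16) {
      // ... safe to apply the fold ...
    }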
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
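As a rough, hypothetical sketch of how a TargetLowering subclass constructor uses this hook; the opcode/type/action combinations below are illustrative assumptions, not the actual PPC configuration.
    // Inside a TargetLowering-derived constructor (illustrative only):
    setOperationAction(ISD::SDIVREM, MVT::i32, Expand);   // split into SDIV/SREM
    setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);  // routed to LowerOperation
    setOperationAction(ISD::FMA, MVT::f64, Legal);         // matched directly by patterns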
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
void setMinimumBitTestCmps(unsigned Val)
Set the minimum of the largest number of comparisons required to generate a BitTest.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
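A hedged sketch of declaring an extending load unsupported, again inside a TargetLowering-derived constructor; the particular type pair is an illustrative assumption.
    // Illustrative only: i32 sign-extending loads from i8 memory must be
    // expanded into a plain load followed by a separate sign extend.
    setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i8, Expand);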
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:298
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:181
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
@ FloatTyID
32-bit floating point type
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:61
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:198
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:258
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ TargetConstantPool
Definition ISDOpcodes.h:184
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:163
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ TargetExternalSymbol
Definition ISDOpcodes.h:185
@ TargetJumpTable
Definition ISDOpcodes.h:183
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:534
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:180
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:958
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:181
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition NVPTX.h:134
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - on AIX the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition PPC.h:200
@ MO_TPREL_HA
Definition PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:113
@ MO_TLS
Symbol for VK_TLS fixup attached to an ADD instruction.
Definition PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:140
@ MO_TPREL_LO
Definition PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:160
@ MO_HA
Definition PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:117
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ TLSLD_AIX
[GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle) Op that requires a single input of the module handle ...
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ STORE_COND
CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr The store conditional instruction ST[BHWD]ARX that produces a...
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ RET_GLUE
Return with a glue operand, matched by 'blr'.
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec and local-exec TLS models,...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ GET_TLS_MOD_AIX
x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model, produces a call to ....
@ SETBC
SETBC - The ISA 3.1 (P10) SETBC instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ SETBCR
SETBCR - The ISA 3.1 (P10) SETBCR instruction.
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ XSMAXC
XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting memory load, using instructions such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ VSRQ
VSRQ - The ISA 3.1 (P10) Vector Shift right quadword instruction.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPR to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load that zero-extends.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ ADDC
These nodes represent PPC arithmetic operations with carry.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ GET_TPOINTER
x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on 32-bit AIX, produces a call to ...
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend. This node represents a v1i128 BUILD_VECTOR of a zero...
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Define some predicates that are used for node matching.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
@ Define
Register definition.
Invariant opcodes: All instruction sets have these as their low opcodes.
@ XMC_PR
Program Code.
Definition XCOFF.h:106
@ XTY_ER
External reference.
Definition XCOFF.h:242
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
iterator end() const
Definition BasicBlock.h:89
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
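A small self-contained example of the bit-width check above; the values are chosen purely for illustration.
    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    void checkImmediates() {
      assert(llvm::isInt<16>(-32768));   // fits in a signed 16-bit field
      assert(!llvm::isInt<16>(40000));   // does not fit
    }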
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
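A hedged sketch of how an address-selection routine might use this helper; the fold shown is illustrative, and N is assumed to be the ADD node under inspection.
    // Try to fold an ADD with a small constant into a reg+imm address.
    int16_t Imm = 0;
    if (isIntS16Immediate(N->getOperand(1).getNode(), Imm)) {
      // Imm fits the signed 16-bit displacement field of a D-form access.
    }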
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:676
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant bit, stopping at the first 1.
Definition bit.h:202
unsigned M1(unsigned Val)
Definition VE.h:377
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
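Together with all_of above, this is used heavily when inspecting shuffle masks and build-vector operands; an illustrative use:

  SmallVector<int, 4> Mask = {0, 0, 0, 0};
  bool IsSplatOfZero = all_of(Mask, [](int M) { return M == 0; });
  bool HasUndefElt = any_of(Mask, [](int M) { return M < 0; });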
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
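Paired with Hi_32 above, this splits a 64-bit immediate into the two halves used when materializing it with 32-bit operations, for example:

  uint64_t Imm = 0x123456789ABCDEF0ULL;
  assert(Hi_32(Imm) == 0x12345678u);
  assert(Lo_32(Imm) == 0x9ABCDEF0u);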
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests whether the value of the given node can be accurately represented as a signed 34-bit immediate.
To bit_cast(const From &from) noexcept
Definition bit.h:90
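A common use is reading a floating-point value's bit pattern without violating aliasing rules:

  double D = 1.0;
  uint64_t Bits = bit_cast<uint64_t>(D); // 0x3FF0000000000000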
@ Mul
Product of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
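For example:

  assert(alignTo(10, Align(8)) == 16); // round 10 up to the next multiple of 8
  assert(alignTo(16, Align(8)) == 16); // already aligned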
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition STLExtras.h:1954
DWARFExpression::Operation Op
bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI)
Check if physical register Reg is used after MBI.
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N bit number shifted left by S.
Definition MathExtras.h:182
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:554
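For example, treating a 16-bit immediate field as signed:

  assert(SignExtend32<16>(0xFFFF) == -1);
  assert(SignExtend32<16>(0x7FFF) == 32767);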
constexpr unsigned BitWidth
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
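The backend uses this when forming rotate-and-mask instructions (rlwinm and friends); a sketch of the calling pattern:

  unsigned MB, ME;
  if (isRunOfOnes(0x00FFFF00u, MB, ME)) {
    // The value is a contiguous mask; MB and ME give the mask-begin and
    // mask-end bit positions used to encode the rotate-and-mask instruction.
  }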
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:198
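Like isShiftedInt above, this is handy for displacement fields whose low bits must be clear (for instance, offsets that must be multiples of 4); for example:

  assert(isShiftedInt<16, 2>(4));   // two low bits clear and in range
  assert(!isShiftedInt<16, 2>(2));  // low bits are not clear
  assert(isShiftedUInt<14, 2>(0x4));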
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
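A small sketch, assuming an LLVMContext Ctx is available:

  EVT VT = EVT::getVectorVT(Ctx, MVT::i32, 4); // v4i32
  assert(VT.getVectorNumElements() == 4);
  assert(VT.getSizeInBits() == 128);
  assert(VT.getVectorElementType() == MVT::i32);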
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
LLVM_ABI std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
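These KnownBits queries show up in hooks such as computeKnownBitsForTargetNode; a minimal illustration:

  KnownBits Known(8);
  Known.One = APInt(8, 0x5A);
  Known.Zero = ~Known.One;   // every bit now has a known value
  if (Known.isConstant())
    assert(Known.getConstant() == 0x5A);
  Known.resetAll();          // back to "nothing known"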
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
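These factory functions supply the pointer information attached to stack accesses during call lowering; a sketch, assuming DAG, Chain, dl, an argument value Arg, its frame-index node FIN, and frame index FI are in scope:

  MachineFunction &MF = DAG.getMachineFunction();
  SDValue Store = DAG.getStore(Chain, dl, Arg, FIN,
                               MachinePointerInfo::getFixedStack(MF, FI));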
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Structure that collects some common arguments that get passed around between the functions for call lowering.
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
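The setters chain, so the usual idiom for emitting a runtime library call looks roughly like this, assuming DAG, dl, Chain, a callee SDValue Callee, a result type RetTy, and an argument list Args are in scope inside the target lowering class:

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
     .setChain(Chain)
     .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
     .setSExtResult();
  std::pair<SDValue, SDValue> Call = LowerCallTo(CLI); // {result, chain}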
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.