PPCISelLowering.cpp
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the PPCISelLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "PPCISelLowering.h"
16#include "PPC.h"
17#include "PPCCallingConv.h"
18#include "PPCFrameLowering.h"
19#include "PPCInstrInfo.h"
21#include "PPCPerfectShuffle.h"
22#include "PPCRegisterInfo.h"
23#include "PPCSelectionDAGInfo.h"
24#include "PPCSubtarget.h"
25#include "PPCTargetMachine.h"
26#include "llvm/ADT/APFloat.h"
27#include "llvm/ADT/APInt.h"
28#include "llvm/ADT/APSInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/DenseMap.h"
31#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/StringRef.h"
58#include "llvm/IR/CallingConv.h"
59#include "llvm/IR/Constant.h"
60#include "llvm/IR/Constants.h"
61#include "llvm/IR/DataLayout.h"
62#include "llvm/IR/DebugLoc.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/GlobalValue.h"
66#include "llvm/IR/IRBuilder.h"
68#include "llvm/IR/Intrinsics.h"
69#include "llvm/IR/IntrinsicsPowerPC.h"
70#include "llvm/IR/Module.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/Value.h"
74#include "llvm/MC/MCContext.h"
75#include "llvm/MC/MCExpr.h"
84#include "llvm/Support/Debug.h"
86#include "llvm/Support/Format.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <list>
97#include <optional>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102
103#define DEBUG_TYPE "ppc-lowering"
104
105static cl::opt<bool> DisableP10StoreForward(
106 "disable-p10-store-forward",
107 cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden,
108 cl::init(false));
109
110static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
111cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
112
113static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
114cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
115
116static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
117cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
118
119static cl::opt<bool> DisableSCO("disable-ppc-sco",
120cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
121
122static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
123cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
124
125static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
126cl::desc("use absolute jump tables on ppc"), cl::Hidden);
127
128static cl::opt<bool>
129 DisablePerfectShuffle("ppc-disable-perfect-shuffle",
130 cl::desc("disable vector permute decomposition"),
131 cl::init(true), cl::Hidden);
132
133static cl::opt<bool> DisableAutoPairedVecSt(
134 "disable-auto-paired-vec-st",
135 cl::desc("disable automatically generated 32byte paired vector stores"),
136 cl::init(true), cl::Hidden);
137
139 "ppc-min-jump-table-entries", cl::init(64), cl::Hidden,
140 cl::desc("Set minimum number of entries to use a jump table on PPC"));
141
143 "ppc-min-bit-test-cmps", cl::init(3), cl::Hidden,
144 cl::desc("Set minimum of largest number of comparisons to use bit test for "
145 "switch on PPC."));
146
148 "ppc-gather-alias-max-depth", cl::init(18), cl::Hidden,
149 cl::desc("max depth when checking alias info in GatherAllAliases()"));
150
152 "ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden,
153 cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a "
154 "function to use initial-exec"));
155
156STATISTIC(NumTailCalls, "Number of tail calls");
157STATISTIC(NumSiblingCalls, "Number of sibling calls");
158STATISTIC(ShufflesHandledWithVPERM,
159 "Number of shuffles lowered to a VPERM or XXPERM");
160STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
161
162static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
163
164static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
165
166// A faster local-[exec|dynamic] TLS access sequence (enabled with the
167// -maix-small-local-[exec|dynamic]-tls option) can be produced for TLS
168// variables; consistent with the IBM XL compiler, we apply a max size of
169// slightly under 32KB.
171
172// FIXME: Remove this once the bug has been fixed!
173extern cl::opt<bool> ANDIGlueBug;
174
176 const PPCSubtarget &STI)
177 : TargetLowering(TM, STI), Subtarget(STI) {
178 // Initialize map that relates the PPC addressing modes to the computed flags
179 // of a load/store instruction. The map is used to determine the optimal
180 // addressing mode when selecting loads and stores.
181 initializeAddrModeMap();
182 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
183 // arguments are at least 4/8 bytes aligned.
184 bool isPPC64 = Subtarget.isPPC64();
185 setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
186 const MVT RegVT = Subtarget.getScalarIntVT();
187
188 // Set up the register classes.
189 addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
190 if (!useSoftFloat()) {
191 if (hasSPE()) {
192 addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
193 // EFPU2 APU only supports f32
194 if (!Subtarget.hasEFPU2())
195 addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
196 } else {
197 addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
198 addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
199 }
200 }
201
204
205 // PowerPC uses addo_carry,subo_carry to propagate carry.
208
209 // On P10, the default lowering generates better code using the
210 // setbc instruction.
211 if (!Subtarget.hasP10Vector()) {
214 if (isPPC64) {
217 }
218 }
219
220 // Match BITREVERSE to customized fast code sequence in the td file.
223
224 // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
225 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
226
227 // Custom lower inline assembly to check for special registers.
228 setOperationAction(ISD::INLINEASM, MVT::Other, Custom);
229 setOperationAction(ISD::INLINEASM_BR, MVT::Other, Custom);
230
231 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
232 for (MVT VT : MVT::integer_valuetypes()) {
235 }
236
237 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
238 setOperationAction(ISD::FP_TO_FP16, MVT::f128, Expand);
239
240 if (Subtarget.isISA3_0()) {
241 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Legal);
242 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Legal);
243 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Legal);
244 setTruncStoreAction(MVT::f64, MVT::f16, Legal);
245 setTruncStoreAction(MVT::f32, MVT::f16, Legal);
246 } else {
247 // No extending loads from f16 or HW conversions back and forth.
248 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
249 setOperationAction(ISD::FP16_TO_FP, MVT::f128, Expand);
250 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
251 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
252 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
253 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
254 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
255 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
256 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
257 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
258 }
259
260 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
261
262 // PowerPC has pre-inc loads and stores.
273 if (!Subtarget.hasSPE()) {
278 }
279
280 if (Subtarget.useCRBits()) {
282
283 if (isPPC64 || Subtarget.hasFPCVT()) {
288
290 AddPromotedToType(ISD::SINT_TO_FP, MVT::i1, RegVT);
292 AddPromotedToType(ISD::UINT_TO_FP, MVT::i1, RegVT);
293
298
300 AddPromotedToType(ISD::FP_TO_SINT, MVT::i1, RegVT);
302 AddPromotedToType(ISD::FP_TO_UINT, MVT::i1, RegVT);
303 } else {
308 }
309
310 // PowerPC does not support direct load/store of condition registers.
311 setOperationAction(ISD::LOAD, MVT::i1, Custom);
312 setOperationAction(ISD::STORE, MVT::i1, Custom);
313
314 // FIXME: Remove this once the ANDI glue bug is fixed:
315 if (ANDIGlueBug)
317
318 for (MVT VT : MVT::integer_valuetypes()) {
321 setTruncStoreAction(VT, MVT::i1, Expand);
322 }
323
324 addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
325 }
326
327 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
328 // PPC (the libcall is not available).
333
334 // We do not currently implement these libm ops for PowerPC.
335 setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
336 setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
337 setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
338 setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
339 setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
340 setOperationAction(ISD::FREM, MVT::ppcf128, Expand);
341
342 // PowerPC has no SREM/UREM instructions unless we are on P9
343 // On P9 we may use a hardware instruction to compute the remainder.
344 // When the result of both the remainder and the division is required it is
345 // more efficient to compute the remainder from the result of the division
346 // rather than use the remainder instruction. The instructions are legalized
347 // directly because the DivRemPairsPass performs the transformation at the IR
348 // level.
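 // For example, when both a / b and a % b are needed, the remainder can be
 // recomputed from the quotient as a - (a / b) * b, so a single divide
 // instruction serves both results.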
349 if (Subtarget.isISA3_0()) {
354 } else {
359 }
360
361 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
370
371 // Handle constrained floating-point operations of scalar.
372 // TODO: Handle SPE specific operation.
378
383
384 if (!Subtarget.hasSPE()) {
387 }
388
389 if (Subtarget.hasVSX()) {
392 }
393
394 if (Subtarget.hasFSQRT()) {
397 }
398
399 if (Subtarget.hasFPRND()) {
404
409 }
410
411 // We don't support sin/cos/sqrt/fmod/pow
412 setOperationAction(ISD::FSIN , MVT::f64, Expand);
413 setOperationAction(ISD::FCOS , MVT::f64, Expand);
414 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
416 setOperationAction(ISD::FPOW , MVT::f64, Expand);
417 setOperationAction(ISD::FSIN , MVT::f32, Expand);
418 setOperationAction(ISD::FCOS , MVT::f32, Expand);
419 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
421 setOperationAction(ISD::FPOW , MVT::f32, Expand);
422
423 // MASS transformation for LLVM intrinsics, replicating the fast-math flag
424 // handling to be consistent with the PPCGenScalarMASSEntries pass.
425 if (TM.getOptLevel() == CodeGenOptLevel::Aggressive) {
426 setOperationAction(ISD::FSIN , MVT::f64, Custom);
427 setOperationAction(ISD::FCOS , MVT::f64, Custom);
428 setOperationAction(ISD::FPOW , MVT::f64, Custom);
429 setOperationAction(ISD::FLOG, MVT::f64, Custom);
430 setOperationAction(ISD::FLOG10, MVT::f64, Custom);
431 setOperationAction(ISD::FEXP, MVT::f64, Custom);
432 setOperationAction(ISD::FSIN , MVT::f32, Custom);
433 setOperationAction(ISD::FCOS , MVT::f32, Custom);
434 setOperationAction(ISD::FPOW , MVT::f32, Custom);
435 setOperationAction(ISD::FLOG, MVT::f32, Custom);
436 setOperationAction(ISD::FLOG10, MVT::f32, Custom);
437 setOperationAction(ISD::FEXP, MVT::f32, Custom);
438 }
439
440 if (Subtarget.hasSPE()) {
443 } else {
444 setOperationAction(ISD::FMA , MVT::f64, Legal);
445 setOperationAction(ISD::FMA , MVT::f32, Legal);
447 setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
448 }
449
450 if (Subtarget.hasSPE())
451 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
452
453 // If we're enabling GP optimizations, use hardware square root
454 if (!Subtarget.hasFSQRT() && !(Subtarget.hasFRSQRTE() && Subtarget.hasFRE()))
455 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
456
457 if (!Subtarget.hasFSQRT() &&
458 !(Subtarget.hasFRSQRTES() && Subtarget.hasFRES()))
459 setOperationAction(ISD::FSQRT, MVT::f32, Expand);
460
461 if (Subtarget.hasFCPSGN()) {
464 } else {
467 }
468
469 if (Subtarget.hasFPRND()) {
470 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
471 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
472 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
473 setOperationAction(ISD::FROUND, MVT::f64, Legal);
474
475 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
476 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
477 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
478 setOperationAction(ISD::FROUND, MVT::f32, Legal);
479 }
480
481 // Prior to P10, PowerPC does not have BSWAP, but we can use vector BSWAP
482 // instruction xxbrd to speed up scalar BSWAP64.
483 if (Subtarget.isISA3_1()) {
486 } else {
489 (Subtarget.hasP9Vector() && isPPC64) ? Custom : Expand);
490 }
491
492 // CTPOP and CTTZ were introduced in P8 and P9, respectively.
493 if (Subtarget.isISA3_0()) {
494 setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
495 setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
496 } else {
497 setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
498 setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
499 }
500
501 if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
504 } else {
507 }
508
509 // PowerPC does not have ROTR
512
513 if (!Subtarget.useCRBits()) {
514 // PowerPC does not have Select
519 }
520
521 // PowerPC wants to turn select_cc of FP into fsel when possible.
524
525 // PowerPC wants to optimize integer setcc a bit
526 if (!Subtarget.useCRBits())
528
529 if (Subtarget.hasFPU()) {
533
537 }
538
539 // PowerPC does not have BRCOND which requires SetCC
540 if (!Subtarget.useCRBits())
541 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
542
543 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
544
545 if (Subtarget.hasSPE()) {
546 // SPE has built-in conversions
553
554 // SPE supports signaling compare of f32/f64.
557 } else {
558 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
561
562 // PowerPC does not have [U|S]INT_TO_FP
567 }
568
569 if (Subtarget.hasDirectMove() && isPPC64) {
570 setOperationAction(ISD::BITCAST, MVT::f32, Legal);
571 setOperationAction(ISD::BITCAST, MVT::i32, Legal);
572 setOperationAction(ISD::BITCAST, MVT::i64, Legal);
573 setOperationAction(ISD::BITCAST, MVT::f64, Legal);
574
583 } else {
584 setOperationAction(ISD::BITCAST, MVT::f32, Expand);
585 setOperationAction(ISD::BITCAST, MVT::i32, Expand);
586 setOperationAction(ISD::BITCAST, MVT::i64, Expand);
587 setOperationAction(ISD::BITCAST, MVT::f64, Expand);
588 }
589
590 // We cannot sextinreg(i1). Expand to shifts.
592
593 // Custom handling for PowerPC ucmp instruction
595 setOperationAction(ISD::UCMP, MVT::i64, isPPC64 ? Custom : Expand);
596
597 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
598 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
599 // support continuations, user-level threading, etc. As a result, no
600 // other SjLj exception interfaces are implemented; please don't build
601 // your own exception handling based on them.
602 // LLVM/Clang supports zero-cost DWARF exception handling.
605
606 // We want to legalize GlobalAddress and ConstantPool nodes into the
607 // appropriate instructions to materialize the address.
618
619 // TRAP is legal.
620 setOperationAction(ISD::TRAP, MVT::Other, Legal);
621
622 // TRAMPOLINE is custom lowered.
623 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
624 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
625
626 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
627 setOperationAction(ISD::VASTART , MVT::Other, Custom);
628
629 if (Subtarget.is64BitELFABI()) {
630 // VAARG always uses double-word chunks, so promote anything smaller.
631 setOperationAction(ISD::VAARG, MVT::i1, Promote);
632 AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
633 setOperationAction(ISD::VAARG, MVT::i8, Promote);
634 AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
635 setOperationAction(ISD::VAARG, MVT::i16, Promote);
636 AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
637 setOperationAction(ISD::VAARG, MVT::i32, Promote);
638 AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
639 setOperationAction(ISD::VAARG, MVT::Other, Expand);
640 } else if (Subtarget.is32BitELFABI()) {
641 // VAARG is custom lowered with the 32-bit SVR4 ABI.
642 setOperationAction(ISD::VAARG, MVT::Other, Custom);
643 setOperationAction(ISD::VAARG, MVT::i64, Custom);
644 } else
645 setOperationAction(ISD::VAARG, MVT::Other, Expand);
646
647 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
648 if (Subtarget.is32BitELFABI())
649 setOperationAction(ISD::VACOPY , MVT::Other, Custom);
650 else
651 setOperationAction(ISD::VACOPY , MVT::Other, Expand);
652
653 // Use the default implementation.
654 setOperationAction(ISD::VAEND , MVT::Other, Expand);
655 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
656 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom);
657 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom);
658 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom);
659 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
660 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
663
664 if (Subtarget.isISA3_0() && isPPC64) {
665 setOperationAction(ISD::VP_STORE, MVT::v16i1, Custom);
666 setOperationAction(ISD::VP_STORE, MVT::v8i1, Custom);
667 setOperationAction(ISD::VP_STORE, MVT::v4i1, Custom);
668 setOperationAction(ISD::VP_STORE, MVT::v2i1, Custom);
669 setOperationAction(ISD::VP_LOAD, MVT::v16i1, Custom);
670 setOperationAction(ISD::VP_LOAD, MVT::v8i1, Custom);
671 setOperationAction(ISD::VP_LOAD, MVT::v4i1, Custom);
672 setOperationAction(ISD::VP_LOAD, MVT::v2i1, Custom);
673 }
674
675 // We want to custom lower some of our intrinsics.
681
682 // To handle counter-based loop conditions.
685
690
691 // Comparisons that require checking two conditions.
692 if (Subtarget.hasSPE()) {
697 }
710
713
714 if (Subtarget.has64BitSupport()) {
715 // They also have instructions for converting between i64 and fp.
724 // This is just the low 32 bits of a (signed) fp->i64 conversion.
725 // We cannot do this with Promote because i64 is not a legal type.
728
729 if (Subtarget.hasLFIWAX() || isPPC64) {
732 }
733 } else {
734 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
735 if (Subtarget.hasSPE()) {
738 } else {
741 }
742 }
743
744 // With the instructions enabled under FPCVT, we can do everything.
745 if (Subtarget.hasFPCVT()) {
746 if (Subtarget.has64BitSupport()) {
755 }
756
765 }
766
767 if (Subtarget.use64BitRegs()) {
768 // 64-bit PowerPC implementations can support i64 types directly
769 addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
770 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
772 // 64-bit PowerPC wants to expand i128 shifts itself.
776 } else {
777 // 32-bit PowerPC wants to expand i64 shifts itself.
781 }
782
783 // PowerPC has better expansions for funnel shifts than the generic
784 // TargetLowering::expandFunnelShift.
785 if (Subtarget.has64BitSupport()) {
788 }
791
792 if (Subtarget.hasVSX()) {
793 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f64, Legal);
794 setOperationAction(ISD::FMAXNUM_IEEE, MVT::f32, Legal);
795 setOperationAction(ISD::FMINNUM_IEEE, MVT::f64, Legal);
796 setOperationAction(ISD::FMINNUM_IEEE, MVT::f32, Legal);
797 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
798 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
799 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
800 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
803 }
804
805 if (Subtarget.hasAltivec()) {
806 for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
811 }
812 // First set operation action for all vector types to expand. Then we
813 // will selectively turn on ones that can be effectively codegen'd.
815 // add/sub are legal for all supported vector VT's.
818
819 // For v2i64, these are only valid with P8Vector. This is corrected after
820 // the loop.
821 if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
826 }
827 else {
832 }
833
834 if (Subtarget.hasVSX()) {
835 setOperationAction(ISD::FMAXNUM_IEEE, VT, Legal);
836 setOperationAction(ISD::FMINNUM_IEEE, VT, Legal);
837 setOperationAction(ISD::FMAXNUM, VT, Legal);
838 setOperationAction(ISD::FMINNUM, VT, Legal);
840 }
841
842 // Vector instructions introduced in P8
843 if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
846 }
847 else {
850 }
851
852 // Vector instructions introduced in P9
853 if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
855 else
857
858 // We promote all shuffles to v16i8.
860 AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
861
862 // We promote all non-typed operations to v4i32.
864 AddPromotedToType (ISD::AND , VT, MVT::v4i32);
866 AddPromotedToType (ISD::OR , VT, MVT::v4i32);
868 AddPromotedToType (ISD::XOR , VT, MVT::v4i32);
869 setOperationAction(ISD::LOAD , VT, Promote);
870 AddPromotedToType (ISD::LOAD , VT, MVT::v4i32);
872 AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
875 AddPromotedToType (ISD::SELECT_CC, VT, MVT::v4i32);
876 setOperationAction(ISD::STORE, VT, Promote);
877 AddPromotedToType (ISD::STORE, VT, MVT::v4i32);
878
879 // No other operations are legal.
887 setOperationAction(ISD::FNEG, VT, Expand);
888 setOperationAction(ISD::FSQRT, VT, Expand);
889 setOperationAction(ISD::FLOG, VT, Expand);
890 setOperationAction(ISD::FLOG10, VT, Expand);
891 setOperationAction(ISD::FLOG2, VT, Expand);
892 setOperationAction(ISD::FEXP, VT, Expand);
893 setOperationAction(ISD::FEXP2, VT, Expand);
894 setOperationAction(ISD::FSIN, VT, Expand);
895 setOperationAction(ISD::FCOS, VT, Expand);
896 setOperationAction(ISD::FABS, VT, Expand);
897 setOperationAction(ISD::FFLOOR, VT, Expand);
898 setOperationAction(ISD::FCEIL, VT, Expand);
899 setOperationAction(ISD::FTRUNC, VT, Expand);
900 setOperationAction(ISD::FRINT, VT, Expand);
901 setOperationAction(ISD::FLDEXP, VT, Expand);
902 setOperationAction(ISD::FNEARBYINT, VT, Expand);
913 setOperationAction(ISD::FPOW, VT, Expand);
918
919 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
920 setTruncStoreAction(VT, InnerVT, Expand);
923 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
924 }
925 }
927 if (!Subtarget.hasP8Vector()) {
928 setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
929 setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
930 setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
931 setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
932 }
933
934 // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
935 // with merges, splats, etc.
937
938 // Vector truncates to sub-word integers that fit in an Altivec/VSX register
939 // are cheap, so handle them before they get expanded to scalar.
945
946 setOperationAction(ISD::AND , MVT::v4i32, Legal);
947 setOperationAction(ISD::OR , MVT::v4i32, Legal);
948 setOperationAction(ISD::XOR , MVT::v4i32, Legal);
949 setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
951 Subtarget.useCRBits() ? Legal : Expand);
952 setOperationAction(ISD::STORE , MVT::v4i32, Legal);
961 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
962 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
963 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
964 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
965
966 // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
967 setOperationAction(ISD::ROTL, MVT::v1i128, Custom);
968 // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
969 if (Subtarget.hasAltivec())
970 for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
972 // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
973 if (Subtarget.hasP8Altivec())
974 setOperationAction(ISD::ROTL, MVT::v2i64, Legal);
975
976 addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
977 addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
978 addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
979 addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
980
981 setOperationAction(ISD::MUL, MVT::v4f32, Legal);
982 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
983
984 if (Subtarget.hasVSX()) {
985 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
986 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
988 }
989
990 if (Subtarget.hasP8Altivec())
991 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
992 else
993 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
994
995 if (Subtarget.isISA3_1()) {
996 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
997 setOperationAction(ISD::MULHS, MVT::v2i64, Legal);
998 setOperationAction(ISD::MULHU, MVT::v2i64, Legal);
999 setOperationAction(ISD::MULHS, MVT::v4i32, Legal);
1000 setOperationAction(ISD::MULHU, MVT::v4i32, Legal);
1001 setOperationAction(ISD::UDIV, MVT::v2i64, Legal);
1002 setOperationAction(ISD::SDIV, MVT::v2i64, Legal);
1003 setOperationAction(ISD::UDIV, MVT::v4i32, Legal);
1004 setOperationAction(ISD::SDIV, MVT::v4i32, Legal);
1005 setOperationAction(ISD::UREM, MVT::v2i64, Legal);
1006 setOperationAction(ISD::SREM, MVT::v2i64, Legal);
1007 setOperationAction(ISD::UREM, MVT::v4i32, Legal);
1008 setOperationAction(ISD::SREM, MVT::v4i32, Legal);
1009 setOperationAction(ISD::UREM, MVT::v1i128, Legal);
1010 setOperationAction(ISD::SREM, MVT::v1i128, Legal);
1011 setOperationAction(ISD::UDIV, MVT::v1i128, Legal);
1012 setOperationAction(ISD::SDIV, MVT::v1i128, Legal);
1013 setOperationAction(ISD::ROTL, MVT::v1i128, Legal);
1014 }
1015
1016 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
1017 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
1018
1021 // LE is P8+/64-bit so direct moves are supported and these operations
1022 // are legal. The custom transformation requires 64-bit since we need a
1023 // pair of stores that will cover a 128-bit load for P10.
1024 if (!DisableP10StoreForward && isPPC64 && !Subtarget.isLittleEndian()) {
1028 }
1029
1034
1035 // Altivec does not contain unordered floating-point compare instructions
1036 setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
1037 setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
1038 setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
1039 setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
1040
1041 if (Subtarget.hasVSX()) {
1044 if (Subtarget.hasP8Vector()) {
1047 }
1048 if (Subtarget.hasDirectMove() && isPPC64) {
1057 }
1059
1060 // The nearbyint variants are not allowed to raise the inexact exception
1061 // so we can only code-gen them with fpexcept.ignore.
1066
1067 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
1068 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
1069 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
1070 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
1071 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
1072 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1073 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1074
1075 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
1076 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
1077 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1078 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1079
1080 setOperationAction(ISD::MUL, MVT::v2f64, Legal);
1081 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
1082
1083 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
1084 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
1085
1086 // Share the Altivec comparison restrictions.
1087 setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
1088 setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
1089 setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
1090 setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);
1091
1092 setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
1093 setOperationAction(ISD::STORE, MVT::v2f64, Legal);
1094
1096
1097 if (Subtarget.hasP8Vector())
1098 addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1099
1100 addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1101
1102 addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1103 addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1104 addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1105
1106 if (Subtarget.hasP8Altivec()) {
1107 setOperationAction(ISD::SHL, MVT::v2i64, Legal);
1108 setOperationAction(ISD::SRA, MVT::v2i64, Legal);
1109 setOperationAction(ISD::SRL, MVT::v2i64, Legal);
1110
1111 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1112 // SRL, but not for SRA because of the instructions available:
1113 // VS{RL} and VS{RL}O. However, due to direct move costs, it's not worth
1114 // doing.
1115 setOperationAction(ISD::SHL, MVT::v1i128, Expand);
1116 setOperationAction(ISD::SRL, MVT::v1i128, Expand);
1117 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1118
1119 setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
1120 }
1121 else {
1122 setOperationAction(ISD::SHL, MVT::v2i64, Expand);
1123 setOperationAction(ISD::SRA, MVT::v2i64, Expand);
1124 setOperationAction(ISD::SRL, MVT::v2i64, Expand);
1125
1126 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
1127
1128 // VSX v2i64 only supports non-arithmetic operations.
1129 setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1130 setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1131 }
1132
1133 if (Subtarget.isISA3_1())
1134 setOperationAction(ISD::SETCC, MVT::v1i128, Legal);
1135 else
1136 setOperationAction(ISD::SETCC, MVT::v1i128, Expand);
1137
1138 setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
1139 AddPromotedToType (ISD::LOAD, MVT::v2i64, MVT::v2f64);
1140 setOperationAction(ISD::STORE, MVT::v2i64, Promote);
1141 AddPromotedToType (ISD::STORE, MVT::v2i64, MVT::v2f64);
1142
1144
1153
1154 // Custom handling for partial vectors of integers converted to
1155 // floating point. We already have optimal handling for v2i32 through
1156 // the DAG combine, so those aren't necessary.
1173
1174 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
1175 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
1176 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
1177 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
1180
1183
1184 // Handle constrained floating-point operations on vectors.
1185 // The predicate is `hasVSX` because Altivec instructions do not raise
1186 // exceptions but VSX vector instructions do.
1200
1214
1215 addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1216 addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1217
1218 for (MVT FPT : MVT::fp_valuetypes())
1219 setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
1220
1221 // Expand the SELECT to SELECT_CC
1223
1224 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
1225 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
1226
1227 // No implementation for these ops for PowerPC.
1228 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
1229 setOperationAction(ISD::FSIN, MVT::f128, Expand);
1230 setOperationAction(ISD::FCOS, MVT::f128, Expand);
1231 setOperationAction(ISD::FPOW, MVT::f128, Expand);
1232 setOperationAction(ISD::FPOWI, MVT::f128, Expand);
1233 setOperationAction(ISD::FREM, MVT::f128, Expand);
1234 }
1235
1236 if (Subtarget.hasP8Altivec()) {
1237 addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1238 addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1239 }
1240
1241 if (Subtarget.hasP9Vector()) {
1244
1245 // Test data class instructions store results in CR bits.
1246 if (Subtarget.useCRBits()) {
1251 }
1252
1253 // 128 bit shifts can be accomplished via 3 instructions for SHL and
1254 // SRL, but not for SRA because of the instructions available:
1255 // VS{RL} and VS{RL}O.
1256 setOperationAction(ISD::SHL, MVT::v1i128, Legal);
1257 setOperationAction(ISD::SRL, MVT::v1i128, Legal);
1258 setOperationAction(ISD::SRA, MVT::v1i128, Expand);
1259
1260 setOperationAction(ISD::FADD, MVT::f128, Legal);
1261 setOperationAction(ISD::FSUB, MVT::f128, Legal);
1262 setOperationAction(ISD::FDIV, MVT::f128, Legal);
1263 setOperationAction(ISD::FMUL, MVT::f128, Legal);
1264 setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
1265
1266 setOperationAction(ISD::FMA, MVT::f128, Legal);
1273
1274 setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
1275 setOperationAction(ISD::FRINT, MVT::f128, Legal);
1276 setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
1277 setOperationAction(ISD::FCEIL, MVT::f128, Legal);
1278 setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
1279 setOperationAction(ISD::FROUND, MVT::f128, Legal);
1280
1283 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
1284
1285 // Handle constrained floating-point operations of fp128
1301 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1302 setOperationAction(ISD::BSWAP, MVT::v8i16, Legal);
1303 setOperationAction(ISD::BSWAP, MVT::v4i32, Legal);
1304 setOperationAction(ISD::BSWAP, MVT::v2i64, Legal);
1305 setOperationAction(ISD::BSWAP, MVT::v1i128, Legal);
1306 } else if (Subtarget.hasVSX()) {
1307 setOperationAction(ISD::LOAD, MVT::f128, Promote);
1308 setOperationAction(ISD::STORE, MVT::f128, Promote);
1309
1310 AddPromotedToType(ISD::LOAD, MVT::f128, MVT::v4i32);
1311 AddPromotedToType(ISD::STORE, MVT::f128, MVT::v4i32);
1312
1313 // Set FADD/FSUB as libcalls to keep the legalizer from expanding the
1314 // fp_to_uint and int_to_fp.
1317
1318 setOperationAction(ISD::FMUL, MVT::f128, Expand);
1319 setOperationAction(ISD::FDIV, MVT::f128, Expand);
1320 setOperationAction(ISD::FNEG, MVT::f128, Expand);
1321 setOperationAction(ISD::FABS, MVT::f128, Expand);
1322 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
1323 setOperationAction(ISD::FMA, MVT::f128, Expand);
1325
1326 // Expand the fp_extend if the target type is fp128.
1327 setOperationAction(ISD::FP_EXTEND, MVT::f128, Expand);
1329
1330 // Expand the fp_round if the source type is fp128.
1331 for (MVT VT : {MVT::f32, MVT::f64}) {
1334 }
1335
1339 setOperationAction(ISD::BR_CC, MVT::f128, Expand);
1340
1341 // Lower following f128 select_cc pattern:
1342 // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1344
1345 // We need to handle f128 SELECT_CC with integer result type.
1347 setOperationAction(ISD::SELECT_CC, MVT::i64, isPPC64 ? Custom : Expand);
1348 }
1349
1350 if (Subtarget.hasP9Altivec()) {
1351 if (Subtarget.isISA3_1()) {
1356 } else {
1359 }
1367
1368 setOperationAction(ISD::ABDU, MVT::v16i8, Legal);
1369 setOperationAction(ISD::ABDU, MVT::v8i16, Legal);
1370 setOperationAction(ISD::ABDU, MVT::v4i32, Legal);
1371 setOperationAction(ISD::ABDS, MVT::v4i32, Legal);
1372 }
1373
1374 if (Subtarget.hasP10Vector()) {
1376 }
1377 }
1378
1379 if (Subtarget.pairedVectorMemops()) {
1380 addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1381 setOperationAction(ISD::LOAD, MVT::v256i1, Custom);
1382 setOperationAction(ISD::STORE, MVT::v256i1, Custom);
1383 }
1384 if (Subtarget.hasMMA()) {
1385 if (Subtarget.isISAFuture()) {
1386 addRegisterClass(MVT::v512i1, &PPC::WACCRCRegClass);
1387 addRegisterClass(MVT::v1024i1, &PPC::DMRRCRegClass);
1388 addRegisterClass(MVT::v2048i1, &PPC::DMRpRCRegClass);
1389 setOperationAction(ISD::LOAD, MVT::v1024i1, Custom);
1390 setOperationAction(ISD::STORE, MVT::v1024i1, Custom);
1391 setOperationAction(ISD::LOAD, MVT::v2048i1, Custom);
1392 setOperationAction(ISD::STORE, MVT::v2048i1, Custom);
1393 } else {
1394 addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1395 }
1396 setOperationAction(ISD::LOAD, MVT::v512i1, Custom);
1397 setOperationAction(ISD::STORE, MVT::v512i1, Custom);
1399 }
1400
1401 if (Subtarget.has64BitSupport())
1402 setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
1403
1404 if (Subtarget.isISA3_1())
1405 setOperationAction(ISD::SRA, MVT::v1i128, Legal);
1406
1407 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);
1408
1409 if (!isPPC64) {
1410 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
1411 setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
1412 }
1413
1415 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
1416 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
1418 }
1419
1421
1422 if (Subtarget.hasAltivec()) {
1423 // Altivec instructions set fields to all zeros or all ones.
1425 }
1426
1429 else if (isPPC64)
1431 else
1433
1434 setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1435
1436 // We have target-specific dag combine patterns for the following nodes:
1439 if (Subtarget.hasFPCVT())
1441 setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
1442 if (Subtarget.useCRBits())
1443 setTargetDAGCombine(ISD::BRCOND);
1446
1448
1450
1451 if (Subtarget.useCRBits()) {
1453 }
1454
1455 // With 32 condition bits, we don't need to sink (and duplicate) compares
1456 // aggressively in CodeGenPrep.
1457 if (Subtarget.useCRBits()) {
1459 }
1460
1461 // TODO: The default entry number is set to 64. This stops most jump table
1462 // generation on PPC. But it is good for current PPC HWs because the indirect
1463 // branch instruction mtctr to the jump table may lead to bad branch prediction.
1464 // Re-evaluate this value on future HWs that can do better with mtctr.
1466
1467 // The default minimum for the largest number of comparisons in a BitTest cluster is 3.
1469
1471 setMinCmpXchgSizeInBits(Subtarget.hasPartwordAtomics() ? 8 : 32);
1472
1473 auto CPUDirective = Subtarget.getCPUDirective();
1474 switch (CPUDirective) {
1475 default: break;
1476 case PPC::DIR_970:
1477 case PPC::DIR_A2:
1478 case PPC::DIR_E500:
1479 case PPC::DIR_E500mc:
1480 case PPC::DIR_E5500:
1481 case PPC::DIR_PWR4:
1482 case PPC::DIR_PWR5:
1483 case PPC::DIR_PWR5X:
1484 case PPC::DIR_PWR6:
1485 case PPC::DIR_PWR6X:
1486 case PPC::DIR_PWR7:
1487 case PPC::DIR_PWR8:
1488 case PPC::DIR_PWR9:
1489 case PPC::DIR_PWR10:
1490 case PPC::DIR_PWR11:
1494 break;
1495 }
1496
1497 if (Subtarget.enableMachineScheduler())
1499 else
1501
1503
1504 // The Freescale cores do better with aggressive inlining of memcpy and
1505 // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
1506 if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
1507 MaxStoresPerMemset = 32;
1509 MaxStoresPerMemcpy = 32;
1513 } else if (CPUDirective == PPC::DIR_A2) {
1514 // The A2 also benefits from (very) aggressive inlining of memcpy and
1515 // friends. The overhead of a function call, even when warm, can be
1516 // over one hundred cycles.
1517 MaxStoresPerMemset = 128;
1518 MaxStoresPerMemcpy = 128;
1519 MaxStoresPerMemmove = 128;
1520 MaxLoadsPerMemcmp = 128;
1521 } else {
1524 }
1525
1526 // Enable generation of STXVP instructions by default for mcpu=future.
1527 if (CPUDirective == PPC::DIR_PWR_FUTURE &&
1528 DisableAutoPairedVecSt.getNumOccurrences() == 0)
1529 DisableAutoPairedVecSt = false;
1530
1531 IsStrictFPEnabled = true;
1532
1533 // Let the subtarget (CPU) decide if a predictable select is more expensive
1534 // than the corresponding branch. This information is used in CGP to decide
1535 // when to convert selects into branches.
1536 PredictableSelectIsExpensive = Subtarget.isPredictableSelectIsExpensive();
1537
1539}
1540
1541// *********************************** NOTE ************************************
1542// For selecting load and store instructions, the addressing modes are defined
1543// as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1544 // patterns to match the load and store instructions.
1545//
1546// The TD definitions for the addressing modes correspond to their respective
1547// Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1548// on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1549// address mode flags of a particular node. Afterwards, the computed address
1550// flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1551// addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1552// accordingly, based on the preferred addressing mode.
1553//
1554// Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1555// MemOpFlags contains all the possible flags that can be used to compute the
1556// optimal addressing mode for load and store instructions.
1557// AddrMode contains all the possible load and store addressing modes available
1558// on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1559//
1560// When adding new load and store instructions, it is possible that new address
1561// flags may need to be added into MemOpFlags, and a new addressing mode will
1562// need to be added to AddrMode. An entry of the new addressing mode (consisting
1563// of the minimal and main distinguishing address flags for the new load/store
1564// instructions) will need to be added into initializeAddrModeMap() below.
1565// Finally, when adding new addressing modes, the getAddrModeForFlags() will
1566// need to be updated to account for selecting the optimal addressing mode.
1567// *****************************************************************************
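// Illustrative sketch only: AM_NewForm and the MOF_* flag names below are
// hypothetical and do not exist in this file. A new addressing mode would
// typically be registered as
//   AddrModesMap[PPC::AM_NewForm] = {PPC::MOF_SomeMinimalFlag | PPC::MOF_SomeMainFlag};
// and getAddrModeForFlags() would then be taught to return PPC::AM_NewForm
// when the flags computed by computeMOFlags() match one of those entries.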
1568/// Initialize the map that relates the different addressing modes of the load
1569/// and store instructions to a set of flags. This ensures the load/store
1570/// instruction is correctly matched during instruction selection.
1571void PPCTargetLowering::initializeAddrModeMap() {
1572 AddrModesMap[PPC::AM_DForm] = {
1573 // LWZ, STW
1578 // LBZ, LHZ, STB, STH
1583 // LHA
1588 // LFS, LFD, STFS, STFD
1593 };
1594 AddrModesMap[PPC::AM_DSForm] = {
1595 // LWA
1599 // LD, STD
1603 // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1607 };
1608 AddrModesMap[PPC::AM_DQForm] = {
1609 // LXV, STXV
1613 };
1614 AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1616 // TODO: Add mapping for quadword load/store.
1617}
1618
1619/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1620/// the desired ByVal argument alignment.
1621static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1622 if (MaxAlign == MaxMaxAlign)
1623 return;
1624 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1625 if (MaxMaxAlign >= 32 &&
1626 VTy->getPrimitiveSizeInBits().getFixedValue() >= 256)
1627 MaxAlign = Align(32);
1628 else if (VTy->getPrimitiveSizeInBits().getFixedValue() >= 128 &&
1629 MaxAlign < 16)
1630 MaxAlign = Align(16);
1631 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1632 Align EltAlign;
1633 getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1634 if (EltAlign > MaxAlign)
1635 MaxAlign = EltAlign;
1636 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1637 for (auto *EltTy : STy->elements()) {
1638 Align EltAlign;
1639 getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1640 if (EltAlign > MaxAlign)
1641 MaxAlign = EltAlign;
1642 if (MaxAlign == MaxMaxAlign)
1643 break;
1644 }
1645 }
1646}
1647
1648/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1649/// function arguments in the caller parameter area.
1651 const DataLayout &DL) const {
1652 // 16-byte and wider vectors are passed on a 16-byte boundary.
1653 // The rest are passed on an 8-byte boundary on PPC64 and 4-byte on PPC32.
1654 Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1655 if (Subtarget.hasAltivec())
1656 getMaxByValAlign(Ty, Alignment, Align(16));
1657 return Alignment;
1658}
1659
1661 return Subtarget.useSoftFloat();
1662}
1663
1665 return Subtarget.hasSPE();
1666}
1667
1669 return VT.isScalarInteger();
1670}
1671
1673 Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const {
1674 if (!Subtarget.isPPC64() || !Subtarget.hasVSX())
1675 return false;
1676
1677 if (auto *VTy = dyn_cast<VectorType>(VectorTy)) {
1678 if (VTy->getScalarType()->isIntegerTy()) {
1679 // ElemSizeInBits 8/16 can fit in immediate field, not needed here.
1680 if (ElemSizeInBits == 32) {
1681 Index = Subtarget.isLittleEndian() ? 2 : 1;
1682 return true;
1683 }
1684 if (ElemSizeInBits == 64) {
1685 Index = Subtarget.isLittleEndian() ? 1 : 0;
1686 return true;
1687 }
1688 }
1689 }
1690 return false;
1691}
1692
1694 EVT VT) const {
1695 if (!VT.isVector())
1696 return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1697
1699}
1700
1702 assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1703 return true;
1704}
1705
1706//===----------------------------------------------------------------------===//
1707// Node matching predicates, for use by the tblgen matching code.
1708//===----------------------------------------------------------------------===//
1709
1710/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
1711static bool isFloatingPointZero(SDValue Op) {
1712 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
1713 return CFP->getValueAPF().isZero();
1714 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
1715 // Maybe this has already been legalized into the constant pool?
1716 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
1717 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
1718 return CFP->getValueAPF().isZero();
1719 }
1720 return false;
1721}
1722
1723/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
1724/// true if Op is undef or if it matches the specified value.
1725static bool isConstantOrUndef(int Op, int Val) {
1726 return Op < 0 || Op == Val;
1727}
1728
1729/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1730/// VPKUHUM instruction.
1731/// The ShuffleKind distinguishes between big-endian operations with
1732/// two different inputs (0), either-endian operations with two identical
1733/// inputs (1), and little-endian operations with two different inputs (2).
1734/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
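/// As an illustration (derived from the element checks below): with
/// ShuffleKind 0 on a big-endian target, the expected mask is
/// <1, 3, 5, ..., 31>, i.e. the odd-numbered bytes of the two concatenated
/// inputs.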
1736 SelectionDAG &DAG) {
1737 bool IsLE = DAG.getDataLayout().isLittleEndian();
1738 if (ShuffleKind == 0) {
1739 if (IsLE)
1740 return false;
1741 for (unsigned i = 0; i != 16; ++i)
1742 if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
1743 return false;
1744 } else if (ShuffleKind == 2) {
1745 if (!IsLE)
1746 return false;
1747 for (unsigned i = 0; i != 16; ++i)
1748 if (!isConstantOrUndef(N->getMaskElt(i), i*2))
1749 return false;
1750 } else if (ShuffleKind == 1) {
1751 unsigned j = IsLE ? 0 : 1;
1752 for (unsigned i = 0; i != 8; ++i)
1753 if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
1754 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
1755 return false;
1756 }
1757 return true;
1758}
1759
1760/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1761/// VPKUWUM instruction.
1762/// The ShuffleKind distinguishes between big-endian operations with
1763/// two different inputs (0), either-endian operations with two identical
1764/// inputs (1), and little-endian operations with two different inputs (2).
1765/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1767 SelectionDAG &DAG) {
1768 bool IsLE = DAG.getDataLayout().isLittleEndian();
1769 if (ShuffleKind == 0) {
1770 if (IsLE)
1771 return false;
1772 for (unsigned i = 0; i != 16; i += 2)
1773 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
1774 !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
1775 return false;
1776 } else if (ShuffleKind == 2) {
1777 if (!IsLE)
1778 return false;
1779 for (unsigned i = 0; i != 16; i += 2)
1780 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1781 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
1782 return false;
1783 } else if (ShuffleKind == 1) {
1784 unsigned j = IsLE ? 0 : 2;
1785 for (unsigned i = 0; i != 8; i += 2)
1786 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1787 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1788 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1789 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
1790 return false;
1791 }
1792 return true;
1793}
1794
1795/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1796/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1797/// current subtarget.
1798///
1799/// The ShuffleKind distinguishes between big-endian operations with
1800/// two different inputs (0), either-endian operations with two identical
1801/// inputs (1), and little-endian operations with two different inputs (2).
1802/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1804 SelectionDAG &DAG) {
1805 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
1806 if (!Subtarget.hasP8Vector())
1807 return false;
1808
1809 bool IsLE = DAG.getDataLayout().isLittleEndian();
1810 if (ShuffleKind == 0) {
1811 if (IsLE)
1812 return false;
1813 for (unsigned i = 0; i != 16; i += 4)
1814 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
1815 !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
1816 !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
1817 !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
1818 return false;
1819 } else if (ShuffleKind == 2) {
1820 if (!IsLE)
1821 return false;
1822 for (unsigned i = 0; i != 16; i += 4)
1823 if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
1824 !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
1825 !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
1826 !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
1827 return false;
1828 } else if (ShuffleKind == 1) {
1829 unsigned j = IsLE ? 0 : 4;
1830 for (unsigned i = 0; i != 8; i += 4)
1831 if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
1832 !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
1833 !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
1834 !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
1835 !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
1836 !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
1837 !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
1838 !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
1839 return false;
1840 }
1841 return true;
1842}
1843
1844/// isVMerge - Common function, used to match vmrg* shuffles.
1845///
1846static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1847 unsigned LHSStart, unsigned RHSStart) {
1848 if (N->getValueType(0) != MVT::v16i8)
1849 return false;
1850 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1851 "Unsupported merge size!");
1852
1853 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1854 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1855 if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1856 LHSStart+j+i*UnitSize) ||
1857 !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1858 RHSStart+j+i*UnitSize))
1859 return false;
1860 }
1861 return true;
1862}
1863
1864/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1865/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1866/// The ShuffleKind distinguishes between big-endian merges with two
1867/// different inputs (0), either-endian merges with two identical inputs (1),
1868/// and little-endian merges with two different inputs (2). For the latter,
1869/// the input operands are swapped (see PPCInstrAltivec.td).
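/// As an illustration (derived from isVMerge below): a big-endian merge of two
/// different inputs with ShuffleKind 0 and UnitSize 4 (vmrglw) expects the byte
/// mask <8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31>.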
1871 unsigned ShuffleKind, SelectionDAG &DAG) {
1872 if (DAG.getDataLayout().isLittleEndian()) {
1873 if (ShuffleKind == 1) // unary
1874 return isVMerge(N, UnitSize, 0, 0);
1875 else if (ShuffleKind == 2) // swapped
1876 return isVMerge(N, UnitSize, 0, 16);
1877 else
1878 return false;
1879 } else {
1880 if (ShuffleKind == 1) // unary
1881 return isVMerge(N, UnitSize, 8, 8);
1882 else if (ShuffleKind == 0) // normal
1883 return isVMerge(N, UnitSize, 8, 24);
1884 else
1885 return false;
1886 }
1887}
1888
1889/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1890/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1891/// The ShuffleKind distinguishes between big-endian merges with two
1892/// different inputs (0), either-endian merges with two identical inputs (1),
1893/// and little-endian merges with two different inputs (2). For the latter,
1894/// the input operands are swapped (see PPCInstrAltivec.td).
1896 unsigned ShuffleKind, SelectionDAG &DAG) {
1897 if (DAG.getDataLayout().isLittleEndian()) {
1898 if (ShuffleKind == 1) // unary
1899 return isVMerge(N, UnitSize, 8, 8);
1900 else if (ShuffleKind == 2) // swapped
1901 return isVMerge(N, UnitSize, 8, 24);
1902 else
1903 return false;
1904 } else {
1905 if (ShuffleKind == 1) // unary
1906 return isVMerge(N, UnitSize, 0, 0);
1907 else if (ShuffleKind == 0) // normal
1908 return isVMerge(N, UnitSize, 0, 16);
1909 else
1910 return false;
1911 }
1912}
1913
1914/**
1915 * Common function used to match vmrgew and vmrgow shuffles
1916 *
1917 * The indexOffset determines whether to look for even or odd words in
1918 * the shuffle mask. This is based on the endianness of the target
1919 * machine.
1920 * - Little Endian:
1921 * - Use offset of 0 to check for odd elements
1922 * - Use offset of 4 to check for even elements
1923 * - Big Endian:
1924 * - Use offset of 0 to check for even elements
1925 * - Use offset of 4 to check for odd elements
1926 * A detailed description of the vector element ordering for little endian and
1927 * big endian can be found at
1928 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1929 * Targeting your applications - what little endian and big endian IBM XL C/C++
1930 * compiler differences mean to you
1931 *
1932 * The mask to the shuffle vector instruction specifies the indices of the
1933 * elements from the two input vectors to place in the result. The elements are
1934 * numbered in array-access order, starting with the first vector. These vectors
1935 * are always of type v16i8, thus each vector will contain 16 byte-sized
1936 * elements. More info on the shuffle vector can be found in the
1937 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1938 * Language Reference.
1939 *
1940 * The RHSStartValue indicates whether the same input vectors are used (unary)
1941 * or two different input vectors are used, based on the following:
1942 * - If the instruction uses the same vector for both inputs, the range of the
1943 * indices will be 0 to 15. In this case, the RHSStart value passed should
1944 * be 0.
1945 * - If the instruction has two different vectors then the range of the
1946 * indices will be 0 to 31. In this case, the RHSStart value passed should
1947 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1948 * to 31 specify elements in the second vector).
1949 *
1950 * \param[in] N The shuffle vector SD Node to analyze
1951 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1952 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1953 * vector to the shuffle_vector instruction
1954 * \return true iff this shuffle vector represents an even or odd word merge
1955 */
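// Worked example (derived from the checks below): a big-endian even-word merge
// of two different inputs (as used for vmrgew, IndexOffset 0, RHSStartValue 16)
// expects the byte mask <0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27>.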
1956static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
1957 unsigned RHSStartValue) {
1958 if (N->getValueType(0) != MVT::v16i8)
1959 return false;
1960
1961 for (unsigned i = 0; i < 2; ++i)
1962 for (unsigned j = 0; j < 4; ++j)
1963 if (!isConstantOrUndef(N->getMaskElt(i*4+j),
1964 i*RHSStartValue+j+IndexOffset) ||
1965 !isConstantOrUndef(N->getMaskElt(i*4+j+8),
1966 i*RHSStartValue+j+IndexOffset+8))
1967 return false;
1968 return true;
1969}
1970
1971/**
1972 * Determine if the specified shuffle mask is suitable for the vmrgew or
1973 * vmrgow instructions.
1974 *
1975 * \param[in] N The shuffle vector SD Node to analyze
1976 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1977 * \param[in] ShuffleKind Identify the type of merge:
1978 * - 0 = big-endian merge with two different inputs;
1979 * - 1 = either-endian merge with two identical inputs;
1980 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1981 * little-endian merges).
1982 * \param[in] DAG The current SelectionDAG
1983 * \return true iff this shuffle mask is suitable for the vmrgew or vmrgow instructions
1984 */
1986 unsigned ShuffleKind, SelectionDAG &DAG) {
1987 if (DAG.getDataLayout().isLittleEndian()) {
1988 unsigned indexOffset = CheckEven ? 4 : 0;
1989 if (ShuffleKind == 1) // Unary
1990 return isVMerge(N, indexOffset, 0);
1991 else if (ShuffleKind == 2) // swapped
1992 return isVMerge(N, indexOffset, 16);
1993 else
1994 return false;
1995 }
1996 else {
1997 unsigned indexOffset = CheckEven ? 0 : 4;
1998 if (ShuffleKind == 1) // Unary
1999 return isVMerge(N, indexOffset, 0);
2000 else if (ShuffleKind == 0) // Normal
2001 return isVMerge(N, indexOffset, 16);
2002 else
2003 return false;
2004 }
2005 return false;
2006}
2007
2008/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2009/// amount, otherwise return -1.
2010/// The ShuffleKind distinguishes between big-endian operations with two
2011/// different inputs (0), either-endian operations with two identical inputs
2012/// (1), and little-endian operations with two different inputs (2). For the
2013/// latter, the input operands are swapped (see PPCInstrAltivec.td).
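/// For example (matching the logic below), the mask <3, 4, 5, ..., 18> yields
/// a shift amount of 3 for a big-endian ShuffleKind-0 shuffle, and 13 (16 - 3)
/// after the little-endian adjustment for ShuffleKind 2.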
2014int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2015 SelectionDAG &DAG) {
2016 if (N->getValueType(0) != MVT::v16i8)
2017 return -1;
2018
2020
2021 // Find the first non-undef value in the shuffle mask.
2022 unsigned i;
2023 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2024 /*search*/;
2025
2026 if (i == 16) return -1; // all undef.
2027
2028 // Otherwise, check to see if the rest of the elements are consecutively
2029 // numbered from this value.
2030 unsigned ShiftAmt = SVOp->getMaskElt(i);
2031 if (ShiftAmt < i) return -1;
2032
2033 ShiftAmt -= i;
2034 bool isLE = DAG.getDataLayout().isLittleEndian();
2035
2036 if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2037 // Check the rest of the elements to see if they are consecutive.
2038 for (++i; i != 16; ++i)
2039 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2040 return -1;
2041 } else if (ShuffleKind == 1) {
2042 // Check the rest of the elements to see if they are consecutive.
2043 for (++i; i != 16; ++i)
2044 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2045 return -1;
2046 } else
2047 return -1;
2048
2049 if (isLE)
2050 ShiftAmt = 16 - ShiftAmt;
2051
2052 return ShiftAmt;
2053}
2054
2055/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
2056/// specifies a splat of a single element that is suitable for input to
2057/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
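// Illustrative example: with EltSize = 4, the v16i8 mask
// {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7} is a splat of word element 1 and is
// accepted, while any mask whose first index is not a multiple of EltSize
// (e.g. one starting at 5) is rejected immediately below.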
2058bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
 2059 EVT VT = N->getValueType(0);
2060 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2061 return EltSize == 8 && N->getMaskElt(0) == N->getMaskElt(1);
2062
2063 assert(VT == MVT::v16i8 && isPowerOf2_32(EltSize) &&
2064 EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");
2065
2066 // The consecutive indices need to specify an element, not part of two
2067 // different elements. So abandon ship early if this isn't the case.
2068 if (N->getMaskElt(0) % EltSize != 0)
2069 return false;
2070
2071 // This is a splat operation if each element of the permute is the same, and
2072 // if the value doesn't reference the second vector.
2073 unsigned ElementBase = N->getMaskElt(0);
2074
2075 // FIXME: Handle UNDEF elements too!
2076 if (ElementBase >= 16)
2077 return false;
2078
2079 // Check that the indices are consecutive, in the case of a multi-byte element
2080 // splatted with a v16i8 mask.
2081 for (unsigned i = 1; i != EltSize; ++i)
2082 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
2083 return false;
2084
2085 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
2086 // An UNDEF element is a sequence of UNDEF bytes.
2087 if (N->getMaskElt(i) < 0) {
2088 for (unsigned j = 1; j != EltSize; ++j)
2089 if (N->getMaskElt(i + j) >= 0)
2090 return false;
2091 } else
2092 for (unsigned j = 0; j != EltSize; ++j)
2093 if (N->getMaskElt(i + j) != N->getMaskElt(j))
2094 return false;
2095 }
2096 return true;
2097}
2098
2099/// Check that the mask is shuffling Width-byte elements. Within each
2100/// Width-byte element of the mask, the indices may be in increasing or
2101/// decreasing order, as long as they are consecutive.
2102/// \param[in] N the shuffle vector SD Node to analyze
2103/// \param[in] Width the element width in bytes, which can be 2/4/8/16
2104/// (HalfWord/Word/DoubleWord/QuadWord).
2105/// \param[in] StepLen the step between consecutive indices within an element:
2106/// 1 if the mask is increasing, -1 if it is decreasing.
2107/// \return true iff the mask is shuffling Width-byte elements.
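// Illustrative example: with Width = 4 and StepLen = 1 the mask
// {8,9,10,11, 0,1,2,3, 28,29,30,31, 4,5,6,7} is accepted, since every group
// of four byte indices is ascending and starts on a word boundary. With
// StepLen = -1 each group must be descending instead, e.g.
// {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}.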
2108static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
2109 int StepLen) {
2110 assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
2111 "Unexpected element width.");
 2112 assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");
2113
2114 unsigned NumOfElem = 16 / Width;
2115 unsigned MaskVal[16]; // Width is never greater than 16
2116 for (unsigned i = 0; i < NumOfElem; ++i) {
2117 MaskVal[0] = N->getMaskElt(i * Width);
2118 if ((StepLen == 1) && (MaskVal[0] % Width)) {
2119 return false;
2120 } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
2121 return false;
2122 }
2123
2124 for (unsigned int j = 1; j < Width; ++j) {
2125 MaskVal[j] = N->getMaskElt(i * Width + j);
2126 if (MaskVal[j] != MaskVal[j-1] + StepLen) {
2127 return false;
2128 }
2129 }
2130 }
2131
2132 return true;
2133}
2134
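// Roughly speaking, the following accepts masks where three of the four result
// words are taken, in place, from one input and the remaining word is taken
// from the other input (or from a fixed word of the same input when the second
// operand is undef). Illustrative example: the word-level mask {4,1,2,3},
// i.e. bytes {16,17,18,19, 4,5,6,7, 8,9,10,11, 12,13,14,15}, keeps words 1-3
// of the first input and inserts word 0 of the second input at word 0, so it
// is accepted with Swap = false.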
2135bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
2136 unsigned &InsertAtByte, bool &Swap, bool IsLE) {
2137 if (!isNByteElemShuffleMask(N, 4, 1))
2138 return false;
2139
2140 // Now we look at mask elements 0,4,8,12
2141 unsigned M0 = N->getMaskElt(0) / 4;
2142 unsigned M1 = N->getMaskElt(4) / 4;
2143 unsigned M2 = N->getMaskElt(8) / 4;
2144 unsigned M3 = N->getMaskElt(12) / 4;
2145 unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
2146 unsigned BigEndianShifts[] = { 3, 0, 1, 2 };
2147
2148 // Below, let H and L be arbitrary elements of the shuffle mask
2149 // where H is in the range [4,7] and L is in the range [0,3].
2150 // H, 1, 2, 3 or L, 5, 6, 7
2151 if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
2152 (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
2153 ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
2154 InsertAtByte = IsLE ? 12 : 0;
2155 Swap = M0 < 4;
2156 return true;
2157 }
2158 // 0, H, 2, 3 or 4, L, 6, 7
2159 if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
2160 (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
2161 ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
2162 InsertAtByte = IsLE ? 8 : 4;
2163 Swap = M1 < 4;
2164 return true;
2165 }
2166 // 0, 1, H, 3 or 4, 5, L, 7
2167 if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
2168 (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
2169 ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
2170 InsertAtByte = IsLE ? 4 : 8;
2171 Swap = M2 < 4;
2172 return true;
2173 }
2174 // 0, 1, 2, H or 4, 5, 6, L
2175 if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
2176 (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
2177 ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
2178 InsertAtByte = IsLE ? 0 : 12;
2179 Swap = M3 < 4;
2180 return true;
2181 }
2182
2183 // If both vector operands for the shuffle are the same vector, the mask will
2184 // contain only elements from the first one and the second one will be undef.
2185 if (N->getOperand(1).isUndef()) {
2186 ShiftElts = 0;
2187 Swap = true;
2188 unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
2189 if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
2190 InsertAtByte = IsLE ? 12 : 0;
2191 return true;
2192 }
2193 if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
2194 InsertAtByte = IsLE ? 8 : 4;
2195 return true;
2196 }
2197 if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
2198 InsertAtByte = IsLE ? 4 : 8;
2199 return true;
2200 }
2201 if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
2202 InsertAtByte = IsLE ? 0 : 12;
2203 return true;
2204 }
2205 }
2206
2207 return false;
2208}
2209
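// Illustrative example: the word-level mask {1,2,3,4} (bytes 4 through 19)
// selects four consecutive words starting at word 1 of the concatenated
// inputs. On big-endian this is accepted with Swap = false and ShiftElts = 1;
// the little-endian path below computes the equivalent shift for the swapped
// operand order.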
2210bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
 2211                               bool &Swap, bool IsLE) {
2212 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2213 // Ensure each byte index of the word is consecutive.
2214 if (!isNByteElemShuffleMask(N, 4, 1))
2215 return false;
2216
2217 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
2218 unsigned M0 = N->getMaskElt(0) / 4;
2219 unsigned M1 = N->getMaskElt(4) / 4;
2220 unsigned M2 = N->getMaskElt(8) / 4;
2221 unsigned M3 = N->getMaskElt(12) / 4;
2222
2223 // If both vector operands for the shuffle are the same vector, the mask will
2224 // contain only elements from the first one and the second one will be undef.
2225 if (N->getOperand(1).isUndef()) {
2226 assert(M0 < 4 && "Indexing into an undef vector?");
2227 if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
2228 return false;
2229
2230 ShiftElts = IsLE ? (4 - M0) % 4 : M0;
2231 Swap = false;
2232 return true;
2233 }
2234
2235 // Ensure each word index of the ShuffleVector Mask is consecutive.
2236 if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
2237 return false;
2238
2239 if (IsLE) {
2240 if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
2241 // Input vectors don't need to be swapped if the leading element
2242 // of the result is one of the 3 left elements of the second vector
2243 // (or if there is no shift to be done at all).
2244 Swap = false;
2245 ShiftElts = (8 - M0) % 8;
2246 } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
2247 // Input vectors need to be swapped if the leading element
2248 // of the result is one of the 3 left elements of the first vector
2249 // (or if we're shifting by 4 - thereby simply swapping the vectors).
2250 Swap = true;
2251 ShiftElts = (4 - M0) % 4;
2252 }
2253
2254 return true;
2255 } else { // BE
2256 if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
2257 // Input vectors don't need to be swapped if the leading element
2258 // of the result is one of the 4 elements of the first vector.
2259 Swap = false;
2260 ShiftElts = M0;
2261 } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
2262 // Input vectors need to be swapped if the leading element
2263 // of the result is one of the 4 elements of the right vector.
2264 Swap = true;
2265 ShiftElts = M0 - 4;
2266 }
2267
2268 return true;
2269 }
2270}
2271
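// The helper below is shared by the byte-reverse (xxbr*) mask checks: it
// accepts masks that reverse the bytes within each Width-byte element, e.g.
// for Width = 4 the mask {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}.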
2272static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
 2273 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2274
2275 if (!isNByteElemShuffleMask(N, Width, -1))
2276 return false;
2277
2278 for (int i = 0; i < 16; i += Width)
2279 if (N->getMaskElt(i) != i + Width - 1)
2280 return false;
2281
2282 return true;
2283}
2284
2285bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
 2286 return isXXBRShuffleMaskHelper(N, 2);
 2287}
 2288
2289bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
 2290 return isXXBRShuffleMaskHelper(N, 4);
 2291}
 2292
2293bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
 2294 return isXXBRShuffleMaskHelper(N, 8);
 2295}
 2296
2297bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
 2298 return isXXBRShuffleMaskHelper(N, 16);
 2299}
 2300
2301/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2302/// if the inputs to the instruction should be swapped and set \p DM to the
2303/// value for the immediate.
2304/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2305/// AND element 0 of the result comes from the first input (LE) or second input
2306/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2307/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2308/// mask.
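// Illustrative example (big-endian, two distinct inputs): the byte mask
// {0..7, 24..31} gives doubleword indices M0 = 0 and M1 = 3, i.e. the first
// doubleword of the first input followed by the second doubleword of the
// second input, so Swap = false and DM = (0 << 1) + (3 & 1) = 1.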
2309bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
 2310                                bool &Swap, bool IsLE) {
2311 assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
2312
2313 // Ensure each byte index of the double word is consecutive.
2314 if (!isNByteElemShuffleMask(N, 8, 1))
2315 return false;
2316
2317 unsigned M0 = N->getMaskElt(0) / 8;
2318 unsigned M1 = N->getMaskElt(8) / 8;
2319 assert(((M0 | M1) < 4) && "A mask element out of bounds?");
2320
2321 // If both vector operands for the shuffle are the same vector, the mask will
2322 // contain only elements from the first one and the second one will be undef.
2323 if (N->getOperand(1).isUndef()) {
2324 if ((M0 | M1) < 2) {
2325 DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
2326 Swap = false;
2327 return true;
2328 } else
2329 return false;
2330 }
2331
2332 if (IsLE) {
2333 if (M0 > 1 && M1 < 2) {
2334 Swap = false;
2335 } else if (M0 < 2 && M1 > 1) {
2336 M0 = (M0 + 2) % 4;
2337 M1 = (M1 + 2) % 4;
2338 Swap = true;
2339 } else
2340 return false;
2341
2342 // Note: if control flow comes here that means Swap is already set above
2343 DM = (((~M1) & 1) << 1) + ((~M0) & 1);
2344 return true;
2345 } else { // BE
2346 if (M0 < 2 && M1 > 1) {
2347 Swap = false;
2348 } else if (M0 > 1 && M1 < 2) {
2349 M0 = (M0 + 2) % 4;
2350 M1 = (M1 + 2) % 4;
2351 Swap = true;
2352 } else
2353 return false;
2354
2355 // Note: if control flow comes here that means Swap is already set above
2356 DM = (M0 << 1) + (M1 & 1);
2357 return true;
2358 }
2359}
2360
2361
2362/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2363/// appropriate for PPC mnemonics (which have a big endian bias - namely
2364/// elements are counted from the left of the vector register).
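// Illustrative example: a byte-level mask splatting word element 1 has
// getMaskElt(0) == 4, so with EltSize = 4 big-endian returns 4 / 4 = 1, while
// little-endian mirrors it to (16 / 4) - 1 - 1 = 2 to match the left-to-right
// element numbering used by vspltw/xxspltw.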
2365unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2366 SelectionDAG &DAG) {
 2367 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
 2368 assert(isSplatShuffleMask(SVOp, EltSize));
2369 EVT VT = SVOp->getValueType(0);
2370
2371 if (VT == MVT::v2i64 || VT == MVT::v2f64)
2372 return DAG.getDataLayout().isLittleEndian() ? 1 - SVOp->getMaskElt(0)
2373 : SVOp->getMaskElt(0);
2374
2375 if (DAG.getDataLayout().isLittleEndian())
2376 return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2377 else
2378 return SVOp->getMaskElt(0) / EltSize;
2379}
2380
2381/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2382/// by using a vspltis[bhw] instruction of the specified element size, return
2383/// the constant being splatted. The ByteSize field indicates the number of
2384/// bytes of each element: [1,2,4] -> [b,h,w].
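// Illustrative example: a v4i32 build_vector of four copies of the constant 3,
// queried with ByteSize = 4, yields the target constant 3 (lowerable to
// "vspltisw 3"); a build_vector of four copies of 0x00010001 queried with
// ByteSize = 2 yields 1, corresponding to "vspltish 1".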
2385SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
 2386 SDValue OpVal;
2387
2388 // If ByteSize of the splat is bigger than the element size of the
2389 // build_vector, then we have a case where we are checking for a splat where
2390 // multiple elements of the buildvector are folded together into a single
 2391 // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
2392 unsigned EltSize = 16/N->getNumOperands();
2393 if (EltSize < ByteSize) {
2394 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
2395 SDValue UniquedVals[4];
2396 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");
2397
2398 // See if all of the elements in the buildvector agree across.
2399 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2400 if (N->getOperand(i).isUndef()) continue;
2401 // If the element isn't a constant, bail fully out.
2402 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();
2403
2404 if (!UniquedVals[i&(Multiple-1)].getNode())
2405 UniquedVals[i&(Multiple-1)] = N->getOperand(i);
2406 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
2407 return SDValue(); // no match.
2408 }
2409
2410 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2411 // either constant or undef values that are identical for each chunk. See
2412 // if these chunks can form into a larger vspltis*.
2413
2414 // Check to see if all of the leading entries are either 0 or -1. If
2415 // neither, then this won't fit into the immediate field.
2416 bool LeadingZero = true;
2417 bool LeadingOnes = true;
2418 for (unsigned i = 0; i != Multiple-1; ++i) {
2419 if (!UniquedVals[i].getNode()) continue; // Must have been undefs.
2420
2421 LeadingZero &= isNullConstant(UniquedVals[i]);
2422 LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
2423 }
2424 // Finally, check the least significant entry.
2425 if (LeadingZero) {
2426 if (!UniquedVals[Multiple-1].getNode())
2427 return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
2428 int Val = UniquedVals[Multiple - 1]->getAsZExtVal();
2429 if (Val < 16) // 0,0,0,4 -> vspltisw(4)
2430 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2431 }
2432 if (LeadingOnes) {
2433 if (!UniquedVals[Multiple-1].getNode())
2434 return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
2435 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
2436 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2437 return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
2438 }
2439
2440 return SDValue();
2441 }
2442
2443 // Check to see if this buildvec has a single non-undef value in its elements.
2444 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
2445 if (N->getOperand(i).isUndef()) continue;
2446 if (!OpVal.getNode())
2447 OpVal = N->getOperand(i);
2448 else if (OpVal != N->getOperand(i))
2449 return SDValue();
2450 }
2451
2452 if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.
2453
2454 unsigned ValSizeInBytes = EltSize;
2455 uint64_t Value = 0;
2456 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
2457 Value = CN->getZExtValue();
2458 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
2459 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
2460 Value = llvm::bit_cast<uint32_t>(CN->getValueAPF().convertToFloat());
2461 }
2462
2463 // If the splat value is larger than the element value, then we can never do
2464 // this splat. The only case that we could fit the replicated bits into our
2465 // immediate field for would be zero, and we prefer to use vxor for it.
2466 if (ValSizeInBytes < ByteSize) return SDValue();
2467
2468 // If the element value is larger than the splat value, check if it consists
2469 // of a repeated bit pattern of size ByteSize.
2470 if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
2471 return SDValue();
2472
2473 // Properly sign extend the value.
2474 int MaskVal = SignExtend32(Value, ByteSize * 8);
2475
2476 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2477 if (MaskVal == 0) return SDValue();
2478
2479 // Finally, if this value fits in a 5 bit sext field, return it
2480 if (SignExtend32<5>(MaskVal) == MaskVal)
2481 return DAG.getSignedTargetConstant(MaskVal, SDLoc(N), MVT::i32);
2482 return SDValue();
2483}
2484
2485//===----------------------------------------------------------------------===//
2486// Addressing Mode Selection
2487//===----------------------------------------------------------------------===//
2488
2489/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2490/// or 64-bit immediate, and if the value can be accurately represented as a
 2491/// sign extension from a 16-bit value. If so, this returns true and sets Imm
 2492/// to the immediate.
2493bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2494 if (!isa<ConstantSDNode>(N))
2495 return false;
2496
2497 Imm = (int16_t)N->getAsZExtVal();
2498 if (N->getValueType(0) == MVT::i32)
2499 return Imm == (int32_t)N->getAsZExtVal();
2500 else
2501 return Imm == (int64_t)N->getAsZExtVal();
2502}
2503bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
 2504 return isIntS16Immediate(Op.getNode(), Imm);
2505}
2506
2507/// Used when computing address flags for selecting loads and stores.
2508/// If we have an OR, check if the LHS and RHS are provably disjoint.
2509/// An OR of two provably disjoint values is equivalent to an ADD.
2510/// Most PPC load/store instructions compute the effective address as a sum,
2511/// so doing this conversion is useful.
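// Illustrative example: in (or (shl X, 4), 3) the low four bits of the LHS are
// known to be zero and the RHS only occupies those bits, so every bit is known
// zero on at least one side and the OR can safely be treated as an ADD when
// computing address flags.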
2512static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2513 if (N.getOpcode() != ISD::OR)
2514 return false;
2515 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2516 if (!LHSKnown.Zero.getBoolValue())
2517 return false;
2518 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2519 return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2520}
2521
2522/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2523/// be represented as an indexed [r+r] operation.
2524bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
 2525                                                SDValue &Index,
2526 SelectionDAG &DAG) const {
2527 for (SDNode *U : N->users()) {
2528 if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
2529 if (Memop->getMemoryVT() == MVT::f64) {
2530 Base = N.getOperand(0);
2531 Index = N.getOperand(1);
2532 return true;
2533 }
2534 }
2535 }
2536 return false;
2537}
2538
2539/// isIntS34Immediate - This method tests whether the value of the given node
 2540/// can be accurately represented as a sign extension from a 34-bit value. If
 2541/// so, this returns true and sets Imm to the immediate.
2542bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2543 if (!isa<ConstantSDNode>(N))
2544 return false;
2545
2546 Imm = cast<ConstantSDNode>(N)->getSExtValue();
2547 return isInt<34>(Imm);
2548}
2549bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
 2550 return isIntS34Immediate(Op.getNode(), Imm);
2551}
2552
2553/// SelectAddressRegReg - Given the specified address, check to see if it
2554/// can be represented as an indexed [r+r] operation. Returns false if it
2555/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2556/// non-zero and N can be represented by a base register plus a signed 16-bit
2557/// displacement, make a more precise judgement by checking (displacement % \p
2558/// EncodingAlignment).
2559bool PPCTargetLowering::SelectAddressRegReg(
 2560    SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG,
2561 MaybeAlign EncodingAlignment) const {
2562 // If we have a PC Relative target flag don't select as [reg+reg]. It will be
2563 // a [pc+imm].
 2564 if (SelectAddressPCRel(N, Base))
 2565 return false;
2566
2567 int16_t Imm = 0;
2568 if (N.getOpcode() == ISD::ADD) {
 2569 // SPE f64 load/store can't handle a 16-bit offset; it only supports
 2570 // 8-bit offsets, so check for the EVX [r+r] form first.
2571 if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
2572 return true;
2573 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2574 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2575 return false; // r+i
2576 if (N.getOperand(1).getOpcode() == PPCISD::Lo)
2577 return false; // r+i
2578
2579 Base = N.getOperand(0);
2580 Index = N.getOperand(1);
2581 return true;
2582 } else if (N.getOpcode() == ISD::OR) {
2583 if (isIntS16Immediate(N.getOperand(1), Imm) &&
2584 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
2585 return false; // r+i can fold it if we can.
2586
2587 // If this is an or of disjoint bitfields, we can codegen this as an add
2588 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2589 // disjoint.
2590 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2591
2592 if (LHSKnown.Zero.getBoolValue()) {
2593 KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2594 // If all of the bits are known zero on the LHS or RHS, the add won't
2595 // carry.
2596 if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
2597 Base = N.getOperand(0);
2598 Index = N.getOperand(1);
2599 return true;
2600 }
2601 }
2602 }
2603
2604 return false;
2605}
2606
2607// If we happen to be doing an i64 load or store into a stack slot that has
2608// less than a 4-byte alignment, then the frame-index elimination may need to
2609// use an indexed load or store instruction (because the offset may not be a
2610// multiple of 4). The extra register needed to hold the offset comes from the
2611// register scavenger, and it is possible that the scavenger will need to use
2612// an emergency spill slot. As a result, we need to make sure that a spill slot
2613// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2614// stack slot.
2615static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2616 // FIXME: This does not handle the LWA case.
2617 if (VT != MVT::i64)
2618 return;
2619
2620 // NOTE: We'll exclude negative FIs here, which come from argument
2621 // lowering, because there are no known test cases triggering this problem
2622 // using packed structures (or similar). We can remove this exclusion if
2623 // we find such a test case. The reason why this is so test-case driven is
2624 // because this entire 'fixup' is only to prevent crashes (from the
2625 // register scavenger) on not-really-valid inputs. For example, if we have:
2626 // %a = alloca i1
2627 // %b = bitcast i1* %a to i64*
 2628 // store i64 0, i64* %b
2629 // then the store should really be marked as 'align 1', but is not. If it
2630 // were marked as 'align 1' then the indexed form would have been
2631 // instruction-selected initially, and the problem this 'fixup' is preventing
2632 // won't happen regardless.
2633 if (FrameIdx < 0)
2634 return;
2635
 2636 MachineFunction &MF = DAG.getMachineFunction();
 2637 MachineFrameInfo &MFI = MF.getFrameInfo();
2638
2639 if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2640 return;
2641
2642 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2643 FuncInfo->setHasNonRISpills();
2644}
2645
2646/// Returns true if the address N can be represented by a base register plus
2647/// a signed 16-bit displacement [r+imm], and if it is not better
2648/// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2649/// displacements that are multiples of that value.
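// Illustrative example: for (add X, 40) this returns Base = X and Disp = 40,
// which can be folded into a D-form access such as "lwz rD, 40(rX)". With
// EncodingAlignment = 4 (e.g. for DS-form ld/std), an unaligned offset such as
// 42 is instead left to the [r+r] path.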
2650bool PPCTargetLowering::SelectAddressRegImm(
 2651    SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
2652 MaybeAlign EncodingAlignment) const {
2653 // FIXME dl should come from parent load or store, not from address
2654 SDLoc dl(N);
2655
2656 // If we have a PC Relative target flag don't select as [reg+imm]. It will be
2657 // a [pc+imm].
 2658 if (SelectAddressPCRel(N, Base))
 2659 return false;
2660
2661 // If this can be more profitably realized as r+r, fail.
2662 if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
2663 return false;
2664
2665 if (N.getOpcode() == ISD::ADD) {
2666 int16_t imm = 0;
2667 if (isIntS16Immediate(N.getOperand(1), imm) &&
2668 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2669 Disp = DAG.getSignedTargetConstant(imm, dl, N.getValueType());
2670 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2671 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2672 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2673 } else {
2674 Base = N.getOperand(0);
2675 }
2676 return true; // [r+i]
2677 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
2678 // Match LOAD (ADD (X, Lo(G))).
2679 assert(!N.getOperand(1).getConstantOperandVal(1) &&
2680 "Cannot handle constant offsets yet!");
2681 Disp = N.getOperand(1).getOperand(0); // The global address.
 2682 assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
 2683        Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
 2684        Disp.getOpcode() == ISD::TargetConstantPool ||
 2685        Disp.getOpcode() == ISD::TargetJumpTable);
 2686 Base = N.getOperand(0);
2687 return true; // [&g+r]
2688 }
2689 } else if (N.getOpcode() == ISD::OR) {
2690 int16_t imm = 0;
2691 if (isIntS16Immediate(N.getOperand(1), imm) &&
2692 (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
2693 // If this is an or of disjoint bitfields, we can codegen this as an add
2694 // (for better address arithmetic) if the LHS and RHS of the OR are
2695 // provably disjoint.
2696 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2697
2698 if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
2699 // If all of the bits are known zero on the LHS or RHS, the add won't
2700 // carry.
2701 if (FrameIndexSDNode *FI =
2702 dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
2703 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2704 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2705 } else {
2706 Base = N.getOperand(0);
2707 }
2708 Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
2709 return true;
2710 }
2711 }
2712 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
2713 // Loading from a constant address.
2714
2715 // If this address fits entirely in a 16-bit sext immediate field, codegen
2716 // this as "d, 0"
2717 int16_t Imm;
2718 if (isIntS16Immediate(CN, Imm) &&
2719 (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
2720 Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
2721 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2722 CN->getValueType(0));
2723 return true;
2724 }
2725
2726 // Handle 32-bit sext immediates with LIS + addr mode.
2727 if ((CN->getValueType(0) == MVT::i32 ||
2728 (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
2729 (!EncodingAlignment ||
2730 isAligned(*EncodingAlignment, CN->getZExtValue()))) {
2731 int Addr = (int)CN->getZExtValue();
2732
2733 // Otherwise, break this down into an LIS + disp.
2734 Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);
2735
2736 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
2737 MVT::i32);
2738 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
2739 Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
2740 return true;
2741 }
2742 }
2743
2744 Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
 2745 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
 2746 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2747 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
2748 } else
2749 Base = N;
2750 return true; // [r+0]
2751}
2752
2753/// Similar to the 16-bit case but for instructions that take a 34-bit
2754/// displacement field (prefixed loads/stores).
2755bool PPCTargetLowering::SelectAddressRegImm34(SDValue N, SDValue &Disp,
 2756                                              SDValue &Base,
2757 SelectionDAG &DAG) const {
2758 // Only on 64-bit targets.
2759 if (N.getValueType() != MVT::i64)
2760 return false;
2761
2762 SDLoc dl(N);
2763 int64_t Imm = 0;
2764
2765 if (N.getOpcode() == ISD::ADD) {
2766 if (!isIntS34Immediate(N.getOperand(1), Imm))
2767 return false;
2768 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2769 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2770 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2771 else
2772 Base = N.getOperand(0);
2773 return true;
2774 }
2775
2776 if (N.getOpcode() == ISD::OR) {
2777 if (!isIntS34Immediate(N.getOperand(1), Imm))
2778 return false;
2779 // If this is an or of disjoint bitfields, we can codegen this as an add
2780 // (for better address arithmetic) if the LHS and RHS of the OR are
2781 // provably disjoint.
2782 KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2783 if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
2784 return false;
2785 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
2786 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
2787 else
2788 Base = N.getOperand(0);
2789 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2790 return true;
2791 }
2792
2793 if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
2794 Disp = DAG.getSignedTargetConstant(Imm, dl, N.getValueType());
2795 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
2796 return true;
2797 }
2798
2799 return false;
2800}
2801
2802/// SelectAddressRegRegOnly - Given the specified address, force it to be
2803/// represented as an indexed [r+r] operation.
2804bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
 2805                                                SDValue &Index,
2806 SelectionDAG &DAG) const {
2807 // Check to see if we can easily represent this as an [r+r] address. This
2808 // will fail if it thinks that the address is more profitably represented as
2809 // reg+imm, e.g. where imm = 0.
2810 if (SelectAddressRegReg(N, Base, Index, DAG))
2811 return true;
2812
2813 // If the address is the result of an add, we will utilize the fact that the
2814 // address calculation includes an implicit add. However, we can reduce
2815 // register pressure if we do not materialize a constant just for use as the
 2816 // index register. We only fold away the add if it is not an add of a
 2817 // value and a 16-bit signed constant where both operands have a single use.
2818 int16_t imm = 0;
2819 if (N.getOpcode() == ISD::ADD &&
2820 (!isIntS16Immediate(N.getOperand(1), imm) ||
2821 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
2822 Base = N.getOperand(0);
2823 Index = N.getOperand(1);
2824 return true;
2825 }
2826
2827 // Otherwise, do it the hard way, using R0 as the base register.
2828 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
2829 N.getValueType());
2830 Index = N;
2831 return true;
2832}
2833
2834template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2835 Ty *PCRelCand = dyn_cast<Ty>(N);
2836 return PCRelCand && (PPCInstrInfo::hasPCRelFlag(PCRelCand->getTargetFlags()));
2837}
2838
2839/// Returns true if this address is a PC Relative address.
2840/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2841/// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
2842bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const {
 2843 // This is a materialize PC Relative node. Always select this as PC Relative.
2844 Base = N;
2845 if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
2846 return true;
 2847 if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
 2848     isValidPCRelNode<GlobalAddressSDNode>(N) ||
 2849     isValidPCRelNode<JumpTableSDNode>(N) ||
 2850     isValidPCRelNode<BlockAddressSDNode>(N))
 2851 return true;
2852 return false;
2853}
2854
2855/// Returns true if we should use a direct load into vector instruction
2856/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
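// Illustrative example: a 64-bit scalar load whose only user is a
// scalar_to_vector can be selected as a single load into a VSX register (e.g.
// lxsd), rather than a load into a GPR followed by a direct move such as
// mtvsrd.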
2857static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {
2858
2859 // If there are any other uses other than scalar to vector, then we should
2860 // keep it as a scalar load -> direct move pattern to prevent multiple
2861 // loads.
 2862 LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
 2863 if (!LD)
2864 return false;
2865
2866 EVT MemVT = LD->getMemoryVT();
2867 if (!MemVT.isSimple())
2868 return false;
2869 switch(MemVT.getSimpleVT().SimpleTy) {
2870 case MVT::i64:
2871 break;
2872 case MVT::i32:
2873 if (!ST.hasP8Vector())
2874 return false;
2875 break;
2876 case MVT::i16:
2877 case MVT::i8:
2878 if (!ST.hasP9Vector())
2879 return false;
2880 break;
2881 default:
2882 return false;
2883 }
2884
2885 SDValue LoadedVal(N, 0);
2886 if (!LoadedVal.hasOneUse())
2887 return false;
2888
2889 for (SDUse &Use : LD->uses())
2890 if (Use.getResNo() == 0 &&
2891 Use.getUser()->getOpcode() != ISD::SCALAR_TO_VECTOR &&
2892 Use.getUser()->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
2893 return false;
2894
2895 return true;
2896}
2897
 2898/// getPreIndexedAddressParts - Returns true, and sets the base pointer, the
 2899/// offset, and the addressing mode by reference, if the node's address can be
 2900/// legally represented as a pre-indexed load / store address.
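// Illustrative example: a load from (add Ptr, 16) whose incremented address is
// reused later can be selected as an update-form access such as
// "lwzu rD, 16(rPtr)", which loads the value and writes the new address back
// into the base register in one instruction.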
2901bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
 2902                                                  SDValue &Offset,
 2903                                                  ISD::MemIndexedMode &AM,
 2904                                                  SelectionDAG &DAG) const {
2905 if (DisablePPCPreinc) return false;
2906
2907 bool isLoad = true;
2908 SDValue Ptr;
2909 EVT VT;
2910 Align Alignment;
2911 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2912 Ptr = LD->getBasePtr();
2913 VT = LD->getMemoryVT();
2914 Alignment = LD->getAlign();
2915 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
2916 Ptr = ST->getBasePtr();
2917 VT = ST->getMemoryVT();
2918 Alignment = ST->getAlign();
2919 isLoad = false;
2920 } else
2921 return false;
2922
2923 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2924 // instructions because we can fold these into a more efficient instruction
 2925 // instead (such as LXSD).
2926 if (isLoad && usePartialVectorLoads(N, Subtarget)) {
2927 return false;
2928 }
2929
2930 // PowerPC doesn't have preinc load/store instructions for vectors
2931 if (VT.isVector())
2932 return false;
2933
2934 if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
2935 // Common code will reject creating a pre-inc form if the base pointer
2936 // is a frame index, or if N is a store and the base pointer is either
2937 // the same as or a predecessor of the value being stored. Check for
2938 // those situations here, and try with swapped Base/Offset instead.
2939 bool Swap = false;
2940
 2941 if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
 2942 Swap = true;
2943 else if (!isLoad) {
2944 SDValue Val = cast<StoreSDNode>(N)->getValue();
2945 if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
2946 Swap = true;
2947 }
2948
2949 if (Swap)
 2950 std::swap(Base, Offset);
 2951
2952 AM = ISD::PRE_INC;
2953 return true;
2954 }
2955
2956 // LDU/STU can only handle immediates that are a multiple of 4.
2957 if (VT != MVT::i64) {
2958 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, std::nullopt))
2959 return false;
2960 } else {
2961 // LDU/STU need an address with at least 4-byte alignment.
2962 if (Alignment < Align(4))
2963 return false;
2964
2965 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
2966 return false;
2967 }
2968
2969 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
2970 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2971 // sext i32 to i64 when addr mode is r+i.
2972 if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
2973 LD->getExtensionType() == ISD::SEXTLOAD &&
 2974     isa<ConstantSDNode>(Offset))
 2975 return false;
2976 }
2977
2978 AM = ISD::PRE_INC;
2979 return true;
2980}
2981
2982//===----------------------------------------------------------------------===//
2983// LowerOperation implementation
2984//===----------------------------------------------------------------------===//
2985
2986/// Compute the HiOpFlags and LoOpFlags target MO flags used when referencing
 2987/// labels, selecting the PIC-specific flags for position-independent code.
2988static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
2989 unsigned &HiOpFlags, unsigned &LoOpFlags,
2990 const GlobalValue *GV = nullptr) {
2991 HiOpFlags = PPCII::MO_HA;
2992 LoOpFlags = PPCII::MO_LO;
2993
2994 // Don't use the pic base if not in PIC relocation model.
2995 if (IsPIC) {
2996 HiOpFlags = PPCII::MO_PIC_HA_FLAG;
2997 LoOpFlags = PPCII::MO_PIC_LO_FLAG;
2998 }
2999}
3000
3001static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3002 SelectionDAG &DAG) {
3003 SDLoc DL(HiPart);
3004 EVT PtrVT = HiPart.getValueType();
3005 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3006
3007 SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3008 SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3009
3010 // With PIC, the first instruction is actually "GR+hi(&G)".
3011 if (isPIC)
3012 Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3013 DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3014
3015 // Generate non-pic code that has direct accesses to the constant pool.
3016 // The address of the global is just (hi(&g)+lo(&g)).
3017 return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3018}
3019
3020static void setUsesTOCBasePtr(MachineFunction &MF) {
 3021 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3022 FuncInfo->setUsesTOCBasePtr();
3023}
 3024
3025static void setUsesTOCBasePtr(SelectionDAG &DAG) {
 3026 setUsesTOCBasePtr(DAG.getMachineFunction());
 3027}
3028
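// getTOCEntry below wraps the target address in a PPCISD::TOC_ENTRY node, a
// load of the entry relative to the TOC pointer (X2 on 64-bit, R2 on 32-bit
// AIX, or the computed global base register otherwise); on 64-bit ELF this
// typically materializes as something like "ld rD, sym@toc(r2)".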
3029SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
3030 SDValue GA) const {
3031 EVT VT = Subtarget.getScalarIntVT();
3032 SDValue Reg = Subtarget.isPPC64() ? DAG.getRegister(PPC::X2, VT)
3033 : Subtarget.isAIXABI()
3034 ? DAG.getRegister(PPC::R2, VT)
3035 : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
3036 SDValue Ops[] = { GA, Reg };
3037 return DAG.getMemIntrinsicNode(
3038 PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
3041}
3042
3043SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3044 SelectionDAG &DAG) const {
3045 EVT PtrVT = Op.getValueType();
3046 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3047 const Constant *C = CP->getConstVal();
3048
3049 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3050 // The actual address of the GlobalValue is stored in the TOC.
3051 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3052 if (Subtarget.isUsingPCRelativeCalls()) {
3053 SDLoc DL(CP);
3054 EVT Ty = getPointerTy(DAG.getDataLayout());
3055 SDValue ConstPool = DAG.getTargetConstantPool(
3056 C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3057 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3058 }
3059 setUsesTOCBasePtr(DAG);
3060 SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3061 return getTOCEntry(DAG, SDLoc(CP), GA);
3062 }
3063
3064 unsigned MOHiFlag, MOLoFlag;
3065 bool IsPIC = isPositionIndependent();
3066 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3067
3068 if (IsPIC && Subtarget.isSVR4ABI()) {
3069 SDValue GA =
3070 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3071 return getTOCEntry(DAG, SDLoc(CP), GA);
3072 }
3073
3074 SDValue CPIHi =
3075 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3076 SDValue CPILo =
3077 DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3078 return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3079}
3080
3081// For 64-bit PowerPC, prefer the more compact relative encodings.
3082// This trades 32 bits per jump table entry for one or two instructions
3083// at the jump site.
3090
3093 return false;
3094 if (Subtarget.isPPC64() || Subtarget.isAIXABI())
3095 return true;
3097}
3098
3100 SelectionDAG &DAG) const {
3101 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3103
3104 switch (getTargetMachine().getCodeModel()) {
3105 case CodeModel::Small:
3106 case CodeModel::Medium:
3108 default:
3109 return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
3111 }
3112}
3113
3114const MCExpr *
3116 unsigned JTI,
3117 MCContext &Ctx) const {
3118 if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
3120
3121 switch (getTargetMachine().getCodeModel()) {
3122 case CodeModel::Small:
3123 case CodeModel::Medium:
3125 default:
3126 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
3127 }
3128}
3129
3130SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
3131 EVT PtrVT = Op.getValueType();
3133
3134 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3135 if (Subtarget.isUsingPCRelativeCalls()) {
3136 SDLoc DL(JT);
3137 EVT Ty = getPointerTy(DAG.getDataLayout());
3138 SDValue GA =
3139 DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
3140 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3141 return MatAddr;
3142 }
3143
3144 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3145 // The actual address of the GlobalValue is stored in the TOC.
3146 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3147 setUsesTOCBasePtr(DAG);
3148 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3149 return getTOCEntry(DAG, SDLoc(JT), GA);
3150 }
3151
3152 unsigned MOHiFlag, MOLoFlag;
3153 bool IsPIC = isPositionIndependent();
3154 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3155
3156 if (IsPIC && Subtarget.isSVR4ABI()) {
3157 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
3159 return getTOCEntry(DAG, SDLoc(GA), GA);
3160 }
3161
3162 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
3163 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
3164 return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
3165}
3166
3167SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
3168 SelectionDAG &DAG) const {
3169 EVT PtrVT = Op.getValueType();
3170 BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
3171 const BlockAddress *BA = BASDN->getBlockAddress();
3172
3173 // isUsingPCRelativeCalls() returns true when PCRelative is enabled
3174 if (Subtarget.isUsingPCRelativeCalls()) {
3175 SDLoc DL(BASDN);
3176 EVT Ty = getPointerTy(DAG.getDataLayout());
3177 SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
3179 SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3180 return MatAddr;
3181 }
3182
3183 // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3184 // The actual BlockAddress is stored in the TOC.
3185 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3186 setUsesTOCBasePtr(DAG);
3187 SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
3188 return getTOCEntry(DAG, SDLoc(BASDN), GA);
3189 }
3190
3191 // 32-bit position-independent ELF stores the BlockAddress in the .got.
3192 if (Subtarget.is32BitELFABI() && isPositionIndependent())
3193 return getTOCEntry(
3194 DAG, SDLoc(BASDN),
3195 DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));
3196
3197 unsigned MOHiFlag, MOLoFlag;
3198 bool IsPIC = isPositionIndependent();
3199 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3200 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
3201 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
3202 return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
3203}
3204
3205SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3206 SelectionDAG &DAG) const {
3207 if (Subtarget.isAIXABI())
3208 return LowerGlobalTLSAddressAIX(Op, DAG);
3209
3210 return LowerGlobalTLSAddressLinux(Op, DAG);
3211}
3212
3213/// updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings,
3214/// and then apply the update.
3216 SelectionDAG &DAG,
3217 const TargetMachine &TM) {
3218 // Initialize TLS model opt setting lazily:
3219 // (1) Use initial-exec for single TLS var references within current function.
3220 // (2) Use local-dynamic for multiple TLS var references within current
3221 // function.
3222 PPCFunctionInfo *FuncInfo =
3224 if (!FuncInfo->isAIXFuncTLSModelOptInitDone()) {
3226 // Iterate over all instructions within current function, collect all TLS
3227 // global variables (global variables taken as the first parameter to
3228 // Intrinsic::threadlocal_address).
3229 const Function &Func = DAG.getMachineFunction().getFunction();
3230 for (const BasicBlock &BB : Func)
3231 for (const Instruction &I : BB)
3232 if (I.getOpcode() == Instruction::Call)
3233 if (const CallInst *CI = dyn_cast<const CallInst>(&I))
3234 if (Function *CF = CI->getCalledFunction())
3235 if (CF->isDeclaration() &&
3236 CF->getIntrinsicID() == Intrinsic::threadlocal_address)
3237 if (const GlobalValue *GV =
3238 dyn_cast<GlobalValue>(I.getOperand(0))) {
3239 TLSModel::Model GVModel = TM.getTLSModel(GV);
3240 if (GVModel == TLSModel::LocalDynamic)
3241 TLSGV.insert(GV);
3242 }
3243
3244 unsigned TLSGVCnt = TLSGV.size();
3245 LLVM_DEBUG(dbgs() << format("LocalDynamic TLSGV count:%d\n", TLSGVCnt));
3246 if (TLSGVCnt <= PPCAIXTLSModelOptUseIEForLDLimit)
3247 FuncInfo->setAIXFuncUseTLSIEForLD();
3249 }
3250
3251 if (FuncInfo->isAIXFuncUseTLSIEForLD()) {
3252 LLVM_DEBUG(
3253 dbgs() << DAG.getMachineFunction().getName()
3254 << " function is using the TLS-IE model for TLS-LD access.\n");
3255 Model = TLSModel::InitialExec;
3256 }
3257}
3258
3259SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3260 SelectionDAG &DAG) const {
3261 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3262
3263 if (DAG.getTarget().useEmulatedTLS())
3264 report_fatal_error("Emulated TLS is not yet supported on AIX");
3265
3266 SDLoc dl(GA);
3267 const GlobalValue *GV = GA->getGlobal();
3268 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3269 bool Is64Bit = Subtarget.isPPC64();
3271
3272 // Apply update to the TLS model.
3273 if (Subtarget.hasAIXShLibTLSModelOpt())
3275
3276 // TLS variables are accessed through TOC entries.
3277 // To support this, set the DAG to use the TOC base pointer.
3278 setUsesTOCBasePtr(DAG);
3279
3280 bool IsTLSLocalExecModel = Model == TLSModel::LocalExec;
3281
3282 if (IsTLSLocalExecModel || Model == TLSModel::InitialExec) {
3283 bool HasAIXSmallLocalExecTLS = Subtarget.hasAIXSmallLocalExecTLS();
3284 bool HasAIXSmallTLSGlobalAttr = false;
3285 SDValue VariableOffsetTGA =
3286 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TPREL_FLAG);
3287 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3288 SDValue TLSReg;
3289
3290 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
3291 if (GVar->hasAttribute("aix-small-tls"))
3292 HasAIXSmallTLSGlobalAttr = true;
3293
3294 if (Is64Bit) {
3295 // For local-exec and initial-exec on AIX (64-bit), the sequence generated
3296 // involves a load of the variable offset (from the TOC), followed by an
3297 // add of the loaded variable offset to R13 (the thread pointer).
3298 // This code sequence looks like:
3299 // ld reg1,var[TC](2)
3300 // add reg2, reg1, r13 // r13 contains the thread pointer
3301 TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3302
3303 // With the -maix-small-local-exec-tls option, or with the "aix-small-tls"
3304 // global variable attribute, produce a faster access sequence for
3305 // local-exec TLS variables where the offset from the TLS base is encoded
3306 // as an immediate operand.
3307 //
3308 // We only utilize the faster local-exec access sequence when the TLS
3309 // variable has a size within the policy limit. We treat types that are
3310 // not sized or are empty as being over the policy size limit.
3311 if ((HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr) &&
3312 IsTLSLocalExecModel) {
3313 Type *GVType = GV->getValueType();
3314 if (GVType->isSized() && !GVType->isEmptyTy() &&
3315 GV->getDataLayout().getTypeAllocSize(GVType) <=
3317 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA, TLSReg);
3318 }
3319 } else {
3320 // For local-exec and initial-exec on AIX (32-bit), the sequence generated
3321 // involves loading the variable offset from the TOC, generating a call to
3322 // .__get_tpointer to get the thread pointer (which will be in R3), and
3323 // adding the two together:
3324 // lwz reg1,var[TC](2)
3325 // bla .__get_tpointer
3326 // add reg2, reg1, r3
3327 TLSReg = DAG.getNode(PPCISD::GET_TPOINTER, dl, PtrVT);
3328
3329 // We do not implement the 32-bit version of the faster access sequence
3330 // for local-exec that is controlled by the -maix-small-local-exec-tls
3331 // option, or the "aix-small-tls" global variable attribute.
3332 if (HasAIXSmallLocalExecTLS || HasAIXSmallTLSGlobalAttr)
3333 report_fatal_error("The small-local-exec TLS access sequence is "
3334 "currently only supported on AIX (64-bit mode).");
3335 }
3336 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, VariableOffset);
3337 }
3338
3339 if (Model == TLSModel::LocalDynamic) {
3340 bool HasAIXSmallLocalDynamicTLS = Subtarget.hasAIXSmallLocalDynamicTLS();
3341
3342 // We do not implement the 32-bit version of the faster access sequence
3343 // for local-dynamic that is controlled by -maix-small-local-dynamic-tls.
3344 if (!Is64Bit && HasAIXSmallLocalDynamicTLS)
3345 report_fatal_error("The small-local-dynamic TLS access sequence is "
3346 "currently only supported on AIX (64-bit mode).");
3347
3348 // For local-dynamic on AIX, we need to generate one TOC entry for each
3349 // variable offset, and a single module-handle TOC entry for the entire
3350 // file.
3351
3352 SDValue VariableOffsetTGA =
3353 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSLD_FLAG);
3354 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3355
3357 GlobalVariable *TLSGV =
3358 dyn_cast_or_null<GlobalVariable>(M->getOrInsertGlobal(
3359 StringRef("_$TLSML"), PointerType::getUnqual(*DAG.getContext())));
3361 assert(TLSGV && "Not able to create GV for _$TLSML.");
3362 SDValue ModuleHandleTGA =
3363 DAG.getTargetGlobalAddress(TLSGV, dl, PtrVT, 0, PPCII::MO_TLSLDM_FLAG);
3364 SDValue ModuleHandleTOC = getTOCEntry(DAG, dl, ModuleHandleTGA);
3365 SDValue ModuleHandle =
3366 DAG.getNode(PPCISD::TLSLD_AIX, dl, PtrVT, ModuleHandleTOC);
3367
3368 // With the -maix-small-local-dynamic-tls option, produce a faster access
3369 // sequence for local-dynamic TLS variables where the offset from the
3370 // module-handle is encoded as an immediate operand.
3371 //
3372 // We only utilize the faster local-dynamic access sequence when the TLS
3373 // variable has a size within the policy limit. We treat types that are
3374 // not sized or are empty as being over the policy size limit.
3375 if (HasAIXSmallLocalDynamicTLS) {
3376 Type *GVType = GV->getValueType();
3377 if (GVType->isSized() && !GVType->isEmptyTy() &&
3378 GV->getDataLayout().getTypeAllocSize(GVType) <=
3380 return DAG.getNode(PPCISD::Lo, dl, PtrVT, VariableOffsetTGA,
3381 ModuleHandle);
3382 }
3383
3384 return DAG.getNode(ISD::ADD, dl, PtrVT, ModuleHandle, VariableOffset);
3385 }
3386
3387 // If Local- or Initial-exec or Local-dynamic is not possible or specified,
3388 // all GlobalTLSAddress nodes are lowered using the general-dynamic model. We
3389 // need to generate two TOC entries, one for the variable offset, one for the
3390 // region handle. The global address for the TOC entry of the region handle is
3391 // created with the MO_TLSGDM_FLAG flag and the global address for the TOC
3392 // entry of the variable offset is created with MO_TLSGD_FLAG.
3393 SDValue VariableOffsetTGA =
3394 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3395 SDValue RegionHandleTGA =
3396 DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3397 SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3398 SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3399 return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3400 RegionHandle);
3401}
3402
3403SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3404 SelectionDAG &DAG) const {
3405 // FIXME: TLS addresses currently use medium model code sequences,
3406 // which is the most useful form. Eventually support for small and
3407 // large models could be added if users need it, at the cost of
3408 // additional complexity.
3409 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3410 if (DAG.getTarget().useEmulatedTLS())
3411 return LowerToTLSEmulatedModel(GA, DAG);
3412
3413 SDLoc dl(GA);
3414 const GlobalValue *GV = GA->getGlobal();
3415 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3416 bool is64bit = Subtarget.isPPC64();
3417 const Module *M = DAG.getMachineFunction().getFunction().getParent();
3418 PICLevel::Level picLevel = M->getPICLevel();
3419
3420 const TargetMachine &TM = getTargetMachine();
3421 TLSModel::Model Model = TM.getTLSModel(GV);
3422
3423 if (Model == TLSModel::LocalExec) {
3424 if (Subtarget.isUsingPCRelativeCalls()) {
3425 SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3426 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3428 SDValue MatAddr =
3429 DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3430 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3431 }
3432
3433 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3435 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3437 SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3438 : DAG.getRegister(PPC::R2, MVT::i32);
3439
3440 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3441 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3442 }
3443
3444 if (Model == TLSModel::InitialExec) {
3445 bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3447 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3448 SDValue TGATLS = DAG.getTargetGlobalAddress(
3449 GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_TLS_PCREL_FLAG : PPCII::MO_TLS);
3450 SDValue TPOffset;
3451 if (IsPCRel) {
3452 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3453 TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3454 MachinePointerInfo());
3455 } else {
3456 SDValue GOTPtr;
3457 if (is64bit) {
3458 setUsesTOCBasePtr(DAG);
3459 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3460 GOTPtr =
3461 DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3462 } else {
3463 if (!TM.isPositionIndependent())
3464 GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3465 else if (picLevel == PICLevel::SmallPIC)
3466 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3467 else
3468 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3469 }
3470 TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3471 }
3472 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3473 }
3474
3475 if (Model == TLSModel::GeneralDynamic) {
3476 if (Subtarget.isUsingPCRelativeCalls()) {
3477 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3479 return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3480 }
3481
3482 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3483 SDValue GOTPtr;
3484 if (is64bit) {
3485 setUsesTOCBasePtr(DAG);
3486 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3487 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3488 GOTReg, TGA);
3489 } else {
3490 if (picLevel == PICLevel::SmallPIC)
3491 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3492 else
3493 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3494 }
3495 return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3496 GOTPtr, TGA, TGA);
3497 }
3498
3499 if (Model == TLSModel::LocalDynamic) {
3500 if (Subtarget.isUsingPCRelativeCalls()) {
3501 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3503 SDValue MatPCRel =
3504 DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3505 return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3506 }
3507
3508 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3509 SDValue GOTPtr;
3510 if (is64bit) {
3511 setUsesTOCBasePtr(DAG);
3512 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3513 GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3514 GOTReg, TGA);
3515 } else {
3516 if (picLevel == PICLevel::SmallPIC)
3517 GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3518 else
3519 GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3520 }
3521 SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3522 PtrVT, GOTPtr, TGA, TGA);
3523 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3524 PtrVT, TLSAddr, TGA);
3525 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3526 }
3527
3528 llvm_unreachable("Unknown TLS model!");
3529}
3530
3531SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3532 SelectionDAG &DAG) const {
3533 EVT PtrVT = Op.getValueType();
3534 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3535 SDLoc DL(GSDN);
3536 const GlobalValue *GV = GSDN->getGlobal();
3537
3538 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3539 // The actual address of the GlobalValue is stored in the TOC.
3540 if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3541 if (Subtarget.isUsingPCRelativeCalls()) {
3542 EVT Ty = getPointerTy(DAG.getDataLayout());
3544 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3546 SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3547 SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3548 MachinePointerInfo());
3549 return Load;
3550 } else {
3551 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3553 return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3554 }
3555 }
3556 setUsesTOCBasePtr(DAG);
3557 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3558 return getTOCEntry(DAG, DL, GA);
3559 }
3560
3561 unsigned MOHiFlag, MOLoFlag;
3562 bool IsPIC = isPositionIndependent();
3563 getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3564
3565 if (IsPIC && Subtarget.isSVR4ABI()) {
3566 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3567 GSDN->getOffset(),
3568 PPCII::MO_PIC_FLAG);
3569 return getTOCEntry(DAG, DL, GA);
3570 }
3571
3572 SDValue GAHi =
3573 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3574 SDValue GALo =
3575 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3576
3577 return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3578}
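// For reference only: on 32-bit targets the GAHi/GALo pair built above is
// typically selected into a two-instruction materialization such as
//   lis  r4, sym@ha
//   addi r4, r4, sym@l
// (or the PIC-flavored equivalent chosen by getLabelAccessInfo); the TOC and
// PC-relative paths earlier in this function return before reaching it.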
3579
3580SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3581 bool IsStrict = Op->isStrictFPOpcode();
3582 ISD::CondCode CC =
3583 cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3584 SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3585 SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3586 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3587 EVT LHSVT = LHS.getValueType();
3588 SDLoc dl(Op);
3589
3590 // Soften the setcc with libcall if it is fp128.
3591 if (LHSVT == MVT::f128) {
3592 assert(!Subtarget.hasP9Vector() &&
3593 "SETCC for f128 is already legal under Power9!");
3594 softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3595 Op->getOpcode() == ISD::STRICT_FSETCCS);
3596 if (RHS.getNode())
3597 LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3598 DAG.getCondCode(CC));
3599 if (IsStrict)
3600 return DAG.getMergeValues({LHS, Chain}, dl);
3601 return LHS;
3602 }
3603
3604 assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3605
3606 if (Op.getValueType() == MVT::v2i64) {
3607 // When the operands themselves are v2i64 values, we need to do something
3608 // special because VSX has no underlying comparison operations for these.
3609 if (LHS.getValueType() == MVT::v2i64) {
3610 // Equality can be handled by casting to the legal type for Altivec
3611 // comparisons, everything else needs to be expanded.
3612 if (CC != ISD::SETEQ && CC != ISD::SETNE)
3613 return SDValue();
3614 SDValue SetCC32 = DAG.getSetCC(
3615 dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3616 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3617 int ShuffV[] = {1, 0, 3, 2};
3618 SDValue Shuff =
3619 DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3620 return DAG.getBitcast(MVT::v2i64,
3621 DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3622 dl, MVT::v4i32, Shuff, SetCC32));
3623 }
3624
3625 // We handle most of these in the usual way.
3626 return Op;
3627 }
3628
3629 // If we're comparing for equality to zero, expose the fact that this is
3630 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3631 // fold the new nodes.
3632 if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3633 return V;
3634
3635 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3636 // Leave comparisons against 0 and -1 alone for now, since they're usually
3637 // optimized. FIXME: revisit this when we can custom lower all setcc
3638 // optimizations.
3639 if (C->isAllOnes() || C->isZero())
3640 return SDValue();
3641 }
3642
3643 // If we have an integer seteq/setne, turn it into a compare against zero
3644 // by xor'ing the rhs with the lhs, which is faster than setting a
3645 // condition register, reading it back out, and masking the correct bit. The
3646 // normal approach here uses sub to do this instead of xor. Using xor exposes
3647 // the result to other bit-twiddling opportunities.
3648 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3649 EVT VT = Op.getValueType();
3650 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3651 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3652 }
3653 return SDValue();
3654}
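// Worked example for the integer equality path above (illustrative): an
// (i32 seteq %a, %b) becomes (seteq (xor %a, %b), 0), and the zero compare
// can then be folded by lowerCmpEqZeroToCtlzSrl / DAG combine into roughly
// (srl (ctlz (xor %a, %b)), 5), which yields 1 exactly when %a == %b since
// ctlz of zero is 32 for a 32-bit value.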
3655
3656SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3657 SDNode *Node = Op.getNode();
3658 EVT VT = Node->getValueType(0);
3659 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3660 SDValue InChain = Node->getOperand(0);
3661 SDValue VAListPtr = Node->getOperand(1);
3662 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3663 SDLoc dl(Node);
3664
3665 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3666
3667 // gpr_index
3668 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3669 VAListPtr, MachinePointerInfo(SV), MVT::i8);
3670 InChain = GprIndex.getValue(1);
3671
3672 if (VT == MVT::i64) {
3673 // Check if GprIndex is even
3674 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3675 DAG.getConstant(1, dl, MVT::i32));
3676 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3677 DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3678 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3679 DAG.getConstant(1, dl, MVT::i32));
3680 // Align GprIndex to be even if it isn't
3681 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3682 GprIndex);
3683 }
3684
3685 // fpr index is 1 byte after gpr
3686 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3687 DAG.getConstant(1, dl, MVT::i32));
3688
3689 // fpr
3690 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3691 FprPtr, MachinePointerInfo(SV), MVT::i8);
3692 InChain = FprIndex.getValue(1);
3693
3694 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3695 DAG.getConstant(8, dl, MVT::i32));
3696
3697 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3698 DAG.getConstant(4, dl, MVT::i32));
3699
3700 // areas
3701 SDValue OverflowArea =
3702 DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3703 InChain = OverflowArea.getValue(1);
3704
3705 SDValue RegSaveArea =
3706 DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3707 InChain = RegSaveArea.getValue(1);
3708
3709 // select overflow_area if index >= 8
3710 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3711 DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3712
3713 // adjustment constant gpr_index * 4/8
3714 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3715 VT.isInteger() ? GprIndex : FprIndex,
3716 DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3717 MVT::i32));
3718
3719 // OurReg = RegSaveArea + RegConstant
3720 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3721 RegConstant);
3722
3723 // Floating types are 32 bytes into RegSaveArea
3724 if (VT.isFloatingPoint())
3725 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3726 DAG.getConstant(32, dl, MVT::i32));
3727
3728 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3729 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3730 VT.isInteger() ? GprIndex : FprIndex,
3731 DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3732 MVT::i32));
3733
3734 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3735 VT.isInteger() ? VAListPtr : FprPtr,
3736 MachinePointerInfo(SV), MVT::i8);
3737
3738 // determine if we should load from reg_save_area or overflow_area
3739 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3740
3741 // increase overflow_area by 4/8 if gpr/fpr >= 8
3742 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3743 DAG.getConstant(VT.isInteger() ? 4 : 8,
3744 dl, MVT::i32));
3745
3746 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3747 OverflowAreaPlusN);
3748
3749 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3750 MachinePointerInfo(), MVT::i32);
3751
3752 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3753}
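// Summary of the selection above (PPC32 SVR4 va_arg): while gpr_index/fpr_index
// is still below 8 the value is read from reg_save_area (+ 4*gpr_index for
// integers, + 32 + 8*fpr_index for doubles, the FPR slots following the eight
// 4-byte GPR slots); once the index reaches 8 the load comes from
// overflow_arg_area, which is then advanced by 4 or 8 bytes.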
3754
3755SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3756 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3757
3758 // We have to copy the entire va_list struct:
3759 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3760 return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3761 DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3762 false, true, /*CI=*/nullptr, std::nullopt,
3763 MachinePointerInfo(), MachinePointerInfo());
3764}
3765
3766SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3767 SelectionDAG &DAG) const {
3768 return Op.getOperand(0);
3769}
3770
3771SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3772 MachineFunction &MF = DAG.getMachineFunction();
3773 PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3774
3775 assert((Op.getOpcode() == ISD::INLINEASM ||
3776 Op.getOpcode() == ISD::INLINEASM_BR) &&
3777 "Expecting Inline ASM node.");
3778
3779 // If an LR store is already known to be required then there is no point in
3780 // checking this ASM as well.
3781 if (MFI.isLRStoreRequired())
3782 return Op;
3783
3784 // Inline ASM nodes have an optional last operand that is an incoming Flag of
3785 // type MVT::Glue. We want to ignore this last operand if that is the case.
3786 unsigned NumOps = Op.getNumOperands();
3787 if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3788 --NumOps;
3789
3790 // Check all operands that may contain the LR.
3791 for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3792 const InlineAsm::Flag Flags(Op.getConstantOperandVal(i));
3793 unsigned NumVals = Flags.getNumOperandRegisters();
3794 ++i; // Skip the ID value.
3795
3796 switch (Flags.getKind()) {
3797 default:
3798 llvm_unreachable("Bad flags!");
3802 i += NumVals;
3803 break;
3804 case InlineAsm::Kind::Clobber:
3805 case InlineAsm::Kind::RegDef:
3806 case InlineAsm::Kind::RegDefEarlyClobber: {
3807 for (; NumVals; --NumVals, ++i) {
3808 Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3809 if (Reg != PPC::LR && Reg != PPC::LR8)
3810 continue;
3811 MFI.setLRStoreRequired();
3812 return Op;
3813 }
3814 break;
3815 }
3816 }
3817 }
3818
3819 return Op;
3820}
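// Example of what the operand walk above catches (hypothetical user code):
//   asm volatile("bl callee" : : : "lr");
// lists LR among its clobbers, so the scan sees PPC::LR/PPC::LR8 in a
// register operand group and marks the function as requiring an LR store.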
3821
3822SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3823 SelectionDAG &DAG) const {
3824 SDValue Chain = Op.getOperand(0);
3825 SDValue Trmp = Op.getOperand(1); // trampoline
3826 SDValue FPtr = Op.getOperand(2); // nested function
3827 SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3828 SDLoc dl(Op);
3829
3830 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3831
3832 if (Subtarget.isAIXABI()) {
3833 // On AIX we create a trampoline descriptor by combining the
3834 // entry point and TOC from the global descriptor (FPtr) with the
3835 // nest argument as the environment pointer.
3836 uint64_t PointerSize = Subtarget.isPPC64() ? 8 : 4;
3837 MaybeAlign PointerAlign(PointerSize);
3838 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
3839 ? (MachineMemOperand::MODereferenceable |
3840 MachineMemOperand::MOInvariant)
3841 : MachineMemOperand::MONone;
3842
3843 uint64_t TOCPointerOffset = 1 * PointerSize;
3844 uint64_t EnvPointerOffset = 2 * PointerSize;
3845 SDValue SDTOCPtrOffset = DAG.getConstant(TOCPointerOffset, dl, PtrVT);
3846 SDValue SDEnvPtrOffset = DAG.getConstant(EnvPointerOffset, dl, PtrVT);
3847
3848 const Value *TrampolineAddr =
3849 cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3850 const Function *Func =
3851 cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
3852
3853 SDValue OutChains[3];
3854
3855 // Copy the entry point address from the global descriptor to the
3856 // trampoline buffer.
3857 SDValue LoadEntryPoint =
3858 DAG.getLoad(PtrVT, dl, Chain, FPtr, MachinePointerInfo(Func, 0),
3859 PointerAlign, MMOFlags);
3860 SDValue EPLoadChain = LoadEntryPoint.getValue(1);
3861 OutChains[0] = DAG.getStore(EPLoadChain, dl, LoadEntryPoint, Trmp,
3862 MachinePointerInfo(TrampolineAddr, 0));
3863
3864 // Copy the TOC pointer from the global descriptor to the trampoline
3865 // buffer.
3866 SDValue TOCFromDescriptorPtr =
3867 DAG.getNode(ISD::ADD, dl, PtrVT, FPtr, SDTOCPtrOffset);
3868 SDValue TOCReg = DAG.getLoad(PtrVT, dl, Chain, TOCFromDescriptorPtr,
3869 MachinePointerInfo(Func, TOCPointerOffset),
3870 PointerAlign, MMOFlags);
3871 SDValue TrampolineTOCPointer =
3872 DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDTOCPtrOffset);
3873 SDValue TOCLoadChain = TOCReg.getValue(1);
3874 OutChains[1] =
3875 DAG.getStore(TOCLoadChain, dl, TOCReg, TrampolineTOCPointer,
3876 MachinePointerInfo(TrampolineAddr, TOCPointerOffset));
3877
3878 // Store the nest argument into the environment pointer in the trampoline
3879 // buffer.
3880 SDValue EnvPointer = DAG.getNode(ISD::ADD, dl, PtrVT, Trmp, SDEnvPtrOffset);
3881 OutChains[2] =
3882 DAG.getStore(Chain, dl, Nest, EnvPointer,
3883 MachinePointerInfo(TrampolineAddr, EnvPointerOffset));
3884
3885 SDValue TokenFactor =
3886 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
3887 return TokenFactor;
3888 }
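// In effect, the AIX path above writes a three-slot function descriptor into
// the trampoline buffer: slot 0 is the entry point copied from FPtr's
// descriptor, slot 1 is the TOC pointer copied from the same descriptor, and
// slot 2 is the 'nest' value used as the environment pointer (slots are 4 or
// 8 bytes depending on the target).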
3889
3890 bool isPPC64 = (PtrVT == MVT::i64);
3891 Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3892
3893 TargetLowering::ArgListTy Args;
3894 Args.emplace_back(Trmp, IntPtrTy);
3895 // TrampSize == (isPPC64 ? 48 : 40);
3896 Args.emplace_back(
3897 DAG.getConstant(isPPC64 ? 48 : 40, dl, Subtarget.getScalarIntVT()),
3898 IntPtrTy);
3899 Args.emplace_back(FPtr, IntPtrTy);
3900 Args.emplace_back(Nest, IntPtrTy);
3901
3902 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3903 TargetLowering::CallLoweringInfo CLI(DAG);
3904 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3906 DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3907
3908 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3909 return CallResult.second;
3910}
3911
3912SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3913 MachineFunction &MF = DAG.getMachineFunction();
3914 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3915 EVT PtrVT = getPointerTy(MF.getDataLayout());
3916
3917 SDLoc dl(Op);
3918
3919 if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3920 // vastart just stores the address of the VarArgsFrameIndex slot into the
3921 // memory location argument.
3922 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3923 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3924 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3925 MachinePointerInfo(SV));
3926 }
3927
3928 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3929 // We suppose the given va_list is already allocated.
3930 //
3931 // typedef struct {
3932 // char gpr; /* index into the array of 8 GPRs
3933 // * stored in the register save area
3934 // * gpr=0 corresponds to r3,
3935 // * gpr=1 to r4, etc.
3936 // */
3937 // char fpr; /* index into the array of 8 FPRs
3938 // * stored in the register save area
3939 // * fpr=0 corresponds to f1,
3940 // * fpr=1 to f2, etc.
3941 // */
3942 // char *overflow_arg_area;
3943 // /* location on stack that holds
3944 // * the next overflow argument
3945 // */
3946 // char *reg_save_area;
3947 // /* where r3:r10 and f1:f8 (if saved)
3948 // * are stored
3949 // */
3950 // } va_list[1];
3951
3952 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3953 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3954 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3955 PtrVT);
3956 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3957 PtrVT);
3958
3959 uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3960 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3961
3962 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3963 SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3964
3965 uint64_t FPROffset = 1;
3966 SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3967
3968 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3969
3970 // Store first byte : number of int regs
3971 SDValue firstStore =
3972 DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3973 MachinePointerInfo(SV), MVT::i8);
3974 uint64_t nextOffset = FPROffset;
3975 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3976 ConstFPROffset);
3977
3978 // Store second byte : number of float regs
3979 SDValue secondStore =
3980 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3981 MachinePointerInfo(SV, nextOffset), MVT::i8);
3982 nextOffset += StackOffset;
3983 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3984
3985 // Store second word : arguments given on stack
3986 SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3987 MachinePointerInfo(SV, nextOffset));
3988 nextOffset += FrameOffset;
3989 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3990
3991 // Store third word : arguments given in registers
3992 return DAG.getStore(thirdStore, dl, FR, nextPtr,
3993 MachinePointerInfo(SV, nextOffset));
3994}
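// Net layout produced by the stores above for 32-bit SVR4 (PtrVT == i32):
// byte 0 holds the GPR count, byte 1 the FPR count, bytes 4-7 the
// overflow_arg_area pointer and bytes 8-11 the reg_save_area pointer,
// matching the va_list structure described in the comment above.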
3995
3996/// FPR - The set of FP registers that should be allocated for arguments
3997/// on Darwin and AIX.
3998static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
3999 PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
4000 PPC::F11, PPC::F12, PPC::F13};
4001
4002/// CalculateStackSlotSize - Calculates the size reserved for this argument on
4003/// the stack.
4004static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
4005 unsigned PtrByteSize) {
4006 unsigned ArgSize = ArgVT.getStoreSize();
4007 if (Flags.isByVal())
4008 ArgSize = Flags.getByValSize();
4009
4010 // Round up to multiples of the pointer size, except for array members,
4011 // which are always packed.
4012 if (!Flags.isInConsecutiveRegs())
4013 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4014
4015 return ArgSize;
4016}
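// Worked example (hypothetical values): with PtrByteSize == 8, an i32 has a
// 4-byte store size and is rounded up to an 8-byte slot, a 13-byte byval
// aggregate is rounded up to 16 bytes, and a member of a split
// "consecutive registers" array keeps its packed size with no rounding here.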
4017
4018/// CalculateStackSlotAlignment - Calculates the alignment of this argument
4019/// on the stack.
4020 static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
4021 ISD::ArgFlagsTy Flags,
4022 unsigned PtrByteSize) {
4023 Align Alignment(PtrByteSize);
4024
4025 // Altivec parameters are padded to a 16 byte boundary.
4026 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4027 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4028 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4029 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4030 Alignment = Align(16);
4031
4032 // ByVal parameters are aligned as requested.
4033 if (Flags.isByVal()) {
4034 auto BVAlign = Flags.getNonZeroByValAlign();
4035 if (BVAlign > PtrByteSize) {
4036 if (BVAlign.value() % PtrByteSize != 0)
4038 "ByVal alignment is not a multiple of the pointer size");
4039
4040 Alignment = BVAlign;
4041 }
4042 }
4043
4044 // Array members are always packed to their original alignment.
4045 if (Flags.isInConsecutiveRegs()) {
4046 // If the array member was split into multiple registers, the first
4047 // needs to be aligned to the size of the full type. (Except for
4048 // ppcf128, which is only aligned as its f64 components.)
4049 if (Flags.isSplit() && OrigVT != MVT::ppcf128)
4050 Alignment = Align(OrigVT.getStoreSize());
4051 else
4052 Alignment = Align(ArgVT.getStoreSize());
4053 }
4054
4055 return Alignment;
4056}
4057
4058/// CalculateStackSlotUsed - Return whether this argument will use its
4059/// stack slot (instead of being passed in registers). ArgOffset,
4060/// AvailableFPRs, and AvailableVRs must hold the current argument
4061/// position, and will be updated to account for this argument.
4062static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
4063 unsigned PtrByteSize, unsigned LinkageSize,
4064 unsigned ParamAreaSize, unsigned &ArgOffset,
4065 unsigned &AvailableFPRs,
4066 unsigned &AvailableVRs) {
4067 bool UseMemory = false;
4068
4069 // Respect alignment of argument on the stack.
4070 Align Alignment =
4071 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
4072 ArgOffset = alignTo(ArgOffset, Alignment);
4073 // If there's no space left in the argument save area, we must
4074 // use memory (this check also catches zero-sized arguments).
4075 if (ArgOffset >= LinkageSize + ParamAreaSize)
4076 UseMemory = true;
4077
4078 // Allocate argument on the stack.
4079 ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
4080 if (Flags.isInConsecutiveRegsLast())
4081 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4082 // If we overran the argument save area, we must use memory
4083 // (this check catches arguments passed partially in memory)
4084 if (ArgOffset > LinkageSize + ParamAreaSize)
4085 UseMemory = true;
4086
4087 // However, if the argument is actually passed in an FPR or a VR,
4088 // we don't use memory after all.
4089 if (!Flags.isByVal()) {
4090 if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
4091 if (AvailableFPRs > 0) {
4092 --AvailableFPRs;
4093 return false;
4094 }
4095 if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
4096 ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
4097 ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
4098 ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
4099 if (AvailableVRs > 0) {
4100 --AvailableVRs;
4101 return false;
4102 }
4103 }
4104
4105 return UseMemory;
4106}
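// Worked example (hypothetical values, ELFv2: LinkageSize 32, ParamAreaSize
// 64): a ninth i64 argument starts at offset 32 + 64, so the function returns
// true and the argument lives in memory, while an f64 for which an FPR is
// still available returns false even though ArgOffset is still advanced by 8.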
4107
4108/// EnsureStackAlignment - Round stack frame size up from NumBytes to
4109/// ensure minimum alignment required for target.
4110 static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
4111 unsigned NumBytes) {
4112 return alignTo(NumBytes, Lowering->getStackAlign());
4113}
4114
4115SDValue PPCTargetLowering::LowerFormalArguments(
4116 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4117 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4118 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4119 if (Subtarget.isAIXABI())
4120 return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
4121 InVals);
4122 if (Subtarget.is64BitELFABI())
4123 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4124 InVals);
4125 assert(Subtarget.is32BitELFABI());
4126 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
4127 InVals);
4128}
4129
4130SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
4131 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4132 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4133 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4134
4135 // 32-bit SVR4 ABI Stack Frame Layout:
4136 // +-----------------------------------+
4137 // +--> | Back chain |
4138 // | +-----------------------------------+
4139 // | | Floating-point register save area |
4140 // | +-----------------------------------+
4141 // | | General register save area |
4142 // | +-----------------------------------+
4143 // | | CR save word |
4144 // | +-----------------------------------+
4145 // | | VRSAVE save word |
4146 // | +-----------------------------------+
4147 // | | Alignment padding |
4148 // | +-----------------------------------+
4149 // | | Vector register save area |
4150 // | +-----------------------------------+
4151 // | | Local variable space |
4152 // | +-----------------------------------+
4153 // | | Parameter list area |
4154 // | +-----------------------------------+
4155 // | | LR save word |
4156 // | +-----------------------------------+
4157 // SP--> +--- | Back chain |
4158 // +-----------------------------------+
4159 //
4160 // Specifications:
4161 // System V Application Binary Interface PowerPC Processor Supplement
4162 // AltiVec Technology Programming Interface Manual
4163
4164 MachineFunction &MF = DAG.getMachineFunction();
4165 MachineFrameInfo &MFI = MF.getFrameInfo();
4166 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4167
4168 EVT PtrVT = getPointerTy(MF.getDataLayout());
4169 // Potential tail calls could cause overwriting of argument stack slots.
4170 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4171 (CallConv == CallingConv::Fast));
4172 const Align PtrAlign(4);
4173
4174 // Assign locations to all of the incoming arguments.
4175 SmallVector<CCValAssign, 16> ArgLocs;
4176 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4177 *DAG.getContext());
4178
4179 // Reserve space for the linkage area on the stack.
4180 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4181 CCInfo.AllocateStack(LinkageSize, PtrAlign);
4182 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4183
4184 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4185 CCValAssign &VA = ArgLocs[i];
4186
4187 // Arguments stored in registers.
4188 if (VA.isRegLoc()) {
4189 const TargetRegisterClass *RC;
4190 EVT ValVT = VA.getValVT();
4191
4192 switch (ValVT.getSimpleVT().SimpleTy) {
4193 default:
4194 llvm_unreachable("ValVT not supported by formal arguments Lowering");
4195 case MVT::i1:
4196 case MVT::i32:
4197 RC = &PPC::GPRCRegClass;
4198 break;
4199 case MVT::f32:
4200 if (Subtarget.hasP8Vector())
4201 RC = &PPC::VSSRCRegClass;
4202 else if (Subtarget.hasSPE())
4203 RC = &PPC::GPRCRegClass;
4204 else
4205 RC = &PPC::F4RCRegClass;
4206 break;
4207 case MVT::f64:
4208 if (Subtarget.hasVSX())
4209 RC = &PPC::VSFRCRegClass;
4210 else if (Subtarget.hasSPE())
4211 // SPE passes doubles in GPR pairs.
4212 RC = &PPC::GPRCRegClass;
4213 else
4214 RC = &PPC::F8RCRegClass;
4215 break;
4216 case MVT::v16i8:
4217 case MVT::v8i16:
4218 case MVT::v4i32:
4219 RC = &PPC::VRRCRegClass;
4220 break;
4221 case MVT::v4f32:
4222 RC = &PPC::VRRCRegClass;
4223 break;
4224 case MVT::v2f64:
4225 case MVT::v2i64:
4226 RC = &PPC::VRRCRegClass;
4227 break;
4228 }
4229
4230 SDValue ArgValue;
4231 // Transform the arguments stored in physical registers into
4232 // virtual ones.
4233 if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4234 assert(i + 1 < e && "No second half of double precision argument");
4235 Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4236 Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4237 SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4238 SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4239 if (!Subtarget.isLittleEndian())
4240 std::swap (ArgValueLo, ArgValueHi);
4241 ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4242 ArgValueHi);
4243 } else {
4244 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4245 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4246 ValVT == MVT::i1 ? MVT::i32 : ValVT);
4247 if (ValVT == MVT::i1)
4248 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4249 }
4250
4251 InVals.push_back(ArgValue);
4252 } else {
4253 // Argument stored in memory.
4254 assert(VA.isMemLoc());
4255
4256 // Get the extended size of the argument type in stack
4257 unsigned ArgSize = VA.getLocVT().getStoreSize();
4258 // Get the actual size of the argument type
4259 unsigned ObjSize = VA.getValVT().getStoreSize();
4260 unsigned ArgOffset = VA.getLocMemOffset();
4261 // Stack objects in PPC32 are right justified.
4262 ArgOffset += ArgSize - ObjSize;
4263 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4264
4265 // Create load nodes to retrieve arguments from the stack.
4266 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4267 InVals.push_back(
4268 DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4269 }
4270 }
4271
4272 // Assign locations to all of the incoming aggregate by value arguments.
4273 // Aggregates passed by value are stored in the local variable space of the
4274 // caller's stack frame, right above the parameter list area.
4275 SmallVector<CCValAssign, 16> ByValArgLocs;
4276 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4277 ByValArgLocs, *DAG.getContext());
4278
4279 // Reserve stack space for the allocations in CCInfo.
4280 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
4281
4282 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4283
4284 // Area that is at least reserved in the caller of this function.
4285 unsigned MinReservedArea = CCByValInfo.getStackSize();
4286 MinReservedArea = std::max(MinReservedArea, LinkageSize);
4287
4288 // Set the size that is at least reserved in caller of this function. Tail
4289 // call optimized function's reserved stack space needs to be aligned so that
4290 // taking the difference between two stack areas will result in an aligned
4291 // stack.
4292 MinReservedArea =
4293 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4294 FuncInfo->setMinReservedArea(MinReservedArea);
4295
4296 SmallVector<SDValue, 8> MemOps;
4297
4298 // If the function takes variable number of arguments, make a frame index for
4299 // the start of the first vararg value... for expansion of llvm.va_start.
4300 if (isVarArg) {
4301 static const MCPhysReg GPArgRegs[] = {
4302 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4303 PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4304 };
4305 const unsigned NumGPArgRegs = std::size(GPArgRegs);
4306
4307 static const MCPhysReg FPArgRegs[] = {
4308 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4309 PPC::F8
4310 };
4311 unsigned NumFPArgRegs = std::size(FPArgRegs);
4312
4313 if (useSoftFloat() || hasSPE())
4314 NumFPArgRegs = 0;
4315
4316 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4317 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4318
4319 // Make room for NumGPArgRegs and NumFPArgRegs.
4320 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4321 NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4322
4323 FuncInfo->setVarArgsStackOffset(MFI.CreateFixedObject(
4324 PtrVT.getSizeInBits() / 8, CCInfo.getStackSize(), true));
4325
4326 FuncInfo->setVarArgsFrameIndex(
4327 MFI.CreateStackObject(Depth, Align(8), false));
4328 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4329
4330 // The fixed integer arguments of a variadic function are stored to the
4331 // VarArgsFrameIndex on the stack so that they may be loaded by
4332 // dereferencing the result of va_next.
4333 for (MCPhysReg GPArgReg : GPArgRegs) {
4334 // Get an existing live-in vreg, or add a new one.
4335 Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgReg);
4336 if (!VReg)
4337 VReg = MF.addLiveIn(GPArgReg, &PPC::GPRCRegClass);
4338
4339 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4340 SDValue Store =
4341 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4342 MemOps.push_back(Store);
4343 // Increment the address by four for the next argument to store
4344 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4345 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4346 }
4347
4348 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4349 // is set.
4350 // The double arguments are stored to the VarArgsFrameIndex
4351 // on the stack.
4352 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4353 // Get an existing live-in vreg, or add a new one.
4354 Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4355 if (!VReg)
4356 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4357
4358 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4359 SDValue Store =
4360 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4361 MemOps.push_back(Store);
4362 // Increment the address by eight for the next argument to store
4363 SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4364 PtrVT);
4365 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4366 }
4367 }
4368
4369 if (!MemOps.empty())
4370 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4371
4372 return Chain;
4373}
4374
4375// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4376// value to MVT::i64 and then truncate to the correct register size.
4377SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4378 EVT ObjectVT, SelectionDAG &DAG,
4379 SDValue ArgVal,
4380 const SDLoc &dl) const {
4381 if (Flags.isSExt())
4382 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4383 DAG.getValueType(ObjectVT));
4384 else if (Flags.isZExt())
4385 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4386 DAG.getValueType(ObjectVT));
4387
4388 return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4389}
4390
4391SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
4392 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
4393 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
4394 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
4395 // TODO: add description of PPC stack frame format, or at least some docs.
4396 //
4397 bool isELFv2ABI = Subtarget.isELFv2ABI();
4398 bool isLittleEndian = Subtarget.isLittleEndian();
4399 MachineFunction &MF = DAG.getMachineFunction();
4400 MachineFrameInfo &MFI = MF.getFrameInfo();
4401 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4402
4403 assert(!(CallConv == CallingConv::Fast && isVarArg) &&
4404 "fastcc not supported on varargs functions");
4405
4406 EVT PtrVT = getPointerTy(MF.getDataLayout());
4407 // Potential tail calls could cause overwriting of argument stack slots.
4408 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4409 (CallConv == CallingConv::Fast));
4410 unsigned PtrByteSize = 8;
4411 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4412
4413 static const MCPhysReg GPR[] = {
4414 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4415 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4416 };
4417 static const MCPhysReg VR[] = {
4418 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4419 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4420 };
4421
4422 const unsigned Num_GPR_Regs = std::size(GPR);
4423 const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
4424 const unsigned Num_VR_Regs = std::size(VR);
4425
4426 // Do a first pass over the arguments to determine whether the ABI
4427 // guarantees that our caller has allocated the parameter save area
4428 // on its stack frame. In the ELFv1 ABI, this is always the case;
4429 // in the ELFv2 ABI, it is true if this is a vararg function or if
4430 // any parameter is located in a stack slot.
4431
4432 bool HasParameterArea = !isELFv2ABI || isVarArg;
4433 unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
4434 unsigned NumBytes = LinkageSize;
4435 unsigned AvailableFPRs = Num_FPR_Regs;
4436 unsigned AvailableVRs = Num_VR_Regs;
4437 for (const ISD::InputArg &In : Ins) {
4438 if (In.Flags.isNest())
4439 continue;
4440
4441 if (CalculateStackSlotUsed(In.VT, In.ArgVT, In.Flags, PtrByteSize,
4442 LinkageSize, ParamAreaSize, NumBytes,
4443 AvailableFPRs, AvailableVRs))
4444 HasParameterArea = true;
4445 }
4446
4447 // Add DAG nodes to load the arguments or copy them out of registers. On
4448 // entry to a function on PPC, the arguments start after the linkage area,
4449 // although the first ones are often in registers.
4450
4451 unsigned ArgOffset = LinkageSize;
4452 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
4453 SmallVector<SDValue, 8> MemOps;
4454 Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
4455 unsigned CurArgIdx = 0;
4456 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
4457 SDValue ArgVal;
4458 bool needsLoad = false;
4459 EVT ObjectVT = Ins[ArgNo].VT;
4460 EVT OrigVT = Ins[ArgNo].ArgVT;
4461 unsigned ObjSize = ObjectVT.getStoreSize();
4462 unsigned ArgSize = ObjSize;
4463 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
4464 if (Ins[ArgNo].isOrigArg()) {
4465 std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
4466 CurArgIdx = Ins[ArgNo].getOrigArgIndex();
4467 }
4468 // We re-align the argument offset for each argument, except when using the
4469 // fast calling convention, when we need to make sure we do that only when
4470 // we'll actually use a stack slot.
4471 unsigned CurArgOffset;
4472 Align Alignment;
4473 auto ComputeArgOffset = [&]() {
4474 /* Respect alignment of argument on the stack. */
4475 Alignment =
4476 CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
4477 ArgOffset = alignTo(ArgOffset, Alignment);
4478 CurArgOffset = ArgOffset;
4479 };
4480
4481 if (CallConv != CallingConv::Fast) {
4482 ComputeArgOffset();
4483
4484 /* Compute GPR index associated with argument offset. */
4485 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4486 GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
4487 }
4488
4489 // FIXME the codegen can be much improved in some cases.
4490 // We do not have to keep everything in memory.
4491 if (Flags.isByVal()) {
4492 assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");
4493
4494 if (CallConv == CallingConv::Fast)
4495 ComputeArgOffset();
4496
4497 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4498 ObjSize = Flags.getByValSize();
4499 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4500 // Empty aggregate parameters do not take up registers. Examples:
4501 // struct { } a;
4502 // union { } b;
4503 // int c[0];
4504 // etc. However, we have to provide a place-holder in InVals, so
4505 // pretend we have an 8-byte item at the current address for that
4506 // purpose.
4507 if (!ObjSize) {
4508 int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
4509 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4510 InVals.push_back(FIN);
4511 continue;
4512 }
4513
4514 // Create a stack object covering all stack doublewords occupied
4515 // by the argument. If the argument is (fully or partially) on
4516 // the stack, or if the argument is fully in registers but the
4517 // caller has allocated the parameter save anyway, we can refer
4518 // directly to the caller's stack frame. Otherwise, create a
4519 // local copy in our own frame.
4520 int FI;
4521 if (HasParameterArea ||
4522 ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
4523 FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
4524 else
4525 FI = MFI.CreateStackObject(ArgSize, Alignment, false);
4526 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4527
4528 // Handle aggregates smaller than 8 bytes.
4529 if (ObjSize < PtrByteSize) {
4530 // The value of the object is its address, which differs from the
4531 // address of the enclosing doubleword on big-endian systems.
4532 SDValue Arg = FIN;
4533 if (!isLittleEndian) {
4534 SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
4535 Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
4536 }
4537 InVals.push_back(Arg);
4538
4539 if (GPR_idx != Num_GPR_Regs) {
4540 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4541 FuncInfo->addLiveInAttr(VReg, Flags);
4542 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4543 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
4544 SDValue Store =
4545 DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
4546 MachinePointerInfo(&*FuncArg), ObjType);
4547 MemOps.push_back(Store);
4548 }
4549 // Whether we copied from a register or not, advance the offset
4550 // into the parameter save area by a full doubleword.
4551 ArgOffset += PtrByteSize;
4552 continue;
4553 }
4554
4555 // The value of the object is its address, which is the address of
4556 // its first stack doubleword.
4557 InVals.push_back(FIN);
4558
4559 // Store whatever pieces of the object are in registers to memory.
4560 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
4561 if (GPR_idx == Num_GPR_Regs)
4562 break;
4563
4564 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4565 FuncInfo->addLiveInAttr(VReg, Flags);
4566 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4567 SDValue Addr = FIN;
4568 if (j) {
4569 SDValue Off = DAG.getConstant(j, dl, PtrVT);
4570 Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
4571 }
4572 unsigned StoreSizeInBits = std::min(PtrByteSize, (ObjSize - j)) * 8;
4573 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), StoreSizeInBits);
4574 SDValue Store =
4575 DAG.getTruncStore(Val.getValue(1), dl, Val, Addr,
4576 MachinePointerInfo(&*FuncArg, j), ObjType);
4577 MemOps.push_back(Store);
4578 ++GPR_idx;
4579 }
4580 ArgOffset += ArgSize;
4581 continue;
4582 }
4583
4584 switch (ObjectVT.getSimpleVT().SimpleTy) {
4585 default: llvm_unreachable("Unhandled argument type!");
4586 case MVT::i1:
4587 case MVT::i32:
4588 case MVT::i64:
4589 if (Flags.isNest()) {
4590 // The 'nest' parameter, if any, is passed in R11.
4591 Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
4592 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4593
4594 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4595 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4596
4597 break;
4598 }
4599
4600 // These can be scalar arguments or elements of an integer array type
4601 // passed directly. Clang may use those instead of "byval" aggregate
4602 // types to avoid forcing arguments to memory unnecessarily.
4603 if (GPR_idx != Num_GPR_Regs) {
4604 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4605 FuncInfo->addLiveInAttr(VReg, Flags);
4606 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4607
4608 if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
4609 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4610 // value to MVT::i64 and then truncate to the correct register size.
4611 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
4612 } else {
4613 if (CallConv == CallingConv::Fast)
4614 ComputeArgOffset();
4615
4616 needsLoad = true;
4617 ArgSize = PtrByteSize;
4618 }
4619 if (CallConv != CallingConv::Fast || needsLoad)
4620 ArgOffset += 8;
4621 break;
4622
4623 case MVT::f32:
4624 case MVT::f64:
4625 // These can be scalar arguments or elements of a float array type
4626 // passed directly. The latter are used to implement ELFv2 homogeneous
4627 // float aggregates.
4628 if (FPR_idx != Num_FPR_Regs) {
4629 unsigned VReg;
4630
4631 if (ObjectVT == MVT::f32)
4632 VReg = MF.addLiveIn(FPR[FPR_idx],
4633 Subtarget.hasP8Vector()
4634 ? &PPC::VSSRCRegClass
4635 : &PPC::F4RCRegClass);
4636 else
4637 VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
4638 ? &PPC::VSFRCRegClass
4639 : &PPC::F8RCRegClass);
4640
4641 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4642 ++FPR_idx;
4643 } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
4644 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
4645 // once we support fp <-> gpr moves.
4646
4647 // This can only ever happen in the presence of f32 array types,
4648 // since otherwise we never run out of FPRs before running out
4649 // of GPRs.
4650 Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
4651 FuncInfo->addLiveInAttr(VReg, Flags);
4652 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);
4653
4654 if (ObjectVT == MVT::f32) {
4655 if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
4656 ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
4657 DAG.getConstant(32, dl, MVT::i32));
4658 ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
4659 }
4660
4661 ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
4662 } else {
4663 if (CallConv == CallingConv::Fast)
4664 ComputeArgOffset();
4665
4666 needsLoad = true;
4667 }
4668
4669 // When passing an array of floats, the array occupies consecutive
4670 // space in the argument area; only round up to the next doubleword
4671 // at the end of the array. Otherwise, each float takes 8 bytes.
4672 if (CallConv != CallingConv::Fast || needsLoad) {
4673 ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
4674 ArgOffset += ArgSize;
4675 if (Flags.isInConsecutiveRegsLast())
4676 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
4677 }
4678 break;
4679 case MVT::v4f32:
4680 case MVT::v4i32:
4681 case MVT::v8i16:
4682 case MVT::v16i8:
4683 case MVT::v2f64:
4684 case MVT::v2i64:
4685 case MVT::v1i128:
4686 case MVT::f128:
4687 // These can be scalar arguments or elements of a vector array type
4688 // passed directly. The latter are used to implement ELFv2 homogeneous
4689 // vector aggregates.
4690 if (VR_idx != Num_VR_Regs) {
4691 Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
4692 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
4693 ++VR_idx;
4694 } else {
4695 if (CallConv == CallingConv::Fast)
4696 ComputeArgOffset();
4697 needsLoad = true;
4698 }
4699 if (CallConv != CallingConv::Fast || needsLoad)
4700 ArgOffset += 16;
4701 break;
4702 }
4703
4704 // We need to load the argument to a virtual register if we determined
4705 // above that we ran out of physical registers of the appropriate type.
4706 if (needsLoad) {
4707 if (ObjSize < ArgSize && !isLittleEndian)
4708 CurArgOffset += ArgSize - ObjSize;
4709 int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
4710 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4711 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
4712 }
4713
4714 InVals.push_back(ArgVal);
4715 }
4716
4717 // Area that is at least reserved in the caller of this function.
4718 unsigned MinReservedArea;
4719 if (HasParameterArea)
4720 MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
4721 else
4722 MinReservedArea = LinkageSize;
4723
4724 // Set the size that is at least reserved in caller of this function. Tail
4725 // call optimized functions' reserved stack space needs to be aligned so that
4726 // taking the difference between two stack areas will result in an aligned
4727 // stack.
4728 MinReservedArea =
4729 EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4730 FuncInfo->setMinReservedArea(MinReservedArea);
4731
4732 // If the function takes variable number of arguments, make a frame index for
4733 // the start of the first vararg value... for expansion of llvm.va_start.
4734 // The ELFv2 ABI spec says:
4735 // C programs that are intended to be *portable* across different compilers
4736 // and architectures must use the header file <stdarg.h> to deal with variable
4737 // argument lists.
4738 if (isVarArg && MFI.hasVAStart()) {
4739 int Depth = ArgOffset;
4740
4741 FuncInfo->setVarArgsFrameIndex(
4742 MFI.CreateFixedObject(PtrByteSize, Depth, true));
4743 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4744
4745 // If this function is vararg, store any remaining integer argument regs
4746 // to their spots on the stack so that they may be loaded by dereferencing
4747 // the result of va_next.
4748 for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
4749 GPR_idx < Num_GPR_Regs; ++GPR_idx) {
4750 Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
4751 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4752 SDValue Store =
4753 DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4754 MemOps.push_back(Store);
4755 // Increment the address by four for the next argument to store
4756 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
4757 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4758 }
4759 }
4760
4761 if (!MemOps.empty())
4762 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4763
4764 return Chain;
4765}
4766
4767/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4768/// adjusted to accommodate the arguments for the tailcall.
4769static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
4770 unsigned ParamSize) {
4771
4772 if (!isTailCall) return 0;
4773
4775 unsigned CallerMinReservedArea = FI->getMinReservedArea();
4776 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
4777 // Remember only if the new adjustment is bigger.
4778 if (SPDiff < FI->getTailCallSPDelta())
4779 FI->setTailCallSPDelta(SPDiff);
4780
4781 return SPDiff;
4782}
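// Worked example (hypothetical values): if the caller reserved 64 bytes of
// parameter area but the tail callee needs 96, SPDiff is 64 - 96 = -32 and is
// recorded as the new tail-call SP delta because the stack must be grown
// before the jump; a callee needing less space leaves the delta unchanged.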
4783
4784static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV);
4785
4786static bool callsShareTOCBase(const Function *Caller,
4787 const GlobalValue *CalleeGV,
4788 const TargetMachine &TM) {
4789 // It does not make sense to call callsShareTOCBase() with a caller that
4790 // is PC Relative since PC Relative callers do not have a TOC.
4791#ifndef NDEBUG
4792 const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4793 assert(!STICaller->isUsingPCRelativeCalls() &&
4794 "PC Relative callers do not have a TOC and cannot share a TOC Base");
4795#endif
4796
4797 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4798 // don't have enough information to determine if the caller and callee share
4799 // the same TOC base, so we have to pessimistically assume they don't for
4800 // correctness.
4801 if (!CalleeGV)
4802 return false;
4803
4804 // If the callee is preemptable, then the static linker will use a plt-stub
4805 // which saves the toc to the stack, and needs a nop after the call
4806 // instruction to convert to a toc-restore.
4807 if (!TM.shouldAssumeDSOLocal(CalleeGV))
4808 return false;
4809
4810 // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4811 // We may need a TOC restore in the situation where the caller requires a
4812 // valid TOC but the callee is PC Relative and does not.
4813 const Function *F = dyn_cast<Function>(CalleeGV);
4814 const GlobalAlias *Alias = dyn_cast<GlobalAlias>(CalleeGV);
4815
4816 // If we have an Alias we can try to get the function from there.
4817 if (Alias) {
4818 const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4819 F = dyn_cast<Function>(GlobalObj);
4820 }
4821
4822 // If we still have no valid function pointer we do not have enough
4823 // information to determine if the callee uses PC Relative calls so we must
4824 // assume that it does.
4825 if (!F)
4826 return false;
4827
4828 // If the callee uses PC Relative we cannot guarantee that the callee won't
4829 // clobber the TOC of the caller and so we must assume that the two
4830 // functions do not share a TOC base.
4831 const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4832 if (STICallee->isUsingPCRelativeCalls())
4833 return false;
4834
4835 // If the GV is not a strong definition then we need to assume it can be
4836 // replaced by another function at link time. The function that replaces
4837 // it may not share the same TOC as the caller since the callee may be
4838 // replaced by a PC Relative version of the same function.
4839 if (!CalleeGV->isStrongDefinitionForLinker())
4840 return false;
4841
4842 // The medium and large code models are expected to provide a sufficiently
4843 // large TOC to provide all data addressing needs of a module with a
4844 // single TOC.
4845 if (CodeModel::Medium == TM.getCodeModel() ||
4846 CodeModel::Large == TM.getCodeModel())
4847 return true;
4848
4849 // Any explicitly-specified sections and section prefixes must also match.
4850 // Also, if we're using -ffunction-sections, then each function is always in
4851 // a different section (the same is true for COMDAT functions).
4852 if (TM.getFunctionSections() || CalleeGV->hasComdat() ||
4853 Caller->hasComdat() || CalleeGV->getSection() != Caller->getSection())
4854 return false;
4855 if (const auto *F = dyn_cast<Function>(CalleeGV)) {
4856 if (F->getSectionPrefix() != Caller->getSectionPrefix())
4857 return false;
4858 }
4859
4860 return true;
4861}
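// In short, the checks above only claim a shared TOC base for a
// non-PC-relative, dso_local, strongly defined callee, and then either rely
// on a medium/large code model or require the caller and callee to land in
// the same section with the same section prefix; everything else is
// conservatively treated as having a different TOC.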
4862
4863static bool
4864 needStackSlotPassParameters(const PPCSubtarget &Subtarget,
4865 const SmallVectorImpl<ISD::OutputArg> &Outs) {
4866 assert(Subtarget.is64BitELFABI());
4867
4868 const unsigned PtrByteSize = 8;
4869 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4870
4871 static const MCPhysReg GPR[] = {
4872 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
4873 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
4874 };
4875 static const MCPhysReg VR[] = {
4876 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
4877 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
4878 };
4879
4880 const unsigned NumGPRs = std::size(GPR);
4881 const unsigned NumFPRs = 13;
4882 const unsigned NumVRs = std::size(VR);
4883 const unsigned ParamAreaSize = NumGPRs * PtrByteSize;
4884
4885 unsigned NumBytes = LinkageSize;
4886 unsigned AvailableFPRs = NumFPRs;
4887 unsigned AvailableVRs = NumVRs;
4888
4889 for (const ISD::OutputArg& Param : Outs) {
4890 if (Param.Flags.isNest()) continue;
4891
4892 if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
4893 LinkageSize, ParamAreaSize, NumBytes,
4894 AvailableFPRs, AvailableVRs))
4895 return true;
4896 }
4897 return false;
4898}
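// Worked example (hypothetical signature): with eight GPRs available a callee
// taking nine i64 parameters needs a parameter-save slot for the ninth (its
// offset reaches LinkageSize + 64), so this returns true; eight or fewer
// integer parameters stay in registers and it returns false.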
4899
4900static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4901 if (CB.arg_size() != CallerFn->arg_size())
4902 return false;
4903
4904 auto CalleeArgIter = CB.arg_begin();
4905 auto CalleeArgEnd = CB.arg_end();
4906 Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4907
4908 for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4909 const Value* CalleeArg = *CalleeArgIter;
4910 const Value* CallerArg = &(*CallerArgIter);
4911 if (CalleeArg == CallerArg)
4912 continue;
4913
4914 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4915 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4916 // }
4917 // 1st argument of callee is undef and has the same type as caller.
4918 if (CalleeArg->getType() == CallerArg->getType() &&
4919 isa<UndefValue>(CalleeArg))
4920 continue;
4921
4922 return false;
4923 }
4924
4925 return true;
4926}
4927
4928// Returns true if TCO is possible between the callers and callees
4929// calling conventions.
4930static bool
4931 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
4932 CallingConv::ID CalleeCC) {
4933 // Tail calls are possible with fastcc and ccc.
4934 auto isTailCallableCC = [] (CallingConv::ID CC){
4935 return CC == CallingConv::C || CC == CallingConv::Fast;
4936 };
4937 if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
4938 return false;
4939
4940 // We can safely tail call both fastcc and ccc callees from a c calling
4941 // convention caller. If the caller is fastcc, we may have less stack space
4942 // than a non-fastcc caller with the same signature so disable tail-calls in
4943 // that case.
4944 return CallerCC == CallingConv::C || CallerCC == CalleeCC;
4945}
4946
4947bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4948 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
4949 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
4950 const SmallVectorImpl<ISD::OutputArg> &Outs,
4951 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
4952 bool isCalleeExternalSymbol) const {
4953 bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4954
4955 if (DisableSCO && !TailCallOpt) return false;
4956
4957 // Variadic argument functions are not supported.
4958 if (isVarArg) return false;
4959
4960 // Check that the calling conventions are compatible for tco.
4961 if (!areCallingConvEligibleForTCO_64SVR4(CallerCC, CalleeCC))
4962 return false;
4963
4964 // A caller with any byval parameter is not supported.
4965 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4966 return false;
4967
4968 // A callee with any byval parameter is not supported either.
4969 // Note: This is a quick workaround, because in some cases, e.g.
4970 // caller's stack size > callee's stack size, we are still able to apply
4971 // sibling call optimization. For example, gcc is able to do SCO for caller1
4972 // in the following example, but not for caller2.
4973 // struct test {
4974 // long int a;
4975 // char ary[56];
4976 // } gTest;
4977 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4978 // b->a = v.a;
4979 // return 0;
4980 // }
4981 // void caller1(struct test a, struct test c, struct test *b) {
4982 // callee(gTest, b); }
4983 // void caller2(struct test *b) { callee(gTest, b); }
4984 if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4985 return false;
4986
4987 // If callee and caller use different calling conventions, we cannot pass
4988 // parameters on stack since offsets for the parameter area may be different.
4989 if (CallerCC != CalleeCC && needStackSlotPassParameters(Subtarget, Outs))
4990 return false;
4991
4992 // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4993 // the caller and callee share the same TOC for TCO/SCO. If the caller and
4994 // callee potentially have different TOC bases then we cannot tail call since
4995 // we need to restore the TOC pointer after the call.
4996 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4997 // We cannot guarantee this for indirect calls or calls to external functions.
4998 // When PC-Relative addressing is used, the concept of the TOC is no longer
4999 // applicable so this check is not required.
5000 // Check first for indirect calls.
5001 if (!Subtarget.isUsingPCRelativeCalls() &&
5002 !isFunctionGlobalAddress(CalleeGV) && !isCalleeExternalSymbol)
5003 return false;
5004
5005 // Check if we share the TOC base.
5006 if (!Subtarget.isUsingPCRelativeCalls() &&
5007 !callsShareTOCBase(CallerFunc, CalleeGV, getTargetMachine()))
5008 return false;
5009
5010 // TCO allows altering callee ABI, so we don't have to check further.
5011 if (CalleeCC == CallingConv::Fast && TailCallOpt)
5012 return true;
5013
5014 if (DisableSCO) return false;
5015
5016 // If the callee uses the same argument list as the caller, then we can
5017 // apply SCO in this case. If not, then we need to check whether the callee
5018 // needs stack slots for passing arguments.
5019 // PC Relative tail calls may not have a CallBase.
5020 // If there is no CallBase we cannot verify if we have the same argument
5021 // list so assume that we don't have the same argument list.
5022 if (CB && !hasSameArgumentList(CallerFunc, *CB) &&
5023 needStackSlotPassParameters(Subtarget, Outs))
5024 return false;
5025 else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
5026 return false;
5027
5028 return true;
5029}
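// Editorial note, not part of PPCISelLowering.cpp: a minimal sketch of the
// kind of call these checks are intended to admit, assuming a hypothetical
// pair of functions compiled for 64-bit ELF:
//
//   long tail_target(long);
//   long wrapper(long X) { return tail_target(X); }  // eligible for SCO
//
// A variadic callee, a byval argument on either side, a calling-convention
// mismatch that needs stack-passed parameters, or a potentially different
// TOC base each cause the function above to return false and the call falls
// back to a normal call sequence.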
5030
5031/// IsEligibleForTailCallOptimization - Check whether the call is eligible
5032/// for tail call optimization. Targets which want to do tail call
5033/// optimization should implement this function.
5034bool PPCTargetLowering::IsEligibleForTailCallOptimization(
5035 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5036 CallingConv::ID CallerCC, bool isVarArg,
5037 const SmallVectorImpl<ISD::InputArg> &Ins) const {
5038 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5039 return false;
5040
5041 // Variable argument functions are not supported.
5042 if (isVarArg)
5043 return false;
5044
5045 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
5046 // Functions containing by val parameters are not supported.
5047 if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
5048 return false;
5049
5050 // Non-PIC/GOT tail calls are supported.
5051 if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
5052 return true;
5053
5054 // At the moment we can only do local tail calls (in same module, hidden
5055 // or protected) if we are generating PIC.
5056 if (CalleeGV)
5057 return CalleeGV->hasHiddenVisibility() ||
5058 CalleeGV->hasProtectedVisibility();
5059 }
5060
5061 return false;
5062}
5063
5064/// isCallCompatibleAddress - Return the immediate to use if the specified
5065/// 32-bit value is representable in the immediate field of a BxA instruction.
5066static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
5067 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5068 if (!C) return nullptr;
5069
5070 int Addr = C->getZExtValue();
5071 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero.
5072 SignExtend32<26>(Addr) != Addr)
5073 return nullptr; // Top 6 bits have to be sext of immediate.
5074
5075 return DAG
5076 .getConstant(
5077 (int)C->getZExtValue() >> 2, SDLoc(Op),
5078 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
5079 .getNode();
5080}
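// Editorial sketch, not part of PPCISelLowering.cpp: the check above mirrors
// the encoding of the bla (branch absolute and link) target, whose immediate
// is a 4-byte-aligned address that must survive sign extension from 26 bits.
// A standalone equivalent (the helper name is hypothetical; <cstdint> is
// already included by this file):
static bool fitsInBLAImmediate(int32_t Addr) {
  if (Addr & 3)                                       // low 2 bits must be zero
    return false;
  int32_t SExt = int32_t(uint32_t(Addr) << 6) >> 6;   // sign-extend from bit 25
  return SExt == Addr;                                // top 6 bits are sign copies
}
// For example, 0x01FFFFFC is representable, while 0x02000000 (bit 25 set) is not.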
5081
5082namespace {
5083
5084struct TailCallArgumentInfo {
5085 SDValue Arg;
5086 SDValue FrameIdxOp;
5087 int FrameIdx = 0;
5088
5089 TailCallArgumentInfo() = default;
5090};
5091
5092} // end anonymous namespace
5093
5094/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
5095static void StoreTailCallArgumentsToStackSlot(
5096 SelectionDAG &DAG, SDValue Chain,
5097 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
5098 SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
5099 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
5100 SDValue Arg = TailCallArgs[i].Arg;
5101 SDValue FIN = TailCallArgs[i].FrameIdxOp;
5102 int FI = TailCallArgs[i].FrameIdx;
5103 // Store relative to framepointer.
5104 MemOpChains.push_back(DAG.getStore(
5105 Chain, dl, Arg, FIN,
5106 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
5107 }
5108}
5109
5110/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
5111/// the appropriate stack slot for the tail call optimized function call.
5112static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
5113 SDValue OldRetAddr, SDValue OldFP,
5114 int SPDiff, const SDLoc &dl) {
5115 if (SPDiff) {
5116 // Calculate the new stack slot for the return address.
5117 MachineFunction &MF = DAG.getMachineFunction();
5118 const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
5119 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
5120 int SlotSize = Subtarget.isPPC64() ? 8 : 4;
5121 int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
5122 int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
5123 NewRetAddrLoc, true);
5124 SDValue NewRetAddrFrIdx =
5125 DAG.getFrameIndex(NewRetAddr, Subtarget.getScalarIntVT());
5126 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
5127 MachinePointerInfo::getFixedStack(MF, NewRetAddr));
5128 }
5129 return Chain;
5130}
5131
5132/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
5133/// the position of the argument.
5134static void CalculateTailCallArgDest(
5135 SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg,
5136 int SPDiff, unsigned ArgOffset,
5137 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5138 int Offset = ArgOffset + SPDiff;
5139 uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
5140 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
5141 EVT VT = IsPPC64 ? MVT::i64 : MVT::i32;
5142 SDValue FIN = DAG.getFrameIndex(FI, VT);
5143 TailCallArgumentInfo Info;
5144 Info.Arg = Arg;
5145 Info.FrameIdxOp = FIN;
5146 Info.FrameIdx = FI;
5147 TailCallArguments.push_back(Info);
5148}
5149
5150/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5151/// stack slot. Returns the chain as result and the loaded frame pointers in
5152/// LROpOut/FPOpout. Used when tail calling.
5153SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5154 SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5155 SDValue &FPOpOut, const SDLoc &dl) const {
5156 if (SPDiff) {
5157 // Load the LR and FP stack slot for later adjusting.
5158 LROpOut = getReturnAddrFrameIndex(DAG);
5159 LROpOut = DAG.getLoad(Subtarget.getScalarIntVT(), dl, Chain, LROpOut,
5160 MachinePointerInfo());
5161 Chain = SDValue(LROpOut.getNode(), 1);
5162 }
5163 return Chain;
5164}
5165
5166/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
5167/// by "Src" to address "Dst" of size "Size". Alignment information is
5168/// specified by the specific parameter attribute. The copy will be passed as
5169/// a byval function parameter.
5170/// Sometimes what we are copying is the end of a larger object, the part that
5171/// does not fit in registers.
5172static SDValue CreateCopyOfByValArgument(SDValue Arg, SDValue PtrOff,
5173 SDValue Chain, ISD::ArgFlagsTy Flags,
5174 SelectionDAG &DAG, const SDLoc &dl) {
5175 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
5176 return DAG.getMemcpy(
5177 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), false, false,
5178 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
5179}
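// Editorial note, not part of PPCISelLowering.cpp: at the source level a
// byval argument is simply an aggregate passed by value, so the memcpy built
// above is the caller-side copy. A hypothetical C-level example:
//
//   struct Payload { long A; char Ary[56]; };
//   void use_payload(struct Payload P);                       // callee owns a copy
//   void pass_payload(struct Payload *P) { use_payload(*P); } // copy emitted here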
5180
5181/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5182/// tail calls.
5183static void LowerMemOpCallTo(
5184 SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5185 SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5186 bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5187 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5188 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5189 if (!isTailCall) {
5190 if (isVector) {
5191 SDValue StackPtr;
5192 if (isPPC64)
5193 StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5194 else
5195 StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5196 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5197 DAG.getConstant(ArgOffset, dl, PtrVT));
5198 }
5199 MemOpChains.push_back(
5200 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5201 // Calculate and remember argument location.
5202 } else
5203 CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5204 TailCallArguments);
5205}
5206
5207static void
5208PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain,
5209 const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
5210 SDValue FPOp,
5211 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
5212 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
5213 // might overwrite each other in case of tail call optimization.
5214 SmallVector<SDValue, 8> MemOpChains2;
5215 // Do not flag preceding copytoreg stuff together with the following stuff.
5216 InGlue = SDValue();
5217 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
5218 MemOpChains2, dl);
5219 if (!MemOpChains2.empty())
5220 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
5221
5222 // Store the return address to the appropriate stack slot.
5223 Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);
5224
5225 // Emit callseq_end just before tailcall node.
5226 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, dl);
5227 InGlue = Chain.getValue(1);
5228}
5229
5230// Is this global address that of a function that can be called by name? (as
5231// opposed to something that must hold a descriptor for an indirect call).
5232static bool isFunctionGlobalAddress(const GlobalValue *GV) {
5233 if (GV) {
5234 if (GV->isThreadLocal())
5235 return false;
5236
5237 return GV->getValueType()->isFunctionTy();
5238 }
5239
5240 return false;
5241}
5242
5243SDValue PPCTargetLowering::LowerCallResult(
5244 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
5245 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5246 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
5247 SmallVector<CCValAssign, 16> RVLocs;
5248 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
5249 *DAG.getContext());
5250
5251 CCRetInfo.AnalyzeCallResult(
5252 Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
5253 ? RetCC_PPC_Cold
5254 : RetCC_PPC);
5255
5256 // Copy all of the result registers out of their specified physreg.
5257 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
5258 CCValAssign &VA = RVLocs[i];
5259 assert(VA.isRegLoc() && "Can only return in registers!");
5260
5261 SDValue Val;
5262
5263 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
5264 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5265 InGlue);
5266 Chain = Lo.getValue(1);
5267 InGlue = Lo.getValue(2);
5268 VA = RVLocs[++i]; // skip ahead to next loc
5269 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
5270 InGlue);
5271 Chain = Hi.getValue(1);
5272 InGlue = Hi.getValue(2);
5273 if (!Subtarget.isLittleEndian())
5274 std::swap (Lo, Hi);
5275 Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
5276 } else {
5277 Val = DAG.getCopyFromReg(Chain, dl,
5278 VA.getLocReg(), VA.getLocVT(), InGlue);
5279 Chain = Val.getValue(1);
5280 InGlue = Val.getValue(2);
5281 }
5282
5283 switch (VA.getLocInfo()) {
5284 default: llvm_unreachable("Unknown loc info!");
5285 case CCValAssign::Full: break;
5286 case CCValAssign::AExt:
5287 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5288 break;
5289 case CCValAssign::ZExt:
5290 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
5291 DAG.getValueType(VA.getValVT()));
5292 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5293 break;
5294 case CCValAssign::SExt:
5295 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
5296 DAG.getValueType(VA.getValVT()));
5297 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
5298 break;
5299 }
5300
5301 InVals.push_back(Val);
5302 }
5303
5304 return Chain;
5305}
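// Editorial note, not part of PPCISelLowering.cpp: the AssertZext/AssertSext
// plus TRUNCATE pattern above models return-value promotion. For a callee
// declared as returning, say, "zeroext i8", the value arrives widened in a
// GPR; the DAG records that the upper bits are known zero (AssertZext) and
// then truncates back to the declared i8 type before the caller uses it.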
5306
5307static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
5308 const PPCSubtarget &Subtarget, bool isPatchPoint) {
5309 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5310 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5311
5312 // PatchPoint calls are not indirect.
5313 if (isPatchPoint)
5314 return false;
5315
5316 if (isFunctionGlobalAddress(GV) || isa<ExternalSymbolSDNode>(Callee))
5317 return false;
5318
5319 // Darwin and 32-bit ELF can use a BLA. The descriptor-based ABIs cannot
5320 // because the immediate function pointer points to a descriptor instead of
5321 // a function entry point. The ELFv2 ABI cannot use a BLA because the function
5322 // pointer immediate points to the global entry point, while the BLA would
5323 // need to jump to the local entry point (see rL211174).
5324 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
5325 isBLACompatibleAddress(Callee, DAG))
5326 return false;
5327
5328 return true;
5329}
5330
5331// AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5332static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5333 return Subtarget.isAIXABI() ||
5334 (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5335}
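// Editorial sketch, not part of PPCISelLowering.cpp: for the ABIs above, an
// indirect call is roughly lowered to the following sequence (ELFv2 shown;
// the TOC save slot is at an ABI-specific offset, 24(r1) on ELFv2 and
// 40(r1) on ELFv1/64-bit AIX):
//
//   std 2, 24(1)    # save the caller's TOC pointer
//   mtctr 12        # move the target address into CTR
//   bctrl           # indirect branch and link
//   ld 2, 24(1)     # restore the caller's TOC pointer (BCTRL_LOAD_TOC)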
5336
5337static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags,
5338 const Function &Caller, const SDValue &Callee,
5339 const PPCSubtarget &Subtarget,
5340 const TargetMachine &TM,
5341 bool IsStrictFPCall = false) {
5342 if (CFlags.IsTailCall)
5343 return PPCISD::TC_RETURN;
5344
5345 unsigned RetOpc = 0;
5346 // This is a call through a function pointer.
5347 if (CFlags.IsIndirect) {
5348 // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
5349 // indirect calls. The save of the caller's TOC pointer to the stack will be
5350 // inserted into the DAG as part of call lowering. The restore of the TOC
5351 // pointer is modeled by using a pseudo instruction for the call opcode that
5352 // represents the 2 instruction sequence of an indirect branch and link,
5353 // immediately followed by a load of the TOC pointer from the stack save
5354 // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
5355 // as it is not saved or used.
5356 RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
5357 : PPCISD::BCTRL;
5358 } else if (Subtarget.isUsingPCRelativeCalls()) {
5359 assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
5360 RetOpc = PPCISD::CALL_NOTOC;
5361 } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI()) {
5362 // The ABIs that maintain a TOC pointer across calls need to have a nop
5363 // immediately following the call instruction if the caller and callee may
5364 // have different TOC bases. At link time if the linker determines the calls
5365 // may not share a TOC base, the call is redirected to a trampoline inserted
5366 // by the linker. The trampoline will (among other things) save the caller's
5367 // TOC pointer at an ABI designated offset in the linkage area and the
5368 // linker will rewrite the nop to be a load of the TOC pointer from the
5369 // linkage area into gpr2.
5370 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5371 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5372 RetOpc =
5373 callsShareTOCBase(&Caller, GV, TM) ? PPCISD::CALL : PPCISD::CALL_NOP;
5374 } else
5375 RetOpc = PPCISD::CALL;
5376 if (IsStrictFPCall) {
5377 switch (RetOpc) {
5378 default:
5379 llvm_unreachable("Unknown call opcode");
5380 case PPCISD::BCTRL_LOAD_TOC:
5381 RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
5382 break;
5383 case PPCISD::BCTRL:
5384 RetOpc = PPCISD::BCTRL_RM;
5385 break;
5386 case PPCISD::CALL_NOTOC:
5387 RetOpc = PPCISD::CALL_NOTOC_RM;
5388 break;
5389 case PPCISD::CALL:
5390 RetOpc = PPCISD::CALL_RM;
5391 break;
5392 case PPCISD::CALL_NOP:
5393 RetOpc = PPCISD::CALL_NOP_RM;
5394 break;
5395 }
5396 }
5397 return RetOpc;
5398}
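// Editorial sketch, not part of PPCISelLowering.cpp: the CALL vs CALL_NOP
// distinction above corresponds to whether a TOC restore site is needed after
// a direct call. When the TOC base may differ, the emitted sequence is
//
//   bl callee
//   nop            # rewritten by the linker to "ld 2, 24(1)" (ELFv2 offset)
//
// whereas a call known to share the TOC base emits only the bl.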
5399
5400static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
5401 const SDLoc &dl, const PPCSubtarget &Subtarget) {
5402 if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
5403 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
5404 return SDValue(Dest, 0);
5405
5406 // Returns true if the callee is local, and false otherwise.
5407 auto isLocalCallee = [&]() {
5408 const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
5409 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5410
5411 return DAG.getTarget().shouldAssumeDSOLocal(GV) &&
5412 !isa_and_nonnull<GlobalIFunc>(GV);
5413 };
5414
5415 // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
5416 // a static relocation model causes some versions of GNU LD (2.17.50, at
5417 // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
5418 // built with secure-PLT.
5419 bool UsePlt =
5420 Subtarget.is32BitELFABI() && !isLocalCallee() &&
5421 Subtarget.getTargetMachine().getRelocationModel() == Reloc::PIC_;
5422
5423 const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
5424 const TargetMachine &TM = Subtarget.getTargetMachine();
5425 const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
5426 auto *S =
5427 static_cast<MCSymbolXCOFF *>(TLOF->getFunctionEntryPointSymbol(GV, TM));
5428
5429 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5430 return DAG.getMCSymbol(S, PtrVT);
5431 };
5432
5433 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5434 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5435 if (isFunctionGlobalAddress(GV)) {
5436 const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
5437
5438 if (Subtarget.isAIXABI()) {
5439 assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
5440 return getAIXFuncEntryPointSymbolSDNode(GV);
5441 }
5442 return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
5443 UsePlt ? PPCII::MO_PLT : 0);
5444 }
5445
5446 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
5447 const char *SymName = S->getSymbol();
5448 if (Subtarget.isAIXABI()) {
5449 // If there exists a user-declared function whose name is the same as the
5450 // ExternalSymbol's, then we pick up the user-declared version.
5451 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
5452 if (const Function *F =
5453 dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
5454 return getAIXFuncEntryPointSymbolSDNode(F);
5455
5456 // On AIX, direct function calls reference the symbol for the function's
5457 // entry point, which is named by prepending a "." before the function's
5458 // C-linkage name. A Qualname is returned here because an external
5459 // function entry point is a csect with XTY_ER property.
5460 const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
5461 auto &Context = DAG.getMachineFunction().getContext();
5462 MCSectionXCOFF *Sec = Context.getXCOFFSection(
5463 (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
5464 XCOFF::CsectProperties(XCOFF::XMC_PR, XCOFF::XTY_ER));
5465 return Sec->getQualNameSymbol();
5466 };
5467
5468 SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
5469 }
5470 return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
5471 UsePlt ? PPCII::MO_PLT : 0);
5472 }
5473
5474 // No transformation needed.
5475 assert(Callee.getNode() && "What no callee?");
5476 return Callee;
5477}
5478
5479static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart) {
5480 assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
5481 "Expected a CALLSEQ_STARTSDNode.");
5482
5483 // The last operand is the chain, except when the node has glue. If the node
5484 // has glue, then the last operand is the glue, and the chain is the second
5485 // last operand.
5486 SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
5487 if (LastValue.getValueType() != MVT::Glue)
5488 return LastValue;
5489
5490 return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
5491}
5492
5493// Creates the node that moves a function's address into the count register
5494// to prepare for an indirect call instruction.
5495static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5496 SDValue &Glue, SDValue &Chain,
5497 const SDLoc &dl) {
5498 SDValue MTCTROps[] = {Chain, Callee, Glue};
5499 EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5500 Chain = DAG.getNode(PPCISD::MTCTR, dl, ReturnTypes,
5501 ArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5502 // The glue is the second value produced.
5503 Glue = Chain.getValue(1);
5504}
5505
5506static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5507 SDValue &Glue, SDValue &Chain,
5508 SDValue CallSeqStart,
5509 const CallBase *CB, const SDLoc &dl,
5510 bool hasNest,
5511 const PPCSubtarget &Subtarget) {
5512 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5513 // entry point, but to the function descriptor (the function entry point
5514 // address is part of the function descriptor though).
5515 // The function descriptor is a three doubleword structure with the
5516 // following fields: function entry point, TOC base address and
5517 // environment pointer.
5518 // Thus for a call through a function pointer, the following actions need
5519 // to be performed:
5520 // 1. Save the TOC of the caller in the TOC save area of its stack
5521 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5522 // 2. Load the address of the function entry point from the function
5523 // descriptor.
5524 // 3. Load the TOC of the callee from the function descriptor into r2.
5525 // 4. Load the environment pointer from the function descriptor into
5526 // r11.
5527 // 5. Branch to the function entry point address.
5528 // 6. On return of the callee, the TOC of the caller needs to be
5529 // restored (this is done in FinishCall()).
5530 //
5531 // The loads are scheduled at the beginning of the call sequence, and the
5532 // register copies are flagged together to ensure that no other
5533 // operations can be scheduled in between. E.g. without flagging the
5534 // copies together, a TOC access in the caller could be scheduled between
5535 // the assignment of the callee TOC and the branch to the callee, which leads
5536 // to incorrect code.
5537
5538 // Start by loading the function address from the descriptor.
5539 SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
5540 auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
5541 ? (MachineMemOperand::MODereferenceable |
5542 MachineMemOperand::MOInvariant)
5543 : MachineMemOperand::MONone;
5544
5545 MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);
5546
5547 // Registers used in building the DAG.
5548 const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
5549 const MCRegister TOCReg = Subtarget.getTOCPointerRegister();
5550
5551 // Offsets of descriptor members.
5552 const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
5553 const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();
5554
5555 const MVT RegVT = Subtarget.getScalarIntVT();
5556 const Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
5557
5558 // One load for the function's entry point address.
5559 SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
5560 Alignment, MMOFlags);
5561
5562 // One for loading the TOC anchor for the module that contains the called
5563 // function.
5564 SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
5565 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
5566 SDValue TOCPtr =
5567 DAG.getLoad(RegVT, dl, LDChain, AddTOC,
5568 MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);
5569
5570 // One for loading the environment pointer.
5571 SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
5572 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
5573 SDValue LoadEnvPtr =
5574 DAG.getLoad(RegVT, dl, LDChain, AddPtr,
5575 MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);
5576
5577
5578 // Then copy the newly loaded TOC anchor to the TOC pointer.
5579 SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
5580 Chain = TOCVal.getValue(0);
5581 Glue = TOCVal.getValue(1);
5582
5583 // If the function call has an explicit 'nest' parameter, it takes the
5584 // place of the environment pointer.
5585 assert((!hasNest || !Subtarget.isAIXABI()) &&
5586 "Nest parameter is not supported on AIX.");
5587 if (!hasNest) {
5588 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
5589 Chain = EnvVal.getValue(0);
5590 Glue = EnvVal.getValue(1);
5591 }
5592
5593 // The rest of the indirect call sequence is the same as the non-descriptor
5594 // DAG.
5595 prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
5596}
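// Editorial sketch, not part of PPCISelLowering.cpp: the three loads built
// above walk an ELFv1/AIX function descriptor, which can be pictured as
//
//   struct FunctionDescriptor {
//     void *EntryPoint; // moved into CTR for the bctrl
//     void *TOCBase;    // copied into the TOC pointer register (r2)
//     void *EnvPtr;     // copied into r11, unless a 'nest' argument takes its place
//   };
//
// with descriptorTOCAnchorOffset() and descriptorEnvironmentPointerOffset()
// selecting the second and third fields.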
5597
5598static void
5599buildCallOperands(SmallVectorImpl<SDValue> &Ops,
5600 PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
5601 SelectionDAG &DAG,
5602 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
5603 SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
5604 const PPCSubtarget &Subtarget) {
5605 const bool IsPPC64 = Subtarget.isPPC64();
5606 // MVT for a general purpose register.
5607 const MVT RegVT = Subtarget.getScalarIntVT();
5608
5609 // First operand is always the chain.
5610 Ops.push_back(Chain);
5611
5612 // If it's a direct call pass the callee as the second operand.
5613 if (!CFlags.IsIndirect)
5614 Ops.push_back(Callee);
5615 else {
5616 assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");
5617
5618 // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
5619 // on the stack (this would have been done in `LowerCall_64SVR4` or
5620 // `LowerCall_AIX`). The call instruction is a pseudo instruction that
5621 // represents both the indirect branch and a load that restores the TOC
5622 // pointer from the linkage area. The operand for the TOC restore is an add
5623 // of the TOC save offset to the stack pointer. This must be the second
5624 // operand: after the chain input but before any other variadic arguments.
5625 // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
5626 // saved or used.
5627 if (isTOCSaveRestoreRequired(Subtarget)) {
5628 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
5629
5630 SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
5631 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
5632 SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
5633 SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
5634 Ops.push_back(AddTOC);
5635 }
5636
5637 // Add the register used for the environment pointer.
5638 if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
5639 Ops.push_back(DAG.getRegister(Subtarget.getEnvironmentPointerRegister(),
5640 RegVT));
5641
5642
5643 // Add CTR register as callee so a bctr can be emitted later.
5644 if (CFlags.IsTailCall)
5645 Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
5646 }
5647
5648 // If this is a tail call add stack pointer delta.
5649 if (CFlags.IsTailCall)
5650 Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));
5651
5652 // Add argument registers to the end of the list so that they are known live
5653 // into the call.
5654 for (const auto &[Reg, N] : RegsToPass)
5655 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
5656
5657 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5658 // no way to mark dependencies as implicit here.
5659 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5660 if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
5661 !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
5662 Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));
5663
5664 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5665 if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
5666 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));
5667
5668 // Add a register mask operand representing the call-preserved registers.
5669 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
5670 const uint32_t *Mask =
5671 TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
5672 assert(Mask && "Missing call preserved mask for calling convention");
5673 Ops.push_back(DAG.getRegisterMask(Mask));
5674
5675 // If the glue is valid, it is the last operand.
5676 if (Glue.getNode())
5677 Ops.push_back(Glue);
5678}
5679
5680SDValue PPCTargetLowering::FinishCall(
5681 CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
5682 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
5683 SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
5684 unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
5685 SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {
5686
5687 if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
5688 Subtarget.isAIXABI())
5689 setUsesTOCBasePtr(DAG);
5690
5691 unsigned CallOpc =
5692 getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
5693 Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);
5694
5695 if (!CFlags.IsIndirect)
5696 Callee = transformCallee(Callee, DAG, dl, Subtarget);
5697 else if (Subtarget.usesFunctionDescriptors())
5698 prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
5699 dl, CFlags.HasNest, Subtarget);
5700 else
5701 prepareIndirectCall(DAG, Callee, Glue, Chain, dl);
5702
5703 // Build the operand list for the call instruction.
5704 SmallVector<SDValue, 8> Ops;
5705 buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
5706 SPDiff, Subtarget);
5707
5708 // Emit tail call.
5709 if (CFlags.IsTailCall) {
5710 // Indirect tail calls when using PC Relative calls do not have the same
5711 // constraints.
5712 assert(((Callee.getOpcode() == ISD::Register &&
5713 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
5714 Callee.getOpcode() == ISD::TargetExternalSymbol ||
5715 Callee.getOpcode() == ISD::TargetGlobalAddress ||
5716 isa<ConstantSDNode>(Callee) ||
5717 (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
5718 "Expecting a global address, external symbol, absolute value, "
5719 "register or an indirect tail call when PC Relative calls are "
5720 "used.");
5721 // PC Relative calls also use TC_RETURN as the way to mark tail calls.
5722 assert(CallOpc == PPCISD::TC_RETURN &&
5723 "Unexpected call opcode for a tail call.");
5724 DAG.getMachineFunction().getFrameInfo().setHasTailCall();
5725 SDValue Ret = DAG.getNode(CallOpc, dl, MVT::Other, Ops);
5726 DAG.addNoMergeSiteInfo(Ret.getNode(), CFlags.NoMerge);
5727 return Ret;
5728 }
5729
5730 std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
5731 Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
5732 DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
5733 Glue = Chain.getValue(1);
5734
5735 // When performing tail call optimization the callee pops its arguments off
5736 // the stack. Account for this here so these bytes can be pushed back on in
5737 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5738 int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
5739 getTargetMachine().Options.GuaranteedTailCallOpt)
5740 ? NumBytes
5741 : 0;
5742
5743 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, BytesCalleePops, Glue, dl);
5744 Glue = Chain.getValue(1);
5745
5746 return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
5747 DAG, InVals);
5748}
5749
5750bool PPCTargetLowering::supportsTailCallFor(const CallBase *CB) const {
5751 CallingConv::ID CalleeCC = CB->getCallingConv();
5752 const Function *CallerFunc = CB->getCaller();
5753 CallingConv::ID CallerCC = CallerFunc->getCallingConv();
5754 const Function *CalleeFunc = CB->getCalledFunction();
5755 if (!CalleeFunc)
5756 return false;
5757 const GlobalValue *CalleeGV = dyn_cast<GlobalValue>(CalleeFunc);
5758
5759 SmallVector<ISD::OutputArg, 2> Outs;
5760 SmallVector<ISD::InputArg, 2> Ins;
5761
5762 GetReturnInfo(CalleeCC, CalleeFunc->getReturnType(),
5763 CalleeFunc->getAttributes(), Outs, *this,
5764 CalleeFunc->getDataLayout());
5765
5766 return isEligibleForTCO(CalleeGV, CalleeCC, CallerCC, CB,
5767 CalleeFunc->isVarArg(), Outs, Ins, CallerFunc,
5768 false /*isCalleeExternalSymbol*/);
5769}
5770
5771bool PPCTargetLowering::isEligibleForTCO(
5772 const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
5773 CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
5774 const SmallVectorImpl<ISD::OutputArg> &Outs,
5775 const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
5776 bool isCalleeExternalSymbol) const {
5777 if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
5778 return false;
5779
5780 if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
5781 return IsEligibleForTailCallOptimization_64SVR4(
5782 CalleeGV, CalleeCC, CallerCC, CB, isVarArg, Outs, Ins, CallerFunc,
5783 isCalleeExternalSymbol);
5784 else
5785 return IsEligibleForTailCallOptimization(CalleeGV, CalleeCC, CallerCC,
5786 isVarArg, Ins);
5787}
5788
5789SDValue
5790PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
5791 SmallVectorImpl<SDValue> &InVals) const {
5792 SelectionDAG &DAG = CLI.DAG;
5793 SDLoc &dl = CLI.DL;
5794 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
5795 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
5796 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
5797 SDValue Chain = CLI.Chain;
5798 SDValue Callee = CLI.Callee;
5799 bool &isTailCall = CLI.IsTailCall;
5800 CallingConv::ID CallConv = CLI.CallConv;
5801 bool isVarArg = CLI.IsVarArg;
5802 bool isPatchPoint = CLI.IsPatchPoint;
5803 const CallBase *CB = CLI.CB;
5804
5805 if (isTailCall) {
5806 MachineFunction &MF = DAG.getMachineFunction();
5807 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
5808 auto *G = dyn_cast<GlobalAddressSDNode>(Callee);
5809 const GlobalValue *GV = G ? G->getGlobal() : nullptr;
5810 bool IsCalleeExternalSymbol = isa<ExternalSymbolSDNode>(Callee);
5811
5812 isTailCall =
5813 isEligibleForTCO(GV, CallConv, CallerCC, CB, isVarArg, Outs, Ins,
5814 &(MF.getFunction()), IsCalleeExternalSymbol);
5815 if (isTailCall) {
5816 ++NumTailCalls;
5817 if (!getTargetMachine().Options.GuaranteedTailCallOpt)
5818 ++NumSiblingCalls;
5819
5820 // PC Relative calls no longer guarantee that the callee is a Global
5821 // Address Node. The callee could be an indirect tail call in which
5822 // case the SDValue for the callee could be a load (to load the address
5823 // of a function pointer) or it may be a register copy (to move the
5824 // address of the callee from a function parameter into a virtual
5825 // register). It may also be an ExternalSymbolSDNode (e.g. memcpy).
5826 assert((Subtarget.isUsingPCRelativeCalls() ||
5827 isa<GlobalAddressSDNode>(Callee)) &&
5828 "Callee should be an llvm::Function object.");
5829
5830 LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
5831 << "\nTCO callee: ");
5832 LLVM_DEBUG(Callee.dump());
5833 }
5834 }
5835
5836 if (!isTailCall && CB && CB->isMustTailCall())
5837 report_fatal_error("failed to perform tail call elimination on a call "
5838 "site marked musttail");
5839
5840 // When long calls (i.e. indirect calls) are always used, calls are always
5841 // made via function pointer. If we have a function name, first translate it
5842 // into a pointer.
5843 if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
5844 !isTailCall)
5845 Callee = LowerGlobalAddress(Callee, DAG);
5846
5847 CallFlags CFlags(
5848 CallConv, isTailCall, isVarArg, isPatchPoint,
5849 isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
5850 // hasNest
5851 Subtarget.is64BitELFABI() &&
5852 any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
5853 CLI.NoMerge);
5854
5855 if (Subtarget.isAIXABI())
5856 return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5857 InVals, CB);
5858
5859 assert(Subtarget.isSVR4ABI());
5860 if (Subtarget.isPPC64())
5861 return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5862 InVals, CB);
5863 return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
5864 InVals, CB);
5865}
5866
5867SDValue PPCTargetLowering::LowerCall_32SVR4(
5868 SDValue Chain, SDValue Callee, CallFlags CFlags,
5869 const SmallVectorImpl<ISD::OutputArg> &Outs,
5870 const SmallVectorImpl<SDValue> &OutVals,
5871 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5872 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5873 const CallBase *CB) const {
5874 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5875 // of the 32-bit SVR4 ABI stack frame layout.
5876
5877 const CallingConv::ID CallConv = CFlags.CallConv;
5878 const bool IsVarArg = CFlags.IsVarArg;
5879 const bool IsTailCall = CFlags.IsTailCall;
5880
5881 assert((CallConv == CallingConv::C ||
5882 CallConv == CallingConv::Cold ||
5883 CallConv == CallingConv::Fast) && "Unknown calling convention!");
5884
5885 const Align PtrAlign(4);
5886
5887 MachineFunction &MF = DAG.getMachineFunction();
5888
5889 // Mark this function as potentially containing a function that contains a
5890 // tail call. As a consequence the frame pointer will be used for dynamic
5891 // allocation and for restoring the caller's stack pointer in this function's
5892 // epilogue. This is done because the tail-called function might overwrite the
5893 // value in this function's (MF) stack pointer stack slot 0(SP).
5894 if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5895 CallConv == CallingConv::Fast)
5896 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5897
5898 // Count how many bytes are to be pushed on the stack, including the linkage
5899 // area, parameter list area and the part of the local variable space which
5900 // contains copies of aggregates which are passed by value.
5901
5902 // Assign locations to all of the outgoing arguments.
5903 SmallVector<CCValAssign, 16> ArgLocs;
5904 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5905
5906 // Reserve space for the linkage area on the stack.
5907 CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5908 PtrAlign);
5909
5910 if (IsVarArg) {
5911 // Handle fixed and variable vector arguments differently.
5912 // Fixed vector arguments go into registers as long as registers are
5913 // available. Variable vector arguments always go into memory.
5914 unsigned NumArgs = Outs.size();
5915
5916 for (unsigned i = 0; i != NumArgs; ++i) {
5917 MVT ArgVT = Outs[i].VT;
5918 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5919 bool Result;
5920
5921 if (!ArgFlags.isVarArg()) {
5922 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5923 Outs[i].OrigTy, CCInfo);
5924 } else {
5925 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5926 ArgFlags, Outs[i].OrigTy, CCInfo);
5927 }
5928
5929 if (Result) {
5930#ifndef NDEBUG
5931 errs() << "Call operand #" << i << " has unhandled type "
5932 << ArgVT << "\n";
5933#endif
5934 llvm_unreachable(nullptr);
5935 }
5936 }
5937 } else {
5938 // All arguments are treated the same.
5939 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5940 }
5941
5942 // Assign locations to all of the outgoing aggregate by value arguments.
5943 SmallVector<CCValAssign, 16> ByValArgLocs;
5944 CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5945
5946 // Reserve stack space for the allocations in CCInfo.
5947 CCByValInfo.AllocateStack(CCInfo.getStackSize(), PtrAlign);
5948
5949 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5950
5951 // Size of the linkage area, parameter list area and the part of the local
5952 // space variable where copies of aggregates which are passed by value are
5953 // stored.
5954 unsigned NumBytes = CCByValInfo.getStackSize();
5955
5956 // Calculate by how many bytes the stack has to be adjusted in case of tail
5957 // call optimization.
5958 int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5959
5960 // Adjust the stack pointer for the new arguments...
5961 // These operations are automatically eliminated by the prolog/epilog pass
5962 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5963 SDValue CallSeqStart = Chain;
5964
5965 // Load the return address and frame pointer so it can be moved somewhere else
5966 // later.
5967 SDValue LROp, FPOp;
5968 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5969
5970 // Set up a copy of the stack pointer for use loading and storing any
5971 // arguments that may not fit in the registers available for argument
5972 // passing.
5973 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5974
5975 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
5976 SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5977 SmallVector<SDValue, 8> MemOpChains;
5978
5979 bool seenFloatArg = false;
5980 // Walk the register/memloc assignments, inserting copies/loads.
5981 // i - Tracks the index into the list of registers allocated for the call
5982 // RealArgIdx - Tracks the index into the list of actual function arguments
5983 // j - Tracks the index into the list of byval arguments
5984 for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5985 i != e;
5986 ++i, ++RealArgIdx) {
5987 CCValAssign &VA = ArgLocs[i];
5988 SDValue Arg = OutVals[RealArgIdx];
5989 ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5990
5991 if (Flags.isByVal()) {
5992 // Argument is an aggregate which is passed by value, thus we need to
5993 // create a copy of it in the local variable space of the current stack
5994 // frame (which is the stack frame of the caller) and pass the address of
5995 // this copy to the callee.
5996 assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5997 CCValAssign &ByValVA = ByValArgLocs[j++];
5998 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5999
6000 // Memory reserved in the local variable space of the caller's stack frame.
6001 unsigned LocMemOffset = ByValVA.getLocMemOffset();
6002
6003 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6004 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6005 StackPtr, PtrOff);
6006
6007 // Create a copy of the argument in the local area of the current
6008 // stack frame.
6009 SDValue MemcpyCall =
6010 CreateCopyOfByValArgument(Arg, PtrOff,
6011 CallSeqStart.getNode()->getOperand(0),
6012 Flags, DAG, dl);
6013
6014 // This must go outside the CALLSEQ_START..END.
6015 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
6016 SDLoc(MemcpyCall));
6017 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6018 NewCallSeqStart.getNode());
6019 Chain = CallSeqStart = NewCallSeqStart;
6020
6021 // Pass the address of the aggregate copy on the stack either in a
6022 // physical register or in the parameter list area of the current stack
6023 // frame to the callee.
6024 Arg = PtrOff;
6025 }
6026
6027 // When useCRBits() is true, there can be i1 arguments.
6028 // It is because getRegisterType(MVT::i1) => MVT::i1,
6029 // and for other integer types getRegisterType() => MVT::i32.
6030 // Extend i1 and ensure callee will get i32.
6031 if (Arg.getValueType() == MVT::i1)
6032 Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
6033 dl, MVT::i32, Arg);
6034
6035 if (VA.isRegLoc()) {
6036 seenFloatArg |= VA.getLocVT().isFloatingPoint();
6037 // Put argument in a physical register.
6038 if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
6039 bool IsLE = Subtarget.isLittleEndian();
6040 SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6041 DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
6042 RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
6043 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
6044 DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
6045 RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
6046 SVal.getValue(0)));
6047 } else
6048 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
6049 } else {
6050 // Put argument in the parameter list area of the current stack frame.
6051 assert(VA.isMemLoc());
6052 unsigned LocMemOffset = VA.getLocMemOffset();
6053
6054 if (!IsTailCall) {
6055 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
6056 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
6057 StackPtr, PtrOff);
6058
6059 MemOpChains.push_back(
6060 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
6061 } else {
6062 // Calculate and remember argument location.
6063 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
6064 TailCallArguments);
6065 }
6066 }
6067 }
6068
6069 if (!MemOpChains.empty())
6070 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6071
6072 // Build a sequence of copy-to-reg nodes chained together with token chain
6073 // and flag operands which copy the outgoing args into the appropriate regs.
6074 SDValue InGlue;
6075 for (const auto &[Reg, N] : RegsToPass) {
6076 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6077 InGlue = Chain.getValue(1);
6078 }
6079
6080 // Set CR bit 6 to true if this is a vararg call with floating args passed in
6081 // registers.
6082 if (IsVarArg) {
6083 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
6084 SDValue Ops[] = { Chain, InGlue };
6085
6086 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, dl,
6087 VTs, ArrayRef(Ops, InGlue.getNode() ? 2 : 1));
6088
6089 InGlue = Chain.getValue(1);
6090 }
6091
6092 if (IsTailCall)
6093 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6094 TailCallArguments);
6095
6096 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6097 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6098}
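// Editorial note, not part of PPCISelLowering.cpp: the CR6SET/CR6UNSET nodes
// above implement the 32-bit SVR4 vararg convention. For a call such as
//
//   printf("%f\n", 1.0);   /* a floating-point argument lands in an FPR */
//
// the caller sets CR bit 6 (e.g. creqv 6,6,6) before the bl so the callee
// knows it must spill the FPR argument registers for va_arg; when no
// floating-point arguments are passed in registers the bit is cleared
// (crxor 6,6,6) instead.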
6099
6100// Copy an argument into memory, being careful to do this outside the
6101// call sequence for the call to which the argument belongs.
6102SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
6103 SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
6104 SelectionDAG &DAG, const SDLoc &dl) const {
6105 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
6106 CallSeqStart.getNode()->getOperand(0),
6107 Flags, DAG, dl);
6108 // The MEMCPY must go outside the CALLSEQ_START..END.
6109 int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
6110 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
6111 SDLoc(MemcpyCall));
6112 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
6113 NewCallSeqStart.getNode());
6114 return NewCallSeqStart;
6115}
6116
6117SDValue PPCTargetLowering::LowerCall_64SVR4(
6118 SDValue Chain, SDValue Callee, CallFlags CFlags,
6119 const SmallVectorImpl<ISD::OutputArg> &Outs,
6120 const SmallVectorImpl<SDValue> &OutVals,
6121 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
6122 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
6123 const CallBase *CB) const {
6124 bool isELFv2ABI = Subtarget.isELFv2ABI();
6125 bool isLittleEndian = Subtarget.isLittleEndian();
6126 unsigned NumOps = Outs.size();
6127 bool IsSibCall = false;
6128 bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
6129
6130 EVT PtrVT = getPointerTy(DAG.getDataLayout());
6131 unsigned PtrByteSize = 8;
6132
6133 MachineFunction &MF = DAG.getMachineFunction();
6134
6135 if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
6136 IsSibCall = true;
6137
6138 // Mark this function as potentially containing a function that contains a
6139 // tail call. As a consequence the frame pointer will be used for dynamic
6140 // allocation and for restoring the caller's stack pointer in this function's
6141 // epilogue. This is done because the tail-called function might overwrite the
6142 // value in this function's (MF) stack pointer stack slot 0(SP).
6143 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6144 MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
6145
6146 assert(!(IsFastCall && CFlags.IsVarArg) &&
6147 "fastcc not supported on varargs functions");
6148
6149 // Count how many bytes are to be pushed on the stack, including the linkage
6150 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
6151 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
6152 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
6153 unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
6154 unsigned NumBytes = LinkageSize;
6155 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
6156
6157 static const MCPhysReg GPR[] = {
6158 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6159 PPC::X7, PPC::X8, PPC::X9, PPC::X10,
6160 };
6161 static const MCPhysReg VR[] = {
6162 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
6163 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
6164 };
6165
6166 const unsigned NumGPRs = std::size(GPR);
6167 const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
6168 const unsigned NumVRs = std::size(VR);
6169
6170 // On ELFv2, we can avoid allocating the parameter area if all the arguments
6171 // can be passed to the callee in registers.
6172 // For the fast calling convention, there is another check below.
6173 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
6174 bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
6175 if (!HasParameterArea) {
6176 unsigned ParamAreaSize = NumGPRs * PtrByteSize;
6177 unsigned AvailableFPRs = NumFPRs;
6178 unsigned AvailableVRs = NumVRs;
6179 unsigned NumBytesTmp = NumBytes;
6180 for (unsigned i = 0; i != NumOps; ++i) {
6181 if (Outs[i].Flags.isNest()) continue;
6182 if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
6183 PtrByteSize, LinkageSize, ParamAreaSize,
6184 NumBytesTmp, AvailableFPRs, AvailableVRs))
6185 HasParameterArea = true;
6186 }
6187 }
6188
6189 // When using the fast calling convention, we don't provide backing for
6190 // arguments that will be in registers.
6191 unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6192
6193 // Avoid allocating parameter area for fastcc functions if all the arguments
6194 // can be passed in the registers.
6195 if (IsFastCall)
6196 HasParameterArea = false;
6197
6198 // Add up all the space actually used.
6199 for (unsigned i = 0; i != NumOps; ++i) {
6200 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6201 EVT ArgVT = Outs[i].VT;
6202 EVT OrigVT = Outs[i].ArgVT;
6203
6204 if (Flags.isNest())
6205 continue;
6206
6207 if (IsFastCall) {
6208 if (Flags.isByVal()) {
6209 NumGPRsUsed += (Flags.getByValSize()+7)/8;
6210 if (NumGPRsUsed > NumGPRs)
6211 HasParameterArea = true;
6212 } else {
6213 switch (ArgVT.getSimpleVT().SimpleTy) {
6214 default: llvm_unreachable("Unexpected ValueType for argument!");
6215 case MVT::i1:
6216 case MVT::i32:
6217 case MVT::i64:
6218 if (++NumGPRsUsed <= NumGPRs)
6219 continue;
6220 break;
6221 case MVT::v4i32:
6222 case MVT::v8i16:
6223 case MVT::v16i8:
6224 case MVT::v2f64:
6225 case MVT::v2i64:
6226 case MVT::v1i128:
6227 case MVT::f128:
6228 if (++NumVRsUsed <= NumVRs)
6229 continue;
6230 break;
6231 case MVT::v4f32:
6232 if (++NumVRsUsed <= NumVRs)
6233 continue;
6234 break;
6235 case MVT::f32:
6236 case MVT::f64:
6237 if (++NumFPRsUsed <= NumFPRs)
6238 continue;
6239 break;
6240 }
6241 HasParameterArea = true;
6242 }
6243 }
6244
6245 /* Respect alignment of argument on the stack. */
6246 auto Alignment =
6247 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6248 NumBytes = alignTo(NumBytes, Alignment);
6249
6250 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6251 if (Flags.isInConsecutiveRegsLast())
6252 NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6253 }
6254
6255 unsigned NumBytesActuallyUsed = NumBytes;
6256
6257 // In the old ELFv1 ABI,
6258 // the prolog code of the callee may store up to 8 GPR argument registers to
6259 // the stack, allowing va_start to index over them in memory if it is varargs.
6260 // Because we cannot tell if this is needed on the caller side, we have to
6261 // conservatively assume that it is needed. As such, make sure we have at
6262 // least enough stack space for the caller to store the 8 GPRs.
6263 // In the ELFv2 ABI, we allocate the parameter area iff a callee
6264 // really requires memory operands, e.g. a vararg function.
6265 if (HasParameterArea)
6266 NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6267 else
6268 NumBytes = LinkageSize;
6269
6270 // Tail call needs the stack to be aligned.
6271 if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6272 NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6273
6274 int SPDiff = 0;
6275
6276 // Calculate by how many bytes the stack has to be adjusted in case of tail
6277 // call optimization.
6278 if (!IsSibCall)
6279 SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6280
6281 // To protect arguments on the stack from being clobbered in a tail call,
6282 // force all the loads to happen before doing any other lowering.
6283 if (CFlags.IsTailCall)
6284 Chain = DAG.getStackArgumentTokenFactor(Chain);
6285
6286 // Adjust the stack pointer for the new arguments...
6287 // These operations are automatically eliminated by the prolog/epilog pass
6288 if (!IsSibCall)
6289 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6290 SDValue CallSeqStart = Chain;
6291
6292 // Load the return address and frame pointer so it can be moved somewhere else
6293 // later.
6294 SDValue LROp, FPOp;
6295 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6296
6297 // Set up a copy of the stack pointer for use loading and storing any
6298 // arguments that may not fit in the registers available for argument
6299 // passing.
6300 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6301
6302 // Figure out which arguments are going to go in registers, and which in
6303 // memory. Also, if this is a vararg function, floating point operations
6304 // must be stored to our stack, and loaded into integer regs as well, if
6305 // any integer regs are available for argument passing.
6306 unsigned ArgOffset = LinkageSize;
6307
6308 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
6310
6311 SmallVector<SDValue, 8> MemOpChains;
6312 for (unsigned i = 0; i != NumOps; ++i) {
6313 SDValue Arg = OutVals[i];
6314 ISD::ArgFlagsTy Flags = Outs[i].Flags;
6315 EVT ArgVT = Outs[i].VT;
6316 EVT OrigVT = Outs[i].ArgVT;
6317
6318 // PtrOff will be used to store the current argument to the stack if a
6319 // register cannot be found for it.
6320 SDValue PtrOff;
6321
6322 // We re-align the argument offset for each argument, except when using the
6323 // fast calling convention, when we need to make sure we do that only when
6324 // we'll actually use a stack slot.
6325 auto ComputePtrOff = [&]() {
6326 /* Respect alignment of argument on the stack. */
6327 auto Alignment =
6328 CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6329 ArgOffset = alignTo(ArgOffset, Alignment);
6330
6331 PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6332
6333 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6334 };
6335
6336 if (!IsFastCall) {
6337 ComputePtrOff();
6338
6339 /* Compute GPR index associated with argument offset. */
6340 GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6341 GPR_idx = std::min(GPR_idx, NumGPRs);
6342 }
6343
6344 // Promote integers to 64-bit values.
6345 if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6346 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6347 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6348 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6349 }
6350
6351 // FIXME memcpy is used way more than necessary. Correctness first.
6352 // Note: "by value" is code for passing a structure by value, not
6353 // basic types.
6354 if (Flags.isByVal()) {
6355 // Note: Size includes alignment padding, so
6356 // struct x { short a; char b; }
6357 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6358 // These are the proper values we need for right-justifying the
6359 // aggregate in a parameter register.
6360 unsigned Size = Flags.getByValSize();
6361
6362 // An empty aggregate parameter takes up no storage and no
6363 // registers.
6364 if (Size == 0)
6365 continue;
6366
6367 if (IsFastCall)
6368 ComputePtrOff();
6369
6370 // All aggregates smaller than 8 bytes must be passed right-justified.
6371 if (Size==1 || Size==2 || Size==4) {
6372 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6373 if (GPR_idx != NumGPRs) {
6374 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6375 MachinePointerInfo(), VT);
6376 MemOpChains.push_back(Load.getValue(1));
6377 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6378
6379 ArgOffset += PtrByteSize;
6380 continue;
6381 }
6382 }
6383
6384 if (GPR_idx == NumGPRs && Size < 8) {
6385 SDValue AddPtr = PtrOff;
6386 if (!isLittleEndian) {
6387 SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6388 PtrOff.getValueType());
6389 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6390 }
6391 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6392 CallSeqStart,
6393 Flags, DAG, dl);
6394 ArgOffset += PtrByteSize;
6395 continue;
6396 }
6397 // Copy the object to parameter save area if it can not be entirely passed
6398 // by registers.
6399 // FIXME: we only need to copy the parts which need to be passed in
6400 // parameter save area. For the parts passed by registers, we don't need
6401 // to copy them to the stack although we need to allocate space for them
6402 // in parameter save area.
6403 if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6404 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6405 CallSeqStart,
6406 Flags, DAG, dl);
6407
6408 // When a register is available, pass a small aggregate right-justified.
6409 if (Size < 8 && GPR_idx != NumGPRs) {
6410 // The easiest way to get this right-justified in a register
6411 // is to copy the structure into the rightmost portion of a
6412 // local variable slot, then load the whole slot into the
6413 // register.
6414 // FIXME: The memcpy seems to produce pretty awful code for
6415 // small aggregates, particularly for packed ones.
6416 // FIXME: It would be preferable to use the slot in the
6417 // parameter save area instead of a new local variable.
6418 SDValue AddPtr = PtrOff;
6419 if (!isLittleEndian) {
6420 SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6421 AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6422 }
6423 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6424 CallSeqStart,
6425 Flags, DAG, dl);
6426
6427 // Load the slot into the register.
6428 SDValue Load =
6429 DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6430 MemOpChains.push_back(Load.getValue(1));
6431 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6432
6433 // Done with this argument.
6434 ArgOffset += PtrByteSize;
6435 continue;
6436 }
6437
6438 // For aggregates larger than PtrByteSize, copy the pieces of the
6439 // object that fit into registers from the parameter save area.
6440 for (unsigned j=0; j<Size; j+=PtrByteSize) {
6441 SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6442 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6443 if (GPR_idx != NumGPRs) {
6444 unsigned LoadSizeInBits = std::min(PtrByteSize, (Size - j)) * 8;
6445 EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), LoadSizeInBits);
6446 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, AddArg,
6447 MachinePointerInfo(), ObjType);
6448
6449 MemOpChains.push_back(Load.getValue(1));
6450 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6451 ArgOffset += PtrByteSize;
6452 } else {
6453 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6454 break;
6455 }
6456 }
6457 continue;
6458 }
6459
6460 switch (Arg.getSimpleValueType().SimpleTy) {
6461 default: llvm_unreachable("Unexpected ValueType for argument!");
6462 case MVT::i1:
6463 case MVT::i32:
6464 case MVT::i64:
6465 if (Flags.isNest()) {
6466 // The 'nest' parameter, if any, is passed in R11.
6467 RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6468 break;
6469 }
6470
6471 // These can be scalar arguments or elements of an integer array type
6472 // passed directly. Clang may use those instead of "byval" aggregate
6473 // types to avoid forcing arguments to memory unnecessarily.
6474 if (GPR_idx != NumGPRs) {
6475 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6476 } else {
6477 if (IsFastCall)
6478 ComputePtrOff();
6479
6480 assert(HasParameterArea &&
6481 "Parameter area must exist to pass an argument in memory.");
6482 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6483 true, CFlags.IsTailCall, false, MemOpChains,
6484 TailCallArguments, dl);
6485 if (IsFastCall)
6486 ArgOffset += PtrByteSize;
6487 }
6488 if (!IsFastCall)
6489 ArgOffset += PtrByteSize;
6490 break;
6491 case MVT::f32:
6492 case MVT::f64: {
6493 // These can be scalar arguments or elements of a float array type
6494 // passed directly. The latter are used to implement ELFv2 homogenous
6495 // float aggregates.
6496
6497 // Named arguments go into FPRs first, and once they overflow, the
6498 // remaining arguments go into GPRs and then the parameter save area.
6499 // Unnamed arguments for vararg functions always go to GPRs and
6500 // then the parameter save area. For now, always put arguments to vararg
6501 // routines in both locations (FPR *and* GPR or stack slot).
6502 bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6503 bool NeededLoad = false;
6504
6505 // First load the argument into the next available FPR.
6506 if (FPR_idx != NumFPRs)
6507 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6508
6509 // Next, load the argument into GPR or stack slot if needed.
6510 if (!NeedGPROrStack)
6511 ;
6512 else if (GPR_idx != NumGPRs && !IsFastCall) {
6513 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6514 // once we support fp <-> gpr moves.
6515
6516 // In the non-vararg case, this can only ever happen in the
6517 // presence of f32 array types, since otherwise we never run
6518 // out of FPRs before running out of GPRs.
6519 SDValue ArgVal;
6520
6521 // Double values are always passed in a single GPR.
6522 if (Arg.getValueType() != MVT::f32) {
6523 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6524
6525 // Non-array float values are extended and passed in a GPR.
6526 } else if (!Flags.isInConsecutiveRegs()) {
6527 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6528 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6529
6530 // If we have an array of floats, we collect every odd element
6531 // together with its predecessor into one GPR.
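// For example, f32 elements 0 and 1 of a homogeneous float member array share
// one doubleword GPR; after the endian swap below, the even-indexed element
// ends up in the most significant word on big-endian targets.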
6532 } else if (ArgOffset % PtrByteSize != 0) {
6533 SDValue Lo, Hi;
6534 Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6535 Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6536 if (!isLittleEndian)
6537 std::swap(Lo, Hi);
6538 ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6539
6540 // The final element, if even, goes into the first half of a GPR.
6541 } else if (Flags.isInConsecutiveRegsLast()) {
6542 ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6543 ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6544 if (!isLittleEndian)
6545 ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6546 DAG.getConstant(32, dl, MVT::i32));
6547
6548 // Non-final even elements are skipped; they will be handled
6549 // together with the subsequent argument on the next go-around.
6550 } else
6551 ArgVal = SDValue();
6552
6553 if (ArgVal.getNode())
6554 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6555 } else {
6556 if (IsFastCall)
6557 ComputePtrOff();
6558
6559 // Single-precision floating-point values are mapped to the
6560 // second (rightmost) word of the stack doubleword.
6561 if (Arg.getValueType() == MVT::f32 &&
6562 !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6563 SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6564 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6565 }
6566
6567 assert(HasParameterArea &&
6568 "Parameter area must exist to pass an argument in memory.");
6569 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6570 true, CFlags.IsTailCall, false, MemOpChains,
6571 TailCallArguments, dl);
6572
6573 NeededLoad = true;
6574 }
6575 // When passing an array of floats, the array occupies consecutive
6576 // space in the argument area; only round up to the next doubleword
6577 // at the end of the array. Otherwise, each float takes 8 bytes.
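// For example, three consecutive f32 array elements advance ArgOffset by
// 4 + 4 + 4 = 12 bytes, and the round-up below for the final element brings
// it to 16, the next multiple of PtrByteSize == 8.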
6578 if (!IsFastCall || NeededLoad) {
6579 ArgOffset += (Arg.getValueType() == MVT::f32 &&
6580 Flags.isInConsecutiveRegs()) ? 4 : 8;
6581 if (Flags.isInConsecutiveRegsLast())
6582 ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6583 }
6584 break;
6585 }
6586 case MVT::v4f32:
6587 case MVT::v4i32:
6588 case MVT::v8i16:
6589 case MVT::v16i8:
6590 case MVT::v2f64:
6591 case MVT::v2i64:
6592 case MVT::v1i128:
6593 case MVT::f128:
6594 // These can be scalar arguments or elements of a vector array type
6595 // passed directly. The latter are used to implement ELFv2 homogenous
6596 // vector aggregates.
6597
6598 // For a varargs call, named arguments go into VRs or on the stack as
6599 // usual; unnamed arguments always go to the stack or the corresponding
6600 // GPRs when within range. For now, we always put the value in both
6601 // locations (or even all three).
6602 if (CFlags.IsVarArg) {
6603 assert(HasParameterArea &&
6604 "Parameter area must exist if we have a varargs call.");
6605 // We could elide this store in the case where the object fits
6606 // entirely in R registers. Maybe later.
6607 SDValue Store =
6608 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6609 MemOpChains.push_back(Store);
6610 if (VR_idx != NumVRs) {
6611 SDValue Load =
6612 DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6613 MemOpChains.push_back(Load.getValue(1));
6614 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6615 }
6616 ArgOffset += 16;
6617 for (unsigned i=0; i<16; i+=PtrByteSize) {
6618 if (GPR_idx == NumGPRs)
6619 break;
6620 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6621 DAG.getConstant(i, dl, PtrVT));
6622 SDValue Load =
6623 DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6624 MemOpChains.push_back(Load.getValue(1));
6625 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6626 }
6627 break;
6628 }
6629
6630 // Non-varargs Altivec params go into VRs or on the stack.
6631 if (VR_idx != NumVRs) {
6632 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6633 } else {
6634 if (IsFastCall)
6635 ComputePtrOff();
6636
6637 assert(HasParameterArea &&
6638 "Parameter area must exist to pass an argument in memory.");
6639 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6640 true, CFlags.IsTailCall, true, MemOpChains,
6641 TailCallArguments, dl);
6642 if (IsFastCall)
6643 ArgOffset += 16;
6644 }
6645
6646 if (!IsFastCall)
6647 ArgOffset += 16;
6648 break;
6649 }
6650 }
6651
6652 assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6653 "mismatch in size of parameter area");
6654 (void)NumBytesActuallyUsed;
6655
6656 if (!MemOpChains.empty())
6657 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6658
6659 // Check if this is an indirect call (MTCTR/BCTRL).
6660 // See prepareDescriptorIndirectCall and buildCallOperands for more
6661 // information about calls through function pointers in the 64-bit SVR4 ABI.
6662 if (CFlags.IsIndirect) {
6663 // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6664 // caller in the TOC save area.
6665 if (isTOCSaveRestoreRequired(Subtarget)) {
6666 assert(!CFlags.IsTailCall && "Indirect tail calls not supported");
6667 // Load r2 into a virtual register and store it to the TOC save area.
6668 setUsesTOCBasePtr(DAG);
6669 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6670 // TOC save area offset.
6671 unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6672 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6673 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6674 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6675 MachinePointerInfo::getStack(
6676 DAG.getMachineFunction(), TOCSaveOffset));
6677 }
6678 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6679 // This does not mean the MTCTR instruction must use R12; it's easier
6680 // to model this as an extra parameter, so do that.
6681 if (isELFv2ABI && !CFlags.IsPatchPoint)
6682 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6683 }
6684
6685 // Build a sequence of copy-to-reg nodes chained together with token chain
6686 // and flag operands which copy the outgoing args into the appropriate regs.
6687 SDValue InGlue;
6688 for (const auto &[Reg, N] : RegsToPass) {
6689 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
6690 InGlue = Chain.getValue(1);
6691 }
6692
6693 if (CFlags.IsTailCall && !IsSibCall)
6694 PrepareTailCall(DAG, InGlue, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6695 TailCallArguments);
6696
6697 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
6698 Callee, SPDiff, NumBytes, Ins, InVals, CB);
6699}
6700
6701// Returns true when the shadow of a general purpose argument register
6702// in the parameter save area is aligned to at least 'RequiredAlign'.
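// For example, in 64-bit mode the parameter save area starts 48 bytes above
// the stack pointer, so X3's shadow is 16-byte aligned while X4's shadow
// (offset 56) is only 8-byte aligned; in 32-bit mode the 24-byte linkage
// area makes R5 and R9 the 16-byte aligned shadows.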
6703static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6704 assert(RequiredAlign.value() <= 16 &&
6705 "Required alignment greater than stack alignment.");
6706 switch (Reg) {
6707 default:
6708 report_fatal_error("called on invalid register.");
6709 case PPC::R5:
6710 case PPC::R9:
6711 case PPC::X3:
6712 case PPC::X5:
6713 case PPC::X7:
6714 case PPC::X9:
6715 // These registers are 16 byte aligned, which is the strictest alignment
6716 // we can support.
6717 return true;
6718 case PPC::R3:
6719 case PPC::R7:
6720 case PPC::X4:
6721 case PPC::X6:
6722 case PPC::X8:
6723 case PPC::X10:
6724 // The shadow of these registers in the PSA is 8 byte aligned.
6725 return RequiredAlign <= 8;
6726 case PPC::R4:
6727 case PPC::R6:
6728 case PPC::R8:
6729 case PPC::R10:
6730 return RequiredAlign <= 4;
6731 }
6732}
6733
6734static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
6735 CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
6736 Type *OrigTy, CCState &State) {
6737 const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
6738 State.getMachineFunction().getSubtarget());
6739 const bool IsPPC64 = Subtarget.isPPC64();
6740 const unsigned PtrSize = IsPPC64 ? 8 : 4;
6741 const Align PtrAlign(PtrSize);
6742 const Align StackAlign(16);
6743 const MVT RegVT = Subtarget.getScalarIntVT();
6744
6745 if (ValVT == MVT::f128)
6746 report_fatal_error("f128 is unimplemented on AIX.");
6747
6748 static const MCPhysReg GPR_32[] = {// 32-bit registers.
6749 PPC::R3, PPC::R4, PPC::R5, PPC::R6,
6750 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
6751 static const MCPhysReg GPR_64[] = {// 64-bit registers.
6752 PPC::X3, PPC::X4, PPC::X5, PPC::X6,
6753 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
6754
6755 static const MCPhysReg VR[] = {// Vector registers.
6756 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
6757 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
6758 PPC::V10, PPC::V11, PPC::V12, PPC::V13};
6759
6760 const ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;
6761
6762 if (ArgFlags.isNest()) {
6763 MCRegister EnvReg = State.AllocateReg(IsPPC64 ? PPC::X11 : PPC::R11);
6764 if (!EnvReg)
6765 report_fatal_error("More then one nest argument.");
6766 State.addLoc(CCValAssign::getReg(ValNo, ValVT, EnvReg, RegVT, LocInfo));
6767 return false;
6768 }
6769
6770 if (ArgFlags.isByVal()) {
6771 const Align ByValAlign(ArgFlags.getNonZeroByValAlign());
6772 if (ByValAlign > StackAlign)
6773 report_fatal_error("Pass-by-value arguments with alignment greater than "
6774 "16 are not supported.");
6775
6776 const unsigned ByValSize = ArgFlags.getByValSize();
6777 const Align ObjAlign = ByValAlign > PtrAlign ? ByValAlign : PtrAlign;
6778
6779 // An empty aggregate parameter takes up no storage and no registers,
6780 // but needs a MemLoc for a stack slot for the formal arguments side.
6781 if (ByValSize == 0) {
6782 State.addLoc(CCValAssign::getMem(ValNo, ValVT,
6783 State.getStackSize(), RegVT, LocInfo));
6784 return false;
6785 }
6786
6787 // Shadow allocate any registers that are not properly aligned.
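// For example, a 16-byte aligned by-val argument arriving when R4 is the next
// free GPR burns R4 (and its 4-byte shadow slot) so that allocation resumes
// at R5, whose shadow is 16-byte aligned.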
6788 unsigned NextReg = State.getFirstUnallocated(GPRs);
6789 while (NextReg != GPRs.size() &&
6790 !isGPRShadowAligned(GPRs[NextReg], ObjAlign)) {
6791 // Shadow allocate the next register since its alignment is not strict enough.
6792 MCRegister Reg = State.AllocateReg(GPRs);
6793 // Allocate the stack space shadowed by said register.
6794 State.AllocateStack(PtrSize, PtrAlign);
6795 assert(Reg && "Alocating register unexpectedly failed.");
6796 (void)Reg;
6797 NextReg = State.getFirstUnallocated(GPRs);
6798 }
6799
6800 const unsigned StackSize = alignTo(ByValSize, ObjAlign);
6801 unsigned Offset = State.AllocateStack(StackSize, ObjAlign);
6802 for (const unsigned E = Offset + StackSize; Offset < E; Offset += PtrSize) {
6803 if (MCRegister Reg = State.AllocateReg(GPRs))
6804 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6805 else {
6806 State.addLoc(
6807 CCValAssign::getMem(ValNo, ValVT, Offset, RegVT,
6808 LocInfo));
6809 break;
6810 }
6811 }
6812 return false;
6813 }
6814
6815 // Arguments always reserve parameter save area.
6816 switch (ValVT.SimpleTy) {
6817 default:
6818 report_fatal_error("Unhandled value type for argument.");
6819 case MVT::i64:
6820 // i64 arguments should have been split to i32 for PPC32.
6821 assert(IsPPC64 && "PPC32 should have split i64 values.");
6822 [[fallthrough]];
6823 case MVT::i1:
6824 case MVT::i32: {
6825 const unsigned Offset = State.AllocateStack(PtrSize, PtrAlign);
6826 // AIX integer arguments are always passed in register width.
6827 if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
6828 LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
6829 : CCValAssign::LocInfo::ZExt;
6830 if (MCRegister Reg = State.AllocateReg(GPRs))
6831 State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6832 else
6833 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));
6834
6835 return false;
6836 }
6837 case MVT::f32:
6838 case MVT::f64: {
6839 // Parameter save area (PSA) is reserved even if the float passes in fpr.
6840 const unsigned StoreSize = LocVT.getStoreSize();
6841 // Floats are always 4-byte aligned in the PSA on AIX.
6842 // This includes f64 in 64-bit mode for ABI compatibility.
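// For example, an f32 argument on 64-bit AIX still reserves a full 8-byte
// slot in the PSA, which is why the allocation below uses 8 rather than the
// 4-byte store size.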
6843 const unsigned Offset =
6844 State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
6845 MCRegister FReg = State.AllocateReg(FPR);
6846 if (FReg)
6847 State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));
6848
6849 // Reserve and initialize GPRs or initialize the PSA as required.
6850 for (unsigned I = 0; I < StoreSize; I += PtrSize) {
6851 if (MCRegister Reg = State.AllocateReg(GPRs)) {
6852 assert(FReg && "An FPR should be available when a GPR is reserved.");
6853 if (State.isVarArg()) {
6854 // Successfully reserved GPRs are only initialized for vararg calls.
6855 // Custom handling is required for:
6856 // f64 in PPC32 needs to be split into 2 GPRs.
6857 // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
6858 State.addLoc(
6859 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6860 }
6861 } else {
6862 // If there are insufficient GPRs, the PSA needs to be initialized.
6863 // Initialization occurs even if an FPR was initialized for
6864 // compatibility with the AIX XL compiler. The full memory for the
6865 // argument will be initialized even if a prior word is saved in GPR.
6866 // A custom memLoc is used when the argument also passes in FPR so
6867 // that the callee handling can skip over it easily.
6868 State.addLoc(
6869 FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
6870 LocInfo)
6871 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6872 break;
6873 }
6874 }
6875
6876 return false;
6877 }
6878 case MVT::v4f32:
6879 case MVT::v4i32:
6880 case MVT::v8i16:
6881 case MVT::v16i8:
6882 case MVT::v2i64:
6883 case MVT::v2f64:
6884 case MVT::v1i128: {
6885 const unsigned VecSize = 16;
6886 const Align VecAlign(VecSize);
6887
6888 if (!State.isVarArg()) {
6889 // If there are vector registers remaining we don't consume any stack
6890 // space.
6891 if (MCRegister VReg = State.AllocateReg(VR)) {
6892 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6893 return false;
6894 }
6895 // Vectors passed on the stack do not shadow GPRs or FPRs even though they
6896 // might be allocated in the portion of the PSA that is shadowed by the
6897 // GPRs.
6898 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6899 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6900 return false;
6901 }
6902
6903 unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
6904 // Burn any underaligned registers and their shadowed stack space until
6905 // we reach the required alignment.
6906 while (NextRegIndex != GPRs.size() &&
6907 !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
6908 // Shadow allocate register and its stack shadow.
6909 MCRegister Reg = State.AllocateReg(GPRs);
6910 State.AllocateStack(PtrSize, PtrAlign);
6911 assert(Reg && "Allocating register unexpectedly failed.");
6912 (void)Reg;
6913 NextRegIndex = State.getFirstUnallocated(GPRs);
6914 }
6915
6916 // Vectors that are passed as fixed arguments are handled differently.
6917 // They are passed in VRs if any are available (unlike arguments passed
6918 // through ellipses) and shadow GPRs (unlike arguments to non-vararg
6919 // functions).
6920 if (!ArgFlags.isVarArg()) {
6921 if (MCRegister VReg = State.AllocateReg(VR)) {
6922 State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
6923 // Shadow allocate GPRs and stack space even though we pass in a VR.
6924 for (unsigned I = 0; I != VecSize; I += PtrSize)
6925 State.AllocateReg(GPRs);
6926 State.AllocateStack(VecSize, VecAlign);
6927 return false;
6928 }
6929 // No vector registers remain so pass on the stack.
6930 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6931 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6932 return false;
6933 }
6934
6935 // If all GPRS are consumed then we pass the argument fully on the stack.
6936 if (NextRegIndex == GPRs.size()) {
6937 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6938 State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6939 return false;
6940 }
6941
6942 // Corner case for 32-bit codegen. We have 2 registers to pass the first
6943 // half of the argument, and then need to pass the remaining half on the
6944 // stack.
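// That is, when R9 is the next free GPR, the first 8 bytes of the 16-byte
// vector travel in R9 and R10, while the full vector is also written to its
// PSA slot so the callee can load the second half from memory.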
6945 if (GPRs[NextRegIndex] == PPC::R9) {
6946 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6947 State.addLoc(
6948 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6949
6950 const MCRegister FirstReg = State.AllocateReg(PPC::R9);
6951 const MCRegister SecondReg = State.AllocateReg(PPC::R10);
6952 assert(FirstReg && SecondReg &&
6953 "Allocating R9 or R10 unexpectedly failed.");
6954 State.addLoc(
6955 CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
6956 State.addLoc(
6957 CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
6958 return false;
6959 }
6960
6961 // We have enough GPRs to fully pass the vector argument, and we have
6962 // already consumed any underaligned registers. Start with the custom
6963 // MemLoc and then the custom RegLocs.
6964 const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
6965 State.addLoc(
6966 CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
6967 for (unsigned I = 0; I != VecSize; I += PtrSize) {
6968 const MCRegister Reg = State.AllocateReg(GPRs);
6969 assert(Reg && "Failed to allocated register for vararg vector argument");
6970 State.addLoc(
6971 CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
6972 }
6973 return false;
6974 }
6975 }
6976 return true;
6977}
6978
6979// So far, this function is only used by LowerFormalArguments_AIX()
6980 static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT,
6981 bool IsPPC64,
6982 bool HasP8Vector,
6983 bool HasVSX) {
6984 assert((IsPPC64 || SVT != MVT::i64) &&
6985 "i64 should have been split for 32-bit codegen.");
6986
6987 switch (SVT) {
6988 default:
6989 report_fatal_error("Unexpected value type for formal argument");
6990 case MVT::i1:
6991 case MVT::i32:
6992 case MVT::i64:
6993 return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6994 case MVT::f32:
6995 return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6996 case MVT::f64:
6997 return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6998 case MVT::v4f32:
6999 case MVT::v4i32:
7000 case MVT::v8i16:
7001 case MVT::v16i8:
7002 case MVT::v2i64:
7003 case MVT::v2f64:
7004 case MVT::v1i128:
7005 return &PPC::VRRCRegClass;
7006 }
7007}
7008
7010 SelectionDAG &DAG, SDValue ArgValue,
7011 MVT LocVT, const SDLoc &dl) {
7012 assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
7013 assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
7014
7015 if (Flags.isSExt())
7016 ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
7017 DAG.getValueType(ValVT));
7018 else if (Flags.isZExt())
7019 ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
7020 DAG.getValueType(ValVT));
7021
7022 return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
7023}
7024
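// Maps an argument GPR to the offset of its shadow slot in the caller's
// parameter save area. For example, with a 64-bit linkage area of 48 bytes,
// X5 maps to 48 + 8 * 2 = 64.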
7025static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
7026 const unsigned LASize = FL->getLinkageSize();
7027
7028 if (PPC::GPRCRegClass.contains(Reg)) {
7029 assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
7030 "Reg must be a valid argument register!");
7031 return LASize + 4 * (Reg - PPC::R3);
7032 }
7033
7034 if (PPC::G8RCRegClass.contains(Reg)) {
7035 assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
7036 "Reg must be a valid argument register!");
7037 return LASize + 8 * (Reg - PPC::X3);
7038 }
7039
7040 llvm_unreachable("Only general purpose registers expected.");
7041}
7042
7043// AIX ABI Stack Frame Layout:
7044//
7045// Low Memory +--------------------------------------------+
7046// SP +---> | Back chain | ---+
7047// | +--------------------------------------------+ |
7048// | | Saved Condition Register | |
7049// | +--------------------------------------------+ |
7050// | | Saved Linkage Register | |
7051// | +--------------------------------------------+ | Linkage Area
7052// | | Reserved for compilers | |
7053// | +--------------------------------------------+ |
7054// | | Reserved for binders | |
7055// | +--------------------------------------------+ |
7056// | | Saved TOC pointer | ---+
7057// | +--------------------------------------------+
7058// | | Parameter save area |
7059// | +--------------------------------------------+
7060// | | Alloca space |
7061// | +--------------------------------------------+
7062// | | Local variable space |
7063// | +--------------------------------------------+
7064// | | Float/int conversion temporary |
7065// | +--------------------------------------------+
7066// | | Save area for AltiVec registers |
7067// | +--------------------------------------------+
7068// | | AltiVec alignment padding |
7069// | +--------------------------------------------+
7070// | | Save area for VRSAVE register |
7071// | +--------------------------------------------+
7072// | | Save area for General Purpose registers |
7073// | +--------------------------------------------+
7074// | | Save area for Floating Point registers |
7075// | +--------------------------------------------+
7076// +---- | Back chain |
7077// High Memory +--------------------------------------------+
7078//
7079// Specifications:
7080// AIX 7.2 Assembler Language Reference
7081// Subroutine linkage convention
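//
// The linkage area shown above is 24 bytes (6 x 4) on 32-bit AIX and
// 48 bytes (6 x 8) on 64-bit AIX; the parameter save area begins immediately
// after it.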
7082
7083SDValue PPCTargetLowering::LowerFormalArguments_AIX(
7084 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
7085 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7086 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
7087
7088 assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
7089 CallConv == CallingConv::Fast) &&
7090 "Unexpected calling convention!");
7091
7092 if (getTargetMachine().Options.GuaranteedTailCallOpt)
7093 report_fatal_error("Tail call support is unimplemented on AIX.");
7094
7095 if (useSoftFloat())
7096 report_fatal_error("Soft float support is unimplemented on AIX.");
7097
7098 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7099
7100 const bool IsPPC64 = Subtarget.isPPC64();
7101 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7102
7103 // Assign locations to all of the incoming arguments.
7105 MachineFunction &MF = DAG.getMachineFunction();
7106 MachineFrameInfo &MFI = MF.getFrameInfo();
7107 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7108 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
7109
7110 const EVT PtrVT = getPointerTy(MF.getDataLayout());
7111 // Reserve space for the linkage area on the stack.
7112 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7113 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7114 uint64_t SaveStackPos = CCInfo.getStackSize();
7115 bool SaveParams = MF.getFunction().hasFnAttribute("save-reg-params");
7116 CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);
7117
7118 SmallVector<SDValue, 8> MemOps;
7119
7120 for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
7121 CCValAssign &VA = ArgLocs[I++];
7122 MVT LocVT = VA.getLocVT();
7123 MVT ValVT = VA.getValVT();
7124 ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
7125
7126 EVT ArgVT = Ins[VA.getValNo()].ArgVT;
7127 bool ArgSignExt = Ins[VA.getValNo()].Flags.isSExt();
7128 // For compatibility with the AIX XL compiler, the float args in the
7129 // parameter save area are initialized even if the argument is available
7130 // in register. The caller is required to initialize both the register
7131 // and memory; however, the callee can choose to expect it in either.
7132 // The memloc is dismissed here because the argument is retrieved from
7133 // the register.
7134 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
7135 continue;
7136
7137 if (SaveParams && VA.isRegLoc() && !Flags.isByVal() && !VA.needsCustom()) {
7138 const TargetRegisterClass *RegClass = getRegClassForSVT(
7139 LocVT.SimpleTy, IsPPC64, Subtarget.hasP8Vector(), Subtarget.hasVSX());
7140 // On PPC64, debugger assumes extended 8-byte values are stored from GPR.
7141 MVT SaveVT = RegClass == &PPC::G8RCRegClass ? MVT::i64 : LocVT;
7142 const Register VReg = MF.addLiveIn(VA.getLocReg(), RegClass);
7143 SDValue Parm = DAG.getCopyFromReg(Chain, dl, VReg, SaveVT);
7144 int FI = MFI.CreateFixedObject(SaveVT.getStoreSize(), SaveStackPos, true);
7145 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7146 SDValue StoreReg = DAG.getStore(Chain, dl, Parm, FIN,
7147 MachinePointerInfo(), Align(PtrByteSize));
7148 SaveStackPos = alignTo(SaveStackPos + SaveVT.getStoreSize(), PtrByteSize);
7149 MemOps.push_back(StoreReg);
7150 }
7151
7152 if (SaveParams && (VA.isMemLoc() || Flags.isByVal()) && !VA.needsCustom()) {
7153 unsigned StoreSize =
7154 Flags.isByVal() ? Flags.getByValSize() : LocVT.getStoreSize();
7155 SaveStackPos = alignTo(SaveStackPos + StoreSize, PtrByteSize);
7156 }
7157
7158 auto HandleMemLoc = [&]() {
7159 const unsigned LocSize = LocVT.getStoreSize();
7160 const unsigned ValSize = ValVT.getStoreSize();
7161 assert((ValSize <= LocSize) &&
7162 "Object size is larger than size of MemLoc");
7163 int CurArgOffset = VA.getLocMemOffset();
7164 // Objects are right-justified because AIX is big-endian.
7165 if (LocSize > ValSize)
7166 CurArgOffset += LocSize - ValSize;
7167 // Potential tail calls could cause overwriting of argument stack slots.
7168 const bool IsImmutable =
7169 !(getTargetMachine().Options.GuaranteedTailCallOpt &&
7170 (CallConv == CallingConv::Fast));
7171 int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
7172 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7173 SDValue ArgValue =
7174 DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
7175
7176 // While the ABI specifies the argument type is (sign or zero) extended
7177 // out to register width, not all code is compliant. We truncate and
7178 // re-extend to be more forgiving of these callers when the argument type
7179 // is smaller than register width.
7180 if (!ArgVT.isVector() && !ValVT.isVector() && ArgVT.isInteger() &&
7181 ValVT.isInteger() &&
7182 ArgVT.getScalarSizeInBits() < ValVT.getScalarSizeInBits()) {
7183 // It is possible to have either real integer values
7184 // or integers that were not originally integers.
7185 // In the latter case, these could have come from structs,
7186 // and these integers would not have an extend on the parameter.
7187 // Since these types of integers do not have an extend specified
7188 // in the first place, the type of extend that we do should not matter.
7189 EVT TruncatedArgVT = ArgVT.isSimple() && ArgVT.getSimpleVT() == MVT::i1
7190 ? MVT::i8
7191 : ArgVT;
7192 SDValue ArgValueTrunc =
7193 DAG.getNode(ISD::TRUNCATE, dl, TruncatedArgVT, ArgValue);
7194 SDValue ArgValueExt =
7195 ArgSignExt ? DAG.getSExtOrTrunc(ArgValueTrunc, dl, ValVT)
7196 : DAG.getZExtOrTrunc(ArgValueTrunc, dl, ValVT);
7197 InVals.push_back(ArgValueExt);
7198 } else {
7199 InVals.push_back(ArgValue);
7200 }
7201 };
7202
7203 // Vector arguments to VaArg functions are passed both on the stack, and
7204 // in any available GPRs. Load the value from the stack and add the GPRs
7205 // as live ins.
7206 if (VA.isMemLoc() && VA.needsCustom()) {
7207 assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
7208 assert(isVarArg && "Only use custom memloc for vararg.");
7209 // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
7210 // matching custom RegLocs.
7211 const unsigned OriginalValNo = VA.getValNo();
7212 (void)OriginalValNo;
7213
7214 auto HandleCustomVecRegLoc = [&]() {
7215 assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7216 "Missing custom RegLoc.");
7217 VA = ArgLocs[I++];
7218 assert(VA.getValVT().isVector() &&
7219 "Unexpected Val type for custom RegLoc.");
7220 assert(VA.getValNo() == OriginalValNo &&
7221 "ValNo mismatch between custom MemLoc and RegLoc.");
7222 MVT::SimpleValueType SVT = VA.getValVT().SimpleTy;
7223 MF.addLiveIn(VA.getLocReg(),
7224 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7225 Subtarget.hasVSX()));
7226 };
7227
7228 HandleMemLoc();
7229 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7230 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7231 // R10.
7232 HandleCustomVecRegLoc();
7233 HandleCustomVecRegLoc();
7234
7235 // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
7236 // we passed the vector in R5, R6, R7 and R8.
7237 if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
7238 assert(!IsPPC64 &&
7239 "Only 2 custom RegLocs expected for 64-bit codegen.");
7240 HandleCustomVecRegLoc();
7241 HandleCustomVecRegLoc();
7242 }
7243
7244 continue;
7245 }
7246
7247 if (VA.isRegLoc()) {
7248 if (VA.getValVT().isScalarInteger())
7249 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7250 else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
7251 switch (VA.getValVT().SimpleTy) {
7252 default:
7253 report_fatal_error("Unhandled value type for argument.");
7254 case MVT::f32:
7255 FuncInfo->appendParameterType(PPCFunctionInfo::ShortFloatingPoint);
7256 break;
7257 case MVT::f64:
7258 FuncInfo->appendParameterType(PPCFunctionInfo::LongFloatingPoint);
7259 break;
7260 }
7261 } else if (VA.getValVT().isVector()) {
7262 switch (VA.getValVT().SimpleTy) {
7263 default:
7264 report_fatal_error("Unhandled value type for argument.");
7265 case MVT::v16i8:
7266 FuncInfo->appendParameterType(PPCFunctionInfo::VectorChar);
7267 break;
7268 case MVT::v8i16:
7269 FuncInfo->appendParameterType(PPCFunctionInfo::VectorShort);
7270 break;
7271 case MVT::v4i32:
7272 case MVT::v2i64:
7273 case MVT::v1i128:
7274 FuncInfo->appendParameterType(PPCFunctionInfo::VectorInt);
7275 break;
7276 case MVT::v4f32:
7277 case MVT::v2f64:
7278 FuncInfo->appendParameterType(PPCFunctionInfo::VectorFloat);
7279 break;
7280 }
7281 }
7282 }
7283
7284 if (Flags.isByVal() && VA.isMemLoc()) {
7285 const unsigned Size =
7286 alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
7287 PtrByteSize);
7288 const int FI = MF.getFrameInfo().CreateFixedObject(
7289 Size, VA.getLocMemOffset(), /* IsImmutable */ false,
7290 /* IsAliased */ true);
7291 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7292 InVals.push_back(FIN);
7293
7294 continue;
7295 }
7296
7297 if (Flags.isByVal()) {
7298 assert(VA.isRegLoc() && "MemLocs should already be handled.");
7299
7300 const MCPhysReg ArgReg = VA.getLocReg();
7301 const PPCFrameLowering *FL = Subtarget.getFrameLowering();
7302
7303 const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
7304 const int FI = MF.getFrameInfo().CreateFixedObject(
7305 StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
7306 /* IsAliased */ true);
7307 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
7308 InVals.push_back(FIN);
7309
7310 // Add live ins for all the RegLocs for the same ByVal.
7311 const TargetRegisterClass *RegClass =
7312 IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
7313
7314 auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
7315 unsigned Offset) {
7316 const Register VReg = MF.addLiveIn(PhysReg, RegClass);
7317 // Since the caller's side has left-justified the aggregate in the
7318 // register, we can simply store the entire register into the stack
7319 // slot.
7320 SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7321 // The store to the fixedstack object is needed because accessing a
7322 // field of the ByVal will use a gep and load. Ideally we will optimize
7323 // to extracting the value from the register directly, and elide the
7324 // stores when the argument's address is not taken, but that will need to
7325 // be future work.
7326 SDValue Store = DAG.getStore(
7327 CopyFrom.getValue(1), dl, CopyFrom,
7328 DAG.getObjectPtrOffset(dl, FIN, TypeSize::getFixed(Offset)),
7329 MachinePointerInfo::getFixedStack(MF, FI, Offset));
7330
7331 MemOps.push_back(Store);
7332 };
7333
7334 unsigned Offset = 0;
7335 HandleRegLoc(VA.getLocReg(), Offset);
7336 Offset += PtrByteSize;
7337 for (; Offset != StackSize && ArgLocs[I].isRegLoc();
7338 Offset += PtrByteSize) {
7339 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7340 "RegLocs should be for ByVal argument.");
7341
7342 const CCValAssign RL = ArgLocs[I++];
7343 HandleRegLoc(RL.getLocReg(), Offset);
7344 FuncInfo->appendParameterType(PPCFunctionInfo::FixedType);
7345 }
7346
7347 if (Offset != StackSize) {
7348 assert(ArgLocs[I].getValNo() == VA.getValNo() &&
7349 "Expected MemLoc for remaining bytes.");
7350 assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
7351 // Consume the MemLoc. The InVal has already been emitted, so nothing
7352 // more needs to be done.
7353 ++I;
7354 }
7355
7356 continue;
7357 }
7358
7359 if (VA.isRegLoc() && !VA.needsCustom()) {
7360 MVT::SimpleValueType SVT = ValVT.SimpleTy;
7361 Register VReg =
7362 MF.addLiveIn(VA.getLocReg(),
7363 getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
7364 Subtarget.hasVSX()));
7365 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
7366 if (ValVT.isScalarInteger() &&
7367 (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
7368 ArgValue =
7369 truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
7370 }
7371 InVals.push_back(ArgValue);
7372 continue;
7373 }
7374 if (VA.isMemLoc()) {
7375 HandleMemLoc();
7376 continue;
7377 }
7378 }
7379
7380 // On AIX a minimum of 8 words is saved to the parameter save area.
7381 const unsigned MinParameterSaveArea = 8 * PtrByteSize;
7382 // Area that is at least reserved in the caller of this function.
7383 unsigned CallerReservedArea = std::max<unsigned>(
7384 CCInfo.getStackSize(), LinkageSize + MinParameterSaveArea);
7385
7386 // Set the size that is at least reserved in caller of this function. Tail
7387 // call optimized function's reserved stack space needs to be aligned so
7388 // that taking the difference between two stack areas will result in an
7389 // aligned stack.
7390 CallerReservedArea =
7391 EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
7392 FuncInfo->setMinReservedArea(CallerReservedArea);
7393
7394 if (isVarArg) {
7395 int VAListIndex = 0;
7396 // If any of the optional arguments are passed in register then the fixed
7397 // stack object we spill into is not immutable. Create a fixed stack object
7398 // that overlaps the remainder of the parameter save area.
7399 if (CCInfo.getStackSize() < (LinkageSize + MinParameterSaveArea)) {
7400 unsigned FixedStackSize =
7401 LinkageSize + MinParameterSaveArea - CCInfo.getStackSize();
7402 VAListIndex =
7403 MFI.CreateFixedObject(FixedStackSize, CCInfo.getStackSize(),
7404 /* IsImmutable */ false, /* IsAliased */ true);
7405 } else {
7406 // All the arguments passed through ellipses are on the stack. Create a
7407 // dummy fixed stack object the same size as a pointer since we don't
7408 // know the actual size.
7409 VAListIndex =
7410 MFI.CreateFixedObject(PtrByteSize, CCInfo.getStackSize(),
7411 /* IsImmutable */ true, /* IsAliased */ true);
7412 }
7413
7414 FuncInfo->setVarArgsFrameIndex(VAListIndex);
7415 SDValue FIN = DAG.getFrameIndex(VAListIndex, PtrVT);
7416
7417 static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
7418 PPC::R7, PPC::R8, PPC::R9, PPC::R10};
7419
7420 static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
7421 PPC::X7, PPC::X8, PPC::X9, PPC::X10};
7422 const unsigned NumGPArgRegs = std::size(IsPPC64 ? GPR_64 : GPR_32);
7423
7424 // The fixed integer arguments of a variadic function are stored to the
7425 // VarArgsFrameIndex on the stack so that they may be loaded by
7426 // dereferencing the result of va_next.
7427 for (unsigned
7428 GPRIndex = (CCInfo.getStackSize() - LinkageSize) / PtrByteSize,
7429 Offset = 0;
7430 GPRIndex < NumGPArgRegs; ++GPRIndex, Offset += PtrByteSize) {
7431
7432 const Register VReg =
7433 IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
7434 : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);
7435
7436 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
7437 MachinePointerInfo MPI =
7438 MachinePointerInfo::getFixedStack(MF, VAListIndex, Offset);
7439 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, MPI);
7440 MemOps.push_back(Store);
7441 // Increment the address for the next argument to store.
7442 SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
7443 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
7444 }
7445 }
7446
7447 if (!MemOps.empty())
7448 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
7449
7450 return Chain;
7451}
7452
7453SDValue PPCTargetLowering::LowerCall_AIX(
7454 SDValue Chain, SDValue Callee, CallFlags CFlags,
7456 const SmallVectorImpl<SDValue> &OutVals,
7457 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7459 const CallBase *CB) const {
7460 // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7461 // AIX ABI stack frame layout.
7462
7463 assert((CFlags.CallConv == CallingConv::C ||
7464 CFlags.CallConv == CallingConv::Cold ||
7465 CFlags.CallConv == CallingConv::Fast) &&
7466 "Unexpected calling convention!");
7467
7468 if (CFlags.IsPatchPoint)
7469 report_fatal_error("This call type is unimplemented on AIX.");
7470
7471 const PPCSubtarget &Subtarget = DAG.getSubtarget<PPCSubtarget>();
7472
7473 MachineFunction &MF = DAG.getMachineFunction();
7474 SmallVector<CCValAssign, 16> ArgLocs;
7475 CCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7476 *DAG.getContext());
7477
7478 // Reserve space for the linkage save area (LSA) on the stack.
7479 // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7480 // [SP][CR][LR][2 x reserved][TOC].
7481 // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7482 const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7483 const bool IsPPC64 = Subtarget.isPPC64();
7484 const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7485 const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7486 CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7487 CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7488
7489 // The prolog code of the callee may store up to 8 GPR argument registers to
7490 // the stack, allowing va_start to index over them in memory if the callee
7491 // is variadic.
7492 // Because we cannot tell if this is needed on the caller side, we have to
7493 // conservatively assume that it is needed. As such, make sure we have at
7494 // least enough stack space for the caller to store the 8 GPRs.
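// For example, on 64-bit AIX (48-byte linkage area) this reserves at least
// 48 + 8 * 8 = 112 bytes of outgoing stack space even for a call whose
// arguments all fit in registers.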
7495 const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7496 const unsigned NumBytes = std::max<unsigned>(
7497 LinkageSize + MinParameterSaveAreaSize, CCInfo.getStackSize());
7498
7499 // Adjust the stack pointer for the new arguments...
7500 // These operations are automatically eliminated by the prolog/epilog pass.
7501 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7502 SDValue CallSeqStart = Chain;
7503
7505 SmallVector<SDValue, 8> MemOpChains;
7506
7507 // Set up a copy of the stack pointer for loading and storing any
7508 // arguments that may not fit in the registers available for argument
7509 // passing.
7510 const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7511 : DAG.getRegister(PPC::R1, MVT::i32);
7512
7513 for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7514 const unsigned ValNo = ArgLocs[I].getValNo();
7515 SDValue Arg = OutVals[ValNo];
7516 ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7517
7518 if (Flags.isByVal()) {
7519 const unsigned ByValSize = Flags.getByValSize();
7520
7521 // Nothing to do for zero-sized ByVals on the caller side.
7522 if (!ByValSize) {
7523 ++I;
7524 continue;
7525 }
7526
7527 auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7528 return DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain,
7529 (LoadOffset != 0)
7530 ? DAG.getObjectPtrOffset(
7531 dl, Arg, TypeSize::getFixed(LoadOffset))
7532 : Arg,
7533 MachinePointerInfo(), VT);
7534 };
7535
7536 unsigned LoadOffset = 0;
7537
7538 // Initialize registers, which are fully occupied by the by-val argument.
7539 while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7540 SDValue Load = GetLoad(PtrVT, LoadOffset);
7541 MemOpChains.push_back(Load.getValue(1));
7542 LoadOffset += PtrByteSize;
7543 const CCValAssign &ByValVA = ArgLocs[I++];
7544 assert(ByValVA.getValNo() == ValNo &&
7545 "Unexpected location for pass-by-value argument.");
7546 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7547 }
7548
7549 if (LoadOffset == ByValSize)
7550 continue;
7551
7552 // There must be one more loc to handle the remainder.
7553 assert(ArgLocs[I].getValNo() == ValNo &&
7554 "Expected additional location for by-value argument.");
7555
7556 if (ArgLocs[I].isMemLoc()) {
7557 assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7558 const CCValAssign &ByValVA = ArgLocs[I++];
7559 ISD::ArgFlagsTy MemcpyFlags = Flags;
7560 // Only memcpy the bytes that don't pass in register.
7561 MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7562 Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7563 (LoadOffset != 0) ? DAG.getObjectPtrOffset(
7564 dl, Arg, TypeSize::getFixed(LoadOffset))
7565 : Arg,
7566 DAG.getObjectPtrOffset(
7567 dl, StackPtr, TypeSize::getFixed(ByValVA.getLocMemOffset())),
7568 CallSeqStart, MemcpyFlags, DAG, dl);
7569 continue;
7570 }
7571
7572 // Initialize the final register residue.
7573 // Any residue that occupies the final by-val arg register must be
7574 // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7575 // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7576 // 2 and 1 byte loads.
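// On 64-bit targets those three loads are shifted left by 32, 16 and 8 bits
// respectively before being OR'ed together, so the residue occupies the most
// significant bytes of the register.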
7577 const unsigned ResidueBytes = ByValSize % PtrByteSize;
7578 assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7579 "Unexpected register residue for by-value argument.");
7580 SDValue ResidueVal;
7581 for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7582 const unsigned N = llvm::bit_floor(ResidueBytes - Bytes);
7583 const MVT VT =
7584 N == 1 ? MVT::i8
7585 : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7586 SDValue Load = GetLoad(VT, LoadOffset);
7587 MemOpChains.push_back(Load.getValue(1));
7588 LoadOffset += N;
7589 Bytes += N;
7590
7591 // By-val arguments are passed left-justified in register.
7592 // Every load here needs to be shifted, otherwise a full register load
7593 // should have been used.
7594 assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7595 "Unexpected load emitted during handling of pass-by-value "
7596 "argument.");
7597 unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7598 EVT ShiftAmountTy =
7599 getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7600 SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7601 SDValue ShiftedLoad =
7602 DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7603 ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7604 ShiftedLoad)
7605 : ShiftedLoad;
7606 }
7607
7608 const CCValAssign &ByValVA = ArgLocs[I++];
7609 RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7610 continue;
7611 }
7612
7613 CCValAssign &VA = ArgLocs[I++];
7614 const MVT LocVT = VA.getLocVT();
7615 const MVT ValVT = VA.getValVT();
7616
7617 switch (VA.getLocInfo()) {
7618 default:
7619 report_fatal_error("Unexpected argument extension type.");
7620 case CCValAssign::Full:
7621 break;
7622 case CCValAssign::ZExt:
7623 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7624 break;
7625 case CCValAssign::SExt:
7626 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7627 break;
7628 }
7629
7630 if (VA.isRegLoc() && !VA.needsCustom()) {
7631 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7632 continue;
7633 }
7634
7635 // Vector arguments passed to VarArg functions need custom handling when
7636 // they are passed (at least partially) in GPRs.
7637 if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7638 assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7639 // Store value to its stack slot.
7640 SDValue PtrOff =
7641 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7642 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7643 SDValue Store =
7644 DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7645 MemOpChains.push_back(Store);
7646 const unsigned OriginalValNo = VA.getValNo();
7647 // Then load the GPRs from the stack
7648 unsigned LoadOffset = 0;
7649 auto HandleCustomVecRegLoc = [&]() {
7650 assert(I != E && "Unexpected end of CCvalAssigns.");
7651 assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7652 "Expected custom RegLoc.");
7653 CCValAssign RegVA = ArgLocs[I++];
7654 assert(RegVA.getValNo() == OriginalValNo &&
7655 "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7656 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7657 DAG.getConstant(LoadOffset, dl, PtrVT));
7658 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7659 MemOpChains.push_back(Load.getValue(1));
7660 RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7661 LoadOffset += PtrByteSize;
7662 };
7663
7664 // In 64-bit there will be exactly 2 custom RegLocs that follow, and
7665 // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7666 // R10.
7667 HandleCustomVecRegLoc();
7668 HandleCustomVecRegLoc();
7669
7670 if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7671 ArgLocs[I].getValNo() == OriginalValNo) {
7672 assert(!IsPPC64 &&
7673 "Only 2 custom RegLocs expected for 64-bit codegen.");
7674 HandleCustomVecRegLoc();
7675 HandleCustomVecRegLoc();
7676 }
7677
7678 continue;
7679 }
7680
7681 if (VA.isMemLoc()) {
7682 SDValue PtrOff =
7683 DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7684 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7685 MemOpChains.push_back(
7686 DAG.getStore(Chain, dl, Arg, PtrOff,
7688 Subtarget.getFrameLowering()->getStackAlign()));
7689
7690 continue;
7691 }
7692
7693 if (!ValVT.isFloatingPoint())
7695 "Unexpected register handling for calling convention.");
7696
7697 // Custom handling is used for GPR initializations for vararg float
7698 // arguments.
7699 assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7700 LocVT.isInteger() &&
7701 "Custom register handling only expected for VarArg.");
7702
7703 SDValue ArgAsInt =
7704 DAG.getBitcast(MVT::getIntegerVT(ValVT.getSizeInBits()), Arg);
7705
7706 if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7707 // f32 in 32-bit GPR
7708 // f64 in 64-bit GPR
7709 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7710 else if (Arg.getValueType().getFixedSizeInBits() <
7711 LocVT.getFixedSizeInBits())
7712 // f32 in 64-bit GPR.
7713 RegsToPass.push_back(std::make_pair(
7714 VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7715 else {
7716 // f64 in two 32-bit GPRs
7717 // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
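// The high 32 bits of the f64 bit pattern go to the first GPR and the low
// 32 bits to the second, matching the big-endian memory layout.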
7718 assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7719 "Unexpected custom register for argument!");
7720 CCValAssign &GPR1 = VA;
7721 SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7722 DAG.getConstant(32, dl, MVT::i8));
7723 RegsToPass.push_back(std::make_pair(
7724 GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7725
7726 if (I != E) {
7727 // If only 1 GPR was available, there will only be one custom GPR and
7728 // the argument will also pass in memory.
7729 CCValAssign &PeekArg = ArgLocs[I];
7730 if (PeekArg.isRegLoc() && PeekArg.getValNo() == VA.getValNo()) {
7731 assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7732 CCValAssign &GPR2 = ArgLocs[I++];
7733 RegsToPass.push_back(std::make_pair(
7734 GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7735 }
7736 }
7737 }
7738 }
7739
7740 if (!MemOpChains.empty())
7741 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7742
7743 // For indirect calls, we need to save the TOC base to the stack for
7744 // restoration after the call.
7745 if (CFlags.IsIndirect) {
7746 assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7747 const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7748 const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7749 const MVT PtrVT = Subtarget.getScalarIntVT();
7750 const unsigned TOCSaveOffset =
7751 Subtarget.getFrameLowering()->getTOCSaveOffset();
7752
7753 setUsesTOCBasePtr(DAG);
7754 SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7755 SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7756 SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7757 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7758 Chain = DAG.getStore(
7759 Val.getValue(1), dl, Val, AddPtr,
7760 MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7761 }
7762
7763 // Build a sequence of copy-to-reg nodes chained together with token chain
7764 // and flag operands which copy the outgoing args into the appropriate regs.
7765 SDValue InGlue;
7766 for (auto Reg : RegsToPass) {
7767 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InGlue);
7768 InGlue = Chain.getValue(1);
7769 }
7770
7771 const int SPDiff = 0;
7772 return FinishCall(CFlags, dl, DAG, RegsToPass, InGlue, Chain, CallSeqStart,
7773 Callee, SPDiff, NumBytes, Ins, InVals, CB);
7774}
7775
7776bool
7777PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7778 MachineFunction &MF, bool isVarArg,
7779 const SmallVectorImpl<ISD::OutputArg> &Outs,
7780 LLVMContext &Context,
7781 const Type *RetTy) const {
7782 SmallVector<CCValAssign, 16> RVLocs;
7783 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7784 return CCInfo.CheckReturn(
7785 Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7786 ? RetCC_PPC_Cold
7787 : RetCC_PPC);
7788}
7789
7790SDValue
7791PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7792 bool isVarArg,
7794 const SmallVectorImpl<SDValue> &OutVals,
7795 const SDLoc &dl, SelectionDAG &DAG) const {
7797 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7798 *DAG.getContext());
7799 CCInfo.AnalyzeReturn(Outs,
7800 (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7802 : RetCC_PPC);
7803
7804 SDValue Glue;
7805 SmallVector<SDValue, 4> RetOps(1, Chain);
7806
7807 // Copy the result values into the output registers.
7808 for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7809 CCValAssign &VA = RVLocs[i];
7810 assert(VA.isRegLoc() && "Can only return in registers!");
7811
7812 SDValue Arg = OutVals[RealResIdx];
7813
7814 switch (VA.getLocInfo()) {
7815 default: llvm_unreachable("Unknown loc info!");
7816 case CCValAssign::Full: break;
7817 case CCValAssign::AExt:
7818 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7819 break;
7820 case CCValAssign::ZExt:
7821 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7822 break;
7823 case CCValAssign::SExt:
7824 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7825 break;
7826 }
7827 if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7828 bool isLittleEndian = Subtarget.isLittleEndian();
7829 // Legalize ret f64 -> ret 2 x i32.
7830 SDValue SVal =
7831 DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7832 DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7833 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7834 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7835 SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7836 DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7837 Glue = Chain.getValue(1);
7838 VA = RVLocs[++i]; // skip ahead to next loc
7839 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Glue);
7840 } else
7841 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Glue);
7842 Glue = Chain.getValue(1);
7843 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7844 }
7845
7846 RetOps[0] = Chain; // Update chain.
7847
7848 // Add the glue if we have it.
7849 if (Glue.getNode())
7850 RetOps.push_back(Glue);
7851
7852 return DAG.getNode(PPCISD::RET_GLUE, dl, MVT::Other, RetOps);
7853}
7854
7855SDValue
7856PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7857 SelectionDAG &DAG) const {
7858 SDLoc dl(Op);
7859
7860 // Get the correct type for integers.
7861 EVT IntVT = Op.getValueType();
7862
7863 // Get the inputs.
7864 SDValue Chain = Op.getOperand(0);
7865 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7866 // Build a DYNAREAOFFSET node.
7867 SDValue Ops[2] = {Chain, FPSIdx};
7868 SDVTList VTs = DAG.getVTList(IntVT);
7869 return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7870}
7871
7872SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7873 SelectionDAG &DAG) const {
7874 // When we pop the dynamic allocation we need to restore the SP link.
7875 SDLoc dl(Op);
7876
7877 // Get the correct type for pointers.
7878 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7879
7880 // Construct the stack pointer operand.
7881 bool isPPC64 = Subtarget.isPPC64();
7882 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7883 SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7884
7885 // Get the operands for the STACKRESTORE.
7886 SDValue Chain = Op.getOperand(0);
7887 SDValue SaveSP = Op.getOperand(1);
7888
7889 // Load the old link SP.
7890 SDValue LoadLinkSP =
7891 DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7892
7893 // Restore the stack pointer.
7894 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7895
7896 // Store the old link SP.
7897 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7898}
7899
7900SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7901 MachineFunction &MF = DAG.getMachineFunction();
7902 bool isPPC64 = Subtarget.isPPC64();
7903 EVT PtrVT = getPointerTy(MF.getDataLayout());
7904
7905 // Get the current return address save index. If it has not been created
7906 // yet, do so below and record it in the function info.
7907 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7908 int RASI = FI->getReturnAddrSaveIndex();
7909
7910 // If the return address save index hasn't been defined yet.
7911 if (!RASI) {
7912 // Find out the fixed offset of the return address save area.
7913 int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7914 // Allocate the frame index for the return address save area.
7915 RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7916 // Save the result.
7917 FI->setReturnAddrSaveIndex(RASI);
7918 }
7919 return DAG.getFrameIndex(RASI, PtrVT);
7920}
7921
7922SDValue
7923PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7924 MachineFunction &MF = DAG.getMachineFunction();
7925 bool isPPC64 = Subtarget.isPPC64();
7926 EVT PtrVT = getPointerTy(MF.getDataLayout());
7927
7928 // Get current frame pointer save index. The users of this index will be
7929 // primarily DYNALLOC instructions.
7930 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
7931 int FPSI = FI->getFramePointerSaveIndex();
7932
7933 // If the frame pointer save index hasn't been defined yet.
7934 if (!FPSI) {
7935     // Find out the fixed offset of the frame pointer save area.
7936 int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7937     // Allocate the frame index for the frame pointer save area.
7938 FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7939 // Save the result.
7940 FI->setFramePointerSaveIndex(FPSI);
7941 }
7942 return DAG.getFrameIndex(FPSI, PtrVT);
7943}
7944
7945SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7946 SelectionDAG &DAG) const {
7947 MachineFunction &MF = DAG.getMachineFunction();
7948 // Get the inputs.
7949 SDValue Chain = Op.getOperand(0);
7950 SDValue Size = Op.getOperand(1);
7951 SDLoc dl(Op);
7952
7953 // Get the correct type for pointers.
7954 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7955 // Negate the size.
7956 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7957 DAG.getConstant(0, dl, PtrVT), Size);
7958 // Construct a node for the frame pointer save index.
7959 SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7960 SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7961 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7962 if (hasInlineStackProbe(MF))
7963 return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7964 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7965}
7966
7967SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7968 SelectionDAG &DAG) const {
7969 MachineFunction &MF = DAG.getMachineFunction();
7970
7971 bool isPPC64 = Subtarget.isPPC64();
7972 EVT PtrVT = getPointerTy(DAG.getDataLayout());
7973
7974 int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7975 return DAG.getFrameIndex(FI, PtrVT);
7976}
7977
7978SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7979 SelectionDAG &DAG) const {
7980 SDLoc DL(Op);
7981 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7982 DAG.getVTList(MVT::i32, MVT::Other),
7983 Op.getOperand(0), Op.getOperand(1));
7984}
7985
7986SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7987 SelectionDAG &DAG) const {
7988 SDLoc DL(Op);
7989 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
7990 Op.getOperand(0), Op.getOperand(1));
7991}
7992
7993SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7994 if (Op.getValueType().isVector())
7995 return LowerVectorLoad(Op, DAG);
7996
7997 assert(Op.getValueType() == MVT::i1 &&
7998 "Custom lowering only for i1 loads");
7999
8000 // First, load 8 bits into 32 bits, then truncate to 1 bit.
8001
8002 SDLoc dl(Op);
8003 LoadSDNode *LD = cast<LoadSDNode>(Op);
8004
8005 SDValue Chain = LD->getChain();
8006 SDValue BasePtr = LD->getBasePtr();
8007 MachineMemOperand *MMO = LD->getMemOperand();
8008
8009 SDValue NewLD =
8010 DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
8011 BasePtr, MVT::i8, MMO);
8012 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
8013
8014 SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
8015 return DAG.getMergeValues(Ops, dl);
8016}
8017
8018SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
8019 if (Op.getOperand(1).getValueType().isVector())
8020 return LowerVectorStore(Op, DAG);
8021
8022 assert(Op.getOperand(1).getValueType() == MVT::i1 &&
8023 "Custom lowering only for i1 stores");
8024
8025 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
8026
8027 SDLoc dl(Op);
8028 StoreSDNode *ST = cast<StoreSDNode>(Op);
8029
8030 SDValue Chain = ST->getChain();
8031 SDValue BasePtr = ST->getBasePtr();
8032 SDValue Value = ST->getValue();
8033 MachineMemOperand *MMO = ST->getMemOperand();
8034
8035   Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
8036                       Value);
8037 return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
8038}
8039
8040// FIXME: Remove this once the ANDI glue bug is fixed:
8041SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
8042 assert(Op.getValueType() == MVT::i1 &&
8043 "Custom lowering only for i1 results");
8044
8045 SDLoc DL(Op);
8046 return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
8047}
8048
8049SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
8050 SelectionDAG &DAG) const {
8051
8052 // Implements a vector truncate that fits in a vector register as a shuffle.
8053 // We want to legalize vector truncates down to where the source fits in
8054 // a vector register (and target is therefore smaller than vector register
8055 // size). At that point legalization will try to custom lower the sub-legal
8056 // result and get here - where we can contain the truncate as a single target
8057 // operation.
8058
8059 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
8060 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
8061 //
8062   // We will implement it for big-endian ordering as follows (where u denotes
8063   // an undefined element):
8064 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
8065 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
8066 //
8067 // The same operation in little-endian ordering will be:
8068 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
8069 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
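  // For example, a v8i16 -> v8i8 truncate has SizeMult == 2, so the shuffle
  // built below keeps byte lanes <0, 2, 4, ..., 14> on little-endian subtargets
  // and <1, 3, 5, ..., 15> on big-endian ones, with the remaining lanes of the
  // 128-bit result left undefined.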
8070
8071 EVT TrgVT = Op.getValueType();
8072 assert(TrgVT.isVector() && "Vector type expected.");
8073 unsigned TrgNumElts = TrgVT.getVectorNumElements();
8074 EVT EltVT = TrgVT.getVectorElementType();
8075 if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
8076 TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
8077       !llvm::has_single_bit<uint32_t>(EltVT.getSizeInBits()))
8078     return SDValue();
8079
8080 SDValue N1 = Op.getOperand(0);
8081 EVT SrcVT = N1.getValueType();
8082 unsigned SrcSize = SrcVT.getSizeInBits();
8083 if (SrcSize > 256 || !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
8084       !llvm::has_single_bit<uint32_t>(
8085           SrcVT.getVectorElementType().getSizeInBits()))
8086     return SDValue();
8087 if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
8088 return SDValue();
8089
8090 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8091 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8092
8093 SDLoc DL(Op);
8094 SDValue Op1, Op2;
8095 if (SrcSize == 256) {
8096 EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
8097 EVT SplitVT =
8098         SrcVT.getHalfNumVectorElementsVT(*DAG.getContext());
8099     unsigned SplitNumElts = SplitVT.getVectorNumElements();
8100 Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8101 DAG.getConstant(0, DL, VecIdxTy));
8102 Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
8103 DAG.getConstant(SplitNumElts, DL, VecIdxTy));
8104 }
8105 else {
8106 Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
8107 Op2 = DAG.getUNDEF(WideVT);
8108 }
8109
8110 // First list the elements we want to keep.
8111 unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
8112 SmallVector<int, 16> ShuffV;
8113 if (Subtarget.isLittleEndian())
8114 for (unsigned i = 0; i < TrgNumElts; ++i)
8115 ShuffV.push_back(i * SizeMult);
8116 else
8117 for (unsigned i = 1; i <= TrgNumElts; ++i)
8118 ShuffV.push_back(i * SizeMult - 1);
8119
8120 // Populate the remaining elements with undefs.
8121 for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
8122 // ShuffV.push_back(i + WideNumElts);
8123 ShuffV.push_back(WideNumElts + 1);
8124
8125 Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
8126 Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
8127 return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
8128}
8129
8130 /// LowerSELECT_CC - Lower floating-point select_cc's into an fsel instruction
8131 /// when possible.
8132SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
8133 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
8134 EVT ResVT = Op.getValueType();
8135 EVT CmpVT = Op.getOperand(0).getValueType();
8136 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
8137 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
8138 SDLoc dl(Op);
8139
8140 // Without power9-vector, we don't have native instruction for f128 comparison.
8141 // Following transformation to libcall is needed for setcc:
8142 // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
8143 if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
8144 SDValue Z = DAG.getSetCC(
8145 dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
8146 LHS, RHS, CC);
8147 SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
8148 return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
8149 }
8150
8151 // Not FP, or using SPE? Not a fsel.
8152 if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
8153 Subtarget.hasSPE())
8154 return Op;
8155
8156 SDNodeFlags Flags = Op.getNode()->getFlags();
8157
8158 // We have xsmaxc[dq]p/xsminc[dq]p which are OK to emit even in the
8159 // presence of infinities.
8160 if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
8161 switch (CC) {
8162 default:
8163 break;
8164 case ISD::SETOGT:
8165 case ISD::SETGT:
8166 return DAG.getNode(PPCISD::XSMAXC, dl, Op.getValueType(), LHS, RHS);
8167 case ISD::SETOLT:
8168 case ISD::SETLT:
8169 return DAG.getNode(PPCISD::XSMINC, dl, Op.getValueType(), LHS, RHS);
8170 }
8171 }
8172
8173 // We might be able to do better than this under some circumstances, but in
8174 // general, fsel-based lowering of select is a finite-math-only optimization.
8175 // For more information, see section F.3 of the 2.06 ISA specification.
8176 // With ISA 3.0
8177 if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
8178 (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()) ||
8179 ResVT == MVT::f128)
8180 return Op;
8181
8182 // If the RHS of the comparison is a 0.0, we don't need to do the
8183 // subtraction at all.
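  // For example, with RHS == +0.0, "select_cc LHS, 0.0, TV, FV, SETGE" maps
  // directly onto FSEL(LHS, TV, FV), i.e. LHS >= 0.0 ? TV : FV; the SETLE case
  // below negates LHS first so that the same ">= 0.0" test can be reused.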
8184 SDValue Sel1;
8185   if (isFloatingPointZero(RHS))
8186     switch (CC) {
8187 default: break; // SETUO etc aren't handled by fsel.
8188 case ISD::SETNE:
8189 std::swap(TV, FV);
8190 [[fallthrough]];
8191 case ISD::SETEQ:
8192 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8193 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8194 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8195 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8196 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8197 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8198 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
8199 case ISD::SETULT:
8200 case ISD::SETLT:
8201 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8202 [[fallthrough]];
8203 case ISD::SETOGE:
8204 case ISD::SETGE:
8205 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8206 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8207 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
8208 case ISD::SETUGT:
8209 case ISD::SETGT:
8210 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
8211 [[fallthrough]];
8212 case ISD::SETOLE:
8213 case ISD::SETLE:
8214 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
8215 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
8216 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8217 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
8218 }
8219
8220 SDValue Cmp;
8221 switch (CC) {
8222 default: break; // SETUO etc aren't handled by fsel.
8223 case ISD::SETNE:
8224 std::swap(TV, FV);
8225 [[fallthrough]];
8226 case ISD::SETEQ:
8227 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8228 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8229 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8230 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8231 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
8232 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
8233 return DAG.getNode(PPCISD::FSEL, dl, ResVT,
8234 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
8235 case ISD::SETULT:
8236 case ISD::SETLT:
8237 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8238 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8239 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8240 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8241 case ISD::SETOGE:
8242 case ISD::SETGE:
8243 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
8244 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8245 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8246 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8247 case ISD::SETUGT:
8248 case ISD::SETGT:
8249 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8250 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8251 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8252 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
8253 case ISD::SETOLE:
8254 case ISD::SETLE:
8255 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
8256 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
8257 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
8258 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
8259 }
8260 return Op;
8261}
8262
8263static unsigned getPPCStrictOpcode(unsigned Opc) {
8264 switch (Opc) {
8265 default:
8266 llvm_unreachable("No strict version of this opcode!");
8267 case PPCISD::FCTIDZ:
8268 return PPCISD::STRICT_FCTIDZ;
8269 case PPCISD::FCTIWZ:
8270 return PPCISD::STRICT_FCTIWZ;
8271 case PPCISD::FCTIDUZ:
8272 return PPCISD::STRICT_FCTIDUZ;
8273 case PPCISD::FCTIWUZ:
8274 return PPCISD::STRICT_FCTIWUZ;
8275 case PPCISD::FCFID:
8276 return PPCISD::STRICT_FCFID;
8277 case PPCISD::FCFIDU:
8278 return PPCISD::STRICT_FCFIDU;
8279 case PPCISD::FCFIDS:
8280 return PPCISD::STRICT_FCFIDS;
8281 case PPCISD::FCFIDUS:
8282 return PPCISD::STRICT_FCFIDUS;
8283 }
8284}
8285
8286 static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG,
8287                               const PPCSubtarget &Subtarget) {
8288 SDLoc dl(Op);
8289 bool IsStrict = Op->isStrictFPOpcode();
8290 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8291 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8292
8293 // TODO: Any other flags to propagate?
8294 SDNodeFlags Flags;
8295 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8296
8297 // For strict nodes, source is the second operand.
8298 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8299 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8300 MVT DestTy = Op.getSimpleValueType();
8301 assert(Src.getValueType().isFloatingPoint() &&
8302 (DestTy == MVT::i8 || DestTy == MVT::i16 || DestTy == MVT::i32 ||
8303 DestTy == MVT::i64) &&
8304 "Invalid FP_TO_INT types");
8305 if (Src.getValueType() == MVT::f32) {
8306 if (IsStrict) {
8307 Src =
8308           DAG.getNode(ISD::STRICT_FP_EXTEND, dl,
8309                       DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8310 Chain = Src.getValue(1);
8311 } else
8312 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8313 }
8314 if ((DestTy == MVT::i8 || DestTy == MVT::i16) && Subtarget.hasP9Vector())
8315 DestTy = Subtarget.getScalarIntVT();
8316 unsigned Opc = ISD::DELETED_NODE;
8317 switch (DestTy.SimpleTy) {
8318 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8319 case MVT::i32:
8320 Opc = IsSigned ? PPCISD::FCTIWZ
8321 : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8322 break;
8323 case MVT::i64:
8324 assert((IsSigned || Subtarget.hasFPCVT()) &&
8325 "i64 FP_TO_UINT is supported only with FPCVT");
8326 Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8327 }
8328 EVT ConvTy = Src.getValueType() == MVT::f128 ? MVT::f128 : MVT::f64;
8329 SDValue Conv;
8330 if (IsStrict) {
8331     Opc = getPPCStrictOpcode(Opc);
8332     Conv = DAG.getNode(Opc, dl, DAG.getVTList(ConvTy, MVT::Other), {Chain, Src},
8333 Flags);
8334 } else {
8335 Conv = DAG.getNode(Opc, dl, ConvTy, Src);
8336 }
8337 return Conv;
8338}
8339
8340void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8341 SelectionDAG &DAG,
8342 const SDLoc &dl) const {
8343 SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8344 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8345 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8346 bool IsStrict = Op->isStrictFPOpcode();
8347
8348 // Convert the FP value to an int value through memory.
8349 bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8350 (IsSigned || Subtarget.hasFPCVT());
8351 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8352 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8353 MachinePointerInfo MPI =
8354       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
8355
8356 // Emit a store to the stack slot.
8357 SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8358 Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8359 if (i32Stack) {
8360 MachineFunction &MF = DAG.getMachineFunction();
8361 Alignment = Align(4);
8362 MachineMemOperand *MMO =
8363 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8364 SDValue Ops[] = { Chain, Tmp, FIPtr };
8365 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8366 DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8367 } else
8368 Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8369
8370 // Result is a load from the stack slot. If loading 4 bytes, make sure to
8371 // add in a bias on big endian.
8372 if (Op.getValueType() == MVT::i32 && !i32Stack &&
8373 !Subtarget.isLittleEndian()) {
8374 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8375 DAG.getConstant(4, dl, FIPtr.getValueType()));
8376 MPI = MPI.getWithOffset(4);
8377 }
8378
8379 RLI.Chain = Chain;
8380 RLI.Ptr = FIPtr;
8381 RLI.MPI = MPI;
8382 RLI.Alignment = Alignment;
8383}
8384
8385/// Custom lowers floating point to integer conversions to use
8386/// the direct move instructions available in ISA 2.07 to avoid the
8387/// need for load/store combinations.
8388SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8389 SelectionDAG &DAG,
8390 const SDLoc &dl) const {
8391 SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8392 SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8393 if (Op->isStrictFPOpcode())
8394 return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8395 else
8396 return Mov;
8397}
8398
8399SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8400 const SDLoc &dl) const {
8401 bool IsStrict = Op->isStrictFPOpcode();
8402 bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8403 Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8404 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8405 EVT SrcVT = Src.getValueType();
8406 EVT DstVT = Op.getValueType();
8407
8408 // FP to INT conversions are legal for f128.
8409 if (SrcVT == MVT::f128)
8410 return Subtarget.hasP9Vector() ? Op : SDValue();
8411
8412 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8413 // PPC (the libcall is not available).
8414 if (SrcVT == MVT::ppcf128) {
8415 if (DstVT == MVT::i32) {
8416 // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8417 // set other fast-math flags to FP operations in both strict and
8418 // non-strict cases. (FP_TO_SINT, FSUB)
8419 SDNodeFlags Flags;
8420 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8421
8422 if (IsSigned) {
8423 SDValue Lo, Hi;
8424 std::tie(Lo, Hi) = DAG.SplitScalar(Src, dl, MVT::f64, MVT::f64);
8425
8426 // Add the two halves of the long double in round-to-zero mode, and use
8427 // a smaller FP_TO_SINT.
8428 if (IsStrict) {
8429 SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8430 DAG.getVTList(MVT::f64, MVT::Other),
8431 {Op.getOperand(0), Lo, Hi}, Flags);
8432 return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8433 DAG.getVTList(MVT::i32, MVT::Other),
8434 {Res.getValue(1), Res}, Flags);
8435 } else {
8436 SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8437 return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8438 }
8439 } else {
8440 const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8441 APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8442 SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8443 SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8444 if (IsStrict) {
8445 // Sel = Src < 0x80000000
8446 // FltOfs = select Sel, 0.0, 0x80000000
8447 // IntOfs = select Sel, 0, 0x80000000
8448 // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8449 SDValue Chain = Op.getOperand(0);
8450 EVT SetCCVT =
8451 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8452 EVT DstSetCCVT =
8453 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8454 SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8455 Chain, true);
8456 Chain = Sel.getValue(1);
8457
8458 SDValue FltOfs = DAG.getSelect(
8459 dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8460 Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8461
8462 SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8463 DAG.getVTList(SrcVT, MVT::Other),
8464 {Chain, Src, FltOfs}, Flags);
8465 Chain = Val.getValue(1);
8466 SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8467 DAG.getVTList(DstVT, MVT::Other),
8468 {Chain, Val}, Flags);
8469 Chain = SInt.getValue(1);
8470 SDValue IntOfs = DAG.getSelect(
8471 dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8472 SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8473 return DAG.getMergeValues({Result, Chain}, dl);
8474 } else {
8475 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8476 // FIXME: generated code sucks.
8477 SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8478 True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8479 True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8480 SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8481 return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8482 }
8483 }
8484 }
8485
8486 return SDValue();
8487 }
8488
8489 if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8490 return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8491
8492 ReuseLoadInfo RLI;
8493 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8494
8495 return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8496 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8497}
8498
8499// We're trying to insert a regular store, S, and then a load, L. If the
8500// incoming value, O, is a load, we might just be able to have our load use the
8501// address used by O. However, we don't know if anything else will store to
8502// that address before we can load from it. To prevent this situation, we need
8503// to insert our load, L, into the chain as a peer of O. To do this, we give L
8504// the same chain operand as O, we create a token factor from the chain results
8505// of O and L, and we replace all uses of O's chain result with that token
8506// factor (this last part is handled by makeEquivalentMemoryOrdering).
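// For example (a sketch): if O is "t1, ch1 = load @p, ch0", then L is created
// as "t2, ch2 = load @p, ch0" with the same input chain ch0, and a
// TokenFactor(ch1, ch2) replaces every former use of ch1, so no store can slip
// in between O and L.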
8507bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8508 ReuseLoadInfo &RLI,
8509 SelectionDAG &DAG,
8510 ISD::LoadExtType ET) const {
8511 // Conservatively skip reusing for constrained FP nodes.
8512 if (Op->isStrictFPOpcode())
8513 return false;
8514
8515 SDLoc dl(Op);
8516 bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8517 (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8518 if (ET == ISD::NON_EXTLOAD &&
8519 (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8520 isOperationLegalOrCustom(Op.getOpcode(),
8521 Op.getOperand(0).getValueType())) {
8522
8523 LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8524 return true;
8525 }
8526
8527 LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8528 if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8529 LD->isNonTemporal())
8530 return false;
8531 if (LD->getMemoryVT() != MemVT)
8532 return false;
8533
8534 // If the result of the load is an illegal type, then we can't build a
8535   // valid chain for reuse, since the legalised loads and the token factor node
8536   // that ties them together use a different output chain than the
8537   // illegal load.
8538 if (!isTypeLegal(LD->getValueType(0)))
8539 return false;
8540
8541 RLI.Ptr = LD->getBasePtr();
8542 if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8543 assert(LD->getAddressingMode() == ISD::PRE_INC &&
8544 "Non-pre-inc AM on PPC?");
8545 RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8546 LD->getOffset());
8547 }
8548
8549 RLI.Chain = LD->getChain();
8550 RLI.MPI = LD->getPointerInfo();
8551 RLI.IsDereferenceable = LD->isDereferenceable();
8552 RLI.IsInvariant = LD->isInvariant();
8553 RLI.Alignment = LD->getAlign();
8554 RLI.AAInfo = LD->getAAInfo();
8555 RLI.Ranges = LD->getRanges();
8556
8557 RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8558 return true;
8559}
8560
8561 /// Analyze the profitability of a direct move.
8562 /// Prefer a float load over an int load plus a direct move
8563 /// when there is no integer use of the int load.
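/// For example, if the only users of an i32 load are (STRICT_)SINT_TO_FP or
/// (STRICT_)UINT_TO_FP nodes, reloading the value with lfiwax/lfiwzx avoids a
/// GPR-to-VSR move, so a direct move is not considered profitable here.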
8564bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8565 SDNode *Origin = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0).getNode();
8566 if (Origin->getOpcode() != ISD::LOAD)
8567 return true;
8568
8569 // If there is no LXSIBZX/LXSIHZX, like Power8,
8570 // prefer direct move if the memory size is 1 or 2 bytes.
8571 MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8572 if (!Subtarget.hasP9Vector() &&
8573 (!MMO->getSize().hasValue() || MMO->getSize().getValue() <= 2))
8574 return true;
8575
8576 for (SDUse &Use : Origin->uses()) {
8577
8578 // Only look at the users of the loaded value.
8579 if (Use.getResNo() != 0)
8580 continue;
8581
8582 SDNode *User = Use.getUser();
8583 if (User->getOpcode() != ISD::SINT_TO_FP &&
8584 User->getOpcode() != ISD::UINT_TO_FP &&
8585 User->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8586 User->getOpcode() != ISD::STRICT_UINT_TO_FP)
8587 return true;
8588 }
8589
8590 return false;
8591}
8592
8593 static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG,
8594                               const PPCSubtarget &Subtarget,
8595 SDValue Chain = SDValue()) {
8596 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8597 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8598 SDLoc dl(Op);
8599
8600 // TODO: Any other flags to propagate?
8601 SDNodeFlags Flags;
8602 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8603
8604 // If we have FCFIDS, then use it when converting to single-precision.
8605 // Otherwise, convert to double-precision and then round.
8606 bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8607 unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8608 : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8609 EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8610 if (Op->isStrictFPOpcode()) {
8611 if (!Chain)
8612 Chain = Op.getOperand(0);
8613 return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8614 DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8615 } else
8616 return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8617}
8618
8619/// Custom lowers integer to floating point conversions to use
8620/// the direct move instructions available in ISA 2.07 to avoid the
8621/// need for load/store combinations.
8622SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8623 SelectionDAG &DAG,
8624 const SDLoc &dl) const {
8625 assert((Op.getValueType() == MVT::f32 ||
8626 Op.getValueType() == MVT::f64) &&
8627 "Invalid floating point type as target of conversion");
8628 assert(Subtarget.hasFPCVT() &&
8629 "Int to FP conversions with direct moves require FPCVT");
8630 SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8631 bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8632 bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8633 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8634 unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8635 SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8636 return convertIntToFP(Op, Mov, DAG, Subtarget);
8637}
8638
8639static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8640
8641 EVT VecVT = Vec.getValueType();
8642 assert(VecVT.isVector() && "Expected a vector type.");
8643 assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8644
8645 EVT EltVT = VecVT.getVectorElementType();
8646 unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8647 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8648
8649 unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8650 SmallVector<SDValue, 16> Ops(NumConcat);
8651 Ops[0] = Vec;
8652 SDValue UndefVec = DAG.getUNDEF(VecVT);
8653 for (unsigned i = 1; i < NumConcat; ++i)
8654 Ops[i] = UndefVec;
8655
8656 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8657}
8658
8659SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8660 const SDLoc &dl) const {
8661 bool IsStrict = Op->isStrictFPOpcode();
8662 unsigned Opc = Op.getOpcode();
8663 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8666 "Unexpected conversion type");
8667 assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8668 "Supports conversions to v2f64/v4f32 only.");
8669
8670 // TODO: Any other flags to propagate?
8671 SDNodeFlags Flags;
8672 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8673
8674 bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8675 bool FourEltRes = Op.getValueType() == MVT::v4f32;
8676
8677 SDValue Wide = widenVec(DAG, Src, dl);
8678 EVT WideVT = Wide.getValueType();
8679 unsigned WideNumElts = WideVT.getVectorNumElements();
8680 MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8681
8682 SmallVector<int, 16> ShuffV;
8683 for (unsigned i = 0; i < WideNumElts; ++i)
8684 ShuffV.push_back(i + WideNumElts);
8685
8686 int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8687 int SaveElts = FourEltRes ? 4 : 2;
8688 if (Subtarget.isLittleEndian())
8689 for (int i = 0; i < SaveElts; i++)
8690 ShuffV[i * Stride] = i;
8691 else
8692 for (int i = 1; i <= SaveElts; i++)
8693 ShuffV[i * Stride - 1] = i - 1;
8694
8695 SDValue ShuffleSrc2 =
8696 SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8697 SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8698
8699 SDValue Extend;
8700 if (SignedConv) {
8701 Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8702 EVT ExtVT = Src.getValueType();
8703 if (Subtarget.hasP9Altivec())
8704 ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8705 IntermediateVT.getVectorNumElements());
8706
8707 Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8708 DAG.getValueType(ExtVT));
8709 } else
8710 Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8711
8712 if (IsStrict)
8713 return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8714 {Op.getOperand(0), Extend}, Flags);
8715
8716 return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8717}
8718
8719SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8720 SelectionDAG &DAG) const {
8721 SDLoc dl(Op);
8722 bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8723 Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8724 bool IsStrict = Op->isStrictFPOpcode();
8725 SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8726 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8727
8728 // TODO: Any other flags to propagate?
8729 SDNodeFlags Flags;
8730 Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8731
8732 EVT InVT = Src.getValueType();
8733 EVT OutVT = Op.getValueType();
8734 if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8735 isOperationCustom(Op.getOpcode(), InVT))
8736 return LowerINT_TO_FPVector(Op, DAG, dl);
8737
8738 // Conversions to f128 are legal.
8739 if (Op.getValueType() == MVT::f128)
8740 return Subtarget.hasP9Vector() ? Op : SDValue();
8741
8742 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8743 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8744 return SDValue();
8745
8746 if (Src.getValueType() == MVT::i1) {
8747 SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8748 DAG.getConstantFP(1.0, dl, Op.getValueType()),
8749 DAG.getConstantFP(0.0, dl, Op.getValueType()));
8750 if (IsStrict)
8751 return DAG.getMergeValues({Sel, Chain}, dl);
8752 else
8753 return Sel;
8754 }
8755
8756   // If we have direct moves, we can do the entire conversion and skip the
8757   // store/load. However, without FPCVT we can't do most conversions.
8758 if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8759 Subtarget.isPPC64() && Subtarget.hasFPCVT())
8760 return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8761
8762 assert((IsSigned || Subtarget.hasFPCVT()) &&
8763 "UINT_TO_FP is supported only with FPCVT");
8764
8765 if (Src.getValueType() == MVT::i64) {
8766 SDValue SINT = Src;
8767 // When converting to single-precision, we actually need to convert
8768 // to double-precision first and then round to single-precision.
8769 // To avoid double-rounding effects during that operation, we have
8770 // to prepare the input operand. Bits that might be truncated when
8771 // converting to double-precision are replaced by a bit that won't
8772 // be lost at this stage, but is below the single-precision rounding
8773 // position.
8774 //
8775 // However, if afn is in effect, accept double
8776 // rounding to avoid the extra overhead.
8777 // FIXME: Currently INT_TO_FP can't support fast math flags because
8778 // of nneg flag, thus Op->getFlags().hasApproximateFuncs() is always
8779 // false.
8780 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT() &&
8781 !Op->getFlags().hasApproximateFuncs()) {
8782
8783 // Twiddle input to make sure the low 11 bits are zero. (If this
8784 // is the case, we are guaranteed the value will fit into the 53 bit
8785 // mantissa of an IEEE double-precision value without rounding.)
8786 // If any of those low 11 bits were not zero originally, make sure
8787 // bit 12 (value 2048) is set instead, so that the final rounding
8788 // to single-precision gets the correct result.
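      // For example, if SINT has any of its low 11 bits set, then
      // (SINT & 2047) + 2047 carries into bit 11, so after the OR and the AND
      // with -2048 the low 11 bits are cleared and bit 11 is set; if the low
      // 11 bits were already zero, Round simply ends up equal to SINT.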
8789 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8790 SINT, DAG.getConstant(2047, dl, MVT::i64));
8791 Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8792 Round, DAG.getConstant(2047, dl, MVT::i64));
8793 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8794 Round = DAG.getNode(ISD::AND, dl, MVT::i64, Round,
8795 DAG.getSignedConstant(-2048, dl, MVT::i64));
8796
8797 // However, we cannot use that value unconditionally: if the magnitude
8798 // of the input value is small, the bit-twiddling we did above might
8799 // end up visibly changing the output. Fortunately, in that case, we
8800 // don't need to twiddle bits since the original input will convert
8801 // exactly to double-precision floating-point already. Therefore,
8802 // construct a conditional to use the original value if the top 11
8803 // bits are all sign-bit copies, and use the rounded value computed
8804 // above otherwise.
8805 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8806 SINT, DAG.getConstant(53, dl, MVT::i32));
8807 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8808 Cond, DAG.getConstant(1, dl, MVT::i64));
8809 Cond = DAG.getSetCC(
8810 dl,
8811 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
8812 Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8813
8814 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8815 }
8816
8817 ReuseLoadInfo RLI;
8818 SDValue Bits;
8819
8820 MachineFunction &MF = DAG.getMachineFunction();
8821 if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8822 Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8823 RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8824 if (RLI.ResChain)
8825 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8826 } else if (Subtarget.hasLFIWAX() &&
8827 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8828 MachineMemOperand *MMO =
8829           MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8830                                   RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8831 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8832 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
8833 DAG.getVTList(MVT::f64, MVT::Other),
8834 Ops, MVT::i32, MMO);
8835 if (RLI.ResChain)
8836 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8837 } else if (Subtarget.hasFPCVT() &&
8838 canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8839 MachineMemOperand *MMO =
8840           MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8841                                   RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8842 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8843 Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
8844 DAG.getVTList(MVT::f64, MVT::Other),
8845 Ops, MVT::i32, MMO);
8846 if (RLI.ResChain)
8847 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
8848 } else if (((Subtarget.hasLFIWAX() &&
8849 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8850 (Subtarget.hasFPCVT() &&
8851 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8852 SINT.getOperand(0).getValueType() == MVT::i32) {
8853 MachineFrameInfo &MFI = MF.getFrameInfo();
8854 EVT PtrVT = getPointerTy(DAG.getDataLayout());
8855
8856 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8857 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8858
8859 SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8860                                    MachinePointerInfo::getFixedStack(
8861                                        DAG.getMachineFunction(), FrameIdx));
8862 Chain = Store;
8863
8864 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8865 "Expected an i32 store");
8866
8867 RLI.Ptr = FIdx;
8868 RLI.Chain = Chain;
8869 RLI.MPI =
8871 RLI.Alignment = Align(4);
8872
8873 MachineMemOperand *MMO =
8874           MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8875                                   RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8876 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8877       Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
8878                                      PPCISD::LFIWZX : PPCISD::LFIWAX,
8879 dl, DAG.getVTList(MVT::f64, MVT::Other),
8880 Ops, MVT::i32, MMO);
8881 Chain = Bits.getValue(1);
8882 } else
8883 Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8884
8885 SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8886 if (IsStrict)
8887 Chain = FP.getValue(1);
8888
8889 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8890 if (IsStrict)
8891 FP = DAG.getNode(
8892 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8893 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)},
8894 Flags);
8895 else
8896 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8897 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8898 }
8899 return FP;
8900 }
8901
8902 assert(Src.getValueType() == MVT::i32 &&
8903 "Unhandled INT_TO_FP type in custom expander!");
8904 // Since we only generate this in 64-bit mode, we can take advantage of
8905 // 64-bit registers. In particular, sign extend the input value into the
8906 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8907 // then lfd it and fcfid it.
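  // For example, an i32 input of -5 is sign-extended to the 64-bit pattern
  // 0xFFFFFFFFFFFFFFFB, stored, reloaded into an FPR with lfd, and fcfid then
  // produces -5.0.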
8908 MachineFunction &MF = DAG.getMachineFunction();
8909 MachineFrameInfo &MFI = MF.getFrameInfo();
8910 EVT PtrVT = getPointerTy(MF.getDataLayout());
8911
8912 SDValue Ld;
8913 if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8914 ReuseLoadInfo RLI;
8915 bool ReusingLoad;
8916 if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8917 int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8918 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8919
8920 SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8921                                    MachinePointerInfo::getFixedStack(
8922                                        DAG.getMachineFunction(), FrameIdx));
8923 Chain = Store;
8924
8925 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8926 "Expected an i32 store");
8927
8928 RLI.Ptr = FIdx;
8929 RLI.Chain = Chain;
8930 RLI.MPI =
8931           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
8932       RLI.Alignment = Align(4);
8933 }
8934
8935 MachineMemOperand *MMO =
8936           MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
8937                                   RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8938 SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8939 Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8940 DAG.getVTList(MVT::f64, MVT::Other), Ops,
8941 MVT::i32, MMO);
8942 Chain = Ld.getValue(1);
8943 if (ReusingLoad && RLI.ResChain) {
8944 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Ld.getValue(1));
8945 }
8946 } else {
8947 assert(Subtarget.isPPC64() &&
8948 "i32->FP without LFIWAX supported only on PPC64");
8949
8950 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8951 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8952
8953 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8954
8955 // STD the extended value into the stack slot.
8956 SDValue Store = DAG.getStore(
8957 Chain, dl, Ext64, FIdx,
8958         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8959     Chain = Store;
8960
8961 // Load the value as a double.
8962 Ld = DAG.getLoad(
8963 MVT::f64, dl, Chain, FIdx,
8964         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
8965     Chain = Ld.getValue(1);
8966 }
8967
8968 // FCFID it and return it.
8969 SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8970 if (IsStrict)
8971 Chain = FP.getValue(1);
8972 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8973 if (IsStrict)
8974 FP = DAG.getNode(
8975 ISD::STRICT_FP_ROUND, dl, DAG.getVTList(MVT::f32, MVT::Other),
8976 {Chain, FP, DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)}, Flags);
8977 else
8978 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8979 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
8980 }
8981 return FP;
8982}
8983
8984SDValue PPCTargetLowering::LowerSET_ROUNDING(SDValue Op,
8985 SelectionDAG &DAG) const {
8986 SDLoc Dl(Op);
8987 MachineFunction &MF = DAG.getMachineFunction();
8988 EVT PtrVT = getPointerTy(MF.getDataLayout());
8989 SDValue Chain = Op.getOperand(0);
8990
8991 // If requested mode is constant, just use simpler mtfsb/mffscrni
8992 if (auto *CVal = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
8993 uint64_t Mode = CVal->getZExtValue();
8994 assert(Mode < 4 && "Unsupported rounding mode!");
8995 unsigned InternalRnd = Mode ^ (~(Mode >> 1) & 1);
8996 if (Subtarget.isISA3_0())
8997 return SDValue(
8998 DAG.getMachineNode(
8999 PPC::MFFSCRNI, Dl, {MVT::f64, MVT::Other},
9000 {DAG.getConstant(InternalRnd, Dl, MVT::i32, true), Chain}),
9001 1);
9002 SDNode *SetHi = DAG.getMachineNode(
9003 (InternalRnd & 2) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9004 {DAG.getConstant(30, Dl, MVT::i32, true), Chain});
9005 SDNode *SetLo = DAG.getMachineNode(
9006 (InternalRnd & 1) ? PPC::MTFSB1 : PPC::MTFSB0, Dl, MVT::Other,
9007 {DAG.getConstant(31, Dl, MVT::i32, true), SDValue(SetHi, 0)});
9008 return SDValue(SetLo, 0);
9009 }
9010
9011 // Use x ^ (~(x >> 1) & 1) to transform LLVM rounding mode to Power format.
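  // For example, LLVM mode 0 (round to 0) maps to RN = 0b01, 1 (nearest) to
  // RN = 0b00, 2 (+inf) to RN = 0b10, and 3 (-inf) to RN = 0b11.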
9012 SDValue One = DAG.getConstant(1, Dl, MVT::i32);
9013 SDValue SrcFlag = DAG.getNode(ISD::AND, Dl, MVT::i32, Op.getOperand(1),
9014 DAG.getConstant(3, Dl, MVT::i32));
9015 SDValue DstFlag = DAG.getNode(
9016 ISD::XOR, Dl, MVT::i32, SrcFlag,
9017 DAG.getNode(ISD::AND, Dl, MVT::i32,
9018 DAG.getNOT(Dl,
9019 DAG.getNode(ISD::SRL, Dl, MVT::i32, SrcFlag, One),
9020 MVT::i32),
9021 One));
9022 // For Power9, there's faster mffscrn, and we don't need to read FPSCR
9023 SDValue MFFS;
9024 if (!Subtarget.isISA3_0()) {
9025 MFFS = DAG.getNode(PPCISD::MFFS, Dl, {MVT::f64, MVT::Other}, Chain);
9026 Chain = MFFS.getValue(1);
9027 }
9028 SDValue NewFPSCR;
9029 if (Subtarget.isPPC64()) {
9030 if (Subtarget.isISA3_0()) {
9031 NewFPSCR = DAG.getAnyExtOrTrunc(DstFlag, Dl, MVT::i64);
9032 } else {
9033 // Set the last two bits (rounding mode) of bitcasted FPSCR.
9034 SDNode *InsertRN = DAG.getMachineNode(
9035 PPC::RLDIMI, Dl, MVT::i64,
9036 {DAG.getNode(ISD::BITCAST, Dl, MVT::i64, MFFS),
9037 DAG.getNode(ISD::ZERO_EXTEND, Dl, MVT::i64, DstFlag),
9038 DAG.getTargetConstant(0, Dl, MVT::i32),
9039 DAG.getTargetConstant(62, Dl, MVT::i32)});
9040 NewFPSCR = SDValue(InsertRN, 0);
9041 }
9042 NewFPSCR = DAG.getNode(ISD::BITCAST, Dl, MVT::f64, NewFPSCR);
9043 } else {
9044 // In 32-bit mode, store f64, load and update the lower half.
9045 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9046 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9047 SDValue Addr = Subtarget.isLittleEndian()
9048 ? StackSlot
9049 : DAG.getNode(ISD::ADD, Dl, PtrVT, StackSlot,
9050 DAG.getConstant(4, Dl, PtrVT));
9051 if (Subtarget.isISA3_0()) {
9052 Chain = DAG.getStore(Chain, Dl, DstFlag, Addr, MachinePointerInfo());
9053 } else {
9054 Chain = DAG.getStore(Chain, Dl, MFFS, StackSlot, MachinePointerInfo());
9055 SDValue Tmp =
9056 DAG.getLoad(MVT::i32, Dl, Chain, Addr, MachinePointerInfo());
9057 Chain = Tmp.getValue(1);
9058 Tmp = SDValue(DAG.getMachineNode(
9059 PPC::RLWIMI, Dl, MVT::i32,
9060 {Tmp, DstFlag, DAG.getTargetConstant(0, Dl, MVT::i32),
9061 DAG.getTargetConstant(30, Dl, MVT::i32),
9062 DAG.getTargetConstant(31, Dl, MVT::i32)}),
9063 0);
9064 Chain = DAG.getStore(Chain, Dl, Tmp, Addr, MachinePointerInfo());
9065 }
9066 NewFPSCR =
9067 DAG.getLoad(MVT::f64, Dl, Chain, StackSlot, MachinePointerInfo());
9068 Chain = NewFPSCR.getValue(1);
9069 }
9070 if (Subtarget.isISA3_0())
9071 return SDValue(DAG.getMachineNode(PPC::MFFSCRN, Dl, {MVT::f64, MVT::Other},
9072 {NewFPSCR, Chain}),
9073 1);
9074 SDValue Zero = DAG.getConstant(0, Dl, MVT::i32, true);
9075 SDNode *MTFSF = DAG.getMachineNode(
9076 PPC::MTFSF, Dl, MVT::Other,
9077 {DAG.getConstant(255, Dl, MVT::i32, true), NewFPSCR, Zero, Zero, Chain});
9078 return SDValue(MTFSF, 0);
9079}
9080
9081SDValue PPCTargetLowering::LowerGET_ROUNDING(SDValue Op,
9082 SelectionDAG &DAG) const {
9083 SDLoc dl(Op);
9084 /*
9085    The rounding mode is in bits 30:31 of the FPSCR, and has the following
9086 settings:
9087 00 Round to nearest
9088 01 Round to 0
9089 10 Round to +inf
9090 11 Round to -inf
9091
9092 GET_ROUNDING, on the other hand, expects the following:
9093 -1 Undefined
9094 0 Round to 0
9095 1 Round to nearest
9096 2 Round to +inf
9097 3 Round to -inf
9098
9099 To perform the conversion, we do:
9100 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
9101 */
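  // For example, RN = 0b00 (nearest) gives (0 ^ (3 >> 1)) = 1, and RN = 0b01
  // (round to 0) gives (1 ^ (2 >> 1)) = 0, matching the encoding above.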
9102
9103 MachineFunction &MF = DAG.getMachineFunction();
9104 EVT VT = Op.getValueType();
9105 EVT PtrVT = getPointerTy(MF.getDataLayout());
9106
9107 // Save FP Control Word to register
9108 SDValue Chain = Op.getOperand(0);
9109 SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
9110 Chain = MFFS.getValue(1);
9111
9112 SDValue CWD;
9113 if (isTypeLegal(MVT::i64)) {
9114 CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
9115 DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
9116 } else {
9117 // Save FP register to stack slot
9118 int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
9119 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
9120 Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
9121
9122 // Load FP Control Word from low 32 bits of stack slot.
9124 "Stack slot adjustment is valid only on big endian subtargets!");
9125 SDValue Four = DAG.getConstant(4, dl, PtrVT);
9126 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
9127 CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
9128 Chain = CWD.getValue(1);
9129 }
9130
9131 // Transform as necessary
9132 SDValue CWD1 =
9133 DAG.getNode(ISD::AND, dl, MVT::i32,
9134 CWD, DAG.getConstant(3, dl, MVT::i32));
9135 SDValue CWD2 =
9136 DAG.getNode(ISD::SRL, dl, MVT::i32,
9137 DAG.getNode(ISD::AND, dl, MVT::i32,
9138 DAG.getNode(ISD::XOR, dl, MVT::i32,
9139 CWD, DAG.getConstant(3, dl, MVT::i32)),
9140 DAG.getConstant(3, dl, MVT::i32)),
9141 DAG.getConstant(1, dl, MVT::i32));
9142
9143 SDValue RetVal =
9144 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
9145
9146 RetVal =
9147       DAG.getNode((VT.getSizeInBits() < 16 ? ISD::TRUNCATE : ISD::ZERO_EXTEND),
9148                   dl, VT, RetVal);
9149
9150 return DAG.getMergeValues({RetVal, Chain}, dl);
9151}
9152
9153SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9154 EVT VT = Op.getValueType();
9155 uint64_t BitWidth = VT.getSizeInBits();
9156 SDLoc dl(Op);
9157 assert(Op.getNumOperands() == 3 &&
9158 VT == Op.getOperand(1).getValueType() &&
9159 "Unexpected SHL!");
9160
9161 // Expand into a bunch of logical ops. Note that these ops
9162 // depend on the PPC behavior for oversized shift amounts.
9163 SDValue Lo = Op.getOperand(0);
9164 SDValue Hi = Op.getOperand(1);
9165 SDValue Amt = Op.getOperand(2);
9166 EVT AmtVT = Amt.getValueType();
9167
9168 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9169 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9170 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
9171 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
9172 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
9173 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9174 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9175 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
9176 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9177 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
9178 SDValue OutOps[] = { OutLo, OutHi };
9179 return DAG.getMergeValues(OutOps, dl);
9180}
9181
9182SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
9183 EVT VT = Op.getValueType();
9184 SDLoc dl(Op);
9185 uint64_t BitWidth = VT.getSizeInBits();
9186 assert(Op.getNumOperands() == 3 &&
9187 VT == Op.getOperand(1).getValueType() &&
9188 "Unexpected SRL!");
9189
9190 // Expand into a bunch of logical ops. Note that these ops
9191 // depend on the PPC behavior for oversized shift amounts.
9192 SDValue Lo = Op.getOperand(0);
9193 SDValue Hi = Op.getOperand(1);
9194 SDValue Amt = Op.getOperand(2);
9195 EVT AmtVT = Amt.getValueType();
9196
9197 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9198 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9199 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9200 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9201 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9202 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9203 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9204 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
9205 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
9206 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
9207 SDValue OutOps[] = { OutLo, OutHi };
9208 return DAG.getMergeValues(OutOps, dl);
9209}
9210
9211SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
9212 SDLoc dl(Op);
9213 EVT VT = Op.getValueType();
9214 uint64_t BitWidth = VT.getSizeInBits();
9215 assert(Op.getNumOperands() == 3 &&
9216 VT == Op.getOperand(1).getValueType() &&
9217 "Unexpected SRA!");
9218
9219 // Expand into a bunch of logical ops, followed by a select_cc.
9220 SDValue Lo = Op.getOperand(0);
9221 SDValue Hi = Op.getOperand(1);
9222 SDValue Amt = Op.getOperand(2);
9223 EVT AmtVT = Amt.getValueType();
9224
9225 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
9226 DAG.getConstant(BitWidth, dl, AmtVT), Amt);
9227 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
9228 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
9229 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
9230 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
9231 DAG.getSignedConstant(-BitWidth, dl, AmtVT));
9232 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
9233 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
9234 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
9235 Tmp4, Tmp6, ISD::SETLE);
9236 SDValue OutOps[] = { OutLo, OutHi };
9237 return DAG.getMergeValues(OutOps, dl);
9238}
9239
9240SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
9241 SelectionDAG &DAG) const {
9242 SDLoc dl(Op);
9243 EVT VT = Op.getValueType();
9244 unsigned BitWidth = VT.getSizeInBits();
9245
9246 bool IsFSHL = Op.getOpcode() == ISD::FSHL;
9247 SDValue X = Op.getOperand(0);
9248 SDValue Y = Op.getOperand(1);
9249 SDValue Z = Op.getOperand(2);
9250 EVT AmtVT = Z.getValueType();
9251
9252 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
9253 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
9254 // This is simpler than TargetLowering::expandFunnelShift because we can rely
9255 // on PowerPC shift by BW being well defined.
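  // For example, "fshl i32 X, Y, 40" becomes (X << 8) | (Y >> 24); when
  // Z % BW == 0, SubZ == BW and the PPC shift by BW yields zero, so the result
  // collapses to just X (fshl) or Y (fshr), as required.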
9256 Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
9257 DAG.getConstant(BitWidth - 1, dl, AmtVT));
9258 SDValue SubZ =
9259 DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
9260 X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
9261 Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
9262 return DAG.getNode(ISD::OR, dl, VT, X, Y);
9263}
9264
9265//===----------------------------------------------------------------------===//
9266// Vector related lowering.
9267//
9268
9269/// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
9270/// element size of SplatSize. Cast the result to VT.
9271static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
9272 SelectionDAG &DAG, const SDLoc &dl) {
9273 static const MVT VTys[] = { // canonical VT to use for each size.
9274 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
9275 };
9276
9277 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
9278
9279 // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
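  // (For example, a two-byte splat of 0xFFFF becomes a one-byte splat of 0xFF.)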
9280 if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
9281 SplatSize = 1;
9282 Val = 0xFF;
9283 }
9284
9285 EVT CanonicalVT = VTys[SplatSize-1];
9286
9287 // Build a canonical splat for this value.
9288 // Explicitly truncate APInt here, as this API is used with a mix of
9289 // signed and unsigned values.
9290 return DAG.getBitcast(
9291 ReqVT,
9292 DAG.getConstant(APInt(64, Val).trunc(SplatSize * 8), dl, CanonicalVT));
9293}
9294
9295/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
9296/// specified intrinsic ID.
9297 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
9298                                 const SDLoc &dl, EVT DestVT = MVT::Other) {
9299 if (DestVT == MVT::Other) DestVT = Op.getValueType();
9300 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9301 DAG.getConstant(IID, dl, MVT::i32), Op);
9302}
9303
9304/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
9305/// specified intrinsic ID.
9306 static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
9307                                 SelectionDAG &DAG, const SDLoc &dl,
9308 EVT DestVT = MVT::Other) {
9309 if (DestVT == MVT::Other) DestVT = LHS.getValueType();
9310 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9311 DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
9312}
9313
9314/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
9315/// specified intrinsic ID.
9316static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
9317 SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
9318 EVT DestVT = MVT::Other) {
9319 if (DestVT == MVT::Other) DestVT = Op0.getValueType();
9320 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
9321 DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
9322}
9323
9324/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
9325/// amount. The result has the specified value type.
9326static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
9327 SelectionDAG &DAG, const SDLoc &dl) {
9328 // Force LHS/RHS to be the right type.
9329 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
9330 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
9331
9332 int Ops[16];
9333 for (unsigned i = 0; i != 16; ++i)
9334 Ops[i] = i + Amt;
9335 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
9336 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9337}
9338
9339/// Do we have an efficient pattern in a .td file for this node?
9340///
9341/// \param V - pointer to the BuildVectorSDNode being matched
9342/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
9343///
9344/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
9345/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
9346/// the opposite is true (expansion is beneficial) are:
9347/// - The node builds a vector out of integers that are not 32 or 64-bits
9348/// - The node builds a vector out of constants
9349/// - The node is a "load-and-splat"
9350/// In all other cases, we will choose to keep the BUILD_VECTOR.
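/// For example, a v4i32 node whose operands are all the same loaded value (a
/// load-and-splat) is expanded, while a v2f64 node built from two distinct
/// non-constant scalars is kept as a BUILD_VECTOR.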
9351 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
9352                                             bool HasDirectMove,
9353 bool HasP8Vector) {
9354 EVT VecVT = V->getValueType(0);
9355 bool RightType = VecVT == MVT::v2f64 ||
9356 (HasP8Vector && VecVT == MVT::v4f32) ||
9357 (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
9358 if (!RightType)
9359 return false;
9360
9361 bool IsSplat = true;
9362 bool IsLoad = false;
9363 SDValue Op0 = V->getOperand(0);
9364
9365 // This function is called in a block that confirms the node is not a constant
9366 // splat. So a constant BUILD_VECTOR here means the vector is built out of
9367 // different constants.
9368 if (V->isConstant())
9369 return false;
9370 for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9371 if (V->getOperand(i).isUndef())
9372 return false;
9373 // We want to expand nodes that represent load-and-splat even if the
9374 // loaded value is a floating point truncation or conversion to int.
9375 if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9376 (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9377 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9378 (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9379 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9380 (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9381 V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9382 IsLoad = true;
9383 // If the operands are different or the input is not a load and has more
9384 // uses than just this BV node, then it isn't a splat.
9385 if (V->getOperand(i) != Op0 ||
9386 (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9387 IsSplat = false;
9388 }
9389 return !(IsSplat && IsLoad);
9390}
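// Illustrative cases (hypothetical inputs): a v4i32 BUILD_VECTOR of four
// distinct non-constant GPR values on a subtarget with direct moves is kept
// as a BUILD_VECTOR (the function returns true), while a v4i32 splat of a
// single loaded value returns false so that it can be expanded into a
// load-and-splat instead.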
9391
9392// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9393SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9394
9395 SDLoc dl(Op);
9396 SDValue Op0 = Op->getOperand(0);
9397
9398 if (!Subtarget.isPPC64() || (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9399 (Op.getValueType() != MVT::f128))
9400 return SDValue();
9401
9402 SDValue Lo = Op0.getOperand(0);
9403 SDValue Hi = Op0.getOperand(1);
9404 if ((Lo.getValueType() != MVT::i64) || (Hi.getValueType() != MVT::i64))
9405 return SDValue();
9406
9407 if (!Subtarget.isLittleEndian())
9408 std::swap(Lo, Hi);
9409
9410 return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Lo, Hi);
9411}
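// Sketch of the pattern handled above: on little-endian PPC64,
//   (f128 (bitcast (build_pair i64 %lo, i64 %hi)))
// becomes PPCISD::BUILD_FP128 with %lo and %hi used directly; on big-endian
// the two halves are swapped first.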
9412
9413static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9414 const SDValue *InputLoad = &Op;
9415 while (InputLoad->getOpcode() == ISD::BITCAST)
9416 InputLoad = &InputLoad->getOperand(0);
9417 if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9418 InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9419 IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9420 InputLoad = &InputLoad->getOperand(0);
9421 }
9422 if (InputLoad->getOpcode() != ISD::LOAD)
9423 return nullptr;
9424 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9425 return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9426}
9427
9428// Convert the argument APFloat to a single precision APFloat if there is no
9429// loss in information during the conversion to single precision APFloat and the
9430// resulting number is not a denormal number. Return true if successful.
9431 bool llvm::convertToNonDenormSingle(APFloat &ArgAPFloat) {
9432 APFloat APFloatToConvert = ArgAPFloat;
9433 bool LosesInfo = true;
9434 APFloatToConvert.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
9435 &LosesInfo);
9436 bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9437 if (Success)
9438 ArgAPFloat = APFloatToConvert;
9439 return Success;
9440}
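// Illustrative values: 2.5 converts to single precision exactly and is
// normal, so the conversion succeeds; 0.1 loses information in the
// conversion, and a value such as 1e-40 becomes a single-precision denormal,
// so both of those fail and ArgAPFloat is left unchanged.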
9441
9442// Bitcast the argument APInt to a double and convert it to a single precision
9443// APFloat, bitcast the APFloat to an APInt and assign it to the original
9444// argument if there is no loss in information during the conversion from
9445// double to single precision APFloat and the resulting number is not a denormal
9446// number. Return true if successful.
9447 bool llvm::convertToNonDenormSingle(APInt &ArgAPInt) {
9448 double DpValue = ArgAPInt.bitsToDouble();
9449 APFloat APFloatDp(DpValue);
9450 bool Success = convertToNonDenormSingle(APFloatDp);
9451 if (Success)
9452 ArgAPInt = APFloatDp.bitcastToAPInt();
9453 return Success;
9454}
9455
9456 // Nondestructive check for convertToNonDenormSingle.
9457 bool llvm::checkConvertToNonDenormSingle(APFloat &ArgAPFloat) {
9458 // Only convert if it loses info, since XXSPLTIDP should
9459 // handle the other case.
9460 APFloat APFloatToConvert = ArgAPFloat;
9461 bool LosesInfo = true;
9463 &LosesInfo);
9464
9465 return (!LosesInfo && !APFloatToConvert.isDenormal());
9466}
9467
9468static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9469 unsigned &Opcode) {
9470 LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9471 if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9472 return false;
9473
9474 EVT Ty = Op->getValueType(0);
9475 // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9476 // as we cannot handle extending loads for these types.
9477 if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9478 ISD::isNON_EXTLoad(InputNode))
9479 return true;
9480
9481 EVT MemVT = InputNode->getMemoryVT();
9482 // For v8i16 and v16i8 types, extending loads can be handled as long as the
9483 // memory VT is the same as the vector element type.
9484 // The loads feeding into the v8i16 and v16i8 types will be extending because
9485 // scalar i8/i16 are not legal types.
9486 if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9487 (MemVT == Ty.getVectorElementType()))
9488 return true;
9489
9490 if (Ty == MVT::v2i64) {
9491 // Check the extend type, when the input type is i32, and the output vector
9492 // type is v2i64.
9493 if (MemVT == MVT::i32) {
9494 if (ISD::isZEXTLoad(InputNode))
9495 Opcode = PPCISD::ZEXT_LD_SPLAT;
9496 if (ISD::isSEXTLoad(InputNode))
9497 Opcode = PPCISD::SEXT_LD_SPLAT;
9498 }
9499 return true;
9500 }
9501 return false;
9502}
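// Illustrative cases (assuming a VSX subtarget): a v4i32 BUILD_VECTOR fed by
// a plain non-extending i32 load is accepted with the caller's default
// opcode, while a v2i64 BUILD_VECTOR fed by a zero-extending i32 load is
// accepted with Opcode rewritten to PPCISD::ZEXT_LD_SPLAT.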
9503
9504 static bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN,
9505 bool IsLittleEndian) {
9506 assert(BVN.getNumOperands() > 0 && "Unexpected 0-size build vector");
9507
9508 BitMask.clearAllBits();
9509 EVT VT = BVN.getValueType(0);
9510 unsigned VTSize = VT.getSizeInBits();
9511 APInt ConstValue(VTSize, 0);
9512
9513 unsigned EltWidth = VT.getScalarSizeInBits();
9514
9515 unsigned BitPos = 0;
9516 for (auto OpVal : BVN.op_values()) {
9517 auto *CN = dyn_cast<ConstantSDNode>(OpVal);
9518
9519 if (!CN)
9520 return false;
9521 // The elements in a vector register are ordered in reverse byte order
9522 // between little-endian and big-endian modes.
9523 ConstValue.insertBits(CN->getAPIntValue().zextOrTrunc(EltWidth),
9524 IsLittleEndian ? BitPos : VTSize - EltWidth - BitPos);
9525 BitPos += EltWidth;
9526 }
9527
9528 for (unsigned J = 0; J < 16; ++J) {
9529 APInt ExtractValue = ConstValue.extractBits(8, J * 8);
9530 if (ExtractValue != 0x00 && ExtractValue != 0xFF)
9531 return false;
9532 if (ExtractValue == 0xFF)
9533 BitMask.setBit(J);
9534 }
9535 return true;
9536}
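// Worked example (little-endian): a v16i8 BUILD_VECTOR whose bytes 0 and 15
// are 0xFF and whose other bytes are 0x00 produces BitMask == 0x8001 (bits 0
// and 15 set), which the caller below materializes with a single mtvsrbmi.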
9537
9538// If this is a case we can't handle, return null and let the default
9539// expansion code take care of it. If we CAN select this case, and if it
9540// selects to a single instruction, return Op. Otherwise, if we can codegen
9541// this case more efficiently than a constant pool load, lower it to the
9542// sequence of ops that should be used.
9543SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9544 SelectionDAG &DAG) const {
9545 SDLoc dl(Op);
9546 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9547 assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9548
9549 if (Subtarget.hasP10Vector()) {
9550 APInt BitMask(32, 0);
9551 // If the value of the vector is all zeros or all ones,
9552 // we do not convert it to MTVSRBMI.
9553 // The xxleqv instruction sets a vector with all ones.
9554 // The xxlxor instruction sets a vector with all zeros.
9555 if (isValidMtVsrBmi(BitMask, *BVN, Subtarget.isLittleEndian()) &&
9556 BitMask != 0 && BitMask != 0xffff) {
9557 SDValue SDConstant = DAG.getTargetConstant(BitMask, dl, MVT::i32);
9558 MachineSDNode *MSDNode =
9559 DAG.getMachineNode(PPC::MTVSRBMI, dl, MVT::v16i8, SDConstant);
9560 SDValue SDV = SDValue(MSDNode, 0);
9561 EVT DVT = BVN->getValueType(0);
9562 EVT SVT = SDV.getValueType();
9563 if (SVT != DVT) {
9564 SDV = DAG.getNode(ISD::BITCAST, dl, DVT, SDV);
9565 }
9566 return SDV;
9567 }
9568 // Recognize build vector patterns to emit VSX vector instructions
9569 // instead of loading value from memory.
9570 if (SDValue VecPat = combineBVLoadsSpecialValue(Op, DAG))
9571 return VecPat;
9572 }
9573 // Check if this is a splat of a constant value.
9574 APInt APSplatBits, APSplatUndef;
9575 unsigned SplatBitSize;
9576 bool HasAnyUndefs;
9577 bool BVNIsConstantSplat =
9578 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9579 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9580
9581 // If it is a splat of a double, check if we can shrink it to a 32 bit
9582 // non-denormal float which when converted back to double gives us the same
9583 // double. This is to exploit the XXSPLTIDP instruction.
9584 // If we lose precision, we use XXSPLTI32DX.
9585 if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9586 Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
9587 // Check the type first to short-circuit so we don't modify APSplatBits if
9588 // this block isn't executed.
9589 if ((Op->getValueType(0) == MVT::v2f64) &&
9590 convertToNonDenormSingle(APSplatBits)) {
9591 SDValue SplatNode = DAG.getNode(
9592 PPCISD::XXSPLTI_SP_TO_DP, dl, MVT::v2f64,
9593 DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9594 return DAG.getBitcast(Op.getValueType(), SplatNode);
9595 } else {
9596 // We may lose precision, so we have to use XXSPLTI32DX.
9597
9598 uint32_t Hi = Hi_32(APSplatBits.getZExtValue());
9599 uint32_t Lo = Lo_32(APSplatBits.getZExtValue());
9600 SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9601
9602 if (!Hi || !Lo)
9603 // If either half is 0, then we should generate XXLXOR to set to 0.
9604 SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9605
9606 if (Hi)
9607 SplatNode = DAG.getNode(
9608 PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9609 DAG.getTargetConstant(0, dl, MVT::i32),
9610 DAG.getTargetConstant(Hi, dl, MVT::i32));
9611
9612 if (Lo)
9613 SplatNode =
9614 DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9615 DAG.getTargetConstant(1, dl, MVT::i32),
9616 DAG.getTargetConstant(Lo, dl, MVT::i32));
9617
9618 return DAG.getBitcast(Op.getValueType(), SplatNode);
9619 }
9620 }
9621
9622 bool IsSplat64 = false;
9623 uint64_t SplatBits = 0;
9624 int32_t SextVal = 0;
9625 if (BVNIsConstantSplat && SplatBitSize <= 64) {
9626 SplatBits = APSplatBits.getZExtValue();
9627 if (SplatBitSize <= 32) {
9628 SextVal = SignExtend32(SplatBits, SplatBitSize);
9629 } else if (SplatBitSize == 64 && Subtarget.hasP8Altivec()) {
9630 int64_t Splat64Val = static_cast<int64_t>(SplatBits);
9631 bool P9Vector = Subtarget.hasP9Vector();
9632 int32_t Hi = P9Vector ? 127 : 15;
9633 int32_t Lo = P9Vector ? -128 : -16;
9634 IsSplat64 = Splat64Val >= Lo && Splat64Val <= Hi;
9635 SextVal = static_cast<int32_t>(SplatBits);
9636 }
9637 }
9638
9639 if (!BVNIsConstantSplat || (SplatBitSize > 32 && !IsSplat64)) {
9640 unsigned NewOpcode = PPCISD::LD_SPLAT;
9641
9642 // Handle load-and-splat patterns as we have instructions that will do this
9643 // in one go.
9644 if (DAG.isSplatValue(Op, true) &&
9645 isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9646 const SDValue *InputLoad = &Op.getOperand(0);
9647 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9648
9649 // If the input load is an extending load, it will be an i32 -> i64
9650 // extending load and isValidSplatLoad() will update NewOpcode.
9651 unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9652 unsigned ElementSize =
9653 MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9654
9655 assert(((ElementSize == 2 * MemorySize)
9656 ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9657 NewOpcode == PPCISD::SEXT_LD_SPLAT)
9658 : (NewOpcode == PPCISD::LD_SPLAT)) &&
9659 "Unmatched element size and opcode!\n");
9660
9661 // Checking for a single use of this load, we have to check for vector
9662 // width (128 bits) / ElementSize uses (since each operand of the
9663 // BUILD_VECTOR is a separate use of the value).
9664 unsigned NumUsesOfInputLD = 128 / ElementSize;
9665 for (SDValue BVInOp : Op->ops())
9666 if (BVInOp.isUndef())
9667 NumUsesOfInputLD--;
9668
9669 // Exclude some cases where LD_SPLAT is worse than scalar_to_vector:
9670 // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9671 // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9672 // 15", but function IsValidSplatLoad() now will only return true when
9673 // the data at index 0 is not nullptr. So we will not get into trouble for
9674 // these cases.
9675 //
9676 // case 1 - lfiwzx/lfiwax
9677 // 1.1: load result is i32 and is sign/zero extend to i64;
9678 // 1.2: build a v2i64 vector type with above loaded value;
9679 // 1.3: the vector has only one value at index 0, others are all undef;
9680 // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9681 if (NumUsesOfInputLD == 1 &&
9682 (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9683 !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9684 Subtarget.hasLFIWAX()))
9685 return SDValue();
9686
9687 // case 2 - lxvr[hb]x
9688 // 2.1: load result is at most i16;
9689 // 2.2: build a vector with above loaded value;
9690 // 2.3: the vector has only one value at index 0, others are all undef;
9691 // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9692 if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9693 Subtarget.isISA3_1() && ElementSize <= 16)
9694 return SDValue();
9695
9696 assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9697 if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9698 Subtarget.hasVSX()) {
9699 SDValue Ops[] = {
9700 LD->getChain(), // Chain
9701 LD->getBasePtr(), // Ptr
9702 DAG.getValueType(Op.getValueType()) // VT
9703 };
9704 SDValue LdSplt = DAG.getMemIntrinsicNode(
9705 NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9706 LD->getMemoryVT(), LD->getMemOperand());
9707 // Replace all uses of the output chain of the original load with the
9708 // output chain of the new load.
9709 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9710 LdSplt.getValue(1));
9711 return LdSplt;
9712 }
9713 }
9714
9715 // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9716 // 32-bits can be lowered to VSX instructions under certain conditions.
9717 // Without VSX, there is no pattern more efficient than expanding the node.
9718 if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9719 haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
9720 Subtarget.hasP8Vector()))
9721 return Op;
9722 return SDValue();
9723 }
9724
9725 uint64_t SplatUndef = APSplatUndef.getZExtValue();
9726 unsigned SplatSize = SplatBitSize / 8;
9727
9728 // First, handle single instruction cases.
9729
9730 // All zeros?
9731 if (SplatBits == 0) {
9732 // Canonicalize all zero vectors to be v4i32.
9733 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9734 SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9735 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9736 }
9737 return Op;
9738 }
9739
9740 // We have XXSPLTIW for constant splats four bytes wide.
9741 // Given vector length is a multiple of 4, 2-byte splats can be replaced
9742 // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9743 // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9744 // turned into a 4-byte splat of 0xABABABAB.
9745 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 2)
9746 return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9747 Op.getValueType(), DAG, dl);
9748
9749 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector() && SplatSize == 4)
9750 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9751 dl);
9752
9753 // We have XXSPLTIB for constant splats one byte wide.
9754 if (Subtarget.hasP9Vector() && SplatSize == 1)
9755 return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9756 dl);
9757
9758 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9759 // Use VSPLTIW/VUPKLSW for v2i64 in range [-16,15].
9760 if (SextVal >= -16 && SextVal <= 15) {
9761 // SplatSize may be 1, 2, 4, or 8. Use size 4 instead of 8 for the splat to
9762 // generate a splat word with extend for size 8.
9763 unsigned UseSize = SplatSize == 8 ? 4 : SplatSize;
9764 SDValue Res =
9765 getCanonicalConstSplat(SextVal, UseSize, Op.getValueType(), DAG, dl);
9766 if (SplatSize != 8)
9767 return Res;
9768 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vupklsw, Res, DAG, dl);
9769 }
9770
9771 // Two instruction sequences.
9772
9773 if (Subtarget.hasP9Vector() && SextVal >= -128 && SextVal <= 127) {
9774 SDValue C = DAG.getConstant((unsigned char)SextVal, dl, MVT::i32);
9775 SmallVector<SDValue, 16> Ops(16, C);
9776 SDValue BV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
9777 unsigned IID;
9778 EVT VT;
9779 switch (SplatSize) {
9780 default:
9781 llvm_unreachable("Unexpected type for vector constant.");
9782 case 2:
9783 IID = Intrinsic::ppc_altivec_vupklsb;
9784 VT = MVT::v8i16;
9785 break;
9786 case 4:
9787 IID = Intrinsic::ppc_altivec_vextsb2w;
9788 VT = MVT::v4i32;
9789 break;
9790 case 8:
9791 IID = Intrinsic::ppc_altivec_vextsb2d;
9792 VT = MVT::v2i64;
9793 break;
9794 }
9795 SDValue Extend = BuildIntrinsicOp(IID, BV, DAG, dl, VT);
9796 return DAG.getBitcast(Op->getValueType(0), Extend);
9797 }
9798 assert(!IsSplat64 && "Unhandled 64-bit splat pattern");
9799
9800 // If this value is in the range [-32,30] and is even, use:
9801 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9802 // If this value is in the range [17,31] and is odd, use:
9803 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9804 // If this value is in the range [-31,-17] and is odd, use:
9805 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9806 // Note the last two are three-instruction sequences.
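// Worked examples for a byte splat (illustrative): 30 is even, so it becomes
// vspltisb 15 + vspltisb 15; 27 is odd and in [17,31], so it becomes
// vspltisb 11 - vspltisb -16 (11 - (-16) == 27); -27 is odd and in [-31,-17],
// so it becomes vspltisb -11 + vspltisb -16 (-11 + (-16) == -27).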
9807 if (SextVal >= -32 && SextVal <= 31) {
9808 // To avoid having these optimizations undone by constant folding,
9809 // we convert to a pseudo that will be expanded later into one of
9810 // the above forms.
9811 SDValue Elt = DAG.getSignedConstant(SextVal, dl, MVT::i32);
9812 EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9813 (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9814 SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9815 SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9816 if (VT == Op.getValueType())
9817 return RetVal;
9818 else
9819 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9820 }
9821
9822 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9823 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9824 // for fneg/fabs.
9825 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9826 // Make -1 and vspltisw -1:
9827 SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9828
9829 // Make the VSLW intrinsic, computing 0x8000_0000.
9830 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9831 OnesV, DAG, dl);
9832
9833 // xor by OnesV to invert it.
9834 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9835 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9836 }
9837
9838 // Check to see if this is one of the wide variety of 'vsplti*' + binop-with-self cases.
9839 static const signed char SplatCsts[] = {
9840 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9841 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9842 };
9843
9844 for (unsigned idx = 0; idx < std::size(SplatCsts); ++idx) {
9845 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9846 // cases which are ambiguous (e.g. formation of 0x8000_0000).
9847 int i = SplatCsts[idx];
9848
9849 // Figure out what shift amount will be used by altivec if shifted by i in
9850 // this splat size.
9851 unsigned TypeShiftAmt = i & (SplatBitSize-1);
9852
9853 // vsplti + shl self.
9854 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9855 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9856 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9857 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9858 Intrinsic::ppc_altivec_vslw
9859 };
9860 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9861 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9862 }
9863
9864 // vsplti + srl self.
9865 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9866 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9867 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9868 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9869 Intrinsic::ppc_altivec_vsrw
9870 };
9871 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9872 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9873 }
9874
9875 // vsplti + rol self.
9876 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9877 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9878 SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9879 static const unsigned IIDs[] = { // Intrinsic to use for each size.
9880 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9881 Intrinsic::ppc_altivec_vrlw
9882 };
9883 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9884 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9885 }
9886
9887 // t = vsplti c, result = vsldoi t, t, 1
9888 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9889 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9890 unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9891 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9892 }
9893 // t = vsplti c, result = vsldoi t, t, 2
9894 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9895 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9896 unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9897 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9898 }
9899 // t = vsplti c, result = vsldoi t, t, 3
9900 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9901 SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9902 unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9903 return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9904 }
9905 }
9906
9907 return SDValue();
9908}
9909
9910/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
9911/// the specified operations to build the shuffle.
9912 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
9913 SDValue RHS, SelectionDAG &DAG,
9914 const SDLoc &dl) {
9915 unsigned OpNum = (PFEntry >> 26) & 0x0F;
9916 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
9917 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
9918
9919 enum {
9920 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
9921 OP_VMRGHW,
9922 OP_VMRGLW,
9923 OP_VSPLTISW0,
9924 OP_VSPLTISW1,
9925 OP_VSPLTISW2,
9926 OP_VSPLTISW3,
9927 OP_VSLDOI4,
9928 OP_VSLDOI8,
9929 OP_VSLDOI12
9930 };
9931
9932 if (OpNum == OP_COPY) {
9933 if (LHSID == (1*9+2)*9+3) return LHS;
9934 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
9935 return RHS;
9936 }
9937
9938 SDValue OpLHS, OpRHS;
9939 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
9940 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
9941
9942 int ShufIdxs[16];
9943 switch (OpNum) {
9944 default: llvm_unreachable("Unknown i32 permute!");
9945 case OP_VMRGHW:
9946 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
9947 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
9948 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
9949 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
9950 break;
9951 case OP_VMRGLW:
9952 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
9953 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
9954 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
9955 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
9956 break;
9957 case OP_VSPLTISW0:
9958 for (unsigned i = 0; i != 16; ++i)
9959 ShufIdxs[i] = (i&3)+0;
9960 break;
9961 case OP_VSPLTISW1:
9962 for (unsigned i = 0; i != 16; ++i)
9963 ShufIdxs[i] = (i&3)+4;
9964 break;
9965 case OP_VSPLTISW2:
9966 for (unsigned i = 0; i != 16; ++i)
9967 ShufIdxs[i] = (i&3)+8;
9968 break;
9969 case OP_VSPLTISW3:
9970 for (unsigned i = 0; i != 16; ++i)
9971 ShufIdxs[i] = (i&3)+12;
9972 break;
9973 case OP_VSLDOI4:
9974 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
9975 case OP_VSLDOI8:
9976 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
9977 case OP_VSLDOI12:
9978 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
9979 }
9980 EVT VT = OpLHS.getValueType();
9981 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
9982 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
9983 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
9984 return DAG.getNode(ISD::BITCAST, dl, VT, T);
9985}
9986
9987/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
9988/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
9989/// SDValue.
9990SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
9991 SelectionDAG &DAG) const {
9992 const unsigned BytesInVector = 16;
9993 bool IsLE = Subtarget.isLittleEndian();
9994 SDLoc dl(N);
9995 SDValue V1 = N->getOperand(0);
9996 SDValue V2 = N->getOperand(1);
9997 unsigned ShiftElts = 0, InsertAtByte = 0;
9998 bool Swap = false;
9999
10000 // Shifts required to get the byte we want at element 7.
10001 unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
10002 0, 15, 14, 13, 12, 11, 10, 9};
10003 unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
10004 1, 2, 3, 4, 5, 6, 7, 8};
10005
10006 ArrayRef<int> Mask = N->getMask();
10007 int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
10008
10009 // For each mask element, find out if we're just inserting something
10010 // from V2 into V1 or vice versa.
10011 // Possible permutations inserting an element from V2 into V1:
10012 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10013 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
10014 // ...
10015 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
10016 // Inserting from V1 into V2 will be similar, except mask range will be
10017 // [16,31].
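// For example (illustrative mask), the byte shuffle
//   <0,1,2,3,4,5,6,7,8,9,10,11,12,31,14,15>
// inserts byte 15 of V2 into byte 13 of the otherwise unchanged V1, so it is
// a candidate for a single vinsertb.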
10018
10019 bool FoundCandidate = false;
10020 // If both vector operands for the shuffle are the same vector, the mask
10021 // will contain only elements from the first one and the second one will be
10022 // undef.
10023 unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
10024 // Go through the mask of half-words to find an element that's being moved
10025 // from one vector to the other.
10026 for (unsigned i = 0; i < BytesInVector; ++i) {
10027 unsigned CurrentElement = Mask[i];
10028 // If 2nd operand is undefined, we should only look for element 7 in the
10029 // Mask.
10030 if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
10031 continue;
10032
10033 bool OtherElementsInOrder = true;
10034 // Examine the other elements in the Mask to see if they're in original
10035 // order.
10036 for (unsigned j = 0; j < BytesInVector; ++j) {
10037 if (j == i)
10038 continue;
10039 // If CurrentElement is from V1 [0,15], then we expect the rest of the Mask
10040 // to be from V2 [16,31] and vice versa, unless the 2nd operand is undefined,
10041 // in which case we assume we're always picking from the 1st operand.
10042 int MaskOffset =
10043 (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
10044 if (Mask[j] != OriginalOrder[j] + MaskOffset) {
10045 OtherElementsInOrder = false;
10046 break;
10047 }
10048 }
10049 // If other elements are in original order, we record the number of shifts
10050 // we need to get the element we want into element 7. Also record which byte
10051 // in the vector we should insert into.
10052 if (OtherElementsInOrder) {
10053 // If 2nd operand is undefined, we assume no shifts and no swapping.
10054 if (V2.isUndef()) {
10055 ShiftElts = 0;
10056 Swap = false;
10057 } else {
10058 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
10059 ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
10060 : BigEndianShifts[CurrentElement & 0xF];
10061 Swap = CurrentElement < BytesInVector;
10062 }
10063 InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
10064 FoundCandidate = true;
10065 break;
10066 }
10067 }
10068
10069 if (!FoundCandidate)
10070 return SDValue();
10071
10072 // Candidate found, construct the proper SDAG sequence with VINSERTB,
10073 // optionally with VECSHL if shift is required.
10074 if (Swap)
10075 std::swap(V1, V2);
10076 if (V2.isUndef())
10077 V2 = V1;
10078 if (ShiftElts) {
10079 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10080 DAG.getConstant(ShiftElts, dl, MVT::i32));
10081 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
10082 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10083 }
10084 return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
10085 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10086}
10087
10088/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
10089/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
10090/// SDValue.
10091SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
10092 SelectionDAG &DAG) const {
10093 const unsigned NumHalfWords = 8;
10094 const unsigned BytesInVector = NumHalfWords * 2;
10095 // Check that the shuffle is on half-words.
10096 if (!isNByteElemShuffleMask(N, 2, 1))
10097 return SDValue();
10098
10099 bool IsLE = Subtarget.isLittleEndian();
10100 SDLoc dl(N);
10101 SDValue V1 = N->getOperand(0);
10102 SDValue V2 = N->getOperand(1);
10103 unsigned ShiftElts = 0, InsertAtByte = 0;
10104 bool Swap = false;
10105
10106 // Shifts required to get the half-word we want at element 3.
10107 unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
10108 unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};
10109
10110 uint32_t Mask = 0;
10111 uint32_t OriginalOrderLow = 0x1234567;
10112 uint32_t OriginalOrderHigh = 0x89ABCDEF;
10113 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
10114 // 32-bit space, only need 4-bit nibbles per element.
10115 for (unsigned i = 0; i < NumHalfWords; ++i) {
10116 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10117 Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
10118 }
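// For instance, the identity byte shuffle <0,1,...,15> packs to
// Mask == 0x01234567 (OriginalOrderLow), while a shuffle taking all sixteen
// bytes from V2 packs to 0x89ABCDEF (OriginalOrderHigh).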
10119
10120 // For each mask element, find out if we're just inserting something
10121 // from V2 into V1 or vice versa. Possible permutations inserting an element
10122 // from V2 into V1:
10123 // X, 1, 2, 3, 4, 5, 6, 7
10124 // 0, X, 2, 3, 4, 5, 6, 7
10125 // 0, 1, X, 3, 4, 5, 6, 7
10126 // 0, 1, 2, X, 4, 5, 6, 7
10127 // 0, 1, 2, 3, X, 5, 6, 7
10128 // 0, 1, 2, 3, 4, X, 6, 7
10129 // 0, 1, 2, 3, 4, 5, X, 7
10130 // 0, 1, 2, 3, 4, 5, 6, X
10131 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
10132
10133 bool FoundCandidate = false;
10134 // Go through the mask of half-words to find an element that's being moved
10135 // from one vector to the other.
10136 for (unsigned i = 0; i < NumHalfWords; ++i) {
10137 unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
10138 uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
10139 uint32_t MaskOtherElts = ~(0xF << MaskShift);
10140 uint32_t TargetOrder = 0x0;
10141
10142 // If both vector operands for the shuffle are the same vector, the mask
10143 // will contain only elements from the first one and the second one will be
10144 // undef.
10145 if (V2.isUndef()) {
10146 ShiftElts = 0;
10147 unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
10148 TargetOrder = OriginalOrderLow;
10149 Swap = false;
10150 // Skip if this is not the correct element or the mask of the other
10151 // elements doesn't match our expected order.
10152 if (MaskOneElt == VINSERTHSrcElem &&
10153 (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10154 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10155 FoundCandidate = true;
10156 break;
10157 }
10158 } else { // If both operands are defined.
10159 // Target order is [8,15] if the current mask is between [0,7].
10160 TargetOrder =
10161 (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
10162 // Skip if the mask of the other elements doesn't match our expected order.
10163 if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
10164 // We only need the last 3 bits for the number of shifts.
10165 ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
10166 : BigEndianShifts[MaskOneElt & 0x7];
10167 InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
10168 Swap = MaskOneElt < NumHalfWords;
10169 FoundCandidate = true;
10170 break;
10171 }
10172 }
10173 }
10174
10175 if (!FoundCandidate)
10176 return SDValue();
10177
10178 // Candidate found, construct the proper SDAG sequence with VINSERTH,
10179 // optionally with VECSHL if shift is required.
10180 if (Swap)
10181 std::swap(V1, V2);
10182 if (V2.isUndef())
10183 V2 = V1;
10184 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10185 if (ShiftElts) {
10186 // Double ShiftElts because we're left shifting on v16i8 type.
10187 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
10188 DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
10189 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
10190 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10191 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10192 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10193 }
10194 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
10195 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
10196 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10197 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10198}
10199
10200/// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
10201/// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
10202/// return the default SDValue.
10203SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
10204 SelectionDAG &DAG) const {
10205 // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
10206 // to v16i8. Peek through the bitcasts to get the actual operands.
10207 SDValue LHS = peekThroughBitcasts(SVN->getOperand(0));
10208 SDValue RHS = peekThroughBitcasts(SVN->getOperand(1));
10209
10210 auto ShuffleMask = SVN->getMask();
10211 SDValue VecShuffle(SVN, 0);
10212 SDLoc DL(SVN);
10213
10214 // Check that we have a four byte shuffle.
10215 if (!isNByteElemShuffleMask(SVN, 4, 1))
10216 return SDValue();
10217
10218 // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
10219 if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
10220 std::swap(LHS, RHS);
10221 VecShuffle = DAG.getCommutedVectorShuffle(*SVN);
10222 ShuffleVectorSDNode *CommutedSV = dyn_cast<ShuffleVectorSDNode>(VecShuffle);
10223 if (!CommutedSV)
10224 return SDValue();
10225 ShuffleMask = CommutedSV->getMask();
10226 }
10227
10228 // Ensure that the RHS is a vector of constants.
10229 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
10230 if (!BVN)
10231 return SDValue();
10232
10233 // Check if RHS is a splat of 4-bytes (or smaller).
10234 APInt APSplatValue, APSplatUndef;
10235 unsigned SplatBitSize;
10236 bool HasAnyUndefs;
10237 if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
10238 HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
10239 SplatBitSize > 32)
10240 return SDValue();
10241
10242 // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
10243 // The instruction splats a constant C into two words of the source vector
10244 // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
10245 // Thus we check that the shuffle mask is the equivalent of
10246 // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
10247 // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
10248 // within each word are consecutive, so we only need to check the first byte.
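// Concrete little-endian example (illustrative): the byte shuffle
//   <0,1,2,3, 16,17,18,19, 8,9,10,11, 20,21,22,23>
// keeps words 0 and 2 of LHS and replaces words 1 and 3 with the splatted
// constant, so Index is 0 here (the same mask would give Index 1 on
// big-endian).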
10249 SDValue Index;
10250 bool IsLE = Subtarget.isLittleEndian();
10251 if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
10252 (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
10253 ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
10254 Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
10255 else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
10256 (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
10257 ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
10258 Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
10259 else
10260 return SDValue();
10261
10262 // If the splat is narrower than 32-bits, we need to get the 32-bit value
10263 // for XXSPLTI32DX.
10264 unsigned SplatVal = APSplatValue.getZExtValue();
10265 for (; SplatBitSize < 32; SplatBitSize <<= 1)
10266 SplatVal |= (SplatVal << SplatBitSize);
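// e.g. an 8-bit splat value of 0xAB is widened to 0xABAB and then to
// 0xABABABAB before being used as the XXSPLTI32DX immediate.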
10267
10268 SDValue SplatNode = DAG.getNode(
10269 PPCISD::XXSPLTI32DX, DL, MVT::v2i64, DAG.getBitcast(MVT::v2i64, LHS),
10270 Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
10271 return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
10272}
10273
10274/// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
10275/// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
10276 /// a multiple of 8. Otherwise convert it to a scalar rotation (i128),
10277 /// i.e. (or (shl x, C1), (srl x, 128-C1)).
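/// For example, a rotate left by 16 bits becomes the v16i8 shuffle mask
/// <2,3,...,15,0,1>, while a rotate by 5 bits falls back to
/// (or (shl x, 5), (srl x, 123)) on the i128 scalar.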
10278SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
10279 assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
10280 assert(Op.getValueType() == MVT::v1i128 &&
10281 "Only set v1i128 as custom, other type shouldn't reach here!");
10282 SDLoc dl(Op);
10283 SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
10284 SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
10285 unsigned SHLAmt = N1.getConstantOperandVal(0);
10286 if (SHLAmt % 8 == 0) {
10287 std::array<int, 16> Mask;
10288 std::iota(Mask.begin(), Mask.end(), 0);
10289 std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
10290 if (SDValue Shuffle =
10291 DAG.getVectorShuffle(MVT::v16i8, dl, DAG.getBitcast(MVT::v16i8, N0),
10292 DAG.getUNDEF(MVT::v16i8), Mask))
10293 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
10294 }
10295 SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
10296 SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
10297 DAG.getConstant(SHLAmt, dl, MVT::i32));
10298 SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
10299 DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
10300 SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
10301 return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
10302}
10303
10304/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
10305/// is a shuffle we can handle in a single instruction, return it. Otherwise,
10306/// return the code it can be lowered into. Worst case, it can always be
10307/// lowered into a vperm.
10308SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
10309 SelectionDAG &DAG) const {
10310 SDLoc dl(Op);
10311 SDValue V1 = Op.getOperand(0);
10312 SDValue V2 = Op.getOperand(1);
10313 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10314
10315 // Any nodes that were combined in the target-independent combiner prior
10316 // to vector legalization will not be sent to the target combine. Try to
10317 // combine it here.
10318 if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
10319 if (!isa<ShuffleVectorSDNode>(NewShuffle))
10320 return NewShuffle;
10321 Op = NewShuffle;
10322 SVOp = cast<ShuffleVectorSDNode>(Op);
10323 V1 = Op.getOperand(0);
10324 V2 = Op.getOperand(1);
10325 }
10326 EVT VT = Op.getValueType();
10327 bool isLittleEndian = Subtarget.isLittleEndian();
10328
10329 unsigned ShiftElts, InsertAtByte;
10330 bool Swap = false;
10331
10332 // If this is a load-and-splat, we can do that with a single instruction
10333 // in some cases. However if the load has multiple uses, we don't want to
10334 // combine it because that will just produce multiple loads.
10335 bool IsPermutedLoad = false;
10336 const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
10337 if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
10338 (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
10339 InputLoad->hasOneUse()) {
10340 bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
10341 int SplatIdx =
10342 PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
10343
10344 // The splat index for permuted loads will be in the left half of the vector
10345 // which is strictly wider than the loaded value by 8 bytes. So we need to
10346 // adjust the splat index to point to the correct address in memory.
10347 if (IsPermutedLoad) {
10348 assert((isLittleEndian || IsFourByte) &&
10349 "Unexpected size for permuted load on big endian target");
10350 SplatIdx += IsFourByte ? 2 : 1;
10351 assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
10352 "Splat of a value outside of the loaded memory");
10353 }
10354
10355 LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
10356 // For 4-byte load-and-splat, we need Power9.
10357 if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
10358 uint64_t Offset = 0;
10359 if (IsFourByte)
10360 Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
10361 else
10362 Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
10363
10364 // If the width of the load is the same as the width of the splat,
10365 // loading with an offset would load the wrong memory.
10366 if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
10367 Offset = 0;
10368
10369 SDValue BasePtr = LD->getBasePtr();
10370 if (Offset != 0)
10371 BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
10372 BasePtr, DAG.getIntPtrConstant(Offset, dl));
10373 SDValue Ops[] = {
10374 LD->getChain(), // Chain
10375 BasePtr, // BasePtr
10376 DAG.getValueType(Op.getValueType()) // VT
10377 };
10378 SDVTList VTL =
10379 DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
10380 SDValue LdSplt =
10381 DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
10382 Ops, LD->getMemoryVT(), LD->getMemOperand());
10383 DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
10384 if (LdSplt.getValueType() != SVOp->getValueType(0))
10385 LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
10386 return LdSplt;
10387 }
10388 }
10389
10390 // All v2i64 and v2f64 shuffles are legal
10391 if (VT == MVT::v2i64 || VT == MVT::v2f64)
10392 return Op;
10393
10394 if (Subtarget.hasP9Vector() &&
10395 PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
10396 isLittleEndian)) {
10397 if (V2.isUndef())
10398 V2 = V1;
10399 else if (Swap)
10400 std::swap(V1, V2);
10401 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10402 SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
10403 if (ShiftElts) {
10404 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
10405 DAG.getConstant(ShiftElts, dl, MVT::i32));
10406 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
10407 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10408 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10409 }
10410 SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
10411 DAG.getConstant(InsertAtByte, dl, MVT::i32));
10412 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
10413 }
10414
10415 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
10416 SDValue SplatInsertNode;
10417 if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
10418 return SplatInsertNode;
10419 }
10420
10421 if (Subtarget.hasP9Altivec()) {
10422 SDValue NewISDNode;
10423 if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
10424 return NewISDNode;
10425
10426 if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
10427 return NewISDNode;
10428 }
10429
10430 if (Subtarget.hasVSX() &&
10431 PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10432 if (Swap)
10433 std::swap(V1, V2);
10434 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10435 SDValue Conv2 =
10436 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
10437
10438 SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
10439 DAG.getConstant(ShiftElts, dl, MVT::i32));
10440 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
10441 }
10442
10443 if (Subtarget.hasVSX() &&
10444 PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
10445 if (Swap)
10446 std::swap(V1, V2);
10447 SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10448 SDValue Conv2 =
10449 DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
10450
10451 SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
10452 DAG.getConstant(ShiftElts, dl, MVT::i32));
10453 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
10454 }
10455
10456 if (Subtarget.hasP9Vector()) {
10457 if (PPC::isXXBRHShuffleMask(SVOp)) {
10458 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
10459 SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
10460 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
10461 } else if (PPC::isXXBRWShuffleMask(SVOp)) {
10462 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10463 SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
10464 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
10465 } else if (PPC::isXXBRDShuffleMask(SVOp)) {
10466 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
10467 SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
10468 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
10469 } else if (PPC::isXXBRQShuffleMask(SVOp)) {
10470 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
10471 SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
10472 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
10473 }
10474 }
10475
10476 if (Subtarget.hasVSX()) {
10477 if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
10478 int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
10479
10480 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
10481 SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
10482 DAG.getConstant(SplatIdx, dl, MVT::i32));
10483 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
10484 }
10485
10486 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
10487 if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
10488 SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
10489 SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10490 return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10491 }
10492 }
10493
10494 // Cases that are handled by instructions that take permute immediates
10495 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10496 // selected by the instruction selector.
10497 if (V2.isUndef()) {
10498 if (PPC::isSplatShuffleMask(SVOp, 1) ||
10499 PPC::isSplatShuffleMask(SVOp, 2) ||
10500 PPC::isSplatShuffleMask(SVOp, 4) ||
10501 PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10502 PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10503 PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10504 PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10505 PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10506 PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10507 PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10508 PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10509 PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10510 (Subtarget.hasP8Altivec() && (
10511 PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10512 PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10513 PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10514 return Op;
10515 }
10516 }
10517
10518 // Altivec has a variety of "shuffle immediates" that take two vector inputs
10519 // and produce a fixed permutation. If any of these match, do not lower to
10520 // VPERM.
10521 unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10522 if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10523 PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10524 PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10525 PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10526 PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10527 PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10528 PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10529 PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10530 PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10531 (Subtarget.hasP8Altivec() && (
10532 PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10533 PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10534 PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10535 return Op;
10536
10537 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10538 // perfect shuffle table to emit an optimal matching sequence.
10539 ArrayRef<int> PermMask = SVOp->getMask();
10540
10541 if (!DisablePerfectShuffle && !isLittleEndian) {
10542 unsigned PFIndexes[4];
10543 bool isFourElementShuffle = true;
10544 for (unsigned i = 0; i != 4 && isFourElementShuffle;
10545 ++i) { // Element number
10546 unsigned EltNo = 8; // Start out undef.
10547 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10548 if (PermMask[i * 4 + j] < 0)
10549 continue; // Undef, ignore it.
10550
10551 unsigned ByteSource = PermMask[i * 4 + j];
10552 if ((ByteSource & 3) != j) {
10553 isFourElementShuffle = false;
10554 break;
10555 }
10556
10557 if (EltNo == 8) {
10558 EltNo = ByteSource / 4;
10559 } else if (EltNo != ByteSource / 4) {
10560 isFourElementShuffle = false;
10561 break;
10562 }
10563 }
10564 PFIndexes[i] = EltNo;
10565 }
10566
10567 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10568 // perfect shuffle vector to determine if it is cost effective to do this as
10569 // discrete instructions, or whether we should use a vperm.
10570 // For now, we skip this for little endian until such time as we have a
10571 // little-endian perfect shuffle table.
10572 if (isFourElementShuffle) {
10573 // Compute the index in the perfect shuffle table.
10574 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
10575 PFIndexes[2] * 9 + PFIndexes[3];
10576
10577 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10578 unsigned Cost = (PFEntry >> 30);
10579
10580 // Determining when to avoid vperm is tricky. Many things affect the cost
10581 // of vperm, particularly how many times the perm mask needs to be
10582 // computed. For example, if the perm mask can be hoisted out of a loop or
10583 // is already used (perhaps because there are multiple permutes with the
10584 // same shuffle mask?) the vperm has a cost of 1. OTOH, hoisting the
10585 // permute mask out of the loop requires an extra register.
10586 //
10587 // As a compromise, we only emit discrete instructions if the shuffle can
10588 // be generated in 3 or fewer operations. When we have loop information
10589 // available, if this block is within a loop, we should avoid using vperm
10590 // for 3-operation perms and use a constant pool load instead.
10591 if (Cost < 3)
10592 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10593 }
10594 }
10595
10596 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10597 // vector that will get spilled to the constant pool.
10598 if (V2.isUndef()) V2 = V1;
10599
10600 return LowerVPERM(Op, DAG, PermMask, VT, V1, V2);
10601}
10602
10603SDValue PPCTargetLowering::LowerVPERM(SDValue Op, SelectionDAG &DAG,
10604 ArrayRef<int> PermMask, EVT VT,
10605 SDValue V1, SDValue V2) const {
10606 unsigned Opcode = PPCISD::VPERM;
10607 EVT ValType = V1.getValueType();
10608 SDLoc dl(Op);
10609 bool NeedSwap = false;
10610 bool isLittleEndian = Subtarget.isLittleEndian();
10611 bool isPPC64 = Subtarget.isPPC64();
10612
10613 if (Subtarget.hasVSX() && Subtarget.hasP9Vector() &&
10614 (V1->hasOneUse() || V2->hasOneUse())) {
10615 LLVM_DEBUG(dbgs() << "At least one of two input vectors are dead - using "
10616 "XXPERM instead\n");
10617 Opcode = PPCISD::XXPERM;
10618
10619 // The second input to XXPERM is also an output so if the second input has
10620 // multiple uses then copying is necessary, as a result we want the
10621 // single-use operand to be used as the second input to prevent copying.
10622 if ((!isLittleEndian && !V2->hasOneUse() && V1->hasOneUse()) ||
10623 (isLittleEndian && !V1->hasOneUse() && V2->hasOneUse())) {
10624 std::swap(V1, V2);
10625 NeedSwap = !NeedSwap;
10626 }
10627 }
10628
10629 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10630 // that it is in input element units, not in bytes. Convert now.
10631
10632 // For little endian, the order of the input vectors is reversed, and
10633 // the permutation mask is complemented with respect to 31. This is
10634 // necessary to produce proper semantics with the big-endian-based vperm
10635 // instruction.
10636 EVT EltVT = V1.getValueType().getVectorElementType();
10637 unsigned BytesPerElement = EltVT.getSizeInBits() / 8;
10638
10639 bool V1HasXXSWAPD = V1->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10640 bool V2HasXXSWAPD = V2->getOperand(0)->getOpcode() == PPCISD::XXSWAPD;
10641
10642 /*
10643 Vectors will be appended like so: [ V1 | v2 ]
10644 XXSWAPD on V1:
10645 [ A | B | C | D ] -> [ C | D | A | B ]
10646 0-3 4-7 8-11 12-15 0-3 4-7 8-11 12-15
10647 i.e. index of A, B += 8, and index of C, D -= 8.
10648 XXSWAPD on V2:
10649 [ E | F | G | H ] -> [ G | H | E | F ]
10650 16-19 20-23 24-27 28-31 16-19 20-23 24-27 28-31
10651 i.e. index of E, F += 8, index of G, H -= 8
10652 Swap V1 and V2:
10653 [ V1 | V2 ] -> [ V2 | V1 ]
10654 0-15 16-31 0-15 16-31
10655 i.e. index of V1 += 16, index of V2 -= 16
10656 */
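// For example, with BytesPerElement == 1 on little endian, source element 5
// contributes the control byte 31 - 5 == 26, i.e. the complement with respect
// to 31 described above; on big endian it contributes 5 unchanged.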
10657
10658 SmallVector<SDValue, 16> ResultMask;
10659 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10660 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10661
10662 if (V1HasXXSWAPD) {
10663 if (SrcElt < 8)
10664 SrcElt += 8;
10665 else if (SrcElt < 16)
10666 SrcElt -= 8;
10667 }
10668 if (V2HasXXSWAPD) {
10669 if (SrcElt > 23)
10670 SrcElt -= 8;
10671 else if (SrcElt > 15)
10672 SrcElt += 8;
10673 }
10674 if (NeedSwap) {
10675 if (SrcElt < 16)
10676 SrcElt += 16;
10677 else
10678 SrcElt -= 16;
10679 }
10680 for (unsigned j = 0; j != BytesPerElement; ++j)
10681 if (isLittleEndian)
10682 ResultMask.push_back(
10683 DAG.getConstant(31 - (SrcElt * BytesPerElement + j), dl, MVT::i32));
10684 else
10685 ResultMask.push_back(
10686 DAG.getConstant(SrcElt * BytesPerElement + j, dl, MVT::i32));
10687 }
10688
10689 if (V1HasXXSWAPD) {
10690 dl = SDLoc(V1->getOperand(0));
10691 V1 = V1->getOperand(0)->getOperand(1);
10692 }
10693 if (V2HasXXSWAPD) {
10694 dl = SDLoc(V2->getOperand(0));
10695 V2 = V2->getOperand(0)->getOperand(1);
10696 }
10697
10698 if (isPPC64 && (V1HasXXSWAPD || V2HasXXSWAPD)) {
10699 if (ValType != MVT::v2f64)
10700 V1 = DAG.getBitcast(MVT::v2f64, V1);
10701 if (V2.getValueType() != MVT::v2f64)
10702 V2 = DAG.getBitcast(MVT::v2f64, V2);
10703 }
10704
10705 ShufflesHandledWithVPERM++;
10706 SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10707 LLVM_DEBUG({
10708 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
10709 if (Opcode == PPCISD::XXPERM) {
10710 dbgs() << "Emitting a XXPERM for the following shuffle:\n";
10711 } else {
10712 dbgs() << "Emitting a VPERM for the following shuffle:\n";
10713 }
10714 SVOp->dump();
10715 dbgs() << "With the following permute control vector:\n";
10716 VPermMask.dump();
10717 });
10718
10719 if (Opcode == PPCISD::XXPERM)
10720 VPermMask = DAG.getBitcast(MVT::v4i32, VPermMask);
10721
10722 // For little endian we only need to swap the two inputs here;
10723 // the permute mask above was already computed with this swap in mind.
10724 if (isLittleEndian)
10725 std::swap(V1, V2);
10726
10727 SDValue VPERMNode =
10728 DAG.getNode(Opcode, dl, V1.getValueType(), V1, V2, VPermMask);
10729
10730 VPERMNode = DAG.getBitcast(ValType, VPERMNode);
10731 return VPERMNode;
10732}
10733
10734/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10735/// vector comparison. If it is, return true and fill in Opc/isDot with
10736/// information about the intrinsic.
10737static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10738 bool &isDot, const PPCSubtarget &Subtarget) {
10739 unsigned IntrinsicID = Intrin.getConstantOperandVal(0);
10740 CompareOpc = -1;
10741 isDot = false;
10742 switch (IntrinsicID) {
10743 default:
10744 return false;
10745 // Comparison predicates.
10746 case Intrinsic::ppc_altivec_vcmpbfp_p:
10747 CompareOpc = 966;
10748 isDot = true;
10749 break;
10750 case Intrinsic::ppc_altivec_vcmpeqfp_p:
10751 CompareOpc = 198;
10752 isDot = true;
10753 break;
10754 case Intrinsic::ppc_altivec_vcmpequb_p:
10755 CompareOpc = 6;
10756 isDot = true;
10757 break;
10758 case Intrinsic::ppc_altivec_vcmpequh_p:
10759 CompareOpc = 70;
10760 isDot = true;
10761 break;
10762 case Intrinsic::ppc_altivec_vcmpequw_p:
10763 CompareOpc = 134;
10764 isDot = true;
10765 break;
10766 case Intrinsic::ppc_altivec_vcmpequd_p:
10767 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10768 CompareOpc = 199;
10769 isDot = true;
10770 } else
10771 return false;
10772 break;
10773 case Intrinsic::ppc_altivec_vcmpneb_p:
10774 case Intrinsic::ppc_altivec_vcmpneh_p:
10775 case Intrinsic::ppc_altivec_vcmpnew_p:
10776 case Intrinsic::ppc_altivec_vcmpnezb_p:
10777 case Intrinsic::ppc_altivec_vcmpnezh_p:
10778 case Intrinsic::ppc_altivec_vcmpnezw_p:
10779 if (Subtarget.hasP9Altivec()) {
10780 switch (IntrinsicID) {
10781 default:
10782 llvm_unreachable("Unknown comparison intrinsic.");
10783 case Intrinsic::ppc_altivec_vcmpneb_p:
10784 CompareOpc = 7;
10785 break;
10786 case Intrinsic::ppc_altivec_vcmpneh_p:
10787 CompareOpc = 71;
10788 break;
10789 case Intrinsic::ppc_altivec_vcmpnew_p:
10790 CompareOpc = 135;
10791 break;
10792 case Intrinsic::ppc_altivec_vcmpnezb_p:
10793 CompareOpc = 263;
10794 break;
10795 case Intrinsic::ppc_altivec_vcmpnezh_p:
10796 CompareOpc = 327;
10797 break;
10798 case Intrinsic::ppc_altivec_vcmpnezw_p:
10799 CompareOpc = 391;
10800 break;
10801 }
10802 isDot = true;
10803 } else
10804 return false;
10805 break;
10806 case Intrinsic::ppc_altivec_vcmpgefp_p:
10807 CompareOpc = 454;
10808 isDot = true;
10809 break;
10810 case Intrinsic::ppc_altivec_vcmpgtfp_p:
10811 CompareOpc = 710;
10812 isDot = true;
10813 break;
10814 case Intrinsic::ppc_altivec_vcmpgtsb_p:
10815 CompareOpc = 774;
10816 isDot = true;
10817 break;
10818 case Intrinsic::ppc_altivec_vcmpgtsh_p:
10819 CompareOpc = 838;
10820 isDot = true;
10821 break;
10822 case Intrinsic::ppc_altivec_vcmpgtsw_p:
10823 CompareOpc = 902;
10824 isDot = true;
10825 break;
10826 case Intrinsic::ppc_altivec_vcmpgtsd_p:
10827 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10828 CompareOpc = 967;
10829 isDot = true;
10830 } else
10831 return false;
10832 break;
10833 case Intrinsic::ppc_altivec_vcmpgtub_p:
10834 CompareOpc = 518;
10835 isDot = true;
10836 break;
10837 case Intrinsic::ppc_altivec_vcmpgtuh_p:
10838 CompareOpc = 582;
10839 isDot = true;
10840 break;
10841 case Intrinsic::ppc_altivec_vcmpgtuw_p:
10842 CompareOpc = 646;
10843 isDot = true;
10844 break;
10845 case Intrinsic::ppc_altivec_vcmpgtud_p:
10846 if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10847 CompareOpc = 711;
10848 isDot = true;
10849 } else
10850 return false;
10851 break;
10852
10853 case Intrinsic::ppc_altivec_vcmpequq:
10854 case Intrinsic::ppc_altivec_vcmpgtsq:
10855 case Intrinsic::ppc_altivec_vcmpgtuq:
10856 if (!Subtarget.isISA3_1())
10857 return false;
10858 switch (IntrinsicID) {
10859 default:
10860 llvm_unreachable("Unknown comparison intrinsic.");
10861 case Intrinsic::ppc_altivec_vcmpequq:
10862 CompareOpc = 455;
10863 break;
10864 case Intrinsic::ppc_altivec_vcmpgtsq:
10865 CompareOpc = 903;
10866 break;
10867 case Intrinsic::ppc_altivec_vcmpgtuq:
10868 CompareOpc = 647;
10869 break;
10870 }
10871 break;
10872
10873 // VSX predicate comparisons use the same infrastructure
10874 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10875 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10876 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10877 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10878 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10879 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10880 if (Subtarget.hasVSX()) {
10881 switch (IntrinsicID) {
10882 case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10883 CompareOpc = 99;
10884 break;
10885 case Intrinsic::ppc_vsx_xvcmpgedp_p:
10886 CompareOpc = 115;
10887 break;
10888 case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10889 CompareOpc = 107;
10890 break;
10891 case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10892 CompareOpc = 67;
10893 break;
10894 case Intrinsic::ppc_vsx_xvcmpgesp_p:
10895 CompareOpc = 83;
10896 break;
10897 case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10898 CompareOpc = 75;
10899 break;
10900 }
10901 isDot = true;
10902 } else
10903 return false;
10904 break;
10905
10906 // Normal Comparisons.
10907 case Intrinsic::ppc_altivec_vcmpbfp:
10908 CompareOpc = 966;
10909 break;
10910 case Intrinsic::ppc_altivec_vcmpeqfp:
10911 CompareOpc = 198;
10912 break;
10913 case Intrinsic::ppc_altivec_vcmpequb:
10914 CompareOpc = 6;
10915 break;
10916 case Intrinsic::ppc_altivec_vcmpequh:
10917 CompareOpc = 70;
10918 break;
10919 case Intrinsic::ppc_altivec_vcmpequw:
10920 CompareOpc = 134;
10921 break;
10922 case Intrinsic::ppc_altivec_vcmpequd:
10923 if (Subtarget.hasP8Altivec())
10924 CompareOpc = 199;
10925 else
10926 return false;
10927 break;
10928 case Intrinsic::ppc_altivec_vcmpneb:
10929 case Intrinsic::ppc_altivec_vcmpneh:
10930 case Intrinsic::ppc_altivec_vcmpnew:
10931 case Intrinsic::ppc_altivec_vcmpnezb:
10932 case Intrinsic::ppc_altivec_vcmpnezh:
10933 case Intrinsic::ppc_altivec_vcmpnezw:
10934 if (Subtarget.hasP9Altivec())
10935 switch (IntrinsicID) {
10936 default:
10937 llvm_unreachable("Unknown comparison intrinsic.");
10938 case Intrinsic::ppc_altivec_vcmpneb:
10939 CompareOpc = 7;
10940 break;
10941 case Intrinsic::ppc_altivec_vcmpneh:
10942 CompareOpc = 71;
10943 break;
10944 case Intrinsic::ppc_altivec_vcmpnew:
10945 CompareOpc = 135;
10946 break;
10947 case Intrinsic::ppc_altivec_vcmpnezb:
10948 CompareOpc = 263;
10949 break;
10950 case Intrinsic::ppc_altivec_vcmpnezh:
10951 CompareOpc = 327;
10952 break;
10953 case Intrinsic::ppc_altivec_vcmpnezw:
10954 CompareOpc = 391;
10955 break;
10956 }
10957 else
10958 return false;
10959 break;
10960 case Intrinsic::ppc_altivec_vcmpgefp:
10961 CompareOpc = 454;
10962 break;
10963 case Intrinsic::ppc_altivec_vcmpgtfp:
10964 CompareOpc = 710;
10965 break;
10966 case Intrinsic::ppc_altivec_vcmpgtsb:
10967 CompareOpc = 774;
10968 break;
10969 case Intrinsic::ppc_altivec_vcmpgtsh:
10970 CompareOpc = 838;
10971 break;
10972 case Intrinsic::ppc_altivec_vcmpgtsw:
10973 CompareOpc = 902;
10974 break;
10975 case Intrinsic::ppc_altivec_vcmpgtsd:
10976 if (Subtarget.hasP8Altivec())
10977 CompareOpc = 967;
10978 else
10979 return false;
10980 break;
10981 case Intrinsic::ppc_altivec_vcmpgtub:
10982 CompareOpc = 518;
10983 break;
10984 case Intrinsic::ppc_altivec_vcmpgtuh:
10985 CompareOpc = 582;
10986 break;
10987 case Intrinsic::ppc_altivec_vcmpgtuw:
10988 CompareOpc = 646;
10989 break;
10990 case Intrinsic::ppc_altivec_vcmpgtud:
10991 if (Subtarget.hasP8Altivec())
10992 CompareOpc = 711;
10993 else
10994 return false;
10995 break;
10996 case Intrinsic::ppc_altivec_vcmpequq_p:
10997 case Intrinsic::ppc_altivec_vcmpgtsq_p:
10998 case Intrinsic::ppc_altivec_vcmpgtuq_p:
10999 if (!Subtarget.isISA3_1())
11000 return false;
11001 switch (IntrinsicID) {
11002 default:
11003 llvm_unreachable("Unknown comparison intrinsic.");
11004 case Intrinsic::ppc_altivec_vcmpequq_p:
11005 CompareOpc = 455;
11006 break;
11007 case Intrinsic::ppc_altivec_vcmpgtsq_p:
11008 CompareOpc = 903;
11009 break;
11010 case Intrinsic::ppc_altivec_vcmpgtuq_p:
11011 CompareOpc = 647;
11012 break;
11013 }
11014 isDot = true;
11015 break;
11016 }
11017 return true;
11018}
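// Usage sketch (informal): for llvm.ppc.altivec.vcmpequw.p this helper reports
// CompareOpc = 134 with isDot = true (the record form that also updates CR6),
// while the plain llvm.ppc.altivec.vcmpequw reports the same CompareOpc with
// isDot = false; LowerINTRINSIC_WO_CHAIN below keys off isDot to choose between
// a plain VCMP node and the VCMP_rec + CR6 extraction path.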
11019
11020/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
11021/// lower, do it, otherwise return null.
11022SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
11023 SelectionDAG &DAG) const {
11024 unsigned IntrinsicID = Op.getConstantOperandVal(0);
11025
11026 SDLoc dl(Op);
11027
11028 switch (IntrinsicID) {
11029 case Intrinsic::thread_pointer:
11030 // Reads the thread pointer register, used for __builtin_thread_pointer.
11031 if (Subtarget.isPPC64())
11032 return DAG.getRegister(PPC::X13, MVT::i64);
11033 return DAG.getRegister(PPC::R2, MVT::i32);
11034
11035 case Intrinsic::ppc_rldimi: {
11036 assert(Subtarget.isPPC64() && "rldimi is only available in 64-bit!");
11037 SDValue Src = Op.getOperand(1);
11038 APInt Mask = Op.getConstantOperandAPInt(4);
11039 if (Mask.isZero())
11040 return Op.getOperand(2);
11041 if (Mask.isAllOnes())
11042 return DAG.getNode(ISD::ROTL, dl, MVT::i64, Src, Op.getOperand(3));
11043 uint64_t SH = Op.getConstantOperandVal(3);
11044 unsigned MB = 0, ME = 0;
11045 if (!isRunOfOnes64(Mask.getZExtValue(), MB, ME))
11046 report_fatal_error("invalid rldimi mask!");
11047 // rldimi requires ME=63-SH, otherwise rotation is needed before rldimi.
11048 if (ME < 63 - SH) {
11049 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11050 DAG.getConstant(ME + SH + 1, dl, MVT::i32));
11051 } else if (ME > 63 - SH) {
11052 Src = DAG.getNode(ISD::ROTL, dl, MVT::i64, Src,
11053 DAG.getConstant(ME + SH - 63, dl, MVT::i32));
11054 }
11055 return SDValue(
11056 DAG.getMachineNode(PPC::RLDIMI, dl, MVT::i64,
11057 {Op.getOperand(2), Src,
11058 DAG.getTargetConstant(63 - ME, dl, MVT::i32),
11059 DAG.getTargetConstant(MB, dl, MVT::i32)}),
11060 0);
11061 }
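// Rough worked example of the pre-rotation above: with SH = 8 and
// Mask = 0x00000000FFFF0000, isRunOfOnes64 yields MB = 32 and ME = 47.
// Since ME < 63 - SH, Src is first rotated left by ME + SH + 1 = 56 and the
// emitted RLDIMI rotates by 63 - ME = 16; 56 + 16 = 72 == 8 (mod 64), so the
// combined rotation still matches the requested SH while keeping ME = 63 - SH.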
11062
11063 case Intrinsic::ppc_rlwimi: {
11064 APInt Mask = Op.getConstantOperandAPInt(4);
11065 if (Mask.isZero())
11066 return Op.getOperand(2);
11067 if (Mask.isAllOnes())
11068 return DAG.getNode(ISD::ROTL, dl, MVT::i32, Op.getOperand(1),
11069 Op.getOperand(3));
11070 unsigned MB = 0, ME = 0;
11071 if (!isRunOfOnes(Mask.getZExtValue(), MB, ME))
11072 report_fatal_error("invalid rlwimi mask!");
11073 return SDValue(DAG.getMachineNode(
11074 PPC::RLWIMI, dl, MVT::i32,
11075 {Op.getOperand(2), Op.getOperand(1), Op.getOperand(3),
11076 DAG.getTargetConstant(MB, dl, MVT::i32),
11077 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11078 0);
11079 }
11080
11081 case Intrinsic::ppc_rlwnm: {
11082 if (Op.getConstantOperandVal(3) == 0)
11083 return DAG.getConstant(0, dl, MVT::i32);
11084 unsigned MB = 0, ME = 0;
11085 if (!isRunOfOnes(Op.getConstantOperandVal(3), MB, ME))
11086 report_fatal_error("invalid rlwnm mask!");
11087 return SDValue(
11088 DAG.getMachineNode(PPC::RLWNM, dl, MVT::i32,
11089 {Op.getOperand(1), Op.getOperand(2),
11090 DAG.getTargetConstant(MB, dl, MVT::i32),
11091 DAG.getTargetConstant(ME, dl, MVT::i32)}),
11092 0);
11093 }
11094
11095 case Intrinsic::ppc_mma_disassemble_acc: {
11096 if (Subtarget.isISAFuture()) {
11097 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11098 SDValue WideVec =
11099 SDValue(DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes,
11100 Op.getOperand(1)),
11101 0);
11102 SmallVector<SDValue, 4> RetOps;
11103 SDValue Value = SDValue(WideVec.getNode(), 0);
11104 SDValue Value2 = SDValue(WideVec.getNode(), 1);
11105
11106 SDValue Extract;
11107 Extract = DAG.getNode(
11108 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11109 Subtarget.isLittleEndian() ? Value2 : Value,
11110 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11111 dl, getPointerTy(DAG.getDataLayout())));
11112 RetOps.push_back(Extract);
11113 Extract = DAG.getNode(
11114 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11115 Subtarget.isLittleEndian() ? Value2 : Value,
11116 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11117 dl, getPointerTy(DAG.getDataLayout())));
11118 RetOps.push_back(Extract);
11119 Extract = DAG.getNode(
11120 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11121 Subtarget.isLittleEndian() ? Value : Value2,
11122 DAG.getConstant(Subtarget.isLittleEndian() ? 1 : 0,
11123 dl, getPointerTy(DAG.getDataLayout())));
11124 RetOps.push_back(Extract);
11125 Extract = DAG.getNode(
11126 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
11127 Subtarget.isLittleEndian() ? Value : Value2,
11128 DAG.getConstant(Subtarget.isLittleEndian() ? 0 : 1,
11129 dl, getPointerTy(DAG.getDataLayout())));
11130 RetOps.push_back(Extract);
11131 return DAG.getMergeValues(RetOps, dl);
11132 }
11133 [[fallthrough]];
11134 }
11135 case Intrinsic::ppc_vsx_disassemble_pair: {
11136 int NumVecs = 2;
11137 SDValue WideVec = Op.getOperand(1);
11138 if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
11139 NumVecs = 4;
11140 WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
11141 }
11142 SmallVector<SDValue, 4> RetOps;
11143 for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
11144 SDValue Extract = DAG.getNode(
11145 PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
11146 DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
11147 : VecNo,
11148 dl, getPointerTy(DAG.getDataLayout())));
11149 RetOps.push_back(Extract);
11150 }
11151 return DAG.getMergeValues(RetOps, dl);
11152 }
11153
11154 case Intrinsic::ppc_mma_build_dmr: {
11155 SmallVector<SDValue, 8> Chains;
11156 SmallVector<SDValue, 4> Pairs;
11157 for (int i = 1; i < 9; i += 2) {
11158 SDValue Hi = Op.getOperand(i);
11159 SDValue Lo = Op.getOperand(i + 1);
11160 if (Hi->getOpcode() == ISD::LOAD)
11161 Chains.push_back(Hi.getValue(1));
11162 if (Lo->getOpcode() == ISD::LOAD)
11163 Chains.push_back(Lo.getValue(1));
11164 Pairs.push_back(
11165 DAG.getNode(PPCISD::PAIR_BUILD, dl, MVT::v256i1, {Hi, Lo}));
11166 }
11167 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
11168 SDValue Value = DMFInsert1024(Pairs, SDLoc(Op), DAG);
11169 return DAG.getMergeValues({Value, TF}, dl);
11170 }
11171
11172 case Intrinsic::ppc_mma_dmxxextfdmr512: {
11173 assert(Subtarget.isISAFuture() && "dmxxextfdmr512 requires ISA Future");
11174 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11175 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11176 "Specify P of 0 or 1 for lower or upper 512 bytes");
11177 unsigned HiLo = Idx->getSExtValue();
11178 unsigned Opcode;
11179 unsigned Subx;
11180 if (HiLo == 0) {
11181 Opcode = PPC::DMXXEXTFDMR512;
11182 Subx = PPC::sub_wacc_lo;
11183 } else {
11184 Opcode = PPC::DMXXEXTFDMR512_HI;
11185 Subx = PPC::sub_wacc_hi;
11186 }
11187 SDValue Subreg(
11188 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
11189 Op.getOperand(1),
11190 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11191 0);
11192 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
11193 return SDValue(DAG.getMachineNode(Opcode, dl, ReturnTypes, Subreg), 0);
11194 }
11195
11196 case Intrinsic::ppc_mma_dmxxextfdmr256: {
11197 assert(Subtarget.isISAFuture() && "dmxxextfdmr256 requires ISA Future");
11198 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11199 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11200 "Specify a dmr row pair 0-3");
11201 unsigned IdxVal = Idx->getSExtValue();
11202 unsigned Subx;
11203 switch (IdxVal) {
11204 case 0:
11205 Subx = PPC::sub_dmrrowp0;
11206 break;
11207 case 1:
11208 Subx = PPC::sub_dmrrowp1;
11209 break;
11210 case 2:
11211 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11212 break;
11213 case 3:
11214 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11215 break;
11216 }
11217 SDValue Subreg(
11218 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v256i1,
11219 Op.getOperand(1),
11220 DAG.getTargetConstant(Subx, dl, MVT::i32)),
11221 0);
11222 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11223 return SDValue(
11224 DAG.getMachineNode(PPC::DMXXEXTFDMR256, dl, MVT::v256i1, {Subreg, P}),
11225 0);
11226 }
11227
11228 case Intrinsic::ppc_mma_dmxxinstdmr512: {
11229 assert(Subtarget.isISAFuture() && "dmxxinstdmr512 requires ISA Future");
11230 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(4));
11231 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11232 "Specify P of 0 or 1 for lower or upper 512 bytes");
11233 unsigned HiLo = Idx->getSExtValue();
11234 unsigned Opcode;
11235 unsigned Subx;
11236 if (HiLo == 0) {
11237 Opcode = PPC::DMXXINSTDMR512;
11238 Subx = PPC::sub_wacc_lo;
11239 } else {
11240 Opcode = PPC::DMXXINSTDMR512_HI;
11241 Subx = PPC::sub_wacc_hi;
11242 }
11243 SDValue Ops[] = {Op.getOperand(2), Op.getOperand(3)};
11244 SDValue Wacc = SDValue(DAG.getMachineNode(Opcode, dl, MVT::v512i1, Ops), 0);
11245 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11246 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11247 Op.getOperand(1), Wacc, SubReg),
11248 0);
11249 }
11250
11251 case Intrinsic::ppc_mma_dmxxinstdmr256: {
11252 assert(Subtarget.isISAFuture() && "dmxxinstdmr256 requires ISA Future");
11253 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(3));
11254 assert(Idx && (Idx->getSExtValue() >= 0 || Idx->getSExtValue() <= 3) &&
11255 "Specify a dmr row pair 0-3");
11256 unsigned IdxVal = Idx->getSExtValue();
11257 unsigned Subx;
11258 switch (IdxVal) {
11259 case 0:
11260 Subx = PPC::sub_dmrrowp0;
11261 break;
11262 case 1:
11263 Subx = PPC::sub_dmrrowp1;
11264 break;
11265 case 2:
11266 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp0;
11267 break;
11268 case 3:
11269 Subx = PPC::sub_wacc_hi_then_sub_dmrrowp1;
11270 break;
11271 }
11272 SDValue SubReg = DAG.getTargetConstant(Subx, dl, MVT::i32);
11273 SDValue P = DAG.getTargetConstant(IdxVal, dl, MVT::i32);
11274 SDValue Ops[] = {Op.getOperand(2), P};
11275 SDValue DMRRowp = SDValue(
11276 DAG.getMachineNode(PPC::DMXXINSTDMR256, dl, MVT::v256i1, Ops), 0);
11277 return SDValue(DAG.getMachineNode(PPC::INSERT_SUBREG, dl, MVT::v1024i1,
11278 Op.getOperand(1), DMRRowp, SubReg),
11279 0);
11280 }
11281
11282 case Intrinsic::ppc_mma_xxmfacc:
11283 case Intrinsic::ppc_mma_xxmtacc: {
11284 // Allow pre-isa-future subtargets to lower as normal.
11285 if (!Subtarget.isISAFuture())
11286 return SDValue();
11287 // The xxmtacc and xxmfacc intrinsics take one argument of type v512i1.
11288 // For ISA Future CPUs the corresponding wacc instruction
11289 // dmxx[inst|extf]dmr512 is always generated for type v512i1, so there is
11290 // no need to produce the xxm[t|f]acc here.
11291 SDValue WideVec = Op.getOperand(1);
11292 DAG.ReplaceAllUsesWith(Op, WideVec);
11293 return SDValue();
11294 }
11295
11296 case Intrinsic::ppc_unpack_longdouble: {
11297 auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11298 assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
11299 "Argument of long double unpack must be 0 or 1!");
11300 return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
11301 DAG.getConstant(!!(Idx->getSExtValue()), dl,
11302 Idx->getValueType(0)));
11303 }
11304
11305 case Intrinsic::ppc_compare_exp_lt:
11306 case Intrinsic::ppc_compare_exp_gt:
11307 case Intrinsic::ppc_compare_exp_eq:
11308 case Intrinsic::ppc_compare_exp_uo: {
11309 unsigned Pred;
11310 switch (IntrinsicID) {
11311 case Intrinsic::ppc_compare_exp_lt:
11312 Pred = PPC::PRED_LT;
11313 break;
11314 case Intrinsic::ppc_compare_exp_gt:
11315 Pred = PPC::PRED_GT;
11316 break;
11317 case Intrinsic::ppc_compare_exp_eq:
11318 Pred = PPC::PRED_EQ;
11319 break;
11320 case Intrinsic::ppc_compare_exp_uo:
11321 Pred = PPC::PRED_UN;
11322 break;
11323 }
11324 return SDValue(
11325 DAG.getMachineNode(
11326 PPC::SELECT_CC_I4, dl, MVT::i32,
11327 {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
11328 Op.getOperand(1), Op.getOperand(2)),
11329 0),
11330 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11331 DAG.getTargetConstant(Pred, dl, MVT::i32)}),
11332 0);
11333 }
11334 case Intrinsic::ppc_test_data_class: {
11335 EVT OpVT = Op.getOperand(1).getValueType();
11336 unsigned CmprOpc = OpVT == MVT::f128 ? PPC::XSTSTDCQP
11337 : (OpVT == MVT::f64 ? PPC::XSTSTDCDP
11338 : PPC::XSTSTDCSP);
11339 return SDValue(
11340 DAG.getMachineNode(
11341 PPC::SELECT_CC_I4, dl, MVT::i32,
11342 {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
11343 Op.getOperand(1)),
11344 0),
11345 DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
11346 DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
11347 0);
11348 }
11349 case Intrinsic::ppc_fnmsub: {
11350 EVT VT = Op.getOperand(1).getValueType();
11351 if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128))
11352 return DAG.getNode(
11353 ISD::FNEG, dl, VT,
11354 DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2),
11355 DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3))));
11356 return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1),
11357 Op.getOperand(2), Op.getOperand(3));
11358 }
11359 case Intrinsic::ppc_convert_f128_to_ppcf128:
11360 case Intrinsic::ppc_convert_ppcf128_to_f128: {
11361 RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
11362 ? RTLIB::CONVERT_PPCF128_F128
11363 : RTLIB::CONVERT_F128_PPCF128;
11364 MakeLibCallOptions CallOptions;
11365 std::pair<SDValue, SDValue> Result =
11366 makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
11367 dl, SDValue());
11368 return Result.first;
11369 }
11370 case Intrinsic::ppc_maxfe:
11371 case Intrinsic::ppc_maxfl:
11372 case Intrinsic::ppc_maxfs:
11373 case Intrinsic::ppc_minfe:
11374 case Intrinsic::ppc_minfl:
11375 case Intrinsic::ppc_minfs: {
11376 EVT VT = Op.getValueType();
11377 assert(
11378 all_of(Op->ops().drop_front(4),
11379 [VT](const SDUse &Use) { return Use.getValueType() == VT; }) &&
11380 "ppc_[max|min]f[e|l|s] must have uniform type arguments");
11381 (void)VT;
11382 ISD::CondCode CC = ISD::SETGT;
11383 if (IntrinsicID == Intrinsic::ppc_minfe ||
11384 IntrinsicID == Intrinsic::ppc_minfl ||
11385 IntrinsicID == Intrinsic::ppc_minfs)
11386 CC = ISD::SETLT;
11387 unsigned I = Op.getNumOperands() - 2, Cnt = I;
11388 SDValue Res = Op.getOperand(I);
11389 for (--I; Cnt != 0; --Cnt, I = (--I == 0 ? (Op.getNumOperands() - 1) : I)) {
11390 Res =
11391 DAG.getSelectCC(dl, Res, Op.getOperand(I), Res, Op.getOperand(I), CC);
11392 }
11393 return Res;
11394 }
11395 }
11396
11397 // If this is a lowered altivec predicate compare, CompareOpc is set to the
11398 // opcode number of the comparison.
11399 int CompareOpc;
11400 bool isDot;
11401 if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
11402 return SDValue(); // Don't custom lower most intrinsics.
11403
11404 // If this is a non-dot comparison, make the VCMP node and we are done.
11405 if (!isDot) {
11406 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
11407 Op.getOperand(1), Op.getOperand(2),
11408 DAG.getConstant(CompareOpc, dl, MVT::i32));
11409 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
11410 }
11411
11412 // Create the PPCISD altivec 'dot' comparison node.
11413 SDValue Ops[] = {
11414 Op.getOperand(2), // LHS
11415 Op.getOperand(3), // RHS
11416 DAG.getConstant(CompareOpc, dl, MVT::i32)
11417 };
11418 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
11419 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
11420
11421 // Unpack the result based on how the target uses it.
11422 unsigned BitNo; // Bit # of CR6.
11423 bool InvertBit; // Invert result?
11424 unsigned Bitx;
11425 unsigned SetOp;
11426 switch (Op.getConstantOperandVal(1)) {
11427 default: // Can't happen, don't crash on invalid number though.
11428 case 0: // Return the value of the EQ bit of CR6.
11429 BitNo = 0;
11430 InvertBit = false;
11431 Bitx = PPC::sub_eq;
11432 SetOp = PPCISD::SETBC;
11433 break;
11434 case 1: // Return the inverted value of the EQ bit of CR6.
11435 BitNo = 0;
11436 InvertBit = true;
11437 Bitx = PPC::sub_eq;
11438 SetOp = PPCISD::SETBCR;
11439 break;
11440 case 2: // Return the value of the LT bit of CR6.
11441 BitNo = 2;
11442 InvertBit = false;
11443 Bitx = PPC::sub_lt;
11444 SetOp = PPCISD::SETBC;
11445 break;
11446 case 3: // Return the inverted value of the LT bit of CR6.
11447 BitNo = 2;
11448 InvertBit = true;
11449 Bitx = PPC::sub_lt;
11450 SetOp = PPCISD::SETBCR;
11451 break;
11452 }
11453
11454 SDValue GlueOp = CompNode.getValue(1);
11455 if (Subtarget.isISA3_1()) {
11456 SDValue SubRegIdx = DAG.getTargetConstant(Bitx, dl, MVT::i32);
11457 SDValue CR6Reg = DAG.getRegister(PPC::CR6, MVT::i32);
11458 SDValue CRBit =
11459 SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i1,
11460 CR6Reg, SubRegIdx, GlueOp),
11461 0);
11462 return DAG.getNode(SetOp, dl, MVT::i32, CRBit);
11463 }
11464
11465 // Now that we have the comparison, emit a copy from the CR to a GPR.
11466 // This is flagged to the above dot comparison.
11467 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
11468 DAG.getRegister(PPC::CR6, MVT::i32), GlueOp);
11469
11470 // Shift the bit into the low position.
11471 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
11472 DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
11473 // Isolate the bit.
11474 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
11475 DAG.getConstant(1, dl, MVT::i32));
11476
11477 // If we are supposed to, toggle the bit.
11478 if (InvertBit)
11479 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
11480 DAG.getConstant(1, dl, MVT::i32));
11481 return Flags;
11482}
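// Note on the pre-ISA3.1 bit math above: this lowering assumes CR6 occupies
// bits 7..4 of the MFOCRF result, so for the EQ predicate (BitNo == 0) the
// shift amount is 8 - 3 = 5 and for LT (BitNo == 2) it is 7, leaving the
// selected CR6 bit in bit 0 before the AND with 1 (and the optional XOR).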
11483
11484SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
11485 SelectionDAG &DAG) const {
11486 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
11487 // the beginning of the argument list.
11488 int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
11489 SDLoc DL(Op);
11490 switch (Op.getConstantOperandVal(ArgStart)) {
11491 case Intrinsic::ppc_cfence: {
11492 assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
11493 SDValue Val = Op.getOperand(ArgStart + 1);
11494 EVT Ty = Val.getValueType();
11495 if (Ty == MVT::i128) {
11496 // FIXME: Testing one of two paired registers is sufficient to guarantee
11497 // ordering?
11498 Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
11499 }
11500 unsigned Opcode = Subtarget.isPPC64() ? PPC::CFENCE8 : PPC::CFENCE;
11501 return SDValue(
11502 DAG.getMachineNode(
11503 Opcode, DL, MVT::Other,
11504 DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getScalarIntVT(), Val),
11505 Op.getOperand(0)),
11506 0);
11507 }
11508 case Intrinsic::ppc_mma_disassemble_dmr: {
11509 return DAG.getStore(DAG.getEntryNode(), DL, Op.getOperand(ArgStart + 2),
11510 Op.getOperand(ArgStart + 1), MachinePointerInfo());
11511 }
11512 default:
11513 break;
11514 }
11515 return SDValue();
11516}
11517
11518// Lower scalar BSWAP64 to xxbrd.
11519SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
11520 SDLoc dl(Op);
11521 if (!Subtarget.isPPC64())
11522 return Op;
11523 // MTVSRDD
11524 Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
11525 Op.getOperand(0));
11526 // XXBRD
11527 Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
11528 // MFVSRD
11529 int VectorIndex = 0;
11530 if (Subtarget.isLittleEndian())
11531 VectorIndex = 1;
11532 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
11533 DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
11534 return Op;
11535}
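// For example, on little-endian PPC64 a scalar bswap i64 %x becomes
// build_vector v2i64 {%x, %x} (mtvsrdd), a v2i64 byte swap (xxbrd), and an
// extract of element 1 (mfvsrd); big-endian extracts element 0 instead.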
11536
11537// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
11538// compared to a value that is atomically loaded (atomic loads zero-extend).
11539SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
11540 SelectionDAG &DAG) const {
11541 assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
11542 "Expecting an atomic compare-and-swap here.");
11543 SDLoc dl(Op);
11544 auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
11545 EVT MemVT = AtomicNode->getMemoryVT();
11546 if (MemVT.getSizeInBits() >= 32)
11547 return Op;
11548
11549 SDValue CmpOp = Op.getOperand(2);
11550 // If this is already correctly zero-extended, leave it alone.
11551 auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
11552 if (DAG.MaskedValueIsZero(CmpOp, HighBits))
11553 return Op;
11554
11555 // Clear the high bits of the compare operand.
11556 unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
11557 SDValue NewCmpOp =
11558 DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
11559 DAG.getConstant(MaskVal, dl, MVT::i32));
11560
11561 // Replace the existing compare operand with the properly zero-extended one.
11562 SmallVector<SDValue, 4> Ops;
11563 for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
11564 Ops.push_back(AtomicNode->getOperand(i));
11565 Ops[2] = NewCmpOp;
11566 MachineMemOperand *MMO = AtomicNode->getMemOperand();
11567 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
11568 auto NodeTy =
11569 (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
11570 return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
11571}
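// For example, an i8 cmpxchg gets MaskVal = (1 << 8) - 1 = 0xFF, so the
// expected value is ANDed with 0xFF before being fed to ATOMIC_CMP_SWAP_8,
// matching the zero-extended value produced by the atomic byte load.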
11572
11573SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
11574 SelectionDAG &DAG) const {
11575 AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
11576 EVT MemVT = N->getMemoryVT();
11577 assert(MemVT.getSimpleVT() == MVT::i128 &&
11578 "Expect quadword atomic operations");
11579 SDLoc dl(N);
11580 unsigned Opc = N->getOpcode();
11581 switch (Opc) {
11582 case ISD::ATOMIC_LOAD: {
11583 // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
11584 // lowered to ppc instructions by pattern matching instruction selector.
11585 SDVTList Tys = DAG.getVTList(MVT::i64, MVT::i64, MVT::Other);
11586 SmallVector<SDValue, 4> Ops{
11587 N->getOperand(0),
11588 DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
11589 for (int I = 1, E = N->getNumOperands(); I < E; ++I)
11590 Ops.push_back(N->getOperand(I));
11591 SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
11592 Ops, MemVT, N->getMemOperand());
11593 SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
11594 SDValue ValHi =
11595 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
11596 ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
11597 DAG.getConstant(64, dl, MVT::i32));
11598 SDValue Val =
11599 DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
11600 return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
11601 {Val, LoadedVal.getValue(2)});
11602 }
11603 case ISD::ATOMIC_STORE: {
11604 // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
11605 // lowered to ppc instructions by pattern matching instruction selector.
11606 SDVTList Tys = DAG.getVTList(MVT::Other);
11607 SmallVector<SDValue, 4> Ops{
11608 N->getOperand(0),
11609 DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
11610 SDValue Val = N->getOperand(1);
11611 SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
11612 SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
11613 DAG.getConstant(64, dl, MVT::i32));
11614 ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
11615 Ops.push_back(ValLo);
11616 Ops.push_back(ValHi);
11617 Ops.push_back(N->getOperand(2));
11618 return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
11619 N->getMemOperand());
11620 }
11621 default:
11622 llvm_unreachable("Unexpected atomic opcode");
11623 }
11624}
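// In short: a quadword atomic load is reassembled as
//   Val = zext(lo) | (zext(hi) << 64)
// from the two i64 results of ppc_atomic_load_i128, and a quadword atomic
// store splits its i128 operand into lo = trunc(Val) and hi = trunc(Val >> 64)
// before calling ppc_atomic_store_i128.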
11625
11626 static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl,
11627 SelectionDAG &DAG,
11628 const PPCSubtarget &Subtarget) {
11629 assert(Mask <= fcAllFlags && "Invalid fp_class flags!");
11630
11631 enum DataClassMask {
11632 DC_NAN = 1 << 6,
11633 DC_NEG_INF = 1 << 4,
11634 DC_POS_INF = 1 << 5,
11635 DC_NEG_ZERO = 1 << 2,
11636 DC_POS_ZERO = 1 << 3,
11637 DC_NEG_SUBNORM = 1,
11638 DC_POS_SUBNORM = 1 << 1,
11639 };
11640
11641 EVT VT = Op.getValueType();
11642
11643 unsigned TestOp = VT == MVT::f128 ? PPC::XSTSTDCQP
11644 : VT == MVT::f64 ? PPC::XSTSTDCDP
11645 : PPC::XSTSTDCSP;
11646
11647 if (Mask == fcAllFlags)
11648 return DAG.getBoolConstant(true, Dl, MVT::i1, VT);
11649 if (Mask == 0)
11650 return DAG.getBoolConstant(false, Dl, MVT::i1, VT);
11651
11652 // Cases where it is cheaper, or necessary, to test the inverted set of flags.
11653 if ((Mask & fcNormal) == fcNormal || Mask == ~fcQNan || Mask == ~fcSNan) {
11654 SDValue Rev = getDataClassTest(Op, ~Mask, Dl, DAG, Subtarget);
11655 return DAG.getNOT(Dl, Rev, MVT::i1);
11656 }
11657
11658 // PowerPC has no direct test for whether a value is 'normal'. Test every other
11659 // class instead and check that the value is 'not not-normal' with the expected sign.
11660 if (Mask & fcNormal) {
11661 SDValue Rev(DAG.getMachineNode(
11662 TestOp, Dl, MVT::i32,
11663 DAG.getTargetConstant(DC_NAN | DC_NEG_INF | DC_POS_INF |
11664 DC_NEG_ZERO | DC_POS_ZERO |
11665 DC_NEG_SUBNORM | DC_POS_SUBNORM,
11666 Dl, MVT::i32),
11667 Op),
11668 0);
11669 // The sign is stored in CR bit 0 (LT); the result is in CR bit 2 (EQ).
11670 SDValue Sign(
11671 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11672 DAG.getTargetConstant(PPC::sub_lt, Dl, MVT::i32)),
11673 0);
11674 SDValue Normal(DAG.getNOT(
11675 Dl,
11676 SDValue(DAG.getMachineNode(
11677 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1, Rev,
11678 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11679 0),
11680 MVT::i1));
11681 if (Mask & fcPosNormal)
11682 Sign = DAG.getNOT(Dl, Sign, MVT::i1);
11683 SDValue Result = DAG.getNode(ISD::AND, Dl, MVT::i1, Sign, Normal);
11684 if (Mask == fcPosNormal || Mask == fcNegNormal)
11685 return Result;
11686
11687 return DAG.getNode(
11688 ISD::OR, Dl, MVT::i1,
11689 getDataClassTest(Op, Mask & ~fcNormal, Dl, DAG, Subtarget), Result);
11690 }
11691
11692 // The instruction doesn't differentiate between signaling and quiet NaNs. Test
11693 // the other classes first, then check that it 'is NaN and is signaling/quiet'.
11694 if ((Mask & fcNan) == fcQNan || (Mask & fcNan) == fcSNan) {
11695 bool IsQuiet = Mask & fcQNan;
11696 SDValue NanCheck = getDataClassTest(Op, fcNan, Dl, DAG, Subtarget);
11697
11698 // Quietness is determined by the first bit of the fraction field.
11699 uint64_t QuietMask = 0;
11700 SDValue HighWord;
11701 if (VT == MVT::f128) {
11702 HighWord = DAG.getNode(
11703 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, DAG.getBitcast(MVT::v4i32, Op),
11704 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 3 : 0, Dl));
11705 QuietMask = 0x8000;
11706 } else if (VT == MVT::f64) {
11707 if (Subtarget.isPPC64()) {
11708 HighWord = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32,
11709 DAG.getBitcast(MVT::i64, Op),
11710 DAG.getConstant(1, Dl, MVT::i32));
11711 } else {
11712 SDValue Vec = DAG.getBitcast(
11713 MVT::v4i32, DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v2f64, Op));
11714 HighWord = DAG.getNode(
11715 ISD::EXTRACT_VECTOR_ELT, Dl, MVT::i32, Vec,
11716 DAG.getVectorIdxConstant(Subtarget.isLittleEndian() ? 1 : 0, Dl));
11717 }
11718 QuietMask = 0x80000;
11719 } else if (VT == MVT::f32) {
11720 HighWord = DAG.getBitcast(MVT::i32, Op);
11721 QuietMask = 0x400000;
11722 }
11723 SDValue NanRes = DAG.getSetCC(
11724 Dl, MVT::i1,
11725 DAG.getNode(ISD::AND, Dl, MVT::i32, HighWord,
11726 DAG.getConstant(QuietMask, Dl, MVT::i32)),
11727 DAG.getConstant(0, Dl, MVT::i32), IsQuiet ? ISD::SETNE : ISD::SETEQ);
11728 NanRes = DAG.getNode(ISD::AND, Dl, MVT::i1, NanCheck, NanRes);
11729 if (Mask == fcQNan || Mask == fcSNan)
11730 return NanRes;
11731
11732 return DAG.getNode(ISD::OR, Dl, MVT::i1,
11733 getDataClassTest(Op, Mask & ~fcNan, Dl, DAG, Subtarget),
11734 NanRes);
11735 }
11736
11737 unsigned NativeMask = 0;
11738 if ((Mask & fcNan) == fcNan)
11739 NativeMask |= DC_NAN;
11740 if (Mask & fcNegInf)
11741 NativeMask |= DC_NEG_INF;
11742 if (Mask & fcPosInf)
11743 NativeMask |= DC_POS_INF;
11744 if (Mask & fcNegZero)
11745 NativeMask |= DC_NEG_ZERO;
11746 if (Mask & fcPosZero)
11747 NativeMask |= DC_POS_ZERO;
11748 if (Mask & fcNegSubnormal)
11749 NativeMask |= DC_NEG_SUBNORM;
11750 if (Mask & fcPosSubnormal)
11751 NativeMask |= DC_POS_SUBNORM;
11752 return SDValue(
11753 DAG.getMachineNode(
11754 TargetOpcode::EXTRACT_SUBREG, Dl, MVT::i1,
11755 SDValue(DAG.getMachineNode(
11756 TestOp, Dl, MVT::i32,
11757 DAG.getTargetConstant(NativeMask, Dl, MVT::i32), Op),
11758 0),
11759 DAG.getTargetConstant(PPC::sub_eq, Dl, MVT::i32)),
11760 0);
11761}
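// Sketch of the common path above: a query such as fcPosInf | fcNegInf maps to
// DC_POS_INF | DC_NEG_INF = (1 << 5) | (1 << 4) = 0x30, which is emitted as the
// immediate of XSTSTDC[S|D|Q]P, and the EQ bit of the resulting CR field is
// extracted as the i1 answer; only fcNormal and single-NaN-kind queries need
// the extra fix-ups handled earlier in this function.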
11762
11763SDValue PPCTargetLowering::LowerIS_FPCLASS(SDValue Op,
11764 SelectionDAG &DAG) const {
11765 assert(Subtarget.hasP9Vector() && "Test data class requires Power9");
11766 SDValue LHS = Op.getOperand(0);
11767 uint64_t RHSC = Op.getConstantOperandVal(1);
11768 SDLoc Dl(Op);
11769 FPClassTest Category = static_cast<FPClassTest>(RHSC);
11770 if (LHS.getValueType() == MVT::ppcf128) {
11771 // The higher part determines the value class.
11772 LHS = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::f64, LHS,
11773 DAG.getConstant(1, Dl, MVT::i32));
11774 }
11775
11776 return getDataClassTest(LHS, Category, Dl, DAG, Subtarget);
11777}
11778
11779// Adjust the length value for a load/store with length to account for the
11780// instructions requiring a left justified length, and for non-byte element
11781// types requiring scaling by element size.
11782static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left,
11783 SelectionDAG &DAG) {
11784 SDLoc dl(Val);
11785 EVT VT = Val->getValueType(0);
11786 unsigned LeftAdj = Left ? VT.getSizeInBits() - 8 : 0;
11787 unsigned TypeAdj = llvm::countr_zero<uint32_t>(Bits / 8);
11788 SDValue SHLAmt = DAG.getConstant(LeftAdj + TypeAdj, dl, VT);
11789 return DAG.getNode(ISD::SHL, dl, VT, Val, SHLAmt);
11790}
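// Rough example on a 64-bit target: for a v8i16 element type, Bits = 16, so
// TypeAdj = countr_zero(2) = 1, and with Left == true LeftAdj = 64 - 8 = 56.
// The element count is therefore shifted left by 57, i.e. doubled into a byte
// count and left-justified into the top byte of the doubleword, the form the
// non-prefixed lxvl/stxvl are assumed to take; ISA Future lxvrl/stxvrl only
// need the element-size scaling.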
11791
11792SDValue PPCTargetLowering::LowerVP_LOAD(SDValue Op, SelectionDAG &DAG) const {
11793 auto VPLD = cast<VPLoadSDNode>(Op);
11794 bool Future = Subtarget.isISAFuture();
11795 SDLoc dl(Op);
11796 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(3).getNode(), true) &&
11797 "Mask predication not supported");
11798 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11799 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPLD->getOperand(4));
11800 unsigned IID = Future ? Intrinsic::ppc_vsx_lxvrl : Intrinsic::ppc_vsx_lxvl;
11801 unsigned EltBits = Op->getValueType(0).getScalarType().getSizeInBits();
11802 Len = AdjustLength(Len, EltBits, !Future, DAG);
11803 SDValue Ops[] = {VPLD->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11804 VPLD->getOperand(1), Len};
11805 SDVTList Tys = DAG.getVTList(Op->getValueType(0), MVT::Other);
11806 SDValue VPL =
11807 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys, Ops,
11808 VPLD->getMemoryVT(), VPLD->getMemOperand());
11809 return VPL;
11810}
11811
11812SDValue PPCTargetLowering::LowerVP_STORE(SDValue Op, SelectionDAG &DAG) const {
11813 auto VPST = cast<VPStoreSDNode>(Op);
11814 assert(ISD::isConstantSplatVectorAllOnes(Op->getOperand(4).getNode(), true) &&
11815 "Mask predication not supported");
11816 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11817 SDLoc dl(Op);
11818 SDValue Len = DAG.getNode(ISD::ANY_EXTEND, dl, PtrVT, VPST->getOperand(5));
11819 unsigned EltBits =
11820 Op->getOperand(1).getValueType().getScalarType().getSizeInBits();
11821 bool Future = Subtarget.isISAFuture();
11822 unsigned IID = Future ? Intrinsic::ppc_vsx_stxvrl : Intrinsic::ppc_vsx_stxvl;
11823 Len = AdjustLength(Len, EltBits, !Future, DAG);
11824 SDValue Ops[] = {
11825 VPST->getChain(), DAG.getConstant(IID, dl, MVT::i32),
11826 DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, VPST->getOperand(1)),
11827 VPST->getOperand(2), Len};
11828 SDVTList Tys = DAG.getVTList(MVT::Other);
11829 SDValue VPS =
11830 DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
11831 VPST->getMemoryVT(), VPST->getMemOperand());
11832 return VPS;
11833}
11834
11835SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
11836 SelectionDAG &DAG) const {
11837 SDLoc dl(Op);
11838
11839 MachineFunction &MF = DAG.getMachineFunction();
11840 SDValue Op0 = Op.getOperand(0);
11841 EVT ValVT = Op0.getValueType();
11842 unsigned EltSize = Op.getValueType().getScalarSizeInBits();
11843 if (isa<ConstantSDNode>(Op0) && EltSize <= 32) {
11844 int64_t IntVal = Op.getConstantOperandVal(0);
11845 if (IntVal >= -16 && IntVal <= 15)
11846 return getCanonicalConstSplat(IntVal, EltSize / 8, Op.getValueType(), DAG,
11847 dl);
11848 }
11849
11850 ReuseLoadInfo RLI;
11851 if (Subtarget.hasLFIWAX() && Subtarget.hasVSX() &&
11852 Op.getValueType() == MVT::v4i32 && Op0.getOpcode() == ISD::LOAD &&
11853 Op0.getValueType() == MVT::i32 && Op0.hasOneUse() &&
11854 canReuseLoadAddress(Op0, MVT::i32, RLI, DAG, ISD::NON_EXTLOAD)) {
11855
11856 MachineMemOperand *MMO =
11857 MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
11858 RLI.Alignment, RLI.AAInfo, RLI.Ranges);
11859 SDValue Ops[] = {RLI.Chain, RLI.Ptr, DAG.getValueType(Op.getValueType())};
11860 SDValue Bits = DAG.getMemIntrinsicNode(
11861 PPCISD::LD_SPLAT, dl, DAG.getVTList(MVT::v4i32, MVT::Other), Ops,
11862 MVT::i32, MMO);
11863 if (RLI.ResChain)
11864 DAG.makeEquivalentMemoryOrdering(RLI.ResChain, Bits.getValue(1));
11865 return Bits.getValue(0);
11866 }
11867
11868 // Create a stack slot that is 16-byte aligned.
11869 MachineFrameInfo &MFI = MF.getFrameInfo();
11870 int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
11871 EVT PtrVT = getPointerTy(DAG.getDataLayout());
11872 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
11873
11874 SDValue Val = Op0;
11875 // P10 hardware store forwarding requires that a single store contains all
11876 // the data for the load. P10 is able to merge a pair of adjacent stores. Try
11877 // to avoid load hit store on P10 when running binaries compiled for older
11878 // processors by generating two mergeable scalar stores to forward with the
11879 // vector load.
11880 if (!DisableP10StoreForward && Subtarget.isPPC64() &&
11881 !Subtarget.isLittleEndian() && ValVT.isInteger() &&
11882 ValVT.getSizeInBits() <= 64) {
11883 Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, Val);
11884 EVT ShiftAmountTy = getShiftAmountTy(MVT::i64, DAG.getDataLayout());
11885 SDValue ShiftBy = DAG.getConstant(
11886 64 - Op.getValueType().getScalarSizeInBits(), dl, ShiftAmountTy);
11887 Val = DAG.getNode(ISD::SHL, dl, MVT::i64, Val, ShiftBy);
11888 SDValue Plus8 =
11889 DAG.getNode(ISD::ADD, dl, PtrVT, FIdx, DAG.getConstant(8, dl, PtrVT));
11890 SDValue Store2 =
11891 DAG.getStore(DAG.getEntryNode(), dl, Val, Plus8, MachinePointerInfo());
11892 SDValue Store = DAG.getStore(Store2, dl, Val, FIdx, MachinePointerInfo());
11893 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx,
11894 MachinePointerInfo());
11895 }
11896
11897 // Store the input value into Value#0 of the stack slot.
11898 SDValue Store =
11899 DAG.getStore(DAG.getEntryNode(), dl, Val, FIdx, MachinePointerInfo());
11900 // Load it out.
11901 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
11902}
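// Note on the store-forwarding path above: only element 0 of a SCALAR_TO_VECTOR
// result is defined, so writing the same left-justified doubleword to both
// halves of the 16-byte slot is harmless; it simply guarantees that the pair of
// adjacent 8-byte stores covers the whole slot and can forward into the
// following 16-byte vector load on P10.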
11903
11904SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
11905 SelectionDAG &DAG) const {
11906 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
11907 "Should only be called for ISD::INSERT_VECTOR_ELT");
11908
11909 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
11910
11911 EVT VT = Op.getValueType();
11912 SDLoc dl(Op);
11913 SDValue V1 = Op.getOperand(0);
11914 SDValue V2 = Op.getOperand(1);
11915
11916 if (VT == MVT::v2f64 && C)
11917 return Op;
11918
11919 if (Subtarget.hasP9Vector()) {
11920 // An f32 load feeding into a v4f32 insert_vector_elt is handled in this way
11921 // because on P10, it allows this specific insert_vector_elt load pattern to
11922 // utilize the refactored load and store infrastructure in order to exploit
11923 // prefixed loads.
11924 // On targets with inexpensive direct moves (Power9 and up), a
11925 // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
11926 // load since a single precision load will involve conversion to double
11927 // precision on the load followed by another conversion to single precision.
11928 if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
11929 (isa<LoadSDNode>(V2))) {
11930 SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
11931 SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
11932 SDValue InsVecElt =
11933 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
11934 BitcastLoad, Op.getOperand(2));
11935 return DAG.getBitcast(MVT::v4f32, InsVecElt);
11936 }
11937 }
11938
11939 if (Subtarget.isISA3_1()) {
11940 if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
11941 return SDValue();
11942 // On P10, we have legal lowering for constant and variable indices for
11943 // all vectors.
11944 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
11945 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
11946 return Op;
11947 }
11948
11949 // Before P10, we have legal lowering for constant indices but not for
11950 // variable ones.
11951 if (!C)
11952 return SDValue();
11953
11954 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
11955 if (VT == MVT::v8i16 || VT == MVT::v16i8) {
11956 SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
11957 unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
11958 unsigned InsertAtElement = C->getZExtValue();
11959 unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
11960 if (Subtarget.isLittleEndian()) {
11961 InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
11962 }
11963 return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
11964 DAG.getConstant(InsertAtByte, dl, MVT::i32));
11965 }
11966 return Op;
11967}
11968
11969SDValue PPCTargetLowering::LowerDMFVectorLoad(SDValue Op,
11970 SelectionDAG &DAG) const {
11971 SDLoc dl(Op);
11972 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
11973 SDValue LoadChain = LN->getChain();
11974 SDValue BasePtr = LN->getBasePtr();
11975 EVT VT = Op.getValueType();
11976 bool IsV1024i1 = VT == MVT::v1024i1;
11977 bool IsV2048i1 = VT == MVT::v2048i1;
11978
11979 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
11980 // Dense Math dmr pair registers, respectively.
11981 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
11982 (void)IsV2048i1;
11983 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
11984 "Dense Math support required.");
11985 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
11986
11987 SmallVector<SDValue, 8> Loads;
11988 SmallVector<SDValue, 8> LoadChains;
11989
11990 SDValue IntrinID = DAG.getConstant(Intrinsic::ppc_vsx_lxvp, dl, MVT::i32);
11991 SDValue LoadOps[] = {LoadChain, IntrinID, BasePtr};
11992 MachineMemOperand *MMO = LN->getMemOperand();
11993 unsigned NumVecs = VT.getSizeInBits() / 256;
11994 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
11995 MachineMemOperand *NewMMO =
11996 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
11997 if (Idx > 0) {
11998 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
11999 DAG.getConstant(32, dl, BasePtr.getValueType()));
12000 LoadOps[2] = BasePtr;
12001 }
12002 SDValue Ld = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
12003 DAG.getVTList(MVT::v256i1, MVT::Other),
12004 LoadOps, MVT::v256i1, NewMMO);
12005 LoadChains.push_back(Ld.getValue(1));
12006 Loads.push_back(Ld);
12007 }
12008
12009 if (Subtarget.isLittleEndian()) {
12010 std::reverse(Loads.begin(), Loads.end());
12011 std::reverse(LoadChains.begin(), LoadChains.end());
12012 }
12013
12014 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12015 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Loads[0],
12016 Loads[1]),
12017 0);
12018 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12019 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12020 Loads[2], Loads[3]),
12021 0);
12022 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12023 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12024 const SDValue Ops[] = {RC, Lo, LoSub, Hi, HiSub};
12025
12026 SDValue Value =
12027 SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Ops), 0);
12028
12029 if (IsV1024i1) {
12030 return DAG.getMergeValues({Value, TF}, dl);
12031 }
12032
12033 // Handle Loads for V2048i1 which represents a dmr pair.
12034 SDValue DmrPValue;
12035 SDValue Dmr1Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1,
12036 Loads[4], Loads[5]),
12037 0);
12038 SDValue Dmr1Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12039 Loads[6], Loads[7]),
12040 0);
12041 const SDValue Dmr1Ops[] = {RC, Dmr1Lo, LoSub, Dmr1Hi, HiSub};
12042 SDValue Dmr1Value = SDValue(
12043 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1, Dmr1Ops), 0);
12044
12045 SDValue Dmr0Sub = DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32);
12046 SDValue Dmr1Sub = DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32);
12047
12048 SDValue DmrPRC = DAG.getTargetConstant(PPC::DMRpRCRegClassID, dl, MVT::i32);
12049 const SDValue DmrPOps[] = {DmrPRC, Value, Dmr0Sub, Dmr1Value, Dmr1Sub};
12050
12051 DmrPValue = SDValue(
12052 DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v2048i1, DmrPOps), 0);
12053
12054 return DAG.getMergeValues({DmrPValue, TF}, dl);
12055}
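// Summary of the flow above: a v1024i1 DMR value is built from four 32-byte
// lxvp loads (eight for a v2048i1 DMR pair); on little-endian targets the
// loaded v256i1 pairs are reversed first, then combined two at a time with
// DMXXINSTDMR512 / DMXXINSTDMR512_HI into v512i1 halves and assembled into the
// DMR (or DMR pair) register class with REG_SEQUENCE.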
12056
12057SDValue PPCTargetLowering::DMFInsert1024(const SmallVectorImpl<SDValue> &Pairs,
12058 const SDLoc &dl,
12059 SelectionDAG &DAG) const {
12060 SDValue Lo(DAG.getMachineNode(PPC::DMXXINSTDMR512, dl, MVT::v512i1, Pairs[0],
12061 Pairs[1]),
12062 0);
12063 SDValue LoSub = DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32);
12064 SDValue Hi(DAG.getMachineNode(PPC::DMXXINSTDMR512_HI, dl, MVT::v512i1,
12065 Pairs[2], Pairs[3]),
12066 0);
12067 SDValue HiSub = DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32);
12068 SDValue RC = DAG.getTargetConstant(PPC::DMRRCRegClassID, dl, MVT::i32);
12069
12070 return SDValue(DAG.getMachineNode(PPC::REG_SEQUENCE, dl, MVT::v1024i1,
12071 {RC, Lo, LoSub, Hi, HiSub}),
12072 0);
12073}
12074
12075SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
12076 SelectionDAG &DAG) const {
12077 SDLoc dl(Op);
12078 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
12079 SDValue LoadChain = LN->getChain();
12080 SDValue BasePtr = LN->getBasePtr();
12081 EVT VT = Op.getValueType();
12082
12083 if (VT == MVT::v1024i1 || VT == MVT::v2048i1)
12084 return LowerDMFVectorLoad(Op, DAG);
12085
12086 if (VT != MVT::v256i1 && VT != MVT::v512i1)
12087 return Op;
12088
12089 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12090 // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
12091 // 2 or 4 vsx registers.
12092 assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
12093 "Type unsupported without MMA");
12094 assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12095 "Type unsupported without paired vector support");
12096 Align Alignment = LN->getAlign();
12097 SmallVector<SDValue, 4> Loads;
12098 SmallVector<SDValue, 4> LoadChains;
12099 unsigned NumVecs = VT.getSizeInBits() / 128;
12100 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12101 SDValue Load =
12102 DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
12103 LN->getPointerInfo().getWithOffset(Idx * 16),
12104 commonAlignment(Alignment, Idx * 16),
12105 LN->getMemOperand()->getFlags(), LN->getAAInfo());
12106 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12107 DAG.getConstant(16, dl, BasePtr.getValueType()));
12108 Loads.push_back(Load);
12109 LoadChains.push_back(Load.getValue(1));
12110 }
12111 if (Subtarget.isLittleEndian()) {
12112 std::reverse(Loads.begin(), Loads.end());
12113 std::reverse(LoadChains.begin(), LoadChains.end());
12114 }
12115 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
12116 SDValue Value =
12117 DAG.getNode(VT == MVT::v512i1 ? PPCISD::ACC_BUILD : PPCISD::PAIR_BUILD,
12118 dl, VT, Loads);
12119 SDValue RetOps[] = {Value, TF};
12120 return DAG.getMergeValues(RetOps, dl);
12121}
12122
12123SDValue PPCTargetLowering::LowerDMFVectorStore(SDValue Op,
12124 SelectionDAG &DAG) const {
12125
12126 SDLoc dl(Op);
12127 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12128 SDValue StoreChain = SN->getChain();
12129 SDValue BasePtr = SN->getBasePtr();
12130 SmallVector<SDValue, 4> Values;
12131 SmallVector<SDValue, 4> Stores;
12132 EVT VT = SN->getValue().getValueType();
12133 bool IsV1024i1 = VT == MVT::v1024i1;
12134 bool IsV2048i1 = VT == MVT::v2048i1;
12135
12136 // The types v1024i1 and v2048i1 are used for Dense Math dmr registers and
12137 // Dense Math dmr pair registers, respectively.
12138 assert((IsV1024i1 || IsV2048i1) && "Unsupported type.");
12139 (void)IsV2048i1;
12140 assert((Subtarget.hasMMA() && Subtarget.isISAFuture()) &&
12141 "Dense Math support required.");
12142 assert(Subtarget.pairedVectorMemops() && "Vector pair support required.");
12143
12144 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12145 if (IsV1024i1) {
12146 SDValue Lo(DAG.getMachineNode(
12147 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12148 Op.getOperand(1),
12149 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12150 0);
12151 SDValue Hi(DAG.getMachineNode(
12152 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1,
12153 Op.getOperand(1),
12154 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12155 0);
12156 MachineSDNode *ExtNode =
12157 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Lo);
12158 Values.push_back(SDValue(ExtNode, 0));
12159 Values.push_back(SDValue(ExtNode, 1));
12160 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Hi);
12161 Values.push_back(SDValue(ExtNode, 0));
12162 Values.push_back(SDValue(ExtNode, 1));
12163 } else {
12164 // This corresponds to v2048i1 which represents a dmr pair.
12165 SDValue Dmr0(
12166 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12167 Op.getOperand(1),
12168 DAG.getTargetConstant(PPC::sub_dmr0, dl, MVT::i32)),
12169 0);
12170
12171 SDValue Dmr1(
12172 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::v1024i1,
12173 Op.getOperand(1),
12174 DAG.getTargetConstant(PPC::sub_dmr1, dl, MVT::i32)),
12175 0);
12176
12177 SDValue Dmr0Lo(DAG.getMachineNode(
12178 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12179 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12180 0);
12181
12182 SDValue Dmr0Hi(DAG.getMachineNode(
12183 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr0,
12184 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12185 0);
12186
12187 SDValue Dmr1Lo(DAG.getMachineNode(
12188 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12189 DAG.getTargetConstant(PPC::sub_wacc_lo, dl, MVT::i32)),
12190 0);
12191
12192 SDValue Dmr1Hi(DAG.getMachineNode(
12193 TargetOpcode::EXTRACT_SUBREG, dl, MVT::v512i1, Dmr1,
12194 DAG.getTargetConstant(PPC::sub_wacc_hi, dl, MVT::i32)),
12195 0);
12196
12197 MachineSDNode *ExtNode =
12198 DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr0Lo);
12199 Values.push_back(SDValue(ExtNode, 0));
12200 Values.push_back(SDValue(ExtNode, 1));
12201 ExtNode =
12202 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr0Hi);
12203 Values.push_back(SDValue(ExtNode, 0));
12204 Values.push_back(SDValue(ExtNode, 1));
12205 ExtNode = DAG.getMachineNode(PPC::DMXXEXTFDMR512, dl, ReturnTypes, Dmr1Lo);
12206 Values.push_back(SDValue(ExtNode, 0));
12207 Values.push_back(SDValue(ExtNode, 1));
12208 ExtNode =
12209 DAG.getMachineNode(PPC::DMXXEXTFDMR512_HI, dl, ReturnTypes, Dmr1Hi);
12210 Values.push_back(SDValue(ExtNode, 0));
12211 Values.push_back(SDValue(ExtNode, 1));
12212 }
12213
12214 if (Subtarget.isLittleEndian())
12215 std::reverse(Values.begin(), Values.end());
12216
12217 SDVTList Tys = DAG.getVTList(MVT::Other);
12218 SmallVector<SDValue, 4> Ops{
12219 StoreChain, DAG.getConstant(Intrinsic::ppc_vsx_stxvp, dl, MVT::i32),
12220 Values[0], BasePtr};
12221 MachineMemOperand *MMO = SN->getMemOperand();
12222 unsigned NumVecs = VT.getSizeInBits() / 256;
12223 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12224 MachineMemOperand *NewMMO =
12225 DAG.getMachineFunction().getMachineMemOperand(MMO, Idx * 32, 32);
12226 if (Idx > 0) {
12227 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12228 DAG.getConstant(32, dl, BasePtr.getValueType()));
12229 Ops[3] = BasePtr;
12230 }
12231 Ops[2] = Values[Idx];
12232 SDValue St = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops,
12233 MVT::v256i1, NewMMO);
12234 Stores.push_back(St);
12235 }
12236
12237 SDValue TF = DAG.getTokenFactor(dl, Stores);
12238 return TF;
12239}
12240
12241SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
12242 SelectionDAG &DAG) const {
12243 SDLoc dl(Op);
12244 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
12245 SDValue StoreChain = SN->getChain();
12246 SDValue BasePtr = SN->getBasePtr();
12247 SDValue Value = SN->getValue();
12248 SDValue Value2 = SN->getValue();
12249 EVT StoreVT = Value.getValueType();
12250
12251 if (StoreVT == MVT::v1024i1 || StoreVT == MVT::v2048i1)
12252 return LowerDMFVectorStore(Op, DAG);
12253
12254 if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
12255 return Op;
12256
12257 // Type v256i1 is used for pairs and v512i1 is used for accumulators.
12258 // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
12259 // underlying registers individually.
12260 assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
12261 "Type unsupported without MMA");
12262 assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
12263 "Type unsupported without paired vector support");
12264 Align Alignment = SN->getAlign();
12265 SmallVector<SDValue, 4> Stores;
12266 unsigned NumVecs = 2;
12267 if (StoreVT == MVT::v512i1) {
12268 if (Subtarget.isISAFuture()) {
12269 EVT ReturnTypes[] = {MVT::v256i1, MVT::v256i1};
12270 MachineSDNode *ExtNode = DAG.getMachineNode(
12271 PPC::DMXXEXTFDMR512, dl, ReturnTypes, Op.getOperand(1));
12272
12273 Value = SDValue(ExtNode, 0);
12274 Value2 = SDValue(ExtNode, 1);
12275 } else
12276 Value = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, Value);
12277 NumVecs = 4;
12278 }
12279 for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
12280 unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
12281 SDValue Elt;
12282 if (Subtarget.isISAFuture()) {
12283 VecNum = Subtarget.isLittleEndian() ? 1 - (Idx % 2) : (Idx % 2);
12284 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8,
12285 Idx > 1 ? Value2 : Value,
12286 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12287 } else
12288 Elt = DAG.getNode(PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, Value,
12289 DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
12290
12291 SDValue Store =
12292 DAG.getStore(StoreChain, dl, Elt, BasePtr,
12293 SN->getPointerInfo().getWithOffset(Idx * 16),
12294 commonAlignment(Alignment, Idx * 16),
12295 SN->getMemOperand()->getFlags(), SN->getAAInfo());
12296 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
12297 DAG.getConstant(16, dl, BasePtr.getValueType()));
12298 Stores.push_back(Store);
12299 }
12300 SDValue TF = DAG.getTokenFactor(dl, Stores);
12301 return TF;
12302}
12303
12304SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
12305 SDLoc dl(Op);
12306 if (Op.getValueType() == MVT::v4i32) {
12307 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12308
12309 SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
12310 // +16 as shift amt.
12311 SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
12312 SDValue RHSSwap = // = vrlw RHS, 16
12313 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);
12314
12315 // Shrinkify inputs to v8i16.
12316 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
12317 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
12318 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);
12319
12320 // Low parts multiplied together, generating 32-bit results (we ignore the
12321 // top parts).
12322 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
12323 LHS, RHS, DAG, dl, MVT::v4i32);
12324
12325 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
12326 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
12327 // Shift the high parts up 16 bits.
12328 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
12329 Neg16, DAG, dl);
12330 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
12331 } else if (Op.getValueType() == MVT::v16i8) {
12332 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
12333 bool isLittleEndian = Subtarget.isLittleEndian();
12334
12335 // Multiply the even 8-bit parts, producing 16-bit products.
12336 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
12337 LHS, RHS, DAG, dl, MVT::v8i16);
12338 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);
12339
12340 // Multiply the odd 8-bit parts, producing 16-bit products.
12341 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
12342 LHS, RHS, DAG, dl, MVT::v8i16);
12343 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);
12344
12345 // Merge the results together. Because vmuleub and vmuloub are
12346 // instructions with a big-endian bias, we must reverse the
12347 // element numbering and reverse the meaning of "odd" and "even"
12348 // when generating little endian code.
12349 int Ops[16];
12350 for (unsigned i = 0; i != 8; ++i) {
12351 if (isLittleEndian) {
12352 Ops[i*2 ] = 2*i;
12353 Ops[i*2+1] = 2*i+16;
12354 } else {
12355 Ops[i*2 ] = 2*i+1;
12356 Ops[i*2+1] = 2*i+1+16;
12357 }
12358 }
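// Concretely, the mask built above begins with
//   big endian:    1, 17, 3, 19, 5, 21, ...  over (EvenParts, OddParts)
//   little endian: 0, 16, 2, 18, 4, 20, ...  over (OddParts, EvenParts)
// i.e. in both cases the low byte of each 16-bit product is selected.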
12359 if (isLittleEndian)
12360 return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
12361 else
12362 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
12363 } else {
12364 llvm_unreachable("Unknown mul to lower!");
12365 }
12366}
12367
12368SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
12369 bool IsStrict = Op->isStrictFPOpcode();
12370 if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
12371 !Subtarget.hasP9Vector())
12372 return SDValue();
12373
12374 return Op;
12375}
12376
12377// Custom lowering for fpext from v2f32 to v2f64
12378SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
12379
12380 assert(Op.getOpcode() == ISD::FP_EXTEND &&
12381 "Should only be called for ISD::FP_EXTEND");
12382
12383 // FIXME: handle extends from half precision float vectors on P9.
12384 // We only want to custom lower an extend from v2f32 to v2f64.
12385 if (Op.getValueType() != MVT::v2f64 ||
12386 Op.getOperand(0).getValueType() != MVT::v2f32)
12387 return SDValue();
12388
12389 SDLoc dl(Op);
12390 SDValue Op0 = Op.getOperand(0);
12391
12392 switch (Op0.getOpcode()) {
12393 default:
12394 return SDValue();
12395 case ISD::EXTRACT_SUBVECTOR: {
12396 assert(Op0.getNumOperands() == 2 &&
12397 isa<ConstantSDNode>(Op0->getOperand(1)) &&
12398 "Node should have 2 operands with second one being a constant!");
12399
12400 if (Op0.getOperand(0).getValueType() != MVT::v4f32)
12401 return SDValue();
12402
12403 // Custom lowering is only done for the high or low doubleword.
12404 int Idx = Op0.getConstantOperandVal(1);
12405 if (Idx % 2 != 0)
12406 return SDValue();
12407
12408 // Since input is v4f32, at this point Idx is either 0 or 2.
12409 // Shift to get the doubleword position we want.
12410 int DWord = Idx >> 1;
12411
12412 // High and low word positions are different on little endian.
12413 if (Subtarget.isLittleEndian())
12414 DWord ^= 0x1;
12415
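// For example, extracting the subvector at index 2 of a v4f32 selects
// doubleword 1 on big-endian targets and doubleword 0 on little-endian ones.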
12416 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
12417 Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
12418 }
12419 case ISD::FADD:
12420 case ISD::FMUL:
12421 case ISD::FSUB: {
12422 SDValue NewLoad[2];
12423 for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
12424 // Ensure both inputs are loads.
12425 SDValue LdOp = Op0.getOperand(i);
12426 if (LdOp.getOpcode() != ISD::LOAD)
12427 return SDValue();
12428 // Generate new load node.
12429 LoadSDNode *LD = cast<LoadSDNode>(LdOp);
12430 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12431 NewLoad[i] = DAG.getMemIntrinsicNode(
12432 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12433 LD->getMemoryVT(), LD->getMemOperand());
12434 }
12435 SDValue NewOp =
12436 DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
12437 NewLoad[1], Op0.getNode()->getFlags());
12438 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
12439 DAG.getConstant(0, dl, MVT::i32));
12440 }
12441 case ISD::LOAD: {
12442 LoadSDNode *LD = cast<LoadSDNode>(Op0);
12443 SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
12444 SDValue NewLd = DAG.getMemIntrinsicNode(
12445 PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
12446 LD->getMemoryVT(), LD->getMemOperand());
12447 return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
12448 DAG.getConstant(0, dl, MVT::i32));
12449 }
12450 }
12451 llvm_unreachable("ERROR:Should return for all cases within swtich.");
12452}
12453
12454static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value,
12455 SelectionDAG &DAG,
12456 const PPCSubtarget &STI) {
12457 SDLoc DL(Value);
12458 if (STI.useCRBits())
12459 Value = DAG.getNode(ISD::SELECT, DL, SumType, Value,
12460 DAG.getConstant(1, DL, SumType),
12461 DAG.getConstant(0, DL, SumType));
12462 else
12463 Value = DAG.getZExtOrTrunc(Value, DL, SumType);
12464 SDValue Sum = DAG.getNode(PPCISD::ADDC, DL, DAG.getVTList(SumType, MVT::i32),
12465 Value, DAG.getAllOnesConstant(DL, SumType));
12466 return Sum.getValue(1);
12467}
12468
12469static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag,
12470 EVT CarryType, SelectionDAG &DAG,
12471 const PPCSubtarget &STI) {
12472 SDLoc DL(Flag);
12473 SDValue Zero = DAG.getConstant(0, DL, SumType);
12474 SDValue Carry = DAG.getNode(
12475 PPCISD::ADDE, DL, DAG.getVTList(SumType, MVT::i32), Zero, Zero, Flag);
12476 if (STI.useCRBits())
12477 return DAG.getSetCC(DL, CarryType, Carry, Zero, ISD::SETNE);
12478 return DAG.getZExtOrTrunc(Carry, DL, CarryType);
12479}
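// These two helpers are effectively inverses: adding -1 to a boolean carry
// value (0 or 1) produces a carry-out equal to that value (1 + 0xFFFF...F
// wraps and sets CA, 0 + 0xFFFF...F does not), while ADDE(0, 0, flag)
// materializes CA back as a 0/1 value.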
12480
12481SDValue PPCTargetLowering::LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const {
12482
12483 SDLoc DL(Op);
12484 SDNode *N = Op.getNode();
12485 EVT VT = N->getValueType(0);
12486 EVT CarryType = N->getValueType(1);
12487 unsigned Opc = N->getOpcode();
12488 bool IsAdd = Opc == ISD::UADDO;
12489 Opc = IsAdd ? PPCISD::ADDC : PPCISD::SUBC;
12490 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12491 N->getOperand(0), N->getOperand(1));
12492 SDValue Carry = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType,
12493 DAG, Subtarget);
12494 if (!IsAdd)
12495 Carry = DAG.getNode(ISD::XOR, DL, CarryType, Carry,
12496 DAG.getConstant(1UL, DL, CarryType));
12497 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, Carry);
12498}
12499
12500SDValue PPCTargetLowering::LowerADDSUBO_CARRY(SDValue Op,
12501 SelectionDAG &DAG) const {
12502 SDLoc DL(Op);
12503 SDNode *N = Op.getNode();
12504 unsigned Opc = N->getOpcode();
12505 EVT VT = N->getValueType(0);
12506 EVT CarryType = N->getValueType(1);
12507 SDValue CarryOp = N->getOperand(2);
12508 bool IsAdd = Opc == ISD::UADDO_CARRY;
12509 Opc = IsAdd ? PPCISD::ADDE : PPCISD::SUBE;
12510 if (!IsAdd)
12511 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12512 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12513 CarryOp = ConvertCarryValueToCarryFlag(VT, CarryOp, DAG, Subtarget);
12514 SDValue Sum = DAG.getNode(Opc, DL, DAG.getVTList(VT, MVT::i32),
12515 Op.getOperand(0), Op.getOperand(1), CarryOp);
12516 CarryOp = ConvertCarryFlagToCarryValue(VT, Sum.getValue(1), CarryType, DAG,
12517 Subtarget);
12518 if (!IsAdd)
12519 CarryOp = DAG.getNode(ISD::XOR, DL, CarryOp.getValueType(), CarryOp,
12520 DAG.getConstant(1UL, DL, CarryOp.getValueType()));
12521 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, CarryOp);
12522}
12523
12524SDValue PPCTargetLowering::LowerSSUBO(SDValue Op, SelectionDAG &DAG) const {
12525
12526 SDLoc dl(Op);
12527 SDValue LHS = Op.getOperand(0);
12528 SDValue RHS = Op.getOperand(1);
12529 EVT VT = Op.getNode()->getValueType(0);
12530
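// Signed subtraction x - y overflows exactly when x and y have different
// signs and the result's sign differs from x's, i.e. when
// ((x ^ y) & ((x - y) ^ x)) has its sign bit set; the sequence below computes
// that expression and shifts the sign bit down to bit 0.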
12531 SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
12532
12533 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, VT, RHS, LHS);
12534 SDValue Xor2 = DAG.getNode(ISD::XOR, dl, VT, Sub, LHS);
12535
12536 SDValue And = DAG.getNode(ISD::AND, dl, VT, Xor1, Xor2);
12537
12538 SDValue Overflow =
12539 DAG.getNode(ISD::SRL, dl, VT, And,
12540 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12541
12542 SDValue OverflowTrunc =
12543 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12544
12545 return DAG.getMergeValues({Sub, OverflowTrunc}, dl);
12546}
12547
12548/// Implements signed add with overflow detection using the rule:
12549/// (x eqv y) & (sum xor x), where the overflow bit is extracted from the sign bit.
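/// For example, with i32 operands x = y = 0x7fffffff: x eqv y = 0xffffffff,
/// sum = 0xfffffffe and sum xor x = 0x80000001, so the AND has its sign bit
/// set and the extracted overflow bit is 1.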
12550SDValue PPCTargetLowering::LowerSADDO(SDValue Op, SelectionDAG &DAG) const {
12551
12552 SDLoc dl(Op);
12553 SDValue LHS = Op.getOperand(0);
12554 SDValue RHS = Op.getOperand(1);
12555 EVT VT = Op.getNode()->getValueType(0);
12556
12557 SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);
12558
12559 // Compute ~(x xor y)
12560 SDValue XorXY = DAG.getNode(ISD::XOR, dl, VT, LHS, RHS);
12561 SDValue EqvXY = DAG.getNOT(dl, XorXY, VT);
12562 // Compute (s xor x)
12563 SDValue SumXorX = DAG.getNode(ISD::XOR, dl, VT, Sum, LHS);
12564
12565 // overflow = (x eqv y) & (s xor x)
12566 SDValue OverflowInSign = DAG.getNode(ISD::AND, dl, VT, EqvXY, SumXorX);
12567
12568 // Shift sign bit down to LSB
12569 SDValue Overflow =
12570 DAG.getNode(ISD::SRL, dl, VT, OverflowInSign,
12571 DAG.getConstant(VT.getSizeInBits() - 1, dl, MVT::i32));
12572 // Truncate to the overflow type (i1)
12573 SDValue OverflowTrunc =
12574 DAG.getNode(ISD::TRUNCATE, dl, Op.getNode()->getValueType(1), Overflow);
12575
12576 return DAG.getMergeValues({Sum, OverflowTrunc}, dl);
12577}
12578
12579// Lower unsigned 3-way compare producing -1/0/1.
12580SDValue PPCTargetLowering::LowerUCMP(SDValue Op, SelectionDAG &DAG) const {
12581 SDLoc DL(Op);
12582 SDValue A = DAG.getFreeze(Op.getOperand(0));
12583 SDValue B = DAG.getFreeze(Op.getOperand(1));
12584 EVT OpVT = A.getValueType(); // operand type
12585 EVT ResVT = Op.getValueType(); // result type
12586
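// With CA acting as a 'no borrow' flag for the subtractions, the three steps
// below work out case by case as:
//   A <  B: CA0 = 1, t2 = A - B     (CA1 = 0), res = diff - t2 - 1 = -1
//   A == B: CA0 = 1, t2 = 0         (CA1 = 1), res = diff - t2     =  0
//   A >  B: CA0 = 0, t2 = A - B - 1 (CA1 = 1), res = diff - t2     =  1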
12587 // First compute diff = A - B (will become subf).
12588 SDValue Diff = DAG.getNode(ISD::SUB, DL, OpVT, A, B);
12589
12590 // Generate B - A using SUBC to capture carry.
12591 SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
12592 SDValue SubC = DAG.getNode(PPCISD::SUBC, DL, VTs, B, A);
12593 SDValue CA0 = SubC.getValue(1);
12594
12595 // t2 = A - B + CA0 using SUBE.
12596 SDValue SubE1 = DAG.getNode(PPCISD::SUBE, DL, VTs, A, B, CA0);
12597 SDValue CA1 = SubE1.getValue(1);
12598
12599 // res = diff - t2 + CA1 using SUBE (produces desired -1/0/1).
12600 SDValue ResPair = DAG.getNode(PPCISD::SUBE, DL, VTs, Diff, SubE1, CA1);
12601
12602 // Extract the first result and sign-extend or truncate it to the result type.
12603 return DAG.getSExtOrTrunc(ResPair.getValue(0), DL, ResVT);
12604}
12605
12606/// LowerOperation - Provide custom lowering hooks for some operations.
12607///
12608SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
12609 switch (Op.getOpcode()) {
12610 default:
12611 llvm_unreachable("Wasn't expecting to be able to lower this!");
12612 case ISD::FPOW: return lowerPow(Op, DAG);
12613 case ISD::FSIN: return lowerSin(Op, DAG);
12614 case ISD::FCOS: return lowerCos(Op, DAG);
12615 case ISD::FLOG: return lowerLog(Op, DAG);
12616 case ISD::FLOG10: return lowerLog10(Op, DAG);
12617 case ISD::FEXP: return lowerExp(Op, DAG);
12618 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
12619 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
12620 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
12621 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
12622 case ISD::JumpTable: return LowerJumpTable(Op, DAG);
12623 case ISD::STRICT_FSETCC:
12624 case ISD::STRICT_FSETCCS:
12625 case ISD::SETCC: return LowerSETCC(Op, DAG);
12626 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
12627 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
12628 case ISD::SSUBO:
12629 return LowerSSUBO(Op, DAG);
12630 case ISD::SADDO:
12631 return LowerSADDO(Op, DAG);
12632
12633 case ISD::INLINEASM:
12634 case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
12635 // Variable argument lowering.
12636 case ISD::VASTART: return LowerVASTART(Op, DAG);
12637 case ISD::VAARG: return LowerVAARG(Op, DAG);
12638 case ISD::VACOPY: return LowerVACOPY(Op, DAG);
12639
12640 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
12641 case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
12642 case ISD::GET_DYNAMIC_AREA_OFFSET:
12643 return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
12644
12645 // Exception handling lowering.
12646 case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
12647 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
12648 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
12649
12650 case ISD::LOAD: return LowerLOAD(Op, DAG);
12651 case ISD::STORE: return LowerSTORE(Op, DAG);
12652 case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
12653 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
12654 case ISD::STRICT_FP_TO_UINT:
12655 case ISD::STRICT_FP_TO_SINT:
12656 case ISD::FP_TO_UINT:
12657 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
12658 case ISD::STRICT_UINT_TO_FP:
12659 case ISD::STRICT_SINT_TO_FP:
12660 case ISD::UINT_TO_FP:
12661 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
12662 case ISD::GET_ROUNDING: return LowerGET_ROUNDING(Op, DAG);
12663 case ISD::SET_ROUNDING:
12664 return LowerSET_ROUNDING(Op, DAG);
12665
12666 // Lower 64-bit shifts.
12667 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
12668 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
12669 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
12670
12671 case ISD::FSHL: return LowerFunnelShift(Op, DAG);
12672 case ISD::FSHR: return LowerFunnelShift(Op, DAG);
12673
12674 // Vector-related lowering.
12675 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
12676 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
12677 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
12678 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
12679 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
12680 case ISD::MUL: return LowerMUL(Op, DAG);
12681 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
12682 case ISD::STRICT_FP_ROUND:
12683 case ISD::FP_ROUND:
12684 return LowerFP_ROUND(Op, DAG);
12685 case ISD::ROTL: return LowerROTL(Op, DAG);
12686
12687 // For counter-based loop handling.
12688 case ISD::INTRINSIC_W_CHAIN:
12689 return SDValue();
12690
12691 case ISD::BITCAST: return LowerBITCAST(Op, DAG);
12692
12693 // Frame & Return address.
12694 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
12695 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
12696
12697 case ISD::INTRINSIC_VOID:
12698 return LowerINTRINSIC_VOID(Op, DAG);
12699 case ISD::BSWAP:
12700 return LowerBSWAP(Op, DAG);
12701 case ISD::ATOMIC_CMP_SWAP:
12702 return LowerATOMIC_CMP_SWAP(Op, DAG);
12703 case ISD::ATOMIC_STORE:
12704 return LowerATOMIC_LOAD_STORE(Op, DAG);
12705 case ISD::IS_FPCLASS:
12706 return LowerIS_FPCLASS(Op, DAG);
12707 case ISD::UADDO:
12708 case ISD::USUBO:
12709 return LowerADDSUBO(Op, DAG);
12710 case ISD::UADDO_CARRY:
12711 case ISD::USUBO_CARRY:
12712 return LowerADDSUBO_CARRY(Op, DAG);
12713 case ISD::UCMP:
12714 return LowerUCMP(Op, DAG);
12715 case ISD::STRICT_LRINT:
12716 case ISD::STRICT_LLRINT:
12717 case ISD::STRICT_LROUND:
12720 if (Op->getFlags().hasNoFPExcept())
12721 return Op;
12722 return SDValue();
12723 case ISD::VP_LOAD:
12724 return LowerVP_LOAD(Op, DAG);
12725 case ISD::VP_STORE:
12726 return LowerVP_STORE(Op, DAG);
12727 }
12728}
12729
12730void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
12731 SmallVectorImpl<SDValue> &Results,
12732 SelectionDAG &DAG) const {
12733 SDLoc dl(N);
12734 switch (N->getOpcode()) {
12735 default:
12736 llvm_unreachable("Do not know how to custom type legalize this operation!");
12737 case ISD::ATOMIC_LOAD: {
12738 SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
12739 Results.push_back(Res);
12740 Results.push_back(Res.getValue(1));
12741 break;
12742 }
12743 case ISD::READCYCLECOUNTER: {
12744 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
12745 SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
12746
12747 Results.push_back(
12748 DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
12749 Results.push_back(RTB.getValue(2));
12750 break;
12751 }
12752 case ISD::INTRINSIC_W_CHAIN: {
12753 if (N->getConstantOperandVal(1) != Intrinsic::loop_decrement)
12754 break;
12755
12756 assert(N->getValueType(0) == MVT::i1 &&
12757 "Unexpected result type for CTR decrement intrinsic");
12758 EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
12759 N->getValueType(0));
12760 SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
12761 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
12762 N->getOperand(1));
12763
12764 Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
12765 Results.push_back(NewInt.getValue(1));
12766 break;
12767 }
12768 case ISD::INTRINSIC_WO_CHAIN: {
12769 switch (N->getConstantOperandVal(0)) {
12770 case Intrinsic::ppc_pack_longdouble:
12771 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
12772 N->getOperand(2), N->getOperand(1)));
12773 break;
12774 case Intrinsic::ppc_maxfe:
12775 case Intrinsic::ppc_minfe:
12776 case Intrinsic::ppc_fnmsub:
12777 case Intrinsic::ppc_convert_f128_to_ppcf128:
12778 Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
12779 break;
12780 }
12781 break;
12782 }
12783 case ISD::VAARG: {
12784 if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
12785 return;
12786
12787 EVT VT = N->getValueType(0);
12788
12789 if (VT == MVT::i64) {
12790 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
12791
12792 Results.push_back(NewNode);
12793 Results.push_back(NewNode.getValue(1));
12794 }
12795 return;
12796 }
12797 case ISD::STRICT_FP_TO_SINT:
12798 case ISD::STRICT_FP_TO_UINT:
12799 case ISD::FP_TO_SINT:
12800 case ISD::FP_TO_UINT: {
12801 // LowerFP_TO_INT() can only handle f32 and f64.
12802 if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
12803 MVT::ppcf128)
12804 return;
12805 SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
12806 Results.push_back(LoweredValue);
12807 if (N->isStrictFPOpcode())
12808 Results.push_back(LoweredValue.getValue(1));
12809 return;
12810 }
12811 case ISD::TRUNCATE: {
12812 if (!N->getValueType(0).isVector())
12813 return;
12814 SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
12815 if (Lowered)
12816 Results.push_back(Lowered);
12817 return;
12818 }
12819 case ISD::SCALAR_TO_VECTOR: {
12820 SDValue Lowered = LowerSCALAR_TO_VECTOR(SDValue(N, 0), DAG);
12821 if (Lowered)
12822 Results.push_back(Lowered);
12823 return;
12824 }
12825 case ISD::FSHL:
12826 case ISD::FSHR:
12827 // Don't handle funnel shifts here.
12828 return;
12829 case ISD::BITCAST:
12830 // Don't handle bitcast here.
12831 return;
12832 case ISD::FP_EXTEND:
12833 SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
12834 if (Lowered)
12835 Results.push_back(Lowered);
12836 return;
12837 }
12838}
12839
12840//===----------------------------------------------------------------------===//
12841// Other Lowering Code
12842//===----------------------------------------------------------------------===//
12843
12844static Instruction *callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id) {
12845 return Builder.CreateIntrinsic(Id, {});
12846}
12847
12848Value *PPCTargetLowering::emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy,
12849 Value *Addr,
12850 AtomicOrdering Ord) const {
12851 unsigned SZ = ValueTy->getPrimitiveSizeInBits();
12852
12853 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12854 "Only 8/16/32/64-bit atomic loads supported");
12855 Intrinsic::ID IntID;
12856 switch (SZ) {
12857 default:
12858 llvm_unreachable("Unexpected PrimitiveSize");
12859 case 8:
12860 IntID = Intrinsic::ppc_lbarx;
12861 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12862 break;
12863 case 16:
12864 IntID = Intrinsic::ppc_lharx;
12865 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12866 break;
12867 case 32:
12868 IntID = Intrinsic::ppc_lwarx;
12869 break;
12870 case 64:
12871 IntID = Intrinsic::ppc_ldarx;
12872 break;
12873 }
12874 Value *Call =
12875 Builder.CreateIntrinsic(IntID, Addr, /*FMFSource=*/nullptr, "larx");
12876
12877 return Builder.CreateTruncOrBitCast(Call, ValueTy);
12878}
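// For example, an i8 load-linked on a subtarget with partword atomics becomes
// roughly:
//   %larx = call i32 @llvm.ppc.lbarx(ptr %addr)
//   %val  = trunc i32 %larx to i8
// while the 64-bit case uses @llvm.ppc.ldarx and needs no truncation.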
12879
12880// Perform a store-conditional operation to Addr. Return the status of the
12881// store. This should be 0 if the store succeeded, non-zero otherwise.
12882Value *PPCTargetLowering::emitStoreConditional(IRBuilderBase &Builder,
12883 Value *Val, Value *Addr,
12884 AtomicOrdering Ord) const {
12885 Type *Ty = Val->getType();
12886 unsigned SZ = Ty->getPrimitiveSizeInBits();
12887
12888 assert((SZ == 8 || SZ == 16 || SZ == 32 || SZ == 64) &&
12889 "Only 8/16/32/64-bit atomic loads supported");
12890 Intrinsic::ID IntID;
12891 switch (SZ) {
12892 default:
12893 llvm_unreachable("Unexpected PrimitiveSize");
12894 case 8:
12895 IntID = Intrinsic::ppc_stbcx;
12896 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12897 break;
12898 case 16:
12899 IntID = Intrinsic::ppc_sthcx;
12900 assert(Subtarget.hasPartwordAtomics() && "No support for partword atomics.");
12901 break;
12902 case 32:
12903 IntID = Intrinsic::ppc_stwcx;
12904 break;
12905 case 64:
12906 IntID = Intrinsic::ppc_stdcx;
12907 break;
12908 }
12909
12910 if (SZ == 8 || SZ == 16)
12911 Val = Builder.CreateZExt(Val, Builder.getInt32Ty());
12912
12913 Value *Call = Builder.CreateIntrinsic(IntID, {Addr, Val},
12914 /*FMFSource=*/nullptr, "stcx");
12915 return Builder.CreateXor(Call, Builder.getInt32(1));
12916}
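// For example, an i16 store-conditional zero-extends the value to i32, calls
// roughly @llvm.ppc.sthcx(ptr %addr, i32 %val), and xors the result with 1 so
// that a status of 0 means the store succeeded.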
12917
12918 // The mappings for emitLeading/TrailingFence are taken from
12919// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
12920Instruction *PPCTargetLowering::emitLeadingFence(IRBuilderBase &Builder,
12921 Instruction *Inst,
12922 AtomicOrdering Ord) const {
12923 if (Ord == AtomicOrdering::SequentiallyConsistent)
12924 return callIntrinsic(Builder, Intrinsic::ppc_sync);
12925 if (isReleaseOrStronger(Ord))
12926 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12927 return nullptr;
12928}
12929
12930Instruction *PPCTargetLowering::emitTrailingFence(IRBuilderBase &Builder,
12931 Instruction *Inst,
12932 AtomicOrdering Ord) const {
12933 if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
12934 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
12935 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
12936 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
12937 if (isa<LoadInst>(Inst))
12938 return Builder.CreateIntrinsic(Intrinsic::ppc_cfence, {Inst->getType()},
12939 {Inst});
12940 // FIXME: Can use isync for rmw operation.
12941 return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
12942 }
12943 return nullptr;
12944}
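// Summarizing the two hooks above, the fences emitted around atomics are:
//   leading : seq_cst -> sync, release/acq_rel -> lwsync, otherwise none
//   trailing: acquire or stronger -> cfence for plain loads, lwsync for
//             read-modify-write operations, otherwise none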
12945
12946MachineBasicBlock *
12947PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
12948 unsigned AtomicSize,
12949 unsigned BinOpcode,
12950 unsigned CmpOpcode,
12951 unsigned CmpPred) const {
12952 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
12953 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12954
12955 auto LoadMnemonic = PPC::LDARX;
12956 auto StoreMnemonic = PPC::STDCX;
12957 switch (AtomicSize) {
12958 default:
12959 llvm_unreachable("Unexpected size of atomic entity");
12960 case 1:
12961 LoadMnemonic = PPC::LBARX;
12962 StoreMnemonic = PPC::STBCX;
12963 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12964 break;
12965 case 2:
12966 LoadMnemonic = PPC::LHARX;
12967 StoreMnemonic = PPC::STHCX;
12968 assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
12969 break;
12970 case 4:
12971 LoadMnemonic = PPC::LWARX;
12972 StoreMnemonic = PPC::STWCX;
12973 break;
12974 case 8:
12975 LoadMnemonic = PPC::LDARX;
12976 StoreMnemonic = PPC::STDCX;
12977 break;
12978 }
12979
12980 const BasicBlock *LLVM_BB = BB->getBasicBlock();
12981 MachineFunction *F = BB->getParent();
12982 MachineFunction::iterator It = ++BB->getIterator();
12983
12984 Register dest = MI.getOperand(0).getReg();
12985 Register ptrA = MI.getOperand(1).getReg();
12986 Register ptrB = MI.getOperand(2).getReg();
12987 Register incr = MI.getOperand(3).getReg();
12988 DebugLoc dl = MI.getDebugLoc();
12989
12990 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
12991 MachineBasicBlock *loop2MBB =
12992 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
12993 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12994 F->insert(It, loopMBB);
12995 if (CmpOpcode)
12996 F->insert(It, loop2MBB);
12997 F->insert(It, exitMBB);
12998 exitMBB->splice(exitMBB->begin(), BB,
12999 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13000 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13001
13002 MachineRegisterInfo &RegInfo = F->getRegInfo();
13003 Register TmpReg = (!BinOpcode) ? incr :
13004 RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
13005 : &PPC::GPRCRegClass);
13006
13007 // thisMBB:
13008 // ...
13009 // fallthrough --> loopMBB
13010 BB->addSuccessor(loopMBB);
13011
13012 // loopMBB:
13013 // l[wd]arx dest, ptr
13014 // add r0, dest, incr
13015 // st[wd]cx. r0, ptr
13016 // bne- loopMBB
13017 // fallthrough --> exitMBB
13018
13019 // For max/min...
13020 // loopMBB:
13021 // l[wd]arx dest, ptr
13022 // cmpl?[wd] dest, incr
13023 // bgt exitMBB
13024 // loop2MBB:
13025 // st[wd]cx. dest, ptr
13026 // bne- loopMBB
13027 // fallthrough --> exitMBB
13028
13029 BB = loopMBB;
13030 BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
13031 .addReg(ptrA).addReg(ptrB);
13032 if (BinOpcode)
13033 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
13034 if (CmpOpcode) {
13035 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13036 // Signed comparisons of byte or halfword values must be sign-extended.
13037 if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
13038 Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13039 BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
13040 ExtReg).addReg(dest);
13041 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ExtReg).addReg(incr);
13042 } else
13043 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(dest).addReg(incr);
13044
13045 BuildMI(BB, dl, TII->get(PPC::BCC))
13046 .addImm(CmpPred)
13047 .addReg(CrReg)
13048 .addMBB(exitMBB);
13049 BB->addSuccessor(loop2MBB);
13050 BB->addSuccessor(exitMBB);
13051 BB = loop2MBB;
13052 }
13053 BuildMI(BB, dl, TII->get(StoreMnemonic))
13054 .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
13055 BuildMI(BB, dl, TII->get(PPC::BCC))
13056 .addImm(PPC::PRED_NE)
13057 .addReg(PPC::CR0)
13058 .addMBB(loopMBB);
13059 BB->addSuccessor(loopMBB);
13060 BB->addSuccessor(exitMBB);
13061
13062 // exitMBB:
13063 // ...
13064 BB = exitMBB;
13065 return BB;
13066}
13067
13068static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII) {
13069 switch(MI.getOpcode()) {
13070 default:
13071 return false;
13072 case PPC::COPY:
13073 return TII->isSignExtended(MI.getOperand(1).getReg(),
13074 &MI.getMF()->getRegInfo());
13075 case PPC::LHA:
13076 case PPC::LHA8:
13077 case PPC::LHAU:
13078 case PPC::LHAU8:
13079 case PPC::LHAUX:
13080 case PPC::LHAUX8:
13081 case PPC::LHAX:
13082 case PPC::LHAX8:
13083 case PPC::LWA:
13084 case PPC::LWAUX:
13085 case PPC::LWAX:
13086 case PPC::LWAX_32:
13087 case PPC::LWA_32:
13088 case PPC::PLHA:
13089 case PPC::PLHA8:
13090 case PPC::PLHA8pc:
13091 case PPC::PLHApc:
13092 case PPC::PLWA:
13093 case PPC::PLWA8:
13094 case PPC::PLWA8pc:
13095 case PPC::PLWApc:
13096 case PPC::EXTSB:
13097 case PPC::EXTSB8:
13098 case PPC::EXTSB8_32_64:
13099 case PPC::EXTSB8_rec:
13100 case PPC::EXTSB_rec:
13101 case PPC::EXTSH:
13102 case PPC::EXTSH8:
13103 case PPC::EXTSH8_32_64:
13104 case PPC::EXTSH8_rec:
13105 case PPC::EXTSH_rec:
13106 case PPC::EXTSW:
13107 case PPC::EXTSWSLI:
13108 case PPC::EXTSWSLI_32_64:
13109 case PPC::EXTSWSLI_32_64_rec:
13110 case PPC::EXTSWSLI_rec:
13111 case PPC::EXTSW_32:
13112 case PPC::EXTSW_32_64:
13113 case PPC::EXTSW_32_64_rec:
13114 case PPC::EXTSW_rec:
13115 case PPC::SRAW:
13116 case PPC::SRAWI:
13117 case PPC::SRAWI_rec:
13118 case PPC::SRAW_rec:
13119 return true;
13120 }
13121 return false;
13122}
13123
13124MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
13125 MachineInstr &MI, MachineBasicBlock *BB,
13126 bool is8bit, // operation
13127 unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
13128 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
13129 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
13130
13131 // If this is a signed comparison and the value being compared is not known
13132 // to be sign extended, sign extend it here.
13133 DebugLoc dl = MI.getDebugLoc();
13134 MachineFunction *F = BB->getParent();
13135 MachineRegisterInfo &RegInfo = F->getRegInfo();
13136 Register incr = MI.getOperand(3).getReg();
13137 bool IsSignExtended =
13138 incr.isVirtual() && isSignExtended(*RegInfo.getVRegDef(incr), TII);
13139
13140 if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
13141 Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13142 BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
13143 .addReg(MI.getOperand(3).getReg());
13144 MI.getOperand(3).setReg(ValueReg);
13145 incr = ValueReg;
13146 }
13147 // If we support part-word atomic mnemonics, just use them
13148 if (Subtarget.hasPartwordAtomics())
13149 return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
13150 CmpPred);
13151
13152 // In 64-bit mode we have to use 64-bit registers for addresses, even though
13153 // lwarx/stwcx. only operate on 32-bit values. With the 32-bit atomics we can
13154 // use address registers without caring whether they're 32 or 64 bits, but
13155 // here we're doing actual arithmetic on the addresses.
13156 bool is64bit = Subtarget.isPPC64();
13157 bool isLittleEndian = Subtarget.isLittleEndian();
13158 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
13159
13160 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13161 MachineFunction::iterator It = ++BB->getIterator();
13162
13163 Register dest = MI.getOperand(0).getReg();
13164 Register ptrA = MI.getOperand(1).getReg();
13165 Register ptrB = MI.getOperand(2).getReg();
13166
13167 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
13168 MachineBasicBlock *loop2MBB =
13169 CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
13170 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
13171 F->insert(It, loopMBB);
13172 if (CmpOpcode)
13173 F->insert(It, loop2MBB);
13174 F->insert(It, exitMBB);
13175 exitMBB->splice(exitMBB->begin(), BB,
13176 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13177 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
13178
13179 const TargetRegisterClass *RC =
13180 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13181 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13182
13183 Register PtrReg = RegInfo.createVirtualRegister(RC);
13184 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
13185 Register ShiftReg =
13186 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
13187 Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
13188 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
13189 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
13190 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
13191 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
13192 Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
13193 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
13194 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
13195 Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
13196 Register Ptr1Reg;
13197 Register TmpReg =
13198 (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
13199
13200 // thisMBB:
13201 // ...
13202 // fallthrough --> loopMBB
13203 BB->addSuccessor(loopMBB);
13204
13205 // The 4-byte load must be aligned, while a char or short may be
13206 // anywhere in the word. Hence all this nasty bookkeeping code.
13207 // add ptr1, ptrA, ptrB [copy if ptrA==0]
13208 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
13209 // xori shift, shift1, 24 [16]
13210 // rlwinm ptr, ptr1, 0, 0, 29
13211 // slw incr2, incr, shift
13212 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
13213 // slw mask, mask2, shift
13214 // loopMBB:
13215 // lwarx tmpDest, ptr
13216 // add tmp, tmpDest, incr2
13217 // andc tmp2, tmpDest, mask
13218 // and tmp3, tmp, mask
13219 // or tmp4, tmp3, tmp2
13220 // stwcx. tmp4, ptr
13221 // bne- loopMBB
13222 // fallthrough --> exitMBB
13223 // srw SrwDest, tmpDest, shift
13224 // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
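// Worked example for an 8-bit operand at byte offset 1 within its word:
//   shift1 = (ptr & 3) << 3 = 8
//   shift  = 8 on little-endian, 8 xor 24 = 16 on big-endian
//   mask   = 0xFF << shift
// so the byte is updated in bits [15:8] (LE) or [23:16] (BE) of the 32-bit
// word loaded by lwarx.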
13225 if (ptrA != ZeroReg) {
13226 Ptr1Reg = RegInfo.createVirtualRegister(RC);
13227 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
13228 .addReg(ptrA)
13229 .addReg(ptrB);
13230 } else {
13231 Ptr1Reg = ptrB;
13232 }
13233 // We need to use a 32-bit subregister to avoid a register class mismatch in
13234 // 64-bit mode.
13235 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
13236 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
13237 .addImm(3)
13238 .addImm(27)
13239 .addImm(is8bit ? 28 : 27);
13240 if (!isLittleEndian)
13241 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
13242 .addReg(Shift1Reg)
13243 .addImm(is8bit ? 24 : 16);
13244 if (is64bit)
13245 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
13246 .addReg(Ptr1Reg)
13247 .addImm(0)
13248 .addImm(61);
13249 else
13250 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
13251 .addReg(Ptr1Reg)
13252 .addImm(0)
13253 .addImm(0)
13254 .addImm(29);
13255 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
13256 if (is8bit)
13257 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
13258 else {
13259 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
13260 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
13261 .addReg(Mask3Reg)
13262 .addImm(65535);
13263 }
13264 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
13265 .addReg(Mask2Reg)
13266 .addReg(ShiftReg);
13267
13268 BB = loopMBB;
13269 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
13270 .addReg(ZeroReg)
13271 .addReg(PtrReg);
13272 if (BinOpcode)
13273 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
13274 .addReg(Incr2Reg)
13275 .addReg(TmpDestReg);
13276 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
13277 .addReg(TmpDestReg)
13278 .addReg(MaskReg);
13279 BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
13280 if (CmpOpcode) {
13281 // For unsigned comparisons, we can directly compare the shifted values.
13282 // For signed comparisons we shift and sign extend.
13283 Register SReg = RegInfo.createVirtualRegister(GPRC);
13284 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13285 BuildMI(BB, dl, TII->get(PPC::AND), SReg)
13286 .addReg(TmpDestReg)
13287 .addReg(MaskReg);
13288 unsigned ValueReg = SReg;
13289 unsigned CmpReg = Incr2Reg;
13290 if (CmpOpcode == PPC::CMPW) {
13291 ValueReg = RegInfo.createVirtualRegister(GPRC);
13292 BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
13293 .addReg(SReg)
13294 .addReg(ShiftReg);
13295 Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
13296 BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
13297 .addReg(ValueReg);
13298 ValueReg = ValueSReg;
13299 CmpReg = incr;
13300 }
13301 BuildMI(BB, dl, TII->get(CmpOpcode), CrReg).addReg(ValueReg).addReg(CmpReg);
13302 BuildMI(BB, dl, TII->get(PPC::BCC))
13303 .addImm(CmpPred)
13304 .addReg(CrReg)
13305 .addMBB(exitMBB);
13306 BB->addSuccessor(loop2MBB);
13307 BB->addSuccessor(exitMBB);
13308 BB = loop2MBB;
13309 }
13310 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
13311 BuildMI(BB, dl, TII->get(PPC::STWCX))
13312 .addReg(Tmp4Reg)
13313 .addReg(ZeroReg)
13314 .addReg(PtrReg);
13315 BuildMI(BB, dl, TII->get(PPC::BCC))
13316 .addImm(PPC::PRED_NE)
13317 .addReg(PPC::CR0)
13318 .addMBB(loopMBB);
13319 BB->addSuccessor(loopMBB);
13320 BB->addSuccessor(exitMBB);
13321
13322 // exitMBB:
13323 // ...
13324 BB = exitMBB;
13325 // Since the shift amount is not a constant, we need to clear
13326 // the upper bits with a separate RLWINM.
13327 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
13328 .addReg(SrwDestReg)
13329 .addImm(0)
13330 .addImm(is8bit ? 24 : 16)
13331 .addImm(31);
13332 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
13333 .addReg(TmpDestReg)
13334 .addReg(ShiftReg);
13335 return BB;
13336}
13337
13338MachineBasicBlock *
13339PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
13340 MachineBasicBlock *MBB) const {
13341 DebugLoc DL = MI.getDebugLoc();
13342 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13343 const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
13344
13345 MachineFunction *MF = MBB->getParent();
13346 MachineRegisterInfo &MRI = MF->getRegInfo();
13347
13348 const BasicBlock *BB = MBB->getBasicBlock();
13349 MachineFunction::iterator I = ++MBB->getIterator();
13350
13351 Register DstReg = MI.getOperand(0).getReg();
13352 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
13353 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
13354 Register mainDstReg = MRI.createVirtualRegister(RC);
13355 Register restoreDstReg = MRI.createVirtualRegister(RC);
13356
13357 MVT PVT = getPointerTy(MF->getDataLayout());
13358 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13359 "Invalid Pointer Size!");
13360 // For v = setjmp(buf), we generate
13361 //
13362 // thisMBB:
13363 // SjLjSetup mainMBB
13364 // bl mainMBB
13365 // v_restore = 1
13366 // b sinkMBB
13367 //
13368 // mainMBB:
13369 // buf[LabelOffset] = LR
13370 // v_main = 0
13371 //
13372 // sinkMBB:
13373 // v = phi(main, restore)
13374 //
13375
13376 MachineBasicBlock *thisMBB = MBB;
13377 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
13378 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
13379 MF->insert(I, mainMBB);
13380 MF->insert(I, sinkMBB);
13381
13382 MachineInstrBuilder MIB;
13383
13384 // Transfer the remainder of BB and its successor edges to sinkMBB.
13385 sinkMBB->splice(sinkMBB->begin(), MBB,
13386 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13387 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
13388
13389 // Note that the structure of the jmp_buf used here is not compatible
13390 // with that used by libc, and is not designed to be. Specifically, it
13391 // stores only those 'reserved' registers that LLVM does not otherwise
13392 // understand how to spill. Also, by convention, by the time this
13393 // intrinsic is called, Clang has already stored the frame address in the
13394 // first slot of the buffer and stack address in the third. Following the
13395 // X86 target code, we'll store the jump address in the second slot. We also
13396 // need to save the TOC pointer (R2) to handle jumps between shared
13397 // libraries, and that will be stored in the fourth slot. The thread
13398 // identifier (R13) is not affected.
13399
13400 // thisMBB:
13401 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13402 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13403 const int64_t BPOffset = 4 * PVT.getStoreSize();
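// In pointer-sized slots, the jmp_buf layout used here is therefore:
//   [0] frame address    (stored by the front end)
//   [1] resume IP        (LabelOffset)
//   [2] stack pointer    (stored by the front end, reloaded in longjmp)
//   [3] TOC pointer (R2) (TOCOffset)
//   [4] base pointer     (BPOffset)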
13404
13405 // Prepare the IP in a register.
13406 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
13407 Register LabelReg = MRI.createVirtualRegister(PtrRC);
13408 Register BufReg = MI.getOperand(1).getReg();
13409
13410 if (Subtarget.is64BitELFABI()) {
13411 setUsesTOCBasePtr(*MBB->getParent());
13412 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
13413 .addReg(PPC::X2)
13414 .addImm(TOCOffset)
13415 .addReg(BufReg)
13416 .cloneMemRefs(MI);
13417 }
13418
13419 // Naked functions never have a base pointer, and so we use r1. For all
13420 // other functions, this decision must be delayed until during PEI.
13421 unsigned BaseReg;
13422 if (MF->getFunction().hasFnAttribute(Attribute::Naked))
13423 BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
13424 else
13425 BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
13426
13427 MIB = BuildMI(*thisMBB, MI, DL,
13428 TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
13429 .addReg(BaseReg)
13430 .addImm(BPOffset)
13431 .addReg(BufReg)
13432 .cloneMemRefs(MI);
13433
13434 // Setup
13435 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
13436 MIB.addRegMask(TRI->getNoPreservedMask());
13437
13438 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
13439
13440 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
13441 .addMBB(mainMBB);
13442 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
13443
13444 thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
13445 thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
13446
13447 // mainMBB:
13448 // mainDstReg = 0
13449 MIB =
13450 BuildMI(mainMBB, DL,
13451 TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
13452
13453 // Store IP
13454 if (Subtarget.isPPC64()) {
13455 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
13456 .addReg(LabelReg)
13457 .addImm(LabelOffset)
13458 .addReg(BufReg);
13459 } else {
13460 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
13461 .addReg(LabelReg)
13462 .addImm(LabelOffset)
13463 .addReg(BufReg);
13464 }
13465 MIB.cloneMemRefs(MI);
13466
13467 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
13468 mainMBB->addSuccessor(sinkMBB);
13469
13470 // sinkMBB:
13471 BuildMI(*sinkMBB, sinkMBB->begin(), DL,
13472 TII->get(PPC::PHI), DstReg)
13473 .addReg(mainDstReg).addMBB(mainMBB)
13474 .addReg(restoreDstReg).addMBB(thisMBB);
13475
13476 MI.eraseFromParent();
13477 return sinkMBB;
13478}
13479
13480MachineBasicBlock *
13481PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
13482 MachineBasicBlock *MBB) const {
13483 DebugLoc DL = MI.getDebugLoc();
13484 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13485
13486 MachineFunction *MF = MBB->getParent();
13487 MachineRegisterInfo &MRI = MF->getRegInfo();
13488
13489 MVT PVT = getPointerTy(MF->getDataLayout());
13490 assert((PVT == MVT::i64 || PVT == MVT::i32) &&
13491 "Invalid Pointer Size!");
13492
13493 const TargetRegisterClass *RC =
13494 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
13495 Register Tmp = MRI.createVirtualRegister(RC);
13496 // Since FP is only updated here but NOT referenced, it's treated as GPR.
13497 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
13498 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
13499 unsigned BP =
13500 (PVT == MVT::i64)
13501 ? PPC::X30
13502 : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
13503 : PPC::R30);
13504
13505 MachineInstrBuilder MIB;
13506
13507 const int64_t LabelOffset = 1 * PVT.getStoreSize();
13508 const int64_t SPOffset = 2 * PVT.getStoreSize();
13509 const int64_t TOCOffset = 3 * PVT.getStoreSize();
13510 const int64_t BPOffset = 4 * PVT.getStoreSize();
13511
13512 Register BufReg = MI.getOperand(0).getReg();
13513
13514 // Reload FP (the jumped-to function may not have had a
13515 // frame pointer, and if so, then its r31 will be restored
13516 // as necessary).
13517 if (PVT == MVT::i64) {
13518 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
13519 .addImm(0)
13520 .addReg(BufReg);
13521 } else {
13522 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
13523 .addImm(0)
13524 .addReg(BufReg);
13525 }
13526 MIB.cloneMemRefs(MI);
13527
13528 // Reload IP
13529 if (PVT == MVT::i64) {
13530 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
13531 .addImm(LabelOffset)
13532 .addReg(BufReg);
13533 } else {
13534 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
13535 .addImm(LabelOffset)
13536 .addReg(BufReg);
13537 }
13538 MIB.cloneMemRefs(MI);
13539
13540 // Reload SP
13541 if (PVT == MVT::i64) {
13542 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
13543 .addImm(SPOffset)
13544 .addReg(BufReg);
13545 } else {
13546 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
13547 .addImm(SPOffset)
13548 .addReg(BufReg);
13549 }
13550 MIB.cloneMemRefs(MI);
13551
13552 // Reload BP
13553 if (PVT == MVT::i64) {
13554 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
13555 .addImm(BPOffset)
13556 .addReg(BufReg);
13557 } else {
13558 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
13559 .addImm(BPOffset)
13560 .addReg(BufReg);
13561 }
13562 MIB.cloneMemRefs(MI);
13563
13564 // Reload TOC
13565 if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
13566 setUsesTOCBasePtr(*MBB->getParent());
13567 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
13568 .addImm(TOCOffset)
13569 .addReg(BufReg)
13570 .cloneMemRefs(MI);
13571 }
13572
13573 // Jump
13574 BuildMI(*MBB, MI, DL,
13575 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
13576 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
13577
13578 MI.eraseFromParent();
13579 return MBB;
13580}
13581
13582bool PPCTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
13583 // If the function specifically requests inline stack probes, emit them.
13584 if (MF.getFunction().hasFnAttribute("probe-stack"))
13585 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
13586 "inline-asm";
13587 return false;
13588}
13589
13590unsigned PPCTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
13591 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
13592 unsigned StackAlign = TFI->getStackAlignment();
13593 assert(StackAlign >= 1 && isPowerOf2_32(StackAlign) &&
13594 "Unexpected stack alignment");
13595 // The default stack probe size is 4096 if the function has no
13596 // stack-probe-size attribute.
13597 const Function &Fn = MF.getFunction();
13598 unsigned StackProbeSize =
13599 Fn.getFnAttributeAsParsedInteger("stack-probe-size", 4096);
13600 // Round down to the stack alignment.
13601 StackProbeSize &= ~(StackAlign - 1);
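// For example, "stack-probe-size"=3000 with a 16-byte stack alignment rounds
// down to 2992; a value smaller than the alignment rounds to 0 and falls back
// to the alignment itself below.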
13602 return StackProbeSize ? StackProbeSize : StackAlign;
13603}
13604
13605// Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
13606// into three phases. In the first phase, it uses the pseudo instruction
13607// PREPARE_PROBED_ALLOCA to get the future result of the actual FramePointer and
13608// FinalStackPtr. In the second phase, it generates a loop that probes blocks.
13609// Finally, it uses the pseudo instruction DYNAREAOFFSET to get the future result
13610// of MaxCallFrameSize so that it can calculate the correct data area pointer.
13611MachineBasicBlock *
13612PPCTargetLowering::emitProbedAlloca(MachineInstr &MI,
13613 MachineBasicBlock *MBB) const {
13614 const bool isPPC64 = Subtarget.isPPC64();
13615 MachineFunction *MF = MBB->getParent();
13616 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13617 DebugLoc DL = MI.getDebugLoc();
13618 const unsigned ProbeSize = getStackProbeSize(*MF);
13619 const BasicBlock *ProbedBB = MBB->getBasicBlock();
13620 MachineRegisterInfo &MRI = MF->getRegInfo();
13621 // The CFG of probing stack looks like:
13622 //         +-----+
13623 //         | MBB |
13624 //         +--+--+
13625 //            |
13626 //       +----v----+
13627 //  +--->+ TestMBB +---+
13628 //  |    +----+----+   |
13629 //  |         |        |
13630 //  |    +-----v----+  |
13631 //  +---+ BlockMBB  |  |
13632 //       +----------+  |
13633 //                     |
13634 //        +---------+  |
13635 //        | TailMBB +<--+
13636 //        +---------+
13637 // In MBB, calculate previous frame pointer and final stack pointer.
13638 // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
13639 // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
13640 // TailMBB is spliced via \p MI.
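// For example, with ProbeSize = 4096 and an allocation whose negated size is
// -10000: the residual store below moves SP down by 1808 bytes
// (-10000 - (-10000 / -4096) * -4096 = -1808), and the loop then performs two
// more 4096-byte probing stores until SP reaches FinalStackPtr.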
13641 MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
13642 MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
13643 MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
13644
13645 MachineFunction::iterator MBBIter = ++MBB->getIterator();
13646 MF->insert(MBBIter, TestMBB);
13647 MF->insert(MBBIter, BlockMBB);
13648 MF->insert(MBBIter, TailMBB);
13649
13650 const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
13651 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
13652
13653 Register DstReg = MI.getOperand(0).getReg();
13654 Register NegSizeReg = MI.getOperand(1).getReg();
13655 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
13656 Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13657 Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13658 Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13659
13660 // Since the value of NegSizeReg might be realigned during prologue/epilogue
13661 // insertion, insert a PREPARE_PROBED_ALLOCA pseudo instruction to get the
13662 // actual FramePointer and NegSize.
13663 unsigned ProbeOpc;
13664 if (!MRI.hasOneNonDBGUse(NegSizeReg))
13665 ProbeOpc =
13666 isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
13667 else
13668 // When NegSizeReg has only one use (the current MI, which will be replaced
13669 // by PREPARE_PROBED_ALLOCA), use the NEGSIZE_SAME_REG variant so that
13670 // ActualNegSizeReg and NegSizeReg are allocated to the same physical
13671 // register, avoiding a redundant copy.
13672 ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
13673 : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
13674 BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
13675 .addDef(ActualNegSizeReg)
13676 .addReg(NegSizeReg)
13677 .add(MI.getOperand(2))
13678 .add(MI.getOperand(3));
13679
13680 // Calculate the final stack pointer, which equals SP + ActualNegSize.
13681 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
13682 FinalStackPtr)
13683 .addReg(SPReg)
13684 .addReg(ActualNegSizeReg);
13685
13686 // Materialize a scratch register for update.
13687 int64_t NegProbeSize = -(int64_t)ProbeSize;
13688 assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
13689 Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13690 if (!isInt<16>(NegProbeSize)) {
13691 Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13692 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
13693 .addImm(NegProbeSize >> 16);
13694 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
13695 ScratchReg)
13696 .addReg(TempReg)
13697 .addImm(NegProbeSize & 0xFFFF);
13698 } else
13699 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
13700 .addImm(NegProbeSize);
13701
13702 {
13703 // Probing leading residual part.
13704 Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13705 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
13706 .addReg(ActualNegSizeReg)
13707 .addReg(ScratchReg);
13708 Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13709 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
13710 .addReg(Div)
13711 .addReg(ScratchReg);
13712 Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13713 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
13714 .addReg(Mul)
13715 .addReg(ActualNegSizeReg);
13716 BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13717 .addReg(FramePointer)
13718 .addReg(SPReg)
13719 .addReg(NegMod);
13720 }
13721
13722 {
13723 // Remaining part should be multiple of ProbeSize.
13724 Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
13725 BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
13726 .addReg(SPReg)
13727 .addReg(FinalStackPtr);
13728 BuildMI(TestMBB, DL, TII->get(PPC::BCC))
13729 .addImm(PPC::PRED_EQ)
13730 .addReg(CmpResult)
13731 .addMBB(TailMBB);
13732 TestMBB->addSuccessor(BlockMBB);
13733 TestMBB->addSuccessor(TailMBB);
13734 }
13735
13736 {
13737 // Touch the block.
13738 // |P...|P...|P...
13739 BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
13740 .addReg(FramePointer)
13741 .addReg(SPReg)
13742 .addReg(ScratchReg);
13743 BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
13744 BlockMBB->addSuccessor(TestMBB);
13745 }
13746
13747 // Calculation of MaxCallFrameSize is deferred to prologue/epilogue insertion;
13748 // use the DYNAREAOFFSET pseudo instruction to get the future result.
13749 Register MaxCallFrameSizeReg =
13750 MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
13751 BuildMI(TailMBB, DL,
13752 TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
13753 MaxCallFrameSizeReg)
13754 .add(MI.getOperand(2))
13755 .add(MI.getOperand(3));
13756 BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
13757 .addReg(SPReg)
13758 .addReg(MaxCallFrameSizeReg);
13759
13760 // Splice instructions after MI to TailMBB.
13761 TailMBB->splice(TailMBB->end(), MBB,
13762 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
13763 TailMBB->transferSuccessorsAndUpdatePHIs(MBB);
13764 MBB->addSuccessor(TestMBB);
13765
13766 // Delete the pseudo instruction.
13767 MI.eraseFromParent();
13768
13769 ++NumDynamicAllocaProbed;
13770 return TailMBB;
13771}
13772
13774 switch (MI.getOpcode()) {
13775 case PPC::SELECT_CC_I4:
13776 case PPC::SELECT_CC_I8:
13777 case PPC::SELECT_CC_F4:
13778 case PPC::SELECT_CC_F8:
13779 case PPC::SELECT_CC_F16:
13780 case PPC::SELECT_CC_VRRC:
13781 case PPC::SELECT_CC_VSFRC:
13782 case PPC::SELECT_CC_VSSRC:
13783 case PPC::SELECT_CC_VSRC:
13784 case PPC::SELECT_CC_SPE4:
13785 case PPC::SELECT_CC_SPE:
13786 return true;
13787 default:
13788 return false;
13789 }
13790}
13791
13792static bool IsSelect(MachineInstr &MI) {
13793 switch (MI.getOpcode()) {
13794 case PPC::SELECT_I4:
13795 case PPC::SELECT_I8:
13796 case PPC::SELECT_F4:
13797 case PPC::SELECT_F8:
13798 case PPC::SELECT_F16:
13799 case PPC::SELECT_SPE:
13800 case PPC::SELECT_SPE4:
13801 case PPC::SELECT_VRRC:
13802 case PPC::SELECT_VSFRC:
13803 case PPC::SELECT_VSSRC:
13804 case PPC::SELECT_VSRC:
13805 return true;
13806 default:
13807 return false;
13808 }
13809}
13810
13811MachineBasicBlock *
13812PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
13813 MachineBasicBlock *BB) const {
13814 if (MI.getOpcode() == TargetOpcode::STACKMAP ||
13815 MI.getOpcode() == TargetOpcode::PATCHPOINT) {
13816 if (Subtarget.is64BitELFABI() &&
13817 MI.getOpcode() == TargetOpcode::PATCHPOINT &&
13818 !Subtarget.isUsingPCRelativeCalls()) {
13819 // Call lowering should have added an r2 operand to indicate a dependence
13820 // on the TOC base pointer value. It can't however, because there is no
13821 // way to mark the dependence as implicit there, and so the stackmap code
13822 // will confuse it with a regular operand. Instead, add the dependence
13823 // here.
13824 MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
13825 }
13826
13827 return emitPatchPoint(MI, BB);
13828 }
13829
13830 if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
13831 MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
13832 return emitEHSjLjSetJmp(MI, BB);
13833 } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
13834 MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
13835 return emitEHSjLjLongJmp(MI, BB);
13836 }
13837
13838 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
13839
13840 // To "insert" these instructions we actually have to insert their
13841 // control-flow patterns.
13842 const BasicBlock *LLVM_BB = BB->getBasicBlock();
13843 MachineFunction::iterator It = ++BB->getIterator();
13844
13845 MachineFunction *F = BB->getParent();
13846 MachineRegisterInfo &MRI = F->getRegInfo();
13847
13848 if (Subtarget.hasISEL() &&
13849 (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13850 MI.getOpcode() == PPC::SELECT_CC_I8 ||
13851 MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8)) {
13852 SmallVector<MachineOperand, 2> Cond;
13853 if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
13854 MI.getOpcode() == PPC::SELECT_CC_I8)
13855 Cond.push_back(MI.getOperand(4));
13856 else
13857 Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
13858 Cond.push_back(MI.getOperand(1));
13859
13860 DebugLoc dl = MI.getDebugLoc();
13861 TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
13862 MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
13863 } else if (IsSelectCC(MI) || IsSelect(MI)) {
13864 // The incoming instruction knows the destination vreg to set, the
13865 // condition code register to branch on, the true/false values to
13866 // select between, and a branch opcode to use.
13867
13868 // thisMBB:
13869 // ...
13870 // TrueVal = ...
13871 // cmpTY ccX, r1, r2
13872 // bCC sinkMBB
13873 // fallthrough --> copy0MBB
13874 MachineBasicBlock *thisMBB = BB;
13875 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
13876 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13877 DebugLoc dl = MI.getDebugLoc();
13878 F->insert(It, copy0MBB);
13879 F->insert(It, sinkMBB);
13880
13881 if (isPhysRegUsedAfter(PPC::CARRY, MI.getIterator())) {
13882 copy0MBB->addLiveIn(PPC::CARRY);
13883 sinkMBB->addLiveIn(PPC::CARRY);
13884 }
13885
13886 // Set the call frame size on entry to the new basic blocks.
13887 // See https://reviews.llvm.org/D156113.
13888 unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
13889 copy0MBB->setCallFrameSize(CallFrameSize);
13890 sinkMBB->setCallFrameSize(CallFrameSize);
13891
13892 // Transfer the remainder of BB and its successor edges to sinkMBB.
13893 sinkMBB->splice(sinkMBB->begin(), BB,
13894 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13895 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13896
13897 // Next, add the true and fallthrough blocks as its successors.
13898 BB->addSuccessor(copy0MBB);
13899 BB->addSuccessor(sinkMBB);
13900
13901 if (IsSelect(MI)) {
13902 BuildMI(BB, dl, TII->get(PPC::BC))
13903 .addReg(MI.getOperand(1).getReg())
13904 .addMBB(sinkMBB);
13905 } else {
13906 unsigned SelectPred = MI.getOperand(4).getImm();
13907 BuildMI(BB, dl, TII->get(PPC::BCC))
13908 .addImm(SelectPred)
13909 .addReg(MI.getOperand(1).getReg())
13910 .addMBB(sinkMBB);
13911 }
13912
13913 // copy0MBB:
13914 // %FalseValue = ...
13915 // # fallthrough to sinkMBB
13916 BB = copy0MBB;
13917
13918 // Update machine-CFG edges
13919 BB->addSuccessor(sinkMBB);
13920
13921 // sinkMBB:
13922 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
13923 // ...
13924 BB = sinkMBB;
13925 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
13926 .addReg(MI.getOperand(3).getReg())
13927 .addMBB(copy0MBB)
13928 .addReg(MI.getOperand(2).getReg())
13929 .addMBB(thisMBB);
13930 } else if (MI.getOpcode() == PPC::ReadTB) {
13931 // To read the 64-bit time-base register on a 32-bit target, we read the
13932 // two halves. Should the counter have wrapped while it was being read, we
13933 // need to try again.
13934 // ...
13935 // readLoop:
13936 // mfspr Rx,TBU # load from TBU
13937 // mfspr Ry,TB # load from TB
13938 // mfspr Rz,TBU # load from TBU
13939 // cmpw crX,Rx,Rz # check if 'old'='new'
13940 // bne readLoop # branch if they're not equal
13941 // ...
13942
13943 MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
13944 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
13945 DebugLoc dl = MI.getDebugLoc();
13946 F->insert(It, readMBB);
13947 F->insert(It, sinkMBB);
13948
13949 // Transfer the remainder of BB and its successor edges to sinkMBB.
13950 sinkMBB->splice(sinkMBB->begin(), BB,
13951 std::next(MachineBasicBlock::iterator(MI)), BB->end());
13952 sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
13953
13954 BB->addSuccessor(readMBB);
13955 BB = readMBB;
13956
13957 MachineRegisterInfo &RegInfo = F->getRegInfo();
13958 Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
13959 Register LoReg = MI.getOperand(0).getReg();
13960 Register HiReg = MI.getOperand(1).getReg();
13961
13962 BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
13963 BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
13964 BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
13965
13966 Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
13967
13968 BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
13969 .addReg(HiReg)
13970 .addReg(ReadAgainReg);
13971 BuildMI(BB, dl, TII->get(PPC::BCC))
13972 .addImm(PPC::PRED_NE)
13973 .addReg(CmpReg)
13974 .addMBB(readMBB);
13975
13976 BB->addSuccessor(readMBB);
13977 BB->addSuccessor(sinkMBB);
13978 } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
13979 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
13980 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
13981 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
13982 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
13983 BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
13984 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
13985 BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
13986
13987 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
13988 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
13989 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
13990 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
13991 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
13992 BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
13993 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
13994 BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
13995
13996 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
13997 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
13998 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
13999 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
14000 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
14001 BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
14002 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
14003 BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
14004
14005 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
14006 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
14007 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
14008 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
14009 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
14010 BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
14011 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
14012 BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
14013
14014 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
14015 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
14016 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
14017 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
14018 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
14019 BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
14020 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
14021 BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
14022
14023 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
14024 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
14025 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
14026 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
14027 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
14028 BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
14029 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
14030 BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
14031
14032 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
14033 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LT);
14034 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
14035 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LT);
14036 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
14037 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LT);
14038 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
14039 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LT);
14040
14041 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
14042 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GT);
14043 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
14044 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GT);
14045 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
14046 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GT);
14047 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
14048 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GT);
14049
14050 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
14051 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LT);
14052 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
14053 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LT);
14054 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
14055 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LT);
14056 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
14057 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LT);
14058
14059 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
14060 BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GT);
14061 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
14062 BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GT);
14063 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
14064 BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GT);
14065 else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
14066 BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GT);
14067
14068 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
14069 BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
14070 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
14071 BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
14072 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
14073 BB = EmitAtomicBinary(MI, BB, 4, 0);
14074 else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
14075 BB = EmitAtomicBinary(MI, BB, 8, 0);
14076 else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
14077 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
14078 (Subtarget.hasPartwordAtomics() &&
14079 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
14080 (Subtarget.hasPartwordAtomics() &&
14081 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
14082 bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
14083
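// Default to the doubleword load-and-reserve / store-conditional pair
// (ldarx/stdcx.); the narrower compare-and-swap widths override these below.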
14084 auto LoadMnemonic = PPC::LDARX;
14085 auto StoreMnemonic = PPC::STDCX;
14086 switch (MI.getOpcode()) {
14087 default:
14088 llvm_unreachable("Compare and swap of unknown size");
14089 case PPC::ATOMIC_CMP_SWAP_I8:
14090 LoadMnemonic = PPC::LBARX;
14091 StoreMnemonic = PPC::STBCX;
14092 assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
14093 break;
14094 case PPC::ATOMIC_CMP_SWAP_I16:
14095 LoadMnemonic = PPC::LHARX;
14096 StoreMnemonic = PPC::STHCX;
14097 assert(Subtarget.hasPartwordAtomics() && "Partword atomics not supported.");
14098 break;
14099 case PPC::ATOMIC_CMP_SWAP_I32:
14100 LoadMnemonic = PPC::LWARX;
14101 StoreMnemonic = PPC::STWCX;
14102 break;
14103 case PPC::ATOMIC_CMP_SWAP_I64:
14104 LoadMnemonic = PPC::LDARX;
14105 StoreMnemonic = PPC::STDCX;
14106 break;
14107 }
14108 MachineRegisterInfo &RegInfo = F->getRegInfo();
14109 Register dest = MI.getOperand(0).getReg();
14110 Register ptrA = MI.getOperand(1).getReg();
14111 Register ptrB = MI.getOperand(2).getReg();
14112 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14113 Register oldval = MI.getOperand(3).getReg();
14114 Register newval = MI.getOperand(4).getReg();
14115 DebugLoc dl = MI.getDebugLoc();
14116
14117 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14118 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14119 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14120 F->insert(It, loop1MBB);
14121 F->insert(It, loop2MBB);
14122 F->insert(It, exitMBB);
14123 exitMBB->splice(exitMBB->begin(), BB,
14124 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14125 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14126
14127 // thisMBB:
14128 // ...
14129 // fallthrough --> loopMBB
14130 BB->addSuccessor(loop1MBB);
14131
14132 // loop1MBB:
14133 // l[bhwd]arx dest, ptr
14134 // cmp[wd] dest, oldval
14135 // bne- exitBB
14136 // loop2MBB:
14137 // st[bhwd]cx. newval, ptr
14138 // bne- loopMBB
14139 // b exitBB
14140 // exitBB:
14141 BB = loop1MBB;
14142 BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
14143 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), CrReg)
14144 .addReg(dest)
14145 .addReg(oldval);
14146 BuildMI(BB, dl, TII->get(PPC::BCC))
14147 .addImm(PPC::PRED_NE)
14148 .addReg(CrReg)
14149 .addMBB(exitMBB);
14150 BB->addSuccessor(loop2MBB);
14151 BB->addSuccessor(exitMBB);
14152
14153 BB = loop2MBB;
14154 BuildMI(BB, dl, TII->get(StoreMnemonic))
14155 .addReg(newval)
14156 .addReg(ptrA)
14157 .addReg(ptrB);
14158 BuildMI(BB, dl, TII->get(PPC::BCC))
14159 .addImm(PPC::PRED_NE)
14160 .addReg(PPC::CR0)
14161 .addMBB(loop1MBB);
14162 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14163 BB->addSuccessor(loop1MBB);
14164 BB->addSuccessor(exitMBB);
14165
14166 // exitMBB:
14167 // ...
14168 BB = exitMBB;
14169 } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
14170 MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
14171 // We must use 64-bit registers for addresses when targeting 64-bit,
14172 // since we're actually doing arithmetic on them. Other registers
14173 // can be 32-bit.
14174 bool is64bit = Subtarget.isPPC64();
14175 bool isLittleEndian = Subtarget.isLittleEndian();
14176 bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
14177
14178 Register dest = MI.getOperand(0).getReg();
14179 Register ptrA = MI.getOperand(1).getReg();
14180 Register ptrB = MI.getOperand(2).getReg();
14181 Register oldval = MI.getOperand(3).getReg();
14182 Register newval = MI.getOperand(4).getReg();
14183 DebugLoc dl = MI.getDebugLoc();
14184
14185 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
14186 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
14187 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
14188 F->insert(It, loop1MBB);
14189 F->insert(It, loop2MBB);
14190 F->insert(It, exitMBB);
14191 exitMBB->splice(exitMBB->begin(), BB,
14192 std::next(MachineBasicBlock::iterator(MI)), BB->end());
14193 exitMBB->transferSuccessorsAndUpdatePHIs(BB);
14194
14195 MachineRegisterInfo &RegInfo = F->getRegInfo();
14196 const TargetRegisterClass *RC =
14197 is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
14198 const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
14199
14200 Register PtrReg = RegInfo.createVirtualRegister(RC);
14201 Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
14202 Register ShiftReg =
14203 isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
14204 Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
14205 Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
14206 Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
14207 Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
14208 Register MaskReg = RegInfo.createVirtualRegister(GPRC);
14209 Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
14210 Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
14211 Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
14212 Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
14213 Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
14214 Register Ptr1Reg;
14215 Register TmpReg = RegInfo.createVirtualRegister(GPRC);
14216 Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
14217 Register CrReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
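// ZeroReg is passed as the RA operand of the lwarx/stwcx. sequence below;
// when r0 (ZERO/ZERO8) appears in that slot the hardware reads it as 0, so
// the access is a plain register-indirect one through PtrReg.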
14218 // thisMBB:
14219 // ...
14220 // fallthrough --> loopMBB
14221 BB->addSuccessor(loop1MBB);
14222
14223 // The 4-byte load must be aligned, while a char or short may be
14224 // anywhere in the word. Hence all this nasty bookkeeping code.
14225 // add ptr1, ptrA, ptrB [copy if ptrA==0]
14226 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
14227 // xori shift, shift1, 24 [16]
14228 // rlwinm ptr, ptr1, 0, 0, 29
14229 // slw newval2, newval, shift
14230 // slw oldval2, oldval,shift
14231 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
14232 // slw mask, mask2, shift
14233 // and newval3, newval2, mask
14234 // and oldval3, oldval2, mask
14235 // loop1MBB:
14236 // lwarx tmpDest, ptr
14237 // and tmp, tmpDest, mask
14238 // cmpw tmp, oldval3
14239 // bne- exitBB
14240 // loop2MBB:
14241 // andc tmp2, tmpDest, mask
14242 // or tmp4, tmp2, newval3
14243 // stwcx. tmp4, ptr
14244 // bne- loop1MBB
14245 // b exitBB
14246 // exitBB:
14247 // srw dest, tmpDest, shift
14248 if (ptrA != ZeroReg) {
14249 Ptr1Reg = RegInfo.createVirtualRegister(RC);
14250 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
14251 .addReg(ptrA)
14252 .addReg(ptrB);
14253 } else {
14254 Ptr1Reg = ptrB;
14255 }
14256
14257 // We need to use a 32-bit subregister here to avoid a register class
14258 // mismatch in 64-bit mode.
14259 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
14260 .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
14261 .addImm(3)
14262 .addImm(27)
14263 .addImm(is8bit ? 28 : 27);
14264 if (!isLittleEndian)
14265 BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
14266 .addReg(Shift1Reg)
14267 .addImm(is8bit ? 24 : 16);
14268 if (is64bit)
14269 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
14270 .addReg(Ptr1Reg)
14271 .addImm(0)
14272 .addImm(61);
14273 else
14274 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
14275 .addReg(Ptr1Reg)
14276 .addImm(0)
14277 .addImm(0)
14278 .addImm(29);
14279 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
14280 .addReg(newval)
14281 .addReg(ShiftReg);
14282 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
14283 .addReg(oldval)
14284 .addReg(ShiftReg);
14285 if (is8bit)
14286 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
14287 else {
14288 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
14289 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
14290 .addReg(Mask3Reg)
14291 .addImm(65535);
14292 }
14293 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
14294 .addReg(Mask2Reg)
14295 .addReg(ShiftReg);
14296 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
14297 .addReg(NewVal2Reg)
14298 .addReg(MaskReg);
14299 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
14300 .addReg(OldVal2Reg)
14301 .addReg(MaskReg);
14302
14303 BB = loop1MBB;
14304 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
14305 .addReg(ZeroReg)
14306 .addReg(PtrReg);
14307 BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
14308 .addReg(TmpDestReg)
14309 .addReg(MaskReg);
14310 BuildMI(BB, dl, TII->get(PPC::CMPW), CrReg)
14311 .addReg(TmpReg)
14312 .addReg(OldVal3Reg);
14313 BuildMI(BB, dl, TII->get(PPC::BCC))
14314 .addImm(PPC::PRED_NE)
14315 .addReg(CrReg)
14316 .addMBB(exitMBB);
14317 BB->addSuccessor(loop2MBB);
14318 BB->addSuccessor(exitMBB);
14319
14320 BB = loop2MBB;
14321 BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
14322 .addReg(TmpDestReg)
14323 .addReg(MaskReg);
14324 BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
14325 .addReg(Tmp2Reg)
14326 .addReg(NewVal3Reg);
14327 BuildMI(BB, dl, TII->get(PPC::STWCX))
14328 .addReg(Tmp4Reg)
14329 .addReg(ZeroReg)
14330 .addReg(PtrReg);
14331 BuildMI(BB, dl, TII->get(PPC::BCC))
14332 .addImm(PPC::PRED_NE)
14333 .addReg(PPC::CR0)
14334 .addMBB(loop1MBB);
14335 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
14336 BB->addSuccessor(loop1MBB);
14337 BB->addSuccessor(exitMBB);
14338
14339 // exitMBB:
14340 // ...
14341 BB = exitMBB;
14342 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
14343 .addReg(TmpReg)
14344 .addReg(ShiftReg);
14345 } else if (MI.getOpcode() == PPC::FADDrtz) {
14346 // This pseudo performs an FADD with rounding mode temporarily forced
14347 // to round-to-zero. We emit this via custom inserter since the FPSCR
14348 // is not modeled at the SelectionDAG level.
14349 Register Dest = MI.getOperand(0).getReg();
14350 Register Src1 = MI.getOperand(1).getReg();
14351 Register Src2 = MI.getOperand(2).getReg();
14352 DebugLoc dl = MI.getDebugLoc();
14353
14354 MachineRegisterInfo &RegInfo = F->getRegInfo();
14355 Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14356
14357 // Save FPSCR value.
14358 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
14359
14360 // Set rounding mode to round-to-zero.
14361 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
14362 .addImm(31)
14363 .addReg(PPC::RM, RegState::ImplicitDefine);
14364
14365 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
14366 .addImm(30)
14367 .addReg(PPC::RM, RegState::ImplicitDefine);
14368
14369 // Perform addition.
14370 auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
14371 .addReg(Src1)
14372 .addReg(Src2);
14373 if (MI.getFlag(MachineInstr::NoFPExcept))
14374 MIB.setFlag(MachineInstr::NoFPExcept);
14375
14376 // Restore FPSCR value.
14377 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
14378 } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14379 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
14380 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14381 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
14382 unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
14383 MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
14384 ? PPC::ANDI8_rec
14385 : PPC::ANDI_rec;
14386 bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
14387 MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
14388
14389 MachineRegisterInfo &RegInfo = F->getRegInfo();
14390 Register Dest = RegInfo.createVirtualRegister(
14391 Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
14392
14393 DebugLoc Dl = MI.getDebugLoc();
14394 BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
14395 .addReg(MI.getOperand(1).getReg())
14396 .addImm(1);
14397 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14398 MI.getOperand(0).getReg())
14399 .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
14400 } else if (MI.getOpcode() == PPC::TCHECK_RET) {
14401 DebugLoc Dl = MI.getDebugLoc();
14402 MachineRegisterInfo &RegInfo = F->getRegInfo();
14403 Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
14404 BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
14405 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14406 MI.getOperand(0).getReg())
14407 .addReg(CRReg);
14408 } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
14409 DebugLoc Dl = MI.getDebugLoc();
14410 unsigned Imm = MI.getOperand(1).getImm();
14411 BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
14412 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
14413 MI.getOperand(0).getReg())
14414 .addReg(PPC::CR0EQ);
14415 } else if (MI.getOpcode() == PPC::SETRNDi) {
14416 DebugLoc dl = MI.getDebugLoc();
14417 Register OldFPSCRReg = MI.getOperand(0).getReg();
14418
14419 // Save FPSCR value.
14420 if (MRI.use_empty(OldFPSCRReg))
14421 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14422 else
14423 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14424
14425 // The floating-point rounding mode is in bits 62:63 of the FPSCR and has
14426 // the following settings:
14427 // 00 Round to nearest
14428 // 01 Round to 0
14429 // 10 Round to +inf
14430 // 11 Round to -inf
14431
14432 // When the operand is an immediate, use its two least significant bits to
14433 // set bits 62:63 of the FPSCR.
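// For example, Mode == 1 (round toward zero) emits mtfsb1 with operand 31
// (FPSCR bit 63) and mtfsb0 with operand 30 (FPSCR bit 62), giving RN = 0b01.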
14434 unsigned Mode = MI.getOperand(1).getImm();
14435 BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
14436 .addImm(31)
14437 .addReg(PPC::RM, RegState::ImplicitDefine);
14438
14439 BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
14440 .addImm(30)
14441 .addReg(PPC::RM, RegState::ImplicitDefine);
14442 } else if (MI.getOpcode() == PPC::SETRND) {
14443 DebugLoc dl = MI.getDebugLoc();
14444
14445 // Copy a register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg,
14446 // or from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
14447 // If the target doesn't have DirectMove, we should use the stack to do the
14448 // conversion, because the target doesn't have instructions like mtvsrd
14449 // or mfvsrd to do this conversion directly.
14450 auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
14451 if (Subtarget.hasDirectMove()) {
14452 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
14453 .addReg(SrcReg);
14454 } else {
14455 // Use stack to do the register copy.
14456 unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
14457 MachineRegisterInfo &RegInfo = F->getRegInfo();
14458 const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
14459 if (RC == &PPC::F8RCRegClass) {
14460 // Copy register from F8RCRegClass to G8RCRegclass.
14461 assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
14462 "Unsupported RegClass.");
14463
14464 StoreOp = PPC::STFD;
14465 LoadOp = PPC::LD;
14466 } else {
14467 // Copy register from G8RCRegClass to F8RCRegclass.
14468 assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
14469 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
14470 "Unsupported RegClass.");
14471 }
14472
14473 MachineFrameInfo &MFI = F->getFrameInfo();
14474 int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
14475
14476 MachineMemOperand *MMOStore = F->getMachineMemOperand(
14477 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14478 MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
14479 MFI.getObjectAlign(FrameIdx));
14480
14481 // Store the SrcReg into the stack.
14482 BuildMI(*BB, MI, dl, TII->get(StoreOp))
14483 .addReg(SrcReg)
14484 .addImm(0)
14485 .addFrameIndex(FrameIdx)
14486 .addMemOperand(MMOStore);
14487
14488 MachineMemOperand *MMOLoad = F->getMachineMemOperand(
14489 MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
14490 MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
14491 MFI.getObjectAlign(FrameIdx));
14492
14493 // Load from the stack where SrcReg is stored, and save to DestReg,
14494 // so we have done the RegClass conversion from RegClass::SrcReg to
14495 // RegClass::DestReg.
14496 BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
14497 .addImm(0)
14498 .addFrameIndex(FrameIdx)
14499 .addMemOperand(MMOLoad);
14500 }
14501 };
14502
14503 Register OldFPSCRReg = MI.getOperand(0).getReg();
14504
14505 // Save FPSCR value.
14506 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
14507
14508 // When the operand is a GPRC register, use its two least significant bits
14509 // and the mtfsf instruction to set bits 62:63 of the FPSCR.
14510 //
14511 // copy OldFPSCRTmpReg, OldFPSCRReg
14512 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
14513 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
14514 // copy NewFPSCRReg, NewFPSCRTmpReg
14515 // mtfsf 255, NewFPSCRReg
14516 MachineOperand SrcOp = MI.getOperand(1);
14517 MachineRegisterInfo &RegInfo = F->getRegInfo();
14518 Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14519
14520 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
14521
14522 Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14523 Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14524
14525 // The first operand of INSERT_SUBREG should be a register that has
14526 // subregisters; since we only care about its register class, an
14527 // IMPLICIT_DEF register is sufficient.
14528 BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
14529 BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
14530 .addReg(ImDefReg)
14531 .add(SrcOp)
14532 .addImm(1);
14533
14534 Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
14535 BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
14536 .addReg(OldFPSCRTmpReg)
14537 .addReg(ExtSrcReg)
14538 .addImm(0)
14539 .addImm(62);
14540
14541 Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
14542 copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
14543
14544 // The mask 255 means that bits 32:63 of NewFPSCRReg are placed into bits
14545 // 32:63 of the FPSCR.
14546 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
14547 .addImm(255)
14548 .addReg(NewFPSCRReg)
14549 .addImm(0)
14550 .addImm(0);
14551 } else if (MI.getOpcode() == PPC::SETFLM) {
14552 DebugLoc Dl = MI.getDebugLoc();
14553
14554 // Result of setflm is previous FPSCR content, so we need to save it first.
14555 Register OldFPSCRReg = MI.getOperand(0).getReg();
14556 if (MRI.use_empty(OldFPSCRReg))
14557 BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
14558 else
14559 BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
14560
14561 // Put bits 32:63 of NewFPSCRReg into the FPSCR.
14562 Register NewFPSCRReg = MI.getOperand(1).getReg();
14563 BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
14564 .addImm(255)
14565 .addReg(NewFPSCRReg)
14566 .addImm(0)
14567 .addImm(0);
14568 } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
14569 MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
14570 return emitProbedAlloca(MI, BB);
14571 } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
14572 DebugLoc DL = MI.getDebugLoc();
14573 Register Src = MI.getOperand(2).getReg();
14574 Register Lo = MI.getOperand(0).getReg();
14575 Register Hi = MI.getOperand(1).getReg();
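// Expand SPLIT_QUADWORD into two subregister copies out of the quadword
// pair: Lo is taken from the sub_gp8_x1 half of Src and Hi from the
// sub_gp8_x0 half.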
14576 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14577 .addDef(Lo)
14578 .addUse(Src, 0, PPC::sub_gp8_x1);
14579 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
14580 .addDef(Hi)
14581 .addUse(Src, 0, PPC::sub_gp8_x0);
14582 } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
14583 MI.getOpcode() == PPC::STQX_PSEUDO) {
14584 DebugLoc DL = MI.getDebugLoc();
14585 // Ptr holds the sum of RA and RB and is used as the ptr_rc_no_r0 part
14586 // of LQ/STQ's memory operand, so it has to be in
14587 // g8rc_and_g8rc_nox0.
14588 Register Ptr =
14589 F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
14590 Register Val = MI.getOperand(0).getReg();
14591 Register RA = MI.getOperand(1).getReg();
14592 Register RB = MI.getOperand(2).getReg();
14593 BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
14594 BuildMI(*BB, MI, DL,
14595 MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
14596 : TII->get(PPC::STQ))
14597 .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
14598 .addImm(0)
14599 .addReg(Ptr);
14600 } else if (MI.getOpcode() == PPC::LWAT_PSEUDO ||
14601 MI.getOpcode() == PPC::LDAT_PSEUDO) {
14602 DebugLoc DL = MI.getDebugLoc();
14603 Register DstReg = MI.getOperand(0).getReg();
14604 Register PtrReg = MI.getOperand(1).getReg();
14605 Register ValReg = MI.getOperand(2).getReg();
14606 unsigned FC = MI.getOperand(3).getImm();
14607 bool IsLwat = MI.getOpcode() == PPC::LWAT_PSEUDO;
14608 Register Val64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14609 if (IsLwat)
14610 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::SUBREG_TO_REG), Val64)
14611 .addImm(0)
14612 .addReg(ValReg)
14613 .addImm(PPC::sub_32);
14614 else
14615 Val64 = ValReg;
14616
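// Build the even/odd G8 register pair that LWAT/LDAT operate on: the source
// value is placed in the sub_gp8_x1 half, and the loaded result is copied
// back out of the sub_gp8_x0 half of the pair result below.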
14617 Register G8rPair = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14618 Register UndefG8r = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14619 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), UndefG8r);
14620 BuildMI(*BB, MI, DL, TII->get(PPC::REG_SEQUENCE), G8rPair)
14621 .addReg(UndefG8r)
14622 .addImm(PPC::sub_gp8_x0)
14623 .addReg(Val64)
14624 .addImm(PPC::sub_gp8_x1);
14625
14626 Register PairResult = MRI.createVirtualRegister(&PPC::G8pRCRegClass);
14627 BuildMI(*BB, MI, DL, TII->get(IsLwat ? PPC::LWAT : PPC::LDAT), PairResult)
14628 .addReg(G8rPair)
14629 .addReg(PtrReg)
14630 .addImm(FC);
14631 Register Result64 = MRI.createVirtualRegister(&PPC::G8RCRegClass);
14632 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), Result64)
14633 .addReg(PairResult, 0, PPC::sub_gp8_x0);
14634 if (IsLwat)
14635 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14636 .addReg(Result64, 0, PPC::sub_32);
14637 else
14638 BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), DstReg)
14639 .addReg(Result64);
14640 } else {
14641 llvm_unreachable("Unexpected instr type to insert");
14642 }
14643
14644 MI.eraseFromParent(); // The pseudo instruction is gone now.
14645 return BB;
14646}
14647
14648//===----------------------------------------------------------------------===//
14649// Target Optimization Hooks
14650//===----------------------------------------------------------------------===//
14651
14652static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
14653 // For the estimates, convergence is quadratic, so we essentially double the
14654 // number of digits correct after every iteration. For both FRE and FRSQRTE,
14655 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
14656 // this is 2^-14. IEEE single precision has 23 fraction bits and double has 52.
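// For example, starting from roughly 2^-5 accuracy, three iterations give
// about 5 -> 10 -> 20 -> 40 correct bits (enough for f32); f64 needs one
// additional step to cover its 53-bit significand.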
14657 int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
14658 if (VT.getScalarType() == MVT::f64)
14659 RefinementSteps++;
14660 return RefinementSteps;
14661}
14662
14663SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
14664 const DenormalMode &Mode) const {
14665 // We only have VSX Vector Test for software Square Root.
14666 EVT VT = Op.getValueType();
14667 if (!isTypeLegal(MVT::i1) ||
14668 (VT != MVT::f64 &&
14669 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
14670 return TargetLowering::getSqrtInputTest(Op, DAG, Mode);
14671
14672 SDLoc DL(Op);
14673 // The output register of FTSQRT is a CR field.
14674 SDValue FTSQRT = DAG.getNode(PPCISD::FTSQRT, DL, MVT::i32, Op);
14675 // ftsqrt BF,FRB
14676 // Let e_b be the unbiased exponent of the double-precision
14677 // floating-point operand in register FRB.
14678 // fe_flag is set to 1 if either of the following conditions occurs.
14679 // - The double-precision floating-point operand in register FRB is a zero,
14680 // a NaN, an infinity, or a negative value.
14681 // - e_b is less than or equal to -970.
14682 // Otherwise fe_flag is set to 0.
14683 // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
14684 // not eligible for iteration. (zero/negative/infinity/nan or unbiased
14685 // exponent is less than -970)
14686 SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
14687 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
14688 FTSQRT, SRIdxVal),
14689 0);
14690}
14691
14692SDValue
14693PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
14694 SelectionDAG &DAG) const {
14695 // We only have VSX Vector Square Root.
14696 EVT VT = Op.getValueType();
14697 if (VT != MVT::f64 &&
14698 ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
14699 return TargetLowering::getSqrtResultForDenormInput(Op, DAG);
14700
14701 return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
14702}
14703
14704SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
14705 int Enabled, int &RefinementSteps,
14706 bool &UseOneConstNR,
14707 bool Reciprocal) const {
14708 EVT VT = Operand.getValueType();
14709 if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
14710 (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
14711 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14712 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14713 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14714 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14715
14716 // The Newton-Raphson computation with a single constant does not provide
14717 // enough accuracy on some CPUs.
14718 UseOneConstNR = !Subtarget.needsTwoConstNR();
14719 return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
14720 }
14721 return SDValue();
14722}
14723
14724SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
14725 int Enabled,
14726 int &RefinementSteps) const {
14727 EVT VT = Operand.getValueType();
14728 if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
14729 (VT == MVT::f64 && Subtarget.hasFRE()) ||
14730 (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
14731 (VT == MVT::v2f64 && Subtarget.hasVSX())) {
14732 if (RefinementSteps == ReciprocalEstimate::Unspecified)
14733 RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
14734 return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
14735 }
14736 return SDValue();
14737}
14738
14739 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
14740 // Note: This functionality is used only when arcp is enabled, and
14741 // on cores with reciprocal estimates (which are used when arcp is
14742 // enabled for division), this functionality is redundant with the default
14743 // combiner logic (once the division -> reciprocal/multiply transformation
14744 // has taken place). As a result, this matters more for older cores than for
14745 // newer ones.
14746
14747 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
14748 // reciprocal if there are two or more FDIVs (for embedded cores with only
14749 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
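// For example, a return value of 2 (e.g. DIR_A2) means that as soon as two
// FDIVs share the same divisor d, the combiner materializes r = 1/d and
// rewrites both divisions as multiplies by r.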
14750 switch (Subtarget.getCPUDirective()) {
14751 default:
14752 return 3;
14753 case PPC::DIR_440:
14754 case PPC::DIR_A2:
14755 case PPC::DIR_E500:
14756 case PPC::DIR_E500mc:
14757 case PPC::DIR_E5500:
14758 return 2;
14759 }
14760}
14761
14762// isConsecutiveLSLoc needs to work even if all adds have not yet been
14763// collapsed, and so we need to look through chains of them.
14764 static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
14765 int64_t& Offset, SelectionDAG &DAG) {
14766 if (DAG.isBaseWithConstantOffset(Loc)) {
14767 Base = Loc.getOperand(0);
14768 Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
14769
14770 // The base might itself be a base plus an offset, and if so, accumulate
14771 // that as well.
14772 getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
14773 }
14774}
14775
14776 static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
14777 unsigned Bytes, int Dist,
14778 SelectionDAG &DAG) {
14779 if (VT.getSizeInBits() / 8 != Bytes)
14780 return false;
14781
14782 SDValue BaseLoc = Base->getBasePtr();
14783 if (Loc.getOpcode() == ISD::FrameIndex) {
14784 if (BaseLoc.getOpcode() != ISD::FrameIndex)
14785 return false;
14786 const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
14787 int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
14788 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
14789 int FS = MFI.getObjectSize(FI);
14790 int BFS = MFI.getObjectSize(BFI);
14791 if (FS != BFS || FS != (int)Bytes) return false;
14792 return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
14793 }
14794
14795 SDValue Base1 = Loc, Base2 = BaseLoc;
14796 int64_t Offset1 = 0, Offset2 = 0;
14797 getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
14798 getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
14799 if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
14800 return true;
14801
14802 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14803 const GlobalValue *GV1 = nullptr;
14804 const GlobalValue *GV2 = nullptr;
14805 Offset1 = 0;
14806 Offset2 = 0;
14807 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
14808 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
14809 if (isGA1 && isGA2 && GV1 == GV2)
14810 return Offset1 == (Offset2 + Dist*Bytes);
14811 return false;
14812}
14813
14814// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
14815// not enforce equality of the chain operands.
14816 static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
14817 unsigned Bytes, int Dist,
14818 SelectionDAG &DAG) {
14819 if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
14820 EVT VT = LS->getMemoryVT();
14821 SDValue Loc = LS->getBasePtr();
14822 return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
14823 }
14824
14825 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
14826 EVT VT;
14827 switch (N->getConstantOperandVal(1)) {
14828 default: return false;
14829 case Intrinsic::ppc_altivec_lvx:
14830 case Intrinsic::ppc_altivec_lvxl:
14831 case Intrinsic::ppc_vsx_lxvw4x:
14832 case Intrinsic::ppc_vsx_lxvw4x_be:
14833 VT = MVT::v4i32;
14834 break;
14835 case Intrinsic::ppc_vsx_lxvd2x:
14836 case Intrinsic::ppc_vsx_lxvd2x_be:
14837 VT = MVT::v2f64;
14838 break;
14839 case Intrinsic::ppc_altivec_lvebx:
14840 VT = MVT::i8;
14841 break;
14842 case Intrinsic::ppc_altivec_lvehx:
14843 VT = MVT::i16;
14844 break;
14845 case Intrinsic::ppc_altivec_lvewx:
14846 VT = MVT::i32;
14847 break;
14848 }
14849
14850 return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
14851 }
14852
14853 if (N->getOpcode() == ISD::INTRINSIC_VOID) {
14854 EVT VT;
14855 switch (N->getConstantOperandVal(1)) {
14856 default: return false;
14857 case Intrinsic::ppc_altivec_stvx:
14858 case Intrinsic::ppc_altivec_stvxl:
14859 case Intrinsic::ppc_vsx_stxvw4x:
14860 VT = MVT::v4i32;
14861 break;
14862 case Intrinsic::ppc_vsx_stxvd2x:
14863 VT = MVT::v2f64;
14864 break;
14865 case Intrinsic::ppc_vsx_stxvw4x_be:
14866 VT = MVT::v4i32;
14867 break;
14868 case Intrinsic::ppc_vsx_stxvd2x_be:
14869 VT = MVT::v2f64;
14870 break;
14871 case Intrinsic::ppc_altivec_stvebx:
14872 VT = MVT::i8;
14873 break;
14874 case Intrinsic::ppc_altivec_stvehx:
14875 VT = MVT::i16;
14876 break;
14877 case Intrinsic::ppc_altivec_stvewx:
14878 VT = MVT::i32;
14879 break;
14880 }
14881
14882 return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
14883 }
14884
14885 return false;
14886}
14887
14888 // Return true if there is a nearby consecutive load to the one provided
14889 // (regardless of alignment). We search up and down the chain, looking through
14890 // token factors and other loads (but nothing else). As a result, a true result
14891// indicates that it is safe to create a new consecutive load adjacent to the
14892// load provided.
14893 static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
14894 SDValue Chain = LD->getChain();
14895 EVT VT = LD->getMemoryVT();
14896
14897 SmallPtrSet<SDNode *, 16> LoadRoots;
14898 SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
14899 SmallPtrSet<SDNode *, 16> Visited;
14900
14901 // First, search up the chain, branching to follow all token-factor operands.
14902 // If we find a consecutive load, then we're done, otherwise, record all
14903 // nodes just above the top-level loads and token factors.
14904 while (!Queue.empty()) {
14905 SDNode *ChainNext = Queue.pop_back_val();
14906 if (!Visited.insert(ChainNext).second)
14907 continue;
14908
14909 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
14910 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14911 return true;
14912
14913 if (!Visited.count(ChainLD->getChain().getNode()))
14914 Queue.push_back(ChainLD->getChain().getNode());
14915 } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
14916 for (const SDUse &O : ChainNext->ops())
14917 if (!Visited.count(O.getNode()))
14918 Queue.push_back(O.getNode());
14919 } else
14920 LoadRoots.insert(ChainNext);
14921 }
14922
14923 // Second, search down the chain, starting from the top-level nodes recorded
14924 // in the first phase. These top-level nodes are the nodes just above all
14925 // loads and token factors. Starting with their uses, recursively look through
14926 // all loads (just the chain uses) and token factors to find a consecutive
14927 // load.
14928 Visited.clear();
14929 Queue.clear();
14930
14931 for (SDNode *I : LoadRoots) {
14932 Queue.push_back(I);
14933
14934 while (!Queue.empty()) {
14935 SDNode *LoadRoot = Queue.pop_back_val();
14936 if (!Visited.insert(LoadRoot).second)
14937 continue;
14938
14939 if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
14940 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
14941 return true;
14942
14943 for (SDNode *U : LoadRoot->users())
14944 if (((isa<MemSDNode>(U) &&
14945 cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
14946 U->getOpcode() == ISD::TokenFactor) &&
14947 !Visited.count(U))
14948 Queue.push_back(U);
14949 }
14950 }
14951
14952 return false;
14953}
14954
14955/// This function is called when we have proved that a SETCC node can be replaced
14956/// by subtraction (and other supporting instructions) so that the result of
14957/// comparison is kept in a GPR instead of CR. This function is purely for
14958/// codegen purposes and has some flags to guide the codegen process.
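/// For example, lowering a 32-bit unsigned SETULT with a 64-bit legal type:
/// zero-extend both operands to i64, subtract, and shift the difference right
/// by 63; the low bit is then 1 exactly when the first operand is smaller.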
14959static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
14960 bool Swap, SDLoc &DL, SelectionDAG &DAG) {
14961 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14962
14963 // Zero extend the operands to the largest legal integer. Originally, they
14964 // must be of a strictly smaller size.
14965 auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
14966 DAG.getConstant(Size, DL, MVT::i32));
14967 auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
14968 DAG.getConstant(Size, DL, MVT::i32));
14969
14970 // Swap if needed. Depends on the condition code.
14971 if (Swap)
14972 std::swap(Op0, Op1);
14973
14974 // Subtract extended integers.
14975 auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
14976
14977 // Move the sign bit to the least significant position and zero out the rest.
14978 // Now the least significant bit carries the result of original comparison.
14979 auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
14980 DAG.getConstant(Size - 1, DL, MVT::i32));
14981 auto Final = Shifted;
14982
14983 // Complement the result if needed. Based on the condition code.
14984 if (Complement)
14985 Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
14986 DAG.getConstant(1, DL, MVT::i64));
14987
14988 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
14989}
14990
14991SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
14992 DAGCombinerInfo &DCI) const {
14993 assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
14994
14995 SelectionDAG &DAG = DCI.DAG;
14996 SDLoc DL(N);
14997
14998 // Size of integers being compared has a critical role in the following
14999 // analysis, so we prefer to do this when all types are legal.
15000 if (!DCI.isAfterLegalizeDAG())
15001 return SDValue();
15002
15003 // If all users of SETCC extend its value to a legal integer type
15004 // then we replace the SETCC with a subtraction.
15005 for (const SDNode *U : N->users())
15006 if (U->getOpcode() != ISD::ZERO_EXTEND)
15007 return SDValue();
15008
15009 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15010 auto OpSize = N->getOperand(0).getValueSizeInBits();
15011
15012 unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
15013
15014 if (OpSize < Size) {
15015 switch (CC) {
15016 default: break;
15017 case ISD::SETULT:
15018 return generateEquivalentSub(N, Size, false, false, DL, DAG);
15019 case ISD::SETULE:
15020 return generateEquivalentSub(N, Size, true, true, DL, DAG);
15021 case ISD::SETUGT:
15022 return generateEquivalentSub(N, Size, false, true, DL, DAG);
15023 case ISD::SETUGE:
15024 return generateEquivalentSub(N, Size, true, false, DL, DAG);
15025 }
15026 }
15027
15028 return SDValue();
15029}
15030
15031SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
15032 DAGCombinerInfo &DCI) const {
15033 SelectionDAG &DAG = DCI.DAG;
15034 SDLoc dl(N);
15035
15036 assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
15037 // If we're tracking CR bits, we need to be careful that we don't have:
15038 // trunc(binary-ops(zext(x), zext(y)))
15039 // or
15040 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
15041 // such that we're unnecessarily moving things into GPRs when it would be
15042 // better to keep them in CR bits.
15043
15044 // Note that trunc here can be an actual i1 trunc, or can be the effective
15045 // truncation that comes from a setcc or select_cc.
15046 if (N->getOpcode() == ISD::TRUNCATE &&
15047 N->getValueType(0) != MVT::i1)
15048 return SDValue();
15049
15050 if (N->getOperand(0).getValueType() != MVT::i32 &&
15051 N->getOperand(0).getValueType() != MVT::i64)
15052 return SDValue();
15053
15054 if (N->getOpcode() == ISD::SETCC ||
15055 N->getOpcode() == ISD::SELECT_CC) {
15056 // If we're looking at a comparison, then we need to make sure that the
15057 // high bits (all except for the first) don't affect the result.
15058 ISD::CondCode CC =
15059 cast<CondCodeSDNode>(N->getOperand(
15060 N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
15061 unsigned OpBits = N->getOperand(0).getValueSizeInBits();
15062
15063 if (ISD::isSignedIntSetCC(CC)) {
15064 if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
15065 DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
15066 return SDValue();
15067 } else if (ISD::isUnsignedIntSetCC(CC)) {
15068 if (!DAG.MaskedValueIsZero(N->getOperand(0),
15069 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
15070 !DAG.MaskedValueIsZero(N->getOperand(1),
15071 APInt::getHighBitsSet(OpBits, OpBits-1)))
15072 return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
15073 : SDValue());
15074 } else {
15075 // This is neither a signed nor an unsigned comparison, just make sure
15076 // that the high bits are equal.
15077 KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
15078 KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));
15079
15080 // We don't really care about what is known about the first bit (if
15081 // anything), so pretend that it is known zero for both to ensure they can
15082 // be compared as constants.
15083 Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
15084 Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);
15085
15086 if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
15087 Op1Known.getConstant() != Op2Known.getConstant())
15088 return SDValue();
15089 }
15090 }
15091
15092 // We now know that the higher-order bits are irrelevant; we just need to
15093 // make sure that all of the intermediate operations are bit operations, and
15094 // all inputs are extensions.
15095 if (N->getOperand(0).getOpcode() != ISD::AND &&
15096 N->getOperand(0).getOpcode() != ISD::OR &&
15097 N->getOperand(0).getOpcode() != ISD::XOR &&
15098 N->getOperand(0).getOpcode() != ISD::SELECT &&
15099 N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
15100 N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
15101 N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
15102 N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
15103 N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
15104 return SDValue();
15105
15106 if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
15107 N->getOperand(1).getOpcode() != ISD::AND &&
15108 N->getOperand(1).getOpcode() != ISD::OR &&
15109 N->getOperand(1).getOpcode() != ISD::XOR &&
15110 N->getOperand(1).getOpcode() != ISD::SELECT &&
15111 N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
15112 N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
15113 N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
15114 N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
15115 N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
15116 return SDValue();
15117
15118 SmallVector<SDValue, 4> Inputs;
15119 SmallVector<SDValue, 8> BinOps, PromOps;
15120 SmallPtrSet<SDNode *, 16> Visited;
15121
15122 for (unsigned i = 0; i < 2; ++i) {
15123 if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15124 N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15125 N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15126 N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15127 isa<ConstantSDNode>(N->getOperand(i)))
15128 Inputs.push_back(N->getOperand(i));
15129 else
15130 BinOps.push_back(N->getOperand(i));
15131
15132 if (N->getOpcode() == ISD::TRUNCATE)
15133 break;
15134 }
15135
15136 // Visit all inputs, collect all binary operations (and, or, xor and
15137 // select) that are all fed by extensions.
15138 while (!BinOps.empty()) {
15139 SDValue BinOp = BinOps.pop_back_val();
15140
15141 if (!Visited.insert(BinOp.getNode()).second)
15142 continue;
15143
15144 PromOps.push_back(BinOp);
15145
15146 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15147 // The condition of the select is not promoted.
15148 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15149 continue;
15150 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15151 continue;
15152
15153 if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15154 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15155 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
15156 BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
15157 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15158 Inputs.push_back(BinOp.getOperand(i));
15159 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15160 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15161 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15162 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15163 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
15164 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15165 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
15166 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
15167 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
15168 BinOps.push_back(BinOp.getOperand(i));
15169 } else {
15170 // We have an input that is not an extension or another binary
15171 // operation; we'll abort this transformation.
15172 return SDValue();
15173 }
15174 }
15175 }
15176
15177 // Make sure that this is a self-contained cluster of operations (which
15178 // is not quite the same thing as saying that everything has only one
15179 // use).
15180 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15181 if (isa<ConstantSDNode>(Inputs[i]))
15182 continue;
15183
15184 for (const SDNode *User : Inputs[i].getNode()->users()) {
15185 if (User != N && !Visited.count(User))
15186 return SDValue();
15187
15188 // Make sure that we're not going to promote the non-output-value
15189 // operand(s) or SELECT or SELECT_CC.
15190 // FIXME: Although we could sometimes handle this, and it does occur in
15191 // practice that one of the condition inputs to the select is also one of
15192 // the outputs, we currently can't deal with this.
15193 if (User->getOpcode() == ISD::SELECT) {
15194 if (User->getOperand(0) == Inputs[i])
15195 return SDValue();
15196 } else if (User->getOpcode() == ISD::SELECT_CC) {
15197 if (User->getOperand(0) == Inputs[i] ||
15198 User->getOperand(1) == Inputs[i])
15199 return SDValue();
15200 }
15201 }
15202 }
15203
15204 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15205 for (const SDNode *User : PromOps[i].getNode()->users()) {
15206 if (User != N && !Visited.count(User))
15207 return SDValue();
15208
15209 // Make sure that we're not going to promote the non-output-value
15210 // operand(s) or SELECT or SELECT_CC.
15211 // FIXME: Although we could sometimes handle this, and it does occur in
15212 // practice that one of the condition inputs to the select is also one of
15213 // the outputs, we currently can't deal with this.
15214 if (User->getOpcode() == ISD::SELECT) {
15215 if (User->getOperand(0) == PromOps[i])
15216 return SDValue();
15217 } else if (User->getOpcode() == ISD::SELECT_CC) {
15218 if (User->getOperand(0) == PromOps[i] ||
15219 User->getOperand(1) == PromOps[i])
15220 return SDValue();
15221 }
15222 }
15223 }
15224
15225 // Replace all inputs with the extension operand.
15226 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15227 // Constants may have users outside the cluster of to-be-promoted nodes,
15228 // and so we need to replace those as we do the promotions.
15229 if (isa<ConstantSDNode>(Inputs[i]))
15230 continue;
15231 else
15232 DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
15233 }
15234
15235 std::list<HandleSDNode> PromOpHandles;
15236 for (auto &PromOp : PromOps)
15237 PromOpHandles.emplace_back(PromOp);
15238
15239 // Replace all operations (these are all the same, but have a different
15240 // (i1) return type). DAG.getNode will validate that the types of
15241 // a binary operator match, so go through the list in reverse so that
15242 // we've likely promoted both operands first. Any intermediate truncations or
15243 // extensions disappear.
15244 while (!PromOpHandles.empty()) {
15245 SDValue PromOp = PromOpHandles.back().getValue();
15246 PromOpHandles.pop_back();
15247
15248 if (PromOp.getOpcode() == ISD::TRUNCATE ||
15249 PromOp.getOpcode() == ISD::SIGN_EXTEND ||
15250 PromOp.getOpcode() == ISD::ZERO_EXTEND ||
15251 PromOp.getOpcode() == ISD::ANY_EXTEND) {
15252 if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
15253 PromOp.getOperand(0).getValueType() != MVT::i1) {
15254 // The operand is not yet ready (see comment below).
15255 PromOpHandles.emplace_front(PromOp);
15256 continue;
15257 }
15258
15259 SDValue RepValue = PromOp.getOperand(0);
15260 if (isa<ConstantSDNode>(RepValue))
15261 RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);
15262
15263 DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
15264 continue;
15265 }
15266
15267 unsigned C;
15268 switch (PromOp.getOpcode()) {
15269 default: C = 0; break;
15270 case ISD::SELECT: C = 1; break;
15271 case ISD::SELECT_CC: C = 2; break;
15272 }
15273
15274 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15275 PromOp.getOperand(C).getValueType() != MVT::i1) ||
15276 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15277 PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
15278 // The to-be-promoted operands of this node have not yet been
15279 // promoted (this should be rare because we're going through the
15280 // list backward, but if one of the operands has several users in
15281 // this cluster of to-be-promoted nodes, it is possible).
15282 PromOpHandles.emplace_front(PromOp);
15283 continue;
15284 }
15285
15286 SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15287
15288 // If there are any constant inputs, make sure they're replaced now.
15289 for (unsigned i = 0; i < 2; ++i)
15290 if (isa<ConstantSDNode>(Ops[C+i]))
15291 Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);
15292
15293 DAG.ReplaceAllUsesOfValueWith(PromOp,
15294 DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
15295 }
15296
15297 // Now we're left with the initial truncation itself.
15298 if (N->getOpcode() == ISD::TRUNCATE)
15299 return N->getOperand(0);
15300
15301 // Otherwise, this is a comparison. The operands to be compared have just
15302 // changed type (to i1), but everything else is the same.
15303 return SDValue(N, 0);
15304}
15305
15306SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
15307 DAGCombinerInfo &DCI) const {
15308 SelectionDAG &DAG = DCI.DAG;
15309 SDLoc dl(N);
15310
15311 // If we're tracking CR bits, we need to be careful that we don't have:
15312 // zext(binary-ops(trunc(x), trunc(y)))
15313 // or
15314 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
15315 // such that we're unnecessarily moving things into CR bits that can more
15316 // efficiently stay in GPRs. Note that if we're not certain that the high
15317 // bits are set as required by the final extension, we still may need to do
15318 // some masking to get the proper behavior.
15319
15320 // This same functionality is important on PPC64 when dealing with
15321 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
15322 // the return values of functions. Because it is so similar, it is handled
15323 // here as well.
15324
15325 if (N->getValueType(0) != MVT::i32 &&
15326 N->getValueType(0) != MVT::i64)
15327 return SDValue();
15328
15329 if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
15330 (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
15331 return SDValue();
15332
15333 if (N->getOperand(0).getOpcode() != ISD::AND &&
15334 N->getOperand(0).getOpcode() != ISD::OR &&
15335 N->getOperand(0).getOpcode() != ISD::XOR &&
15336 N->getOperand(0).getOpcode() != ISD::SELECT &&
15337 N->getOperand(0).getOpcode() != ISD::SELECT_CC)
15338 return SDValue();
15339
15340 SmallVector<SDValue, 4> Inputs;
15341 SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
15342 SmallPtrSet<SDNode *, 16> Visited;
15343
15344 // Visit all inputs, collect all binary operations (and, or, xor and
15345 // select) that are all fed by truncations.
15346 while (!BinOps.empty()) {
15347 SDValue BinOp = BinOps.pop_back_val();
15348
15349 if (!Visited.insert(BinOp.getNode()).second)
15350 continue;
15351
15352 PromOps.push_back(BinOp);
15353
15354 for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
15355 // The condition of the select is not promoted.
15356 if (BinOp.getOpcode() == ISD::SELECT && i == 0)
15357 continue;
15358 if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
15359 continue;
15360
15361 if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
15362 isa<ConstantSDNode>(BinOp.getOperand(i))) {
15363 Inputs.push_back(BinOp.getOperand(i));
15364 } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
15365 BinOp.getOperand(i).getOpcode() == ISD::OR ||
15366 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
15367 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
15368 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
15369 BinOps.push_back(BinOp.getOperand(i));
15370 } else {
15371 // We have an input that is not a truncation or another binary
15372 // operation; we'll abort this transformation.
15373 return SDValue();
15374 }
15375 }
15376 }
15377
15378 // The operands of a select that must be truncated when the select is
15379 // promoted because the operand is actually part of the to-be-promoted set.
15380 DenseMap<SDNode *, EVT> SelectTruncOp[2];
15381
15382 // Make sure that this is a self-contained cluster of operations (which
15383 // is not quite the same thing as saying that everything has only one
15384 // use).
15385 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15386 if (isa<ConstantSDNode>(Inputs[i]))
15387 continue;
15388
15389 for (SDNode *User : Inputs[i].getNode()->users()) {
15390 if (User != N && !Visited.count(User))
15391 return SDValue();
15392
15393      // If we're going to promote the non-output-value operand(s) of SELECT or
15394      // SELECT_CC, record them for truncation.
15395 if (User->getOpcode() == ISD::SELECT) {
15396 if (User->getOperand(0) == Inputs[i])
15397 SelectTruncOp[0].insert(std::make_pair(User,
15398 User->getOperand(0).getValueType()));
15399 } else if (User->getOpcode() == ISD::SELECT_CC) {
15400 if (User->getOperand(0) == Inputs[i])
15401 SelectTruncOp[0].insert(std::make_pair(User,
15402 User->getOperand(0).getValueType()));
15403 if (User->getOperand(1) == Inputs[i])
15404 SelectTruncOp[1].insert(std::make_pair(User,
15405 User->getOperand(1).getValueType()));
15406 }
15407 }
15408 }
15409
15410 for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
15411 for (SDNode *User : PromOps[i].getNode()->users()) {
15412 if (User != N && !Visited.count(User))
15413 return SDValue();
15414
15415      // If we're going to promote the non-output-value operand(s) of SELECT or
15416      // SELECT_CC, record them for truncation.
15417 if (User->getOpcode() == ISD::SELECT) {
15418 if (User->getOperand(0) == PromOps[i])
15419 SelectTruncOp[0].insert(std::make_pair(User,
15420 User->getOperand(0).getValueType()));
15421 } else if (User->getOpcode() == ISD::SELECT_CC) {
15422 if (User->getOperand(0) == PromOps[i])
15423 SelectTruncOp[0].insert(std::make_pair(User,
15424 User->getOperand(0).getValueType()));
15425 if (User->getOperand(1) == PromOps[i])
15426 SelectTruncOp[1].insert(std::make_pair(User,
15427 User->getOperand(1).getValueType()));
15428 }
15429 }
15430 }
15431
15432 unsigned PromBits = N->getOperand(0).getValueSizeInBits();
15433 bool ReallyNeedsExt = false;
15434 if (N->getOpcode() != ISD::ANY_EXTEND) {
15435    // If any of the inputs is not already sign/zero extended, then
15436    // we'll still need to do that at the end.
15437 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15438 if (isa<ConstantSDNode>(Inputs[i]))
15439 continue;
15440
15441 unsigned OpBits =
15442 Inputs[i].getOperand(0).getValueSizeInBits();
15443 assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
15444
15445 if ((N->getOpcode() == ISD::ZERO_EXTEND &&
15446 !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
15447 APInt::getHighBitsSet(OpBits,
15448 OpBits-PromBits))) ||
15449 (N->getOpcode() == ISD::SIGN_EXTEND &&
15450 DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
15451 (OpBits-(PromBits-1)))) {
15452 ReallyNeedsExt = true;
15453 break;
15454 }
15455 }
15456 }
15457
15458 // Convert PromOps to handles before doing any RAUW operations, as these
15459 // may CSE with existing nodes, deleting the originals.
15460 std::list<HandleSDNode> PromOpHandles;
15461 for (auto &PromOp : PromOps)
15462 PromOpHandles.emplace_back(PromOp);
15463
15464 // Replace all inputs, either with the truncation operand, or a
15465 // truncation or extension to the final output type.
15466 for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
15467 // Constant inputs need to be replaced with the to-be-promoted nodes that
15468 // use them because they might have users outside of the cluster of
15469 // promoted nodes.
15470 if (isa<ConstantSDNode>(Inputs[i]))
15471 continue;
15472
15473 SDValue InSrc = Inputs[i].getOperand(0);
15474 if (Inputs[i].getValueType() == N->getValueType(0))
15475 DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
15476 else if (N->getOpcode() == ISD::SIGN_EXTEND)
15477 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15478 DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
15479 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15480 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15481 DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
15482 else
15483 DAG.ReplaceAllUsesOfValueWith(Inputs[i],
15484 DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
15485 }
15486
15487 // Replace all operations (these are all the same, but have a different
15488 // (promoted) return type). DAG.getNode will validate that the types of
15489 // a binary operator match, so go through the list in reverse so that
15490 // we've likely promoted both operands first.
15491 while (!PromOpHandles.empty()) {
15492 SDValue PromOp = PromOpHandles.back().getValue();
15493 PromOpHandles.pop_back();
15494
15495 unsigned C;
15496 switch (PromOp.getOpcode()) {
15497 default: C = 0; break;
15498 case ISD::SELECT: C = 1; break;
15499 case ISD::SELECT_CC: C = 2; break;
15500 }
15501
15502 if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
15503 PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
15504 (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
15505 PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
15506 // The to-be-promoted operands of this node have not yet been
15507 // promoted (this should be rare because we're going through the
15508 // list backward, but if one of the operands has several users in
15509 // this cluster of to-be-promoted nodes, it is possible).
15510 PromOpHandles.emplace_front(PromOp);
15511 continue;
15512 }
15513
15514 // For SELECT and SELECT_CC nodes, we do a similar check for any
15515 // to-be-promoted comparison inputs.
15516 if (PromOp.getOpcode() == ISD::SELECT ||
15517 PromOp.getOpcode() == ISD::SELECT_CC) {
15518 if ((SelectTruncOp[0].count(PromOp.getNode()) &&
15519 PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
15520 (SelectTruncOp[1].count(PromOp.getNode()) &&
15521 PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
15522 PromOpHandles.emplace_front(PromOp);
15523 continue;
15524 }
15525 }
15526
15527    SmallVector<SDValue, 3> Ops(PromOp.getNode()->ops());
15528
15529 // If this node has constant inputs, then they'll need to be promoted here.
15530 for (unsigned i = 0; i < 2; ++i) {
15531 if (!isa<ConstantSDNode>(Ops[C+i]))
15532 continue;
15533 if (Ops[C+i].getValueType() == N->getValueType(0))
15534 continue;
15535
15536 if (N->getOpcode() == ISD::SIGN_EXTEND)
15537 Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15538 else if (N->getOpcode() == ISD::ZERO_EXTEND)
15539 Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15540 else
15541 Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
15542 }
15543
15544 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
15545 // truncate them again to the original value type.
15546 if (PromOp.getOpcode() == ISD::SELECT ||
15547 PromOp.getOpcode() == ISD::SELECT_CC) {
15548 auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
15549 if (SI0 != SelectTruncOp[0].end())
15550 Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
15551 auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
15552 if (SI1 != SelectTruncOp[1].end())
15553 Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
15554 }
15555
15556 DAG.ReplaceAllUsesOfValueWith(PromOp,
15557 DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
15558 }
15559
15560 // Now we're left with the initial extension itself.
15561 if (!ReallyNeedsExt)
15562 return N->getOperand(0);
15563
15564 // To zero extend, just mask off everything except for the first bit (in the
15565 // i1 case).
15566 if (N->getOpcode() == ISD::ZERO_EXTEND)
15567 return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
15568                       DAG.getConstant(APInt::getLowBitsSet(
15569                                           N->getValueSizeInBits(0), PromBits),
15570 dl, N->getValueType(0)));
15571
15572 assert(N->getOpcode() == ISD::SIGN_EXTEND &&
15573 "Invalid extension type");
15574 EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
15575 SDValue ShiftCst =
15576 DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
15577 return DAG.getNode(
15578 ISD::SRA, dl, N->getValueType(0),
15579 DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
15580 ShiftCst);
15581}
15582
15583// The function checks whether an i128 load can be converted to a v16i8 load for Vcmpequb.
15584static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS) {
15585
15586 auto isValidForConvert = [](SDValue &Operand) {
15587 if (!Operand.hasOneUse())
15588 return false;
15589
15590 if (Operand.getValueType() != MVT::i128)
15591 return false;
15592
15593 if (Operand.getOpcode() == ISD::Constant)
15594 return true;
15595
15596 auto *LoadNode = dyn_cast<LoadSDNode>(Operand);
15597 if (!LoadNode)
15598 return false;
15599
15600    // If the memory operation is volatile, do not perform any
15601    // optimization or transformation. Volatile operations must be preserved
15602    // as written to ensure correct program behavior, so we bail out and do
15603    // not convert this load.
15604
15605 if (LoadNode->isVolatile())
15606 return false;
15607
15608 // Only combine loads if both use the unindexed addressing mode.
15609 // PowerPC AltiVec/VMX does not support vector loads or stores with
15610 // pre/post-increment addressing. Indexed modes may imply implicit
15611 // pointer updates, which are not compatible with AltiVec vector
15612 // instructions.
15613 if (LoadNode->getAddressingMode() != ISD::UNINDEXED)
15614 return false;
15615
15616 // Only combine loads if both are non-extending loads
15617 // (ISD::NON_EXTLOAD). Extending loads (such as ISD::ZEXTLOAD or
15618 // ISD::SEXTLOAD) perform zero or sign extension, which may change the
15619 // loaded value's semantics and are not compatible with vector loads.
15620 if (LoadNode->getExtensionType() != ISD::NON_EXTLOAD)
15621 return false;
15622
15623 return true;
15624 };
15625
15626 return (isValidForConvert(LHS) && isValidForConvert(RHS));
15627}
15628
15629static SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N,
15630                                               const SDLoc &DL) {
15631
15632 assert(N->getOpcode() == ISD::SETCC && "Should be called with a SETCC node");
15633
15634 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15635 assert((CC == ISD::SETNE || CC == ISD::SETEQ) &&
15636 "CC mus be ISD::SETNE or ISD::SETEQ");
15637
15638 auto getV16i8Load = [&](const SDValue &Operand) {
15639 if (Operand.getOpcode() == ISD::Constant)
15640 return DAG.getBitcast(MVT::v16i8, Operand);
15641
15642 assert(Operand.getOpcode() == ISD::LOAD && "Must be LoadSDNode here.");
15643
15644 auto *LoadNode = cast<LoadSDNode>(Operand);
15645 return DAG.getLoad(MVT::v16i8, DL, LoadNode->getChain(),
15646 LoadNode->getBasePtr(), LoadNode->getMemOperand());
15647 };
15648
15649  // The following code transforms the DAG
15650 // t0: ch,glue = EntryToken
15651 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15652 // t3: i128,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15653 // undef:i64
15654 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15655 // t5: i128,ch =
15656 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64 t6: i1 =
15657 // setcc t3, t5, setne:ch
15658 //
15659 // ---->
15660 //
15661 // t0: ch,glue = EntryToken
15662 // t2: i64,ch = CopyFromReg t0, Register:i64 %0
15663 // t3: v16i8,ch = load<(load (s128) from %ir.a, align 1)> t0, t2,
15664 // undef:i64
15665 // t4: i64,ch = CopyFromReg t0, Register:i64 %1
15666 // t5: v16i8,ch =
15667 // load<(load (s128) from %ir.b, align 1)> t0, t4, undef:i64
15668 // t6: i32 =
15669 // llvm.ppc.altivec.vcmpequb.p TargetConstant:i32<10505>,
15670 // Constant:i32<2>, t3, t5
15671 // t7: i1 = setcc t6, Constant:i32<0>, seteq:ch
15672
15673 // Or transforms the DAG
15674 // t5: i128,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15675 // t8: i1 =
15676 // setcc Constant:i128<237684487579686500932345921536>, t5, setne:ch
15677 //
15678 // --->
15679 //
15680 // t5: v16i8,ch = load<(load (s128) from %ir.X, align 1)> t0, t2, undef:i64
15681 // t6: v16i8 = bitcast Constant:i128<237684487579686500932345921536>
15682 // t7: i32 =
15683 // llvm.ppc.altivec.vcmpequb.p Constant:i32<10962>, Constant:i32<2>, t5, t2
15684
15685 SDValue LHSVec = getV16i8Load(N->getOperand(0));
15686 SDValue RHSVec = getV16i8Load(N->getOperand(1));
15687
15688 SDValue IntrID =
15689 DAG.getConstant(Intrinsic::ppc_altivec_vcmpequb_p, DL, MVT::i32);
15690 SDValue CRSel = DAG.getConstant(2, DL, MVT::i32); // which CR6 predicate field
15691 SDValue PredResult = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
15692 IntrID, CRSel, LHSVec, RHSVec);
15693 // ppc_altivec_vcmpequb_p returns 1 when two vectors are the same,
15694 // so we need to invert the CC opcode.
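  // For example, (setcc %a, %b, setne) becomes
  // (setcc (vcmpequb.p %a, %b), 0, seteq): the predicate is 1 only when all
  // 16 bytes compare equal, so testing it against zero with the inverted
  // condition reproduces the original i128 comparison.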
15695 return DAG.getSetCC(DL, N->getValueType(0), PredResult,
15696 DAG.getConstant(0, DL, MVT::i32),
15697 CC == ISD::SETNE ? ISD::SETEQ : ISD::SETNE);
15698}
15699
15700// Detect whether there is a pattern like (setcc (and X, 1), 0, eq).
15701// If it is, return true; otherwise return false.
15702static bool canConvertSETCCToXori(SDNode *N) {
15703  assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15704
15705 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15706 if (CC != ISD::SETEQ)
15707 return false;
15708
15709 SDValue LHS = N->getOperand(0);
15710 SDValue RHS = N->getOperand(1);
15711
15713  // Check whether the `SDValue &V` comes from an `and` with `1`.
15713 auto IsAndWithOne = [](SDValue &V) {
15714 if (V.getOpcode() == ISD::AND) {
15715 for (const SDValue &Op : V->ops())
15716 if (auto *C = dyn_cast<ConstantSDNode>(Op))
15717 if (C->isOne())
15718 return true;
15719 }
15720 return false;
15721 };
15722
15724  // Check whether the SETCC compares with zero.
15724 auto IsCompareWithZero = [](SDValue &V) {
15725 if (auto *C = dyn_cast<ConstantSDNode>(V))
15726 if (C->isZero())
15727 return true;
15728 return false;
15729 };
15730
15731 return (IsAndWithOne(LHS) && IsCompareWithZero(RHS)) ||
15732 (IsAndWithOne(RHS) && IsCompareWithZero(LHS));
15733}
15734
15735// You must check whether the `SDNode* N` can be converted to Xori using
15736// the function `static bool canConvertSETCCToXori(SDNode *N)`
15737// before calling the function; otherwise, it may produce incorrect results.
15738static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG) {
15739
15740 assert(N->getOpcode() == ISD::SETCC && "Should be SETCC SDNode here.");
15741 SDValue LHS = N->getOperand(0);
15742 SDValue RHS = N->getOperand(1);
15743 SDLoc DL(N);
15744
15745 [[maybe_unused]] ISD::CondCode CC =
15746 cast<CondCodeSDNode>(N->getOperand(2))->get();
15747 assert((CC == ISD::SETEQ) && "CC must be ISD::SETEQ.");
15748 // Rewrite it as XORI (and X, 1), 1.
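  // This is valid because (and X, 1) is always 0 or 1, so comparing it to
  // zero for equality is just the complement of its low bit, which the XOR
  // with 1 computes directly.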
15749 auto MakeXor1 = [&](SDValue V) {
15750 EVT VT = V.getValueType();
15751 SDValue One = DAG.getConstant(1, DL, VT);
15752 SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, V, One);
15753 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Xor);
15754 };
15755
15756 if (LHS.getOpcode() == ISD::AND && RHS.getOpcode() != ISD::AND)
15757 return MakeXor1(LHS);
15758
15759 if (RHS.getOpcode() == ISD::AND && LHS.getOpcode() != ISD::AND)
15760 return MakeXor1(RHS);
15761
15762 llvm_unreachable("Should not reach here.");
15763}
15764
15765SDValue PPCTargetLowering::combineSetCC(SDNode *N,
15766 DAGCombinerInfo &DCI) const {
15767 assert(N->getOpcode() == ISD::SETCC &&
15768 "Should be called with a SETCC node");
15769
15770 // Check if the pattern (setcc (and X, 1), 0, eq) is present.
15771 // If it is, rewrite it as XORI (and X, 1), 1.
15772  if (canConvertSETCCToXori(N))
15773    return ConvertSETCCToXori(N, DCI.DAG);
15774
15775 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
15776 if (CC == ISD::SETNE || CC == ISD::SETEQ) {
15777 SDValue LHS = N->getOperand(0);
15778 SDValue RHS = N->getOperand(1);
15779
15780 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
15781 if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
15782 LHS.hasOneUse())
15783 std::swap(LHS, RHS);
15784
15785 // x == 0-y --> x+y == 0
15786 // x != 0-y --> x+y != 0
15787 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
15788 RHS.hasOneUse()) {
15789 SDLoc DL(N);
15790 SelectionDAG &DAG = DCI.DAG;
15791 EVT VT = N->getValueType(0);
15792 EVT OpVT = LHS.getValueType();
15793 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
15794 return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
15795 }
15796
15797 // Optimization: Fold i128 equality/inequality compares of two loads into a
15798 // vectorized compare using vcmpequb.p when Altivec is available.
15799 //
15800 // Rationale:
15801 // A scalar i128 SETCC (eq/ne) normally lowers to multiple scalar ops.
15802 // On VSX-capable subtargets, we can instead reinterpret the i128 loads
15803    // as v16i8 vectors and use the Altivec vcmpequb.p instruction to
15804 // perform a full 128-bit equality check in a single vector compare.
15805 //
15806 // Example Result:
15807 // This transformation replaces memcmp(a, b, 16) with two vector loads
15808 // and one vector compare instruction.
15809
15810 if (Subtarget.hasAltivec() && canConvertToVcmpequb(LHS, RHS))
15811 return convertTwoLoadsAndCmpToVCMPEQUB(DCI.DAG, N, SDLoc(N));
15812 }
15813
15814 return DAGCombineTruncBoolExt(N, DCI);
15815}
15816
15817// Is this an extending load from an f32 to an f64?
15818static bool isFPExtLoad(SDValue Op) {
15819 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
15820 return LD->getExtensionType() == ISD::EXTLOAD &&
15821 Op.getValueType() == MVT::f64;
15822 return false;
15823}
15824
15825/// Reduces the number of fp-to-int conversion when building a vector.
15826///
15827/// If this vector is built out of floating to integer conversions,
15828/// transform it to a vector built out of floating point values followed by a
15829/// single floating to integer conversion of the vector.
15830/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
15831/// becomes (fptosi (build_vector ($A, $B, ...)))
15832SDValue PPCTargetLowering::
15833combineElementTruncationToVectorTruncation(SDNode *N,
15834 DAGCombinerInfo &DCI) const {
15835 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
15836 "Should be called with a BUILD_VECTOR node");
15837
15838 SelectionDAG &DAG = DCI.DAG;
15839 SDLoc dl(N);
15840
15841 SDValue FirstInput = N->getOperand(0);
15842 assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
15843 "The input operand must be an fp-to-int conversion.");
15844
15845 // This combine happens after legalization so the fp_to_[su]i nodes are
15846  // already converted to PPCISD nodes.
15847 unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
15848 if (FirstConversion == PPCISD::FCTIDZ ||
15849 FirstConversion == PPCISD::FCTIDUZ ||
15850 FirstConversion == PPCISD::FCTIWZ ||
15851 FirstConversion == PPCISD::FCTIWUZ) {
15852 bool IsSplat = true;
15853 bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
15854 FirstConversion == PPCISD::FCTIWUZ;
15855 EVT SrcVT = FirstInput.getOperand(0).getValueType();
15856    SmallVector<SDValue, 4> Ops;
15857    EVT TargetVT = N->getValueType(0);
15858 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15859 SDValue NextOp = N->getOperand(i);
15860 if (NextOp.getOpcode() != PPCISD::MFVSR)
15861 return SDValue();
15862 unsigned NextConversion = NextOp.getOperand(0).getOpcode();
15863 if (NextConversion != FirstConversion)
15864 return SDValue();
15865 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
15866 // This is not valid if the input was originally double precision. It is
15867 // also not profitable to do unless this is an extending load in which
15868 // case doing this combine will allow us to combine consecutive loads.
15869 if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
15870 return SDValue();
15871 if (N->getOperand(i) != FirstInput)
15872 IsSplat = false;
15873 }
15874
15875 // If this is a splat, we leave it as-is since there will be only a single
15876 // fp-to-int conversion followed by a splat of the integer. This is better
15877 // for 32-bit and smaller ints and neutral for 64-bit ints.
15878 if (IsSplat)
15879 return SDValue();
15880
15881 // Now that we know we have the right type of node, get its operands
15882 for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
15883 SDValue In = N->getOperand(i).getOperand(0);
15884 if (Is32Bit) {
15885 // For 32-bit values, we need to add an FP_ROUND node (if we made it
15886 // here, we know that all inputs are extending loads so this is safe).
15887 if (In.isUndef())
15888 Ops.push_back(DAG.getUNDEF(SrcVT));
15889 else {
15890 SDValue Trunc =
15891 DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, In.getOperand(0),
15892 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
15893 Ops.push_back(Trunc);
15894 }
15895 } else
15896 Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
15897 }
15898
15899 unsigned Opcode;
15900 if (FirstConversion == PPCISD::FCTIDZ ||
15901 FirstConversion == PPCISD::FCTIWZ)
15902 Opcode = ISD::FP_TO_SINT;
15903 else
15904 Opcode = ISD::FP_TO_UINT;
15905
15906 EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
15907 SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
15908 return DAG.getNode(Opcode, dl, TargetVT, BV);
15909 }
15910 return SDValue();
15911}
15912
15913// The LXVKQ instruction loads a VSX vector with a special quadword value
15914// based on an immediate value. This helper method returns the details of the
15915// match as a tuple of {LXVKQ unsigned IMM Value, right_shift_amount}
15916// to help generate the LXVKQ instruction and the subsequent shift instruction
15917// required to match the original build vector pattern.
15918
15919// LXVKQPattern: {LXVKQ unsigned IMM Value, right_shift_amount}
15920using LXVKQPattern = std::tuple<uint32_t, uint8_t>;
15921
15922static std::optional<LXVKQPattern> getPatternInfo(const APInt &FullVal) {
15923
15924 // LXVKQ instruction loads the Quadword value:
15925 // 0x8000_0000_0000_0000_0000_0000_0000_0000 when imm = 0b10000
15926 static const APInt BasePattern = APInt(128, 0x8000000000000000ULL) << 64;
15927 static const uint32_t Uim = 16;
15928
15929 // Check for direct LXVKQ match (no shift needed)
15930 if (FullVal == BasePattern)
15931 return std::make_tuple(Uim, uint8_t{0});
15932
15933 // Check if FullValue is 1 (the result of the base pattern >> 127)
15934 if (FullVal == APInt(128, 1))
15935 return std::make_tuple(Uim, uint8_t{127});
15936
15937 return std::nullopt;
15938}
15939
15940/// Combine a constant BUILD_VECTOR into a single load (using lxvkq) or a splat
15941/// with shift of a constant (xxspltib + vsrq) by recognising patterns in the
15942/// build vector. The LXVKQ instruction loads a VSX vector with a special
15943/// quadword value based on an immediate value. If UIM=0b10000 then LXVKQ loads
15944/// VSR[32×TX+T] with value 0x8000_0000_0000_0000_0000_0000_0000_0000.
15945/// This can be used to inline the build vector constants that have the
15946/// following patterns:
15947///
15948/// 0x8000_0000_0000_0000_0000_0000_0000_0000 (MSB set pattern)
15949/// 0x0000_0000_0000_0000_0000_0000_0000_0001 (LSB set pattern)
15950/// The MSB pattern can be loaded directly using LXVKQ, while the LSB pattern is
15951/// materialised using a combination of splat and right-shift instructions.
15952
15953SDValue PPCTargetLowering::combineBVLoadsSpecialValue(SDValue Op,
15954 SelectionDAG &DAG) const {
15955
15956 assert((Op.getNode() && Op.getOpcode() == ISD::BUILD_VECTOR) &&
15957 "Expected a BuildVectorSDNode in combineBVLoadsSpecialValue");
15958
15959  // This transformation is only supported for build vectors of byte,
15960  // halfword, word, or doubleword elements.
15961 EVT VT = Op.getValueType();
15962 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
15963 VT == MVT::v2i64))
15964 return SDValue();
15965
15966 LLVM_DEBUG(llvm::dbgs() << "\ncombineBVLoadsSpecialValue: Build vector ("
15967 << VT.getEVTString() << "): ";
15968 Op->dump());
15969
15970 unsigned NumElems = VT.getVectorNumElements();
15971 unsigned ElemBits = VT.getScalarSizeInBits();
15972
15973 bool IsLittleEndian = DAG.getDataLayout().isLittleEndian();
15974
15975 // Check for Non-constant operand in the build vector.
15976 for (const SDValue &Operand : Op.getNode()->op_values()) {
15977 if (!isa<ConstantSDNode>(Operand))
15978 return SDValue();
15979 }
15980
15981 // Assemble build vector operands as a 128-bit register value
15982 // We need to reconstruct what the 128-bit register pattern would be
15983 // that produces this vector when interpreted with the current endianness
15984 APInt FullVal = APInt::getZero(128);
15985
15986 for (unsigned Index = 0; Index < NumElems; ++Index) {
15987 auto *C = cast<ConstantSDNode>(Op.getOperand(Index));
15988
15989 // Get element value as raw bits (zero-extended)
15990 uint64_t ElemValue = C->getZExtValue();
15991
15992 // Mask to element size to ensure we only get the relevant bits
15993 if (ElemBits < 64)
15994 ElemValue &= ((1ULL << ElemBits) - 1);
15995
15996 // Calculate bit position for this element in the 128-bit register
15997 unsigned BitPos =
15998 (IsLittleEndian) ? (Index * ElemBits) : (128 - (Index + 1) * ElemBits);
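    // For example, element 0 of a v4i32 build vector lands at BitPos 0 on
    // little endian (the least significant word) and at BitPos 96 on big
    // endian (the most significant word).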
15999
16000 // Create APInt for the element value and shift it to correct position
16001 APInt ElemAPInt(128, ElemValue);
16002 ElemAPInt <<= BitPos;
16003
16004 // Place the element value at the correct bit position
16005 FullVal |= ElemAPInt;
16006 }
16007
16008 if (FullVal.isZero() || FullVal.isAllOnes())
16009 return SDValue();
16010
16011 if (auto UIMOpt = getPatternInfo(FullVal)) {
16012 const auto &[Uim, ShiftAmount] = *UIMOpt;
16013 SDLoc Dl(Op);
16014
16015 // Generate LXVKQ instruction if the shift amount is zero.
16016 if (ShiftAmount == 0) {
16017 SDValue UimVal = DAG.getTargetConstant(Uim, Dl, MVT::i32);
16018 SDValue LxvkqInstr =
16019 SDValue(DAG.getMachineNode(PPC::LXVKQ, Dl, VT, UimVal), 0);
16020      LLVM_DEBUG(llvm::dbgs()
16021                 << "combineBVLoadsSpecialValue: Instruction Emitted ";
16022 LxvkqInstr.dump());
16023 return LxvkqInstr;
16024 }
16025
16026 assert(ShiftAmount == 127 && "Unexpected lxvkq shift amount value");
16027
16028 // The right shifted pattern can be constructed using a combination of
16029 // XXSPLTIB and VSRQ instruction. VSRQ uses the shift amount from the lower
16030 // 7 bits of byte 15. This can be specified using XXSPLTIB with immediate
16031 // value 255.
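    // Splatting 0xFF yields an all-ones 128-bit value; shifting it right by
    // 127 bits leaves only the least significant bit set, which is exactly
    // the LSB-set pattern 0x0000_..._0001 recognised above.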
16032 SDValue ShiftAmountVec =
16033 SDValue(DAG.getMachineNode(PPC::XXSPLTIB, Dl, MVT::v4i32,
16034 DAG.getTargetConstant(255, Dl, MVT::i32)),
16035 0);
16036 // Generate appropriate right shift instruction
16037 SDValue ShiftVec = SDValue(
16038 DAG.getMachineNode(PPC::VSRQ, Dl, VT, ShiftAmountVec, ShiftAmountVec),
16039 0);
16040    LLVM_DEBUG(llvm::dbgs()
16041               << "\n combineBVLoadsSpecialValue: Instruction Emitted ";
16042 ShiftVec.dump());
16043 return ShiftVec;
16044 }
16045 // No patterns matched for build vectors.
16046 return SDValue();
16047}
16048
16049/// Reduce the number of loads when building a vector.
16050///
16051/// Building a vector out of multiple loads can be converted to a load
16052/// of the vector type if the loads are consecutive. If the loads are
16053/// consecutive but in descending order, a shuffle is added at the end
16054/// to reorder the vector.
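/// For example, (build_vector (load a), (load a+4), (load a+8), (load a+12))
/// becomes a single v4i32 load from a; the same loads in descending address
/// order produce the wide load followed by a reversing shuffle.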
16055static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
16056  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16057 "Should be called with a BUILD_VECTOR node");
16058
16059 SDLoc dl(N);
16060
16061  // Return early for non-byte-sized types, as they can't be consecutive.
16062 if (!N->getValueType(0).getVectorElementType().isByteSized())
16063 return SDValue();
16064
16065 bool InputsAreConsecutiveLoads = true;
16066 bool InputsAreReverseConsecutive = true;
16067 unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
16068 SDValue FirstInput = N->getOperand(0);
16069 bool IsRoundOfExtLoad = false;
16070 LoadSDNode *FirstLoad = nullptr;
16071
16072 if (FirstInput.getOpcode() == ISD::FP_ROUND &&
16073 FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
16074 FirstLoad = cast<LoadSDNode>(FirstInput.getOperand(0));
16075 IsRoundOfExtLoad = FirstLoad->getExtensionType() == ISD::EXTLOAD;
16076 }
16077 // Not a build vector of (possibly fp_rounded) loads.
16078 if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
16079 N->getNumOperands() == 1)
16080 return SDValue();
16081
16082 if (!IsRoundOfExtLoad)
16083 FirstLoad = cast<LoadSDNode>(FirstInput);
16084
16085  SmallVector<LoadSDNode *, 4> InputLoads;
16086  InputLoads.push_back(FirstLoad);
16087 for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
16088 // If any inputs are fp_round(extload), they all must be.
16089 if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
16090 return SDValue();
16091
16092 SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
16093 N->getOperand(i);
16094 if (NextInput.getOpcode() != ISD::LOAD)
16095 return SDValue();
16096
16097 SDValue PreviousInput =
16098 IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
16099 LoadSDNode *LD1 = cast<LoadSDNode>(PreviousInput);
16100 LoadSDNode *LD2 = cast<LoadSDNode>(NextInput);
16101
16102 // If any inputs are fp_round(extload), they all must be.
16103 if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
16104 return SDValue();
16105
16106 // We only care about regular loads. The PPC-specific load intrinsics
16107 // will not lead to a merge opportunity.
16108 if (!DAG.areNonVolatileConsecutiveLoads(LD2, LD1, ElemSize, 1))
16109 InputsAreConsecutiveLoads = false;
16110 if (!DAG.areNonVolatileConsecutiveLoads(LD1, LD2, ElemSize, 1))
16111 InputsAreReverseConsecutive = false;
16112
16113 // Exit early if the loads are neither consecutive nor reverse consecutive.
16114 if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
16115 return SDValue();
16116 InputLoads.push_back(LD2);
16117 }
16118
16119 assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
16120 "The loads cannot be both consecutive and reverse consecutive.");
16121
16122 SDValue WideLoad;
16123 SDValue ReturnSDVal;
16124 if (InputsAreConsecutiveLoads) {
16125 assert(FirstLoad && "Input needs to be a LoadSDNode.");
16126 WideLoad = DAG.getLoad(N->getValueType(0), dl, FirstLoad->getChain(),
16127 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16128 FirstLoad->getAlign());
16129 ReturnSDVal = WideLoad;
16130 } else if (InputsAreReverseConsecutive) {
16131 LoadSDNode *LastLoad = InputLoads.back();
16132 assert(LastLoad && "Input needs to be a LoadSDNode.");
16133 WideLoad = DAG.getLoad(N->getValueType(0), dl, LastLoad->getChain(),
16134 LastLoad->getBasePtr(), LastLoad->getPointerInfo(),
16135 LastLoad->getAlign());
16136    SmallVector<int, 16> Ops;
16137    for (int i = N->getNumOperands() - 1; i >= 0; i--)
16138 Ops.push_back(i);
16139
16140 ReturnSDVal = DAG.getVectorShuffle(N->getValueType(0), dl, WideLoad,
16141 DAG.getUNDEF(N->getValueType(0)), Ops);
16142 } else
16143 return SDValue();
16144
16145 for (auto *LD : InputLoads)
16146 DAG.makeEquivalentMemoryOrdering(LD, WideLoad);
16147 return ReturnSDVal;
16148}
16149
16150// This function adds the required vector_shuffle needed to get
16151// the elements of the vector extract in the correct position
16152// as specified by the CorrectElems encoding.
16153static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
16154                                      SDValue Input, uint64_t Elems,
16155 uint64_t CorrectElems) {
16156 SDLoc dl(N);
16157
16158 unsigned NumElems = Input.getValueType().getVectorNumElements();
16159 SmallVector<int, 16> ShuffleMask(NumElems, -1);
16160
16161 // Knowing the element indices being extracted from the original
16162 // vector and the order in which they're being inserted, just put
16163 // them at element indices required for the instruction.
16164 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16165 if (DAG.getDataLayout().isLittleEndian())
16166 ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
16167 else
16168 ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
16169 CorrectElems = CorrectElems >> 8;
16170 Elems = Elems >> 8;
16171 }
16172
16173 SDValue Shuffle =
16174 DAG.getVectorShuffle(Input.getValueType(), dl, Input,
16175 DAG.getUNDEF(Input.getValueType()), ShuffleMask);
16176
16177 EVT VT = N->getValueType(0);
16178 SDValue Conv = DAG.getBitcast(VT, Shuffle);
16179
16180 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
16181 Input.getValueType().getVectorElementType(),
16182                               N->getNumOperands());
16183  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
16184 DAG.getValueType(ExtVT));
16185}
16186
16187// Look for build vector patterns where input operands come from sign
16188// extended vector_extract elements of specific indices. If the correct indices
16189// aren't used, add a vector shuffle to fix up the indices and create
16190// SIGN_EXTEND_INREG node which selects the vector sign extend instructions
16191// during instruction selection.
16192static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
16193  // This array encodes the indices that the vector sign extend instructions
16194 // extract from when extending from one type to another for both BE and LE.
16195  // The right nibble of each byte corresponds to the LE indices,
16196  // and the left nibble of each byte corresponds to the BE indices.
16197 // For example: 0x3074B8FC byte->word
16198 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
16199 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
16200 // For example: 0x000070F8 byte->double word
16201 // For LE: the allowed indices are: 0x0,0x8
16202 // For BE: the allowed indices are: 0x7,0xF
16203 uint64_t TargetElems[] = {
16204 0x3074B8FC, // b->w
16205 0x000070F8, // b->d
16206 0x10325476, // h->w
16207 0x00003074, // h->d
16208 0x00001032, // w->d
16209 };
16210
16211 uint64_t Elems = 0;
16212 int Index;
16213 SDValue Input;
16214
16215 auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
16216 if (!Op)
16217 return false;
16218 if (Op.getOpcode() != ISD::SIGN_EXTEND &&
16219 Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
16220 return false;
16221
16222 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
16223 // of the right width.
16224 SDValue Extract = Op.getOperand(0);
16225 if (Extract.getOpcode() == ISD::ANY_EXTEND)
16226 Extract = Extract.getOperand(0);
16227 if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16228 return false;
16229
16230    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
16231    if (!ExtOp)
16232 return false;
16233
16234 Index = ExtOp->getZExtValue();
16235 if (Input && Input != Extract.getOperand(0))
16236 return false;
16237
16238 if (!Input)
16239 Input = Extract.getOperand(0);
16240
16241 Elems = Elems << 8;
16242 Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
16243 Elems |= Index;
16244
16245 return true;
16246 };
16247
16248  // If the build vector operands aren't sign-extended vector extracts
16249  // of the same input vector, then return.
16250 for (unsigned i = 0; i < N->getNumOperands(); i++) {
16251 if (!isSExtOfVecExtract(N->getOperand(i))) {
16252 return SDValue();
16253 }
16254 }
16255
16256 // If the vector extract indices are not correct, add the appropriate
16257 // vector_shuffle.
16258 int TgtElemArrayIdx;
16259 int InputSize = Input.getValueType().getScalarSizeInBits();
16260 int OutputSize = N->getValueType(0).getScalarSizeInBits();
16261 if (InputSize + OutputSize == 40)
16262 TgtElemArrayIdx = 0;
16263 else if (InputSize + OutputSize == 72)
16264 TgtElemArrayIdx = 1;
16265 else if (InputSize + OutputSize == 48)
16266 TgtElemArrayIdx = 2;
16267 else if (InputSize + OutputSize == 80)
16268 TgtElemArrayIdx = 3;
16269 else if (InputSize + OutputSize == 96)
16270 TgtElemArrayIdx = 4;
16271 else
16272 return SDValue();
16273
16274 uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
16275 CorrectElems = DAG.getDataLayout().isLittleEndian()
16276 ? CorrectElems & 0x0F0F0F0F0F0F0F0F
16277 : CorrectElems & 0xF0F0F0F0F0F0F0F0;
16278 if (Elems != CorrectElems) {
16279 return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
16280 }
16281
16282 // Regular lowering will catch cases where a shuffle is not needed.
16283 return SDValue();
16284}
16285
16286// Look for the pattern of a load from a narrow width to i128, feeding
16287// into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
16288// (LXVRZX). This node represents a zero extending load that will be matched
16289// to the Load VSX Vector Rightmost instructions.
16290static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG) {
16291  SDLoc DL(N);
16292
16293 // This combine is only eligible for a BUILD_VECTOR of v1i128.
16294 if (N->getValueType(0) != MVT::v1i128)
16295 return SDValue();
16296
16297 SDValue Operand = N->getOperand(0);
16298 // Proceed with the transformation if the operand to the BUILD_VECTOR
16299 // is a load instruction.
16300 if (Operand.getOpcode() != ISD::LOAD)
16301 return SDValue();
16302
16303 auto *LD = cast<LoadSDNode>(Operand);
16304 EVT MemoryType = LD->getMemoryVT();
16305
16306  // This transformation is only valid if we are loading either a byte,
16307 // halfword, word, or doubleword.
16308 bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
16309 MemoryType == MVT::i32 || MemoryType == MVT::i64;
16310
16311 // Ensure that the load from the narrow width is being zero extended to i128.
16312 if (!ValidLDType ||
16313 (LD->getExtensionType() != ISD::ZEXTLOAD &&
16314 LD->getExtensionType() != ISD::EXTLOAD))
16315 return SDValue();
16316
16317 SDValue LoadOps[] = {
16318 LD->getChain(), LD->getBasePtr(),
16319 DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
16320
16321 return DAG.getMemIntrinsicNode(PPCISD::LXVRZX, DL,
16322 DAG.getVTList(MVT::v1i128, MVT::Other),
16323 LoadOps, MemoryType, LD->getMemOperand());
16324}
16325
16326SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
16327 DAGCombinerInfo &DCI) const {
16328 assert(N->getOpcode() == ISD::BUILD_VECTOR &&
16329 "Should be called with a BUILD_VECTOR node");
16330
16331 SelectionDAG &DAG = DCI.DAG;
16332 SDLoc dl(N);
16333
16334 if (!Subtarget.hasVSX())
16335 return SDValue();
16336
16337 // The target independent DAG combiner will leave a build_vector of
16338 // float-to-int conversions intact. We can generate MUCH better code for
16339 // a float-to-int conversion of a vector of floats.
16340 SDValue FirstInput = N->getOperand(0);
16341 if (FirstInput.getOpcode() == PPCISD::MFVSR) {
16342 SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
16343 if (Reduced)
16344 return Reduced;
16345 }
16346
16347 // If we're building a vector out of consecutive loads, just load that
16348 // vector type.
16349 SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
16350 if (Reduced)
16351 return Reduced;
16352
16353 // If we're building a vector out of extended elements from another vector
16354 // we have P9 vector integer extend instructions. The code assumes legal
16355 // input types (i.e. it can't handle things like v4i16) so do not run before
16356 // legalization.
16357 if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
16358 Reduced = combineBVOfVecSExt(N, DAG);
16359 if (Reduced)
16360 return Reduced;
16361 }
16362
16363 // On Power10, the Load VSX Vector Rightmost instructions can be utilized
16364 // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
16365 // is a load from <valid narrow width> to i128.
16366 if (Subtarget.isISA3_1()) {
16367 SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
16368 if (BVOfZLoad)
16369 return BVOfZLoad;
16370 }
16371
16372 if (N->getValueType(0) != MVT::v2f64)
16373 return SDValue();
16374
16375 // Looking for:
16376 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
16377 if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
16378 FirstInput.getOpcode() != ISD::UINT_TO_FP)
16379 return SDValue();
16380 if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
16381 N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
16382 return SDValue();
16383 if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
16384 return SDValue();
16385
16386 SDValue Ext1 = FirstInput.getOperand(0);
16387 SDValue Ext2 = N->getOperand(1).getOperand(0);
16388 if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16389     Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
16390    return SDValue();
16391
16392 ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
16393 ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
16394 if (!Ext1Op || !Ext2Op)
16395 return SDValue();
16396 if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
16397 Ext1.getOperand(0) != Ext2.getOperand(0))
16398 return SDValue();
16399
16400 int FirstElem = Ext1Op->getZExtValue();
16401 int SecondElem = Ext2Op->getZExtValue();
16402 int SubvecIdx;
16403 if (FirstElem == 0 && SecondElem == 1)
16404 SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
16405 else if (FirstElem == 2 && SecondElem == 3)
16406 SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
16407 else
16408 return SDValue();
16409
16410 SDValue SrcVec = Ext1.getOperand(0);
16411 auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
16412 PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
16413 return DAG.getNode(NodeType, dl, MVT::v2f64,
16414 SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
16415}
16416
16417SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
16418 DAGCombinerInfo &DCI) const {
16419 assert((N->getOpcode() == ISD::SINT_TO_FP ||
16420 N->getOpcode() == ISD::UINT_TO_FP) &&
16421 "Need an int -> FP conversion node here");
16422
16423 if (useSoftFloat() || !Subtarget.has64BitSupport())
16424 return SDValue();
16425
16426 SelectionDAG &DAG = DCI.DAG;
16427 SDLoc dl(N);
16428 SDValue Op(N, 0);
16429
16430 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
16431 // from the hardware.
16432 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
16433 return SDValue();
16434 if (!Op.getOperand(0).getValueType().isSimple())
16435 return SDValue();
16436 if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
16437 Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
16438 return SDValue();
16439
16440 SDValue FirstOperand(Op.getOperand(0));
16441 bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
16442 (FirstOperand.getValueType() == MVT::i8 ||
16443 FirstOperand.getValueType() == MVT::i16);
16444 if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
16445 bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
16446 bool DstDouble = Op.getValueType() == MVT::f64;
16447 unsigned ConvOp = Signed ?
16448 (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
16449 (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
16450 SDValue WidthConst =
16451 DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
16452 dl, false);
16453 LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
16454 SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
16455 SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
16456 DAG.getVTList(MVT::f64, MVT::Other),
16457 Ops, MVT::i8, LDN->getMemOperand());
16458 DAG.makeEquivalentMemoryOrdering(LDN, Ld);
16459
16460 // For signed conversion, we need to sign-extend the value in the VSR
16461 if (Signed) {
16462 SDValue ExtOps[] = { Ld, WidthConst };
16463 SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
16464 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
16465 } else
16466 return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
16467 }
16468
16469
16470 // For i32 intermediate values, unfortunately, the conversion functions
16471  // leave the upper 32 bits of the value undefined. Within the set of
16472 // scalar instructions, we have no method for zero- or sign-extending the
16473 // value. Thus, we cannot handle i32 intermediate values here.
16474 if (Op.getOperand(0).getValueType() == MVT::i32)
16475 return SDValue();
16476
16477 assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
16478 "UINT_TO_FP is supported only with FPCVT");
16479
16480 // If we have FCFIDS, then use it when converting to single-precision.
16481 // Otherwise, convert to double-precision and then round.
16482 unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16483 ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
16484 : PPCISD::FCFIDS)
16485 : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
16486 : PPCISD::FCFID);
16487 MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
16488 ? MVT::f32
16489 : MVT::f64;
16490
16491  // If we're converting from a float to an int and back to a float again,
16492 // then we don't need the store/load pair at all.
16493 if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
16494 Subtarget.hasFPCVT()) ||
16495 (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
16496 SDValue Src = Op.getOperand(0).getOperand(0);
16497 if (Src.getValueType() == MVT::f32) {
16498 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
16499 DCI.AddToWorklist(Src.getNode());
16500 } else if (Src.getValueType() != MVT::f64) {
16501 // Make sure that we don't pick up a ppc_fp128 source value.
16502 return SDValue();
16503 }
16504
16505 unsigned FCTOp =
16506 Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
16507 PPCISD::FCTIDUZ;
16508
16509 SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
16510 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
16511
16512 if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
16513 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
16514 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
16515 DCI.AddToWorklist(FP.getNode());
16516 }
16517
16518 return FP;
16519 }
16520
16521 return SDValue();
16522}
16523
16524// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
16525// builtins) into loads with swaps.
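// On little endian, lxvd2x loads the two doublewords in big endian element
// order, so an xxswapd is emitted after the load to restore the expected
// element numbering; expandVSXStoreForLE below mirrors this with a swap
// before stxvd2x.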
16526SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
16527                                              DAGCombinerInfo &DCI) const {
16528 // Delay VSX load for LE combine until after LegalizeOps to prioritize other
16529 // load combines.
16530 if (DCI.isBeforeLegalizeOps())
16531 return SDValue();
16532
16533 SelectionDAG &DAG = DCI.DAG;
16534 SDLoc dl(N);
16535 SDValue Chain;
16536 SDValue Base;
16537 MachineMemOperand *MMO;
16538
16539 switch (N->getOpcode()) {
16540 default:
16541 llvm_unreachable("Unexpected opcode for little endian VSX load");
16542 case ISD::LOAD: {
16543    LoadSDNode *LD = cast<LoadSDNode>(N);
16544    Chain = LD->getChain();
16545 Base = LD->getBasePtr();
16546 MMO = LD->getMemOperand();
16547 // If the MMO suggests this isn't a load of a full vector, leave
16548 // things alone. For a built-in, we have to make the change for
16549 // correctness, so if there is a size problem that will be a bug.
16550 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16551 return SDValue();
16552 break;
16553 }
16554  case ISD::INTRINSIC_W_CHAIN: {
16555    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16556    Chain = Intrin->getChain();
16557 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
16558 // us what we want. Get operand 2 instead.
16559 Base = Intrin->getOperand(2);
16560 MMO = Intrin->getMemOperand();
16561 break;
16562 }
16563 }
16564
16565 MVT VecTy = N->getValueType(0).getSimpleVT();
16566
16567 SDValue LoadOps[] = { Chain, Base };
16568 SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
16569 DAG.getVTList(MVT::v2f64, MVT::Other),
16570 LoadOps, MVT::v2f64, MMO);
16571
16572 DCI.AddToWorklist(Load.getNode());
16573 Chain = Load.getValue(1);
16574 SDValue Swap = DAG.getNode(
16575 PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
16576 DCI.AddToWorklist(Swap.getNode());
16577
16578 // Add a bitcast if the resulting load type doesn't match v2f64.
16579 if (VecTy != MVT::v2f64) {
16580 SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
16581 DCI.AddToWorklist(N.getNode());
16582 // Package {bitcast value, swap's chain} to match Load's shape.
16583 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
16584 N, Swap.getValue(1));
16585 }
16586
16587 return Swap;
16588}
16589
16590// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
16591// builtins) into stores with swaps.
16592SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
16593                                               DAGCombinerInfo &DCI) const {
16594 // Delay VSX store for LE combine until after LegalizeOps to prioritize other
16595 // store combines.
16596 if (DCI.isBeforeLegalizeOps())
16597 return SDValue();
16598
16599 SelectionDAG &DAG = DCI.DAG;
16600 SDLoc dl(N);
16601 SDValue Chain;
16602 SDValue Base;
16603 unsigned SrcOpnd;
16604 MachineMemOperand *MMO;
16605
16606 switch (N->getOpcode()) {
16607 default:
16608 llvm_unreachable("Unexpected opcode for little endian VSX store");
16609 case ISD::STORE: {
16610    StoreSDNode *ST = cast<StoreSDNode>(N);
16611    Chain = ST->getChain();
16612 Base = ST->getBasePtr();
16613 MMO = ST->getMemOperand();
16614 SrcOpnd = 1;
16615 // If the MMO suggests this isn't a store of a full vector, leave
16616 // things alone. For a built-in, we have to make the change for
16617 // correctness, so if there is a size problem that will be a bug.
16618 if (!MMO->getSize().hasValue() || MMO->getSize().getValue() < 16)
16619 return SDValue();
16620 break;
16621 }
16622 case ISD::INTRINSIC_VOID: {
16623    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
16624    Chain = Intrin->getChain();
16625 // Intrin->getBasePtr() oddly does not get what we want.
16626 Base = Intrin->getOperand(3);
16627 MMO = Intrin->getMemOperand();
16628 SrcOpnd = 2;
16629 break;
16630 }
16631 }
16632
16633 SDValue Src = N->getOperand(SrcOpnd);
16634 MVT VecTy = Src.getValueType().getSimpleVT();
16635
16636  // All stores are done as v2f64 with a possible bitcast.
16637 if (VecTy != MVT::v2f64) {
16638 Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
16639 DCI.AddToWorklist(Src.getNode());
16640 }
16641
16642 SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
16643 DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
16644 DCI.AddToWorklist(Swap.getNode());
16645 Chain = Swap.getValue(1);
16646 SDValue StoreOps[] = { Chain, Swap, Base };
16647 SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
16648 DAG.getVTList(MVT::Other),
16649 StoreOps, VecTy, MMO);
16650 DCI.AddToWorklist(Store.getNode());
16651 return Store;
16652}
16653
16654// Handle DAG combine for STORE (FP_TO_INT F).
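// For example, (store (fp_to_sint f64 %x), %ptr) becomes a single
// ST_VSR_SCAL_INT node that converts the value in a VSR and stores the
// integer directly, so the converted value never has to pass through a GPR.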
16655SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
16656 DAGCombinerInfo &DCI) const {
16657 SelectionDAG &DAG = DCI.DAG;
16658 SDLoc dl(N);
16659 unsigned Opcode = N->getOperand(1).getOpcode();
16660 (void)Opcode;
16661 bool Strict = N->getOperand(1)->isStrictFPOpcode();
16662
16663 assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
16664 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT)
16665 && "Not a FP_TO_INT Instruction!");
16666
16667 SDValue Val = N->getOperand(1).getOperand(Strict ? 1 : 0);
16668 EVT Op1VT = N->getOperand(1).getValueType();
16669 EVT ResVT = Val.getValueType();
16670
16671 if (!Subtarget.hasVSX() || !Subtarget.hasFPCVT() || !isTypeLegal(ResVT))
16672 return SDValue();
16673
16674 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
16675 bool ValidTypeForStoreFltAsInt =
16676 (Op1VT == MVT::i32 || (Op1VT == MVT::i64 && Subtarget.isPPC64()) ||
16677 (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
16678
16679 // TODO: Lower conversion from f128 on all VSX targets
16680 if (ResVT == MVT::ppcf128 || (ResVT == MVT::f128 && !Subtarget.hasP9Vector()))
16681 return SDValue();
16682
16683 if ((Op1VT != MVT::i64 && !Subtarget.hasP8Vector()) ||
16684 cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
16685 return SDValue();
16686
16687 Val = convertFPToInt(N->getOperand(1), DAG, Subtarget);
16688
16689 // Set number of bytes being converted.
16690 unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
16691 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2),
16692 DAG.getIntPtrConstant(ByteSize, dl, false),
16693 DAG.getValueType(Op1VT)};
16694
16695 Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
16696 DAG.getVTList(MVT::Other), Ops,
16697 cast<StoreSDNode>(N)->getMemoryVT(),
16698 cast<StoreSDNode>(N)->getMemOperand());
16699
16700 return Val;
16701}
16702
16703static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
16704 // Check that the source of the element keeps flipping
16705  // (i.e. Mask[i] < NumElts -> Mask[i+1] >= NumElts).
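  // For example, with NumElts = 4 the mask <0, 5, 2, 7> alternates between
  // the two source vectors, while <0, 1, 6, 7> does not.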
16706 bool PrevElemFromFirstVec = Mask[0] < NumElts;
16707 for (int i = 1, e = Mask.size(); i < e; i++) {
16708 if (PrevElemFromFirstVec && Mask[i] < NumElts)
16709 return false;
16710 if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
16711 return false;
16712 PrevElemFromFirstVec = !PrevElemFromFirstVec;
16713 }
16714 return true;
16715}
16716
16717static bool isSplatBV(SDValue Op) {
16718 if (Op.getOpcode() != ISD::BUILD_VECTOR)
16719 return false;
16720 SDValue FirstOp;
16721
16722 // Find first non-undef input.
16723 for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
16724 FirstOp = Op.getOperand(i);
16725 if (!FirstOp.isUndef())
16726 break;
16727 }
16728
16729 // All inputs are undef or the same as the first non-undef input.
16730 for (int i = 1, e = Op.getNumOperands(); i < e; i++)
16731 if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
16732 return false;
16733 return true;
16734}
16735
16736static SDValue isScalarToVec(SDValue Op) {
16737  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16738 return Op;
16739 if (Op.getOpcode() != ISD::BITCAST)
16740 return SDValue();
16741 Op = Op.getOperand(0);
16742 if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
16743 return Op;
16744 return SDValue();
16745}
16746
16747// Fix up the shuffle mask to account for the fact that the result of
16748// scalar_to_vector is not in lane zero. This just takes all values in
16749// the ranges specified by the min/max indices and adds the number of
16750// elements required to ensure each element comes from the respective
16751// position in the valid lane.
16752// On little endian, that's just the corresponding element in the other
16753// half of the vector. On big endian, it is in the same half but right
16754// justified rather than left justified in that half.
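// For example, on little endian the value produced by scalar_to_vector
// actually sits in the other half of the register, so a mask index in
// [LHSFirstElt, LHSLastElt] is increased by HalfVec to select the same
// element from the permuted vector.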
16755static void fixupShuffleMaskForPermutedSToV(
16756    SmallVectorImpl<int> &ShuffV, int LHSFirstElt, int LHSLastElt,
16757 int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts,
16758 unsigned RHSNumValidElts, const PPCSubtarget &Subtarget) {
16759 int LHSEltFixup =
16760 Subtarget.isLittleEndian() ? HalfVec : HalfVec - LHSNumValidElts;
16761 int RHSEltFixup =
16762 Subtarget.isLittleEndian() ? HalfVec : HalfVec - RHSNumValidElts;
16763 for (int I = 0, E = ShuffV.size(); I < E; ++I) {
16764 int Idx = ShuffV[I];
16765 if (Idx >= LHSFirstElt && Idx <= LHSLastElt)
16766 ShuffV[I] += LHSEltFixup;
16767 else if (Idx >= RHSFirstElt && Idx <= RHSLastElt)
16768 ShuffV[I] += RHSEltFixup;
16769 }
16770}
16771
16772// Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
16773// the original is:
16774// (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
16775// In such a case, just change the shuffle mask to extract the element
16776// from the permuted index.
16777static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG,
16778                               const PPCSubtarget &Subtarget) {
16779 SDLoc dl(OrigSToV);
16780 EVT VT = OrigSToV.getValueType();
16781 assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16782 "Expecting a SCALAR_TO_VECTOR here");
16783 SDValue Input = OrigSToV.getOperand(0);
16784
16785 if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
16786 ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
16787 SDValue OrigVector = Input.getOperand(0);
16788
16789 // Can't handle non-const element indices or different vector types
16790 // for the input to the extract and the output of the scalar_to_vector.
16791 if (Idx && VT == OrigVector.getValueType()) {
16792 unsigned NumElts = VT.getVectorNumElements();
16793 assert(
16794 NumElts > 1 &&
16795 "Cannot produce a permuted scalar_to_vector for one element vector");
16796 SmallVector<int, 16> NewMask(NumElts, -1);
16797 unsigned ResultInElt = NumElts / 2;
16798 ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
16799 NewMask[ResultInElt] = Idx->getZExtValue();
16800 return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
16801 }
16802 }
16803 return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
16804 OrigSToV.getOperand(0));
16805}
16806
16807static bool isShuffleMaskInRange(const SmallVectorImpl<int> &ShuffV,
16808                                 int HalfVec, int LHSLastElementDefined,
16809 int RHSLastElementDefined) {
16810 for (int Index : ShuffV) {
16811 if (Index < 0) // Skip explicitly undefined mask indices.
16812 continue;
16813 // Handle first input vector of the vector_shuffle.
16814 if ((LHSLastElementDefined >= 0) && (Index < HalfVec) &&
16815 (Index > LHSLastElementDefined))
16816 return false;
16817 // Handle second input vector of the vector_shuffle.
16818 if ((RHSLastElementDefined >= 0) &&
16819 (Index > HalfVec + RHSLastElementDefined))
16820 return false;
16821 }
16822 return true;
16823}
16824
16825static SDValue generateSToVPermutedForVecShuffle(
16826    int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts,
16827 int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode,
16828 SelectionDAG &DAG, const PPCSubtarget &Subtarget) {
16829 EVT VecShuffOperandType = VecShuffOperand.getValueType();
16830 // Set up the values for the shuffle vector fixup.
16831 NumValidElts = ScalarSize / VecShuffOperandType.getScalarSizeInBits();
16832 // The last element depends on if the input comes from the LHS or RHS.
16833 //
16834 // For example:
16835 // (shuff (s_to_v i32), (bitcast (s_to_v i64), v4i32), ...)
16836 //
16837 // For the LHS: The last element that comes from the LHS is actually 0, not 3
16838 // because elements 1 and higher of a scalar_to_vector are undefined.
16839 // For the RHS: The last element that comes from the RHS is actually 5, not 7
16840 // because elements 1 and higher of a scalar_to_vector are undefined.
16841 // It is also not 4 because the original scalar_to_vector is wider and
16842 // actually contains two i32 elements.
16843 LastElt = (uint64_t)ScalarSize > ShuffleEltWidth
16844 ? ScalarSize / ShuffleEltWidth - 1 + FirstElt
16845 : FirstElt;
16846 SDValue SToVPermuted = getSToVPermuted(SToVNode, DAG, Subtarget);
16847 if (SToVPermuted.getValueType() != VecShuffOperandType)
16848 SToVPermuted = DAG.getBitcast(VecShuffOperandType, SToVPermuted);
16849 return SToVPermuted;
16850}
16851
16852// On little endian subtargets, combine shuffles such as:
16853// vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
16854// into:
16855// vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
16856// because the latter can be matched to a single instruction merge.
16857// Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
16858// to put the value into element zero. Adjust the shuffle mask so that the
16859// vector can remain in permuted form (to prevent a swap prior to a shuffle).
16860// On big endian targets, this is still useful for SCALAR_TO_VECTOR
16861// nodes with elements smaller than doubleword because all the ways
16862// of getting scalar data into a vector register put the value in the
16863// rightmost element of the left half of the vector.
16864SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
16865 SelectionDAG &DAG) const {
16866 SDValue LHS = SVN->getOperand(0);
16867 SDValue RHS = SVN->getOperand(1);
16868 auto Mask = SVN->getMask();
16869 int NumElts = LHS.getValueType().getVectorNumElements();
16870 SDValue Res(SVN, 0);
16871 SDLoc dl(SVN);
16872 bool IsLittleEndian = Subtarget.isLittleEndian();
16873
16874 // On big endian targets this is only useful for subtargets with direct moves.
16875 // On little endian targets it would be useful for all subtargets with VSX.
16876 // However adding special handling for LE subtargets without direct moves
16877 // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
16878 // which includes direct moves.
16879 if (!Subtarget.hasDirectMove())
16880 return Res;
16881
16882 // If this is not a shuffle of a shuffle and the first element comes from
16883 // the second vector, canonicalize to the commuted form. This will make it
16884 // more likely to match one of the single instruction patterns.
16885 if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
16886 RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
16887 std::swap(LHS, RHS);
16888 Res = DAG.getCommutedVectorShuffle(*SVN);
16889 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16890 }
16891
16892 // Adjust the shuffle mask if either input vector comes from a
16893 // SCALAR_TO_VECTOR and keep the respective input vector in permuted
16894 // form (to prevent the need for a swap).
16895 SmallVector<int, 16> ShuffV(Mask);
16896 SDValue SToVLHS = isScalarToVec(LHS);
16897 SDValue SToVRHS = isScalarToVec(RHS);
16898 if (SToVLHS || SToVRHS) {
16899 EVT VT = SVN->getValueType(0);
16900 uint64_t ShuffleEltWidth = VT.getVectorElementType().getSizeInBits();
16901 int ShuffleNumElts = ShuffV.size();
16902 int HalfVec = ShuffleNumElts / 2;
16903 // The width of the "valid lane" (i.e. the lane that contains the value that
16904 // is vectorized) needs to be expressed in terms of the number of elements
16905 // of the shuffle. It is therefore the ratio of the element counts before
16906 // and after any bitcast, and is set below if the LHS or RHS is a
16907 // SCALAR_TO_VECTOR node.
16908 unsigned LHSNumValidElts = HalfVec;
16909 unsigned RHSNumValidElts = HalfVec;
16910
16911 // Initially assume that neither input is permuted. These will be adjusted
16912 // accordingly if either input is. Note that -1 means that all elements
16913 // are undefined.
16914 int LHSFirstElt = 0;
16915 int RHSFirstElt = ShuffleNumElts;
16916 int LHSLastElt = -1;
16917 int RHSLastElt = -1;
16918
16919 // Get the permuted scalar to vector nodes for the source(s) that come from
16920 // ISD::SCALAR_TO_VECTOR.
16921 // On big endian systems, this only makes sense for element sizes smaller
16922 // than 64 bits since for 64-bit elements, all instructions already put
16923 // the value into element zero. Since scalar size of LHS and RHS may differ
16924 // after isScalarToVec, this should be checked using their own sizes.
16925 int LHSScalarSize = 0;
16926 int RHSScalarSize = 0;
16927 if (SToVLHS) {
16928 LHSScalarSize = SToVLHS.getValueType().getScalarSizeInBits();
16929 if (!IsLittleEndian && LHSScalarSize >= 64)
16930 return Res;
16931 }
16932 if (SToVRHS) {
16933 RHSScalarSize = SToVRHS.getValueType().getScalarSizeInBits();
16934 if (!IsLittleEndian && RHSScalarSize >= 64)
16935 return Res;
16936 }
16937 if (LHSScalarSize != 0)
16938 LHS = generateSToVPermutedForVecShuffle(
16939 LHSScalarSize, ShuffleEltWidth, LHSNumValidElts, LHSFirstElt,
16940 LHSLastElt, LHS, SToVLHS, DAG, Subtarget);
16941 if (RHSScalarSize != 0)
16942 RHS = generateSToVPermutedForVecShuffle(
16943 RHSScalarSize, ShuffleEltWidth, RHSNumValidElts, RHSFirstElt,
16944 RHSLastElt, RHS, SToVRHS, DAG, Subtarget);
16945
16946 if (!isShuffleMaskInRange(ShuffV, HalfVec, LHSLastElt, RHSLastElt))
16947 return Res;
16948
16949 // Fix up the shuffle mask to reflect where the desired element actually is.
16950 // The minimum and maximum indices that correspond to element zero for both
16951 // the LHS and RHS are computed and will control which shuffle mask entries
16952 // are to be changed. For example, if the RHS is permuted, any shuffle mask
16953 // entries in the range [RHSFirstElt,RHSLastElt] will be adjusted.
16954 fixupShuffleMaskForPermutedSToV(
16955 ShuffV, LHSFirstElt, LHSLastElt, RHSFirstElt, RHSLastElt, HalfVec,
16956 LHSNumValidElts, RHSNumValidElts, Subtarget);
16957 Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
16958
16959 // We may have simplified away the shuffle. We won't be able to do anything
16960 // further with it here.
16961 if (!isa<ShuffleVectorSDNode>(Res))
16962 return Res;
16963 Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
16964 }
16965
16966 SDValue TheSplat = IsLittleEndian ? RHS : LHS;
16967 // The common case after we commuted the shuffle is that the RHS is a splat
16968 // and we have elements coming in from the splat at indices that are not
16969 // conducive to using a merge.
16970 // Example:
16971 // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
16972 if (!isSplatBV(TheSplat))
16973 return Res;
16974
16975 // We are looking for a mask such that all even elements are from
16976 // one vector and all odd elements from the other.
16977 if (!isAlternatingShuffMask(Mask, NumElts))
16978 return Res;
16979
16980 // Adjust the mask so we are pulling in the same index from the splat
16981 // as the index from the interesting vector in consecutive elements.
16982 if (IsLittleEndian) {
16983 // Example (even elements from first vector):
16984 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
16985 if (Mask[0] < NumElts)
16986 for (int i = 1, e = Mask.size(); i < e; i += 2) {
16987 if (ShuffV[i] < 0)
16988 continue;
16989 // If element from non-splat is undef, pick first element from splat.
16990 ShuffV[i] = (ShuffV[i - 1] >= 0 ? ShuffV[i - 1] : 0) + NumElts;
16991 }
16992 // Example (odd elements from first vector):
16993 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
16994 else
16995 for (int i = 0, e = Mask.size(); i < e; i += 2) {
16996 if (ShuffV[i] < 0)
16997 continue;
16998 // If element from non-splat is undef, pick first element from splat.
16999 ShuffV[i] = (ShuffV[i + 1] >= 0 ? ShuffV[i + 1] : 0) + NumElts;
17000 }
17001 } else {
17002 // Example (even elements from first vector):
17003 // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
17004 if (Mask[0] < NumElts)
17005 for (int i = 0, e = Mask.size(); i < e; i += 2) {
17006 if (ShuffV[i] < 0)
17007 continue;
17008 // If element from non-splat is undef, pick first element from splat.
17009 ShuffV[i] = ShuffV[i + 1] >= 0 ? ShuffV[i + 1] - NumElts : 0;
17010 }
17011 // Example (odd elements from first vector):
17012 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
17013 else
17014 for (int i = 1, e = Mask.size(); i < e; i += 2) {
17015 if (ShuffV[i] < 0)
17016 continue;
17017 // If element from non-splat is undef, pick first element from splat.
17018 ShuffV[i] = ShuffV[i - 1] >= 0 ? ShuffV[i - 1] - NumElts : 0;
17019 }
17020 }
17021
17022 // If the RHS has undefs, we need to remove them since we may have created
17023 // a shuffle that adds those instead of the splat value.
17024 SDValue SplatVal =
17025 cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
17026 TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);
17027
17028 if (IsLittleEndian)
17029 RHS = TheSplat;
17030 else
17031 LHS = TheSplat;
17032 return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
17033}
17034
17035SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
17036 LSBaseSDNode *LSBase,
17037 DAGCombinerInfo &DCI) const {
17038 assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
17039 "Not a reverse memop pattern!");
17040
17041 auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
17042 auto Mask = SVN->getMask();
17043 int i = 0;
17044 auto I = Mask.rbegin();
17045 auto E = Mask.rend();
17046
17047 for (; I != E; ++I) {
17048 if (*I != i)
17049 return false;
17050 i++;
17051 }
17052 return true;
17053 };
17054
17055 SelectionDAG &DAG = DCI.DAG;
17056 EVT VT = SVN->getValueType(0);
17057
17058 if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
17059 return SDValue();
17060
17061 // Before P9, the PPCVSXSwapRemoval pass adjusts the element order for us
17062 // (see the comment in PPCVSXSwapRemoval.cpp). This combine conflicts with
17063 // that pass, so do not perform it there.
17064 if (!Subtarget.hasP9Vector())
17065 return SDValue();
17066
17067 if (!IsElementReverse(SVN))
17068 return SDValue();
17069
17070 if (LSBase->getOpcode() == ISD::LOAD) {
17071 // If result 0 of the load has any user other than the shufflevector
17072 // instruction, it is not profitable to replace the shufflevector with
17073 // a reverse load.
17074 for (SDUse &Use : LSBase->uses())
17075 if (Use.getResNo() == 0 &&
17076 Use.getUser()->getOpcode() != ISD::VECTOR_SHUFFLE)
17077 return SDValue();
17078
17079 SDLoc dl(LSBase);
17080 SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
17081 return DAG.getMemIntrinsicNode(
17082 PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
17083 LSBase->getMemoryVT(), LSBase->getMemOperand());
17084 }
17085
17086 if (LSBase->getOpcode() == ISD::STORE) {
17087 // If there are other uses of the shuffle, the swap cannot be avoided.
17088 // Forcing the use of an X-Form (since swapped stores only have
17089 // X-Forms) without removing the swap is unprofitable.
17090 if (!SVN->hasOneUse())
17091 return SDValue();
17092
17093 SDLoc dl(LSBase);
17094 SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
17095 LSBase->getBasePtr()};
17096 return DAG.getMemIntrinsicNode(
17097 PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
17098 LSBase->getMemoryVT(), LSBase->getMemOperand());
17099 }
17100
17101 llvm_unreachable("Expected a load or store node here");
17102}
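// An illustrative example of the combine above (the exact instruction chosen
// is left to instruction selection): on a little-endian Power9 VSX target,
// (v4i32 (vector_shuffle<3,2,1,0> (load %p), undef)) is rewritten to a
// PPCISD::LOAD_VEC_BE of %p, which delivers the elements already reversed, so
// the explicit element-reverse shuffle (and its swap) disappears; a store of
// such a shuffle is handled symmetrically with PPCISD::STORE_VEC_BE.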
17103
17104static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth) {
17105 unsigned IntrinsicID = Intrin.getConstantOperandVal(1);
17106 if (IntrinsicID == Intrinsic::ppc_stdcx)
17107 StoreWidth = 8;
17108 else if (IntrinsicID == Intrinsic::ppc_stwcx)
17109 StoreWidth = 4;
17110 else if (IntrinsicID == Intrinsic::ppc_sthcx)
17111 StoreWidth = 2;
17112 else if (IntrinsicID == Intrinsic::ppc_stbcx)
17113 StoreWidth = 1;
17114 else
17115 return false;
17116 return true;
17117}
17118
17119static SDValue DAGCombineAddc(SDNode *N,
17120 PPCTargetLowering::DAGCombinerInfo &DCI) {
17121 if (N->getOpcode() == PPCISD::ADDC && N->hasAnyUseOfValue(1)) {
17122 // (ADDC (ADDE 0, 0, C), -1) -> C
17123 SDValue LHS = N->getOperand(0);
17124 SDValue RHS = N->getOperand(1);
17125 if (LHS->getOpcode() == PPCISD::ADDE &&
17126 isNullConstant(LHS->getOperand(0)) &&
17127 isNullConstant(LHS->getOperand(1)) && isAllOnesConstant(RHS)) {
17128 return DCI.CombineTo(N, SDValue(N, 0), LHS->getOperand(2));
17129 }
17130 }
17131 return SDValue();
17132}
17133
17134SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
17135 DAGCombinerInfo &DCI) const {
17136 SelectionDAG &DAG = DCI.DAG;
17137 SDLoc dl(N);
17138 switch (N->getOpcode()) {
17139 default: break;
17140 case ISD::ADD:
17141 return combineADD(N, DCI);
17142 case ISD::AND: {
17143 // We don't want (and (zext (shift...)), C) if C fits in the width of the
17144 // original input as that will prevent us from selecting optimal rotates.
17145 // This only matters if the input to the extend is i32 widened to i64.
17146 SDValue Op1 = N->getOperand(0);
17147 SDValue Op2 = N->getOperand(1);
17148 if ((Op1.getOpcode() != ISD::ZERO_EXTEND &&
17149 Op1.getOpcode() != ISD::ANY_EXTEND) ||
17150 !isa<ConstantSDNode>(Op2) || N->getValueType(0) != MVT::i64 ||
17151 Op1.getOperand(0).getValueType() != MVT::i32)
17152 break;
17153 SDValue NarrowOp = Op1.getOperand(0);
17154 if (NarrowOp.getOpcode() != ISD::SHL && NarrowOp.getOpcode() != ISD::SRL &&
17155 NarrowOp.getOpcode() != ISD::ROTL && NarrowOp.getOpcode() != ISD::ROTR)
17156 break;
17157
17158 uint64_t Imm = Op2->getAsZExtVal();
17159 // Make sure that the constant is narrow enough to fit in the narrow type.
17160 if (!isUInt<32>(Imm))
17161 break;
17162 SDValue ConstOp = DAG.getConstant(Imm, dl, MVT::i32);
17163 SDValue NarrowAnd = DAG.getNode(ISD::AND, dl, MVT::i32, NarrowOp, ConstOp);
17164 return DAG.getZExtOrTrunc(NarrowAnd, dl, N->getValueType(0));
17165 }
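// For example, the AND narrowing above rewrites
//   (i64 (and (zext (i32 (srl %x, 3))), 255))
// into
//   (i64 (zext (i32 (and (srl %x, 3), 255))))
// so that the 32-bit shift-and-mask can typically be matched as a single
// rlwinm rather than being forced into a 64-bit rotate form.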
17166 case ISD::SHL:
17167 return combineSHL(N, DCI);
17168 case ISD::SRA:
17169 return combineSRA(N, DCI);
17170 case ISD::SRL:
17171 return combineSRL(N, DCI);
17172 case ISD::MUL:
17173 return combineMUL(N, DCI);
17174 case ISD::FMA:
17175 case PPCISD::FNMSUB:
17176 return combineFMALike(N, DCI);
17177 case PPCISD::SHL:
17178 if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
17179 return N->getOperand(0);
17180 break;
17181 case PPCISD::SRL:
17182 if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
17183 return N->getOperand(0);
17184 break;
17185 case PPCISD::SRA:
17186 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
17187 if (C->isZero() || // 0 >>s V -> 0.
17188 C->isAllOnes()) // -1 >>s V -> -1.
17189 return N->getOperand(0);
17190 }
17191 break;
17192 case ISD::SIGN_EXTEND:
17193 case ISD::ZERO_EXTEND:
17194 case ISD::ANY_EXTEND:
17195 return DAGCombineExtBoolTrunc(N, DCI);
17196 case ISD::TRUNCATE:
17197 return combineTRUNCATE(N, DCI);
17198 case ISD::SETCC:
17199 if (SDValue CSCC = combineSetCC(N, DCI))
17200 return CSCC;
17201 [[fallthrough]];
17202 case ISD::SELECT_CC:
17203 return DAGCombineTruncBoolExt(N, DCI);
17204 case ISD::SINT_TO_FP:
17205 case ISD::UINT_TO_FP:
17206 return combineFPToIntToFP(N, DCI);
17207 case ISD::VECTOR_SHUFFLE:
17208 if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
17209 LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
17210 return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
17211 }
17212 return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
17213 case ISD::STORE: {
17214
17215 EVT Op1VT = N->getOperand(1).getValueType();
17216 unsigned Opcode = N->getOperand(1).getOpcode();
17217
17218 if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT ||
17219 Opcode == ISD::STRICT_FP_TO_SINT || Opcode == ISD::STRICT_FP_TO_UINT) {
17220 SDValue Val = combineStoreFPToInt(N, DCI);
17221 if (Val)
17222 return Val;
17223 }
17224
17225 if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
17226 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
17227 SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
17228 if (Val)
17229 return Val;
17230 }
17231
17232 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
17233 if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
17234 N->getOperand(1).getNode()->hasOneUse() &&
17235 (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
17236 (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
17237
17238 // STBRX can only handle simple types, and it makes no sense to store
17239 // fewer than two bytes in byte-reversed order.
17240 EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
17241 if (mVT.isExtended() || mVT.getSizeInBits() < 16)
17242 break;
17243
17244 SDValue BSwapOp = N->getOperand(1).getOperand(0);
17245 // Do an any-extend to 32-bits if this is a half-word input.
17246 if (BSwapOp.getValueType() == MVT::i16)
17247 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
17248
17249 // If the type of the BSWAP operand is wider than the stored memory
17250 // width, it needs to be shifted right before the STBRX.
17251 if (Op1VT.bitsGT(mVT)) {
17252 int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
17253 BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
17254 DAG.getConstant(Shift, dl, MVT::i32));
17255 // Need to truncate if this is a bswap of i64 stored as i32/i16.
17256 if (Op1VT == MVT::i64)
17257 BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
17258 }
17259
17260 SDValue Ops[] = {
17261 N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
17262 };
17263 return
17264 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
17265 Ops, cast<StoreSDNode>(N)->getMemoryVT(),
17266 cast<StoreSDNode>(N)->getMemOperand());
17267 }
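// For instance, the byte-swap store combine above turns
// (store (bswap i32 %x), %p) into a PPCISD::STBRX node that is normally
// selected as a single stwbrx; an i16 value is first any-extended to i32 and
// ends up as sthbrx, and i64 values use stdbrx on subtargets with LDBRX.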
17268
17269 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
17270 // So it can increase the chance of CSE constant construction.
17271 if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
17272 isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
17273 // Need to sign-extend to 64 bits to handle negative values.
17274 EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
17275 uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
17276 MemVT.getSizeInBits());
17277 SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
17278
17279 auto *ST = cast<StoreSDNode>(N);
17280 SDValue NewST = DAG.getStore(ST->getChain(), dl, Const64,
17281 ST->getBasePtr(), ST->getOffset(), MemVT,
17282 ST->getMemOperand(), ST->getAddressingMode(),
17283 /*IsTruncating=*/true);
17284 // Note we use CombineTo here to prevent DAGCombiner from visiting the
17285 // new store which will change the constant by removing non-demanded bits.
17286 return ST->isUnindexed()
17287 ? DCI.CombineTo(N, NewST, /*AddTo=*/false)
17288 : DCI.CombineTo(N, NewST, NewST.getValue(1), /*AddTo=*/false);
17289 }
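// For example, after the combine above a function containing both
// "store i32 0, ptr %p" and "store i64 0, ptr %q" expresses the first store
// as a truncating store of the i64 zero constant, so both stores can CSE a
// single materialization of zero.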
17290
17291 // For little endian, VSX stores require generating xxswapd/lxvd2x.
17292 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17293 if (Op1VT.isSimple()) {
17294 MVT StoreVT = Op1VT.getSimpleVT();
17295 if (Subtarget.needsSwapsForVSXMemOps() &&
17296 (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
17297 StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
17298 return expandVSXStoreForLE(N, DCI);
17299 }
17300 break;
17301 }
17302 case ISD::LOAD: {
17303 LoadSDNode *LD = cast<LoadSDNode>(N);
17304 EVT VT = LD->getValueType(0);
17305
17306 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17307 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17308 if (VT.isSimple()) {
17309 MVT LoadVT = VT.getSimpleVT();
17310 if (Subtarget.needsSwapsForVSXMemOps() &&
17311 (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
17312 LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
17313 return expandVSXLoadForLE(N, DCI);
17314 }
17315
17316 // We sometimes end up with a 64-bit integer load, from which we extract
17317 // two single-precision floating-point numbers. This happens with
17318 // std::complex<float>, and other similar structures, because of the way we
17319 // canonicalize structure copies. However, if we lack direct moves,
17320 // then the final bitcasts from the extracted integer values to the
17321 // floating-point numbers turn into store/load pairs. Even with direct moves,
17322 // just loading the two floating-point numbers is likely better.
17323 auto ReplaceTwoFloatLoad = [&]() {
17324 if (VT != MVT::i64)
17325 return false;
17326
17327 if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
17328 LD->isVolatile())
17329 return false;
17330
17331 // We're looking for a sequence like this:
17332 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
17333 // t16: i64 = srl t13, Constant:i32<32>
17334 // t17: i32 = truncate t16
17335 // t18: f32 = bitcast t17
17336 // t19: i32 = truncate t13
17337 // t20: f32 = bitcast t19
17338
17339 if (!LD->hasNUsesOfValue(2, 0))
17340 return false;
17341
17342 auto UI = LD->user_begin();
17343 while (UI.getUse().getResNo() != 0) ++UI;
17344 SDNode *Trunc = *UI++;
17345 while (UI.getUse().getResNo() != 0) ++UI;
17346 SDNode *RightShift = *UI;
17347 if (Trunc->getOpcode() != ISD::TRUNCATE)
17348 std::swap(Trunc, RightShift);
17349
17350 if (Trunc->getOpcode() != ISD::TRUNCATE ||
17351 Trunc->getValueType(0) != MVT::i32 ||
17352 !Trunc->hasOneUse())
17353 return false;
17354 if (RightShift->getOpcode() != ISD::SRL ||
17355 !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
17356 RightShift->getConstantOperandVal(1) != 32 ||
17357 !RightShift->hasOneUse())
17358 return false;
17359
17360 SDNode *Trunc2 = *RightShift->user_begin();
17361 if (Trunc2->getOpcode() != ISD::TRUNCATE ||
17362 Trunc2->getValueType(0) != MVT::i32 ||
17363 !Trunc2->hasOneUse())
17364 return false;
17365
17366 SDNode *Bitcast = *Trunc->user_begin();
17367 SDNode *Bitcast2 = *Trunc2->user_begin();
17368
17369 if (Bitcast->getOpcode() != ISD::BITCAST ||
17370 Bitcast->getValueType(0) != MVT::f32)
17371 return false;
17372 if (Bitcast2->getOpcode() != ISD::BITCAST ||
17373 Bitcast2->getValueType(0) != MVT::f32)
17374 return false;
17375
17376 if (Subtarget.isLittleEndian())
17377 std::swap(Bitcast, Bitcast2);
17378
17379 // Bitcast has the second float (in memory-layout order) and Bitcast2
17380 // has the first one.
17381
17382 SDValue BasePtr = LD->getBasePtr();
17383 if (LD->isIndexed()) {
17384 assert(LD->getAddressingMode() == ISD::PRE_INC &&
17385 "Non-pre-inc AM on PPC?");
17386 BasePtr =
17387 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17388 LD->getOffset());
17389 }
17390
17391 auto MMOFlags =
17392 LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
17393 SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
17394 LD->getPointerInfo(), LD->getAlign(),
17395 MMOFlags, LD->getAAInfo());
17396 SDValue AddPtr =
17397 DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
17398 BasePtr, DAG.getIntPtrConstant(4, dl));
17399 SDValue FloatLoad2 = DAG.getLoad(
17400 MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
17401 LD->getPointerInfo().getWithOffset(4),
17402 commonAlignment(LD->getAlign(), 4), MMOFlags, LD->getAAInfo());
17403
17404 if (LD->isIndexed()) {
17405 // Note that DAGCombine should re-form any pre-increment load(s) from
17406 // what is produced here if that makes sense.
17407 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
17408 }
17409
17410 DCI.CombineTo(Bitcast2, FloatLoad);
17411 DCI.CombineTo(Bitcast, FloatLoad2);
17412
17413 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
17414 SDValue(FloatLoad2.getNode(), 1));
17415 return true;
17416 };
17417
17418 if (ReplaceTwoFloatLoad())
17419 return SDValue(N, 0);
17420
17421 EVT MemVT = LD->getMemoryVT();
17422 Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
17423 Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
17424 if (LD->isUnindexed() && VT.isVector() &&
17425 ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
17426 // P8 and later hardware should just use LOAD.
17427 !Subtarget.hasP8Vector() &&
17428 (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
17429 VT == MVT::v4f32))) &&
17430 LD->getAlign() < ABIAlignment) {
17431 // This is a type-legal unaligned Altivec load.
17432 SDValue Chain = LD->getChain();
17433 SDValue Ptr = LD->getBasePtr();
17434 bool isLittleEndian = Subtarget.isLittleEndian();
17435
17436 // This implements the loading of unaligned vectors as described in
17437 // the venerable Apple Velocity Engine overview. Specifically:
17438 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
17439 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
17440 //
17441 // The general idea is to expand a sequence of one or more unaligned
17442 // loads into an alignment-based permutation-control instruction (lvsl
17443 // or lvsr), a series of regular vector loads (which always truncate
17444 // their input address to an aligned address), and a series of
17445 // permutations. The results of these permutations are the requested
17446 // loaded values. The trick is that the last "extra" load is not taken
17447 // from the address you might suspect (sizeof(vector) bytes after the
17448 // last requested load), but rather sizeof(vector) - 1 bytes after the
17449 // last requested vector. The point of this is to avoid a page fault if
17450 // the base address happened to be aligned. This works because if the
17451 // base address is aligned, then adding less than a full vector length
17452 // will cause the last vector in the sequence to be (re)loaded.
17453 // Otherwise, the next vector will be fetched as you might suspect was
17454 // necessary.
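    // As a worked example (addresses are illustrative): for a 16-byte load
    // from 0x1003, the first lvx truncates the address and loads
    // 0x1000..0x100F; the "extra" load is taken sizeof(vector)-1 = 15 bytes
    // later (when no consecutive load follows), i.e. at 0x1012, and so reads
    // 0x1010..0x101F; vperm, driven by the lvsl/lvsr control computed from
    // the low address bits, then assembles bytes 0x1003..0x1012. Had the base
    // been exactly 0x1000, the extra address 0x100F would truncate back to
    // 0x1000, so no byte beyond the requested data is ever touched.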
17455
17456 // We might be able to reuse the permutation generation from
17457 // a different base address offset from this one by an aligned amount.
17458 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
17459 // optimization later.
17460 Intrinsic::ID Intr, IntrLD, IntrPerm;
17461 MVT PermCntlTy, PermTy, LDTy;
17462 Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17463 : Intrinsic::ppc_altivec_lvsl;
17464 IntrLD = Intrinsic::ppc_altivec_lvx;
17465 IntrPerm = Intrinsic::ppc_altivec_vperm;
17466 PermCntlTy = MVT::v16i8;
17467 PermTy = MVT::v4i32;
17468 LDTy = MVT::v4i32;
17469
17470 SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
17471
17472 // Create the new MMO for the new base load. It is like the original MMO,
17473 // but represents an area in memory almost twice the vector size centered
17474 // on the original address. If the address is unaligned, we might start
17475 // reading up to (sizeof(vector)-1) bytes below the address of the
17476 // original unaligned load.
17477 MachineFunction &MF = DAG.getMachineFunction();
17478 MachineMemOperand *BaseMMO =
17479 MF.getMachineMemOperand(LD->getMemOperand(),
17480 -(int64_t)MemVT.getStoreSize()+1,
17481 2*MemVT.getStoreSize()-1);
17482
17483 // Create the new base load.
17484 SDValue LDXIntID =
17485 DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
17486 SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
17487 SDValue BaseLoad =
17488 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
17489 DAG.getVTList(PermTy, MVT::Other),
17490 BaseLoadOps, LDTy, BaseMMO);
17491
17492 // Note that the value of IncOffset (which is provided to the next
17493 // load's pointer info offset value, and thus used to calculate the
17494 // alignment), and the value of IncValue (which is actually used to
17495 // increment the pointer value) are different! This is because we
17496 // require the next load to appear to be aligned, even though it
17497 // is actually offset from the base pointer by a lesser amount.
17498 int IncOffset = VT.getSizeInBits() / 8;
17499 int IncValue = IncOffset;
17500
17501 // Walk (both up and down) the chain looking for another load at the real
17502 // (aligned) offset (the alignment of the other load does not matter in
17503 // this case). If found, then do not use the offset reduction trick, as
17504 // that will prevent the loads from being later combined (as they would
17505 // otherwise be duplicates).
17506 if (!findConsecutiveLoad(LD, DAG))
17507 --IncValue;
17508
17509 SDValue Increment =
17510 DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
17511 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
17512
17513 MachineMemOperand *ExtraMMO =
17514 MF.getMachineMemOperand(LD->getMemOperand(),
17515 1, 2*MemVT.getStoreSize()-1);
17516 SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
17517 SDValue ExtraLoad =
17518 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
17519 DAG.getVTList(PermTy, MVT::Other),
17520 ExtraLoadOps, LDTy, ExtraMMO);
17521
17522 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17523 BaseLoad.getValue(1), ExtraLoad.getValue(1));
17524
17525 // Because vperm has a big-endian bias, we must reverse the order
17526 // of the input vectors and complement the permute control vector
17527 // when generating little endian code. We have already handled the
17528 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
17529 // and ExtraLoad here.
17530 SDValue Perm;
17531 if (isLittleEndian)
17532 Perm = BuildIntrinsicOp(IntrPerm,
17533 ExtraLoad, BaseLoad, PermCntl, DAG, dl);
17534 else
17535 Perm = BuildIntrinsicOp(IntrPerm,
17536 BaseLoad, ExtraLoad, PermCntl, DAG, dl);
17537
17538 if (VT != PermTy)
17539 Perm = Subtarget.hasAltivec()
17540 ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
17541 : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
17542 DAG.getTargetConstant(1, dl, MVT::i64));
17543 // second argument is 1 because this rounding
17544 // is always exact.
17545
17546 // The output of the permutation is our loaded result, the TokenFactor is
17547 // our new chain.
17548 DCI.CombineTo(N, Perm, TF);
17549 return SDValue(N, 0);
17550 }
17551 }
17552 break;
17553 case ISD::INTRINSIC_WO_CHAIN: {
17554 bool isLittleEndian = Subtarget.isLittleEndian();
17555 unsigned IID = N->getConstantOperandVal(0);
17556 Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
17557 : Intrinsic::ppc_altivec_lvsl);
17558 if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
17559 SDValue Add = N->getOperand(1);
17560
17561 int Bits = 4 /* 16 byte alignment */;
17562
17563 if (DAG.MaskedValueIsZero(Add->getOperand(1),
17564 APInt::getAllOnes(Bits /* alignment */)
17565 .zext(Add.getScalarValueSizeInBits()))) {
17566 SDNode *BasePtr = Add->getOperand(0).getNode();
17567 for (SDNode *U : BasePtr->users()) {
17568 if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17569 U->getConstantOperandVal(0) == IID) {
17570 // We've found another LVSL/LVSR, and this address is an aligned
17571 // multiple of that one. The results will be the same, so use the
17572 // one we've just found instead.
17573
17574 return SDValue(U, 0);
17575 }
17576 }
17577 }
17578
17579 if (isa<ConstantSDNode>(Add->getOperand(1))) {
17580 SDNode *BasePtr = Add->getOperand(0).getNode();
17581 for (SDNode *U : BasePtr->users()) {
17582 if (U->getOpcode() == ISD::ADD &&
17583 isa<ConstantSDNode>(U->getOperand(1)) &&
17584 (Add->getConstantOperandVal(1) - U->getConstantOperandVal(1)) %
17585 (1ULL << Bits) ==
17586 0) {
17587 SDNode *OtherAdd = U;
17588 for (SDNode *V : OtherAdd->users()) {
17589 if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17590 V->getConstantOperandVal(0) == IID) {
17591 return SDValue(V, 0);
17592 }
17593 }
17594 }
17595 }
17596 }
17597 }
17598
17599 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
17600 // Expose the vabsduw/h/b opportunity for downstream code.
17601 if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
17602 (IID == Intrinsic::ppc_altivec_vmaxsw ||
17603 IID == Intrinsic::ppc_altivec_vmaxsh ||
17604 IID == Intrinsic::ppc_altivec_vmaxsb)) {
17605 SDValue V1 = N->getOperand(1);
17606 SDValue V2 = N->getOperand(2);
17607 if ((V1.getSimpleValueType() == MVT::v4i32 ||
17608 V1.getSimpleValueType() == MVT::v8i16 ||
17609 V1.getSimpleValueType() == MVT::v16i8) &&
17610 V1.getSimpleValueType() == V2.getSimpleValueType()) {
17611 // (0-a, a)
17612 if (V1.getOpcode() == ISD::SUB &&
17613 ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
17614 V1.getOperand(1) == V2) {
17615 return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
17616 }
17617 // (a, 0-a)
17618 if (V2.getOpcode() == ISD::SUB &&
17619 ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
17620 V2.getOperand(1) == V1) {
17621 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17622 }
17623 // (x-y, y-x)
17624 if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
17625 V1.getOperand(0) == V2.getOperand(1) &&
17626 V1.getOperand(1) == V2.getOperand(0)) {
17627 return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
17628 }
17629 }
17630 }
17631 }
17632
17633 break;
17634 case ISD::INTRINSIC_W_CHAIN:
17635 switch (N->getConstantOperandVal(1)) {
17636 default:
17637 break;
17638 case Intrinsic::ppc_altivec_vsum4sbs:
17639 case Intrinsic::ppc_altivec_vsum4shs:
17640 case Intrinsic::ppc_altivec_vsum4ubs: {
17641 // These sum-across intrinsics only have a chain due to the side effect
17642 // that they may set the SAT bit. If we know the SAT bit will not be set
17643 // for some inputs, we can replace any uses of their chain with the
17644 // input chain.
17645 if (BuildVectorSDNode *BVN =
17646 dyn_cast<BuildVectorSDNode>(N->getOperand(3))) {
17647 APInt APSplatBits, APSplatUndef;
17648 unsigned SplatBitSize;
17649 bool HasAnyUndefs;
17650 bool BVNIsConstantSplat = BVN->isConstantSplat(
17651 APSplatBits, APSplatUndef, SplatBitSize, HasAnyUndefs, 0,
17652 !Subtarget.isLittleEndian());
17653 // If the constant splat vector is 0, the SAT bit will not be set.
17654 if (BVNIsConstantSplat && APSplatBits == 0)
17655 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), N->getOperand(0));
17656 }
17657 return SDValue();
17658 }
17659 case Intrinsic::ppc_vsx_lxvw4x:
17660 case Intrinsic::ppc_vsx_lxvd2x:
17661 // For little endian, VSX loads require generating lxvd2x/xxswapd.
17662 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
17663 if (Subtarget.needsSwapsForVSXMemOps())
17664 return expandVSXLoadForLE(N, DCI);
17665 break;
17666 }
17667 break;
17668 case ISD::INTRINSIC_VOID:
17669 // For little endian, VSX stores require generating xxswapd/stxvd2x.
17670 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
17671 if (Subtarget.needsSwapsForVSXMemOps()) {
17672 switch (N->getConstantOperandVal(1)) {
17673 default:
17674 break;
17675 case Intrinsic::ppc_vsx_stxvw4x:
17676 case Intrinsic::ppc_vsx_stxvd2x:
17677 return expandVSXStoreForLE(N, DCI);
17678 }
17679 }
17680 break;
17681 case ISD::BSWAP: {
17682 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
17683 // For subtargets without LDBRX, we can still do better than the default
17684 // expansion even for 64-bit BSWAP (LOAD).
17685 bool Is64BitBswapOn64BitTgt =
17686 Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
17687 bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
17688 N->getOperand(0).hasOneUse();
17689 if (IsSingleUseNormalLd &&
17690 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
17691 (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
17692 SDValue Load = N->getOperand(0);
17693 LoadSDNode *LD = cast<LoadSDNode>(Load);
17694 // Create the byte-swapping load.
17695 SDValue Ops[] = {
17696 LD->getChain(), // Chain
17697 LD->getBasePtr(), // Ptr
17698 DAG.getValueType(N->getValueType(0)) // VT
17699 };
17700 SDValue BSLoad =
17701 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
17702 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
17703 MVT::i64 : MVT::i32, MVT::Other),
17704 Ops, LD->getMemoryVT(), LD->getMemOperand());
17705
17706 // If this is an i16 load, insert the truncate.
17707 SDValue ResVal = BSLoad;
17708 if (N->getValueType(0) == MVT::i16)
17709 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
17710
17711 // First, combine the bswap away. This makes the value produced by the
17712 // load dead.
17713 DCI.CombineTo(N, ResVal);
17714
17715 // Next, combine the load away, we give it a bogus result value but a real
17716 // chain result. The result value is dead because the bswap is dead.
17717 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
17718
17719 // Return N so it doesn't get rechecked!
17720 return SDValue(N, 0);
17721 }
17722 // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
17723 // before legalization so that the BUILD_PAIR is handled correctly.
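// For example (illustrative, not tied to a specific test): without LDBRX,
// (i64 (bswap (load i64 %p))) becomes two i32 loads at %p and %p+4, each
// wrapped in an i32 BSWAP that this same combine then turns into lwbrx, and
// the two halves are joined with a BUILD_PAIR whose operand order depends on
// the endianness handling below.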
17724 if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
17725 !IsSingleUseNormalLd)
17726 return SDValue();
17727 LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
17728
17729 // Can't split volatile or atomic loads.
17730 if (!LD->isSimple())
17731 return SDValue();
17732 SDValue BasePtr = LD->getBasePtr();
17733 SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
17734 LD->getPointerInfo(), LD->getAlign());
17735 Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
17736 BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
17737 DAG.getIntPtrConstant(4, dl));
17738 MachineMemOperand *NewMMO = DAG.getMachineFunction().getMachineMemOperand(
17739 LD->getMemOperand(), 4, 4);
17740 SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
17741 Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
17742 SDValue Res;
17743 if (Subtarget.isLittleEndian())
17744 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
17745 else
17746 Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
17747 SDValue TF =
17748 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
17749 Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
17750 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
17751 return Res;
17752 }
17753 case PPCISD::VCMP:
17754 // If a VCMP_rec node already exists with exactly the same operands as this
17755 // node, use its result instead of this node (VCMP_rec computes both a CR6
17756 // and a normal output).
17757 //
17758 if (!N->getOperand(0).hasOneUse() &&
17759 !N->getOperand(1).hasOneUse() &&
17760 !N->getOperand(2).hasOneUse()) {
17761
17762 // Scan all of the users of the LHS, looking for VCMP_rec's that match.
17763 SDNode *VCMPrecNode = nullptr;
17764
17765 SDNode *LHSN = N->getOperand(0).getNode();
17766 for (SDNode *User : LHSN->users())
17767 if (User->getOpcode() == PPCISD::VCMP_rec &&
17768 User->getOperand(1) == N->getOperand(1) &&
17769 User->getOperand(2) == N->getOperand(2) &&
17770 User->getOperand(0) == N->getOperand(0)) {
17771 VCMPrecNode = User;
17772 break;
17773 }
17774
17775 // If there is no VCMP_rec node, or if the flag value has a single use,
17776 // don't transform this.
17777 if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
17778 break;
17779
17780 // Look at the (necessarily single) use of the flag value. If it has a
17781 // chain, this transformation is more complex. Note that multiple things
17782 // could use the value result, which we should ignore.
17783 SDNode *FlagUser = nullptr;
17784 for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
17785 FlagUser == nullptr; ++UI) {
17786 assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
17787 SDNode *User = UI->getUser();
17788 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
17789 if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
17790 FlagUser = User;
17791 break;
17792 }
17793 }
17794 }
17795
17796 // If the user is a MFOCRF instruction, we know this is safe.
17797 // Otherwise we give up for right now.
17798 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
17799 return SDValue(VCMPrecNode, 0);
17800 }
17801 break;
17802 case ISD::BR_CC: {
17803 // If this is a branch on an altivec predicate comparison, lower this so
17804 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
17805 // lowering is done pre-legalize, because the legalizer lowers the predicate
17806 // compare down to code that is difficult to reassemble.
17807 // This code also handles branches that depend on the result of a store
17808 // conditional.
17809 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
17810 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
17811
17812 int CompareOpc;
17813 bool isDot;
17814
17815 if (!isa<ConstantSDNode>(RHS) || (CC != ISD::SETEQ && CC != ISD::SETNE))
17816 break;
17817
17818 // Since we are doing this pre-legalize, the RHS can be a constant of
17819 // arbitrary bitwidth which may cause issues when trying to get the value
17820 // from the underlying APInt.
17821 auto RHSAPInt = RHS->getAsAPIntVal();
17822 if (!RHSAPInt.isIntN(64))
17823 break;
17824
17825 unsigned Val = RHSAPInt.getZExtValue();
17826 auto isImpossibleCompare = [&]() {
17827 // If this is a comparison against something other than 0/1, then we know
17828 // that the condition is never/always true.
17829 if (Val != 0 && Val != 1) {
17830 if (CC == ISD::SETEQ) // Cond never true, remove branch.
17831 return N->getOperand(0);
17832 // Always !=, turn it into an unconditional branch.
17833 return DAG.getNode(ISD::BR, dl, MVT::Other,
17834 N->getOperand(0), N->getOperand(4));
17835 }
17836 return SDValue();
17837 };
17838 // Combine branches fed by store conditional instructions (st[bhwd]cx).
17839 unsigned StoreWidth = 0;
17840 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
17841 isStoreConditional(LHS, StoreWidth)) {
17842 if (SDValue Impossible = isImpossibleCompare())
17843 return Impossible;
17844 PPC::Predicate CompOpc;
17845 // eq 0 => ne
17846 // ne 0 => eq
17847 // eq 1 => eq
17848 // ne 1 => ne
17849 if (Val == 0)
17850 CompOpc = CC == ISD::SETEQ ? PPC::PRED_NE : PPC::PRED_EQ;
17851 else
17852 CompOpc = CC == ISD::SETEQ ? PPC::PRED_EQ : PPC::PRED_NE;
17853
17854 SDValue Ops[] = {LHS.getOperand(0), LHS.getOperand(2), LHS.getOperand(3),
17855 DAG.getConstant(StoreWidth, dl, MVT::i32)};
17856 auto *MemNode = cast<MemSDNode>(LHS);
17857 SDValue ConstSt = DAG.getMemIntrinsicNode(
17858 PPCISD::STORE_COND, dl,
17859 DAG.getVTList(MVT::i32, MVT::Other, MVT::Glue), Ops,
17860 MemNode->getMemoryVT(), MemNode->getMemOperand());
17861
17862 SDValue InChain;
17863 // Unchain the branch from the original store conditional.
17864 if (N->getOperand(0) == LHS.getValue(1))
17865 InChain = LHS.getOperand(0);
17866 else if (N->getOperand(0).getOpcode() == ISD::TokenFactor) {
17867 SmallVector<SDValue, 4> InChains;
17868 SDValue InTF = N->getOperand(0);
17869 for (int i = 0, e = InTF.getNumOperands(); i < e; i++)
17870 if (InTF.getOperand(i) != LHS.getValue(1))
17871 InChains.push_back(InTF.getOperand(i));
17872 InChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, InChains);
17873 }
17874
17875 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, InChain,
17876 DAG.getConstant(CompOpc, dl, MVT::i32),
17877 DAG.getRegister(PPC::CR0, MVT::i32), N->getOperand(4),
17878 ConstSt.getValue(2));
17879 }
17880
17881 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
17882 getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
17883 assert(isDot && "Can't compare against a vector result!");
17884
17885 if (SDValue Impossible = isImpossibleCompare())
17886 return Impossible;
17887
17888 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
17889 // Create the PPCISD altivec 'dot' comparison node.
17890 SDValue Ops[] = {
17891 LHS.getOperand(2), // LHS of compare
17892 LHS.getOperand(3), // RHS of compare
17893 DAG.getConstant(CompareOpc, dl, MVT::i32)
17894 };
17895 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
17896 SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
17897
17898 // Unpack the result based on how the target uses it.
17899 PPC::Predicate CompOpc;
17900 switch (LHS.getConstantOperandVal(1)) {
17901 default: // Can't happen, don't crash on invalid number though.
17902 case 0: // Branch on the value of the EQ bit of CR6.
17903 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
17904 break;
17905 case 1: // Branch on the inverted value of the EQ bit of CR6.
17906 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
17907 break;
17908 case 2: // Branch on the value of the LT bit of CR6.
17909 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
17910 break;
17911 case 3: // Branch on the inverted value of the LT bit of CR6.
17912 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
17913 break;
17914 }
17915
17916 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
17917 DAG.getConstant(CompOpc, dl, MVT::i32),
17918 DAG.getRegister(PPC::CR6, MVT::i32),
17919 N->getOperand(4), CompNode.getValue(1));
17920 }
17921 break;
17922 }
17923 case ISD::BUILD_VECTOR:
17924 return DAGCombineBuildVector(N, DCI);
17925 case PPCISD::ADDC:
17926 return DAGCombineAddc(N, DCI);
17927 }
17928
17929 return SDValue();
17930}
17931
17932SDValue
17933PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
17934 SelectionDAG &DAG,
17935 SmallVectorImpl<SDNode *> &Created) const {
17936 // fold (sdiv X, pow2)
17937 EVT VT = N->getValueType(0);
17938 if (VT == MVT::i64 && !Subtarget.isPPC64())
17939 return SDValue();
17940 if ((VT != MVT::i32 && VT != MVT::i64) ||
17941 !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
17942 return SDValue();
17943
17944 SDLoc DL(N);
17945 SDValue N0 = N->getOperand(0);
17946
17947 bool IsNegPow2 = Divisor.isNegatedPowerOf2();
17948 unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countr_zero();
17949 SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
17950
17951 SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
17952 Created.push_back(Op.getNode());
17953
17954 if (IsNegPow2) {
17955 Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
17956 Created.push_back(Op.getNode());
17957 }
17958
17959 return Op;
17960}
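// For example, "sdiv i32 %x, 4" is emitted as SRA_ADDZE(%x, 2), which is
// normally selected as srawi (setting CA when a negative dividend sheds
// nonzero bits) followed by addze to round the quotient toward zero; for a
// divisor of -4 the extra ISD::SUB above negates that result.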
17961
17962//===----------------------------------------------------------------------===//
17963// Inline Assembly Support
17964//===----------------------------------------------------------------------===//
17965
17966void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
17967 KnownBits &Known,
17968 const APInt &DemandedElts,
17969 const SelectionDAG &DAG,
17970 unsigned Depth) const {
17971 Known.resetAll();
17972 switch (Op.getOpcode()) {
17973 default: break;
17974 case PPCISD::LBRX: {
17975 // lhbrx is known to have the top bits cleared out.
17976 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
17977 Known.Zero = 0xFFFF0000;
17978 break;
17979 }
17980 case PPCISD::ADDE: {
17981 if (Op.getResNo() == 0) {
17982 // (0|1), _ = ADDE 0, 0, CARRY
17983 SDValue LHS = Op.getOperand(0);
17984 SDValue RHS = Op.getOperand(1);
17985 if (isNullConstant(LHS) && isNullConstant(RHS))
17986 Known.Zero = ~1ULL;
17987 }
17988 break;
17989 }
17990 case ISD::INTRINSIC_WO_CHAIN: {
17991 switch (Op.getConstantOperandVal(0)) {
17992 default: break;
17993 case Intrinsic::ppc_altivec_vcmpbfp_p:
17994 case Intrinsic::ppc_altivec_vcmpeqfp_p:
17995 case Intrinsic::ppc_altivec_vcmpequb_p:
17996 case Intrinsic::ppc_altivec_vcmpequh_p:
17997 case Intrinsic::ppc_altivec_vcmpequw_p:
17998 case Intrinsic::ppc_altivec_vcmpequd_p:
17999 case Intrinsic::ppc_altivec_vcmpequq_p:
18000 case Intrinsic::ppc_altivec_vcmpgefp_p:
18001 case Intrinsic::ppc_altivec_vcmpgtfp_p:
18002 case Intrinsic::ppc_altivec_vcmpgtsb_p:
18003 case Intrinsic::ppc_altivec_vcmpgtsh_p:
18004 case Intrinsic::ppc_altivec_vcmpgtsw_p:
18005 case Intrinsic::ppc_altivec_vcmpgtsd_p:
18006 case Intrinsic::ppc_altivec_vcmpgtsq_p:
18007 case Intrinsic::ppc_altivec_vcmpgtub_p:
18008 case Intrinsic::ppc_altivec_vcmpgtuh_p:
18009 case Intrinsic::ppc_altivec_vcmpgtuw_p:
18010 case Intrinsic::ppc_altivec_vcmpgtud_p:
18011 case Intrinsic::ppc_altivec_vcmpgtuq_p:
18012 Known.Zero = ~1U; // All bits but the low one are known to be zero.
18013 break;
18014 }
18015 break;
18016 }
18017 case ISD::INTRINSIC_W_CHAIN: {
18018 switch (Op.getConstantOperandVal(1)) {
18019 default:
18020 break;
18021 case Intrinsic::ppc_load2r:
18022 // Top bits are cleared for load2r (which is the same as lhbrx).
18023 Known.Zero = 0xFFFF0000;
18024 break;
18025 }
18026 break;
18027 }
18028 }
18029}
18030
18031Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
18032 switch (Subtarget.getCPUDirective()) {
18033 default: break;
18034 case PPC::DIR_970:
18035 case PPC::DIR_PWR4:
18036 case PPC::DIR_PWR5:
18037 case PPC::DIR_PWR5X:
18038 case PPC::DIR_PWR6:
18039 case PPC::DIR_PWR6X:
18040 case PPC::DIR_PWR7:
18041 case PPC::DIR_PWR8:
18042 case PPC::DIR_PWR9:
18043 case PPC::DIR_PWR10:
18044 case PPC::DIR_PWR11:
18045 case PPC::DIR_PWR_FUTURE: {
18046 if (!ML)
18047 break;
18048
18049 if (!DisableInnermostLoopAlign32) {
18050 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
18051 // so that we can decrease cache misses and branch-prediction misses.
18052 // Actual alignment of the loop will depend on the hotness check and other
18053 // logic in alignBlocks.
18054 if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
18055 return Align(32);
18056 }
18057
18058 const PPCInstrInfo *TII = Subtarget.getInstrInfo();
18059
18060 // For small loops (between 5 and 8 instructions), align to a 32-byte
18061 // boundary so that the entire loop fits in one instruction-cache line.
18062 uint64_t LoopSize = 0;
18063 for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
18064 for (const MachineInstr &J : **I) {
18065 LoopSize += TII->getInstSizeInBytes(J);
18066 if (LoopSize > 32)
18067 break;
18068 }
18069
18070 if (LoopSize > 16 && LoopSize <= 32)
18071 return Align(32);
18072
18073 break;
18074 }
18075 }
18076
18077 return TargetLowering::getPrefLoopAlignment(ML);
18078}
18079
18080/// getConstraintType - Given a constraint, return the type of
18081/// constraint it is for this target.
18082PPCTargetLowering::ConstraintType
18083PPCTargetLowering::getConstraintType(StringRef Constraint) const {
18084 if (Constraint.size() == 1) {
18085 switch (Constraint[0]) {
18086 default: break;
18087 case 'b':
18088 case 'r':
18089 case 'f':
18090 case 'd':
18091 case 'v':
18092 case 'y':
18093 return C_RegisterClass;
18094 case 'Z':
18095 // FIXME: While Z does indicate a memory constraint, it specifically
18096 // indicates an r+r address (used in conjunction with the 'y' modifier
18097 // in the replacement string). Currently, we're forcing the base
18098 // register to be r0 in the asm printer (which is interpreted as zero)
18099 // and forming the complete address in the second register. This is
18100 // suboptimal.
18101 return C_Memory;
18102 }
18103 } else if (Constraint == "wc") { // individual CR bits.
18104 return C_RegisterClass;
18105 } else if (Constraint == "wa" || Constraint == "wd" ||
18106 Constraint == "wf" || Constraint == "ws" ||
18107 Constraint == "wi" || Constraint == "ww") {
18108 return C_RegisterClass; // VSX registers.
18109 }
18110 return TargetLowering::getConstraintType(Constraint);
18111}
18112
18113/// Examine constraint type and operand type and determine a weight value.
18114/// This object must already have been set up with the operand type
18115/// and the current alternative constraint selected.
18116TargetLowering::ConstraintWeight
18117PPCTargetLowering::getSingleConstraintMatchWeight(
18118 AsmOperandInfo &info, const char *constraint) const {
18119 ConstraintWeight weight = CW_Invalid;
18120 Value *CallOperandVal = info.CallOperandVal;
18121 // If we don't have a value, we can't do a match,
18122 // but allow it at the lowest weight.
18123 if (!CallOperandVal)
18124 return CW_Default;
18125 Type *type = CallOperandVal->getType();
18126
18127 // Look at the constraint type.
18128 if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
18129 return CW_Register; // an individual CR bit.
18130 else if ((StringRef(constraint) == "wa" ||
18131 StringRef(constraint) == "wd" ||
18132 StringRef(constraint) == "wf") &&
18133 type->isVectorTy())
18134 return CW_Register;
18135 else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
18136 return CW_Register; // 'wi' holds 64-bit integer data.
18137 else if (StringRef(constraint) == "ws" && type->isDoubleTy())
18138 return CW_Register;
18139 else if (StringRef(constraint) == "ww" && type->isFloatTy())
18140 return CW_Register;
18141
18142 switch (*constraint) {
18143 default:
18144 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
18145 break;
18146 case 'b':
18147 if (type->isIntegerTy())
18148 weight = CW_Register;
18149 break;
18150 case 'f':
18151 if (type->isFloatTy())
18152 weight = CW_Register;
18153 break;
18154 case 'd':
18155 if (type->isDoubleTy())
18156 weight = CW_Register;
18157 break;
18158 case 'v':
18159 if (type->isVectorTy())
18160 weight = CW_Register;
18161 break;
18162 case 'y':
18163 weight = CW_Register;
18164 break;
18165 case 'Z':
18166 weight = CW_Memory;
18167 break;
18168 }
18169 return weight;
18170}
18171
18172std::pair<unsigned, const TargetRegisterClass *>
18173PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
18174 StringRef Constraint,
18175 MVT VT) const {
18176 if (Constraint.size() == 1) {
18177 // GCC RS6000 Constraint Letters
18178 switch (Constraint[0]) {
18179 case 'b': // R1-R31
18180 if (VT == MVT::i64 && Subtarget.isPPC64())
18181 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
18182 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
18183 case 'r': // R0-R31
18184 if (VT == MVT::i64 && Subtarget.isPPC64())
18185 return std::make_pair(0U, &PPC::G8RCRegClass);
18186 return std::make_pair(0U, &PPC::GPRCRegClass);
18187 // 'd' and 'f' constraints are both defined to be "the floating point
18188 // registers", where one is for 32-bit and the other for 64-bit. We don't
18189 // really care overly much here so just give them all the same reg classes.
18190 case 'd':
18191 case 'f':
18192 if (Subtarget.hasSPE()) {
18193 if (VT == MVT::f32 || VT == MVT::i32)
18194 return std::make_pair(0U, &PPC::GPRCRegClass);
18195 if (VT == MVT::f64 || VT == MVT::i64)
18196 return std::make_pair(0U, &PPC::SPERCRegClass);
18197 } else {
18198 if (VT == MVT::f32 || VT == MVT::i32)
18199 return std::make_pair(0U, &PPC::F4RCRegClass);
18200 if (VT == MVT::f64 || VT == MVT::i64)
18201 return std::make_pair(0U, &PPC::F8RCRegClass);
18202 }
18203 break;
18204 case 'v':
18205 if (Subtarget.hasAltivec() && VT.isVector())
18206 return std::make_pair(0U, &PPC::VRRCRegClass);
18207 else if (Subtarget.hasVSX())
18208 // Scalars in Altivec registers only make sense with VSX.
18209 return std::make_pair(0U, &PPC::VFRCRegClass);
18210 break;
18211 case 'y': // crrc
18212 return std::make_pair(0U, &PPC::CRRCRegClass);
18213 }
18214 } else if (Constraint == "wc" && Subtarget.useCRBits()) {
18215 // An individual CR bit.
18216 return std::make_pair(0U, &PPC::CRBITRCRegClass);
18217 } else if ((Constraint == "wa" || Constraint == "wd" ||
18218 Constraint == "wf" || Constraint == "wi") &&
18219 Subtarget.hasVSX()) {
18220 // A VSX register for either a scalar (FP) or vector. There is no
18221 // support for single precision scalars on subtargets prior to Power8.
18222 if (VT.isVector())
18223 return std::make_pair(0U, &PPC::VSRCRegClass);
18224 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18225 return std::make_pair(0U, &PPC::VSSRCRegClass);
18226 return std::make_pair(0U, &PPC::VSFRCRegClass);
18227 } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
18228 if (VT == MVT::f32 && Subtarget.hasP8Vector())
18229 return std::make_pair(0U, &PPC::VSSRCRegClass);
18230 else
18231 return std::make_pair(0U, &PPC::VSFRCRegClass);
18232 } else if (Constraint == "lr") {
18233 if (VT == MVT::i64)
18234 return std::make_pair(0U, &PPC::LR8RCRegClass);
18235 else
18236 return std::make_pair(0U, &PPC::LRRCRegClass);
18237 }
18238
18239 // Handle special cases of physical registers that are not properly handled
18240 // by the base class.
18241 if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
18242 // If we name a VSX register, we can't defer to the base class because it
18243 // will not recognize the correct register (their names will be VSL{0-31}
18244 // and V{0-31} so they won't match). So we match them here.
18245 if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
18246 int VSNum = atoi(Constraint.data() + 3);
18247 assert(VSNum >= 0 && VSNum <= 63 &&
18248 "Attempted to access a vsr out of range");
18249 if (VSNum < 32)
18250 return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
18251 return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
18252 }
18253
18254 // For float registers, we can't defer to the base class as it will match
18255 // the SPILLTOVSRRC class.
18256 if (Constraint.size() > 3 && Constraint[1] == 'f') {
18257 int RegNum = atoi(Constraint.data() + 2);
18258 if (RegNum > 31 || RegNum < 0)
18259 report_fatal_error("Invalid floating point register number");
18260 if (VT == MVT::f32 || VT == MVT::i32)
18261 return Subtarget.hasSPE()
18262 ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
18263 : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
18264 if (VT == MVT::f64 || VT == MVT::i64)
18265 return Subtarget.hasSPE()
18266 ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
18267 : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
18268 }
18269 }
18270
18271 std::pair<unsigned, const TargetRegisterClass *> R =
18272 TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
18273
18274 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
18275 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
18276 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
18277 // register.
18278 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
18279 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
18280 if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
18281 PPC::GPRCRegClass.contains(R.first))
18282 return std::make_pair(TRI->getMatchingSuperReg(R.first,
18283 PPC::sub_32, &PPC::G8RCRegClass),
18284 &PPC::G8RCRegClass);
18285
18286 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
18287 if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
18288 R.first = PPC::CR0;
18289 R.second = &PPC::CRRCRegClass;
18290 }
18291 // FIXME: This warning should ideally be emitted in the front end.
18292 const auto &TM = getTargetMachine();
18293 if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
18294 if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
18295 (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
18296 (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
18297 errs() << "warning: vector registers 20 to 32 are reserved in the "
18298 "default AIX AltiVec ABI and cannot be used\n";
18299 }
18300
18301 return R;
18302}
18303
18304/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
18305/// vector. If it is invalid, don't add anything to Ops.
18306 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
18307 StringRef Constraint,
18308 std::vector<SDValue> &Ops,
18309 SelectionDAG &DAG) const {
18310 SDValue Result;
18311
18312 // Only support length 1 constraints.
18313 if (Constraint.size() > 1)
18314 return;
18315
18316 char Letter = Constraint[0];
18317 switch (Letter) {
18318 default: break;
18319 case 'I':
18320 case 'J':
18321 case 'K':
18322 case 'L':
18323 case 'M':
18324 case 'N':
18325 case 'O':
18326 case 'P': {
18327 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
18328 if (!CST) return; // Must be an immediate to match.
18329 SDLoc dl(Op);
18330 int64_t Value = CST->getSExtValue();
18331 EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
18332 // numbers are printed as such.
18333 switch (Letter) {
18334 default: llvm_unreachable("Unknown constraint letter!");
18335 case 'I': // "I" is a signed 16-bit constant.
18336 if (isInt<16>(Value))
18337 Result = DAG.getTargetConstant(Value, dl, TCVT);
18338 break;
18339 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
18340 if (isShiftedUInt<16, 16>(Value))
18341 Result = DAG.getTargetConstant(Value, dl, TCVT);
18342 break;
18343 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
18344 if (isShiftedInt<16, 16>(Value))
18345 Result = DAG.getTargetConstant(Value, dl, TCVT);
18346 break;
18347 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
18348 if (isUInt<16>(Value))
18349 Result = DAG.getTargetConstant(Value, dl, TCVT);
18350 break;
18351 case 'M': // "M" is a constant that is greater than 31.
18352 if (Value > 31)
18353 Result = DAG.getTargetConstant(Value, dl, TCVT);
18354 break;
18355 case 'N': // "N" is a positive constant that is an exact power of two.
18356 if (Value > 0 && isPowerOf2_64(Value))
18357 Result = DAG.getTargetConstant(Value, dl, TCVT);
18358 break;
18359 case 'O': // "O" is the constant zero.
18360 if (Value == 0)
18361 Result = DAG.getTargetConstant(Value, dl, TCVT);
18362 break;
18363 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
18364 if (isInt<16>(-Value))
18365 Result = DAG.getTargetConstant(Value, dl, TCVT);
18366 break;
18367 }
18368 break;
18369 }
18370 }
18371
18372 if (Result.getNode()) {
18373 Ops.push_back(Result);
18374 return;
18375 }
18376
18377 // Handle standard constraint letters.
18378 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
18379}
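// Illustrative note (added for clarity, not part of the upstream source):
// for inline assembly such as
//   asm("addi %0,%1,%2" : "=r"(d) : "r"(s), "I"(16));
// the 'I' operand reaches the switch above and is accepted because 16 fits
// in a signed 16-bit immediate. A value such as 100000 would not match, no
// SDValue would be pushed onto Ops, and the constraint would be rejected.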
18380
18381 void PPCTargetLowering::CollectTargetIntrinsicOperands(const CallInst &I,
18382 SmallVectorImpl<SDValue> &Ops,
18383 SelectionDAG &DAG) const {
18384 if (I.getNumOperands() <= 1)
18385 return;
18386 if (!isa<ConstantSDNode>(Ops[1].getNode()))
18387 return;
18388 auto IntrinsicID = Ops[1].getNode()->getAsZExtVal();
18389 if (IntrinsicID != Intrinsic::ppc_tdw && IntrinsicID != Intrinsic::ppc_tw &&
18390 IntrinsicID != Intrinsic::ppc_trapd && IntrinsicID != Intrinsic::ppc_trap)
18391 return;
18392
18393 if (MDNode *MDN = I.getMetadata(LLVMContext::MD_annotation))
18394 Ops.push_back(DAG.getMDNode(MDN));
18395}
18396
18397// isLegalAddressingMode - Return true if the addressing mode represented
18398// by AM is legal for this target, for a load/store of the specified type.
18399 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
18400 const AddrMode &AM, Type *Ty,
18401 unsigned AS,
18402 Instruction *I) const {
18403 // Vector type r+i form is supported since power9 as DQ form. We don't check
18404 // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
18405 // imm form is preferred and the offset can be adjusted to use imm form later
18406 // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
18407 // max offset to check the legal addressing mode, so we should be a little
18408 // aggressive in order to contain other offsets for that LSRUse.
18409 if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
18410 return false;
18411
18412 // PPC allows a sign-extended 16-bit immediate field.
18413 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
18414 return false;
18415
18416 // No global is ever allowed as a base.
18417 if (AM.BaseGV)
18418 return false;
18419
18420 // PPC only supports r+r,
18421 switch (AM.Scale) {
18422 case 0: // "r+i" or just "i", depending on HasBaseReg.
18423 break;
18424 case 1:
18425 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
18426 return false;
18427 // Otherwise we have r+r or r+i.
18428 break;
18429 case 2:
18430 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
18431 return false;
18432 // Allow 2*r as r+r.
18433 break;
18434 default:
18435 // No other scales are supported.
18436 return false;
18437 }
18438
18439 return true;
18440}
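// Illustrative note (added for clarity, not part of the upstream source):
// the checks above accept "r+i" forms such as lwz r3, 16(r4) (Scale == 0)
// and "r+r" forms such as lwzx r3, r4, r5 (Scale == 1 with no base offset),
// while scaled-index forms such as 2*r+i are rejected because PowerPC has
// no addressing mode for them.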
18441
18442SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
18443 SelectionDAG &DAG) const {
18444 MachineFunction &MF = DAG.getMachineFunction();
18445 MachineFrameInfo &MFI = MF.getFrameInfo();
18446 MFI.setReturnAddressIsTaken(true);
18447
18448 SDLoc dl(Op);
18449 unsigned Depth = Op.getConstantOperandVal(0);
18450
18451 // Make sure the function does not optimize away the store of the RA to
18452 // the stack.
18453 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
18454 FuncInfo->setLRStoreRequired();
18455 auto PtrVT = getPointerTy(MF.getDataLayout());
18456
18457 if (Depth > 0) {
18458 // The link register (return address) is saved in the caller's frame
18459 // not the callee's stack frame. So we must get the caller's frame
18460 // address and load the return address at the LR offset from there.
18461 SDValue FrameAddr =
18462 DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18463 LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
18464 SDValue Offset =
18465 DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
18466 Subtarget.getScalarIntVT());
18467 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
18468 DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
18469 MachinePointerInfo());
18470 }
18471
18472 // Just load the return address off the stack.
18473 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
18474 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
18475 MachinePointerInfo());
18476}
18477
18478SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
18479 SelectionDAG &DAG) const {
18480 SDLoc dl(Op);
18481 unsigned Depth = Op.getConstantOperandVal(0);
18482
18483 MachineFunction &MF = DAG.getMachineFunction();
18484 MachineFrameInfo &MFI = MF.getFrameInfo();
18485 MFI.setFrameAddressIsTaken(true);
18486
18487 EVT PtrVT = getPointerTy(MF.getDataLayout());
18488 bool isPPC64 = PtrVT == MVT::i64;
18489
18490 // Naked functions never have a frame pointer, and so we use r1. For all
18491 // other functions, this decision must be delayed until during PEI.
18492 unsigned FrameReg;
18493 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
18494 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
18495 else
18496 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
18497
18498 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
18499 PtrVT);
18500 while (Depth--)
18501 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
18502 FrameAddr, MachinePointerInfo());
18503 return FrameAddr;
18504}
18505
18506#define GET_REGISTER_MATCHER
18507#include "PPCGenAsmMatcher.inc"
18508
18509 Register PPCTargetLowering::getRegisterByName(const char* RegName, LLT VT,
18510 const MachineFunction &MF) const {
18511 bool IsPPC64 = Subtarget.isPPC64();
18512
18513 bool Is64Bit = IsPPC64 && VT == LLT::scalar(64);
18514 if (!Is64Bit && VT != LLT::scalar(32))
18515 report_fatal_error("Invalid register global variable type");
18516
18517 Register Reg = MatchRegisterName(RegName);
18518 if (!Reg)
18519 return Reg;
18520
18521 // FIXME: Unable to generate code for `-O2` but okay for `-O0`.
18522 // Need followup investigation as to why.
18523 if ((IsPPC64 && Reg == PPC::R2) || Reg == PPC::R0)
18524 report_fatal_error(Twine("Trying to reserve an invalid register \"" +
18525 StringRef(RegName) + "\"."));
18526
18527 // Convert GPR to GP8R register for 64bit.
18528 if (Is64Bit && StringRef(RegName).starts_with_insensitive("r"))
18529 Reg = Reg.id() - PPC::R0 + PPC::X0;
18530
18531 return Reg;
18532}
18533
18534 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
18535 // 32-bit SVR4 ABI accesses everything as got-indirect.
18536 if (Subtarget.is32BitELFABI())
18537 return true;
18538
18539 // AIX accesses everything indirectly through the TOC, which is similar to
18540 // the GOT.
18541 if (Subtarget.isAIXABI())
18542 return true;
18543
18544 CodeModel::Model CModel = getTargetMachine().getCodeModel();
18545 // If it is small or large code model, module locals are accessed
18546 // indirectly by loading their address from .toc/.got.
18547 if (CModel == CodeModel::Small || CModel == CodeModel::Large)
18548 return true;
18549
18550 // JumpTable and BlockAddress are accessed as got-indirect.
18551 if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
18552 return true;
18553
18554 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
18555 return Subtarget.isGVIndirectSymbol(G->getGlobal());
18556
18557 return false;
18558}
18559
18560bool
18561 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
18562 // The PowerPC target isn't yet aware of offsets.
18563 return false;
18564}
18565
18566 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
18567 const CallBase &I,
18568 MachineFunction &MF,
18569 unsigned Intrinsic) const {
18570 switch (Intrinsic) {
18571 case Intrinsic::ppc_atomicrmw_xchg_i128:
18572 case Intrinsic::ppc_atomicrmw_add_i128:
18573 case Intrinsic::ppc_atomicrmw_sub_i128:
18574 case Intrinsic::ppc_atomicrmw_nand_i128:
18575 case Intrinsic::ppc_atomicrmw_and_i128:
18576 case Intrinsic::ppc_atomicrmw_or_i128:
18577 case Intrinsic::ppc_atomicrmw_xor_i128:
18578 case Intrinsic::ppc_cmpxchg_i128:
18579 Info.opc = ISD::INTRINSIC_W_CHAIN;
18580 Info.memVT = MVT::i128;
18581 Info.ptrVal = I.getArgOperand(0);
18582 Info.offset = 0;
18583 Info.align = Align(16);
18584 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
18585 MachineMemOperand::MOVolatile;
18586 return true;
18587 case Intrinsic::ppc_atomic_load_i128:
18588 Info.opc = ISD::INTRINSIC_W_CHAIN;
18589 Info.memVT = MVT::i128;
18590 Info.ptrVal = I.getArgOperand(0);
18591 Info.offset = 0;
18592 Info.align = Align(16);
18593 Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
18594 return true;
18595 case Intrinsic::ppc_atomic_store_i128:
18596 Info.opc = ISD::INTRINSIC_VOID;
18597 Info.memVT = MVT::i128;
18598 Info.ptrVal = I.getArgOperand(2);
18599 Info.offset = 0;
18600 Info.align = Align(16);
18601 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18602 return true;
18603 case Intrinsic::ppc_altivec_lvx:
18604 case Intrinsic::ppc_altivec_lvxl:
18605 case Intrinsic::ppc_altivec_lvebx:
18606 case Intrinsic::ppc_altivec_lvehx:
18607 case Intrinsic::ppc_altivec_lvewx:
18608 case Intrinsic::ppc_vsx_lxvd2x:
18609 case Intrinsic::ppc_vsx_lxvw4x:
18610 case Intrinsic::ppc_vsx_lxvd2x_be:
18611 case Intrinsic::ppc_vsx_lxvw4x_be:
18612 case Intrinsic::ppc_vsx_lxvl:
18613 case Intrinsic::ppc_vsx_lxvll: {
18614 EVT VT;
18615 switch (Intrinsic) {
18616 case Intrinsic::ppc_altivec_lvebx:
18617 VT = MVT::i8;
18618 break;
18619 case Intrinsic::ppc_altivec_lvehx:
18620 VT = MVT::i16;
18621 break;
18622 case Intrinsic::ppc_altivec_lvewx:
18623 VT = MVT::i32;
18624 break;
18625 case Intrinsic::ppc_vsx_lxvd2x:
18626 case Intrinsic::ppc_vsx_lxvd2x_be:
18627 VT = MVT::v2f64;
18628 break;
18629 default:
18630 VT = MVT::v4i32;
18631 break;
18632 }
18633
18634 Info.opc = ISD::INTRINSIC_W_CHAIN;
18635 Info.memVT = VT;
18636 Info.ptrVal = I.getArgOperand(0);
18637 Info.offset = -VT.getStoreSize()+1;
18638 Info.size = 2*VT.getStoreSize()-1;
18639 Info.align = Align(1);
18640 Info.flags = MachineMemOperand::MOLoad;
18641 return true;
18642 }
18643 case Intrinsic::ppc_altivec_stvx:
18644 case Intrinsic::ppc_altivec_stvxl:
18645 case Intrinsic::ppc_altivec_stvebx:
18646 case Intrinsic::ppc_altivec_stvehx:
18647 case Intrinsic::ppc_altivec_stvewx:
18648 case Intrinsic::ppc_vsx_stxvd2x:
18649 case Intrinsic::ppc_vsx_stxvw4x:
18650 case Intrinsic::ppc_vsx_stxvd2x_be:
18651 case Intrinsic::ppc_vsx_stxvw4x_be:
18652 case Intrinsic::ppc_vsx_stxvl:
18653 case Intrinsic::ppc_vsx_stxvll: {
18654 EVT VT;
18655 switch (Intrinsic) {
18656 case Intrinsic::ppc_altivec_stvebx:
18657 VT = MVT::i8;
18658 break;
18659 case Intrinsic::ppc_altivec_stvehx:
18660 VT = MVT::i16;
18661 break;
18662 case Intrinsic::ppc_altivec_stvewx:
18663 VT = MVT::i32;
18664 break;
18665 case Intrinsic::ppc_vsx_stxvd2x:
18666 case Intrinsic::ppc_vsx_stxvd2x_be:
18667 VT = MVT::v2f64;
18668 break;
18669 default:
18670 VT = MVT::v4i32;
18671 break;
18672 }
18673
18674 Info.opc = ISD::INTRINSIC_VOID;
18675 Info.memVT = VT;
18676 Info.ptrVal = I.getArgOperand(1);
18677 Info.offset = -VT.getStoreSize()+1;
18678 Info.size = 2*VT.getStoreSize()-1;
18679 Info.align = Align(1);
18680 Info.flags = MachineMemOperand::MOStore;
18681 return true;
18682 }
18683 case Intrinsic::ppc_stdcx:
18684 case Intrinsic::ppc_stwcx:
18685 case Intrinsic::ppc_sthcx:
18686 case Intrinsic::ppc_stbcx: {
18687 EVT VT;
18688 auto Alignment = Align(8);
18689 switch (Intrinsic) {
18690 case Intrinsic::ppc_stdcx:
18691 VT = MVT::i64;
18692 break;
18693 case Intrinsic::ppc_stwcx:
18694 VT = MVT::i32;
18695 Alignment = Align(4);
18696 break;
18697 case Intrinsic::ppc_sthcx:
18698 VT = MVT::i16;
18699 Alignment = Align(2);
18700 break;
18701 case Intrinsic::ppc_stbcx:
18702 VT = MVT::i8;
18703 Alignment = Align(1);
18704 break;
18705 }
18706 Info.opc = ISD::INTRINSIC_W_CHAIN;
18707 Info.memVT = VT;
18708 Info.ptrVal = I.getArgOperand(0);
18709 Info.offset = 0;
18710 Info.align = Alignment;
18711 Info.flags = MachineMemOperand::MOStore | MachineMemOperand::MOVolatile;
18712 return true;
18713 }
18714 default:
18715 break;
18716 }
18717
18718 return false;
18719}
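// Illustrative note (added for clarity, not part of the upstream source):
// for the lvx/stvx-style cases above, the offset of -(StoreSize-1) together
// with a size of 2*StoreSize-1 describes a conservative byte range: these
// instructions ignore the low-order bits of the effective address, so the
// access may touch any part of the naturally aligned block containing the
// pointer, and the memory operand must cover that whole region.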
18720
18721/// It returns EVT::Other if the type should be determined using generic
18722/// target-independent logic.
18723 EVT PPCTargetLowering::getOptimalMemOpType(
18724 LLVMContext &Context, const MemOp &Op,
18725 const AttributeList &FuncAttributes) const {
18726 if (getTargetMachine().getOptLevel() != CodeGenOptLevel::None) {
18727 // We should use Altivec/VSX loads and stores when available. For unaligned
18728 // addresses, unaligned VSX loads are only fast starting with the P8.
18729 if (Subtarget.hasAltivec() && Op.size() >= 16) {
18730 if (Op.isMemset() && Subtarget.hasVSX()) {
18731 uint64_t TailSize = Op.size() % 16;
18732 // For memset lowering, EXTRACT_VECTOR_ELT tries to return constant
18733 // element if the vector element type matches the tail store. For tail sizes
18734 // 3/4, the tail store is i32, so v4i32 cannot be used and another legal type is needed.
18735 if (TailSize > 2 && TailSize <= 4) {
18736 return MVT::v8i16;
18737 }
18738 return MVT::v4i32;
18739 }
18740 if (Op.isAligned(Align(16)) || Subtarget.hasP8Vector())
18741 return MVT::v4i32;
18742 }
18743 }
18744
18745 if (Subtarget.isPPC64()) {
18746 return MVT::i64;
18747 }
18748
18749 return MVT::i32;
18750}
18751
18752/// Returns true if it is beneficial to convert a load of a constant
18753/// to just the constant itself.
18754 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
18755 Type *Ty) const {
18756 assert(Ty->isIntegerTy());
18757
18758 unsigned BitSize = Ty->getPrimitiveSizeInBits();
18759 return !(BitSize == 0 || BitSize > 64);
18760}
18761
18762 bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
18763 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
18764 return false;
18765 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
18766 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
18767 return NumBits1 == 64 && NumBits2 == 32;
18768}
18769
18770 bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
18771 if (!VT1.isInteger() || !VT2.isInteger())
18772 return false;
18773 unsigned NumBits1 = VT1.getSizeInBits();
18774 unsigned NumBits2 = VT2.getSizeInBits();
18775 return NumBits1 == 64 && NumBits2 == 32;
18776}
18777
18778 bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
18779 // Generally speaking, zexts are not free, but they are free when they can be
18780 // folded with other operations.
18781 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
18782 EVT MemVT = LD->getMemoryVT();
18783 if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
18784 (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
18785 (LD->getExtensionType() == ISD::NON_EXTLOAD ||
18786 LD->getExtensionType() == ISD::ZEXTLOAD))
18787 return true;
18788 }
18789
18790 // FIXME: Add other cases...
18791 // - 32-bit shifts with a zext to i64
18792 // - zext after ctlz, bswap, etc.
18793 // - zext after and by a constant mask
18794
18795 return TargetLowering::isZExtFree(Val, VT2);
18796}
18797
18798bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
18799 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
18800 "invalid fpext types");
18801 // Extending to float128 is not free.
18802 if (DestVT == MVT::f128)
18803 return false;
18804 return true;
18805}
18806
18807 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
18808 return isInt<16>(Imm) || isUInt<16>(Imm);
18809}
18810
18811 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
18812 return isInt<16>(Imm) || isUInt<16>(Imm);
18813}
18814
18817 unsigned *Fast) const {
18818 if (DisablePPCUnaligned)
18819 return false;
18820
18821 // PowerPC supports unaligned memory access for simple non-vector types.
18822 // Although accessing unaligned addresses is not as efficient as accessing
18823 // aligned addresses, it is generally more efficient than manual expansion,
18824 // and generally only traps for software emulation when crossing page
18825 // boundaries.
18826
18827 if (!VT.isSimple())
18828 return false;
18829
18830 if (VT.isFloatingPoint() && !VT.isVector() &&
18831 !Subtarget.allowsUnalignedFPAccess())
18832 return false;
18833
18834 if (VT.getSimpleVT().isVector()) {
18835 if (Subtarget.hasVSX()) {
18836 if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
18837 VT != MVT::v4f32 && VT != MVT::v4i32)
18838 return false;
18839 } else {
18840 return false;
18841 }
18842 }
18843
18844 if (VT == MVT::ppcf128)
18845 return false;
18846
18847 if (Fast)
18848 *Fast = 1;
18849
18850 return true;
18851}
18852
18853 bool PPCTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
18854 SDValue C) const {
18855 // Check integral scalar types.
18856 if (!VT.isScalarInteger())
18857 return false;
18858 if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
18859 if (!ConstNode->getAPIntValue().isSignedIntN(64))
18860 return false;
18861 // This transformation will generate >= 2 operations. But the following
18862 // cases will generate <= 2 instructions during ISEL. So exclude them.
18863 // 1. If the constant multiplier fits 16 bits, it can be handled by one
18864 // HW instruction, ie. MULLI
18865 // 2. If the multiplier fits 16 bits after shifting out its trailing zeros,
18866 // one extra shift instruction is needed compared to case 1, i.e. MULLI and RLDICR
18867 int64_t Imm = ConstNode->getSExtValue();
18868 unsigned Shift = llvm::countr_zero<uint64_t>(Imm);
18869 Imm >>= Shift;
18870 if (isInt<16>(Imm))
18871 return false;
18872 uint64_t UImm = static_cast<uint64_t>(Imm);
18873 if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
18874 isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
18875 return true;
18876 }
18877 return false;
18878}
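// Worked example (added for clarity, not part of the upstream source):
// for a multiply by 65537 (0x10001) there are no trailing zero bits, the
// value does not fit in a signed 16-bit immediate, and 65537 - 1 is a power
// of two, so decomposeMulByConstant returns true and the multiply may be
// lowered as shl + add. A multiply by 6 is rejected: after shifting out the
// trailing zero, the remaining 3 fits in 16 bits, so MULLI (plus RLDICR)
// already covers it.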
18879
18880 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
18881 EVT VT) const {
18882 return isFMAFasterThanFMulAndFAdd(
18883 MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
18884}
18885
18886 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
18887 Type *Ty) const {
18888 if (Subtarget.hasSPE() || Subtarget.useSoftFloat())
18889 return false;
18890 switch (Ty->getScalarType()->getTypeID()) {
18891 case Type::FloatTyID:
18892 case Type::DoubleTyID:
18893 return true;
18894 case Type::FP128TyID:
18895 return Subtarget.hasP9Vector();
18896 default:
18897 return false;
18898 }
18899}
18900
18901// FIXME: add more patterns which are not profitable to hoist.
18902 bool PPCTargetLowering::isProfitableToHoist(Instruction *I) const {
18903 if (!I->hasOneUse())
18904 return true;
18905
18906 Instruction *User = I->user_back();
18907 assert(User && "A single use instruction with no uses.");
18908
18909 switch (I->getOpcode()) {
18910 case Instruction::FMul: {
18911 // Don't break FMA, PowerPC prefers FMA.
18912 if (User->getOpcode() != Instruction::FSub &&
18913 User->getOpcode() != Instruction::FAdd)
18914 return true;
18915
18916 const TargetOptions &Options = getTargetMachine().Options;
18917 const Function *F = I->getFunction();
18918 const DataLayout &DL = F->getDataLayout();
18919 Type *Ty = User->getOperand(0)->getType();
18920 bool AllowContract = I->getFastMathFlags().allowContract() &&
18921 User->getFastMathFlags().allowContract();
18922
18923 return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
18924 isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
18925 (AllowContract || Options.AllowFPOpFusion == FPOpFusion::Fast));
18926 }
18927 case Instruction::Load: {
18928 // Don't break "store (load float*)" pattern, this pattern will be combined
18929 // to "store (load int32)" in later InstCombine pass. See function
18930 // combineLoadToOperationType. On PowerPC, loading a floating-point value takes more
18931 // cycles than loading a 32 bit integer.
18932 LoadInst *LI = cast<LoadInst>(I);
18933 // For loads that combineLoadToOperationType does nothing with, such as
18934 // ordered loads, it should be profitable to hoist them.
18935 // For swifterror load, it can only be used for pointer to pointer type, so
18936 // later type check should get rid of this case.
18937 if (!LI->isUnordered())
18938 return true;
18939
18940 if (User->getOpcode() != Instruction::Store)
18941 return true;
18942
18943 if (I->getType()->getTypeID() != Type::FloatTyID)
18944 return true;
18945
18946 return false;
18947 }
18948 default:
18949 return true;
18950 }
18951 return true;
18952}
18953
18954const MCPhysReg *
18955 PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
18956 // LR is a callee-save register, but we must treat it as clobbered by any call
18957 // site. Hence we include LR in the scratch registers, which are in turn added
18958 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
18959 // to CTR, which is used by any indirect call.
18960 static const MCPhysReg ScratchRegs[] = {
18961 PPC::X12, PPC::LR8, PPC::CTR8, 0
18962 };
18963
18964 return ScratchRegs;
18965}
18966
18967 Register PPCTargetLowering::getExceptionPointerRegister(
18968 const Constant *PersonalityFn) const {
18969 return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
18970}
18971
18972 Register PPCTargetLowering::getExceptionSelectorRegister(
18973 const Constant *PersonalityFn) const {
18974 return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
18975}
18976
18977bool
18978 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
18979 EVT VT , unsigned DefinedValues) const {
18980 if (VT == MVT::v2i64)
18981 return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
18982
18983 if (Subtarget.hasVSX())
18984 return true;
18985
18986 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
18987}
18988
18989 Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
18990 if (DisableILPPref || Subtarget.enableMachineScheduler())
18991 return TargetLowering::getSchedulingPreference(N);
18992
18993 return Sched::ILP;
18994}
18995
18996// Create a fast isel object.
18997FastISel *
18998 PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
18999 const TargetLibraryInfo *LibInfo) const {
19000 return PPC::createFastISel(FuncInfo, LibInfo);
19001}
19002
19003// 'Inverted' means the FMA opcode after negating one multiplicand.
19004// For example, (fma -a b c) = (fnmsub a b c)
19005static unsigned invertFMAOpcode(unsigned Opc) {
19006 switch (Opc) {
19007 default:
19008 llvm_unreachable("Invalid FMA opcode for PowerPC!");
19009 case ISD::FMA:
19010 return PPCISD::FNMSUB;
19011 case PPCISD::FNMSUB:
19012 return ISD::FMA;
19013 }
19014}
19015
19016 SDValue PPCTargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
19017 bool LegalOps, bool OptForSize,
19018 NegatibleCost &Cost,
19019 unsigned Depth) const {
19020 if (Depth > SelectionDAG::MaxRecursionDepth)
19021 return SDValue();
19022
19023 unsigned Opc = Op.getOpcode();
19024 EVT VT = Op.getValueType();
19025 SDNodeFlags Flags = Op.getNode()->getFlags();
19026
19027 switch (Opc) {
19028 case PPCISD::FNMSUB:
19029 if (!Op.hasOneUse() || !isTypeLegal(VT))
19030 break;
19031
19032 const TargetOptions &Options = getTargetMachine().Options;
19033 SDValue N0 = Op.getOperand(0);
19034 SDValue N1 = Op.getOperand(1);
19035 SDValue N2 = Op.getOperand(2);
19036 SDLoc Loc(Op);
19037
19038 NegatibleCost N2Cost = NegatibleCost::Expensive;
19039 SDValue NegN2 =
19040 getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
19041
19042 if (!NegN2)
19043 return SDValue();
19044
19045 // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
19046 // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
19047 // These transformations may change sign of zeroes. For example,
19048 // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
19049 if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
19050 // Try and choose the cheaper one to negate.
19051 NegatibleCost N0Cost = NegatibleCost::Expensive;
19052 SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
19053 N0Cost, Depth + 1);
19054
19055 NegatibleCost N1Cost = NegatibleCost::Expensive;
19056 SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
19057 N1Cost, Depth + 1);
19058
19059 if (NegN0 && N0Cost <= N1Cost) {
19060 Cost = std::min(N0Cost, N2Cost);
19061 return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
19062 } else if (NegN1) {
19063 Cost = std::min(N1Cost, N2Cost);
19064 return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
19065 }
19066 }
19067
19068 // (fneg (fnmsub a b c)) => (fma a b (fneg c))
19069 if (isOperationLegal(ISD::FMA, VT)) {
19070 Cost = N2Cost;
19071 return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
19072 }
19073
19074 break;
19075 }
19076
19077 return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
19078 Cost, Depth);
19079}
19080
19081// Override to enable LOAD_STACK_GUARD lowering on Linux.
19082 bool PPCTargetLowering::useLoadStackGuardNode(const Module &M) const {
19083 if (M.getStackProtectorGuard() == "tls" || Subtarget.isTargetLinux())
19084 return true;
19085 return TargetLowering::useLoadStackGuardNode(M);
19086}
19087
19088 bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
19089 bool ForCodeSize) const {
19090 if (!VT.isSimple() || !Subtarget.hasVSX())
19091 return false;
19092
19093 switch(VT.getSimpleVT().SimpleTy) {
19094 default:
19095 // For FP types that are currently not supported by PPC backend, return
19096 // false. Examples: f16, f80.
19097 return false;
19098 case MVT::f32:
19099 case MVT::f64: {
19100 if (Subtarget.hasPrefixInstrs() && Subtarget.hasP10Vector()) {
19101 // We can materialize all immediates via XXSPLTI32DX and XXSPLTIDP.
19102 return true;
19103 }
19104 bool IsExact;
19105 APSInt IntResult(16, false);
19106 // The rounding mode doesn't really matter because we only care about floats
19107 // that can be converted to integers exactly.
19108 Imm.convertToInteger(IntResult, APFloat::rmTowardZero, &IsExact);
19109 // For exact values in the range [-16, 15] we can materialize the float.
19110 if (IsExact && IntResult <= 15 && IntResult >= -16)
19111 return true;
19112 return Imm.isZero();
19113 }
19114 case MVT::ppcf128:
19115 return Imm.isPosZero();
19116 }
19117}
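// Illustrative note (added for clarity, not part of the upstream source):
// under the rules above, constants such as 4.0 or -16.0 convert exactly to
// integers in [-16, 15] and are reported as legal immediates, while an
// inexact value such as 0.1 is only legal on subtargets with prefixed
// instructions (XXSPLTI32DX/XXSPLTIDP); zero is always accepted.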
19118
19119// For vector shift operation op, fold
19120// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
19121 static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
19122 SelectionDAG &DAG) {
19123 SDValue N0 = N->getOperand(0);
19124 SDValue N1 = N->getOperand(1);
19125 EVT VT = N0.getValueType();
19126 unsigned OpSizeInBits = VT.getScalarSizeInBits();
19127 unsigned Opcode = N->getOpcode();
19128 unsigned TargetOpcode;
19129
19130 switch (Opcode) {
19131 default:
19132 llvm_unreachable("Unexpected shift operation");
19133 case ISD::SHL:
19134 TargetOpcode = PPCISD::SHL;
19135 break;
19136 case ISD::SRL:
19137 TargetOpcode = PPCISD::SRL;
19138 break;
19139 case ISD::SRA:
19140 TargetOpcode = PPCISD::SRA;
19141 break;
19142 }
19143
19144 if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
19145 N1->getOpcode() == ISD::AND)
19146 if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
19147 if (Mask->getZExtValue() == OpSizeInBits - 1)
19148 return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
19149
19150 return SDValue();
19151}
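// Illustrative note (added for clarity, not part of the upstream source):
// this helper matches patterns such as (srl v4i32:x, (and v4i32:y, splat(31)))
// and rewrites them to (PPCISD::SRL x, y); vsrw and related instructions
// already use only the low log2(element-bits) bits of each shift amount, so
// the explicit modulo mask is redundant.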
19152
19153SDValue PPCTargetLowering::combineVectorShift(SDNode *N,
19154 DAGCombinerInfo &DCI) const {
19155 EVT VT = N->getValueType(0);
19156 assert(VT.isVector() && "Vector type expected.");
19157
19158 unsigned Opc = N->getOpcode();
19159 assert((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA) &&
19160 "Unexpected opcode.");
19161
19162 if (!isOperationLegal(Opc, VT))
19163 return SDValue();
19164
19165 EVT EltTy = VT.getScalarType();
19166 unsigned EltBits = EltTy.getSizeInBits();
19167 if (EltTy != MVT::i64 && EltTy != MVT::i32)
19168 return SDValue();
19169
19170 SDValue N1 = N->getOperand(1);
19171 uint64_t SplatBits = 0;
19172 bool AddSplatCase = false;
19173 unsigned OpcN1 = N1.getOpcode();
19174 if (OpcN1 == PPCISD::VADD_SPLAT &&
19176 AddSplatCase = true;
19177 SplatBits = N1.getConstantOperandVal(0);
19178 }
19179
19180 if (!AddSplatCase) {
19181 if (OpcN1 != ISD::BUILD_VECTOR)
19182 return SDValue();
19183
19184 unsigned SplatBitSize;
19185 bool HasAnyUndefs;
19186 APInt APSplatBits, APSplatUndef;
19187 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(N1);
19188 bool BVNIsConstantSplat =
19189 BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
19190 HasAnyUndefs, 0, !Subtarget.isLittleEndian());
19191 if (!BVNIsConstantSplat || SplatBitSize != EltBits)
19192 return SDValue();
19193 SplatBits = APSplatBits.getZExtValue();
19194 }
19195
19196 SDLoc DL(N);
19197 SDValue N0 = N->getOperand(0);
19198 // PPC vector shifts by word/double look at only the low 5/6 bits of the
19199 // shift vector, which means the max value is 31/63. A shift vector of all
19200 // 1s will be truncated to 31/63, which is useful as vspltiw is limited to
19201 // -16 to 15 range.
19202 if (SplatBits == (EltBits - 1)) {
19203 unsigned NewOpc;
19204 switch (Opc) {
19205 case ISD::SHL:
19206 NewOpc = PPCISD::SHL;
19207 break;
19208 case ISD::SRL:
19209 NewOpc = PPCISD::SRL;
19210 break;
19211 case ISD::SRA:
19212 NewOpc = PPCISD::SRA;
19213 break;
19214 }
19215 SDValue SplatOnes = getCanonicalConstSplat(255, 1, VT, DCI.DAG, DL);
19216 return DCI.DAG.getNode(NewOpc, DL, VT, N0, SplatOnes);
19217 }
19218
19219 if (Opc != ISD::SHL || !isOperationLegal(ISD::ADD, VT))
19220 return SDValue();
19221
19222 // For 64-bit there is no splat immediate so we want to catch shift by 1 here
19223 // before the BUILD_VECTOR is replaced by a load.
19224 if (EltTy != MVT::i64 || SplatBits != 1)
19225 return SDValue();
19226
19227 return DCI.DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
19228}
19229
19230SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
19231 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19232 return Value;
19233
19234 if (N->getValueType(0).isVector())
19235 return combineVectorShift(N, DCI);
19236
19237 SDValue N0 = N->getOperand(0);
19238 ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
19239 if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
19240 N0.getOpcode() != ISD::SIGN_EXTEND ||
19241 N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
19242 N->getValueType(0) != MVT::i64)
19243 return SDValue();
19244
19245 // We can't save an operation here if the value is already extended, and
19246 // the existing shift is easier to combine.
19247 SDValue ExtsSrc = N0.getOperand(0);
19248 if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
19249 ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
19250 return SDValue();
19251
19252 SDLoc DL(N0);
19253 SDValue ShiftBy = SDValue(CN1, 0);
19254 // We want the shift amount to be i32 on the extswli, but the shift could
19255 // have an i64.
19256 if (ShiftBy.getValueType() == MVT::i64)
19257 ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
19258
19259 return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
19260 ShiftBy);
19261}
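// Illustrative note (added for clarity, not part of the upstream source):
// on a 64-bit ISA 3.0 subtarget, (shl (sign_extend i32:x), 5) is rewritten
// above to PPCISD::EXTSWSLI x, 5, letting the extswsli instruction perform
// the sign extension and the shift in a single operation.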
19262
19263SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
19264 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19265 return Value;
19266
19267 if (N->getValueType(0).isVector())
19268 return combineVectorShift(N, DCI);
19269
19270 return SDValue();
19271}
19272
19273SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
19274 if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
19275 return Value;
19276
19277 if (N->getValueType(0).isVector())
19278 return combineVectorShift(N, DCI);
19279
19280 return SDValue();
19281}
19282
19283// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
19284// Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
19285// When C is zero, the equation (addi Z, -C) can be simplified to Z
19286// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
19287 static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
19288 const PPCSubtarget &Subtarget) {
19289 if (!Subtarget.isPPC64())
19290 return SDValue();
19291
19292 SDValue LHS = N->getOperand(0);
19293 SDValue RHS = N->getOperand(1);
19294
19295 auto isZextOfCompareWithConstant = [](SDValue Op) {
19296 if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
19297 Op.getValueType() != MVT::i64)
19298 return false;
19299
19300 SDValue Cmp = Op.getOperand(0);
19301 if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
19302 Cmp.getOperand(0).getValueType() != MVT::i64)
19303 return false;
19304
19305 if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
19306 int64_t NegConstant = 0 - Constant->getSExtValue();
19307 // Due to the limitations of the addi instruction,
19308 // -C is required to be in [-32768, 32767].
19309 return isInt<16>(NegConstant);
19310 }
19311
19312 return false;
19313 };
19314
19315 bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
19316 bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
19317
19318 // If there is a pattern, canonicalize a zext operand to the RHS.
19319 if (LHSHasPattern && !RHSHasPattern)
19320 std::swap(LHS, RHS);
19321 else if (!LHSHasPattern && !RHSHasPattern)
19322 return SDValue();
19323
19324 SDLoc DL(N);
19325 EVT CarryType = Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
19326 SDVTList VTs = DAG.getVTList(MVT::i64, CarryType);
19327 SDValue Cmp = RHS.getOperand(0);
19328 SDValue Z = Cmp.getOperand(0);
19329 auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
19330 int64_t NegConstant = 0 - Constant->getSExtValue();
19331
19332 switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
19333 default: break;
19334 case ISD::SETNE: {
19335 // when C == 0
19336 // --> addze X, (addic Z, -1).carry
19337 // /
19338 // add X, (zext(setne Z, C))--
19339 // \ when -32768 <= -C <= 32767 && C != 0
19340 // --> addze X, (addic (addi Z, -C), -1).carry
19341 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19342 DAG.getConstant(NegConstant, DL, MVT::i64));
19343 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19344 SDValue Addc =
19345 DAG.getNode(ISD::UADDO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19346 AddOrZ, DAG.getAllOnesConstant(DL, MVT::i64),
19347 DAG.getConstant(0, DL, CarryType));
19348 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19349 DAG.getConstant(0, DL, MVT::i64),
19350 SDValue(Addc.getNode(), 1));
19351 }
19352 case ISD::SETEQ: {
19353 // when C == 0
19354 // --> addze X, (subfic Z, 0).carry
19355 // /
19356 // add X, (zext(sete Z, C))--
19357 // \ when -32768 <= -C <= 32767 && C != 0
19358 // --> addze X, (subfic (addi Z, -C), 0).carry
19359 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
19360 DAG.getConstant(NegConstant, DL, MVT::i64));
19361 SDValue AddOrZ = NegConstant != 0 ? Add : Z;
19362 SDValue Subc =
19363 DAG.getNode(ISD::USUBO_CARRY, DL, DAG.getVTList(MVT::i64, CarryType),
19364 DAG.getConstant(0, DL, MVT::i64), AddOrZ,
19365 DAG.getConstant(0, DL, CarryType));
19366 SDValue Invert = DAG.getNode(ISD::XOR, DL, CarryType, Subc.getValue(1),
19367 DAG.getConstant(1UL, DL, CarryType));
19368 return DAG.getNode(ISD::UADDO_CARRY, DL, VTs, LHS,
19369 DAG.getConstant(0, DL, MVT::i64), Invert);
19370 }
19371 }
19372
19373 return SDValue();
19374}
19375
19376// Transform
19377// (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
19378// (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
19379// In this case both C1 and C2 must be known constants.
19380// C1+C2 must fit into a 34 bit signed integer.
19381 static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG,
19382 const PPCSubtarget &Subtarget) {
19383 if (!Subtarget.isUsingPCRelativeCalls())
19384 return SDValue();
19385
19386 // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
19387 // If we find that node try to cast the Global Address and the Constant.
19388 SDValue LHS = N->getOperand(0);
19389 SDValue RHS = N->getOperand(1);
19390
19391 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19392 std::swap(LHS, RHS);
19393
19394 if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
19395 return SDValue();
19396
19397 // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
19398 GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
19399 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(RHS);
19400
19401 // Check that both casts succeeded.
19402 if (!GSDN || !ConstNode)
19403 return SDValue();
19404
19405 int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
19406 SDLoc DL(GSDN);
19407
19408 // The signed int offset needs to fit in 34 bits.
19409 if (!isInt<34>(NewOffset))
19410 return SDValue();
19411
19412 // The new global address is a copy of the old global address except
19413 // that it has the updated Offset.
19414 SDValue GA =
19415 DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
19416 NewOffset, GSDN->getTargetFlags());
19417 SDValue MatPCRel =
19418 DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
19419 return MatPCRel;
19420}
19421
19422// Transform (add X, (build_vector (T 1), (T 1), ...)) -> (sub X, (XXLEQVOnes))
19423// XXLEQVOnes creates an all-1s vector (0xFFFFFFFF...) efficiently via xxleqv
19424// Mathematical identity: X + 1 = X - (-1)
19425// Applies to v4i32, v2i64, v8i16, v16i8 where all elements are constant 1
19426// Requirement: VSX feature for efficient xxleqv generation
19427 static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG,
19428 const PPCSubtarget &Subtarget) {
19429
19430 EVT VT = N->getValueType(0);
19431 if (!Subtarget.hasVSX())
19432 return SDValue();
19433
19434 // Handle v2i64, v4i32, v8i16 and v16i8 types
19435 if (!(VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v4i32 ||
19436 VT == MVT::v2i64))
19437 return SDValue();
19438
19439 SDValue LHS = N->getOperand(0);
19440 SDValue RHS = N->getOperand(1);
19441
19442 // Check if RHS is BUILD_VECTOR
19443 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19444 return SDValue();
19445
19446 // Check if all the elements are 1
19447 unsigned NumOfEles = RHS.getNumOperands();
19448 for (unsigned i = 0; i < NumOfEles; ++i) {
19449 auto *CN = dyn_cast<ConstantSDNode>(RHS.getOperand(i));
19450 if (!CN || CN->getSExtValue() != 1)
19451 return SDValue();
19452 }
19453 SDLoc DL(N);
19454
19455 SDValue MinusOne = DAG.getConstant(APInt::getAllOnes(32), DL, MVT::i32);
19456 SmallVector<SDValue, 4> Ops(4, MinusOne);
19457 SDValue AllOnesVec = DAG.getBuildVector(MVT::v4i32, DL, Ops);
19458
19459 // Bitcast to the target vector type
19460 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT, AllOnesVec);
19461
19462 return DAG.getNode(ISD::SUB, DL, VT, LHS, Bitcast);
19463}
19464
19465SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
19466 if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
19467 return Value;
19468
19469 if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
19470 return Value;
19471
19472 if (auto Value = combineADDToSUB(N, DCI.DAG, Subtarget))
19473 return Value;
19474 return SDValue();
19475}
19476
19477// Detect TRUNCATE operations on bitcasts of float128 values.
19478 // What we are looking for here is the situation where we extract a subset
19479// of bits from a 128 bit float.
19480// This can be of two forms:
19481// 1) BITCAST of f128 feeding TRUNCATE
19482// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
19483// The reason this is required is because we do not have a legal i128 type
19484// and so we want to prevent having to store the f128 and then reload part
19485// of it.
19486SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
19487 DAGCombinerInfo &DCI) const {
19488 // If we are using CRBits then try that first.
19489 if (Subtarget.useCRBits()) {
19490 // Check if CRBits did anything and return that if it did.
19491 if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
19492 return CRTruncValue;
19493 }
19494
19495 SDLoc dl(N);
19496 SDValue Op0 = N->getOperand(0);
19497
19498 // Looking for a truncate of i128 to i64.
19499 if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
19500 return SDValue();
19501
19502 int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
19503
19504 // SRL feeding TRUNCATE.
19505 if (Op0.getOpcode() == ISD::SRL) {
19506 ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
19507 // The right shift has to be by 64 bits.
19508 if (!ConstNode || ConstNode->getZExtValue() != 64)
19509 return SDValue();
19510
19511 // Switch the element number to extract.
19512 EltToExtract = EltToExtract ? 0 : 1;
19513 // Update Op0 past the SRL.
19514 Op0 = Op0.getOperand(0);
19515 }
19516
19517 // BITCAST feeding a TRUNCATE possibly via SRL.
19518 if (Op0.getOpcode() == ISD::BITCAST &&
19519 Op0.getValueType() == MVT::i128 &&
19520 Op0.getOperand(0).getValueType() == MVT::f128) {
19521 SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
19522 return DCI.DAG.getNode(
19523 ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
19524 DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
19525 }
19526 return SDValue();
19527}
19528
19529SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
19530 SelectionDAG &DAG = DCI.DAG;
19531
19532 ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
19533 if (!ConstOpOrElement)
19534 return SDValue();
19535
19536 // An imul is usually smaller than the alternative sequence for legal type.
19537 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
19538 isOperationLegal(ISD::MUL, N->getValueType(0)))
19539 return SDValue();
19540
19541 auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
19542 switch (this->Subtarget.getCPUDirective()) {
19543 default:
19544 // TODO: enhance the condition for subtarget before pwr8
19545 return false;
19546 case PPC::DIR_PWR8:
19547 // type mul add shl
19548 // scalar 4 1 1
19549 // vector 7 2 2
19550 return true;
19551 case PPC::DIR_PWR9:
19552 case PPC::DIR_PWR10:
19553 case PPC::DIR_PWR11:
19554 case PPC::DIR_PWR_FUTURE:
19555 // type mul add shl
19556 // scalar 5 2 2
19557 // vector 7 2 2
19558
19559 // The cycle ratios of the related operations are shown in the table above.
19560 // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
19561 // scalar and vector type. For 2 instrs patterns, add/sub + shl
19562 // are 4, it is always profitable; but for 3 instrs patterns
19563 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
19564 // So we should only do it for vector type.
19565 return IsAddOne && IsNeg ? VT.isVector() : true;
19566 }
19567 };
19568
19569 EVT VT = N->getValueType(0);
19570 SDLoc DL(N);
19571
19572 const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
19573 bool IsNeg = MulAmt.isNegative();
19574 APInt MulAmtAbs = MulAmt.abs();
19575
19576 if ((MulAmtAbs - 1).isPowerOf2()) {
19577 // (mul x, 2^N + 1) => (add (shl x, N), x)
19578 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
19579
19580 if (!IsProfitable(IsNeg, true, VT))
19581 return SDValue();
19582
19583 SDValue Op0 = N->getOperand(0);
19584 SDValue Op1 =
19585 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19586 DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
19587 SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
19588
19589 if (!IsNeg)
19590 return Res;
19591
19592 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
19593 } else if ((MulAmtAbs + 1).isPowerOf2()) {
19594 // (mul x, 2^N - 1) => (sub (shl x, N), x)
19595 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
19596
19597 if (!IsProfitable(IsNeg, false, VT))
19598 return SDValue();
19599
19600 SDValue Op0 = N->getOperand(0);
19601 SDValue Op1 =
19602 DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
19603 DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
19604
19605 if (!IsNeg)
19606 return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
19607 else
19608 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
19609
19610 } else {
19611 return SDValue();
19612 }
19613}
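// Worked example (added for clarity, not part of the upstream source):
// with a profitable subtarget, (mul x, 5) becomes (add (shl x, 2), x) via
// the first branch and (mul x, 7) becomes (sub (shl x, 3), x) via the
// second; for negative constants such as -7 the subtraction operands are
// swapped, and the IsProfitable lambda restricts the three-instruction
// negated-add form to vector types on Power9 and later.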
19614
19615// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
19616// in combiner since we need to check SD flags and other subtarget features.
19617SDValue PPCTargetLowering::combineFMALike(SDNode *N,
19618 DAGCombinerInfo &DCI) const {
19619 SDValue N0 = N->getOperand(0);
19620 SDValue N1 = N->getOperand(1);
19621 SDValue N2 = N->getOperand(2);
19622 SDNodeFlags Flags = N->getFlags();
19623 EVT VT = N->getValueType(0);
19624 SelectionDAG &DAG = DCI.DAG;
19625 const TargetOptions &Options = getTargetMachine().Options;
19626 unsigned Opc = N->getOpcode();
19627 bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
19628 bool LegalOps = !DCI.isBeforeLegalizeOps();
19629 SDLoc Loc(N);
19630
19631 if (!isOperationLegal(ISD::FMA, VT))
19632 return SDValue();
19633
19634 // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
19635 // since (fnmsub a b c)=-0 while c-ab=+0.
19636 if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
19637 return SDValue();
19638
19639 // (fma (fneg a) b c) => (fnmsub a b c)
19640 // (fnmsub (fneg a) b c) => (fma a b c)
19641 if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
19642 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);
19643
19644 // (fma a (fneg b) c) => (fnmsub a b c)
19645 // (fnmsub a (fneg b) c) => (fma a b c)
19646 if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
19647 return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);
19648
19649 return SDValue();
19650}
19651
19652bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
19653 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
19654 if (!Subtarget.is64BitELFABI())
19655 return false;
19656
19657 // If not a tail call then no need to proceed.
19658 if (!CI->isTailCall())
19659 return false;
19660
19661 // If sibling calls have been disabled and tail-calls aren't guaranteed
19662 // there is no reason to duplicate.
19663 auto &TM = getTargetMachine();
19664 if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
19665 return false;
19666
19667 // Can't tail call a function called indirectly, or if it has variadic args.
19668 const Function *Callee = CI->getCalledFunction();
19669 if (!Callee || Callee->isVarArg())
19670 return false;
19671
19672 // Make sure the callee and caller calling conventions are eligible for tco.
19673 const Function *Caller = CI->getParent()->getParent();
19674 if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
19675 CI->getCallingConv()))
19676 return false;
19677
19678 // If the function is local then we have a good chance at tail-calling it
19679 return getTargetMachine().shouldAssumeDSOLocal(Callee);
19680}
19681
19682bool PPCTargetLowering::
19683isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
19684 const Value *Mask = AndI.getOperand(1);
19685 // If the mask is suitable for andi. or andis. we should sink the and.
19686 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
19687 // Can't handle constants wider than 64-bits.
19688 if (CI->getBitWidth() > 64)
19689 return false;
19690 int64_t ConstVal = CI->getZExtValue();
19691 return isUInt<16>(ConstVal) ||
19692 (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
19693 }
19694
19695 // For non-constant masks, we can always use the record-form and.
19696 return true;
19697}
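// Illustrative note (added for clarity, not part of the upstream source):
// a mask of 0x00FF fits an unsigned 16-bit immediate and a mask of
// 0x00FF0000 has set bits only in its upper halfword, so both are accepted
// (they map to andi. and andis. respectively); a constant mask such as
// 0x00FF00FF fits neither form and is rejected.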
19698
19699/// getAddrModeForFlags - Based on the set of address flags, select the most
19700/// optimal instruction format to match by.
19701PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
19702 // This is not a node we should be handling here.
19703 if (Flags == PPC::MOF_None)
19704 return PPC::AM_None;
19705 // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
19706 for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
19707 if ((Flags & FlagSet) == FlagSet)
19708 return PPC::AM_DForm;
19709 for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
19710 if ((Flags & FlagSet) == FlagSet)
19711 return PPC::AM_DSForm;
19712 for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
19713 if ((Flags & FlagSet) == FlagSet)
19714 return PPC::AM_DQForm;
19715 for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
19716 if ((Flags & FlagSet) == FlagSet)
19717 return PPC::AM_PrefixDForm;
19718 // If no other forms are selected, return an X-Form as it is the most
19719 // general addressing mode.
19720 return PPC::AM_XForm;
19721}
19722
19723/// Set alignment flags based on whether or not the Frame Index is aligned.
19724/// Utilized when computing flags for address computation when selecting
19725/// load and store instructions.
19726static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
19727 SelectionDAG &DAG) {
19728 bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
19729 FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
19730 if (!FI)
19731 return;
19732 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
19733 unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
19734 // If this is (add $FI, $S16Imm), the alignment flags are already set
19735 // based on the immediate. We just need to clear the alignment flags
19736 // if the FI alignment is weaker.
19737 if ((FrameIndexAlign % 4) != 0)
19738 FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
19739 if ((FrameIndexAlign % 16) != 0)
19740 FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
19741 // If the address is a plain FrameIndex, set alignment flags based on
19742 // FI alignment.
19743 if (!IsAdd) {
19744 if ((FrameIndexAlign % 4) == 0)
19745 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19746 if ((FrameIndexAlign % 16) == 0)
19747 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19748 }
19749}
19750
19751/// Given a node, compute flags that are used for address computation when
19752/// selecting load and store instructions. The flags computed are stored in
19753 /// FlagSet. This function takes into account whether the node is a constant,
19754 /// an ADD or OR with an immediate, or something else, and computes the address flags accordingly.
19755static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
19756 SelectionDAG &DAG) {
19757 // Set the alignment flags for the node depending on if the node is
19758 // 4-byte or 16-byte aligned.
19759 auto SetAlignFlagsForImm = [&](uint64_t Imm) {
19760 if ((Imm & 0x3) == 0)
19761 FlagSet |= PPC::MOF_RPlusSImm16Mult4;
19762 if ((Imm & 0xf) == 0)
19763 FlagSet |= PPC::MOF_RPlusSImm16Mult16;
19764 };
19765
19766 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
19767 // All 32-bit constants can be computed as LIS + Disp.
19768 const APInt &ConstImm = CN->getAPIntValue();
19769 if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
19770 FlagSet |= PPC::MOF_AddrIsSImm32;
19771 SetAlignFlagsForImm(ConstImm.getZExtValue());
19772 setAlignFlagsForFI(N, FlagSet, DAG);
19773 }
19774 if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
19775 FlagSet |= PPC::MOF_RPlusSImm34;
19776 else // Let constant materialization handle large constants.
19777 FlagSet |= PPC::MOF_NotAddNorCst;
19778 } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
19779 // This address can be represented as an addition of:
19780 // - Register + Imm16 (possibly a multiple of 4/16)
19781 // - Register + Imm34
19782 // - Register + PPCISD::Lo
19783 // - Register + Register
19784 // In any case, we won't have to match this as Base + Zero.
19785 SDValue RHS = N.getOperand(1);
19786 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
19787 const APInt &ConstImm = CN->getAPIntValue();
19788 if (ConstImm.isSignedIntN(16)) {
19789 FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
19790 SetAlignFlagsForImm(ConstImm.getZExtValue());
19791 setAlignFlagsForFI(N, FlagSet, DAG);
19792 }
19793 if (ConstImm.isSignedIntN(34))
19794 FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
19795 else
19796 FlagSet |= PPC::MOF_RPlusR; // Register.
19797 } else if (RHS.getOpcode() == PPCISD::Lo && !RHS.getConstantOperandVal(1))
19798 FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
19799 else
19800 FlagSet |= PPC::MOF_RPlusR;
19801 } else { // The address computation is not a constant or an addition.
19802 setAlignFlagsForFI(N, FlagSet, DAG);
19803 FlagSet |= PPC::MOF_NotAddNorCst;
19804 }
19805}
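// Illustrative note (added for clarity, not part of the upstream source):
// for an address of the form (add %reg, 48) the constant fits in 16 bits
// and is both 4- and 16-byte aligned, so the flags gain MOF_RPlusSImm16,
// MOF_RPlusSImm16Mult4 and MOF_RPlusSImm16Mult16 (plus MOF_RPlusSImm34);
// a plain register with no addition is tagged MOF_NotAddNorCst instead.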
19806
19807static bool isPCRelNode(SDValue N) {
19808 return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
19809 isValidPCRelNode<ConstantPoolSDNode>(N) ||
19810 isValidPCRelNode<GlobalAddressSDNode>(N) ||
19811 isValidPCRelNode<JumpTableSDNode>(N) ||
19812 isValidPCRelNode<BlockAddressSDNode>(N));
19813}
19814
19815/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
19816/// the address flags of the load/store instruction that is to be matched.
19817unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
19818 SelectionDAG &DAG) const {
19819 unsigned FlagSet = PPC::MOF_None;
19820
19821 // Compute subtarget flags.
19822 if (!Subtarget.hasP9Vector())
19823 FlagSet |= PPC::MOF_SubtargetBeforeP9;
19824 else
19825 FlagSet |= PPC::MOF_SubtargetP9;
19826
19827 if (Subtarget.hasPrefixInstrs())
19828 FlagSet |= PPC::MOF_SubtargetP10;
19829
19830 if (Subtarget.hasSPE())
19831 FlagSet |= PPC::MOF_SubtargetSPE;
19832
19833 // Check if we have a PCRel node and return early.
19834 if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
19835 return FlagSet;
19836
19837 // If the node is the paired load/store intrinsics, compute flags for
19838 // address computation and return early.
19839 unsigned ParentOp = Parent->getOpcode();
19840 if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
19841 (ParentOp == ISD::INTRINSIC_VOID))) {
19842 unsigned ID = Parent->getConstantOperandVal(1);
19843 if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
19844 SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
19845 ? Parent->getOperand(2)
19846 : Parent->getOperand(3);
19847 computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
19848 FlagSet |= PPC::MOF_Vector;
19849 return FlagSet;
19850 }
19851 }
19852
19853 // Mark this as something we don't want to handle here if it is atomic
19854 // or pre-increment instruction.
19855 if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
19856 if (LSB->isIndexed())
19857 return PPC::MOF_None;
19858
19859 // Compute in-memory type flags. This is based on if there are scalars,
19860 // floats or vectors.
19861 const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
19862 assert(MN && "Parent should be a MemSDNode!");
19863 EVT MemVT = MN->getMemoryVT();
19864 unsigned Size = MemVT.getSizeInBits();
19865 if (MemVT.isScalarInteger()) {
19866 assert(Size <= 128 &&
19867 "Not expecting scalar integers larger than 16 bytes!");
19868 if (Size < 32)
19869 FlagSet |= PPC::MOF_SubWordInt;
19870 else if (Size == 32)
19871 FlagSet |= PPC::MOF_WordInt;
19872 else
19873 FlagSet |= PPC::MOF_DoubleWordInt;
19874 } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
19875 if (Size == 128)
19876 FlagSet |= PPC::MOF_Vector;
19877 else if (Size == 256) {
19878 assert(Subtarget.pairedVectorMemops() &&
19879 "256-bit vectors are only available when paired vector memops is "
19880 "enabled!");
19881 FlagSet |= PPC::MOF_Vector;
19882 } else
19883 llvm_unreachable("Not expecting illegal vectors!");
19884 } else { // Floating point type: can be scalar, f128 or vector types.
19885 if (Size == 32 || Size == 64)
19886 FlagSet |= PPC::MOF_ScalarFloat;
19887 else if (MemVT == MVT::f128 || MemVT.isVector())
19888 FlagSet |= PPC::MOF_Vector;
19889 else
19890 llvm_unreachable("Not expecting illegal scalar floats!");
19891 }
19892
19893 // Compute flags for address computation.
19894 computeFlagsForAddressComputation(N, FlagSet, DAG);
19895
19896 // Compute type extension flags.
19897 if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
19898 switch (LN->getExtensionType()) {
19899 case ISD::SEXTLOAD:
19900 FlagSet |= PPC::MOF_SExt;
19901 break;
19902 case ISD::EXTLOAD:
19903 case ISD::ZEXTLOAD:
19904 FlagSet |= PPC::MOF_ZExt;
19905 break;
19906 case ISD::NON_EXTLOAD:
19907 FlagSet |= PPC::MOF_NoExt;
19908 break;
19909 }
19910 } else
19911 FlagSet |= PPC::MOF_NoExt;
19912
19913 // For integers, no extension is the same as zero extension.
19914 // We set the extension mode to zero extension so we don't have
19915 // to add separate entries in AddrModesMap for loads and stores.
19916 if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
19917 FlagSet |= PPC::MOF_ZExt;
19918 FlagSet &= ~PPC::MOF_NoExt;
19919 }
19920
19921 // If we don't have prefixed instructions, 34-bit constants should be
19922 // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
19923 bool IsNonP1034BitConst =
19924 ((PPC::MOF_RPlusSImm34 | PPC::MOF_AddrIsSImm32 | PPC::MOF_SubtargetP10) &
19925 FlagSet) == PPC::MOF_RPlusSImm34;
19926 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
19927 IsNonP1034BitConst)
19928 FlagSet |= PPC::MOF_NotAddNorCst;
19929
19930 return FlagSet;
19931}
19932
19933/// SelectForceXFormMode - Given the specified address, force it to be
19934/// represented as an indexed [r+r] operation (an XForm instruction).
19935 PPC::AddrMode PPCTargetLowering::SelectForceXFormMode(SDValue N, SDValue &Disp,
19936 SDValue &Base,
19937 SelectionDAG &DAG) const {
19938
19939 PPC::AddrMode Mode = PPC::AM_XForm;
19940 int16_t ForceXFormImm = 0;
19941 if (provablyDisjointOr(DAG, N) &&
19942 !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
19943 Disp = N.getOperand(0);
19944 Base = N.getOperand(1);
19945 return Mode;
19946 }
19947
19948 // If the address is the result of an add, we will utilize the fact that the
19949 // address calculation includes an implicit add. However, we can reduce
19950 // register pressure if we do not materialize a constant just for use as the
19951 // index register. We only get rid of the add if it is not an add of a
19952 // value and a 16-bit signed constant and both have a single use.
19953 if (N.getOpcode() == ISD::ADD &&
19954 (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
19955 !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
19956 Disp = N.getOperand(0);
19957 Base = N.getOperand(1);
19958 return Mode;
19959 }
19960
19961 // Otherwise, use R0 as the base register.
19962 Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
19963 N.getValueType());
19964 Base = N;
19965
19966 return Mode;
19967}
19968
19969 bool PPCTargetLowering::splitValueIntoRegisterParts(
19970 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
19971 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
19972 EVT ValVT = Val.getValueType();
19973 // If we are splitting a scalar integer into f64 parts (i.e. so they
19974 // can be placed into VFRC registers), we need to zero extend and
19975 // bitcast the values. This will ensure the value is placed into a
19976 // VSR using direct moves or stack operations as needed.
19977 if (PartVT == MVT::f64 &&
19978 (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
19979 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
19980 Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
19981 Parts[0] = Val;
19982 return true;
19983 }
19984 return false;
19985}
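
// Sketch of the transform above (illustrative only): an i32 value that must
// land in an f64 register part is first widened and then reinterpreted,
//   t1: i64 = zero_extend tN:i32
//   t2: f64 = bitcast t1
// so the 32-bit payload sits in the low bits of a VSR-sized value that can be
// moved with direct moves or stack operations.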
19986
19987SDValue PPCTargetLowering::lowerToLibCall(const char *LibCallName, SDValue Op,
19988 SelectionDAG &DAG) const {
19989 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19990  TargetLowering::CallLoweringInfo CLI(DAG);
19991  EVT RetVT = Op.getValueType();
19992 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
19993 SDValue Callee =
19994 DAG.getExternalSymbol(LibCallName, TLI.getPointerTy(DAG.getDataLayout()));
19995 bool SignExtend = TLI.shouldSignExtendTypeInLibCall(RetTy, false);
19996  TargetLowering::ArgListTy Args;
19997  for (const SDValue &N : Op->op_values()) {
19998 EVT ArgVT = N.getValueType();
19999 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
20000 TargetLowering::ArgListEntry Entry(N, ArgTy);
20001 Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgTy, SignExtend);
20002 Entry.IsZExt = !Entry.IsSExt;
20003 Args.push_back(Entry);
20004 }
20005
20006 SDValue InChain = DAG.getEntryNode();
20007 SDValue TCChain = InChain;
20008 const Function &F = DAG.getMachineFunction().getFunction();
20009 bool isTailCall =
20010 TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
20011 (RetTy == F.getReturnType() || F.getReturnType()->isVoidTy());
20012 if (isTailCall)
20013 InChain = TCChain;
20014 CLI.setDebugLoc(SDLoc(Op))
20015 .setChain(InChain)
20016 .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args))
20017 .setTailCall(isTailCall)
20018 .setSExtResult(SignExtend)
20019 .setZExtResult(!SignExtend)
20020      .setIsPostTypeLegalization(true);
20021  return TLI.LowerCallTo(CLI).first;
20022}
20023
20024SDValue PPCTargetLowering::lowerLibCallBasedOnType(
20025 const char *LibCallFloatName, const char *LibCallDoubleName, SDValue Op,
20026 SelectionDAG &DAG) const {
20027 if (Op.getValueType() == MVT::f32)
20028 return lowerToLibCall(LibCallFloatName, Op, DAG);
20029
20030 if (Op.getValueType() == MVT::f64)
20031 return lowerToLibCall(LibCallDoubleName, Op, DAG);
20032
20033 return SDValue();
20034}
20035
20036bool PPCTargetLowering::isLowringToMASSFiniteSafe(SDValue Op) const {
20037 SDNodeFlags Flags = Op.getNode()->getFlags();
20038 return isLowringToMASSSafe(Op) && Flags.hasNoSignedZeros() &&
20039 Flags.hasNoNaNs() && Flags.hasNoInfs();
20040}
20041
20042bool PPCTargetLowering::isLowringToMASSSafe(SDValue Op) const {
20043 return Op.getNode()->getFlags().hasApproximateFuncs();
20044}
20045
20046bool PPCTargetLowering::isScalarMASSConversionEnabled() const {
20047  return getTargetMachine().Options.PPCGenScalarMASSEntries;
20048}
20049
20050SDValue PPCTargetLowering::lowerLibCallBase(const char *LibCallDoubleName,
20051 const char *LibCallFloatName,
20052 const char *LibCallDoubleNameFinite,
20053 const char *LibCallFloatNameFinite,
20054 SDValue Op,
20055 SelectionDAG &DAG) const {
20056 if (!isScalarMASSConversionEnabled() || !isLowringToMASSSafe(Op))
20057 return SDValue();
20058
20059 if (!isLowringToMASSFiniteSafe(Op))
20060 return lowerLibCallBasedOnType(LibCallFloatName, LibCallDoubleName, Op,
20061 DAG);
20062
20063 return lowerLibCallBasedOnType(LibCallFloatNameFinite,
20064 LibCallDoubleNameFinite, Op, DAG);
20065}
20066
20067SDValue PPCTargetLowering::lowerPow(SDValue Op, SelectionDAG &DAG) const {
20068 return lowerLibCallBase("__xl_pow", "__xl_powf", "__xl_pow_finite",
20069 "__xl_powf_finite", Op, DAG);
20070}
20071
20072SDValue PPCTargetLowering::lowerSin(SDValue Op, SelectionDAG &DAG) const {
20073 return lowerLibCallBase("__xl_sin", "__xl_sinf", "__xl_sin_finite",
20074 "__xl_sinf_finite", Op, DAG);
20075}
20076
20077SDValue PPCTargetLowering::lowerCos(SDValue Op, SelectionDAG &DAG) const {
20078 return lowerLibCallBase("__xl_cos", "__xl_cosf", "__xl_cos_finite",
20079 "__xl_cosf_finite", Op, DAG);
20080}
20081
20082SDValue PPCTargetLowering::lowerLog(SDValue Op, SelectionDAG &DAG) const {
20083 return lowerLibCallBase("__xl_log", "__xl_logf", "__xl_log_finite",
20084 "__xl_logf_finite", Op, DAG);
20085}
20086
20087SDValue PPCTargetLowering::lowerLog10(SDValue Op, SelectionDAG &DAG) const {
20088 return lowerLibCallBase("__xl_log10", "__xl_log10f", "__xl_log10_finite",
20089 "__xl_log10f_finite", Op, DAG);
20090}
20091
20092SDValue PPCTargetLowering::lowerExp(SDValue Op, SelectionDAG &DAG) const {
20093 return lowerLibCallBase("__xl_exp", "__xl_expf", "__xl_exp_finite",
20094 "__xl_expf_finite", Op, DAG);
20095}
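
// Illustrative example (assumption, not taken from the upstream source): with
// scalar MASS conversion enabled (see isScalarMASSConversionEnabled()), an
// operation such as
//   %r = call afn double @llvm.exp.f64(double %x)
// that reaches this lowering is routed through lowerLibCallBase() to
// "__xl_exp"; if the call also carries nnan, ninf and nsz, the finite entry
// point "__xl_exp_finite" is chosen instead, and f32 operations select the
// corresponding "...f" variants.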
20096
20097// If we happen to match to an aligned D-Form, check if the Frame Index is
20098// adequately aligned. If it is not, reset the mode to match to X-Form.
20099static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
20100                                   PPC::AddrMode &Mode) {
20101  if (!isa<FrameIndexSDNode>(N))
20102    return;
20103  if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
20104      (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
20105    Mode = PPC::AM_XForm;
20106}
20107
20108/// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
20109/// compute the address flags of the node, get the optimal address mode based
20110/// on the flags, and set the Base and Disp based on the address mode.
20111PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode *Parent,
20112                                                       SDValue N, SDValue &Disp,
20113 SDValue &Base,
20114 SelectionDAG &DAG,
20115 MaybeAlign Align) const {
20116 SDLoc DL(Parent);
20117
20118 // Compute the address flags.
20119 unsigned Flags = computeMOFlags(Parent, N, DAG);
20120
20121 // Get the optimal address mode based on the Flags.
20122 PPC::AddrMode Mode = getAddrModeForFlags(Flags);
20123
20124 // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
20125 // Select an X-Form load if it is not.
20126 setXFormForUnalignedFI(N, Flags, Mode);
20127
20128 // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
20129 if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
20130 assert(Subtarget.isUsingPCRelativeCalls() &&
20131 "Must be using PC-Relative calls when a valid PC-Relative node is "
20132 "present!");
20133 Mode = PPC::AM_PCRel;
20134 }
20135
20136 // Set Base and Disp accordingly depending on the address mode.
20137 switch (Mode) {
20138 case PPC::AM_DForm:
20139 case PPC::AM_DSForm:
20140 case PPC::AM_DQForm: {
20141 // This is a register plus a 16-bit immediate. The base will be the
20142 // register and the displacement will be the immediate unless it
20143 // isn't sufficiently aligned.
20144 if (Flags & PPC::MOF_RPlusSImm16) {
20145 SDValue Op0 = N.getOperand(0);
20146 SDValue Op1 = N.getOperand(1);
20147 int16_t Imm = Op1->getAsZExtVal();
20148 if (!Align || isAligned(*Align, Imm)) {
20149 Disp = DAG.getSignedTargetConstant(Imm, DL, N.getValueType());
20150 Base = Op0;
20151        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
20152          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20153 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20154 }
20155 break;
20156 }
20157 }
20158 // This is a register plus the @lo relocation. The base is the register
20159 // and the displacement is the global address.
20160 else if (Flags & PPC::MOF_RPlusLo) {
20161 Disp = N.getOperand(1).getOperand(0); // The global address.
20166 Base = N.getOperand(0);
20167 break;
20168 }
20169 // This is a constant address at most 32 bits. The base will be
20170 // zero or load-immediate-shifted and the displacement will be
20171 // the low 16 bits of the address.
20172 else if (Flags & PPC::MOF_AddrIsSImm32) {
20173 auto *CN = cast<ConstantSDNode>(N);
20174 EVT CNType = CN->getValueType(0);
20175 uint64_t CNImm = CN->getZExtValue();
20176 // If this address fits entirely in a 16-bit sext immediate field, codegen
20177 // this as "d, 0".
20178 int16_t Imm;
20179 if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
20180 Disp = DAG.getSignedTargetConstant(Imm, DL, CNType);
20181 Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20182 CNType);
20183 break;
20184 }
20185 // Handle 32-bit sext immediate with LIS + Addr mode.
20186 if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
20187 (!Align || isAligned(*Align, CNImm))) {
20188 int32_t Addr = (int32_t)CNImm;
20189 // Otherwise, break this down into LIS + Disp.
20190 Disp = DAG.getSignedTargetConstant((int16_t)Addr, DL, MVT::i32);
20191 Base = DAG.getSignedTargetConstant((Addr - (int16_t)Addr) >> 16, DL,
20192 MVT::i32);
20193 uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
20194 Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
20195 break;
20196 }
20197 }
20198    // Otherwise, the PPC::MOF_NotAddNorCst flag is set. The load/store is non-foldable.
20199 Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
20200    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
20201      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20202 fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
20203 } else
20204 Base = N;
20205 break;
20206 }
20207 case PPC::AM_PrefixDForm: {
20208 int64_t Imm34 = 0;
20209 unsigned Opcode = N.getOpcode();
20210 if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
20211 (isIntS34Immediate(N.getOperand(1), Imm34))) {
20212      // N is an ADD/OR node and its second operand is a 34-bit signed immediate.
20213 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20214 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
20215 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
20216 else
20217 Base = N.getOperand(0);
20218 } else if (isIntS34Immediate(N, Imm34)) {
20219 // The address is a 34-bit signed immediate.
20220 Disp = DAG.getSignedTargetConstant(Imm34, DL, N.getValueType());
20221 Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
20222 }
20223 break;
20224 }
20225 case PPC::AM_PCRel: {
20226 // When selecting PC-Relative instructions, "Base" is not utilized as
20227 // we select the address as [PC+imm].
20228 Disp = N;
20229 break;
20230 }
20231 case PPC::AM_None:
20232 break;
20233 default: { // By default, X-Form is always available to be selected.
20234 // When a frame index is not aligned, we also match by XForm.
20235    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
20236    Base = FI ? N : N.getOperand(1);
20237 Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
20238 N.getValueType())
20239 : N.getOperand(0);
20240 break;
20241 }
20242 }
20243 return Mode;
20244}
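
// Illustrative walk-through (assumption): for a 64-bit integer load whose
// address is (add %reg, 24), computeMOFlags() records MOF_RPlusSImm16 (and,
// because the offset is a multiple of 4, MOF_RPlusSImm16Mult4), so
// getAddrModeForFlags() may return AM_DSForm; the switch above then sets
// Base = %reg and Disp = 24. For an (add %reg1, %reg2) address the default
// case selects the X-Form and uses the two registers as Base and Disp.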
20245
20246CCAssignFn *PPCTargetLowering::ccAssignFnForCall(CallingConv::ID CC,
20247                                                 bool Return,
20248 bool IsVarArg) const {
20249 switch (CC) {
20250 case CallingConv::Cold:
20251 return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF);
20252 default:
20253 return CC_PPC64_ELF;
20254 }
20255}
20256
20257bool PPCTargetLowering::shouldInlineQuadwordAtomics() const {
20258  return Subtarget.isPPC64() && Subtarget.hasQuadwordAtomics();
20259}
20260
20263 unsigned Size = AI->getType()->getPrimitiveSizeInBits();
20264 if (shouldInlineQuadwordAtomics() && Size == 128)
20266
20267 switch (AI->getOperation()) {
20273 default:
20275 }
20276
20277 llvm_unreachable("unreachable atomicrmw operation");
20278}
20279
20287
20288static Intrinsic::ID
20289getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp) {
20290  switch (BinOp) {
20291 default:
20292 llvm_unreachable("Unexpected AtomicRMW BinOp");
20293  case AtomicRMWInst::Xchg:
20294    return Intrinsic::ppc_atomicrmw_xchg_i128;
20295 case AtomicRMWInst::Add:
20296 return Intrinsic::ppc_atomicrmw_add_i128;
20297 case AtomicRMWInst::Sub:
20298 return Intrinsic::ppc_atomicrmw_sub_i128;
20299 case AtomicRMWInst::And:
20300 return Intrinsic::ppc_atomicrmw_and_i128;
20301 case AtomicRMWInst::Or:
20302 return Intrinsic::ppc_atomicrmw_or_i128;
20303 case AtomicRMWInst::Xor:
20304 return Intrinsic::ppc_atomicrmw_xor_i128;
20305  case AtomicRMWInst::Nand:
20306    return Intrinsic::ppc_atomicrmw_nand_i128;
20307 }
20308}
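
// For reference (illustrative, not from the upstream source): an IR operation
// such as
//   %old = atomicrmw add ptr %p, i128 %v seq_cst
// is, when quadword atomics are inlined, expanded through the masked-intrinsic
// path using the ppc_atomicrmw_add_i128 intrinsic selected above; operations
// without an i128 intrinsic hit the llvm_unreachable() default.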
20309
20310Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic(
20311    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
20312 Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
20313 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20314 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20315 Type *ValTy = Incr->getType();
20316 assert(ValTy->getPrimitiveSizeInBits() == 128);
20317 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20318 Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
20319 Value *IncrHi =
20320 Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
20321 Value *LoHi = Builder.CreateIntrinsic(
20322      getIntrinsicForAtomicRMWBinOp128(AI->getOperation()), {},
20323      {AlignedAddr, IncrLo, IncrHi});
20324 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20325 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20326 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20327 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20328 return Builder.CreateOr(
20329 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20330}
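
// Sketch of the splitting performed above: the i128 increment is handed to the
// intrinsic as two i64 halves (incr_lo, incr_hi) and the {lo, hi} result is
// reassembled as
//   val128 = zext(lo) | (zext(hi) << 64)
// matching the lo/hi pair convention of the ppc_atomicrmw_*_i128 intrinsics.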
20331
20332Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
20333    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
20334 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
20335 assert(shouldInlineQuadwordAtomics() && "Only support quadword now");
20336 Module *M = Builder.GetInsertBlock()->getParent()->getParent();
20337 Type *ValTy = CmpVal->getType();
20338 assert(ValTy->getPrimitiveSizeInBits() == 128);
20339 Function *IntCmpXchg =
20340 Intrinsic::getOrInsertDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
20341 Type *Int64Ty = Type::getInt64Ty(M->getContext());
20342 Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
20343 Value *CmpHi =
20344 Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
20345 Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
20346 Value *NewHi =
20347 Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
20348 emitLeadingFence(Builder, CI, Ord);
20349 Value *LoHi =
20350 Builder.CreateCall(IntCmpXchg, {AlignedAddr, CmpLo, CmpHi, NewLo, NewHi});
20351 emitTrailingFence(Builder, CI, Ord);
20352 Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
20353 Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
20354 Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
20355 Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
20356 return Builder.CreateOr(
20357 Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
20358}
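
// Sketch (illustrative): a quadword
//   %res = cmpxchg ptr %p, i128 %cmp, i128 %new seq_cst seq_cst
// is expanded by splitting %cmp and %new into i64 halves, calling
// ppc_cmpxchg_i128 between the leading and trailing fences emitted above, and
// recombining the returned {lo, hi} pair into an i128 exactly as in the
// atomicrmw case.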
20359
20361 return Subtarget.useCRBits();
20362}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static MCRegister MatchRegisterName(StringRef Name)
static unsigned getCallOpcode(const MachineFunction &CallerF, bool IsIndirect, bool IsTailCall, std::optional< CallLowering::PtrAuthInfo > &PAI, MachineRegisterInfo &MRI)
return SDValue()
static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2, unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &DL)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
static std::pair< Register, unsigned > getBaseWithConstantOffset(MachineRegisterInfo &MRI, Register Reg)
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
This file implements the APSInt class, which is a simple class that represents an arbitrary sized int...
static bool isLoad(int Opcode)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is +0.0.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Atomic ordering constants.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
lazy value info
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
static int getEstimateRefinementSteps(EVT VT, const LoongArchSubtarget &Subtarget)
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static bool isConstantOrUndef(const SDValue Op)
#define P(N)
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
cl::opt< bool > ANDIGlueBug("expose-ppc-andi-glue-bug", cl::desc("expose the ANDI glue bug on PPC"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static bool IsSelectCC(MachineInstr &MI)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static SDValue DAGCombineAddc(SDNode *N, llvm::PPCTargetLowering::DAGCombinerInfo &DCI)
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
std::tuple< uint32_t, uint8_t > LXVKQPattern
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static bool isShuffleMaskInRange(const SmallVectorImpl< int > &ShuffV, int HalfVec, int LHSLastElementDefined, int RHSLastElementDefined)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool IsPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static void updateForAIXShLibTLSModelOpt(TLSModel::Model &Model, SelectionDAG &DAG, const TargetMachine &TM)
updateForAIXShLibTLSModelOpt - Helper to initialize TLS model opt settings, and then apply the update...
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool callsShareTOCBase(const Function *Caller, const GlobalValue *CalleeGV, const TargetMachine &TM)
static SDValue generateSToVPermutedForVecShuffle(int ScalarSize, uint64_t ShuffleEltWidth, unsigned &NumValidElts, int FirstElt, int &LastElt, SDValue VecShuffOperand, SDValue SToVNode, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
constexpr uint64_t AIXSmallTlsPolicySizeLimit
static bool canConvertToVcmpequb(SDValue &LHS, SDValue &RHS)
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static cl::opt< unsigned > PPCGatherAllAliasesMaxDepth("ppc-gather-alias-max-depth", cl::init(18), cl::Hidden, cl::desc("max depth when checking alias info in GatherAllAliases()"))
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isCallCompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static bool IsSelect(MachineInstr &MI)
static SDValue ConvertCarryFlagToCarryValue(EVT SumType, SDValue Flag, EVT CarryType, SelectionDAG &DAG, const PPCSubtarget &STI)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool isStoreConditional(SDValue Intrin, unsigned &StoreWidth)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static cl::opt< unsigned > PPCMinimumBitTestCmps("ppc-min-bit-test-cmps", cl::init(3), cl::Hidden, cl::desc("Set minimum of largest number of comparisons to use bit test for " "switch on PPC."))
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
cl::opt< bool > DisableAutoPairedVecSt("disable-auto-paired-vec-st", cl::desc("disable automatically generated 32byte paired vector stores"), cl::init(true), cl::Hidden)
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static SDValue getDataClassTest(SDValue Op, FPClassTest Mask, const SDLoc &Dl, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSFirstElt, int LHSLastElt, int RHSFirstElt, int RHSLastElt, int HalfVec, unsigned LHSNumValidElts, unsigned RHSNumValidElts, const PPCSubtarget &Subtarget)
static SDValue AdjustLength(SDValue Val, unsigned Bits, bool Left, SelectionDAG &DAG)
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static std::optional< LXVKQPattern > getPatternInfo(const APInt &FullVal)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static SDValue ConvertSETCCToXori(SDNode *N, SelectionDAG &DAG)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static cl::opt< bool > DisablePerfectShuffle("ppc-disable-perfect-shuffle", cl::desc("disable vector permute decomposition"), cl::init(true), cl::Hidden)
bool isValidMtVsrBmi(APInt &BitMask, BuildVectorSDNode &BVN, bool IsLittleEndian)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static SDValue combineADDToSUB(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static bool canConvertSETCCToXori(SDNode *N)
static cl::opt< unsigned > PPCMinimumJumpTableEntries("ppc-min-jump-table-entries", cl::init(64), cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on PPC"))
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static SDValue ConvertCarryValueToCarryFlag(EVT SumType, SDValue Value, SelectionDAG &DAG, const PPCSubtarget &STI)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InGlue, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
SDValue convertTwoLoadsAndCmpToVCMPEQUB(SelectionDAG &DAG, SDNode *N, const SDLoc &DL)
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static cl::opt< unsigned > PPCAIXTLSModelOptUseIEForLDLimit("ppc-aix-shared-lib-tls-model-opt-limit", cl::init(1), cl::Hidden, cl::desc("Set inclusive limit count of TLS local-dynamic access(es) in a " "function to use initial-exec"))
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static cl::opt< bool > DisableP10StoreForward("disable-p10-store-forward", cl::desc("disable P10 store forward-friendly conversion"), cl::Hidden, cl::init(false))
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isFunctionGlobalAddress(const GlobalValue *CalleeGV)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
if(PassOpts->AAPipeline)
pre isel intrinsic Pre ISel Intrinsic Lowering
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
SI optimize exec mask operations pre RA
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const SparcSubtarget *Subtarget)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & PPCDoubleDouble()
Definition APFloat.h:299
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
bool isDenormal() const
Definition APFloat.h:1432
APInt bitcastToAPInt() const
Definition APFloat.h:1335
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition APInt.h:1407
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition APInt.h:450
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
APInt abs() const
Get the absolute value.
Definition APInt.h:1796
bool isAllOnes() const
Determine if all bits are set. This is true for zero-width values.
Definition APInt.h:372
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
void clearAllBits()
Set every bit to 0.
Definition APInt.h:1397
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool getBoolValue() const
Convert APInt to a boolean value.
Definition APInt.h:472
double bitsToDouble() const
Converts APInt bits to a double.
Definition APInt.h:1723
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition APInt.h:297
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
LLVM_ABI APInt extractBits(unsigned numBits, unsigned bitPosition) const
Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
Definition APInt.cpp:482
An arbitrary precision integer that knows its signedness.
Definition APSInt.h:24
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
An instruction that atomically checks whether a specified value is in a memory location,...
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const BlockAddress * getBlockAddress() const
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP, bool IsCustom=false)
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, MCRegister Reg, MVT LocVT, LocInfo HTP)
static CCValAssign getMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP, bool IsCustom=false)
bool needsCustom() const
int64_t getLocMemOffset() const
unsigned getValNo() const
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, int64_t Offset, MVT LocVT, LocInfo HTP)
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
CallingConv::ID getCallingConv() const
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
LLVM_ABI Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:282
uint64_t getZExtValue() const
const APInt & getAPIntValue() const
int64_t getSExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
bool isLittleEndian() const
Layout endianness...
Definition DataLayout.h:207
LLVM_ABI unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
A debug info location.
Definition DebugLoc.h:123
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition FastISel.h:66
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:363
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:777
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
arg_iterator arg_begin()
Definition Function.h:866
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
size_t arg_size() const
Definition Function.h:899
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
const Argument * const_arg_iterator
Definition Function.h:73
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:227
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730
const GlobalValue * getGlobal() const
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:636
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
void setThreadLocalMode(ThreadLocalMode Val)
bool hasHiddenVisibility() const
LLVM_ABI StringRef getSection() const
Definition Globals.cpp:191
Module * getParent()
Get the module that this global value is contained inside of...
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this global belongs to.
Definition Globals.cpp:132
bool hasComdat() const
Type * getValueType() const
bool hasProtectedVisibility() const
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
LLVM_ABI bool hasAtomicLoad() const LLVM_READONLY
Return true if this atomic instruction loads from memory.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
bool isUnordered() const
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
bool hasValue() const
TypeSize getValue() const
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Metadata node.
Definition Metadata.h:1078
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
void setCallFrameSize(unsigned N)
Set the call frame size on entry to this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
LocationSize getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getBasePtr() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
static bool hasPCRelFlag(unsigned TF)
bool is32BitELFABI() const
unsigned descriptorTOCAnchorOffset() const
MVT getScalarIntVT() const
bool isAIXABI() const
const PPCFrameLowering * getFrameLowering() const override
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
MCRegister getEnvironmentPointerRegister() const
bool isSVR4ABI() const
bool isLittleEndian() const
MCRegister getTOCPointerRegister() const
MCRegister getStackPointerRegister() const
bool is64BitELFABI() const
bool isELFv2ABI() const
const PPCTargetMachine & getTargetMachine() const
const PPCRegisterInfo * getRegisterInfo() const override
unsigned descriptorEnvironmentPointerOffset() const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallBase &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
bool hasInlineStackProbe(const MachineFunction &MF) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool supportsTailCallFor(const CallBase *CB) const
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
bool hasMultipleConditionRegisters(EVT VT) const override
Does the target have multiple (allocatable) condition registers that can be used to store the results...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
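A hypothetical override sketch of this hook's shape (MyTargetLowering is a placeholder class name; the choice of LLSC is illustrative and not a statement of PPC's actual policy):

  TargetLowering::AtomicExpansionKind
  MyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
    // Expand every atomicrmw through a load-linked/store-conditional loop.
    return AtomicExpansionKind::LLSC;
  }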
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=std::nullopt) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const override
Perform a store-conditional operation to Addr.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const override
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool shallExtractConstSplatVectorElementToStore(Type *VectorTy, unsigned ElemSizeInBits, unsigned &Index) const override
Return true if the target shall perform extract vector element and store given that the vector is kno...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
void CollectTargetIntrinsicOperands(const CallInst &I, SmallVectorImpl< SDValue > &Ops, SelectionDAG &DAG) const override
unsigned getStackProbeSize(const MachineFunction &MF) const
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool useLoadStackGuardNode(const Module &M) const override
Override to support customized stack guard loading.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode), compute the address flags of...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< value_op_iterator > op_values() const
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
user_iterator user_begin() const
Provide iteration support to walk over all users of an SDNode.
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getFreeze(SDValue V)
Return a freeze using the SDLoc of the value operand.
LLVM_ABI SDValue makeEquivalentMemoryOrdering(SDValue OldChain, SDValue NewMemOpChain)
If an existing load has uses of its chain, create a token factor node with that chain and the new mem...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
LLVM_ABI Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
static constexpr unsigned MaxRecursionDepth
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
LLVM_ABI bool areNonVolatileConsecutiveLoads(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes, int Dist) const
Return true if loads are next to each other and can be merged.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getMDNode(const MDNode *MD)
Return an MDNodeSDNode which holds an MDNode.
LLVM_ABI void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
LLVM_ABI SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
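As a usage sketch (assuming DAG, an SDLoc DL, and two integer SDValues A and B of the same type are in scope), a signed maximum can be expressed directly with getSelectCC:

  // max(A, B) expressed as select_cc(A, B, A, B, setgt).
  SDValue Max = DAG.getSelectCC(DL, A, B, A, B, ISD::SETGT);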
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
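A minimal sketch of building nodes with getConstant and getNode, assuming it runs in a context (e.g. a DAG combine) where DAG, an SDNode *N, and its operands are available; the folded-in constant 42 is purely illustrative:

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  // Build (add LHS, 42) using the getConstant/getNode signatures listed here.
  SDValue FortyTwo = DAG.getConstant(42, DL, VT);
  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, FortyTwo);
  // Add could then replace N, e.g. via DCI.CombineTo in a combine hook.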
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
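A sketch of how these known-bits queries are typically combined, assuming DAG and an integer-typed SDValue Op are in scope:

  EVT VT = Op.getValueType();
  KnownBits Known = DAG.computeKnownBits(Op);
  // Low bit provably zero => the value is even.
  bool IsEven = Known.Zero[0];
  // The same question asked through MaskedValueIsZero.
  bool IsEvenToo = DAG.MaskedValueIsZero(Op, APInt(VT.getScalarSizeInBits(), 1));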
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type size() const
Definition SmallPtrSet.h:99
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
Class to represent struct types.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
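For orientation, a minimal sketch (assuming it runs inside a TargetLowering subclass constructor, where LegalizeAction enumerators such as Expand and Custom are visible unqualified) of how setOperationAction is typically used; the chosen operations and types are illustrative:

  // Ask the legalizer to expand 32-bit signed division,
  // and route v4i32 vector shuffles through the custom lowering hook.
  setOperationAction(ISD::SDIV, MVT::i32, Expand);
  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom);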
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
void setMinimumBitTestCmps(unsigned Val)
Set the minimum of the largest number of comparisons to generate a BitTest.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
const TargetMachine & getTargetMachine() const
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL) const
Returns the type for the shift amount of a shift opcode.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setMinimumJumpTableEntries(unsigned Val)
Indicate the minimum number of blocks to generate jump tables.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
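A short sketch of typical legalization setup using these hooks, assuming it runs inside a TargetLowering subclass constructor; the chosen types and actions are illustrative, not a statement of PPC's actual configuration:

  // Truncating f64 -> f32 stores are not natively supported here; expand them.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  // Promote sign-extending i1 loads into a wider load plus an extend.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);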
unsigned GatherAllAliasesMaxDepth
Depth that GatherAllAliases should continue looking for chain dependencies when trying to find a more...
NegatibleCost
Enum that specifies when a float negation is beneficial.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, SDValue &Chain) const
Check whether a given call node is in tail position within its function.
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool useLoadStackGuardNode(const Module &M) const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
virtual unsigned combineRepeatedFPDivisors() const
Indicate whether this target prefers to combine FDIVs with the same divisor.
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
const STC & getSubtarget(const Function &F) const
This method returns a pointer to the specified type of TargetSubtargetInfo.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
virtual TargetLoweringObjectFile * getObjFileLowering() const
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
bool shouldAssumeDSOLocal(const GlobalValue *GV) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
bool getFunctionSections() const
Return true if functions should be emitted into their own section, corresponding to -ffunction-sectio...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned PPCGenScalarMASSEntries
Enables scalar MASS conversions.
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
LLVM_ABI bool isEmptyTy() const
Return true if this type is empty, that is, it has no elements or all of its elements are empty.
Definition Type.cpp:180
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
@ FloatTyID
32-bit floating point type
Definition Type.h:58
@ DoubleTyID
64-bit floating point type
Definition Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition Type.h:61
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition Type.h:311
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition Type.h:156
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition Type.h:258
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI bool isConstantSplatVectorAllOnes(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are ~0 ...
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ TargetConstantPool
Definition ISDOpcodes.h:184
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition ISDOpcodes.h:45
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:163
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ TargetExternalSymbol
Definition ISDOpcodes.h:185
@ TargetJumpTable
Definition ISDOpcodes.h:183
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:534
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition ISDOpcodes.h:180
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:958
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition ISDOpcodes.h:145
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:719
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ TargetGlobalTLSAddress
Definition ISDOpcodes.h:181
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
This namespace contains an enum with a value for every intrinsic/builtin function known by LLVM.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ VecShuffle
Definition NVPTX.h:139
@ MO_TLSLDM_FLAG
MO_TLSLDM_FLAG - On AIX, the ML relocation type is only valid for a reference to a TOC symbol from the...
Definition PPC.h:146
@ MO_PIC_LO_FLAG
MO_PIC_LO_FLAG = MO_PIC_FLAG | MO_LO.
Definition PPC.h:194
@ MO_TPREL_PCREL_FLAG
MO_TPREL_PCREL_FLAG = MO_PCREL_FLAG | MO_TPREL_FLAG.
Definition PPC.h:197
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:172
@ MO_GOT_PCREL_FLAG
MO_GOT_PCREL_FLAG = MO_PCREL_FLAG | MO_GOT_FLAG.
Definition PPC.h:203
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition PPC.h:154
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition PPC.h:121
@ MO_TLSLD_FLAG
MO_TLSLD_FLAG - If this bit is set the symbol reference is relative to TLS Local Dynamic model.
Definition PPC.h:150
@ MO_TLS_PCREL_FLAG
MO_TLS_PCREL_FLAG = MO_PCREL_FLAG | MO_TLS.
Definition PPC.h:200
@ MO_TPREL_HA
Definition PPC.h:179
@ MO_PLT
On PPC, the 12 bits are not enough for all target operand flags.
Definition PPC.h:113
@ MO_TLS
Symbol for VK_TLS fixup attached to an ADD instruction.
Definition PPC.h:188
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set, the symbol reference is relative to the thread pointer and the sy...
Definition PPC.h:140
@ MO_TPREL_LO
Definition PPC.h:178
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition PPC.h:175
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:166
@ MO_PIC_HA_FLAG
MO_PIC_HA_FLAG = MO_PIC_FLAG | MO_HA.
Definition PPC.h:191
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition PPC.h:135
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags; if these bits are set they should produce the reloc...
Definition PPC.h:160
@ MO_HA
Definition PPC.h:176
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition PPC.h:117
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implement signed integer division by a power of 2.
Define some predicates that are used for node matching.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VMRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VMRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
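A usage sketch, assuming DAG and a ShuffleVectorSDNode *SVN for a 16-byte vector shuffle are in scope (and that the code sits outside the PPC namespace, hence the qualification); the 4-byte element size is illustrative:

  if (PPC::isSplatShuffleMask(SVN, /*EltSize=*/4)) {
    // The shuffle splats one 4-byte element; fetch its index in PPC mnemonic form.
    unsigned SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVN, 4, DAG);
    (void)SplatIdx;
  }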
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
@ Define
Register definition.
Invariant opcodes: All instruction sets have these as their low opcodes.
@ XMC_PR
Program Code.
Definition XCOFF.h:106
@ XTY_ER
External reference.
Definition XCOFF.h:242
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
iterator end() const
Definition BasicBlock.h:89
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
static bool isIndirectCall(const MachineInstr &MI)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
LLVM_ABI void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, SmallVectorImpl< ISD::OutputArg > &Outs, const TargetLowering &TLI, const DataLayout &DL)
Given an LLVM IR type and return type attributes, compute the return value EVTs and flags,...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
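A minimal sketch of BuildMI in a custom-inserter context, assuming MBB (a MachineBasicBlock *), MI (the MachineInstr being expanded), a const TargetInstrInfo *TII, and virtual registers DestReg/SrcReg are already defined; the registers are placeholders, while PPC::ADDI is a real opcode:

  // Insert "DestReg = ADDI SrcReg, 1" before MI, reusing MI's debug location.
  BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(PPC::ADDI), DestReg)
      .addReg(SrcReg)
      .addImm(1);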
InstructionCost Cost
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
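Illustrative usage only (a sketch): the predicate rejects zero as well as values with more than one bit set.

#include "llvm/Support/MathExtras.h"
#include <cassert>

static void powerOfTwoExamples() {
  assert(llvm::isPowerOf2_64(64));  // 2^6
  assert(!llvm::isPowerOf2_64(0));  // zero is not a power of two
  assert(!llvm::isPowerOf2_64(48)); // more than one bit set
}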
static bool isRunOfOnes64(uint64_t Val, unsigned &MB, unsigned &ME)
bool isa_and_nonnull(const Y &Val)
Definition Casting.h:676
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool CC_PPC64_ELF(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
int countr_zero(T Val)
Count the number of 0s from the least significant bit upward, stopping at the first 1.
Definition bit.h:202
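Illustrative usage only (a sketch): for a non-zero power of two, the trailing-zero count is its base-2 logarithm.

#include "llvm/ADT/bit.h"
#include <cstdint>

// Hypothetical helper; assumes V is a non-zero power of two.
static unsigned log2OfPowerOfTwo(uint64_t V) {
  return llvm::countr_zero(V); // e.g. 0x80 -> 7
}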
unsigned M1(unsigned Val)
Definition VE.h:377
bool isReleaseOrStronger(AtomicOrdering AO)
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:147
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool convertToNonDenormSingle(APInt &ArgAPInt)
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr uint32_t Hi_32(uint64_t Value)
Return the high 32 bits of a 64 bit value.
Definition MathExtras.h:150
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr uint32_t Lo_32(uint64_t Value)
Return the low 32 bits of a 64 bit value.
Definition MathExtras.h:155
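Illustrative usage only (a sketch): the pair Hi_32/Lo_32 splits a 64-bit constant into the 32-bit halves used when a value is materialized or stored piecewise.

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Round-trip a 64-bit value through its halves; the result always equals V.
static uint64_t splitAndRejoin(uint64_t V) {
  uint32_t Hi = llvm::Hi_32(V);
  uint32_t Lo = llvm::Lo_32(V);
  return (uint64_t(Hi) << 32) | Lo;
}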
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests whether the value of the given node can be accurately represented as a sign ...
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
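Illustrative usage only (a sketch): rounding a byte count up to an alignment boundary, then confirming the result with isAligned.

#include "llvm/Support/Alignment.h"
#include <cassert>
#include <cstdint>

static uint64_t roundUpToAlign(uint64_t SizeInBytes, llvm::Align A) {
  uint64_t Rounded = llvm::alignTo(SizeInBytes, A); // alignTo(13, Align(8)) == 16
  assert(llvm::isAligned(A, Rounded));
  return Rounded;
}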
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1966
DWARFExpression::Operation Op
bool isPhysRegUsedAfter(Register Reg, MachineBasicBlock::iterator MBI)
Check if physical register Reg is used after MBI.
unsigned M0(unsigned Val)
Definition VE.h:376
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr bool isShiftedInt(int64_t x)
Checks if a signed integer is an N-bit number shifted left by S.
Definition MathExtras.h:182
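Illustrative usage only (a sketch): a PPC DS-form displacement is a signed 16-bit value whose low two bits are zero, i.e. a 14-bit number shifted left by two, so one way to phrase the screen is isShiftedInt<14, 2>.

#include "llvm/Support/MathExtras.h"
#include <cstdint>

// Hypothetical helper: true when Offset is usable as a DS-form displacement.
static bool fitsDSFormDisplacement(int64_t Offset) {
  return llvm::isShiftedInt<14, 2>(Offset); // true for 32764, false for 32766
}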
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:554
constexpr unsigned BitWidth
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
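Illustrative usage only (a sketch): decoding a raw 16-bit immediate field into a signed value with SignExtend32/SignExtend64.

#include "llvm/Support/MathExtras.h"
#include <cstdint>

static int64_t decodeSImm16(uint16_t RawField) {
  int32_t As32 = llvm::SignExtend32<16>(RawField); // 0xFFFF -> -1
  int64_t As64 = llvm::SignExtend64<16>(RawField); // same value, 64 bits wide
  (void)As32;
  return As64;
}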
static bool isRunOfOnes(unsigned Val, unsigned &MB, unsigned &ME)
Returns true iff Val consists of one contiguous run of 1s with any number of 0s on either side.
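To make "run of ones" concrete (a hedged standalone sketch; the backend's helper also accepts masks that wrap around the word, which this version does not): the set bits must exactly fill the span between the leading and trailing zeros, and MB/ME report the run in PPC's big-endian bit numbering, where bit 0 is the most significant bit.

#include <bit>     // std::countl_zero, std::countr_zero, std::popcount (C++20)
#include <cstdint>

static bool isContiguousOnes(uint32_t Mask, unsigned &MB, unsigned &ME) {
  if (Mask == 0)
    return false;
  unsigned Lead = std::countl_zero(Mask);
  unsigned Trail = std::countr_zero(Mask);
  if (Lead + Trail + std::popcount(Mask) != 32u)
    return false;  // the ones are not one contiguous run
  MB = Lead;       // first set bit, counted from the MSB
  ME = 31 - Trail; // last set bit, counted from the MSB
  return true;
}
// e.g. 0x00FFFF00 -> true with MB = 8, ME = 23; 0x00FF00FF -> false.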
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
@ Enabled
Convert any .debug_str_offsets tables to DWARF64 if needed.
Definition DWP.h:27
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
constexpr bool isShiftedUInt(uint64_t x)
Checks if an unsigned integer is an N-bit number shifted left by S.
Definition MathExtras.h:198
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
static const unsigned PerfectShuffleTable[6561+1]
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
LLVM_ABI std::string getEVTString() const
This function returns the value type as a string, e.g. "i32".
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition ValueTypes.h:142
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:453
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
unsigned getByValSize() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition KnownBits.h:54
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition KnownBits.h:60
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
void setNoFPExcept(bool b)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
CallLoweringInfo & setIsPostTypeLegalization(bool Value=true)
CallLoweringInfo & setLibCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList)
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setTailCall(bool Value=true)
CallLoweringInfo & setSExtResult(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
LLVM_ABI void AddToWorklist(SDNode *N)
LLVM_ABI SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)
This structure is used to pass arguments to makeLibCall function.