LLVM 23.0.0git
SystemZISelLowering.cpp
Go to the documentation of this file.
1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
17#include "llvm/ADT/SmallSet.h"
22#include "llvm/IR/GlobalAlias.h"
24#include "llvm/IR/Intrinsics.h"
25#include "llvm/IR/IntrinsicsS390.h"
26#include "llvm/IR/Module.h"
32#include <cctype>
33#include <optional>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "systemz-lower"
38
39// Temporarily let this be disabled by default until all known problems
40// related to argument extensions are fixed.
42 "argext-abi-check", cl::init(false),
43 cl::desc("Verify that narrow int args are properly extended per the "
44 "SystemZ ABI."));
45
46namespace {
47// Represents information about a comparison.
48struct Comparison {
49 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
50 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
51 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
52
53 // The operands to the comparison.
54 SDValue Op0, Op1;
55
56 // Chain if this is a strict floating-point comparison.
57 SDValue Chain;
58
59 // The opcode that should be used to compare Op0 and Op1.
60 unsigned Opcode;
61
62 // A SystemZICMP value. Only used for integer comparisons.
63 unsigned ICmpType;
64
65 // The mask of CC values that Opcode can produce.
66 unsigned CCValid;
67
68 // The mask of CC values for which the original condition is true.
69 unsigned CCMask;
70};
71} // end anonymous namespace
72
73// Classify VT as either 32 or 64 bit.
74static bool is32Bit(EVT VT) {
75 switch (VT.getSimpleVT().SimpleTy) {
76 case MVT::i32:
77 return true;
78 case MVT::i64:
79 return false;
80 default:
81 llvm_unreachable("Unsupported type");
82 }
83}
84
85// Return a version of MachineOperand that can be safely used before the
86// final use.
// NOTE(review): the signature line (listing line 87) is missing from this
// extract, so the function name and the declared type of `Op` are not
// visible here.
// For register operands the kill flag is cleared; other operand kinds are
// returned without modification.
88 if (Op.isReg())
89 Op.setIsKill(false);
90 return Op;
91}
92
94 const SystemZSubtarget &STI)
95 : TargetLowering(TM, STI), Subtarget(STI) {
96 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
97
98 auto *Regs = STI.getSpecialRegisters();
99
100 // Set up the register classes.
101 if (Subtarget.hasHighWord())
102 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
103 else
104 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
105 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
106 if (!useSoftFloat()) {
107 if (Subtarget.hasVector()) {
108 addRegisterClass(MVT::f16, &SystemZ::VR16BitRegClass);
109 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
110 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
111 } else {
112 addRegisterClass(MVT::f16, &SystemZ::FP16BitRegClass);
113 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
114 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
115 }
116 if (Subtarget.hasVectorEnhancements1())
117 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
118 else
119 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
120
121 if (Subtarget.hasVector()) {
122 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
123 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
124 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
125 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
126 addRegisterClass(MVT::v8f16, &SystemZ::VR128BitRegClass);
127 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
128 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
129 }
130
131 if (Subtarget.hasVector())
132 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
133 }
134
135 // Compute derived properties from the register classes
136 computeRegisterProperties(Subtarget.getRegisterInfo());
137
138 // Set up special registers.
139 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
140
141 // TODO: It may be better to default to latency-oriented scheduling, however
142 // LLVM's current latency-oriented scheduler can't handle physreg definitions
143 // such as SystemZ has with CC, so set this to the register-pressure
144 // scheduler, because it can.
146
149
151
152 // Instructions are strings of 2-byte aligned 2-byte values.
154 // For performance reasons we prefer 16-byte alignment.
156
157 // Handle operations that are handled in a similar way for all types.
158 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
159 I <= MVT::LAST_FP_VALUETYPE;
160 ++I) {
162 if (isTypeLegal(VT)) {
163 // Lower SET_CC into an IPM-based sequence.
167
168 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
170
171 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
174 }
175 }
176
177 // Expand jump table branches as address arithmetic followed by an
178 // indirect jump.
180
181 // Expand BRCOND into a BR_CC (see above).
183
184 // Handle integer types except i128.
185 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
186 I <= MVT::LAST_INTEGER_VALUETYPE;
187 ++I) {
189 if (isTypeLegal(VT) && VT != MVT::i128) {
191
192 // Expand individual DIV and REMs into DIVREMs.
199
200 // Support addition/subtraction with overflow.
203
204 // Support addition/subtraction with carry.
207
208 // Support carry in as value rather than glue.
211
212 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
213 // available, or if the operand is constant.
215
216 // Use POPCNT on z196 and above.
217 if (Subtarget.hasPopulationCount())
219 else
221
222 // No special instructions for these.
225
226 // Use *MUL_LOHI where possible instead of MULH*.
231
232 // The fp<=>i32/i64 conversions are all Legal except for f16 and for
233 // unsigned on z10 (only z196 and above have native support for
234 // unsigned conversions).
241 // Handle unsigned 32-bit input types as signed 64-bit types on z10.
242 auto OpAction =
243 (!Subtarget.hasFPExtension() && VT == MVT::i32) ? Promote : Custom;
244 setOperationAction(Op, VT, OpAction);
245 }
246 }
247 }
248
249 // Handle i128 if legal.
250 if (isTypeLegal(MVT::i128)) {
251 // No special instructions for these.
258
259 // We may be able to use VSLDB/VSLD/VSRD for these.
262
263 // No special instructions for these before z17.
264 if (!Subtarget.hasVectorEnhancements3()) {
274 } else {
275 // Even if we do have a legal 128-bit multiply, we do not
276 // want 64-bit multiply-high operations to use it.
279 }
280
281 // Support addition/subtraction with carry.
286
287 // Use VPOPCT and add up partial results.
289
290 // Additional instructions available with z17.
291 if (Subtarget.hasVectorEnhancements3()) {
292 setOperationAction(ISD::ABS, MVT::i128, Legal);
293
295 MVT::i128, Legal);
296 }
297 }
298
299 // These need custom handling in order to handle the f16 conversions.
308
309 // Type legalization will convert 8- and 16-bit atomic operations into
310 // forms that operate on i32s (but still keeping the original memory VT).
311 // Lower them into full i32 operations.
323
324 // Whether or not i128 is a legal type, we need to custom lower
325 // the atomic operations in order to exploit SystemZ instructions.
330
331 // Mark sign/zero extending atomic loads as legal, which will make
332 // DAGCombiner fold extensions into atomic loads if possible.
334 {MVT::i8, MVT::i16, MVT::i32}, Legal);
336 {MVT::i8, MVT::i16}, Legal);
338 MVT::i8, Legal);
339
340 // We can use the CC result of compare-and-swap to implement
341 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
345
347
348 // Traps are legal, as we will convert them to "j .+2".
349 setOperationAction(ISD::TRAP, MVT::Other, Legal);
350
351 // We have native support for a 64-bit CTLZ, via FLOGR.
355
356 // On z17 we have native support for a 64-bit CTTZ.
357 if (Subtarget.hasMiscellaneousExtensions4()) {
361 }
362
363 // On z15 we have native support for a 64-bit CTPOP.
364 if (Subtarget.hasMiscellaneousExtensions3()) {
367 }
368
369 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
371
372 // Expand 128 bit shifts without using a libcall.
376
377 // Also expand 256 bit shifts if i128 is a legal type.
378 if (isTypeLegal(MVT::i128)) {
382 }
383
384 // Handle bitcast from fp128 to i128.
385 if (!isTypeLegal(MVT::i128))
387
388 // We have native instructions for i8, i16 and i32 extensions, but not i1.
390 for (MVT VT : MVT::integer_valuetypes()) {
394 }
395
396 // Handle the various types of symbolic address.
402
403 // We need to handle dynamic allocations specially because of the
404 // 160-byte area at the bottom of the stack.
407
410
411 // Handle prefetches with PFD or PFDRL.
413
414 // Handle readcyclecounter with STCKF.
416
418 // Assume by default that all vector operations need to be expanded.
419 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
420 if (getOperationAction(Opcode, VT) == Legal)
421 setOperationAction(Opcode, VT, Expand);
422
423 // Likewise all truncating stores and extending loads.
424 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
425 setTruncStoreAction(VT, InnerVT, Expand);
428 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
429 }
430
431 if (isTypeLegal(VT)) {
432 // These operations are legal for anything that can be stored in a
433 // vector register, even if there is no native support for the format
434 // as such. In particular, we can do these for v4f32 even though there
435 // are no specific instructions for that format.
441
442 // Likewise, except that we need to replace the nodes with something
443 // more specific.
446 }
447 }
448
449 // Handle integer vector types.
451 if (isTypeLegal(VT)) {
452 // These operations have direct equivalents.
457 if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3()) {
461 }
462 if (Subtarget.hasVectorEnhancements3() &&
463 VT != MVT::v16i8 && VT != MVT::v8i16) {
468 }
473 if (Subtarget.hasVectorEnhancements1())
475 else
479
480 // Convert a GPR scalar to a vector by inserting it into element 0.
482
483 // Use a series of unpacks for extensions.
486
487 // Detect shifts/rotates by a scalar amount and convert them into
488 // V*_BY_SCALAR.
493
494 // Add ISD::VECREDUCE_ADD as custom in order to implement
495 // it with VZERO+VSUM
497
498 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
499 // and inverting the result as necessary.
501
503 Legal);
504 }
505 }
506
507 if (Subtarget.hasVector()) {
508 // There should be no need to check for float types other than v2f64
509 // since <2 x f32> isn't a legal type.
518
527 }
528
529 if (Subtarget.hasVectorEnhancements2()) {
538
547 }
548
549 // Handle floating-point types.
550 if (!useSoftFloat()) {
551 // Promote all f16 operations to float, with some exceptions below.
552 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
553 setOperationAction(Opc, MVT::f16, Promote);
555 for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
556 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
557 setTruncStoreAction(VT, MVT::f16, Expand);
558 }
560 setOperationAction(Op, MVT::f16, Subtarget.hasVector() ? Legal : Custom);
564
565 for (auto Op : {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN})
566 setOperationAction(Op, MVT::f16, Legal);
567 }
568
569 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
570 I <= MVT::LAST_FP_VALUETYPE;
571 ++I) {
573 if (isTypeLegal(VT) && VT != MVT::f16) {
574 // We can use FI for FRINT.
576
577 // We can use the extended form of FI for other rounding operations.
578 if (Subtarget.hasFPExtension()) {
585 }
586
587 // No special instructions for these.
593
594 // Special treatment.
596
597 // Handle constrained floating-point operations.
606 if (Subtarget.hasFPExtension()) {
613 }
614
615 // Extension from f16 needs libcall.
618 }
619 }
620
621 // Handle floating-point vector types.
622 if (Subtarget.hasVector()) {
623 // Scalar-to-vector conversion is just a subreg.
627
628 // Some insertions and extractions can be done directly but others
629 // need to go via integers.
636
637 // These operations have direct equivalents.
638 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
639 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
640 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
641 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
642 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
643 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
644 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
645 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
646 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
649 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
653
654 // Handle constrained floating-point operations.
668
673 if (Subtarget.hasVectorEnhancements1()) {
676 }
677 }
678
679 // The vector enhancements facility 1 has instructions for these.
680 if (Subtarget.hasVectorEnhancements1()) {
681 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
682 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
683 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
684 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
685 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
686 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
687 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
688 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
689 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
692 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
696
697 for (MVT Type : {MVT::f64, MVT::v2f64, MVT::f32, MVT::v4f32, MVT::f128}) {
704 }
705
706 // Handle constrained floating-point operations.
720 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
721 MVT::v4f32, MVT::v2f64 }) {
726 }
727 }
728
729 // We only have fused f128 multiply-addition on vector registers.
730 if (!Subtarget.hasVectorEnhancements1()) {
733 }
734
735 // We don't have a copysign instruction on vector registers.
736 if (Subtarget.hasVectorEnhancements1())
738
739 // Needed so that we don't try to implement f128 constant loads using
740 // a load-and-extend of a f80 constant (in cases where the constant
741 // would fit in an f80).
742 for (MVT VT : MVT::fp_valuetypes())
743 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
744
745 // We don't have extending load instruction on vector registers.
746 if (Subtarget.hasVectorEnhancements1()) {
747 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
748 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
749 }
750
751 // Floating-point truncation and stores need to be done separately.
752 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
753 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
754 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
755
756 // We have 64-bit FPR<->GPR moves, but need special handling for
757 // 32-bit forms.
758 if (!Subtarget.hasVector()) {
761 }
762
763 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
764 // structure, but VAEND is a no-op.
768
769 if (Subtarget.isTargetzOS()) {
770 // Handle address space casts between mixed sized pointers.
773 }
774
776
777 // Codes for which we want to perform some z-specific combinations.
781 ISD::LOAD,
794 ISD::SRL,
795 ISD::SRA,
796 ISD::MUL,
797 ISD::SDIV,
798 ISD::UDIV,
799 ISD::SREM,
800 ISD::UREM,
803
804 // Handle intrinsics.
807
808 // We're not using SJLJ for exception handling, but they're implemented
809 // solely to support use of __builtin_setjmp / __builtin_longjmp.
812
813 // We want to use MVC in preference to even a single load/store pair.
814 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
816
817 // Same with memmove.
818 MaxStoresPerMemmove = Subtarget.hasVector() ? 2 : 0;
820
821 // The main memset sequence is a byte store followed by an MVC.
822 // Two STC or MV..I stores win over that, but the kind of fused stores
823 // generated by target-independent code don't when the byte value is
824 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
825 // than "STC;MVC". Handle the choice in target-specific code instead.
826 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
828
829 // Default to having -disable-strictnode-mutation on
830 IsStrictFPEnabled = true;
831}
832
// NOTE(review): the signature line (listing line 833) is missing from this
// extract; presumably this is the useSoftFloat() override -- confirm.
// The answer is taken directly from the subtarget's soft-float setting.
834 return Subtarget.hasSoftFloat();
835}
836
// NOTE(review): listing lines 837, 844 and 846 are missing from this
// extract: the start of the signature, the expression assigned to
// NumIntermediates, and the callee of the fallback return. Presumably this
// is getVectorTypeBreakdownForCallingConv -- confirm against the full file.
//
// When the subtarget has the vector facility and VT is a vector of f16,
// both IntermediateVT and RegisterVT are set to v8f16. Otherwise the
// arguments are forwarded unchanged to another implementation.
838 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
839 unsigned &NumIntermediates, MVT &RegisterVT) const {
840 // Pass fp16 vectors in VR(s).
841 if (Subtarget.hasVector() && VT.isVectorOf(MVT::f16)) {
842 IntermediateVT = RegisterVT = MVT::v8f16;
843 return NumIntermediates =
845 }
847 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
848}
849
// Select the register type used to pass a value of type VT.
// NOTE(review): the start of the signature (listing lines 850-851) is
// missing from this extract.
//
// With the vector facility available:
//   * a 128-bit vector with exactly one element is passed as v16i8;
//   * any vector of f16 is passed as v8f16.
// Every other case defers to the generic TargetLowering implementation.
852 EVT VT) const {
853 // 128-bit single-element vector types are passed like other vectors,
854 // not like their element type.
855 if (Subtarget.hasVector() && VT.isVector() && VT.getSizeInBits() == 128 &&
856 VT.getVectorNumElements() == 1)
857 return MVT::v16i8;
858 // Pass fp16 vectors in VR(s).
859 if (Subtarget.hasVector() && VT.isVectorOf(MVT::f16))
860 return MVT::v8f16;
861 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
862}
863
// Number of registers used to pass a value of type VT.
// NOTE(review): listing lines 864 (start of the signature) and 868 (the
// statement controlled by the `if` below) are missing from this extract.
// As shown, the `if` appears to guard the fallback return; that is an
// artifact of the missing line, not the real control flow.
865 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
866 // Pass fp16 vectors in VR(s).
867 if (Subtarget.hasVector() && VT.isVectorOf(MVT::f16))
// All remaining types use the generic TargetLowering computation.
869 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
870}
871
// NOTE(review): listing lines 872 (start of the signature) and 876 (the
// return taken for vector types) are missing from this extract; presumably
// this is getSetCCResultType -- confirm.
// Non-vector types yield i32.
873 LLVMContext &, EVT VT) const {
874 if (!VT.isVector())
875 return MVT::i32;
877}
878
// NOTE(review): the first signature line (listing line 879) is missing from
// this extract; presumably this is isFMAFasterThanFMulAndFAdd -- confirm.
//
// Returns true for f32 and f64 (or vectors of them, since only the scalar
// type is inspected), and for f128 when the subtarget has vector
// enhancements 1. Returns false under soft-float, for non-simple types and
// for every other type.
880 const MachineFunction &MF, EVT VT) const {
881 if (useSoftFloat())
882 return false;
883
// Only the element type matters from here on.
884 VT = VT.getScalarType();
885
886 if (!VT.isSimple())
887 return false;
888
889 switch (VT.getSimpleVT().SimpleTy) {
890 case MVT::f32:
891 case MVT::f64:
892 return true;
893 case MVT::f128:
894 return Subtarget.hasVectorEnhancements1();
895 default:
896 break;
897 }
898
899 return false;
900}
901
902// Return true if the constant can be generated with a vector instruction,
903// such as VGM, VGMB or VREPI.
// NOTE(review): listing lines 904 (start of the signature), 926, 939 and
// 952 are missing from this extract. The names IntBits, SplatBits,
// SplatUndef, SplatBitSize, isFP128, Opcode and OpVals are used below but
// not declared in the visible text; presumably they are members of the
// enclosing class -- confirm.
//
// On success, Opcode is set to the chosen SystemZISD node and OpVals holds
// its immediate operand(s). Three encodings are tried, in this order:
// BYTE_MASK, REPLICATE, ROTATE_MASK.
905 const SystemZSubtarget &Subtarget) {
906 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
// Requires the vector facility, and additionally vector enhancements 1
// when isFP128 is set.
907 if (!Subtarget.hasVector() ||
908 (isFP128 && !Subtarget.hasVectorEnhancements1()))
909 return false;
910
911 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
912 // preferred way of creating all-zero and all-one vectors so give it
913 // priority over other methods below.
// Bit I of Mask is set when byte I of IntBits (counting from the least
// significant byte) is 0xff. The scan stops early at the first byte that
// is neither 0x00 nor 0xff.
914 unsigned Mask = 0;
915 unsigned I = 0;
916 for (; I < SystemZ::VectorBytes; ++I) {
917 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
918 if (Byte == 0xff)
919 Mask |= 1ULL << I;
920 else if (Byte != 0)
921 break;
922 }
// The loop ran to completion only if every byte was 0x00 or 0xff.
923 if (I == SystemZ::VectorBytes) {
924 Opcode = SystemZISD::BYTE_MASK;
925 OpVals.push_back(Mask);
927 return true;
928 }
929
// The remaining strategies work on a splat value held in a uint64_t.
930 if (SplatBitSize > 64)
931 return false;
932
// Attempt to materialise the SplatBitSize-wide element `Value`; on success
// fills in Opcode/OpVals and returns true.
933 auto TryValue = [&](uint64_t Value) -> bool {
934 // Try VECTOR REPLICATE IMMEDIATE
935 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
936 if (isInt<16>(SignedValue)) {
937 OpVals.push_back(((unsigned) SignedValue));
938 Opcode = SystemZISD::REPLICATE;
940 SystemZ::VectorBits / SplatBitSize);
941 return true;
942 }
943 // Try VECTOR GENERATE MASK
944 unsigned Start, End;
945 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
946 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
947 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
948 // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
949 OpVals.push_back(Start - (64 - SplatBitSize));
950 OpVals.push_back(End - (64 - SplatBitSize));
951 Opcode = SystemZISD::ROTATE_MASK;
953 SystemZ::VectorBits / SplatBitSize);
954 return true;
955 }
956 return false;
957 };
958
959 // First try assuming that any undefined bits above the highest set bit
960 // and below the lowest set bit are 1s. This increases the likelihood of
961 // being able to use a sign-extended element value in VECTOR REPLICATE
962 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
963 uint64_t SplatBitsZ = SplatBits.getZExtValue();
964 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
965 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
966 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
// Lower / Upper: the undefined bits lying strictly below the lowest, and
// strictly above the highest, defined set bit.
967 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
968 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
969 if (TryValue(SplatBitsZ | Upper | Lower))
970 return true;
971
972 // Now try assuming that any undefined bits between the first and
973 // last defined set bits are set. This increases the chances of
974 // using a non-wraparound mask.
975 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
976 return TryValue(SplatBitsZ | Middle);
977}
978
// NOTE(review): the signature line (listing line 979) is missing from this
// extract; presumably this is the SystemZVectorConstantInfo constructor
// taking an integer immediate `IntImm` -- confirm.
//
// Initialises IntBits (the 128-bit image of the constant) and then the
// splat description: SplatBits, SplatUndef and SplatBitSize.
// A single-word immediate is widened to 128 bits and shifted left by
// (VectorBits - its bit width); a wider immediate is used as is and must
// already be 128 bits wide (asserted).
980 if (IntImm.isSingleWord()) {
981 IntBits = APInt(128, IntImm.getZExtValue());
982 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
983 } else
984 IntBits = IntImm;
985 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
986
987 // Find the smallest splat.
// Keep halving while the upper and lower halves are equal, never going
// below an 8-bit element.
988 SplatBits = IntImm;
989 unsigned Width = SplatBits.getBitWidth();
990 while (Width > 8) {
991 unsigned HalfSize = Width / 2;
992 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
993 APInt LowValue = SplatBits.trunc(HalfSize);
994
995 // If the two halves do not match, stop here.
996 if (HighValue != LowValue || 8 > HalfSize)
997 break;
998
999 SplatBits = HighValue;
1000 Width = HalfSize;
1001 }
// An immediate has no undefined bits.
1002 SplatUndef = 0;
1003 SplatBitSize = Width;
1004}
1005
// NOTE(review): the signature line (listing line 1006) is missing from this
// extract; presumably this is the SystemZVectorConstantInfo constructor
// taking a BUILD_VECTOR node `BVN` -- confirm.
//
// BVN must be a constant BUILD_VECTOR (asserted). isConstantSplat is called
// twice: first with a minimum splat size of 128 to fill IntBits, then with
// a minimum of 8 to fill SplatBits. SplatUndef, SplatBitSize and
// HasAnyUndefs are written by both calls, so the values from the second
// call are the ones that remain. The return values of both calls are
// ignored.
1007 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
1008 bool HasAnyUndefs;
1009
1010 // Get IntBits by finding the 128 bit splat.
1011 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
1012 true);
1013
1014 // Get SplatBits by finding the 8 bit or greater splat.
1015 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
1016 true);
1017}
1018
// NOTE(review): the first signature line (listing line 1019) is missing
// from this extract; presumably this is isFPImmLegal with a floating-point
// immediate `Imm` -- confirm.
//
// Positive and negative zero are always accepted. Any other immediate is
// accepted only if SystemZVectorConstantInfo reports that it can be
// generated with a vector instruction on this subtarget. ForCodeSize is
// not consulted.
1020 bool ForCodeSize) const {
1021 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
1022 if (Imm.isZero() || Imm.isNegZero())
1023 return true;
1024
1025 return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
1026}
1027
// Expand a setjmp pseudo instruction MI located in MBB.
// NOTE(review): listing lines 1028-1029 (start of the signature), 1094 and
// 1099 are missing from this extract. `MIB` is assigned below, but its
// declaration is not visible (presumably on line 1094), and the statement
// that followed the splice (line 1099) is also absent. Presumably this is
// emitEHSjLjSetJmp -- confirm.
//
// Operand 0 of MI is the i32 result register and operand 1 is the buffer
// address register. The block is split into ThisMBB / MainMBB / RestoreMBB /
// SinkMBB as drawn below; the function returns SinkMBB and erases MI.
1030 MachineBasicBlock *MBB) const {
1031 DebugLoc DL = MI.getDebugLoc();
1032 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1033 const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo();
1034
1035 MachineFunction *MF = MBB->getParent();
1036 MachineRegisterInfo &MRI = MF->getRegInfo();
1037
1038 const BasicBlock *BB = MBB->getBasicBlock();
// Insertion point: the block that currently follows MBB.
1039 MachineFunction::iterator I = ++MBB->getIterator();
1040
1041 Register DstReg = MI.getOperand(0).getReg();
1042 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
1043 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
// TRI is only used by the assert above; silence unused-variable warnings
// in builds without assertions.
1044 (void)TRI;
// One value per incoming edge of the PHI created in SinkMBB.
1045 Register MainDstReg = MRI.createVirtualRegister(RC);
1046 Register RestoreDstReg = MRI.createVirtualRegister(RC);
1047
1048 MVT PVT = getPointerTy(MF->getDataLayout());
1049 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1050 // For v = setjmp(buf), we generate the following.
1051 // Algorithm:
1052 //
1053 // ---------
1054 // | thisMBB |
1055 // ---------
1056 // |
1057 // ------------------------
1058 // | |
1059 // ---------- ---------------
1060 // | mainMBB | | restoreMBB |
1061 // | v = 0 | | v = 1 |
1062 // ---------- ---------------
1063 // | |
1064 // -------------------------
1065 // |
1066 // -----------------------------
1067 // | sinkMBB |
1068 // | phi(v_mainMBB,v_restoreMBB) |
1069 // -----------------------------
1070 // thisMBB:
1071 // buf[FPOffset] = Frame Pointer if hasFP.
1072 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB.
1073 // buf[BCOffset] = Backchain value if building with -mbackchain.
1074 // buf[SPOffset] = Stack Pointer.
1075 // buf[LPOffset] = We never write this slot with R13, gcc stores R13 always.
1076 // SjLjSetup restoreMBB
1077 // mainMBB:
1078 // v_main = 0
1079 // sinkMBB:
1080 // v = phi(v_main, v_restore)
1081 // restoreMBB:
1082 // v_restore = 1
1083
1084 MachineBasicBlock *ThisMBB = MBB;
1085 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
1086 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
1087 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
1088
// MainMBB and SinkMBB go directly after the original block; RestoreMBB is
// appended at the end of the function and marked address-taken because its
// address is stored into the buffer below.
1089 MF->insert(I, MainMBB);
1090 MF->insert(I, SinkMBB);
1091 MF->push_back(RestoreMBB);
1092 RestoreMBB->setMachineBlockAddressTaken();
1093
1095
1096 // Transfer the remainder of BB and its successor edges to sinkMBB.
1097 SinkMBB->splice(SinkMBB->begin(), MBB,
1098 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
1100
1101 // thisMBB:
// Buffer layout in pointer-sized slots. No LPOffset constant is defined in
// this function.
1102 const int64_t FPOffset = 0; // Slot 1.
1103 const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2.
1104 const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3.
1105 const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4.
1106
1107 // Buf address.
1108 Register BufReg = MI.getOperand(1).getReg();
1109
1110 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
1111 Register LabelReg = MRI.createVirtualRegister(PtrRC);
1112
1113 // Prepare IP for longjmp.
1114 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LARL), LabelReg)
1115 .addMBB(RestoreMBB);
1116 // Store IP for return from jmp, slot 2, offset = 1.
1117 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1118 .addReg(LabelReg)
1119 .addReg(BufReg)
1120 .addImm(LabelOffset)
1121 .addReg(0);
1122
// Store the frame pointer (slot 1) only when this function has one.
1123 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1124 bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF);
1125 if (HasFP) {
1126 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1127 .addReg(SpecialRegs->getFramePointerRegister())
1128 .addReg(BufReg)
1129 .addImm(FPOffset)
1130 .addReg(0);
1131 }
1132
1133 // Store SP.
1134 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1135 .addReg(SpecialRegs->getStackPointerRegister())
1136 .addReg(BufReg)
1137 .addImm(SPOffset)
1138 .addReg(0);
1139
1140 // Slot 3(Offset = 2) Backchain value (if building with -mbackchain).
// The backchain word is loaded from the stack at the frame lowering's
// backchain offset and then copied into the buffer.
1141 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1142 if (BackChain) {
1143 Register BCReg = MRI.createVirtualRegister(PtrRC);
1144 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1145 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1146 .addReg(SpecialRegs->getStackPointerRegister())
1147 .addImm(TFL->getBackchainOffset(*MF))
1148 .addReg(0);
1149
1150 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1151 .addReg(BCReg)
1152 .addReg(BufReg)
1153 .addImm(BCOffset)
1154 .addReg(0);
1155 }
1156
1157 // Setup.
1158 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::EH_SjLj_Setup))
1159 .addMBB(RestoreMBB);
1160
// Attach the "no preserved registers" mask to the setup pseudo.
1161 const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1162 MIB.addRegMask(RegInfo->getNoPreservedMask());
1163
1164 ThisMBB->addSuccessor(MainMBB);
1165 ThisMBB->addSuccessor(RestoreMBB);
1166
1167 // mainMBB:
// Direct path: the result is 0.
1168 BuildMI(MainMBB, DL, TII->get(SystemZ::LHI), MainDstReg).addImm(0);
1169 MainMBB->addSuccessor(SinkMBB);
1170
1171 // sinkMBB:
1172 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(SystemZ::PHI), DstReg)
1173 .addReg(MainDstReg)
1174 .addMBB(MainMBB)
1175 .addReg(RestoreDstReg)
1176 .addMBB(RestoreMBB);
1177
1178 // restoreMBB.
// Path entered through the stored label: the result is 1, followed by an
// explicit jump to SinkMBB.
1179 BuildMI(RestoreMBB, DL, TII->get(SystemZ::LHI), RestoreDstReg).addImm(1);
1180 BuildMI(RestoreMBB, DL, TII->get(SystemZ::J)).addMBB(SinkMBB);
1181 RestoreMBB->addSuccessor(SinkMBB);
1182
1183 MI.eraseFromParent();
1184
1185 return SinkMBB;
1186}
1187
// Expand a longjmp pseudo instruction MI located in MBB.
// NOTE(review): listing lines 1188-1189 (start of the signature) and 1207
// are missing from this extract. `MIB` is assigned below, but its
// declaration is not visible (presumably on line 1207). Presumably this is
// emitEHSjLjLongJmp -- confirm.
//
// Operand 0 of MI is the buffer address register. The emitted sequence
// loads the saved label, frame pointer, R13 and stack pointer from the
// buffer (plus the backchain when enabled) and then branches to the label.
// MI is erased and MBB is returned.
1190 MachineBasicBlock *MBB) const {
1191
1192 DebugLoc DL = MI.getDebugLoc();
1193 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1194
1195 MachineFunction *MF = MBB->getParent();
1196 MachineRegisterInfo &MRI = MF->getRegInfo();
1197
1198 MVT PVT = getPointerTy(MF->getDataLayout());
1199 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1200 Register BufReg = MI.getOperand(0).getReg();
1201 const TargetRegisterClass *RC = MRI.getRegClass(BufReg);
1202 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1203
// Tmp receives the branch target; BCReg receives the saved backchain value
// (only written when backchain is enabled).
1204 Register Tmp = MRI.createVirtualRegister(RC);
1205 Register BCReg = MRI.createVirtualRegister(RC);
1206
1208
// Buffer layout in pointer-sized slots.
1209 const int64_t FPOffset = 0;
1210 const int64_t LabelOffset = 1 * PVT.getStoreSize();
1211 const int64_t BCOffset = 2 * PVT.getStoreSize();
1212 const int64_t SPOffset = 3 * PVT.getStoreSize();
1213 const int64_t LPOffset = 4 * PVT.getStoreSize();
1214
// Load the branch target.
1215 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), Tmp)
1216 .addReg(BufReg)
1217 .addImm(LabelOffset)
1218 .addReg(0);
1219
// Restore the frame pointer register.
1220 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1221 SpecialRegs->getFramePointerRegister())
1222 .addReg(BufReg)
1223 .addImm(FPOffset)
1224 .addReg(0);
1225
1226 // We are restoring R13 even though we never stored in setjmp from llvm,
1227 // as gcc always stores R13 in builtin_setjmp. We could have mixed code
1228 // gcc setjmp and llvm longjmp.
1229 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), SystemZ::R13D)
1230 .addReg(BufReg)
1231 .addImm(LPOffset)
1232 .addReg(0);
1233
// Fetch the saved backchain value before the stack pointer is replaced.
1234 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1235 if (BackChain) {
1236 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1237 .addReg(BufReg)
1238 .addImm(BCOffset)
1239 .addReg(0);
1240 }
1241
// Restore the stack pointer register.
1242 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1243 SpecialRegs->getStackPointerRegister())
1244 .addReg(BufReg)
1245 .addImm(SPOffset)
1246 .addReg(0);
1247
// Write the backchain value back at the backchain offset from the restored
// stack pointer.
1248 if (BackChain) {
1249 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1250 BuildMI(*MBB, MI, DL, TII->get(SystemZ::STG))
1251 .addReg(BCReg)
1252 .addReg(SpecialRegs->getStackPointerRegister())
1253 .addImm(TFL->getBackchainOffset(*MF))
1254 .addReg(0);
1255 }
1256
// Branch to the address loaded into Tmp.
1257 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BR)).addReg(Tmp);
1258
1259 MI.eraseFromParent();
1260 return MBB;
1261}
1262
1263/// Returns true if stack probing through inline assembly is requested.
// NOTE(review): the signature line (listing line 1264) is missing from this
// extract; `MF` is presumably a MachineFunction parameter -- confirm.
// The request is recognised only when the function carries a "probe-stack"
// attribute whose string value is exactly "inline-asm"; a missing attribute
// or any other value yields false.
1265 // If the function specifically requests inline stack probes, emit them.
1266 if (MF.getFunction().hasFnAttribute("probe-stack"))
1267 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
1268 "inline-asm";
1269 return false;
1270}
1271
1276
1281
// NOTE(review): this block is badly damaged in the extract. Listing lines
// 1282-1283 (start of the signature), 1287, 1292-1297 and 1299 are missing,
// which removes every return statement and the tail of the second
// condition. Presumably this is shouldExpandAtomicRMWInIR -- confirm against
// the full file before relying on anything below.
//
// What remains shows two "do not expand" cases: 8- and 16-bit operations,
// and 32- or 64-bit operations when the subtarget has interlocked-access
// facility 1 (subject to a further condition that is not visible).
1284 const AtomicRMWInst *RMW) const {
1285 // Don't expand subword operations as they require special treatment.
1286 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
1288
1289 // Don't expand if there is a target instruction available.
1290 if (Subtarget.hasInterlockedAccess1() &&
1291 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
1298
1300}
1301
// NOTE(review): the signature line (listing line 1302) is missing from this
// extract; presumably this is isLegalICmpImmediate -- confirm. The declared
// type of `Imm` is not visible here.
// Accepts any immediate that fits in 32 bits, either as a signed or as an
// unsigned value.
1303 // We can use CGFI or CLGFI.
1304 return isInt<32>(Imm) || isUInt<32>(Imm);
1305}
1306
// NOTE(review): the signature line (listing line 1307) is missing from this
// extract; presumably this is isLegalAddImmediate -- confirm. The declared
// type of `Imm` is not visible here.
// Accepts an immediate when either it or its negation fits in an unsigned
// 32-bit value.
// NOTE(review): if Imm is a signed 64-bit integer, `-Imm` overflows for the
// minimum value -- confirm whether callers can pass it.
1308 // We can use ALGFI or SLGFI.
1309 return isUInt<32>(Imm) || isUInt<32>(-Imm);
1310}
1311
// NOTE(review): the first signature line (listing line 1312) is missing
// from this extract; presumably this is allowsMisalignedMemoryAccesses --
// confirm.
// Always returns true, ignoring the type, the two unnamed parameters and the
// memory-operand flags. When a non-null `Fast` pointer is supplied, *Fast is
// set to 1.
1313 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
1314 // Unaligned accesses should never be slower than the expanded version.
1315 // We check specifically for aligned accesses in the few cases where
1316 // they are required.
1317 if (Fast)
1318 *Fast = 1;
1319 return true;
1320}
1321
// NOTE(review): the signature line (listing line 1322) is missing from this
// extract; presumably this is hasAndNot with `Y` as its value operand --
// confirm.
// The answer depends only on the value type of Y:
//   * i32 / i64: true when the subtarget has miscellaneous extensions 3;
//   * any vector type or i128: true when the subtarget has the vector
//     facility;
//   * anything else: false.
1323 EVT VT = Y.getValueType();
1324
1325 // We can use NC(G)RK for types in GPRs ...
1326 if (VT == MVT::i32 || VT == MVT::i64)
1327 return Subtarget.hasMiscellaneousExtensions3();
1328
1329 // ... or VNC for types in VRs.
1330 if (VT.isVector() || VT == MVT::i128)
1331 return Subtarget.hasVector();
1332
1333 return false;
1334}
1335
1336// Information about the addressing mode for a memory access.
// NOTE(review): listing lines 1337 (the struct header), 1339 and 1342 (the
// two field declarations) are missing from this extract. The constructor
// below shows that the fields are named LongDisplacement and IndexReg and
// are initialised from bool arguments.
1338 // True if a long displacement is supported.
1340
1341 // True if use of index register is supported.
1343
// Stores the two capability flags as given.
1344 AddressingMode(bool LongDispl, bool IdxReg) :
1345 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1346};
1347
1348// Return the desired addressing mode for a Load which has only one use (in
1349// the same block) which is a Store.
// NOTE(review): the first signature line (listing line 1350) is missing
// from this extract; `HasVector` is presumably a bool parameter -- confirm.
// Result, given as (long displacement, index register):
//   * HasVector set:           (false, true)
//   * otherwise, Ty is i8:     (false, false)
//   * otherwise, any other Ty: (true, true)
1351 Type *Ty) {
1352 // With vector support a Load->Store combination may be combined to either
1353 // an MVC or vector operations and it seems to work best to allow the
1354 // vector addressing mode.
1355 if (HasVector)
1356 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1357
1358 // Otherwise only the MVC case is special.
1359 bool MVC = Ty->isIntegerTy(8);
1360 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1361}
1362
1363// Return the addressing mode which seems most desirable given an LLVM
1364// Instruction pointer.
1365static AddressingMode
1368 switch (II->getIntrinsicID()) {
1369 default: break;
1370 case Intrinsic::memset:
1371 case Intrinsic::memmove:
1372 case Intrinsic::memcpy:
1373 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1374 }
1375 }
1376
1377 if (isa<LoadInst>(I) && I->hasOneUse()) {
1378 auto *SingleUser = cast<Instruction>(*I->user_begin());
1379 if (SingleUser->getParent() == I->getParent()) {
1380 if (isa<ICmpInst>(SingleUser)) {
1381 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1382 if (C->getBitWidth() <= 64 &&
1383 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1384 // Comparison of memory with 16 bit signed / unsigned immediate
1385 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1386 } else if (isa<StoreInst>(SingleUser))
1387 // Load->Store
1388 return getLoadStoreAddrMode(HasVector, I->getType());
1389 }
1390 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1391 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1392 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1393 // Load->Store
1394 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1395 }
1396
1397 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1398
1399 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1400 // dependencies (LDE only supports small offsets).
1401 // * Utilize the vector registers to hold floating point
1402 // values (vector load / store instructions only support small
1403 // offsets).
1404
1405 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1406 I->getOperand(0)->getType());
1407 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1408 bool IsVectorAccess = MemAccessTy->isVectorTy();
1409
1410 // A store of an extracted vector element will be combined into a VSTE type
1411 // instruction.
1412 if (!IsVectorAccess && isa<StoreInst>(I)) {
1413 Value *DataOp = I->getOperand(0);
1414 if (isa<ExtractElementInst>(DataOp))
1415 IsVectorAccess = true;
1416 }
1417
1418 // A load which gets inserted into a vector element will be combined into a
1419 // VLE type instruction.
1420 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1421 User *LoadUser = *I->user_begin();
1422 if (isa<InsertElementInst>(LoadUser))
1423 IsVectorAccess = true;
1424 }
1425
1426 if (IsFPAccess || IsVectorAccess)
1427 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1428 }
1429
1430 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1431}
1432
1434 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1435 // Punt on globals for now, although they can be used in limited
1436 // RELATIVE LONG cases.
1437 if (AM.BaseGV)
1438 return false;
1439
1440 // Require a 20-bit signed offset.
1441 if (!isInt<20>(AM.BaseOffs))
1442 return false;
1443
1444 bool RequireD12 =
1445 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1446 AddressingMode SupportedAM(!RequireD12, true);
1447 if (I != nullptr)
1448 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1449
1450 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1451 return false;
1452
1453 if (!SupportedAM.IndexReg)
1454 // No indexing allowed.
1455 return AM.Scale == 0;
1456 else
1457 // Indexing is OK but no scale factor can be applied.
1458 return AM.Scale == 0 || AM.Scale == 1;
1459}
1460
1462 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
1463 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
1464 const AttributeList &FuncAttributes, EVT *LargestVT) const {
1465
1466 assert(Limit != ~0U &&
1467 "Expected EmitTargetCodeForMemXXX() to handle AlwaysInline cases.");
1468
1469 if (Op.isZeroMemset())
1470 return false; // Memset zero: Use XC.
1471
1472 const int MVCFastLen = 16;
1473 // Use MVC up to 16 bytes. Small memset uses STC/MVI for first byte.
1474 if ((Op.isMemset() ? Op.size() - 1 : Op.size()) <= MVCFastLen)
1475 return false;
1476
1477 // Avoid unaligned VL/VST:s.
1478 if (!Op.isAligned(Align(8)) || (Op.size() >= 25 && Op.size() <= 31))
1479 return false;
1480
1482 Context, MemOps, Limit, Op, DstAS, SrcAS, FuncAttributes, LargestVT);
1483}
1484
1486 LLVMContext &Context, const MemOp &Op,
1487 const AttributeList &FuncAttributes) const {
1488 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1489}
1490
1491bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1492 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1493 return false;
1494 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1495 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1496 return FromBits > ToBits;
1497}
1498
1500 if (!FromVT.isInteger() || !ToVT.isInteger())
1501 return false;
1502 unsigned FromBits = FromVT.getFixedSizeInBits();
1503 unsigned ToBits = ToVT.getFixedSizeInBits();
1504 return FromBits > ToBits;
1505}
1506
1507//===----------------------------------------------------------------------===//
1508// Inline asm support
1509//===----------------------------------------------------------------------===//
1510
1513 if (Constraint.size() == 1) {
1514 switch (Constraint[0]) {
1515 case 'a': // Address register
1516 case 'd': // Data register (equivalent to 'r')
1517 case 'f': // Floating-point register
1518 case 'h': // High-part register
1519 case 'r': // General-purpose register
1520 case 'v': // Vector register
1521 return C_RegisterClass;
1522
1523 case 'Q': // Memory with base and unsigned 12-bit displacement
1524 case 'R': // Likewise, plus an index
1525 case 'S': // Memory with base and signed 20-bit displacement
1526 case 'T': // Likewise, plus an index
1527 case 'm': // Equivalent to 'T'.
1528 return C_Memory;
1529
1530 case 'I': // Unsigned 8-bit constant
1531 case 'J': // Unsigned 12-bit constant
1532 case 'K': // Signed 16-bit constant
1533 case 'L': // Signed 20-bit displacement (on all targets we support)
1534 case 'M': // 0x7fffffff
1535 return C_Immediate;
1536
1537 default:
1538 break;
1539 }
1540 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1541 switch (Constraint[1]) {
1542 case 'Q': // Address with base and unsigned 12-bit displacement
1543 case 'R': // Likewise, plus an index
1544 case 'S': // Address with base and signed 20-bit displacement
1545 case 'T': // Likewise, plus an index
1546 return C_Address;
1547
1548 default:
1549 break;
1550 }
1551 } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
1552 if (StringRef("{@cc}").compare(Constraint) == 0)
1553 return C_Other;
1554 }
1555 return TargetLowering::getConstraintType(Constraint);
1556}
1557
1560 AsmOperandInfo &Info, const char *Constraint) const {
1562 Value *CallOperandVal = Info.CallOperandVal;
1563 // If we don't have a value, we can't do a match,
1564 // but allow it at the lowest weight.
1565 if (!CallOperandVal)
1566 return CW_Default;
1567 Type *type = CallOperandVal->getType();
1568 // Look at the constraint type.
1569 switch (*Constraint) {
1570 default:
1571 Weight = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint);
1572 break;
1573
1574 case 'a': // Address register
1575 case 'd': // Data register (equivalent to 'r')
1576 case 'h': // High-part register
1577 case 'r': // General-purpose register
1578 Weight =
1579 CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1580 break;
1581
1582 case 'f': // Floating-point register
1583 if (!useSoftFloat())
1584 Weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1585 break;
1586
1587 case 'v': // Vector register
1588 if (Subtarget.hasVector())
1589 Weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1590 : CW_Default;
1591 break;
1592
1593 case 'I': // Unsigned 8-bit constant
1594 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1595 if (isUInt<8>(C->getZExtValue()))
1596 Weight = CW_Constant;
1597 break;
1598
1599 case 'J': // Unsigned 12-bit constant
1600 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1601 if (isUInt<12>(C->getZExtValue()))
1602 Weight = CW_Constant;
1603 break;
1604
1605 case 'K': // Signed 16-bit constant
1606 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1607 if (isInt<16>(C->getSExtValue()))
1608 Weight = CW_Constant;
1609 break;
1610
1611 case 'L': // Signed 20-bit displacement (on all targets we support)
1612 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1613 if (isInt<20>(C->getSExtValue()))
1614 Weight = CW_Constant;
1615 break;
1616
1617 case 'M': // 0x7fffffff
1618 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1619 if (C->getZExtValue() == 0x7fffffff)
1620 Weight = CW_Constant;
1621 break;
1622 }
1623 return Weight;
1624}
1625
1626// Parse a "{tNNN}" register constraint for which the register type "t"
1627// has already been verified. RC is the class associated with "t" and
1628// Map maps 0-based register numbers to LLVM register numbers.
1629static std::pair<unsigned, const TargetRegisterClass *>
1631 const unsigned *Map, unsigned Size) {
1632 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1633 if (isdigit(Constraint[2])) {
1634 unsigned Index;
1635 bool Failed =
1636 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1637 if (!Failed && Index < Size && Map[Index])
1638 return std::make_pair(Map[Index], RC);
1639 }
1640 return std::make_pair(0U, nullptr);
1641}
1642
1643std::pair<unsigned, const TargetRegisterClass *>
1645 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1646 if (Constraint.size() == 1) {
1647 // GCC Constraint Letters
1648 switch (Constraint[0]) {
1649 default: break;
1650 case 'd': // Data register (equivalent to 'r')
1651 case 'r': // General-purpose register
1652 if (VT.getSizeInBits() == 64)
1653 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1654 else if (VT.getSizeInBits() == 128)
1655 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1656 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1657
1658 case 'a': // Address register
1659 if (VT == MVT::i64)
1660 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1661 else if (VT == MVT::i128)
1662 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1663 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1664
1665 case 'h': // High-part register (an LLVM extension)
1666 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1667
1668 case 'f': // Floating-point register
1669 if (!useSoftFloat()) {
1670 if (VT.getSizeInBits() == 16)
1671 return std::make_pair(0U, &SystemZ::FP16BitRegClass);
1672 else if (VT.getSizeInBits() == 64)
1673 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1674 else if (VT.getSizeInBits() == 128)
1675 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1676 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1677 }
1678 break;
1679
1680 case 'v': // Vector register
1681 if (Subtarget.hasVector()) {
1682 if (VT.getSizeInBits() == 16)
1683 return std::make_pair(0U, &SystemZ::VR16BitRegClass);
1684 if (VT.getSizeInBits() == 32)
1685 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1686 if (VT.getSizeInBits() == 64)
1687 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1688 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1689 }
1690 break;
1691 }
1692 }
1693 if (Constraint.starts_with("{")) {
1694
1695 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1696 // to check the size on.
1697 auto getVTSizeInBits = [&VT]() {
1698 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1699 };
1700
1701 // We need to override the default register parsing for GPRs and FPRs
1702 // because the interpretation depends on VT. The internal names of
1703 // the registers are also different from the external names
1704 // (F0D and F0S instead of F0, etc.).
1705 if (Constraint[1] == 'r') {
1706 if (getVTSizeInBits() == 32)
1707 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1709 if (getVTSizeInBits() == 128)
1710 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1712 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1714 }
1715 if (Constraint[1] == 'f') {
1716 if (useSoftFloat())
1717 return std::make_pair(
1718 0u, static_cast<const TargetRegisterClass *>(nullptr));
1719 if (getVTSizeInBits() == 16)
1720 return parseRegisterNumber(Constraint, &SystemZ::FP16BitRegClass,
1722 if (getVTSizeInBits() == 32)
1723 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1725 if (getVTSizeInBits() == 128)
1726 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1728 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1730 }
1731 if (Constraint[1] == 'v') {
1732 if (!Subtarget.hasVector())
1733 return std::make_pair(
1734 0u, static_cast<const TargetRegisterClass *>(nullptr));
1735 if (getVTSizeInBits() == 16)
1736 return parseRegisterNumber(Constraint, &SystemZ::VR16BitRegClass,
1738 if (getVTSizeInBits() == 32)
1739 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1741 if (getVTSizeInBits() == 64)
1742 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1744 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1746 }
1747 if (Constraint[1] == '@') {
1748 if (StringRef("{@cc}").compare(Constraint) == 0)
1749 return std::make_pair(SystemZ::CC, &SystemZ::CCRRegClass);
1750 }
1751 }
1752 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1753}
1754
1755// FIXME? Maybe this could be a TableGen attribute on some registers and
1756// this table could be generated automatically from RegInfo.
1759 const MachineFunction &MF) const {
1760 Register Reg =
1762 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D
1763 : SystemZ::NoRegister)
1764 .Case("r15",
1765 Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
1766 .Default(Register());
1767
1768 return Reg;
1769}
1770
1772 const Constant *PersonalityFn) const {
1773 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1774}
1775
1777 const Constant *PersonalityFn) const {
1778 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1779}
1780
1781// Convert condition code in CCReg to an i32 value.
1783 SDLoc DL(CCReg);
1784 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
1785 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
1786 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
1787}
1788
1789// Lower @cc targets via setcc.
1791 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
1792 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
1793 if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
1794 return SDValue();
1795
1796 // Check that return type is valid.
1797 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
1798 OpInfo.ConstraintVT.getSizeInBits() < 8)
1799 report_fatal_error("Glue output operand is of invalid type");
1800
1801 if (Glue.getNode()) {
1802 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
1803 Chain = Glue.getValue(1);
1804 } else
1805 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
1806 return getCCResult(DAG, Glue);
1807}
1808
1810 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1811 SelectionDAG &DAG) const {
1812 // Only support length 1 constraints for now.
1813 if (Constraint.size() == 1) {
1814 switch (Constraint[0]) {
1815 case 'I': // Unsigned 8-bit constant
1816 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1817 if (isUInt<8>(C->getZExtValue()))
1818 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1819 Op.getValueType()));
1820 return;
1821
1822 case 'J': // Unsigned 12-bit constant
1823 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1824 if (isUInt<12>(C->getZExtValue()))
1825 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1826 Op.getValueType()));
1827 return;
1828
1829 case 'K': // Signed 16-bit constant
1830 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1831 if (isInt<16>(C->getSExtValue()))
1832 Ops.push_back(DAG.getSignedTargetConstant(
1833 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1834 return;
1835
1836 case 'L': // Signed 20-bit displacement (on all targets we support)
1837 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1838 if (isInt<20>(C->getSExtValue()))
1839 Ops.push_back(DAG.getSignedTargetConstant(
1840 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1841 return;
1842
1843 case 'M': // 0x7fffffff
1844 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1845 if (C->getZExtValue() == 0x7fffffff)
1846 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1847 Op.getValueType()));
1848 return;
1849 }
1850 }
1852}
1853
1854//===----------------------------------------------------------------------===//
1855// Calling conventions
1856//===----------------------------------------------------------------------===//
1857
1858#include "SystemZGenCallingConv.inc"
1859
1861 CallingConv::ID) const {
1862 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1863 SystemZ::R14D, 0 };
1864 return ScratchRegs;
1865}
1866
1868 Type *ToType) const {
1869 return isTruncateFree(FromType, ToType);
1870}
1871
1873 return CI->isTailCall();
1874}
1875
1876// Value is a value that has been passed to us in the location described by VA
1877// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1878// any loads onto Chain.
1880 CCValAssign &VA, SDValue Chain,
1881 SDValue Value) {
1882 // If the argument has been promoted from a smaller type, insert an
1883 // assertion to capture this.
1884 if (VA.getLocInfo() == CCValAssign::SExt)
1886 DAG.getValueType(VA.getValVT()));
1887 else if (VA.getLocInfo() == CCValAssign::ZExt)
1889 DAG.getValueType(VA.getValVT()));
1890
1891 if (VA.isExtInLoc())
1892 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1893 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1894 // If this is a short vector argument loaded from the stack,
1895 // extend from i64 to full vector size and then bitcast.
1896 assert(VA.getLocVT() == MVT::i64);
1897 assert(VA.getValVT().isVector());
1898 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1899 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1900 } else
1901 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1902 return Value;
1903}
1904
1905// Value is a value of type VA.getValVT() that we need to copy into
1906// the location described by VA. Return a copy of Value converted to
1907// VA.getLocVT(). The caller is responsible for handling indirect values.
1909 CCValAssign &VA, SDValue Value) {
1910 switch (VA.getLocInfo()) {
1911 case CCValAssign::SExt:
1912 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1913 case CCValAssign::ZExt:
1914 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1915 case CCValAssign::AExt:
1916 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1917 case CCValAssign::BCvt: {
1918 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1919 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1920 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1921 // For an f32 vararg we need to first promote it to an f64 and then
1922 // bitcast it to an i64.
1923 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1924 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1925 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1926 ? MVT::v2i64
1927 : VA.getLocVT();
1928 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1929 // For ELF, this is a short vector argument to be stored to the stack,
1930 // bitcast to v2i64 and then extract first element.
1931 if (BitCastToType == MVT::v2i64)
1932 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1933 DAG.getConstant(0, DL, MVT::i32));
1934 return Value;
1935 }
1936 case CCValAssign::Full:
1937 return Value;
1938 default:
1939 llvm_unreachable("Unhandled getLocInfo()");
1940 }
1941}
1942
1944 SDLoc DL(In);
1945 SDValue Lo, Hi;
1946 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1947 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1948 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1949 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1950 DAG.getConstant(64, DL, MVT::i32)));
1951 } else {
1952 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1953 }
1954
1955 // FIXME: If v2i64 were a legal type, we could use it instead of
1956 // Untyped here. This might enable improved folding.
1957 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1958 MVT::Untyped, Hi, Lo);
1959 return SDValue(Pair, 0);
1960}
1961
1963 SDLoc DL(In);
1964 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1965 DL, MVT::i64, In);
1966 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1967 DL, MVT::i64, In);
1968
1969 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1970 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1971 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1972 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1973 DAG.getConstant(64, DL, MVT::i32));
1974 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1975 } else {
1976 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1977 }
1978}
1979
1981 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1982 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1983 EVT ValueVT = Val.getValueType();
1984 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1985 // Inline assembly operand.
1986 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1987 return true;
1988 }
1989
1990 return false;
1991}
1992
1994 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1995 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1996 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1997 // Inline assembly operand.
1998 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1999 return DAG.getBitcast(ValueVT, Res);
2000 }
2001
2002 return SDValue();
2003}
2004
2005// The first part of a split stack argument is at index I in Args (and
2006// ArgLocs). Return the type of a part and the number of them by reference.
2007template <class ArgTy>
2009 SmallVector<CCValAssign, 16> &ArgLocs, unsigned I,
2010 MVT &PartVT, unsigned &NumParts) {
2011 if (!Args[I].Flags.isSplit())
2012 return false;
2013 assert(I < ArgLocs.size() && ArgLocs.size() == Args.size() &&
2014 "ArgLocs havoc.");
2015 PartVT = ArgLocs[I].getValVT();
2016 NumParts = 1;
2017 for (unsigned PartIdx = I + 1;; ++PartIdx) {
2018 assert(PartIdx != ArgLocs.size() && "SplitEnd not found.");
2019 assert(ArgLocs[PartIdx].getValVT() == PartVT && "Unsupported split.");
2020 ++NumParts;
2021 if (Args[PartIdx].Flags.isSplitEnd())
2022 break;
2023 }
2024 return true;
2025}
2026
2028 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
2029 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2030 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2032 MachineFrameInfo &MFI = MF.getFrameInfo();
2033 MachineRegisterInfo &MRI = MF.getRegInfo();
2034 SystemZMachineFunctionInfo *FuncInfo =
2036 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
2037 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2038
2039 // Assign locations to all of the incoming arguments.
2041 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2042 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
2043 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
2044
2045 unsigned NumFixedGPRs = 0;
2046 unsigned NumFixedFPRs = 0;
2047 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2048 SDValue ArgValue;
2049 CCValAssign &VA = ArgLocs[I];
2050 EVT LocVT = VA.getLocVT();
2051 if (VA.isRegLoc()) {
2052 // Arguments passed in registers
2053 const TargetRegisterClass *RC;
2054 switch (LocVT.getSimpleVT().SimpleTy) {
2055 default:
2056 // Integers smaller than i64 should be promoted to i64.
2057 llvm_unreachable("Unexpected argument type");
2058 case MVT::i32:
2059 NumFixedGPRs += 1;
2060 RC = &SystemZ::GR32BitRegClass;
2061 break;
2062 case MVT::i64:
2063 NumFixedGPRs += 1;
2064 RC = &SystemZ::GR64BitRegClass;
2065 break;
2066 case MVT::f16:
2067 NumFixedFPRs += 1;
2068 RC = &SystemZ::FP16BitRegClass;
2069 break;
2070 case MVT::f32:
2071 NumFixedFPRs += 1;
2072 RC = &SystemZ::FP32BitRegClass;
2073 break;
2074 case MVT::f64:
2075 NumFixedFPRs += 1;
2076 RC = &SystemZ::FP64BitRegClass;
2077 break;
2078 case MVT::f128:
2079 NumFixedFPRs += 2;
2080 RC = &SystemZ::FP128BitRegClass;
2081 break;
2082 case MVT::v16i8:
2083 case MVT::v8i16:
2084 case MVT::v4i32:
2085 case MVT::v2i64:
2086 case MVT::v8f16:
2087 case MVT::v4f32:
2088 case MVT::v2f64:
2089 RC = &SystemZ::VR128BitRegClass;
2090 break;
2091 }
2092
2093 Register VReg = MRI.createVirtualRegister(RC);
2094 MRI.addLiveIn(VA.getLocReg(), VReg);
2095 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2096 } else {
2097 assert(VA.isMemLoc() && "Argument not register or memory");
2098
2099 // Create the frame index object for this incoming parameter.
2100 // FIXME: Pre-include call frame size in the offset, should not
2101 // need to manually add it here.
2102 int64_t ArgSPOffset = VA.getLocMemOffset();
2103 if (Subtarget.isTargetXPLINK64()) {
2104 auto &XPRegs =
2105 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
2106 ArgSPOffset += XPRegs.getCallFrameSize();
2107 }
2108 int FI =
2109 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
2110
2111 // Create the SelectionDAG nodes corresponding to a load
2112 // from this parameter. Unpromoted ints and floats are
2113 // passed as right-justified 8-byte values.
2114 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2115 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32 ||
2116 VA.getLocVT() == MVT::f16) {
2117 unsigned SlotOffs = VA.getLocVT() == MVT::f16 ? 6 : 4;
2118 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
2119 DAG.getIntPtrConstant(SlotOffs, DL));
2120 }
2121 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
2123 }
2124
2125 // Convert the value of the argument register into the value that's
2126 // being passed.
2127 if (VA.getLocInfo() == CCValAssign::Indirect) {
2128 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2130 // If the original argument was split (e.g. i128), we need
2131 // to load all parts of it here (using the same address).
2132 MVT PartVT;
2133 unsigned NumParts;
2134 if (analyzeArgSplit(Ins, ArgLocs, I, PartVT, NumParts)) {
2135 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
2136 ++I;
2137 CCValAssign &PartVA = ArgLocs[I];
2138 unsigned PartOffset = Ins[I].PartOffset;
2139 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2140 DAG.getIntPtrConstant(PartOffset, DL));
2141 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2143 assert(PartOffset && "Offset should be non-zero.");
2144 }
2145 }
2146 } else
2147 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
2148 }
2149
2150 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
2151 // Save the number of non-varargs registers for later use by va_start, etc.
2152 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2153 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2154
2155 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2156 Subtarget.getSpecialRegisters());
2157
2158 // Likewise the address (in the form of a frame index) of where the
2159 // first stack vararg would be. The 1-byte size here is arbitrary.
2160 // FIXME: Pre-include call frame size in the offset, should not
2161 // need to manually add it here.
2162 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
2163 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
2164 FuncInfo->setVarArgsFrameIndex(FI);
2165 }
2166
2167 if (IsVarArg && Subtarget.isTargetELF()) {
2168 // Save the number of non-varargs registers for later use by va_start, etc.
2169 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2170 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2171
2172 // Likewise the address (in the form of a frame index) of where the
2173 // first stack vararg would be. The 1-byte size here is arbitrary.
2174 int64_t VarArgsOffset = CCInfo.getStackSize();
2175 FuncInfo->setVarArgsFrameIndex(
2176 MFI.CreateFixedObject(1, VarArgsOffset, true));
2177
2178 // ...and a similar frame index for the caller-allocated save area
2179 // that will be used to store the incoming registers.
2180 int64_t RegSaveOffset =
2181 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
2182 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
2183 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
2184
2185 // Store the FPR varargs in the reserved frame slots. (We store the
2186 // GPRs as part of the prologue.)
2187 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
2189 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
2190 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
2191 int FI =
2193 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2195 &SystemZ::FP64BitRegClass);
2196 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
2197 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
2199 }
2200 // Join the stores, which are independent of one another.
2201 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2202 ArrayRef(&MemOps[NumFixedFPRs],
2203 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
2204 }
2205 }
2206
2207 if (Subtarget.isTargetXPLINK64()) {
2208 // Create virtual register for handling incoming "ADA" special register (R5)
2209 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
2210 Register ADAvReg = MRI.createVirtualRegister(RC);
2211 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2212 Subtarget.getSpecialRegisters());
2213 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
2214 FuncInfo->setADAVirtualRegister(ADAvReg);
2215 }
2216 return Chain;
2217}
2218
2219static bool canUseSiblingCall(const CCState &ArgCCInfo,
2222 // Punt if there are any indirect or stack arguments, or if the call
2223 // needs the callee-saved argument register R6, or if the call uses
2224 // the callee-saved register arguments SwiftSelf and SwiftError.
2225 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2226 CCValAssign &VA = ArgLocs[I];
2228 return false;
2229 if (!VA.isRegLoc())
2230 return false;
2231 Register Reg = VA.getLocReg();
2232 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
2233 return false;
2234 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
2235 return false;
2236 }
2237 return true;
2238}
2239
2241 unsigned Offset, bool LoadAdr = false) {
2244 Register ADAvReg = MFI->getADAVirtualRegister();
2246
2247 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
2248 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
2249
2250 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
2251 if (!LoadAdr)
2252 Result = DAG.getLoad(
2253 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
2255
2256 return Result;
2257}
2258
2259// ADA access using Global value
2260// Note: for functions, address of descriptor is returned
// The global is classified as function / non-function and internal /
// external, an ADA operand flag (ADAtype) is chosen accordingly, and the
// work is delegated to the offset-based getADAEntry with offset 0.
// NOTE(review): the first signature line and every assignment to ADAtype are
// missing from this listing, so the flag chosen in each branch cannot be
// read off here.
2262 EVT PtrVT) {
2263 unsigned ADAtype;
2264 bool LoadAddr = false;
2265 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
// An alias counts as a function when its aliasee object is a Function.
2266 bool IsFunction =
2267 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
// "Internal" here means internal or private linkage.
2268 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
2269
2270 if (IsFunction) {
2271 if (IsInternal) {
// Only this branch sets LoadAddr, which is forwarded as the LoadAdr argument
// in the getADAEntry call at the end of the function.
2273 LoadAddr = true;
2274 } else
2276 } else {
2278 }
2279 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
2280
2281 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
2282}
2283
// Rewrite Callee and compute ADA for an XPLINK call.
//
// Returns true only for a direct call to a global with internal or private
// linkage: Callee becomes a PCREL_WRAPPER around the target global address
// and ADA is copied from the current function's ADA virtual register.
// Returns false in every other case; there ADA and Callee are taken from
// offsets ADADelta (0) and EPADelta (8) of the callee's descriptor, either
// through getADAEntry (global / external symbol) or through explicit loads
// (function pointer).
// NOTE(review): the definitions of PtrVT, MFI, GA and ES, and the tails of
// the two getLoad calls in the function-pointer case, are missing from this
// listing.
2284static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
2285 SDLoc &DL, SDValue &Chain) {
2286 unsigned ADADelta = 0; // ADA offset in desc.
2287 unsigned EPADelta = 8; // EPA offset in desc.
2290
2291 // XPLink calling convention.
2292 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2293 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
2294 G->getGlobal()->hasPrivateLinkage());
2295 if (IsInternal) {
2298 Register ADAvReg = MFI->getADAVirtualRegister();
// The callee's ADA is the caller's own ADA value in this case.
2299 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
2300 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2301 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2302 return true;
2303 } else {
2305 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2306 ADA = getADAEntry(DAG, GA, DL, ADADelta);
2307 Callee = getADAEntry(DAG, GA, DL, EPADelta);
2308 }
2309 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2311 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2312 ADA = getADAEntry(DAG, ES, DL, ADADelta);
2313 Callee = getADAEntry(DAG, ES, DL, EPADelta);
2314 } else {
2315 // Function pointer case
// Callee is used as the descriptor address: ADA is loaded from
// Callee + ADADelta first, then Callee is replaced by the value loaded from
// Callee + EPADelta.
2316 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2317 DAG.getConstant(ADADelta, DL, PtrVT));
2318 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
2320 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2321 DAG.getConstant(EPADelta, DL, PtrVT));
2322 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
2324 }
2325 return false;
2326}
2327
// Lower an outgoing call described by CLI. The visible steps are: assign
// argument locations with CC_SystemZ, spill indirect arguments to stack
// temporaries, store memory arguments relative to the stack pointer, queue
// and emit the register copies, emit a SIBCALL or CALL node and, for a
// normal call, copy the results assigned by RetCC_SystemZ into InVals.
// Returns the SIBCALL node for a sibling call, otherwise the final chain.
// NOTE(review): several lines are absent from this listing (the line naming
// the function, and the declarations of Outs, Ins, MF, ArgLocs, RegsToPass,
// Ops and RetLocs among them); confirm details against the full source.
2328SDValue
2330 SmallVectorImpl<SDValue> &InVals) const {
2331 SelectionDAG &DAG = CLI.DAG;
2332 SDLoc &DL = CLI.DL;
2334 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2336 SDValue Chain = CLI.Chain;
2337 SDValue Callee = CLI.Callee;
// IsTailCall is a reference into CLI, so clearing it below also updates CLI.
2338 bool &IsTailCall = CLI.IsTailCall;
2339 CallingConv::ID CallConv = CLI.CallConv;
2340 bool IsVarArg = CLI.IsVarArg;
2342 EVT PtrVT = getPointerTy(MF.getDataLayout());
2343 LLVMContext &Ctx = *DAG.getContext();
2344 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
2345
2346 // FIXME: z/OS support to be added in later.
2347 if (Subtarget.isTargetXPLINK64())
2348 IsTailCall = false;
2349
2350 // Integer args <=32 bits should have an extension attribute.
2351 verifyNarrowIntegerArgs_Call(Outs, &MF.getFunction(), Callee);
2352
2353 // Analyze the operands of the call, assigning locations to each operand.
2355 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
2356 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
2357
2358 // We don't support GuaranteedTailCallOpt, only automatically-detected
2359 // sibling calls.
2360 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
2361 IsTailCall = false;
2362
2363 // Get a count of how many bytes are to be pushed on the stack.
2364 unsigned NumBytes = ArgCCInfo.getStackSize();
2365
2366 // Mark the start of the call.
2367 if (!IsTailCall)
2368 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
2369
2370 // Copy argument values to their designated locations.
2372 SmallVector<SDValue, 8> MemOpChains;
2373 SDValue StackPtr;
2374 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2375 CCValAssign &VA = ArgLocs[I];
2376 SDValue ArgValue = OutVals[I];
2377
2378 if (VA.getLocInfo() == CCValAssign::Indirect) {
2379 // Store the argument in a stack slot and pass its address.
2380 EVT SlotVT;
2381 MVT PartVT;
2382 unsigned NumParts = 1;
// For a split argument the slot is an integer type wide enough for all
// parts; otherwise it has the argument's own type.
2383 if (analyzeArgSplit(Outs, ArgLocs, I, PartVT, NumParts))
2384 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * NumParts);
2385 else
2386 SlotVT = Outs[I].VT;
2387 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
2388 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2389
2390 MachinePointerInfo StackPtrInfo =
2392 MemOpChains.push_back(
2393 DAG.getStore(Chain, DL, ArgValue, SpillSlot, StackPtrInfo));
2394 // If the original argument was split (e.g. i128), we need
2395 // to store all parts of it here (and pass just one address).
2396 assert(Outs[I].PartOffset == 0);
2397 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
// Advancing I here consumes the part, so the outer loop does not visit it
// again.
2398 ++I;
2399 SDValue PartValue = OutVals[I];
2400 unsigned PartOffset = Outs[I].PartOffset;
2401 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2402 DAG.getIntPtrConstant(PartOffset, DL));
2403 MemOpChains.push_back(
2404 DAG.getStore(Chain, DL, PartValue, Address,
2405 StackPtrInfo.getWithOffset(PartOffset)));
2406 assert(PartOffset && "Offset should be non-zero.");
2407 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
2408 SlotVT.getStoreSize()) && "Not enough space for argument part!");
2409 }
// From here on the argument is the address of the spill slot.
2410 ArgValue = SpillSlot;
2411 } else
2412 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
2413
2414 if (VA.isRegLoc()) {
2415 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
2416 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
2417 // and low values.
2418 if (VA.getLocVT() == MVT::i128)
2419 ArgValue = lowerI128ToGR128(DAG, ArgValue);
2420 // Queue up the argument copies and emit them at the end.
2421 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2422 } else {
2423 assert(VA.isMemLoc() && "Argument not register or memory");
2424
2425 // Work out the address of the stack slot. Unpromoted ints and
2426 // floats are passed as right-justified 8-byte values.
// The stack pointer copy is created once, on the first memory argument.
2427 if (!StackPtr.getNode())
2428 StackPtr = DAG.getCopyFromReg(Chain, DL,
2429 Regs->getStackPointerRegister(), PtrVT);
2430 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2431 VA.getLocMemOffset();
// Right-justification: skip 4 bytes for 4-byte values and 6 for f16.
2432 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2433 Offset += 4;
2434 else if (VA.getLocVT() == MVT::f16)
2435 Offset += 6;
2436 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2438
2439 // Emit the store.
2440 MemOpChains.push_back(
2441 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2442
2443 // Although long doubles or vectors are passed through the stack when
2444 // they are vararg (non-fixed arguments), if a long double or vector
2445 // occupies the third and fourth slot of the argument list GPR3 should
2446 // still shadow the third slot of the argument list.
2447 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2448 SDValue ShadowArgValue =
2449 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2450 DAG.getIntPtrConstant(1, DL));
2451 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2452 }
2453 }
2454 }
2455
2456 // Join the stores, which are independent of one another.
2457 if (!MemOpChains.empty())
2458 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2459
2460 // Accept direct calls by converting symbolic call addresses to the
2461 // associated Target* opcodes. Force %r1 to be used for indirect
2462 // tail calls.
2463 SDValue Glue;
2464
2465 if (Subtarget.isTargetXPLINK64()) {
2466 SDValue ADA;
2467 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
// When the helper returns false the callee address is first copied into the
// dedicated callee-address register and the call goes through that register.
2468 if (!IsBRASL) {
2469 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2470 ->getAddressOfCalleeRegister();
2471 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2472 Glue = Chain.getValue(1);
2473 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2474 }
// The ADA value is passed like any other register argument.
2475 RegsToPass.push_back(std::make_pair(
2476 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2477 } else {
2478 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2479 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2480 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2481 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2482 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2483 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2484 } else if (IsTailCall) {
2485 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2486 Glue = Chain.getValue(1);
2487 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2488 }
2489 }
2490
2491 // Build a sequence of copy-to-reg nodes, chained and glued together.
2492 for (const auto &[Reg, N] : RegsToPass) {
2493 Chain = DAG.getCopyToReg(Chain, DL, Reg, N, Glue);
2494 Glue = Chain.getValue(1);
2495 }
2496
2497 // The first call operand is the chain and the second is the target address.
2499 Ops.push_back(Chain);
2500 Ops.push_back(Callee);
2501
2502 // Add argument registers to the end of the list so that they are
2503 // known live into the call.
2504 for (const auto &[Reg, N] : RegsToPass)
2505 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2506
2507 // Add a register mask operand representing the call-preserved registers.
2508 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2509 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2510 assert(Mask && "Missing call preserved mask for calling convention");
2511 Ops.push_back(DAG.getRegisterMask(Mask));
2512
2513 // Glue the call to the argument copies, if any.
2514 if (Glue.getNode())
2515 Ops.push_back(Glue);
2516
2517 // Emit the call.
2518 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
// A sibling call returns here: no CALLSEQ_END and no result copies are
// emitted for it.
2519 if (IsTailCall) {
2520 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2521 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2522 return Ret;
2523 }
2524 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2525 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2526 Glue = Chain.getValue(1);
2527
2528 // Mark the end of the call, which is glued to the call itself.
2529 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2530 Glue = Chain.getValue(1);
2531
2532 // Assign locations to each value returned by this call.
2534 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2535 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2536
2537 // Copy all of the result registers out of their specified physreg.
2538 for (CCValAssign &VA : RetLocs) {
2539 // Copy the value out, gluing the copy to the end of the call sequence.
2540 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2541 VA.getLocVT(), Glue);
// Results 1 and 2 of the copy are the updated chain and glue.
2542 Chain = RetValue.getValue(1);
2543 Glue = RetValue.getValue(2);
2544
2545 // Convert the value of the return register into the value that's
2546 // being returned.
2547 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2548 }
2549
2550 return Chain;
2551}
2552
2553// Generate a call taking the given operands as arguments and returning a
2554// result of type RetVT.
// The callee is referenced by name through an external symbol, and the call
// is lowered via LowerCallTo, whose result is returned unchanged.
// NOTE(review): the line naming this function and the declarations of Args,
// Entry and CLI are missing from this listing.
2556 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2557 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2558 bool DoesNotReturn, bool IsReturnValueUsed) const {
2560 Args.reserve(Ops.size());
2561
2562 for (SDValue Op : Ops) {
2564 Op, Op.getValueType().getTypeForEVT(*DAG.getContext()));
// Every argument is flagged as exactly one of sign- or zero-extended.
2565 Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned);
2566 Entry.IsZExt = !Entry.IsSExt;
2567 Args.push_back(Entry);
2568 }
2569
2570 SDValue Callee =
2571 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2572
2573 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
// The same either/or extension rule is applied to the result.
2575 bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned);
2576 CLI.setDebugLoc(DL)
2577 .setChain(Chain)
2578 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2579 .setNoReturn(DoesNotReturn)
2580 .setDiscardResult(!IsReturnValueUsed)
2581 .setSExtResult(SignExtend)
2582 .setZExtResult(!SignExtend);
2583 return LowerCallTo(CLI);
2584}
2585
// Report whether the return values described by Outs can be lowered: false
// if any of them is a scalar integer wider than 64 bits, otherwise whatever
// CCState::CheckReturn reports for RetCC_SystemZ.
// NOTE(review): the line naming this function and the declaration of RetLocs
// are missing from this listing.
2587 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2588 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
2589 const Type *RetTy) const {
2590 // Special case that we cannot easily detect in RetCC_SystemZ since
2591 // i128 may not be a legal type.
2592 for (auto &Out : Outs)
2593 if (Out.ArgVT.isScalarInteger() && Out.ArgVT.getSizeInBits() > 64)
2594 return false;
2595
2597 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Context);
2598 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2599}
2600
// Lower a function return: assign each returned value a location with
// RetCC_SystemZ, copy the values into their registers (chained and glued),
// and emit a RET_GLUE node whose operands are the chain, the return
// registers and, when present, the glue.
// NOTE(review): the line naming this function, part of its parameter list
// and the declarations of MF, RetLocs and RetOps are missing from this
// listing.
2601SDValue
2603 bool IsVarArg,
2605 const SmallVectorImpl<SDValue> &OutVals,
2606 const SDLoc &DL, SelectionDAG &DAG) const {
2608
2609 // Integer args <=32 bits should have an extension attribute.
2610 verifyNarrowIntegerArgs_Ret(Outs, &MF.getFunction());
2611
2612 // Assign locations to each returned value.
2614 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2615 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2616
2617 // Quick exit for void returns
2618 if (RetLocs.empty())
2619 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2620
// Reached only for non-void returns, which are a fatal error under the GHC
// calling convention.
2621 if (CallConv == CallingConv::GHC)
2622 report_fatal_error("GHC functions return void only");
2623
2624 // Copy the result values into the output registers.
2625 SDValue Glue;
// Slot 0 holds the incoming chain for now; it is overwritten with the final
// chain once all copies exist.
2627 RetOps.push_back(Chain);
2628 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2629 CCValAssign &VA = RetLocs[I];
2630 SDValue RetValue = OutVals[I];
2631
2632 // Make the return register live on exit.
2633 assert(VA.isRegLoc() && "Can only return in registers!");
2634
2635 // Promote the value as required.
2636 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2637
2638 // Chain and glue the copies together.
2639 Register Reg = VA.getLocReg();
2640 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2641 Glue = Chain.getValue(1);
2642 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2643 }
2644
2645 // Update chain and glue.
2646 RetOps[0] = Chain;
2647 if (Glue.getNode())
2648 RetOps.push_back(Glue);
2649
2650 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2651}
2652
2653// Return true if Op is an intrinsic node with chain that returns the CC value
2654// as its only (other) argument. Provide the associated SystemZISD opcode and
2655// the mask of valid CC values if so.
2656static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2657 unsigned &CCValid) {
2658 unsigned Id = Op.getConstantOperandVal(1);
2659 switch (Id) {
2660 case Intrinsic::s390_tbegin:
2661 Opcode = SystemZISD::TBEGIN;
2662 CCValid = SystemZ::CCMASK_TBEGIN;
2663 return true;
2664
2665 case Intrinsic::s390_tbegin_nofloat:
2666 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2667 CCValid = SystemZ::CCMASK_TBEGIN;
2668 return true;
2669
2670 case Intrinsic::s390_tend:
2671 Opcode = SystemZISD::TEND;
2672 CCValid = SystemZ::CCMASK_TEND;
2673 return true;
2674
2675 default:
2676 return false;
2677 }
2678}
2679
2680// Return true if Op is an intrinsic node without chain that returns the
2681// CC value as its final argument. Provide the associated SystemZISD
2682// opcode and the mask of valid CC values if so.
// The intrinsic ID is read from operand 0. Each group of case labels below
// maps to one SystemZISD opcode; when false is returned neither Opcode nor
// CCValid is written.
2683static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2684 unsigned Id = Op.getConstantOperandVal(0);
2685 switch (Id) {
2686 case Intrinsic::s390_vpkshs:
2687 case Intrinsic::s390_vpksfs:
2688 case Intrinsic::s390_vpksgs:
2689 Opcode = SystemZISD::PACKS_CC;
2690 CCValid = SystemZ::CCMASK_VCMP;
2691 return true;
2692
2693 case Intrinsic::s390_vpklshs:
2694 case Intrinsic::s390_vpklsfs:
2695 case Intrinsic::s390_vpklsgs:
2696 Opcode = SystemZISD::PACKLS_CC;
2697 CCValid = SystemZ::CCMASK_VCMP;
2698 return true;
2699
2700 case Intrinsic::s390_vceqbs:
2701 case Intrinsic::s390_vceqhs:
2702 case Intrinsic::s390_vceqfs:
2703 case Intrinsic::s390_vceqgs:
2704 case Intrinsic::s390_vceqqs:
2705 Opcode = SystemZISD::VICMPES;
2706 CCValid = SystemZ::CCMASK_VCMP;
2707 return true;
2708
2709 case Intrinsic::s390_vchbs:
2710 case Intrinsic::s390_vchhs:
2711 case Intrinsic::s390_vchfs:
2712 case Intrinsic::s390_vchgs:
2713 case Intrinsic::s390_vchqs:
2714 Opcode = SystemZISD::VICMPHS;
2715 CCValid = SystemZ::CCMASK_VCMP;
2716 return true;
2717
2718 case Intrinsic::s390_vchlbs:
2719 case Intrinsic::s390_vchlhs:
2720 case Intrinsic::s390_vchlfs:
2721 case Intrinsic::s390_vchlgs:
2722 case Intrinsic::s390_vchlqs:
2723 Opcode = SystemZISD::VICMPHLS;
2724 CCValid = SystemZ::CCMASK_VCMP;
2725 return true;
2726
2727 case Intrinsic::s390_vtm:
2728 Opcode = SystemZISD::VTM;
2729 CCValid = SystemZ::CCMASK_VCMP;
2730 return true;
2731
2732 case Intrinsic::s390_vfaebs:
2733 case Intrinsic::s390_vfaehs:
2734 case Intrinsic::s390_vfaefs:
2735 Opcode = SystemZISD::VFAE_CC;
2736 CCValid = SystemZ::CCMASK_ANY;
2737 return true;
2738
2739 case Intrinsic::s390_vfaezbs:
2740 case Intrinsic::s390_vfaezhs:
2741 case Intrinsic::s390_vfaezfs:
2742 Opcode = SystemZISD::VFAEZ_CC;
2743 CCValid = SystemZ::CCMASK_ANY;
2744 return true;
2745
2746 case Intrinsic::s390_vfeebs:
2747 case Intrinsic::s390_vfeehs:
2748 case Intrinsic::s390_vfeefs:
2749 Opcode = SystemZISD::VFEE_CC;
2750 CCValid = SystemZ::CCMASK_ANY;
2751 return true;
2752
2753 case Intrinsic::s390_vfeezbs:
2754 case Intrinsic::s390_vfeezhs:
2755 case Intrinsic::s390_vfeezfs:
2756 Opcode = SystemZISD::VFEEZ_CC;
2757 CCValid = SystemZ::CCMASK_ANY;
2758 return true;
2759
2760 case Intrinsic::s390_vfenebs:
2761 case Intrinsic::s390_vfenehs:
2762 case Intrinsic::s390_vfenefs:
2763 Opcode = SystemZISD::VFENE_CC;
2764 CCValid = SystemZ::CCMASK_ANY;
2765 return true;
2766
2767 case Intrinsic::s390_vfenezbs:
2768 case Intrinsic::s390_vfenezhs:
2769 case Intrinsic::s390_vfenezfs:
2770 Opcode = SystemZISD::VFENEZ_CC;
2771 CCValid = SystemZ::CCMASK_ANY;
2772 return true;
2773
// NOTE(review): unlike every other group in this switch, no CCValid
// assignment is visible for the VISTR_CC group in this listing (one line is
// missing between the Opcode assignment and the return). Confirm the mask
// against the full source.
2774 case Intrinsic::s390_vistrbs:
2775 case Intrinsic::s390_vistrhs:
2776 case Intrinsic::s390_vistrfs:
2777 Opcode = SystemZISD::VISTR_CC;
2779 return true;
2780
2781 case Intrinsic::s390_vstrcbs:
2782 case Intrinsic::s390_vstrchs:
2783 case Intrinsic::s390_vstrcfs:
2784 Opcode = SystemZISD::VSTRC_CC;
2785 CCValid = SystemZ::CCMASK_ANY;
2786 return true;
2787
2788 case Intrinsic::s390_vstrczbs:
2789 case Intrinsic::s390_vstrczhs:
2790 case Intrinsic::s390_vstrczfs:
2791 Opcode = SystemZISD::VSTRCZ_CC;
2792 CCValid = SystemZ::CCMASK_ANY;
2793 return true;
2794
2795 case Intrinsic::s390_vstrsb:
2796 case Intrinsic::s390_vstrsh:
2797 case Intrinsic::s390_vstrsf:
2798 Opcode = SystemZISD::VSTRS_CC;
2799 CCValid = SystemZ::CCMASK_ANY;
2800 return true;
2801
2802 case Intrinsic::s390_vstrszb:
2803 case Intrinsic::s390_vstrszh:
2804 case Intrinsic::s390_vstrszf:
2805 Opcode = SystemZISD::VSTRSZ_CC;
2806 CCValid = SystemZ::CCMASK_ANY;
2807 return true;
2808
2809 case Intrinsic::s390_vfcedbs:
2810 case Intrinsic::s390_vfcesbs:
2811 Opcode = SystemZISD::VFCMPES;
2812 CCValid = SystemZ::CCMASK_VCMP;
2813 return true;
2814
2815 case Intrinsic::s390_vfchdbs:
2816 case Intrinsic::s390_vfchsbs:
2817 Opcode = SystemZISD::VFCMPHS;
2818 CCValid = SystemZ::CCMASK_VCMP;
2819 return true;
2820
2821 case Intrinsic::s390_vfchedbs:
2822 case Intrinsic::s390_vfchesbs:
2823 Opcode = SystemZISD::VFCMPHES;
2824 CCValid = SystemZ::CCMASK_VCMP;
2825 return true;
2826
2827 case Intrinsic::s390_vftcidb:
2828 case Intrinsic::s390_vftcisb:
2829 Opcode = SystemZISD::VFTCI;
2830 CCValid = SystemZ::CCMASK_VCMP;
2831 return true;
2832
2833 case Intrinsic::s390_tdc:
2834 Opcode = SystemZISD::TDC;
2835 CCValid = SystemZ::CCMASK_TDC;
2836 return true;
2837
2838 default:
2839 return false;
2840 }
2841}
2842
2843// Emit an intrinsic with chain and an explicit CC register result.
// A new node with the given Opcode and result types (i32, Other) replaces
// the intrinsic; users of the old chain result are redirected to the new
// node's chain, and the new node is returned.
// NOTE(review): the first line of the signature and the declaration of Ops
// are missing from this listing.
2845 unsigned Opcode) {
2846 // Copy all operands except the intrinsic ID.
2847 unsigned NumOps = Op.getNumOperands();
2849 Ops.reserve(NumOps - 1);
// Operand 0 is kept and operand 1 (the intrinsic ID) is skipped.
2850 Ops.push_back(Op.getOperand(0));
2851 for (unsigned I = 2; I < NumOps; ++I)
2852 Ops.push_back(Op.getOperand(I));
2853
2854 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2855 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2856 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
// Result 1 is the chain on both the old and the new node.
2857 SDValue OldChain = SDValue(Op.getNode(), 1);
2858 SDValue NewChain = SDValue(Intr.getNode(), 1);
2859 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2860 return Intr.getNode();
2861}
2862
2863// Emit an intrinsic with an explicit CC register result.
// The new node uses the given Opcode, keeps the original node's value type
// list and takes every operand except operand 0 (the intrinsic ID).
// NOTE(review): the first line of the signature and the declaration of Ops
// are missing from this listing.
2865 unsigned Opcode) {
2866 // Copy all operands except the intrinsic ID.
2867 SDLoc DL(Op);
2868 unsigned NumOps = Op.getNumOperands();
2870 Ops.reserve(NumOps - 1);
2871 for (unsigned I = 1; I < NumOps; ++I) {
2872 SDValue CurrOper = Op.getOperand(I);
// An f16 operand is only expected as operand 1 of s390_tdc (asserted below);
// it is converted to f32 before being passed on.
2873 if (CurrOper.getValueType() == MVT::f16) {
2874 assert((Op.getConstantOperandVal(0) == Intrinsic::s390_tdc && I == 1) &&
2875 "Unhandled intrinsic with f16 operand.");
2876 CurrOper = DAG.getFPExtendOrRound(CurrOper, DL, MVT::f32);
2877 }
2878 Ops.push_back(CurrOper);
2879 }
2880
2881 SDValue Intr = DAG.getNode(Opcode, DL, Op->getVTList(), Ops);
2882 return Intr.getNode();
2883}
2884
2885// CC is a comparison that will be implemented using an integer or
2886// floating-point comparison. Return the condition code mask for
2887// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2888// unsigned comparisons and clear for signed ones. In the floating-point
2889// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
// NOTE(review): the signature line and one line after the SETO case are
// missing from this listing.
//
// CONV(X) expands to three case labels: SET<X> and SETO<X> both return
// CCMASK_CMP_<X>, while SETU<X> additionally ORs in CCMASK_CMP_UO. The last
// line of the macro has no trailing semicolon; it is supplied by the `;`
// written after each CONV(...) use below.
2891#define CONV(X) \
2892 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2893 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2894 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2895
2896 switch (CC) {
2897 default:
2898 llvm_unreachable("Invalid integer condition!");
2899
2900 CONV(EQ);
2901 CONV(NE);
2902 CONV(GT);
2903 CONV(GE);
2904 CONV(LT);
2905 CONV(LE);
2906
2907 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2909 }
// The helper macro is local to this function.
2910#undef CONV
2911}
2912
2913// If C can be converted to a comparison against zero, adjust the operands
2914// as necessary.
2915static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2916 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2917 return;
2918
2919 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2920 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2921 return;
2922
2923 int64_t Value = ConstOp1->getSExtValue();
2924 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2925 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2926 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2927 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2928 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2929 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2930 }
2931}
2932
2933// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2934// adjust the operands as necessary.
// On success C.Op0 is an i32 extending load of the 8- or 16-bit memory value
// with the extension type matching C.ICmpType, and C.Op1 is an i32 constant;
// C.ICmpType and C.CCMask may be updated as well. If any precondition fails
// the function returns without changing C.
// NOTE(review): lines are missing from this listing inside the initializer
// of ExtType near the end of the function.
2935static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2936 Comparison &C) {
2937 // For us to make any changes, it must a comparison between a single-use
2938 // load and a constant.
2939 if (!C.Op0.hasOneUse() ||
2940 C.Op0.getOpcode() != ISD::LOAD ||
2941 C.Op1.getOpcode() != ISD::Constant)
2942 return;
2943
2944 // We must have an 8- or 16-bit load.
2945 auto *Load = cast<LoadSDNode>(C.Op0);
2946 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
// The second test rejects memory types whose store size differs from their
// bit width.
2947 if ((NumBits != 8 && NumBits != 16) ||
2948 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2949 return;
2950
2951 // The load must be an extending one and the constant must be within the
2952 // range of the unextended value.
// NOTE(review): cast<> (unlike dyn_cast<>) is not expected to yield null, so
// the !ConstOp1 test below looks redundant - confirm.
2953 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2954 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2955 return;
2956 uint64_t Value = ConstOp1->getZExtValue();
// NumBits is 8 or 16 at this point, so the int-typed shift cannot overflow.
2957 uint64_t Mask = (1 << NumBits) - 1;
2958 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2959 // Make sure that ConstOp1 is in range of C.Op0.
2960 int64_t SignedValue = ConstOp1->getSExtValue();
// Biasing by 2^(NumBits-1) maps the signed range onto [0, Mask], so a single
// unsigned comparison checks both bounds.
2961 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2962 return;
2963 if (C.ICmpType != SystemZICMP::SignedOnly) {
2964 // Unsigned comparison between two sign-extended values is equivalent
2965 // to unsigned comparison between two zero-extended values.
2966 Value &= Mask;
2967 } else if (NumBits == 8) {
2968 // Try to treat the comparison as unsigned, so that we can use CLI.
2969 // Adjust CCMask and Value as necessary.
2970 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2971 // Test whether the high bit of the byte is set.
2972 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2973 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2974 // Test whether the high bit of the byte is clear.
2975 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2976 else
2977 // No instruction exists for this combination.
2978 return;
2979 C.ICmpType = SystemZICMP::UnsignedOnly;
2980 }
2981 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2982 if (Value > Mask)
2983 return;
2984 // If the constant is in range, we can use any comparison.
2985 C.ICmpType = SystemZICMP::Any;
2986 } else
2987 return;
2988
2989 // Make sure that the first operand is an i32 of the right extension type.
2990 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2993 if (C.Op0.getValueType() != MVT::i32 ||
2994 Load->getExtensionType() != ExtType) {
2995 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2996 Load->getBasePtr(), Load->getPointerInfo(),
2997 Load->getMemoryVT(), Load->getAlign(),
2998 Load->getMemOperand()->getFlags());
2999 // Update the chain uses.
3000 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
3001 }
3002
3003 // Make sure that the second operand is an i32 with the right value.
// A new constant is only created when the type or the adjusted Value
// differs from the original operand.
3004 if (C.Op1.getValueType() != MVT::i32 ||
3005 Value != ConstOp1->getZExtValue())
3006 C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32);
3007}
3008
3009// Return true if Op is either an unextended load, or a load suitable
3010// for integer register-memory comparisons of type ICmpType.
3011static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
3012 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
3013 if (Load) {
3014 // There are no instructions to compare a register with a memory byte.
3015 if (Load->getMemoryVT() == MVT::i8)
3016 return false;
3017 // Otherwise decide on extension type.
3018 switch (Load->getExtensionType()) {
3019 case ISD::NON_EXTLOAD:
3020 return true;
3021 case ISD::SEXTLOAD:
3022 return ICmpType != SystemZICMP::UnsignedOnly;
3023 case ISD::ZEXTLOAD:
3024 return ICmpType != SystemZICMP::SignedOnly;
3025 default:
3026 break;
3027 }
3028 }
3029 return false;
3030}
3031
3032// Return true if it is better to swap the operands of C.
3033static bool shouldSwapCmpOperands(const Comparison &C) {
3034 // If one side of the compare is a load of the stackguard reference value,
3035 // then that load should be Op1.
3036 if (C.Op0.isMachineOpcode() &&
3037 (C.Op0.getMachineOpcode() == SystemZ::LOAD_STACK_GUARD))
3038 return true;
3039
3040 // Leave i128 and f128 comparisons alone, since they have no memory forms.
3041 if (C.Op0.getValueType() == MVT::i128)
3042 return false;
3043 if (C.Op0.getValueType() == MVT::f128)
3044 return false;
3045
3046 // Always keep a floating-point constant second, since comparisons with
3047 // zero can use LOAD TEST and comparisons with other constants make a
3048 // natural memory operand.
3049 if (isa<ConstantFPSDNode>(C.Op1))
3050 return false;
3051
3052 // Never swap comparisons with zero since there are many ways to optimize
3053 // those later.
3054 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3055 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
3056 return false;
3057
3058 // Also keep natural memory operands second if the loaded value is
3059 // only used here. Several comparisons have memory forms.
3060 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
3061 return false;
3062
3063 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
3064 // In that case we generally prefer the memory to be second.
3065 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
3066 // The only exceptions are when the second operand is a constant and
3067 // we can use things like CHHSI.
3068 if (!ConstOp1)
3069 return true;
3070 // The unsigned memory-immediate instructions can handle 16-bit
3071 // unsigned integers.
3072 if (C.ICmpType != SystemZICMP::SignedOnly &&
3073 isUInt<16>(ConstOp1->getZExtValue()))
3074 return false;
3075 // The signed memory-immediate instructions can handle 16-bit
3076 // signed integers.
3077 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
3078 isInt<16>(ConstOp1->getSExtValue()))
3079 return false;
3080 return true;
3081 }
3082
3083 // Try to promote the use of CGFR and CLGFR.
3084 unsigned Opcode0 = C.Op0.getOpcode();
3085 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
3086 return true;
3087 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
3088 return true;
3089 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
3090 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
3091 C.Op0.getConstantOperandVal(1) == 0xffffffff)
3092 return true;
3093
3094 return false;
3095}
3096
3097// Check whether C tests for equality between X and Y and whether X - Y
3098// or Y - X is also computed. In that case it's better to compare the
3099// result of the subtraction against zero.
// Only the users of C.Op0 are scanned, and the first matching SUB wins.
// NOTE(review): the first line of the signature is missing from this listing.
3101 Comparison &C) {
3102 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3103 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3104 for (SDNode *N : C.Op0->users()) {
// Accept the subtraction with the operands in either order.
3105 if (N->getOpcode() == ISD::SUB &&
3106 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
3107 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
3108 // Disable the nsw and nuw flags: the backend needs to handle
3109 // overflow as well during comparison elimination.
// Note that this modifies the existing SUB node in place.
3110 N->dropFlags(SDNodeFlags::NoWrap);
3111 C.Op0 = SDValue(N, 0);
3112 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
3113 return;
3114 }
3115 }
3116 }
3117}
3118
3119// Check whether C compares a floating-point value with zero and if that
3120// floating-point value is also negated. In this case we can use the
3121// negation to set CC, so avoiding separate LOAD AND TEST and
3122// LOAD (NEGATIVE/COMPLEMENT) instructions.
3123static void adjustForFNeg(Comparison &C) {
3124 // This optimization is invalid for strict comparisons, since FNEG
3125 // does not raise any exceptions.
3126 if (C.Chain)
3127 return;
3128 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
3129 if (C1 && C1->isZero()) {
3130 for (SDNode *N : C.Op0->users()) {
3131 if (N->getOpcode() == ISD::FNEG) {
3132 C.Op0 = SDValue(N, 0);
3133 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3134 return;
3135 }
3136 }
3137 }
3138}
3139
3140// Check whether C compares (shl X, 32) with 0 and whether X is
3141// also sign-extended. In that case it is better to test the result
3142// of the sign extension using LTGFR.
3143//
3144// This case is important because InstCombine transforms a comparison
3145// with (sext (trunc X)) into a comparison with (shl X, 32).
3146static void adjustForLTGFR(Comparison &C) {
3147 // Check for a comparison between (shl X, 32) and 0.
3148 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
3149 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
3150 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3151 if (C1 && C1->getZExtValue() == 32) {
3152 SDValue ShlOp0 = C.Op0.getOperand(0);
3153 // See whether X has any SIGN_EXTEND_INREG uses.
3154 for (SDNode *N : ShlOp0->users()) {
3155 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
3156 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
3157 C.Op0 = SDValue(N, 0);
3158 return;
3159 }
3160 }
3161 }
3162 }
3163}
3164
3165// If C compares the truncation of an extending load, try to compare
3166// the untruncated value instead. This exposes more opportunities to
3167// reuse CC.
3168static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
3169 Comparison &C) {
3170 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
3171 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
3172 C.Op1.getOpcode() == ISD::Constant &&
3173 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3174 C.Op1->getAsZExtVal() == 0) {
3175 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
3176 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
3177 C.Op0.getValueSizeInBits().getFixedValue()) {
3178 unsigned Type = L->getExtensionType();
3179 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
3180 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
3181 C.Op0 = C.Op0.getOperand(0);
3182 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
3183 }
3184 }
3185 }
3186}
3187
3188// Adjust if a given Compare is a check of the stack guard against a stack
3189// guard instance on the stack. Specifically, this checks if:
3190// - The operands are a load of the stack guard, and a load from a stack slot
3191// - The original opcode is ICMP
3192// - ICMPType is compatible with unsigned comparison.
// NOTE(review): the opening line of this definition (return type, name and
// leading parameters) is not present in this listing; only the trailing
// `Comparison &C` parameter is visible.
//
// Rewrites C in place into a SystemZISD::CMP_STACKGUARD comparison when every
// check below passes; otherwise C is left untouched.
3194 Comparison &C) {
3195
3196 // Opcode must be ICMP.
3197 if (C.Opcode != SystemZISD::ICMP)
3198 return;
3199 // ICmpType must be Unsigned or Any.
3200 if (C.ICmpType == SystemZICMP::SignedOnly)
3201 return;
3202 // Op0 must be FrameIndex Load.
// The dyn_cast result is used only as a type test on the load's operand 1.
3203 if (!(ISD::isNormalLoad(C.Op0.getNode()) &&
3204 dyn_cast<FrameIndexSDNode>(C.Op0.getOperand(1))))
3205 return;
3206 // Op1 must be LOAD_STACK_GUARD.
// isMachineOpcode() is checked first, before getMachineOpcode() is queried.
3207 if (!C.Op1.isMachineOpcode() ||
3208 C.Op1.getMachineOpcode() != SystemZ::LOAD_STACK_GUARD)
3209 return;
3210
3211 // At this point we are sure that this is a proper CMP_STACKGUARD
3212 // case, update the opcode to reflect this.
3213 C.Opcode = SystemZISD::CMP_STACKGUARD;
// Op1 is cleared: emitCmp builds the CMP_STACKGUARD node from Op0 alone when
// Op1 has no node.
3214 C.Op1 = SDValue();
3215}
3216
3217// Return true if shift operation N has an in-range constant shift value.
3218// Store it in ShiftVal if so.
3219static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
3220 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
3221 if (!Shift)
3222 return false;
3223
3224 uint64_t Amount = Shift->getZExtValue();
3225 if (Amount >= N.getValueSizeInBits())
3226 return false;
3227
3228 ShiftVal = Amount;
3229 return true;
3230}
3231
3232// Check whether an AND with Mask is suitable for a TEST UNDER MASK
3233// instruction and whether the CC value is descriptive enough to handle
3234// a comparison of type Opcode between the AND result and CmpVal.
3235// CCMask says which comparison result is being tested and BitSize is
3236// the number of bits in the operands. If TEST UNDER MASK can be used,
3237// return the corresponding CC mask, otherwise return 0.
// NOTE(review): this listing is incomplete for this function.  The
// declaration of `High` (used below) is not shown, and the statement that
// should follow each `if (CCMask == ...)` test is not shown either.  The
// comments added here describe only what is visible.
//
// Parameters: BitSize is the operand width in bits (not referenced in the
// visible lines), CCMask is the comparison being tested, Mask is the AND
// mask, CmpVal is the value the AND result is compared with, and ICmpType is
// a SystemZICMP value.
3238static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
3239 uint64_t Mask, uint64_t CmpVal,
3240 unsigned ICmpType) {
3241 assert(Mask != 0 && "ANDs with zero should have been removed by now");
3242
3243 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
3244 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
3245 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
3246 return 0;
3247
3248 // Work out the masks for the lowest and highest bits.
// Low is the lowest set bit of Mask, isolated as a single-bit value.
3250 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
3251
3252 // Signed ordered comparisons are effectively unsigned if the sign
3253 // bit is dropped.
3254 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
3255
3256 // Check for equality comparisons with 0, or the equivalent.
3257 if (CmpVal == 0) {
3258 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3260 if (CCMask == SystemZ::CCMASK_CMP_NE)
3262 }
// Unsigned "< CmpVal" with 0 < CmpVal <= Low: the masked value is a multiple
// of Low, so this holds exactly when the masked value is 0.
3263 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
3264 if (CCMask == SystemZ::CCMASK_CMP_LT)
3266 if (CCMask == SystemZ::CCMASK_CMP_GE)
3268 }
// Likewise unsigned "<= CmpVal" with CmpVal < Low.
3269 if (EffectivelyUnsigned && CmpVal < Low) {
3270 if (CCMask == SystemZ::CCMASK_CMP_LE)
3272 if (CCMask == SystemZ::CCMASK_CMP_GT)
3274 }
3275
3276 // Check for equality comparisons with the mask, or the equivalent.
3277 if (CmpVal == Mask) {
3278 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3280 if (CCMask == SystemZ::CCMASK_CMP_NE)
3282 }
// Mask - Low is the largest masked value below Mask itself, so these ranges
// separate "all mask bits set" from everything else.
3283 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
3284 if (CCMask == SystemZ::CCMASK_CMP_GT)
3286 if (CCMask == SystemZ::CCMASK_CMP_LE)
3288 }
3289 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
3290 if (CCMask == SystemZ::CCMASK_CMP_GE)
3292 if (CCMask == SystemZ::CCMASK_CMP_LT)
3294 }
3295
3296 // Check for ordered comparisons with the top bit.
3297 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
3298 if (CCMask == SystemZ::CCMASK_CMP_LE)
3300 if (CCMask == SystemZ::CCMASK_CMP_GT)
3302 }
3303 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
3304 if (CCMask == SystemZ::CCMASK_CMP_LT)
3306 if (CCMask == SystemZ::CCMASK_CMP_GE)
3308 }
3309
3310 // If there are just two bits, we can do equality checks for Low and High
3311 // as well.
3312 if (Mask == Low + High) {
3313 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
3315 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
3317 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
3319 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
3321 }
3322
3323 // Looks like we've exhausted our options.
3324 return 0;
3325}
3326
3327// See whether C can be implemented as a TEST UNDER MASK instruction.
3328// Update the arguments with the TM version if so.
// NOTE(review): the opening line of this definition is not present in this
// listing, and one argument line of the SRL-branch call to
// getTestUnderMaskCond also appears to be missing.  The body uses `DAG`, `DL`
// and `C`, so the missing signature line presumably declares DAG and DL.
//
// On success C is rewritten in place to a SystemZISD::TM (or, for i128,
// SystemZISD::VTM) comparison; otherwise C is left unchanged.
3330 Comparison &C) {
3331 // Use VECTOR TEST UNDER MASK for i128 operations.
3332 if (C.Op0.getValueType() == MVT::i128) {
3333 // We can use VTM for EQ/NE comparisons of x & y against 0.
3334 if (C.Op0.getOpcode() == ISD::AND &&
3335 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3336 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
3337 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
3338 if (Mask && Mask->getAPIntValue() == 0) {
3339 C.Opcode = SystemZISD::VTM;
// Op1 is assigned first because it reads C.Op0's operands, which the next
// statement overwrites.
3340 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
3341 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
3342 C.CCValid = SystemZ::CCMASK_VCMP;
3343 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3344 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3345 else
3346 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3347 }
3348 }
// i128 operands never reach the scalar TM path below.
3349 return;
3350 }
3351
3352 // Check that we have a comparison with a constant.
3353 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3354 if (!ConstOp1)
3355 return;
3356 uint64_t CmpVal = ConstOp1->getZExtValue();
3357
3358 // Check whether the nonconstant input is an AND with a constant mask.
// NewC is a scratch copy; C itself is only modified once a TM form is found.
3359 Comparison NewC(C);
3360 uint64_t MaskVal;
3361 ConstantSDNode *Mask = nullptr;
3362 if (C.Op0.getOpcode() == ISD::AND) {
3363 NewC.Op0 = C.Op0.getOperand(0);
3364 NewC.Op1 = C.Op0.getOperand(1);
3365 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
3366 if (!Mask)
3367 return;
3368 MaskVal = Mask->getZExtValue();
3369 } else {
3370 // There is no instruction to compare with a 64-bit immediate
3371 // so use TMHH instead if possible. We need an unsigned ordered
3372 // comparison with an i64 immediate.
3373 if (NewC.Op0.getValueType() != MVT::i64 ||
3374 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
3375 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
3376 NewC.ICmpType == SystemZICMP::SignedOnly)
3377 return;
3378 // Convert LE and GT comparisons into LT and GE.
3379 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
3380 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
// CmpVal + 1 would wrap around for the all-ones value, so give up.
3381 if (CmpVal == uint64_t(-1))
3382 return;
3383 CmpVal += 1;
3384 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
3385 }
3386 // If the low N bits of Op1 are zero then the low N bits of Op0 can
3387 // be masked off without changing the result.
// (CmpVal & -CmpVal) isolates the lowest set bit of CmpVal; negating it
// yields a mask of that bit and every bit above it (0 when CmpVal is 0).
3388 MaskVal = -(CmpVal & -CmpVal);
3389 NewC.ICmpType = SystemZICMP::UnsignedOnly;
3390 }
3391 if (!MaskVal)
3392 return;
3393
3394 // Check whether the combination of mask, comparison value and comparison
3395 // type are suitable.
// Three attempts, in order: fold a constant SHL into the mask, fold a
// constant SRL into the mask, or use the mask and value as they are.  The
// shift folds require that shifting the mask leaves it nonzero and that
// CmpVal survives the round trip, i.e. no set bits are lost.
3396 unsigned BitSize = NewC.Op0.getValueSizeInBits();
3397 unsigned NewCCMask, ShiftVal;
3398 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3399 NewC.Op0.getOpcode() == ISD::SHL &&
3400 isSimpleShift(NewC.Op0, ShiftVal) &&
3401 (MaskVal >> ShiftVal != 0) &&
3402 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
3403 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3404 MaskVal >> ShiftVal,
3405 CmpVal >> ShiftVal,
3406 SystemZICMP::Any))) {
3407 NewC.Op0 = NewC.Op0.getOperand(0);
3408 MaskVal >>= ShiftVal;
3409 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3410 NewC.Op0.getOpcode() == ISD::SRL &&
3411 isSimpleShift(NewC.Op0, ShiftVal) &&
3412 (MaskVal << ShiftVal != 0) &&
3413 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
3414 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3415 MaskVal << ShiftVal,
3416 CmpVal << ShiftVal,
3418 NewC.Op0 = NewC.Op0.getOperand(0);
3419 MaskVal <<= ShiftVal;
3420 } else {
3421 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
3422 NewC.ICmpType);
3423 if (!NewCCMask)
3424 return;
3425 }
3426
3427 // Go ahead and make the change.
3428 C.Opcode = SystemZISD::TM;
3429 C.Op0 = NewC.Op0;
// Reuse the original mask node when the mask value was not changed by a
// shift fold; otherwise materialize a new constant.
3430 if (Mask && Mask->getZExtValue() == MaskVal)
3431 C.Op1 = SDValue(Mask, 0);
3432 else
3433 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
3434 C.CCValid = SystemZ::CCMASK_TM;
3435 C.CCMask = NewCCMask;
3436}
3437
3438// Implement i128 comparison in vector registers.
// Rewrites an i128 SystemZISD::ICMP comparison C in place so that it is
// carried out with vector operations.  Comparisons with another opcode or
// operand type are left untouched.
//
// NOTE(review): one line of this function is missing from this listing, in
// the "vector comparison reductions" section after C.CCValid is set; see the
// note at that point.
3439static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
3440 Comparison &C) {
3441 if (C.Opcode != SystemZISD::ICMP)
3442 return;
3443 if (C.Op0.getValueType() != MVT::i128)
3444 return;
3445
3446 // Recognize vector comparison reductions.
// That is: an EQ/NE test of a (possibly bitcast, possibly inverted) vector
// compare result against all-zeros or all-ones.
3447 if ((C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3448 C.CCMask == SystemZ::CCMASK_CMP_NE) &&
3449 (isNullConstant(C.Op1) || isAllOnesConstant(C.Op1))) {
3450 bool CmpEq = C.CCMask == SystemZ::CCMASK_CMP_EQ;
3451 bool CmpNull = isNullConstant(C.Op1);
3452 SDValue Src = peekThroughBitcasts(C.Op0);
// Looking through a bitwise NOT swaps the roles of all-zeros and all-ones.
3453 if (Src.hasOneUse() && isBitwiseNot(Src)) {
3454 Src = Src.getOperand(0);
3455 CmpNull = !CmpNull;
3456 }
// Map the vector compare to the opcode with the extra "S" suffix; emitCmp
// takes the CC from result 1 of these nodes.
3457 unsigned Opcode = 0;
3458 if (Src.hasOneUse()) {
3459 switch (Src.getOpcode()) {
3460 case SystemZISD::VICMPE: Opcode = SystemZISD::VICMPES; break;
3461 case SystemZISD::VICMPH: Opcode = SystemZISD::VICMPHS; break;
3462 case SystemZISD::VICMPHL: Opcode = SystemZISD::VICMPHLS; break;
3463 case SystemZISD::VFCMPE: Opcode = SystemZISD::VFCMPES; break;
3464 case SystemZISD::VFCMPH: Opcode = SystemZISD::VFCMPHS; break;
3465 case SystemZISD::VFCMPHE: Opcode = SystemZISD::VFCMPHES; break;
3466 default: break;
3467 }
3468 }
3469 if (Opcode) {
3470 C.Opcode = Opcode;
3471 C.Op0 = Src->getOperand(0);
3472 C.Op1 = Src->getOperand(1);
3473 C.CCValid = SystemZ::CCMASK_VCMP;
// NOTE(review): a line is missing here in this listing.  CmpNull is computed
// above but not consumed by any visible line, so the missing line presumably
// sets C.CCMask from CmpNull - confirm against the full source.
3475 if (!CmpEq)
3476 C.CCMask ^= C.CCValid;
3477 return;
3478 }
3479 }
3480
3481 // Everything below here is not useful if we have native i128 compares.
3482 if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
3483 return;
3484
3485 // (In-)Equality comparisons can be implemented via VCEQGS.
3486 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3487 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3488 C.Opcode = SystemZISD::VICMPES;
3489 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
3490 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
3491 C.CCValid = SystemZ::CCMASK_VCMP;
// NE is expressed as the complement of the EQ mask within CCValid.
3492 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3493 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3494 else
3495 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3496 return;
3497 }
3498
3499 // Normalize other comparisons to GT.
// LT is GT with the operands swapped; LE is the inverse of GT; GE is both.
3500 bool Swap = false, Invert = false;
3501 switch (C.CCMask) {
3502 case SystemZ::CCMASK_CMP_GT: break;
3503 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
3504 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
3505 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
3506 default: llvm_unreachable("Invalid integer condition!");
3507 }
3508 if (Swap)
3509 std::swap(C.Op0, C.Op1);
3510
// Anything not marked UnsignedOnly (SignedOnly or Any) uses the signed form.
3511 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3512 C.Opcode = SystemZISD::UCMP128HI;
3513 else
3514 C.Opcode = SystemZISD::SCMP128HI;
3515 C.CCValid = SystemZ::CCMASK_ANY;
3516 C.CCMask = SystemZ::CCMASK_1;
3517
3518 if (Invert)
3519 C.CCMask ^= C.CCValid;
3520}
3521
3522// See whether the comparison argument contains a redundant AND
3523// and remove it if so. This sometimes happens due to the generic
3524// BRCOND expansion.
3526 Comparison &C) {
3527 if (C.Op0.getOpcode() != ISD::AND)
3528 return;
3529 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3530 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3531 return;
3532 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3533 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3534 return;
3535
3536 C.Op0 = C.Op0.getOperand(0);
3537}
3538
3539// Return a Comparison that tests the condition-code result of intrinsic
3540// node Call against constant integer CC using comparison code Cond.
3541// Opcode is the opcode of the SystemZISD operation for the intrinsic
3542// and CCValid is the set of possible condition-code results.
3543static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3544 SDValue Call, unsigned CCValid, uint64_t CC,
3546 Comparison C(Call, SDValue(), SDValue());
3547 C.Opcode = Opcode;
3548 C.CCValid = CCValid;
3549 if (Cond == ISD::SETEQ)
3550 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3551 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3552 else if (Cond == ISD::SETNE)
3553 // ...and the inverse of that.
3554 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3555 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3556 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3557 // always true for CC>3.
3558 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3559 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3560 // ...and the inverse of that.
3561 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3562 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3563 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3564 // always true for CC>3.
3565 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3566 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3567 // ...and the inverse of that.
3568 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3569 else
3570 llvm_unreachable("Unexpected integer comparison type");
3571 C.CCMask &= CCValid;
3572 return C;
3573}
3574
3575// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
// Builds the Comparison describing how to compare CmpOp0 with CmpOp1 under
// condition Cond.  Chain is non-null for strict floating-point comparisons
// and IsSignaling then selects the signaling form.
//
// NOTE(review): four statement lines of this function are missing from this
// listing (one at the end of the floating-point branch, one between the
// adjustForSubtraction and adjustICmpTruncate calls, and one on each side of
// the adjustICmp128 call).  Comments below cover the visible lines only.
3576static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3577 ISD::CondCode Cond, const SDLoc &DL,
3578 SDValue Chain = SDValue(),
3579 bool IsSignaling = false) {
// A comparison of an intrinsic's CC result against a constant is turned into
// a direct test of the CC produced by the intrinsic itself.
3580 if (CmpOp1.getOpcode() == ISD::Constant) {
3581 assert(!Chain);
3582 unsigned Opcode, CCValid;
// Chained intrinsic: value 0 must be the compared result and must have
// exactly one use.
3583 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3584 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3585 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3586 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3587 CmpOp1->getAsZExtVal(), Cond);
// Chainless intrinsic: the compared result must be the node's last value.
3588 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3589 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3590 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3591 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3592 CmpOp1->getAsZExtVal(), Cond);
3593 }
3594 Comparison C(CmpOp0, CmpOp1, Chain);
3595 C.CCMask = CCMaskForCondCode(Cond);
3596 if (C.Op0.getValueType().isFloatingPoint()) {
3597 C.CCValid = SystemZ::CCMASK_FCMP;
// No chain: ordinary FCMP.  With a chain: the strict quiet or strict
// signaling form, depending on IsSignaling.
3598 if (!C.Chain)
3599 C.Opcode = SystemZISD::FCMP;
3600 else if (!IsSignaling)
3601 C.Opcode = SystemZISD::STRICT_FCMP;
3602 else
3603 C.Opcode = SystemZISD::STRICT_FCMPS;
3605 } else {
3606 assert(!C.Chain);
3607 C.CCValid = SystemZ::CCMASK_ICMP;
3608 C.Opcode = SystemZISD::ICMP;
3609 // Choose the type of comparison. Equality and inequality tests can
3610 // use either signed or unsigned comparisons. The choice also doesn't
3611 // matter if both sign bits are known to be clear. In those cases we
3612 // want to give the main isel code the freedom to choose whichever
3613 // form fits best.
3614 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3615 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3616 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3617 C.ICmpType = SystemZICMP::Any;
// Here the CCMASK_CMP_UO bit of the mask marks an unsigned condition; it is
// cleared again just below.
3618 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3619 C.ICmpType = SystemZICMP::UnsignedOnly;
3620 else
3621 C.ICmpType = SystemZICMP::SignedOnly;
3622 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3623 adjustForRedundantAnd(DAG, DL, C);
3624 adjustZeroCmp(DAG, DL, C);
3625 adjustSubwordCmp(DAG, DL, C);
3626 adjustForSubtraction(DAG, DL, C);
3628 adjustICmpTruncate(DAG, DL, C);
3629 }
3630
// Swapping the operands requires adjusting the CC mask via reverseCCMask.
3631 if (shouldSwapCmpOperands(C)) {
3632 std::swap(C.Op0, C.Op1);
3633 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3634 }
3635
3637 adjustICmp128(DAG, DL, C);
3639 return C;
3640}
3641
3642// Emit the comparison instruction described by C.
// Builds the DAG node for comparison C and returns the SDValue that carries
// its condition-code result.
//
// NOTE(review): this listing is missing the two `case` labels of the switch
// below and the second half of the RegisterOnly expression.
3643static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
// A Comparison without Op1 is either a stack-guard compare or the CC result
// of an intrinsic call held in Op0.
3644 if (!C.Op1.getNode()) {
3645 if (C.Opcode == SystemZISD::CMP_STACKGUARD)
3646 return DAG.getNode(SystemZISD::CMP_STACKGUARD, DL, MVT::i32, C.Op0);
3647 SDNode *Node;
3648 switch (C.Op0.getOpcode()) {
// Chained intrinsic form: the returned value is result 0 of the new node.
3650 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3651 return SDValue(Node, 0);
// Chainless intrinsic form: the returned value is the new node's last result.
3653 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3654 return SDValue(Node, Node->getNumValues() - 1);
3655 default:
3656 llvm_unreachable("Invalid comparison operands");
3657 }
3658 }
// ICMP carries the SystemZICMP comparison type as a third, target-constant
// operand.
3659 if (C.Opcode == SystemZISD::ICMP)
3660 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3661 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3662 if (C.Opcode == SystemZISD::TM) {
3663 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3665 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3666 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3667 }
// These vector compares produce two results: an integer vector and the CC.
// Only the CC (result 1) is returned.
3668 if (C.Opcode == SystemZISD::VICMPES ||
3669 C.Opcode == SystemZISD::VICMPHS ||
3670 C.Opcode == SystemZISD::VICMPHLS ||
3671 C.Opcode == SystemZISD::VFCMPES ||
3672 C.Opcode == SystemZISD::VFCMPHS ||
3673 C.Opcode == SystemZISD::VFCMPHES) {
3674 EVT IntVT = C.Op0.getValueType().changeVectorElementTypeToInteger();
3675 SDVTList VTs = DAG.getVTList(IntVT, MVT::i32);
3676 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3677 return SDValue(Val.getNode(), 1);
3678 }
// Strict comparisons take the chain as first operand and also produce an
// output chain (MVT::Other).
3679 if (C.Chain) {
3680 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3681 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3682 }
3683 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3684}
3685
3686// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3687// 64 bits. Extend is the extension type to use. Store the high part
3688// in Hi and the low part in Lo.
3689static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3690 SDValue Op0, SDValue Op1, SDValue &Hi,
3691 SDValue &Lo) {
3692 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3693 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3694 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3695 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3696 DAG.getConstant(32, DL, MVT::i64));
3697 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3698 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3699}
3700
3701// Lower a binary operation that produces two VT results, one in each
3702// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3703// and Opcode performs the GR128 operation. Store the even register result
3704// in Even and the odd register result in Odd.
3705static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3706 unsigned Opcode, SDValue Op0, SDValue Op1,
3707 SDValue &Even, SDValue &Odd) {
3708 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3709 bool Is32Bit = is32Bit(VT);
3710 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3711 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3712}
3713
3714// Return an i32 value that is 1 if the CC value produced by CCReg is
3715// in the mask CCMask and 0 otherwise. CC is known to have a value
3716// in CCValid, so other values can be ignored.
3717static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3718 unsigned CCValid, unsigned CCMask) {
3719 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3720 DAG.getConstant(0, DL, MVT::i32),
3721 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3722 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3723 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3724}
3725
3726// Return the SystemISD vector comparison operation for CC, or 0 if it cannot
3727// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3728// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3729// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3730// floating-point comparisons.
3733 switch (CC) {
3734 case ISD::SETOEQ:
3735 case ISD::SETEQ:
3736 switch (Mode) {
3737 case CmpMode::Int: return SystemZISD::VICMPE;
3738 case CmpMode::FP: return SystemZISD::VFCMPE;
3739 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3740 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3741 }
3742 llvm_unreachable("Bad mode");
3743
3744 case ISD::SETOGE:
3745 case ISD::SETGE:
3746 switch (Mode) {
3747 case CmpMode::Int: return 0;
3748 case CmpMode::FP: return SystemZISD::VFCMPHE;
3749 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3750 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3751 }
3752 llvm_unreachable("Bad mode");
3753
3754 case ISD::SETOGT:
3755 case ISD::SETGT:
3756 switch (Mode) {
3757 case CmpMode::Int: return SystemZISD::VICMPH;
3758 case CmpMode::FP: return SystemZISD::VFCMPH;
3759 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3760 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3761 }
3762 llvm_unreachable("Bad mode");
3763
3764 case ISD::SETUGT:
3765 switch (Mode) {
3766 case CmpMode::Int: return SystemZISD::VICMPHL;
3767 case CmpMode::FP: return 0;
3768 case CmpMode::StrictFP: return 0;
3769 case CmpMode::SignalingFP: return 0;
3770 }
3771 llvm_unreachable("Bad mode");
3772
3773 default:
3774 return 0;
3775 }
3776}
3777
3778// Return the SystemZISD vector comparison operation for CC or its inverse,
3779// or 0 if neither can be done directly. Indicate in Invert whether the
3780// result is for the inverse of CC. Mode is as above.
3782 bool &Invert) {
3783 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3784 Invert = false;
3785 return Opcode;
3786 }
3787
3788 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3789 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3790 Invert = true;
3791 return Opcode;
3792 }
3793
3794 return 0;
3795}
3796
3797// Return a v2f64 that contains the extended form of elements Start and Start+1
3798// of v4f32 value Op. If Chain is nonnull, return the strict form.
3799static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3800 SDValue Op, SDValue Chain) {
3801 int Mask[] = { Start, -1, Start + 1, -1 };
3802 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3803 if (Chain) {
3804 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3805 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3806 }
3807 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3808}
3809
3810// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3811// producing a result of type VT. If Chain is nonnull, return the strict form.
3812SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3813 const SDLoc &DL, EVT VT,
3814 SDValue CmpOp0,
3815 SDValue CmpOp1,
3816 SDValue Chain) const {
3817 // There is no hardware support for v4f32 (unless we have the vector
3818 // enhancements facility 1), so extend the vector into two v2f64s
3819 // and compare those.
3820 if (CmpOp0.getValueType() == MVT::v4f32 &&
3821 !Subtarget.hasVectorEnhancements1()) {
3822 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3823 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3824 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3825 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3826 if (Chain) {
3827 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3828 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3829 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3830 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3831 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3832 H1.getValue(1), L1.getValue(1),
3833 HRes.getValue(1), LRes.getValue(1) };
3834 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3835 SDValue Ops[2] = { Res, NewChain };
3836 return DAG.getMergeValues(Ops, DL);
3837 }
3838 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3839 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3840 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3841 }
3842 if (Chain) {
3843 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3844 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3845 }
3846 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3847}
3848
3849// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3850// an integer mask of type VT. If Chain is nonnull, we have a strict
3851// floating-point comparison. If in addition IsSignaling is true, we have
3852// a strict signaling floating-point comparison.
// NOTE(review): two lines of this function are missing from this listing: the
// end of the condition guarding the sign-bit optimization, and the first
// statement of the `else` branch that retries with swapped operands.
// Comments below cover the visible lines only.
3853SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3854 const SDLoc &DL, EVT VT,
3855 ISD::CondCode CC,
3856 SDValue CmpOp0,
3857 SDValue CmpOp1,
3858 SDValue Chain,
3859 bool IsSignaling) const {
3860 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
// A chain implies a floating-point comparison, and signaling implies a chain.
3861 assert (!Chain || IsFP);
3862 assert (!IsSignaling || Chain);
3863 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3864 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
// Invert records that the computed mask must be complemented at the end.
3865 bool Invert = false;
3866 SDValue Cmp;
3867 switch (CC) {
3868 // Handle tests for order using (or (ogt y x) (oge x y)).
3869 case ISD::SETUO:
3870 Invert = true;
3871 [[fallthrough]];
3872 case ISD::SETO: {
3873 assert(IsFP && "Unexpected integer comparison");
3874 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3875 DL, VT, CmpOp1, CmpOp0, Chain);
3876 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3877 DL, VT, CmpOp0, CmpOp1, Chain);
3878 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
// Strict form: merge the output chains of the two compares.
3879 if (Chain)
3880 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3881 LT.getValue(1), GE.getValue(1));
3882 break;
3883 }
3884
3885 // Handle <> tests using (or (ogt y x) (ogt x y)).
3886 case ISD::SETUEQ:
3887 Invert = true;
3888 [[fallthrough]];
3889 case ISD::SETONE: {
3890 assert(IsFP && "Unexpected integer comparison");
3891 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3892 DL, VT, CmpOp1, CmpOp0, Chain);
3893 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3894 DL, VT, CmpOp0, CmpOp1, Chain);
3895 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3896 if (Chain)
3897 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3898 LT.getValue(1), GT.getValue(1));
3899 break;
3900 }
3901
3902 // Otherwise a single comparison is enough. It doesn't really
3903 // matter whether we try the inversion or the swap first, since
3904 // there are no cases where both work.
3905 default:
3906 // Optimize sign-bit comparisons to signed compares.
// An EQ/NE test of (and X, splat(sign mask)) becomes a signed GE/LT test of
// X itself.
3907 if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
3909 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3910 APInt Mask;
3911 if (CmpOp0.getOpcode() == ISD::AND
3912 && ISD::isConstantSplatVector(CmpOp0.getOperand(1).getNode(), Mask)
3913 && Mask == APInt::getSignMask(EltSize)) {
3914 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
3915 CmpOp0 = CmpOp0.getOperand(0);
3916 }
3917 }
3918 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3919 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3920 else {
// Second attempt, with the operands passed in swapped order.
3922 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3923 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3924 else
3925 llvm_unreachable("Unhandled comparison");
3926 }
3927 if (Chain)
3928 Chain = Cmp.getValue(1);
3929 break;
3930 }
// Complement the result by XORing with an all-ones splat.
3931 if (Invert) {
3932 SDValue Mask =
3933 DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64));
3934 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3935 }
// If the chain is not already a result of Cmp's own node, bundle the value
// and the chain into one merged result.
3936 if (Chain && Chain.getNode() != Cmp.getNode()) {
3937 SDValue Ops[2] = { Cmp, Chain };
3938 Cmp = DAG.getMergeValues(Ops, DL);
3939 }
3940 return Cmp;
3941}
3942
3943SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3944 SelectionDAG &DAG) const {
3945 SDValue CmpOp0 = Op.getOperand(0);
3946 SDValue CmpOp1 = Op.getOperand(1);
3947 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3948 SDLoc DL(Op);
3949 EVT VT = Op.getValueType();
3950 if (VT.isVector())
3951 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3952
3953 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3954 SDValue CCReg = emitCmp(DAG, DL, C);
3955 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3956}
3957
3958SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3959 SelectionDAG &DAG,
3960 bool IsSignaling) const {
3961 SDValue Chain = Op.getOperand(0);
3962 SDValue CmpOp0 = Op.getOperand(1);
3963 SDValue CmpOp1 = Op.getOperand(2);
3964 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3965 SDLoc DL(Op);
3966 EVT VT = Op.getNode()->getValueType(0);
3967 if (VT.isVector()) {
3968 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3969 Chain, IsSignaling);
3970 return Res.getValue(Op.getResNo());
3971 }
3972
3973 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3974 SDValue CCReg = emitCmp(DAG, DL, C);
3975 CCReg->setFlags(Op->getFlags());
3976 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3977 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3978 return DAG.getMergeValues(Ops, DL);
3979}
3980
3981SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3982 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3983 SDValue CmpOp0 = Op.getOperand(2);
3984 SDValue CmpOp1 = Op.getOperand(3);
3985 SDValue Dest = Op.getOperand(4);
3986 SDLoc DL(Op);
3987
3988 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3989 SDValue CCReg = emitCmp(DAG, DL, C);
3990 return DAG.getNode(
3991 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3992 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3993 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3994}
3995
3996// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3997// allowing Pos and Neg to be wider than CmpOp.
3998static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3999 return (Neg.getOpcode() == ISD::SUB &&
4000 Neg.getOperand(0).getOpcode() == ISD::Constant &&
4001 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
4002 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
4003 Pos.getOperand(0) == CmpOp)));
4004}
4005
4006// Return the absolute or negative absolute of Op; IsNegative decides which.
4008 bool IsNegative) {
4009 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
4010 if (IsNegative)
4011 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
4012 DAG.getConstant(0, DL, Op.getValueType()), Op);
4013 return Op;
4014}
4015
// NOTE(review): the opening line of this definition (return type, name and
// leading parameters) is not present in this listing, and neither are the two
// `case` labels of the switch below.  The body uses `DAG` and `DL`, so the
// missing signature line presumably declares them.
//
// Builds an i128 select between TrueOp and FalseOp from a comparison mask:
// (TrueOp & Mask) | (FalseOp & ~Mask).  C is taken by value, so the
// adjustments made to it here stay local.
4017 Comparison C, SDValue TrueOp, SDValue FalseOp) {
4018 EVT VT = MVT::i128;
4019 unsigned Op;
4020
// NE, GE and LE are handled as the inverse condition with the two select
// operands exchanged.
4021 if (C.CCMask == SystemZ::CCMASK_CMP_NE ||
4022 C.CCMask == SystemZ::CCMASK_CMP_GE ||
4023 C.CCMask == SystemZ::CCMASK_CMP_LE) {
4024 std::swap(TrueOp, FalseOp);
4025 C.CCMask ^= C.CCValid;
4026 }
// LT is GT with the comparison operands swapped.
4027 if (C.CCMask == SystemZ::CCMASK_CMP_LT) {
4028 std::swap(C.Op0, C.Op1);
4029 C.CCMask = SystemZ::CCMASK_CMP_GT;
4030 }
4031 switch (C.CCMask) {
4033 Op = SystemZISD::VICMPE;
4034 break;
// UnsignedOnly selects VICMPHL; every other ICmpType selects VICMPH.
4036 if (C.ICmpType == SystemZICMP::UnsignedOnly)
4037 Op = SystemZISD::VICMPHL;
4038 else
4039 Op = SystemZISD::VICMPH;
4040 break;
4041 default:
4042 llvm_unreachable("Unhandled comparison");
4043 break;
4044 }
4045
4046 SDValue Mask = DAG.getNode(Op, DL, VT, C.Op0, C.Op1);
4047 TrueOp = DAG.getNode(ISD::AND, DL, VT, TrueOp, Mask);
4048 FalseOp = DAG.getNode(ISD::AND, DL, VT, FalseOp, DAG.getNOT(DL, Mask, VT));
4049 return DAG.getNode(ISD::OR, DL, VT, TrueOp, FalseOp);
4050}
4051
4052SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
4053 SelectionDAG &DAG) const {
4054 SDValue CmpOp0 = Op.getOperand(0);
4055 SDValue CmpOp1 = Op.getOperand(1);
4056 SDValue TrueOp = Op.getOperand(2);
4057 SDValue FalseOp = Op.getOperand(3);
4058 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4059 SDLoc DL(Op);
4060
4061 // SELECT_CC involving f16 will not have the cmp-ops promoted by the
4062 // legalizer, as it will be handled according to the type of the resulting
4063 // value. Extend them here if needed.
4064 if (CmpOp0.getSimpleValueType() == MVT::f16) {
4065 CmpOp0 = DAG.getFPExtendOrRound(CmpOp0, SDLoc(CmpOp0), MVT::f32);
4066 CmpOp1 = DAG.getFPExtendOrRound(CmpOp1, SDLoc(CmpOp1), MVT::f32);
4067 }
4068
4069 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
4070
4071 // Check for absolute and negative-absolute selections, including those
4072 // where the comparison value is sign-extended (for LPGFR and LNGFR).
4073 // This check supplements the one in DAGCombiner.
4074 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
4075 C.CCMask != SystemZ::CCMASK_CMP_NE &&
4076 C.Op1.getOpcode() == ISD::Constant &&
4077 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
4078 C.Op1->getAsZExtVal() == 0) {
4079 if (isAbsolute(C.Op0, TrueOp, FalseOp))
4080 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
4081 if (isAbsolute(C.Op0, FalseOp, TrueOp))
4082 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
4083 }
4084
4085 if (Subtarget.hasVectorEnhancements3() &&
4086 C.Opcode == SystemZISD::ICMP &&
4087 C.Op0.getValueType() == MVT::i128 &&
4088 TrueOp.getValueType() == MVT::i128) {
4089 return getI128Select(DAG, DL, C, TrueOp, FalseOp);
4090 }
4091
4092 SDValue CCReg = emitCmp(DAG, DL, C);
4093 SDValue Ops[] = {TrueOp, FalseOp,
4094 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
4095 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
4096
4097 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
4098}
4099
// Lower the address of the global referenced by Node (plus its offset).
// Symbols accepted by Subtarget.isPC32DBLSymbol() are addressed PC-relatively;
// otherwise ELF targets load the address from the GOT and z/OS targets use
// getADAEntry(). Any part of the offset not folded into the address is added
// explicitly at the end.
// NOTE(review): the declarations of `Result` and `CM`, and the last argument
// line of the GOT load, appear to be missing from this listing -- confirm
// against the upstream file.
4100SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
4101 SelectionDAG &DAG) const {
4102 SDLoc DL(Node);
4103 const GlobalValue *GV = Node->getGlobal();
4104 int64_t Offset = Node->getOffset();
4105 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4107
4109 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
4110 if (isInt<32>(Offset)) {
4111 // Assign anchors at 1<<12 byte boundaries.
4112 uint64_t Anchor = Offset & ~uint64_t(0xfff);
4113 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
4114 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4115
4116 // The offset can be folded into the address if it is aligned to a
4117 // halfword.
// From here on Offset is the remainder relative to the anchor.
4118 Offset -= Anchor;
4119 if (Offset != 0 && (Offset & 1) == 0) {
4120 SDValue Full =
4121 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
4122 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
4123 Offset = 0;
4124 }
4125 } else {
4126 // Conservatively load a constant offset greater than 32 bits into a
4127 // register below.
4128 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
4129 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4130 }
4131 } else if (Subtarget.isTargetELF()) {
// Address the symbol's GOT entry PC-relatively and load the address from it.
4132 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
4133 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4134 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4136 } else if (Subtarget.isTargetzOS()) {
4137 Result = getADAEntry(DAG, GV, DL, PtrVT);
4138 } else
4139 llvm_unreachable("Unexpected Subtarget");
4140
4141 // If there was a non-zero offset that we didn't fold, create an explicit
4142 // addition for it.
4143 if (Offset != 0)
4144 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4145 DAG.getSignedConstant(Offset, DL, PtrVT));
4146
4147 return Result;
4148}
4149
// Emit a call node with opcode Opcode (a TLS call such as TLS_GDCALL or
// TLS_LDCALL, as passed by lowerGlobalTLSAddress) to __tls_get_offset for the
// TLS symbol in Node. GOTOffset is passed in %r2 and the GOT address in %r12;
// the value returned in %r2 is the result.
// NOTE(review): the condition guarding the report_fatal_error call and the
// declaration of `Ops` appear to be missing from this listing -- confirm
// against the upstream file.
4150SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
4151 SelectionDAG &DAG,
4152 unsigned Opcode,
4153 SDValue GOTOffset) const {
4154 SDLoc DL(Node);
4155 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4156 SDValue Chain = DAG.getEntryNode();
4157 SDValue Glue;
4158
4161 report_fatal_error("In GHC calling convention TLS is not supported");
4162
4163 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
// Each copy is glued to the previous one so they stay together up to the call.
4164 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
4165 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
4166 Glue = Chain.getValue(1);
4167 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
4168 Glue = Chain.getValue(1);
4169
4170 // The first call operand is the chain and the second is the TLS symbol.
4172 Ops.push_back(Chain);
4173 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
4174 Node->getValueType(0),
4175 0, 0));
4176
4177 // Add argument registers to the end of the list so that they are
4178 // known live into the call.
4179 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
4180 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
4181
4182 // Add a register mask operand representing the call-preserved registers.
4183 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4184 const uint32_t *Mask =
4185 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4186 assert(Mask && "Missing call preserved mask for calling convention");
4187 Ops.push_back(DAG.getRegisterMask(Mask));
4188
4189 // Glue the call to the argument copies.
4190 Ops.push_back(Glue);
4191
4192 // Emit the call.
// The call node yields a chain (result 0) and glue (result 1).
4193 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4194 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
4195 Glue = Chain.getValue(1);
4196
4197 // Copy the return value from %r2.
4198 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
4199}
4200
4201SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
4202 SelectionDAG &DAG) const {
4203 SDValue Chain = DAG.getEntryNode();
4204 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4205
4206 // The high part of the thread pointer is in access register 0.
4207 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
4208 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
4209
4210 // The low part of the thread pointer is in access register 1.
4211 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
4212 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
4213
4214 // Merge them into a single 64-bit address.
4215 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
4216 DAG.getConstant(32, DL, PtrVT));
4217 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
4218}
4219
// Lower the address of a thread-local global: compute the thread pointer and
// add the variable's offset from it, obtained according to the TLS model
// chosen for the global. Emulated TLS is delegated to
// LowerToTLSEmulatedModel().
// NOTE(review): many lines appear to be missing from this listing -- the
// condition guarding report_fatal_error, the declaration of `Offset`, two
// `case` labels, the Create() calls that initialise `CPV`, the trailing
// arguments of each DAG.getLoad call, and a statement after the `MFI` lookup.
// Confirm against the upstream file before editing the code.
4220SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
4221 SelectionDAG &DAG) const {
4222 if (DAG.getTarget().useEmulatedTLS())
4223 return LowerToTLSEmulatedModel(Node, DAG);
4224 SDLoc DL(Node);
4225 const GlobalValue *GV = Node->getGlobal();
4226 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4227 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
4228
4231 report_fatal_error("In GHC calling convention TLS is not supported");
4232
4233 SDValue TP = lowerThreadPointer(DL, DAG);
4234
4235 // Get the offset of GA from the thread pointer, based on the TLS model.
4237 switch (model) {
// First model (case label missing here; it emits TLS_GDCALL).
4239 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
4240 SystemZConstantPoolValue *CPV =
4242
4243 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4244 Offset = DAG.getLoad(
4245 PtrVT, DL, DAG.getEntryNode(), Offset,
4247
4248 // Call __tls_get_offset to retrieve the offset.
4249 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
4250 break;
4251 }
4252
// Second model (case label missing here; it emits TLS_LDCALL).
4254 // Load the GOT offset of the module ID.
4255 SystemZConstantPoolValue *CPV =
4257
4258 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4259 Offset = DAG.getLoad(
4260 PtrVT, DL, DAG.getEntryNode(), Offset,
4262
4263 // Call __tls_get_offset to retrieve the module base offset.
4264 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
4265
4266 // Note: The SystemZLDCleanupPass will remove redundant computations
4267 // of the module base offset. Count total number of local-dynamic
4268 // accesses to trigger execution of that pass.
4269 SystemZMachineFunctionInfo* MFI =
4270 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
4272
4273 // Add the per-symbol offset.
4275
4276 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4277 DTPOffset = DAG.getLoad(
4278 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
4280
4281 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
4282 break;
4283 }
4284
4285 case TLSModel::InitialExec: {
4286 // Load the offset from the GOT.
4287 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
4289 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
4290 Offset =
4291 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
4293 break;
4294 }
4295
4296 case TLSModel::LocalExec: {
4297 // Force the offset into the constant pool and load it from there.
4298 SystemZConstantPoolValue *CPV =
4300
4301 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4302 Offset = DAG.getLoad(
4303 PtrVT, DL, DAG.getEntryNode(), Offset,
4305 break;
4306 }
4307 }
4308
4309 // Add the base and offset together.
4310 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
4311}
4312
4313SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
4314 SelectionDAG &DAG) const {
4315 SDLoc DL(Node);
4316 const BlockAddress *BA = Node->getBlockAddress();
4317 int64_t Offset = Node->getOffset();
4318 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4319
4320 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
4321 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4322 return Result;
4323}
4324
4325SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
4326 SelectionDAG &DAG) const {
4327 SDLoc DL(JT);
4328 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4329 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
4330
4331 // Use LARL to load the address of the table.
4332 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4333}
4334
// Lower a constant-pool reference to a target constant-pool node wrapped in
// SystemZISD::PCREL_WRAPPER. One branch passes the machine-specific pool
// value, the other the plain IR constant together with its offset.
// NOTE(review): the declaration of `Result` and the `if` condition selecting
// between the two branches appear to be missing from this listing -- confirm
// against the upstream file.
4335SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
4336 SelectionDAG &DAG) const {
4337 SDLoc DL(CP);
4338 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4339
4342 Result =
4343 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
4344 else
4345 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
4346 CP->getOffset());
4347
4348 // Use LARL to load the address of the constant pool entry.
4349 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4350}
4351
4352SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
4353 SelectionDAG &DAG) const {
4354 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4355 MachineFunction &MF = DAG.getMachineFunction();
4356 MachineFrameInfo &MFI = MF.getFrameInfo();
4357 MFI.setFrameAddressIsTaken(true);
4358
4359 SDLoc DL(Op);
4360 unsigned Depth = Op.getConstantOperandVal(0);
4361 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4362
4363 // By definition, the frame address is the address of the back chain. (In
4364 // the case of packed stack without backchain, return the address where the
4365 // backchain would have been stored. This will either be an unused space or
4366 // contain a saved register).
4367 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
4368 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
4369
4370 if (Depth > 0) {
4371 // FIXME The frontend should detect this case.
4372 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4373 report_fatal_error("Unsupported stack frame traversal count");
4374
4375 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
4376 while (Depth--) {
4377 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
4378 MachinePointerInfo());
4379 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
4380 }
4381 }
4382
4383 return BackChain;
4384}
4385
// Lower RETURNADDR. For a non-zero Depth (operand 0) the return address is
// loaded from the frame found by lowerFRAMEADDR, at the return-address offset
// reported by the frame lowering; this requires the backchain. For depth 0
// the value is copied from the link register.
// NOTE(review): the line that declares `LinkReg` (whose argument list ends
// with &SystemZ::GR64BitRegClass below) appears to be missing from this
// listing -- confirm against the upstream file.
4386SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
4387 SelectionDAG &DAG) const {
4388 MachineFunction &MF = DAG.getMachineFunction();
4389 MachineFrameInfo &MFI = MF.getFrameInfo();
4390 MFI.setReturnAddressIsTaken(true);
4391
4392 SDLoc DL(Op);
4393 unsigned Depth = Op.getConstantOperandVal(0);
4394 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4395
4396 if (Depth > 0) {
4397 // FIXME The frontend should detect this case.
4398 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4399 report_fatal_error("Unsupported stack frame traversal count");
4400
// Op is passed through unchanged, so lowerFRAMEADDR walks the same Depth.
4401 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
4402 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4403 int Offset = TFL->getReturnAddressOffset(MF);
4404 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
4405 DAG.getSignedConstant(Offset, DL, PtrVT));
4406 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
4407 MachinePointerInfo());
4408 }
4409
4410 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
4411 // implicit live-in.
4412 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
4414 &SystemZ::GR64BitRegClass);
4415 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
4416}
4417
4418SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
4419 SelectionDAG &DAG) const {
4420 SDLoc DL(Op);
4421 SDValue In = Op.getOperand(0);
4422 EVT InVT = In.getValueType();
4423 EVT ResVT = Op.getValueType();
4424
4425 // Convert loads directly. This is normally done by DAGCombiner,
4426 // but we need this case for bitcasts that are created during lowering
4427 // and which are then lowered themselves.
4428 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
4429 if (ISD::isNormalLoad(LoadN)) {
4430 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
4431 LoadN->getBasePtr(), LoadN->getMemOperand());
4432 // Update the chain uses.
4433 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
4434 return NewLoad;
4435 }
4436
4437 if (InVT == MVT::i32 && ResVT == MVT::f32) {
4438 SDValue In64;
4439 if (Subtarget.hasHighWord()) {
4440 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
4441 MVT::i64);
4442 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4443 MVT::i64, SDValue(U64, 0), In);
4444 } else {
4445 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
4446 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
4447 DAG.getConstant(32, DL, MVT::i64));
4448 }
4449 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
4450 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
4451 DL, MVT::f32, Out64);
4452 }
4453 if (InVT == MVT::f32 && ResVT == MVT::i32) {
4454 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
4455 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4456 MVT::f64, SDValue(U64, 0), In);
4457 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
4458 if (Subtarget.hasHighWord())
4459 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
4460 MVT::i32, Out64);
4461 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
4462 DAG.getConstant(32, DL, MVT::i64));
4463 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
4464 }
4465 llvm_unreachable("Unexpected bitcast combination");
4466}
4467
4468SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
4469 SelectionDAG &DAG) const {
4470
4471 if (Subtarget.isTargetXPLINK64())
4472 return lowerVASTART_XPLINK(Op, DAG);
4473 else
4474 return lowerVASTART_ELF(Op, DAG);
4475}
4476
4477SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
4478 SelectionDAG &DAG) const {
4479 MachineFunction &MF = DAG.getMachineFunction();
4480 SystemZMachineFunctionInfo *FuncInfo =
4481 MF.getInfo<SystemZMachineFunctionInfo>();
4482
4483 SDLoc DL(Op);
4484
4485 // vastart just stores the address of the VarArgsFrameIndex slot into the
4486 // memory location argument.
4487 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4488 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4489 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4490 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4491 MachinePointerInfo(SV));
4492}
4493
// Lower VASTART for ELF: initialise the four consecutive 8-byte fields at
// Addr (operand 1) with the first vararg GPR and FPR numbers and the addresses
// of the varargs and register-save frame slots, then join the stores with a
// TokenFactor.
// NOTE(review): the second operand line of the FieldAddr ADD appears to be
// missing from this listing -- confirm against the upstream file.
4494SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
4495 SelectionDAG &DAG) const {
4496 MachineFunction &MF = DAG.getMachineFunction();
4497 SystemZMachineFunctionInfo *FuncInfo =
4498 MF.getInfo<SystemZMachineFunctionInfo>();
4499 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4500
4501 SDValue Chain = Op.getOperand(0);
4502 SDValue Addr = Op.getOperand(1);
4503 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4504 SDLoc DL(Op);
4505
4506 // The initial values of each field.
4507 const unsigned NumFields = 4;
4508 SDValue Fields[NumFields] = {
4509 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
4510 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
4511 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
4512 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
4513 };
4514
4515 // Store each field into its respective slot.
// All stores use the incoming Chain, so they are not ordered with respect to
// each other.
4516 SDValue MemOps[NumFields];
4517 unsigned Offset = 0;
4518 for (unsigned I = 0; I < NumFields; ++I) {
4519 SDValue FieldAddr = Addr;
4520 if (Offset != 0)
4521 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
4523 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
4524 MachinePointerInfo(SV, Offset));
4525 Offset += 8;
4526 }
4527 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4528}
4529
4530SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
4531 SelectionDAG &DAG) const {
4532 SDValue Chain = Op.getOperand(0);
4533 SDValue DstPtr = Op.getOperand(1);
4534 SDValue SrcPtr = Op.getOperand(2);
4535 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4536 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4537 SDLoc DL(Op);
4538
4539 uint32_t Sz =
4540 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
4541 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
4542 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
4543 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
4544 MachinePointerInfo(SrcSV));
4545}
4546
4547SDValue
4548SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
4549 SelectionDAG &DAG) const {
4550 if (Subtarget.isTargetXPLINK64())
4551 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
4552 else
4553 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
4554}
4555
4556SDValue
4557SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
4558 SelectionDAG &DAG) const {
4559 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4560 MachineFunction &MF = DAG.getMachineFunction();
4561 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4562 SDValue Chain = Op.getOperand(0);
4563 SDValue Size = Op.getOperand(1);
4564 SDValue Align = Op.getOperand(2);
4565 SDLoc DL(Op);
4566
4567 // If user has set the no alignment function attribute, ignore
4568 // alloca alignments.
4569 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4570
4571 uint64_t StackAlign = TFI->getStackAlignment();
4572 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4573 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4574
4575 SDValue NeededSpace = Size;
4576
4577 // Add extra space for alignment if needed.
4578 EVT PtrVT = getPointerTy(MF.getDataLayout());
4579 if (ExtraAlignSpace)
4580 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
4581 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4582
4583 bool IsSigned = false;
4584 bool DoesNotReturn = false;
4585 bool IsReturnValueUsed = false;
4586 EVT VT = Op.getValueType();
4587 SDValue AllocaCall =
4588 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4589 CallingConv::C, IsSigned, DL, DoesNotReturn,
4590 IsReturnValueUsed)
4591 .first;
4592
4593 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4594 // to end of call in order to ensure it isn't broken up from the call
4595 // sequence.
4596 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4597 Register SPReg = Regs.getStackPointerRegister();
4598 Chain = AllocaCall.getValue(1);
4599 SDValue Glue = AllocaCall.getValue(2);
4600 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4601 Chain = NewSPRegNode.getValue(1);
4602
4603 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4604 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4605 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4606
4607 // Dynamically realign if needed.
4608 if (ExtraAlignSpace) {
4609 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4610 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4611 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4612 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4613 }
4614
4615 SDValue Ops[2] = {Result, Chain};
4616 return DAG.getMergeValues(Ops, DL);
4617}
4618
// Lower DYNAMIC_STACKALLOC for ELF: lower the stack pointer by the requested
// size (plus alignment padding), either with a PROBED_ALLOCA node or a plain
// SUB and copy to SPReg, re-store the backchain when one is in use, and return
// the merged pair {allocated address, chain}.
// NOTE(review): the declaration of `SPReg` appears to be missing from this
// listing -- confirm against the upstream file.
4619SDValue
4620SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4621 SelectionDAG &DAG) const {
4622 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4623 MachineFunction &MF = DAG.getMachineFunction();
4624 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4625 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4626
4627 SDValue Chain = Op.getOperand(0);
4628 SDValue Size = Op.getOperand(1);
4629 SDValue Align = Op.getOperand(2);
4630 SDLoc DL(Op);
4631
4632 // If user has set the no alignment function attribute, ignore
4633 // alloca alignments.
4634 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4635
// ExtraAlignSpace is non-zero only when the alloca needs more than the stack
// alignment.
4636 uint64_t StackAlign = TFI->getStackAlignment();
4637 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4638 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4639
4641 SDValue NeededSpace = Size;
4642
4643 // Get a reference to the stack pointer.
4644 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4645
4646 // If we need a backchain, save it now.
4647 SDValue Backchain;
4648 if (StoreBackchain)
4649 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4650 MachinePointerInfo());
4651
4652 // Add extra space for alignment if needed.
4653 if (ExtraAlignSpace)
4654 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4655 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4656
4657 // Get the new stack pointer value.
// With inline stack probing the PROBED_ALLOCA node yields the new SP and a
// chain; no separate copy to SPReg is emitted here in that case.
4658 SDValue NewSP;
4659 if (hasInlineStackProbe(MF)) {
4660 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4661 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4662 Chain = NewSP.getValue(1);
4663 }
4664 else {
4665 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4666 // Copy the new stack pointer back.
4667 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4668 }
4669
4670 // The allocated data lives above the 160 bytes allocated for the standard
4671 // frame, plus any outgoing stack arguments. We don't know how much that
4672 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4673 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4674 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4675
4676 // Dynamically realign if needed.
// Round up: add the padding, then clear the bits below RequiredAlign.
4677 if (RequiredAlign > StackAlign) {
4678 Result =
4679 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4680 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4681 Result =
4682 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4683 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4684 }
4685
// Re-store the saved backchain value at the backchain address of the new SP.
4686 if (StoreBackchain)
4687 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4688 MachinePointerInfo());
4689
4690 SDValue Ops[2] = { Result, Chain };
4691 return DAG.getMergeValues(Ops, DL);
4692}
4693
4694SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4695 SDValue Op, SelectionDAG &DAG) const {
4696 SDLoc DL(Op);
4697
4698 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4699}
4700
4701SDValue SystemZTargetLowering::lowerMULH(SDValue Op,
4702 SelectionDAG &DAG,
4703 unsigned Opcode) const {
4704 EVT VT = Op.getValueType();
4705 SDLoc DL(Op);
4706 SDValue Even, Odd;
4707
4708 // This custom expander is only used on z17 and later for 64-bit types.
4709 assert(!is32Bit(VT));
4710 assert(Subtarget.hasMiscellaneousExtensions2());
4711
4712 // SystemZISD::xMUL_LOHI returns the low result in the odd register and
4713 // the high result in the even register. Return the latter.
4714 lowerGR128Binary(DAG, DL, VT, Opcode,
4715 Op.getOperand(0), Op.getOperand(1), Even, Odd);
4716 return Even;
4717}
4718
4719SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4720 SelectionDAG &DAG) const {
4721 EVT VT = Op.getValueType();
4722 SDLoc DL(Op);
4723 SDValue Ops[2];
4724 if (is32Bit(VT))
4725 // Just do a normal 64-bit multiplication and extract the results.
4726 // We define this so that it can be used for constant division.
4727 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4728 Op.getOperand(1), Ops[1], Ops[0]);
4729 else if (Subtarget.hasMiscellaneousExtensions2())
4730 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4731 // the high result in the even register. ISD::SMUL_LOHI is defined to
4732 // return the low half first, so the results are in reverse order.
4733 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4734 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4735 else {
4736 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4737 //
4738 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4739 //
4740 // but using the fact that the upper halves are either all zeros
4741 // or all ones:
4742 //
4743 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4744 //
4745 // and grouping the right terms together since they are quicker than the
4746 // multiplication:
4747 //
4748 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
4749 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4750 SDValue LL = Op.getOperand(0);
4751 SDValue RL = Op.getOperand(1);
4752 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4753 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4754 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4755 // the high result in the even register. ISD::SMUL_LOHI is defined to
4756 // return the low half first, so the results are in reverse order.
4757 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4758 LL, RL, Ops[1], Ops[0]);
4759 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4760 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4761 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4762 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4763 }
4764 return DAG.getMergeValues(Ops, DL);
4765}
4766
4767SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4768 SelectionDAG &DAG) const {
4769 EVT VT = Op.getValueType();
4770 SDLoc DL(Op);
4771 SDValue Ops[2];
4772 if (is32Bit(VT))
4773 // Just do a normal 64-bit multiplication and extract the results.
4774 // We define this so that it can be used for constant division.
4775 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4776 Op.getOperand(1), Ops[1], Ops[0]);
4777 else
4778 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4779 // the high result in the even register. ISD::UMUL_LOHI is defined to
4780 // return the low half first, so the results are in reverse order.
4781 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4782 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4783 return DAG.getMergeValues(Ops, DL);
4784}
4785
4786SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4787 SelectionDAG &DAG) const {
4788 SDValue Op0 = Op.getOperand(0);
4789 SDValue Op1 = Op.getOperand(1);
4790 EVT VT = Op.getValueType();
4791 SDLoc DL(Op);
4792
4793 // We use DSGF for 32-bit division. This means the first operand must
4794 // always be 64-bit, and the second operand should be 32-bit whenever
4795 // that is possible, to improve performance.
4796 if (is32Bit(VT))
4797 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4798 else if (DAG.ComputeNumSignBits(Op1) > 32)
4799 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4800
4801 // DSG(F) returns the remainder in the even register and the
4802 // quotient in the odd register.
4803 SDValue Ops[2];
4804 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4805 return DAG.getMergeValues(Ops, DL);
4806}
4807
4808SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4809 SelectionDAG &DAG) const {
4810 EVT VT = Op.getValueType();
4811 SDLoc DL(Op);
4812
4813 // DL(G) returns the remainder in the even register and the
4814 // quotient in the odd register.
4815 SDValue Ops[2];
4816 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4817 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4818 return DAG.getMergeValues(Ops, DL);
4819}
4820
4821SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4822 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4823
4824 // Get the known-zero masks for each operand.
4825 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4826 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4827 DAG.computeKnownBits(Ops[1])};
4828
4829 // See if the upper 32 bits of one operand and the lower 32 bits of the
4830 // other are known zero. They are the low and high operands respectively.
4831 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4832 Known[1].Zero.getZExtValue() };
4833 unsigned High, Low;
4834 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4835 High = 1, Low = 0;
4836 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4837 High = 0, Low = 1;
4838 else
4839 return Op;
4840
4841 SDValue LowOp = Ops[Low];
4842 SDValue HighOp = Ops[High];
4843
4844 // If the high part is a constant, we're better off using IILH.
4845 if (HighOp.getOpcode() == ISD::Constant)
4846 return Op;
4847
4848 // If the low part is a constant that is outside the range of LHI,
4849 // then we're better off using IILF.
4850 if (LowOp.getOpcode() == ISD::Constant) {
4851 int64_t Value = int32_t(LowOp->getAsZExtVal());
4852 if (!isInt<16>(Value))
4853 return Op;
4854 }
4855
4856 // Check whether the high part is an AND that doesn't change the
4857 // high 32 bits and just masks out low bits. We can skip it if so.
4858 if (HighOp.getOpcode() == ISD::AND &&
4859 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4860 SDValue HighOp0 = HighOp.getOperand(0);
4861 uint64_t Mask = HighOp.getConstantOperandVal(1);
4862 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4863 HighOp = HighOp0;
4864 }
4865
4866 // Take advantage of the fact that all GR32 operations only change the
4867 // low 32 bits by truncating Low to an i32 and inserting it directly
4868 // using a subreg. The interesting cases are those where the truncation
4869 // can be folded.
4870 SDLoc DL(Op);
4871 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4872 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4873 MVT::i64, HighOp, Low32);
4874}
4875
4876// Lower SADDO/SSUBO/UADDO/USUBO nodes.
//
// Returns a MERGE_VALUES node built with N's own value-type list, carrying
// the arithmetic result (value 0) and the overflow/carry flag (value 1).
//
// Two paths:
//  * i128 result: only UADDO and USUBO are accepted (anything else hits
//    llvm_unreachable). The value comes from a plain ADD/SUB and the flag
//    from a separate VACC/VSCBI node.
//  * every other type: a SystemZISD node yielding (value, i32 CC) is built
//    and the CC is turned into a flag by emitSETCC using CCValid/CCMask.
//
// NOTE(review): the embedded listing numbers skip 4920, 4925, 4930 and 4935,
// i.e. one line is missing from each case of the second switch. In the text
// as shown CCMask is never assigned and stays 0 -- presumably the dropped
// lines are the per-opcode CCMask assignments; confirm against the real
// source before relying on this listing.
4877SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4878 SelectionDAG &DAG) const {
4879 SDNode *N = Op.getNode();
4880 SDValue LHS = N->getOperand(0);
4881 SDValue RHS = N->getOperand(1);
4882 SDLoc DL(N);
4883
4884 if (N->getValueType(0) == MVT::i128) {
4885 unsigned BaseOp = 0;
4886 unsigned FlagOp = 0;
4887 bool IsBorrow = false;
4888 switch (Op.getOpcode()) {
4889 default: llvm_unreachable("Unknown instruction!");
4890 case ISD::UADDO:
4891 BaseOp = ISD::ADD;
4892 FlagOp = SystemZISD::VACC;
4893 break;
4894 case ISD::USUBO:
4895 BaseOp = ISD::SUB;
4896 FlagOp = SystemZISD::VSCBI;
4897 IsBorrow = true;
4898 break;
4899 }
4900 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4901 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
// The flag node is i128-typed; record that only its low bit can be set
// before narrowing/widening it to the type of N's second result.
4902 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4903 DAG.getValueType(MVT::i1));
4904 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
// For subtraction the flag is inverted (XOR with 1) -- presumably because
// VSCBI reports "no borrow" as 1 while USUBO wants "borrow" as 1; TODO
// confirm against the instruction definition.
4905 if (IsBorrow)
4906 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4907 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4908 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4909 }
4910
4911 unsigned BaseOp = 0;
4912 unsigned CCValid = 0;
4913 unsigned CCMask = 0;
4914
// Select the target node and the set of CC values it can produce.
4915 switch (Op.getOpcode()) {
4916 default: llvm_unreachable("Unknown instruction!");
4917 case ISD::SADDO:
4918 BaseOp = SystemZISD::SADDO;
4919 CCValid = SystemZ::CCMASK_ARITH;
4921 break;
4922 case ISD::SSUBO:
4923 BaseOp = SystemZISD::SSUBO;
4924 CCValid = SystemZ::CCMASK_ARITH;
4926 break;
4927 case ISD::UADDO:
4928 BaseOp = SystemZISD::UADDO;
4929 CCValid = SystemZ::CCMASK_LOGICAL;
4931 break;
4932 case ISD::USUBO:
4933 BaseOp = SystemZISD::USUBO;
4934 CCValid = SystemZ::CCMASK_LOGICAL;
4936 break;
4937 }
4938
// The target node produces the arithmetic value plus an i32 CC result.
4939 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4940 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4941
// Convert the CC (result 1) into a flag value; truncate when the original
// node's flag type is i1.
4942 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4943 if (N->getValueType(1) == MVT::i1)
4944 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4945
4946 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4947}
4948
// Return true if Carry is the output of a carry chain rooted at a UADDO.
// Starting at Carry, follow operand 2 through any number of UADDO_CARRY
// nodes whose first result is not i128, then require that the node reached
// is a UADDO whose first result is likewise not i128.
4949static bool isAddCarryChain(SDValue Carry) {
4950 while (Carry.getOpcode() == ISD::UADDO_CARRY &&
4951 Carry->getValueType(0) != MVT::i128)
4952 Carry = Carry.getOperand(2);
4953 return Carry.getOpcode() == ISD::UADDO &&
4954 Carry->getValueType(0) != MVT::i128;
4955}
4956
// Return true if Carry is the output of a borrow chain rooted at a USUBO.
// Starting at Carry, follow operand 2 through any number of USUBO_CARRY
// nodes whose first result is not i128, then require that the node reached
// is a USUBO whose first result is likewise not i128.
4957static bool isSubBorrowChain(SDValue Carry) {
4958 while (Carry.getOpcode() == ISD::USUBO_CARRY &&
4959 Carry->getValueType(0) != MVT::i128)
4960 Carry = Carry.getOperand(2);
4961 return Carry.getOpcode() == ISD::USUBO &&
4962 Carry->getValueType(0) != MVT::i128;
4963}
4964
4965// Lower UADDO_CARRY/USUBO_CARRY nodes.
//
// Returns an empty SDValue when the result type is not yet legal, or (for
// non-i128 types) when the incoming carry does not come from a matching
// UADDO/USUBO chain. Otherwise returns a MERGE_VALUES node holding the
// arithmetic result and the outgoing carry/borrow flag.
//
// NOTE(review): the embedded listing numbers skip 5024 and 5032, one line
// in each case of the second switch. In the text as shown CCMask is never
// assigned and stays 0 -- presumably the dropped lines set CCMask; confirm
// against the real source.
4966SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4967 SelectionDAG &DAG) const {
4968
4969 SDNode *N = Op.getNode();
4970 MVT VT = N->getSimpleValueType(0);
4971
4972 // Let legalize expand this if it isn't a legal type yet.
4973 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4974 return SDValue();
4975
4976 SDValue LHS = N->getOperand(0);
4977 SDValue RHS = N->getOperand(1);
4978 SDValue Carry = Op.getOperand(2);
4979 SDLoc DL(N);
4980
// i128: value and flag come from two separate three-operand nodes that both
// take the (widened) incoming carry as their third operand.
4981 if (VT == MVT::i128) {
4982 unsigned BaseOp = 0;
4983 unsigned FlagOp = 0;
4984 bool IsBorrow = false;
4985 switch (Op.getOpcode()) {
4986 default: llvm_unreachable("Unknown instruction!");
4987 case ISD::UADDO_CARRY:
4988 BaseOp = SystemZISD::VAC;
4989 FlagOp = SystemZISD::VACCC;
4990 break;
4991 case ISD::USUBO_CARRY:
4992 BaseOp = SystemZISD::VSBI;
4993 FlagOp = SystemZISD::VSBCBI;
4994 IsBorrow = true;
4995 break;
4996 }
// For subtraction both the incoming and the outgoing flag are inverted with
// XOR 1 -- presumably the vector instructions use the opposite borrow
// polarity from USUBO_CARRY; TODO confirm against the instruction
// definitions.
4997 if (IsBorrow)
4998 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4999 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
5000 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
5001 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
5002 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
5003 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
5004 DAG.getValueType(MVT::i1));
5005 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
5006 if (IsBorrow)
5007 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
5008 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
5009 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
5010 }
5011
5012 unsigned BaseOp = 0;
5013 unsigned CCValid = 0;
5014 unsigned CCMask = 0;
5015
// Non-i128: only handle carries that originate from a UADDO/USUBO chain;
// otherwise give up by returning an empty SDValue.
5016 switch (Op.getOpcode()) {
5017 default: llvm_unreachable("Unknown instruction!");
5018 case ISD::UADDO_CARRY:
5019 if (!isAddCarryChain(Carry))
5020 return SDValue();
5021
5022 BaseOp = SystemZISD::ADDCARRY;
5023 CCValid = SystemZ::CCMASK_LOGICAL;
5025 break;
5026 case ISD::USUBO_CARRY:
5027 if (!isSubBorrowChain(Carry))
5028 return SDValue();
5029
5030 BaseOp = SystemZISD::SUBCARRY;
5031 CCValid = SystemZ::CCMASK_LOGICAL;
5033 break;
5034 }
5035
5036 // Set the condition code from the carry flag.
5037 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
5038 DAG.getConstant(CCValid, DL, MVT::i32),
5039 DAG.getConstant(CCMask, DL, MVT::i32));
5040
// The target node yields the arithmetic value plus an i32 CC result.
5041 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5042 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
5043
// Turn the resulting CC back into a flag value, truncating to i1 if needed.
5044 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
5045 if (N->getValueType(1) == MVT::i1)
5046 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
5047
5048 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
5049}
5050
// Lower a CTPOP node. Three cases are handled in order:
//  1. scalar size of 128 bits: per-doubleword CTPOP on a v2i64 bitcast,
//     followed by a VSUM node;
//  2. vector types: a byte-wise POPCNT on a v16i8 bitcast, followed by a
//     widening step that depends on the element size;
//  3. remaining scalars: a 64-bit POPCNT followed by a shift-and-add tree,
//     shortened when the operand's high bits are known to be zero.
5051SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
5052 SelectionDAG &DAG) const {
5053 EVT VT = Op.getValueType();
5054 SDLoc DL(Op);
// From here on Op refers to the value being counted, not the CTPOP node.
5055 Op = Op.getOperand(0);
5056
5057 if (VT.getScalarSizeInBits() == 128) {
5058 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
5059 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
// Second VSUM operand is an all-zero v2i64.
5060 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
5061 DAG.getConstant(0, DL, MVT::i64));
5062 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5063 return Op;
5064 }
5065
5066 // Handle vector types via VPOPCT.
5067 if (VT.isVector()) {
5068 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
5069 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
5070 switch (VT.getScalarSizeInBits()) {
5071 case 8:
// Byte elements: the v16i8 POPCNT result is used as is.
5072 break;
5073 case 16: {
// Halfword elements: add each element to itself shifted left by 8, then
// shift right by 8, so the two per-byte counts end up summed in the low
// byte of the element.
5074 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
5075 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
5076 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
5077 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
5078 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
5079 break;
5080 }
5081 case 32: {
// Word elements: one VSUM against a zero v16i8 vector.
5082 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
5083 DAG.getConstant(0, DL, MVT::i32));
5084 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5085 break;
5086 }
5087 case 64: {
// Doubleword elements: two VSUM steps (first to v4i32, then to VT), both
// using the same zero vector as second operand.
5088 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
5089 DAG.getConstant(0, DL, MVT::i32));
5090 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
5091 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5092 break;
5093 }
5094 default:
5095 llvm_unreachable("Unexpected type");
5096 }
5097 return Op;
5098 }
5099
5100 // Get the known-zero mask for the operand.
5101 KnownBits Known = DAG.computeKnownBits(Op);
5102 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
// Operand known to be zero: the population count is the constant 0.
5103 if (NumSignificantBits == 0)
5104 return DAG.getConstant(0, DL, VT);
5105
5106 // Skip known-zero high parts of the operand.
// BitSize is the significant-bit count rounded up to a power of two and
// capped at the width of VT.
5107 int64_t OrigBitSize = VT.getSizeInBits();
5108 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
5109 BitSize = std::min(BitSize, OrigBitSize);
5110
5111 // The POPCNT instruction counts the number of bits in each byte.
5112 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
5113 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
5114 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
5115
5116 // Add up per-byte counts in a binary tree. All bits of Op at
5117 // position larger than BitSize remain zero throughout.
5118 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
5119 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
// When working on a narrowed width, mask the shifted copy so nothing spills
// above bit BitSize-1. BitSize is then strictly below OrigBitSize, so the
// 64-bit shift used to build the mask stays in range.
5120 if (BitSize != OrigBitSize)
5121 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
5122 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
5123 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
5124 }
5125
5126 // Extract overall result from high byte.
5127 if (BitSize > 8)
5128 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5129 DAG.getConstant(BitSize - 8, DL, VT));
5130
5131 return Op;
5132}
5133
// Lower an ATOMIC_FENCE node. Operand 0 is passed on as the chain, operand 1
// is read as the AtomicOrdering and operand 2 as the SyncScope::ID.
// A sequentially-consistent, system-scope fence becomes a
// SystemZ::Serialize machine node; every other fence becomes a MEMBARRIER
// node on the same chain.
5134SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
5135 SelectionDAG &DAG) const {
5136 SDLoc DL(Op);
5137 AtomicOrdering FenceOrdering =
5138 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5139 SyncScope::ID FenceSSID =
5140 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5141
5142 // The only fence that needs an instruction is a sequentially-consistent
5143 // cross-thread fence.
5144 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5145 FenceSSID == SyncScope::System) {
5146 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
5147 Op.getOperand(0)),
5148 0);
5149 }
5150
5151 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5152 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
5153}
5154
// Lower an atomic load. Loads whose result type is 128 bits wide are routed
// to lowerATOMIC_LDST_I128; everything else is delegated to lowerLoadF16.
5155SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
5156 SelectionDAG &DAG) const {
5157 EVT RegVT = Op.getValueType();
5158 if (RegVT.getSizeInBits() == 128)
5159 return lowerATOMIC_LDST_I128(Op, DAG);
5160 return lowerLoadF16(Op, DAG);
5161}
5162
// Lower an atomic store. Stores whose memory type is 128 bits wide are
// routed to lowerATOMIC_LDST_I128; everything else is delegated to
// lowerStoreF16. Op must be an AtomicSDNode (enforced by cast<>).
5163SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
5164 SelectionDAG &DAG) const {
5165 auto *Node = cast<AtomicSDNode>(Op.getNode());
5166 if (Node->getMemoryVT().getSizeInBits() == 128)
5167 return lowerATOMIC_LDST_I128(Op, DAG);
5168 return lowerStoreF16(Op, DAG);
5169}
5170
// Lower a 128-bit atomic load or store (memory type i128 or f128, asserted
// below) by forwarding to LowerOperationWrapper and merging whatever values
// it produced into a single node.
//
// NOTE(review): the embedded listing numbers skip 5178, so the declaration
// of `Results` is not visible in this listing -- presumably a small vector
// of SDValue; confirm against the real source.
5171SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
5172 SelectionDAG &DAG) const {
5173 auto *Node = cast<AtomicSDNode>(Op.getNode());
5174 assert(
5175 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
5176 "Only custom lowering i128 or f128.");
5177 // Use same code to handle both legal and non-legal i128 types.
5179 LowerOperationWrapper(Node, Results, DAG);
5180 return DAG.getMergeValues(Results, SDLoc(Op));
5181}
5182
5183// Prepare for a Compare And Swap for a subword operation. This needs to be
5184// done in memory with 4 bytes at natural alignment.
//
// Outputs (all written through reference parameters):
//   AlignedAddr - Addr with its two low bits cleared (Addr & -4).
//   BitShift    - (Addr << 3) truncated to i32.
//   NegBitShift - 0 - BitShift, as an i32.
//
// NOTE(review): the embedded listing numbers skip 5185, so the first line of
// the signature (return type, function name and leading parameters) is
// missing from this listing. Callers elsewhere in this file invoke it as
// getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift).
5186 SDValue &AlignedAddr, SDValue &BitShift,
5187 SDValue &NegBitShift) {
5188 EVT PtrVT = Addr.getValueType();
5189 EVT WideVT = MVT::i32;
5190
5191 // Get the address of the containing word.
5192 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
5193 DAG.getSignedConstant(-4, DL, PtrVT));
5194
5195 // Get the number of bits that the word must be rotated left in order
5196 // to bring the field to the top bits of a GR32.
// (Byte address times 8; the shift is done in the pointer type and only
// then truncated to 32 bits.)
5197 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
5198 DAG.getConstant(3, DL, PtrVT));
5199 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
5200
5201 // Get the complementing shift amount, for rotating a field in the top
5202 // bits back to its proper position.
5203 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
5204 DAG.getConstant(0, DL, WideVT), BitShift);
5205
5206}
5207
5208// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
5209// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
//
// When the memory type is already i32, Op is returned unchanged. Otherwise
// the result is a merge of two values: the loaded field rotated into the low
// bits of an i32, and the output chain of the new ATOMIC_LOADW_* node.
5210SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
5211 SelectionDAG &DAG,
5212 unsigned Opcode) const {
5213 auto *Node = cast<AtomicSDNode>(Op.getNode());
5214
5215 // 32-bit operations need no special handling.
5216 EVT NarrowVT = Node->getMemoryVT();
5217 EVT WideVT = MVT::i32;
5218 if (NarrowVT == WideVT)
5219 return Op;
5220
5221 int64_t BitSize = NarrowVT.getSizeInBits();
5222 SDValue ChainIn = Node->getChain();
5223 SDValue Addr = Node->getBasePtr();
5224 SDValue Src2 = Node->getVal();
5225 MachineMemOperand *MMO = Node->getMemOperand();
5226 SDLoc DL(Node);
5227
5228 // Convert atomic subtracts of constants into additions.
// Only done when the subtrahend is a ConstantSDNode; the constant is
// negated and the opcode switched to ATOMIC_LOADW_ADD.
5229 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
5230 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
5231 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
5232 Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL,
5233 Src2.getValueType());
5234 }
5235
// Compute the containing word's address and the two rotate amounts.
5236 SDValue AlignedAddr, BitShift, NegBitShift;
5237 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5238
5239 // Extend the source operand to 32 bits and prepare it for the inner loop.
5240 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
5241 // operations require the source to be shifted in advance. (This shift
5242 // can be folded if the source is constant.) For AND and NAND, the lower
5243 // bits must be set, while for other opcodes they should be left clear.
5244 if (Opcode != SystemZISD::ATOMIC_SWAPW)
5245 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
5246 DAG.getConstant(32 - BitSize, DL, WideVT));
5247 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
5248 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
5249 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
5250 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
5251
5252 // Construct the ATOMIC_LOADW_* node.
// Operand order: chain, aligned address, prepared source, the two shift
// amounts, and the field width in bits. The memory VT stays NarrowVT.
5253 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
5254 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
5255 DAG.getConstant(BitSize, DL, WideVT) };
5256 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
5257 NarrowVT, MMO);
5258
5259 // Rotate the result of the final CS so that the field is in the lower
5260 // bits of a GR32, then truncate it.
// NOTE(review): no explicit TRUNCATE node is built here; the rotated i32 is
// returned as is.
5261 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
5262 DAG.getConstant(BitSize, DL, WideVT));
5263 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
5264
5265 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
5266 return DAG.getMergeValues(RetOps, DL);
5267}
5268
5269// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
5270// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
//
// For i32/i64 memory types the subtrahend is negated (0 - Src2) and a new
// ATOMIC_LOAD_ADD is built on the same chain, pointer and memory operand.
// All other memory types go through lowerATOMIC_LOAD_OP.
5271SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
5272 SelectionDAG &DAG) const {
5273 auto *Node = cast<AtomicSDNode>(Op.getNode());
5274 EVT MemVT = Node->getMemoryVT();
5275 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
5276 // A full-width operation: negate and use LAA(G).
5277 assert(Op.getValueType() == MemVT && "Mismatched VTs");
5278 assert(Subtarget.hasInterlockedAccess1() &&
5279 "Should have been expanded by AtomicExpand pass.");
5280 SDValue Src2 = Node->getVal();
// Note: the debug location is taken from the subtrahend, not from Node.
5281 SDLoc DL(Src2);
5282 SDValue NegSrc2 =
5283 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
5284 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
5285 Node->getChain(), Node->getBasePtr(), NegSrc2,
5286 Node->getMemOperand());
5287 }
5288
5289 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
5290}
5291
5292// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
//
// Except for the i128 case (which forwards to LowerOperationWrapper and
// returns the merged results), this function does not return a replacement
// node: it rewires the three results of Op (old value, success flag, chain)
// with ReplaceAllUsesOfValueWith and then returns an empty SDValue.
//
// NOTE(review): the embedded listing numbers skip 5305, 5320 and 5342. In
// this listing the declaration of `Results` is missing and both emitSETCC
// calls are cut off after their third argument; the remaining arguments are
// not visible here -- confirm against the real source.
5293SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
5294 SelectionDAG &DAG) const {
5295 auto *Node = cast<AtomicSDNode>(Op.getNode());
5296 SDValue ChainIn = Node->getOperand(0);
5297 SDValue Addr = Node->getOperand(1);
5298 SDValue CmpVal = Node->getOperand(2);
5299 SDValue SwapVal = Node->getOperand(3);
5300 MachineMemOperand *MMO = Node->getMemOperand();
5301 SDLoc DL(Node);
5302
5303 if (Node->getMemoryVT() == MVT::i128) {
5304 // Use same code to handle both legal and non-legal i128 types.
5306 LowerOperationWrapper(Node, Results, DAG);
5307 return DAG.getMergeValues(Results, DL);
5308 }
5309
5310 // We have native support for 32-bit and 64-bit compare and swap, but we
5311 // still need to expand extracting the "success" result from the CC.
// WideVT is i64 for an i64 memory type and i32 for everything else, so the
// test below is true exactly for i32 and i64.
5312 EVT NarrowVT = Node->getMemoryVT();
5313 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
5314 if (NarrowVT == WideVT) {
// The target node yields (old value, i32 CC, chain).
5315 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5316 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
5317 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
5318 DL, Tys, Ops, NarrowVT, MMO);
5319 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5321
5322 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
5323 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5324 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5325 return SDValue();
5326 }
5327
5328 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
5329 // via a fullword ATOMIC_CMP_SWAPW operation.
5330 int64_t BitSize = NarrowVT.getSizeInBits();
5331
5332 SDValue AlignedAddr, BitShift, NegBitShift;
5333 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5334
5335 // Construct the ATOMIC_CMP_SWAPW node.
// Operand order: chain, aligned address, compare value, swap value, the two
// shift amounts, and the field width in bits.
5336 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5337 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
5338 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
5339 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
5340 VTList, Ops, NarrowVT, MMO);
5341 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5343
5344 // emitAtomicCmpSwapW() will zero extend the result (original value).
5345 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
5346 DAG.getValueType(NarrowVT));
5347 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
5348 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5349 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5350 return SDValue();
5351}
5352
// Inspects instruction I and tests, in turn, whether it is an atomic
// StoreInst, LoadInst, AtomicRMWInst or AtomicCmpXchgInst.
//
// NOTE(review): the embedded listing numbers skip 5353, 5361, 5364, 5367,
// 5370 and 5371. The return-type line and every statement guarded by the
// isAtomic() checks (as well as the final fall-through statement) are
// missing from this listing, so the values returned cannot be read off the
// text below. Going by the comment inside the function, the atomic cases
// presumably return a volatile memory-operand flag -- confirm against the
// real source.
5354SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
5355 // Because of how we convert atomic_load and atomic_store to normal loads and
5356 // stores in the DAG, we need to ensure that the MMOs are marked volatile
5357 // since DAGCombine hasn't been updated to account for atomic, but non
5358 // volatile loads. (See D57601)
5359 if (auto *SI = dyn_cast<StoreInst>(&I))
5360 if (SI->isAtomic())
5362 if (auto *LI = dyn_cast<LoadInst>(&I))
5363 if (LI->isAtomic())
5365 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
5366 if (AI->isAtomic())
5368 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
5369 if (AI->isAtomic())
5372}
5373
// Lower STACKSAVE: returns a CopyFromReg of the stack pointer register
// (obtained from the subtarget's special-register description), chained on
// operand 0 and typed like Op.
//
// NOTE(review): the embedded listing numbers skip 5378, so the condition
// guarding report_fatal_error is missing from this listing. Judging by the
// error text it is presumably a check for the GHC calling convention on the
// current function -- confirm against the real source.
5374SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
5375 SelectionDAG &DAG) const {
5376 MachineFunction &MF = DAG.getMachineFunction();
5377 auto *Regs = Subtarget.getSpecialRegisters();
5379 report_fatal_error("Variable-sized stack allocations are not supported "
5380 "in GHC calling convention");
5381 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
5382 Regs->getStackPointerRegister(), Op.getValueType());
5383}
5384
// Lower STACKRESTORE: copies operand 1 (the new stack pointer value) into
// the stack pointer register and returns the resulting chain. When the
// subtarget has the backchain feature, the backchain word is loaded via the
// old stack pointer before the update and stored via the new one afterwards.
//
// NOTE(review): the embedded listing numbers skip 5391, so the condition
// guarding report_fatal_error is missing from this listing. Judging by the
// error text it is presumably a check for the GHC calling convention on the
// current function -- confirm against the real source.
5385SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
5386 SelectionDAG &DAG) const {
5387 MachineFunction &MF = DAG.getMachineFunction();
5388 auto *Regs = Subtarget.getSpecialRegisters();
5389 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
5390
5392 report_fatal_error("Variable-sized stack allocations are not supported "
5393 "in GHC calling convention");
5394
5395 SDValue Chain = Op.getOperand(0);
5396 SDValue NewSP = Op.getOperand(1);
5397 SDValue Backchain;
5398 SDLoc DL(Op);
5399
// Read the current backchain value through the old stack pointer before the
// register is overwritten.
5400 if (StoreBackchain) {
5401 SDValue OldSP = DAG.getCopyFromReg(
5402 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
5403 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
5404 MachinePointerInfo());
5405 }
5406
5407 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
5408
// Re-store the saved backchain value at the location derived from NewSP.
5409 if (StoreBackchain)
5410 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
5411 MachinePointerInfo());
5412
5413 return Chain;
5414}
5415
// Lower a PREFETCH node. Operand 4 is read as the "is data" flag and
// operand 2 as the "is write" flag. Non-data prefetches are dropped (only
// the incoming chain is returned). Data prefetches become a
// SystemZISD::PREFETCH memory-intrinsic node whose operands are the chain,
// a PFD_WRITE/PFD_READ code, and operand 1 of the original node
// (presumably the address -- TODO confirm), reusing the original node's
// value types, memory VT and memory operand.
5416SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
5417 SelectionDAG &DAG) const {
5418 bool IsData = Op.getConstantOperandVal(4);
5419 if (!IsData)
5420 // Just preserve the chain.
5421 return Op.getOperand(0);
5422
5423 SDLoc DL(Op);
5424 bool IsWrite = Op.getConstantOperandVal(2);
5425 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
5426 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
5427 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
5428 Op.getOperand(1)};
5429 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
5430 Node->getVTList(), Ops,
5431 Node->getMemoryVT(), Node->getMemOperand());
5432}
5433
// Lower an INTRINSIC_W_CHAIN node. If isIntrinsicWithCCAndChain recognises
// the intrinsic, a replacement node is emitted and all uses of result 0 of
// the original node are redirected to the CC value derived from it. Only
// result 0 is rewired in this function. An empty SDValue is returned in
// every case, including for unrecognised intrinsics.
5434SDValue
5435SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
5436 SelectionDAG &DAG) const {
// CCValid is filled in by the query below but not used further here.
5437 unsigned Opcode, CCValid;
5438 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
5439 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
5440 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
5441 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
5442 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
5443 return SDValue();
5444 }
5445
5446 return SDValue();
5447}
5448
// Lower an INTRINSIC_WO_CHAIN node.
//
// CC-producing intrinsics (as recognised by isIntrinsicWithCC) are emitted
// through emitIntrinsicWithCC; the CC is the only result when the original
// node has one value, and the second result when it has two.
//
// All other intrinsics are dispatched on the intrinsic ID held in operand 0.
// Each handled ID is mapped to a generic ISD or SystemZISD node that keeps
// the original result type and forwards operands 1..N unchanged. IDs that
// are not listed fall out of the switch and yield an empty SDValue.
5449SDValue
5450SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
5451 SelectionDAG &DAG) const {
// CCValid is filled in by the query below but not used further here.
5452 unsigned Opcode, CCValid;
5453 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
5454 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
5455 if (Op->getNumValues() == 1)
5456 return getCCResult(DAG, SDValue(Node, 0));
5457 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
5458 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
5459 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
5460 }
5461
5462 unsigned Id = Op.getConstantOperandVal(0);
5463 switch (Id) {
5464 case Intrinsic::thread_pointer:
5465 return lowerThreadPointer(SDLoc(Op), DAG);
5466
// Permutes: three operands forwarded as is.
5467 case Intrinsic::s390_vpdi:
5468 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
5469 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5470
5471 case Intrinsic::s390_vperm:
5472 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
5473 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5474
// Unpacks: one operand; four groups map to UNPACK_HIGH, UNPACKL_HIGH,
// UNPACK_LOW and UNPACKL_LOW respectively.
5475 case Intrinsic::s390_vuphb:
5476 case Intrinsic::s390_vuphh:
5477 case Intrinsic::s390_vuphf:
5478 case Intrinsic::s390_vuphg:
5479 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
5480 Op.getOperand(1));
5481
5482 case Intrinsic::s390_vuplhb:
5483 case Intrinsic::s390_vuplhh:
5484 case Intrinsic::s390_vuplhf:
5485 case Intrinsic::s390_vuplhg:
5486 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
5487 Op.getOperand(1));
5488
5489 case Intrinsic::s390_vuplb:
5490 case Intrinsic::s390_vuplhw:
5491 case Intrinsic::s390_vuplf:
5492 case Intrinsic::s390_vuplg:
5493 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
5494 Op.getOperand(1));
5495
5496 case Intrinsic::s390_vupllb:
5497 case Intrinsic::s390_vupllh:
5498 case Intrinsic::s390_vupllf:
5499 case Intrinsic::s390_vupllg:
5500 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
5501 Op.getOperand(1));
5502
// Sums: two operands, all mapped to VSUM.
5503 case Intrinsic::s390_vsumb:
5504 case Intrinsic::s390_vsumh:
5505 case Intrinsic::s390_vsumgh:
5506 case Intrinsic::s390_vsumgf:
5507 case Intrinsic::s390_vsumqf:
5508 case Intrinsic::s390_vsumqg:
5509 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
5510 Op.getOperand(1), Op.getOperand(2));
5511
// Add family: vaq becomes a plain ISD::ADD; the carry-related forms map to
// VACC (two operands) and VAC / VACCC (three operands).
5512 case Intrinsic::s390_vaq:
5513 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5514 Op.getOperand(1), Op.getOperand(2));
5515 case Intrinsic::s390_vaccb:
5516 case Intrinsic::s390_vacch:
5517 case Intrinsic::s390_vaccf:
5518 case Intrinsic::s390_vaccg:
5519 case Intrinsic::s390_vaccq:
5520 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
5521 Op.getOperand(1), Op.getOperand(2));
5522 case Intrinsic::s390_vacq:
5523 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
5524 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5525 case Intrinsic::s390_vacccq:
5526 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
5527 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5528
// Subtract family: vsq becomes a plain ISD::SUB; the borrow-related forms
// map to VSCBI (two operands) and VSBI / VSBCBI (three operands).
5529 case Intrinsic::s390_vsq:
5530 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
5531 Op.getOperand(1), Op.getOperand(2));
5532 case Intrinsic::s390_vscbib:
5533 case Intrinsic::s390_vscbih:
5534 case Intrinsic::s390_vscbif:
5535 case Intrinsic::s390_vscbig:
5536 case Intrinsic::s390_vscbiq:
5537 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
5538 Op.getOperand(1), Op.getOperand(2));
5539 case Intrinsic::s390_vsbiq:
5540 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
5541 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5542 case Intrinsic::s390_vsbcbiq:
5543 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
5544 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5545
// Multiply-high: mapped to the generic MULHS / MULHU nodes.
5546 case Intrinsic::s390_vmhb:
5547 case Intrinsic::s390_vmhh:
5548 case Intrinsic::s390_vmhf:
5549 case Intrinsic::s390_vmhg:
5550 case Intrinsic::s390_vmhq:
5551 return DAG.getNode(ISD::MULHS, SDLoc(Op), Op.getValueType(),
5552 Op.getOperand(1), Op.getOperand(2));
5553 case Intrinsic::s390_vmlhb:
5554 case Intrinsic::s390_vmlhh:
5555 case Intrinsic::s390_vmlhf:
5556 case Intrinsic::s390_vmlhg:
5557 case Intrinsic::s390_vmlhq:
5558 return DAG.getNode(ISD::MULHU, SDLoc(Op), Op.getValueType(),
5559 Op.getOperand(1), Op.getOperand(2));
5560
// Multiply-and-add-high: three operands, mapped to VMAH / VMALH.
5561 case Intrinsic::s390_vmahb:
5562 case Intrinsic::s390_vmahh:
5563 case Intrinsic::s390_vmahf:
5564 case Intrinsic::s390_vmahg:
5565 case Intrinsic::s390_vmahq:
5566 return DAG.getNode(SystemZISD::VMAH, SDLoc(Op), Op.getValueType(),
5567 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5568 case Intrinsic::s390_vmalhb:
5569 case Intrinsic::s390_vmalhh:
5570 case Intrinsic::s390_vmalhf:
5571 case Intrinsic::s390_vmalhg:
5572 case Intrinsic::s390_vmalhq:
5573 return DAG.getNode(SystemZISD::VMALH, SDLoc(Op), Op.getValueType(),
5574 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5575
// Two-operand multiplies mapped to VME / VMLE / VMO / VMLO.
5576 case Intrinsic::s390_vmeb:
5577 case Intrinsic::s390_vmeh:
5578 case Intrinsic::s390_vmef:
5579 case Intrinsic::s390_vmeg:
5580 return DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5581 Op.getOperand(1), Op.getOperand(2));
5582 case Intrinsic::s390_vmleb:
5583 case Intrinsic::s390_vmleh:
5584 case Intrinsic::s390_vmlef:
5585 case Intrinsic::s390_vmleg:
5586 return DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5587 Op.getOperand(1), Op.getOperand(2));
5588 case Intrinsic::s390_vmob:
5589 case Intrinsic::s390_vmoh:
5590 case Intrinsic::s390_vmof:
5591 case Intrinsic::s390_vmog:
5592 return DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5593 Op.getOperand(1), Op.getOperand(2));
5594 case Intrinsic::s390_vmlob:
5595 case Intrinsic::s390_vmloh:
5596 case Intrinsic::s390_vmlof:
5597 case Intrinsic::s390_vmlog:
5598 return DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5599 Op.getOperand(1), Op.getOperand(2));
5600
// Multiply-and-add forms: expressed as ISD::ADD of the corresponding
// VME / VMLE / VMO / VMLO node (operands 1 and 2) and operand 3.
5601 case Intrinsic::s390_vmaeb:
5602 case Intrinsic::s390_vmaeh:
5603 case Intrinsic::s390_vmaef:
5604 case Intrinsic::s390_vmaeg:
5605 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5606 DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5607 Op.getOperand(1), Op.getOperand(2)),
5608 Op.getOperand(3));
5609 case Intrinsic::s390_vmaleb:
5610 case Intrinsic::s390_vmaleh:
5611 case Intrinsic::s390_vmalef:
5612 case Intrinsic::s390_vmaleg:
5613 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5614 DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5615 Op.getOperand(1), Op.getOperand(2)),
5616 Op.getOperand(3));
5617 case Intrinsic::s390_vmaob:
5618 case Intrinsic::s390_vmaoh:
5619 case Intrinsic::s390_vmaof:
5620 case Intrinsic::s390_vmaog:
5621 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5622 DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5623 Op.getOperand(1), Op.getOperand(2)),
5624 Op.getOperand(3));
5625 case Intrinsic::s390_vmalob:
5626 case Intrinsic::s390_vmaloh:
5627 case Intrinsic::s390_vmalof:
5628 case Intrinsic::s390_vmalog:
5629 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5630 DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5631 Op.getOperand(1), Op.getOperand(2)),
5632 Op.getOperand(3));
5633 }
5634
// Intrinsic not handled here.
5635 return SDValue();
5636}
5637
5638namespace {
5639// Says that SystemZISD operation Opcode can be used to perform the equivalent
5640// of a VPERM with permute vector Bytes. If Opcode takes three operands,
5641// Operand is the constant third operand, otherwise it is the number of
5642// bytes in each element of the result.
5643struct Permute {
// SystemZISD opcode implementing the permute.
5644 unsigned Opcode;
// Constant third operand, or result element size in bytes (see above).
5645 unsigned Operand;
// Equivalent VPERM byte selectors, one per byte of the result vector.
5646 unsigned char Bytes[SystemZ::VectorBytes];
5647};
5648}
5649
// Table of the fixed permutes that can be matched against a shuffle: merge
// high/low at four element sizes, pack at three element sizes, and two
// doubleword permutes. Each entry is { opcode, operand, byte selectors };
// the comment above an entry names the instruction it corresponds to.
// The matching code divides a selector by SystemZ::VectorBytes to get the
// source operand number, so the lower range of selector values refers to
// operand 0 and the upper range to operand 1.
5650static const Permute PermuteForms[] = {
5651 // VMRHG
5652 { SystemZISD::MERGE_HIGH, 8,
5653 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
5654 // VMRHF
5655 { SystemZISD::MERGE_HIGH, 4,
5656 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
5657 // VMRHH
5658 { SystemZISD::MERGE_HIGH, 2,
5659 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
5660 // VMRHB
5661 { SystemZISD::MERGE_HIGH, 1,
5662 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
5663 // VMRLG
5664 { SystemZISD::MERGE_LOW, 8,
5665 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
5666 // VMRLF
5667 { SystemZISD::MERGE_LOW, 4,
5668 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
5669 // VMRLH
5670 { SystemZISD::MERGE_LOW, 2,
5671 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
5672 // VMRLB
5673 { SystemZISD::MERGE_LOW, 1,
5674 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
5675 // VPKG
5676 { SystemZISD::PACK, 4,
5677 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
5678 // VPKF
5679 { SystemZISD::PACK, 2,
5680 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
5681 // VPKH
5682 { SystemZISD::PACK, 1,
5683 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
5684 // VPDI V1, V2, 4 (low half of V1, high half of V2)
5685 { SystemZISD::PERMUTE_DWORDS, 4,
5686 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
5687 // VPDI V1, V2, 1 (high half of V1, low half of V2)
5688 { SystemZISD::PERMUTE_DWORDS, 1,
5689 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
5690};
5691
5692// Called after matching a vector shuffle against a particular pattern.
5693// Both the original shuffle and the pattern have two vector operands.
5694// OpNos[0] is the operand of the original shuffle that should be used for
5695// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
5696// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
5697// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
5698// for operands 0 and 1 of the pattern.
//
// Returns false (leaving OpNo0/OpNo1 untouched) when both entries are -1,
// i.e. the pattern constrains neither operand; returns true otherwise.
5699static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
5700 if (OpNos[0] < 0) {
5701 if (OpNos[1] < 0)
5702 return false;
// Only pattern operand 1 is constrained: use that shuffle operand for both.
5703 OpNo0 = OpNo1 = OpNos[1];
5704 } else if (OpNos[1] < 0) {
// Only pattern operand 0 is constrained: use that shuffle operand for both.
5705 OpNo0 = OpNo1 = OpNos[0];
5706 } else {
5707 OpNo0 = OpNos[0];
5708 OpNo1 = OpNos[1];
5709 }
5710 return true;
5711}
5712
5713// Bytes is a VPERM-like permute vector, except that -1 is used for
5714// undefined bytes. Return true if the VPERM can be implemented using P.
5715// When returning true set OpNo0 to the VPERM operand that should be
5716// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5717//
5718// For example, if swapping the VPERM operands allows P to match, OpNo0
5719// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5720// operand, but rewriting it to use two duplicated operands allows it to
5721// match P, then OpNo0 and OpNo1 will be the same.
5722static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5723 unsigned &OpNo0, unsigned &OpNo1) {
// OpNos[i] records which real operand feeds pattern operand i (-1 = not
// yet determined).
5724 int OpNos[] = { -1, -1 };
5725 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5726 int Elt = Bytes[I];
// Undefined bytes (negative selectors) match anything and are skipped.
5727 if (Elt >= 0) {
5728 // Make sure that the two permute vectors use the same suboperand
5729 // byte number. Only the operand numbers (the high bits) are
5730 // allowed to differ.
5731 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5732 return false;
5733 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5734 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5735 // Make sure that the operand mappings are consistent with previous
5736 // elements.
// (A conflict is a previously recorded mapping to the *other* real operand,
// i.e. 1 - RealOpNo; an unset entry of -1 never equals that.)
5737 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5738 return false;
5739 OpNos[ModelOpNo] = RealOpNo;
5740 }
5741 }
5742 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5743}
5744
5745// As above, but search for a matching permute.
5746static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5747 unsigned &OpNo0, unsigned &OpNo1) {
5748 for (auto &P : PermuteForms)
5749 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5750 return &P;
5751 return nullptr;
5752}
5753
5754// Bytes is a VPERM-like permute vector, except that -1 is used for
5755// undefined bytes. This permute is an operand of an outer permute.
5756// See whether redistributing the -1 bytes gives a shuffle that can be
5757// implemented using P. If so, set Transform to a VPERM-like permute vector
5758// that, when applied to the result of P, gives the original permute in Bytes.
5760 const Permute &P,
5761 SmallVectorImpl<int> &Transform) {
5762 unsigned To = 0;
5763 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5764 int Elt = Bytes[From];
5765 if (Elt < 0)
5766 // Byte number From of the result is undefined.
5767 Transform[From] = -1;
5768 else {
5769 while (P.Bytes[To] != Elt) {
5770 To += 1;
5771 if (To == SystemZ::VectorBytes)
5772 return false;
5773 }
5774 Transform[From] = To;
5775 }
5776 }
5777 return true;
5778}
5779
5780// As above, but search for a matching permute.
5781static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5782 SmallVectorImpl<int> &Transform) {
5783 for (auto &P : PermuteForms)
5784 if (matchDoublePermute(Bytes, P, Transform))
5785 return &P;
5786 return nullptr;
5787}
5788
5789// Convert the mask of the given shuffle op into a byte-level mask,
5790// as if it had type vNi8.
5791static bool getVPermMask(SDValue ShuffleOp,
5792 SmallVectorImpl<int> &Bytes) {
5793 EVT VT = ShuffleOp.getValueType();
5794 unsigned NumElements = VT.getVectorNumElements();
5795 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5796
5797 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5798 Bytes.resize(NumElements * BytesPerElement, -1);
5799 for (unsigned I = 0; I < NumElements; ++I) {
5800 int Index = VSN->getMaskElt(I);
5801 if (Index >= 0)
5802 for (unsigned J = 0; J < BytesPerElement; ++J)
5803 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5804 }
5805 return true;
5806 }
5807 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5808 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5809 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5810 Bytes.resize(NumElements * BytesPerElement, -1);
5811 for (unsigned I = 0; I < NumElements; ++I)
5812 for (unsigned J = 0; J < BytesPerElement; ++J)
5813 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5814 return true;
5815 }
5816 return false;
5817}
5818
5819// Bytes is a VPERM-like permute vector, except that -1 is used for
5820// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5821// the result come from a contiguous sequence of bytes from one input.
5822// Set Base to the selector for the first byte if so.
5823static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5824 unsigned BytesPerElement, int &Base) {
5825 Base = -1;
5826 for (unsigned I = 0; I < BytesPerElement; ++I) {
5827 if (Bytes[Start + I] >= 0) {
5828 unsigned Elem = Bytes[Start + I];
5829 if (Base < 0) {
5830 Base = Elem - I;
5831 // Make sure the bytes would come from one input operand.
5832 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5833 return false;
5834 } else if (unsigned(Base) != Elem - I)
5835 return false;
5836 }
5837 }
5838 return true;
5839}
5840
5841// Bytes is a VPERM-like permute vector, except that -1 is used for
5842// undefined bytes. Return true if it can be performed using VSLDB.
5843// When returning true, set StartIndex to the shift amount and OpNo0
5844// and OpNo1 to the VPERM operands that should be used as the first
5845// and second shift operand respectively.
5847 unsigned &StartIndex, unsigned &OpNo0,
5848 unsigned &OpNo1) {
5849 int OpNos[] = { -1, -1 };
5850 int Shift = -1;
5851 for (unsigned I = 0; I < 16; ++I) {
5852 int Index = Bytes[I];
5853 if (Index >= 0) {
5854 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5855 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5856 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5857 if (Shift < 0)
5858 Shift = ExpectedShift;
5859 else if (Shift != ExpectedShift)
5860 return false;
5861 // Make sure that the operand mappings are consistent with previous
5862 // elements.
5863 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5864 return false;
5865 OpNos[ModelOpNo] = RealOpNo;
5866 }
5867 }
5868 StartIndex = Shift;
5869 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5870}
5871
5872// Create a node that performs P on operands Op0 and Op1, casting the
5873// operands to the appropriate type. The type of the result is determined by P.
5875 const Permute &P, SDValue Op0, SDValue Op1) {
5876 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5877 // elements of a PACK are twice as wide as the outputs.
5878 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5879 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5880 P.Operand);
5881 // Cast both operands to the appropriate type.
5882 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5883 SystemZ::VectorBytes / InBytes);
5884 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5885 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5886 SDValue Op;
5887 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5888 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5889 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5890 } else if (P.Opcode == SystemZISD::PACK) {
5891 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5892 SystemZ::VectorBytes / P.Operand);
5893 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5894 } else {
5895 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5896 }
5897 return Op;
5898}
5899
5900static bool isZeroVector(SDValue N) {
5901 if (N->getOpcode() == ISD::BITCAST)
5902 N = N->getOperand(0);
5903 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5904 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5905 return Op->getZExtValue() == 0;
5906 return ISD::isBuildVectorAllZeros(N.getNode());
5907}
5908
5909// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5910static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5911 for (unsigned I = 0; I < Num ; I++)
5912 if (isZeroVector(Ops[I]))
5913 return I;
5914 return UINT32_MAX;
5915}
5916
5917// Bytes is a VPERM-like permute vector, except that -1 is used for
5918// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5919// VSLDB or VPERM.
5921 SDValue *Ops,
5922 const SmallVectorImpl<int> &Bytes) {
5923 for (unsigned I = 0; I < 2; ++I)
5924 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5925
5926 // First see whether VSLDB can be used.
5927 unsigned StartIndex, OpNo0, OpNo1;
5928 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5929 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5930 Ops[OpNo1],
5931 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5932
5933 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5934 // eliminate a zero vector by reusing any zero index in the permute vector.
5935 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5936 if (ZeroVecIdx != UINT32_MAX) {
5937 bool MaskFirst = true;
5938 int ZeroIdx = -1;
5939 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5940 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5941 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5942 if (OpNo == ZeroVecIdx && I == 0) {
5943 // If the first byte is zero, use mask as first operand.
5944 ZeroIdx = 0;
5945 break;
5946 }
5947 if (OpNo != ZeroVecIdx && Byte == 0) {
5948 // If mask contains a zero, use it by placing that vector first.
5949 ZeroIdx = I + SystemZ::VectorBytes;
5950 MaskFirst = false;
5951 break;
5952 }
5953 }
5954 if (ZeroIdx != -1) {
5955 SDValue IndexNodes[SystemZ::VectorBytes];
5956 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5957 if (Bytes[I] >= 0) {
5958 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5959 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5960 if (OpNo == ZeroVecIdx)
5961 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5962 else {
5963 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5964 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5965 }
5966 } else
5967 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5968 }
5969 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5970 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5971 if (MaskFirst)
5972 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5973 Mask);
5974 else
5975 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5976 Mask);
5977 }
5978 }
5979
5980 SDValue IndexNodes[SystemZ::VectorBytes];
5981 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5982 if (Bytes[I] >= 0)
5983 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5984 else
5985 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5986 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5987 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5988 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5989}
5990
5991namespace {
5992// Describes a general N-operand vector shuffle.
5993struct GeneralShuffle {
5994 GeneralShuffle(EVT vt)
5995 : VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {}
5996 void addUndef();
5997 bool add(SDValue, unsigned);
5998 SDValue getNode(SelectionDAG &, const SDLoc &);
5999 void tryPrepareForUnpack();
6000 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
6001 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
6002
6003 // The operands of the shuffle.
6005
6006 // Index I is -1 if byte I of the result is undefined. Otherwise the
6007 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
6008 // Bytes[I] / SystemZ::VectorBytes.
6010
6011 // The type of the shuffle result.
6012 EVT VT;
6013
6014 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
6015 unsigned UnpackFromEltSize;
6016 // True if the final unpack uses the low half.
6017 bool UnpackLow;
6018};
6019} // namespace
6020
6021// Add an extra undefined element to the shuffle.
6022void GeneralShuffle::addUndef() {
6023 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
6024 for (unsigned I = 0; I < BytesPerElement; ++I)
6025 Bytes.push_back(-1);
6026}
6027
6028// Add an extra element to the shuffle, taking it from element Elem of Op.
6029// A null Op indicates a vector input whose value will be calculated later;
6030// there is at most one such input per shuffle and it always has the same
6031// type as the result. Aborts and returns false if the source vector elements
6032// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
6033// LLVM they become implicitly extended, but this is rare and not optimized.
6034bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
6035 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
6036
6037 // The source vector can have wider elements than the result,
6038 // either through an explicit TRUNCATE or because of type legalization.
6039 // We want the least significant part.
6040 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
6041 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
6042
6043 // Return false if the source elements are smaller than their destination
6044 // elements.
6045 if (FromBytesPerElement < BytesPerElement)
6046 return false;
6047
6048 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
6049 (FromBytesPerElement - BytesPerElement));
6050
6051 // Look through things like shuffles and bitcasts.
6052 while (Op.getNode()) {
6053 if (Op.getOpcode() == ISD::BITCAST)
6054 Op = Op.getOperand(0);
6055 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
6056 // See whether the bytes we need come from a contiguous part of one
6057 // operand.
6059 if (!getVPermMask(Op, OpBytes))
6060 break;
6061 int NewByte;
6062 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
6063 break;
6064 if (NewByte < 0) {
6065 addUndef();
6066 return true;
6067 }
6068 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
6069 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
6070 } else if (Op.isUndef()) {
6071 addUndef();
6072 return true;
6073 } else
6074 break;
6075 }
6076
6077 // Make sure that the source of the extraction is in Ops.
6078 unsigned OpNo = 0;
6079 for (; OpNo < Ops.size(); ++OpNo)
6080 if (Ops[OpNo] == Op)
6081 break;
6082 if (OpNo == Ops.size())
6083 Ops.push_back(Op);
6084
6085 // Add the element to Bytes.
6086 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
6087 for (unsigned I = 0; I < BytesPerElement; ++I)
6088 Bytes.push_back(Base + I);
6089
6090 return true;
6091}
6092
6093// Return SDNodes for the completed shuffle.
6094SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
6095 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
6096
6097 if (Ops.size() == 0)
6098 return DAG.getUNDEF(VT);
6099
6100 // Use a single unpack if possible as the last operation.
6101 tryPrepareForUnpack();
6102
6103 // Make sure that there are at least two shuffle operands.
6104 if (Ops.size() == 1)
6105 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
6106
6107 // Create a tree of shuffles, deferring root node until after the loop.
6108 // Try to redistribute the undefined elements of non-root nodes so that
6109 // the non-root shuffles match something like a pack or merge, then adjust
6110 // the parent node's permute vector to compensate for the new order.
6111 // Among other things, this copes with vectors like <2 x i16> that were
6112 // padded with undefined elements during type legalization.
6113 //
6114 // In the best case this redistribution will lead to the whole tree
6115 // using packs and merges. It should rarely be a loss in other cases.
6116 unsigned Stride = 1;
6117 for (; Stride * 2 < Ops.size(); Stride *= 2) {
6118 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
6119 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
6120
6121 // Create a mask for just these two operands.
6123 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6124 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
6125 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
6126 if (OpNo == I)
6127 NewBytes[J] = Byte;
6128 else if (OpNo == I + Stride)
6129 NewBytes[J] = SystemZ::VectorBytes + Byte;
6130 else
6131 NewBytes[J] = -1;
6132 }
6133 // See if it would be better to reorganize NewMask to avoid using VPERM.
6135 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
6136 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
6137 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
6138 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6139 if (NewBytes[J] >= 0) {
6140 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
6141 "Invalid double permute");
6142 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
6143 } else
6144 assert(NewBytesMap[J] < 0 && "Invalid double permute");
6145 }
6146 } else {
6147 // Just use NewBytes on the operands.
6148 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
6149 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
6150 if (NewBytes[J] >= 0)
6151 Bytes[J] = I * SystemZ::VectorBytes + J;
6152 }
6153 }
6154 }
6155
6156 // Now we just have 2 inputs. Put the second operand in Ops[1].
6157 if (Stride > 1) {
6158 Ops[1] = Ops[Stride];
6159 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6160 if (Bytes[I] >= int(SystemZ::VectorBytes))
6161 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
6162 }
6163
6164 // Look for an instruction that can do the permute without resorting
6165 // to VPERM.
6166 unsigned OpNo0, OpNo1;
6167 SDValue Op;
6168 if (unpackWasPrepared() && Ops[1].isUndef())
6169 Op = Ops[0];
6170 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
6171 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
6172 else
6173 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
6174
6175 Op = insertUnpackIfPrepared(DAG, DL, Op);
6176
6177 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6178}
6179
6180#ifndef NDEBUG
6181static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
6182 dbgs() << Msg.c_str() << " { ";
6183 for (unsigned I = 0; I < Bytes.size(); I++)
6184 dbgs() << Bytes[I] << " ";
6185 dbgs() << "}\n";
6186}
6187#endif
6188
6189// If the Bytes vector matches an unpack operation, prepare to do the unpack
6190// after all else by removing the zero vector and the effect of the unpack on
6191// Bytes.
6192void GeneralShuffle::tryPrepareForUnpack() {
6193 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
6194 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
6195 return;
6196
6197 // Only do this if removing the zero vector reduces the depth, otherwise
6198 // the critical path will increase with the final unpack.
6199 if (Ops.size() > 2 &&
6200 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
6201 return;
6202
6203 // Find an unpack that would allow removing the zero vector from Ops.
6204 UnpackFromEltSize = 1;
6205 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
6206 bool MatchUnpack = true;
6208 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
6209 unsigned ToEltSize = UnpackFromEltSize * 2;
6210 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
6211 if (!IsZextByte)
6212 SrcBytes.push_back(Bytes[Elt]);
6213 if (Bytes[Elt] != -1) {
6214 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
6215 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
6216 MatchUnpack = false;
6217 break;
6218 }
6219 }
6220 }
6221 if (MatchUnpack) {
6222 if (Ops.size() == 2) {
6223 // Don't use unpack if a single source operand needs rearrangement.
6224 bool CanUseUnpackLow = true, CanUseUnpackHigh = true;
6225 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) {
6226 if (SrcBytes[i] == -1)
6227 continue;
6228 if (SrcBytes[i] % 16 != int(i))
6229 CanUseUnpackHigh = false;
6230 if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2))
6231 CanUseUnpackLow = false;
6232 if (!CanUseUnpackLow && !CanUseUnpackHigh) {
6233 UnpackFromEltSize = UINT_MAX;
6234 return;
6235 }
6236 }
6237 if (!CanUseUnpackHigh)
6238 UnpackLow = true;
6239 }
6240 break;
6241 }
6242 }
6243 if (UnpackFromEltSize > 4)
6244 return;
6245
6246 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
6247 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
6248 << ".\n";
6249 dumpBytes(Bytes, "Original Bytes vector:"););
6250
6251 // Apply the unpack in reverse to the Bytes array.
6252 unsigned B = 0;
6253 if (UnpackLow) {
6254 while (B < SystemZ::VectorBytes / 2)
6255 Bytes[B++] = -1;
6256 }
6257 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
6258 Elt += UnpackFromEltSize;
6259 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
6260 Bytes[B] = Bytes[Elt];
6261 }
6262 if (!UnpackLow) {
6263 while (B < SystemZ::VectorBytes)
6264 Bytes[B++] = -1;
6265 }
6266
6267 // Remove the zero vector from Ops
6268 Ops.erase(&Ops[ZeroVecOpNo]);
6269 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6270 if (Bytes[I] >= 0) {
6271 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
6272 if (OpNo > ZeroVecOpNo)
6273 Bytes[I] -= SystemZ::VectorBytes;
6274 }
6275
6276 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
6277 dbgs() << "\n";);
6278}
6279
6280SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
6281 const SDLoc &DL,
6282 SDValue Op) {
6283 if (!unpackWasPrepared())
6284 return Op;
6285 unsigned InBits = UnpackFromEltSize * 8;
6286 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
6287 SystemZ::VectorBits / InBits);
6288 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
6289 unsigned OutBits = InBits * 2;
6290 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
6291 SystemZ::VectorBits / OutBits);
6292 return DAG.getNode(UnpackLow ? SystemZISD::UNPACKL_LOW
6293 : SystemZISD::UNPACKL_HIGH,
6294 DL, OutVT, PackedOp);
6295}
6296
6297// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
6299 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
6300 if (!Op.getOperand(I).isUndef())
6301 return false;
6302 return true;
6303}
6304
6305// Return a vector of type VT that contains Value in the first element.
6306// The other elements don't matter.
6308 SDValue Value) {
6309 // If we have a constant, replicate it to all elements and let the
6310 // BUILD_VECTOR lowering take care of it.
6311 if (Value.getOpcode() == ISD::Constant ||
6312 Value.getOpcode() == ISD::ConstantFP) {
6314 return DAG.getBuildVector(VT, DL, Ops);
6315 }
6316 if (Value.isUndef())
6317 return DAG.getUNDEF(VT);
6318 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
6319}
6320
6321// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
6322// element 1. Used for cases in which replication is cheap.
6324 SDValue Op0, SDValue Op1) {
6325 if (Op0.isUndef()) {
6326 if (Op1.isUndef())
6327 return DAG.getUNDEF(VT);
6328 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
6329 }
6330 if (Op1.isUndef())
6331 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
6332 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
6333 buildScalarToVector(DAG, DL, VT, Op0),
6334 buildScalarToVector(DAG, DL, VT, Op1));
6335}
6336
6337// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
6338// vector for them.
6340 SDValue Op1) {
6341 if (Op0.isUndef() && Op1.isUndef())
6342 return DAG.getUNDEF(MVT::v2i64);
6343 // If one of the two inputs is undefined then replicate the other one,
6344 // in order to avoid using another register unnecessarily.
6345 if (Op0.isUndef())
6346 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6347 else if (Op1.isUndef())
6348 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6349 else {
6350 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6351 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6352 }
6353 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
6354}
6355
6356// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
6357// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
6358// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
6359// would benefit from this representation and return it if so.
6361 BuildVectorSDNode *BVN) {
6362 EVT VT = BVN->getValueType(0);
6363 unsigned NumElements = VT.getVectorNumElements();
6364
6365 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
6366 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
6367 // need a BUILD_VECTOR, add an additional placeholder operand for that
6368 // BUILD_VECTOR and store its operands in ResidueOps.
6369 GeneralShuffle GS(VT);
6371 bool FoundOne = false;
6372 for (unsigned I = 0; I < NumElements; ++I) {
6373 SDValue Op = BVN->getOperand(I);
6374 if (Op.getOpcode() == ISD::TRUNCATE)
6375 Op = Op.getOperand(0);
6376 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6377 Op.getOperand(1).getOpcode() == ISD::Constant) {
6378 unsigned Elem = Op.getConstantOperandVal(1);
6379 if (!GS.add(Op.getOperand(0), Elem))
6380 return SDValue();
6381 FoundOne = true;
6382 } else if (Op.isUndef()) {
6383 GS.addUndef();
6384 } else {
6385 if (!GS.add(SDValue(), ResidueOps.size()))
6386 return SDValue();
6387 ResidueOps.push_back(BVN->getOperand(I));
6388 }
6389 }
6390
6391 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
6392 if (!FoundOne)
6393 return SDValue();
6394
6395 // Create the BUILD_VECTOR for the remaining elements, if any.
6396 if (!ResidueOps.empty()) {
6397 while (ResidueOps.size() < NumElements)
6398 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
6399 for (auto &Op : GS.Ops) {
6400 if (!Op.getNode()) {
6401 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
6402 break;
6403 }
6404 }
6405 }
6406 return GS.getNode(DAG, SDLoc(BVN));
6407}
6408
6409bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
6410 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
6411 return true;
6412 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
6413 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
6414 return true;
6415 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
6416 return true;
6417 return false;
6418}
6419
6421 unsigned MergedBits, EVT VT, SDValue Op0,
6422 SDValue Op1) {
6423 MVT IntVecVT = MVT::getVectorVT(MVT::getIntegerVT(MergedBits),
6424 SystemZ::VectorBits / MergedBits);
6425 assert(VT.getSizeInBits() == 128 && IntVecVT.getSizeInBits() == 128 &&
6426 "Handling full vectors only.");
6427 Op0 = DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0);
6428 Op1 = DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op1);
6429 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, DL, IntVecVT, Op0, Op1);
6430 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6431}
6432
6434 EVT VT, SmallVectorImpl<SDValue> &Elems,
6435 unsigned Pos) {
6436 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[Pos + 0], Elems[Pos + 1]);
6437 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[Pos + 2], Elems[Pos + 3]);
6438 // Avoid unnecessary undefs by reusing the other operand.
6439 if (Op01.isUndef()) {
6440 if (Op23.isUndef())
6441 return Op01;
6442 Op01 = Op23;
6443 } else if (Op23.isUndef())
6444 Op23 = Op01;
6445 // Merging identical replications is a no-op.
6446 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
6447 return Op01;
6448 unsigned MergedBits = VT.getSimpleVT().getScalarSizeInBits() * 2;
6449 return mergeHighParts(DAG, DL, MergedBits, VT, Op01, Op23);
6450}
6451
6452// Combine GPR scalar values Elems into a vector of type VT.
6453SDValue
6454SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6455 SmallVectorImpl<SDValue> &Elems) const {
6456 // See whether there is a single replicated value.
6458 unsigned int NumElements = Elems.size();
6459 unsigned int Count = 0;
6460 for (auto Elem : Elems) {
6461 if (!Elem.isUndef()) {
6462 if (!Single.getNode())
6463 Single = Elem;
6464 else if (Elem != Single) {
6465 Single = SDValue();
6466 break;
6467 }
6468 Count += 1;
6469 }
6470 }
6471 // There are three cases here:
6472 //
6473 // - if the only defined element is a loaded one, the best sequence
6474 // is a replicating load.
6475 //
6476 // - otherwise, if the only defined element is an i64 value, we will
6477 // end up with the same VLVGP sequence regardless of whether we short-cut
6478 // for replication or fall through to the later code.
6479 //
6480 // - otherwise, if the only defined element is an i32 or smaller value,
6481 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
6482 // This is only a win if the single defined element is used more than once.
6483 // In other cases we're better off using a single VLVGx.
6484 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
6485 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
6486
6487 // If all elements are loads, use VLREP/VLEs (below).
6488 bool AllLoads = true;
6489 for (auto Elem : Elems)
6490 if (!isVectorElementLoad(Elem)) {
6491 AllLoads = false;
6492 break;
6493 }
6494
6495 // The best way of building a v2i64 from two i64s is to use VLVGP.
6496 if (VT == MVT::v2i64 && !AllLoads)
6497 return joinDwords(DAG, DL, Elems[0], Elems[1]);
6498
6499 // Use a 64-bit merge high to combine two doubles.
6500 if (VT == MVT::v2f64 && !AllLoads)
6501 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6502
6503 // Build v4f32 values directly from the FPRs:
6504 //
6505 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
6506 // V V VMRHF
6507 // <ABxx> <CDxx>
6508 // V VMRHG
6509 // <ABCD>
6510 if (VT == MVT::v4f32 && !AllLoads)
6511 return buildFPVecFromScalars4(DAG, DL, VT, Elems, 0);
6512
6513 // Same for v8f16.
6514 if (VT == MVT::v8f16 && !AllLoads) {
6515 SDValue Op0123 = buildFPVecFromScalars4(DAG, DL, VT, Elems, 0);
6516 SDValue Op4567 = buildFPVecFromScalars4(DAG, DL, VT, Elems, 4);
6517 // Avoid unnecessary undefs by reusing the other operand.
6518 if (Op0123.isUndef())
6519 Op0123 = Op4567;
6520 else if (Op4567.isUndef())
6521 Op4567 = Op0123;
6522 // Merging identical replications is a no-op.
6523 if (Op0123.getOpcode() == SystemZISD::REPLICATE && Op0123 == Op4567)
6524 return Op0123;
6525 return mergeHighParts(DAG, DL, 64, VT, Op0123, Op4567);
6526 }
6527
6528 // Collect the constant terms.
6531
6532 unsigned NumConstants = 0;
6533 for (unsigned I = 0; I < NumElements; ++I) {
6534 SDValue Elem = Elems[I];
6535 if (Elem.getOpcode() == ISD::Constant ||
6536 Elem.getOpcode() == ISD::ConstantFP) {
6537 NumConstants += 1;
6538 Constants[I] = Elem;
6539 Done[I] = true;
6540 }
6541 }
6542 // If there was at least one constant, fill in the other elements of
6543 // Constants with undefs to get a full vector constant and use that
6544 // as the starting point.
6546 SDValue ReplicatedVal;
6547 if (NumConstants > 0) {
6548 for (unsigned I = 0; I < NumElements; ++I)
6549 if (!Constants[I].getNode())
6550 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
6551 Result = DAG.getBuildVector(VT, DL, Constants);
6552 } else {
6553 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
6554 // avoid a false dependency on any previous contents of the vector
6555 // register.
6556
6557 // Use a VLREP if at least one element is a load. Make sure to replicate
6558 // the load with the most elements having its value.
6559 std::map<const SDNode*, unsigned> UseCounts;
6560 SDNode *LoadMaxUses = nullptr;
6561 for (unsigned I = 0; I < NumElements; ++I)
6562 if (isVectorElementLoad(Elems[I])) {
6563 SDNode *Ld = Elems[I].getNode();
6564 unsigned Count = ++UseCounts[Ld];
6565 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < Count)
6566 LoadMaxUses = Ld;
6567 }
6568 if (LoadMaxUses != nullptr) {
6569 ReplicatedVal = SDValue(LoadMaxUses, 0);
6570 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
6571 } else {
6572 // Try to use VLVGP.
6573 unsigned I1 = NumElements / 2 - 1;
6574 unsigned I2 = NumElements - 1;
6575 bool Def1 = !Elems[I1].isUndef();
6576 bool Def2 = !Elems[I2].isUndef();
6577 if (Def1 || Def2) {
6578 SDValue Elem1 = Elems[Def1 ? I1 : I2];
6579 SDValue Elem2 = Elems[Def2 ? I2 : I1];
6580 Result = DAG.getNode(ISD::BITCAST, DL, VT,
6581 joinDwords(DAG, DL, Elem1, Elem2));
6582 Done[I1] = true;
6583 Done[I2] = true;
6584 } else
6585 Result = DAG.getUNDEF(VT);
6586 }
6587 }
6588
6589 // Use VLVGx to insert the other elements.
6590 for (unsigned I = 0; I < NumElements; ++I)
6591 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
6592 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
6593 DAG.getConstant(I, DL, MVT::i32));
6594 return Result;
6595}
6596
// Custom-lower a BUILD_VECTOR node.
//
// Constant vectors are returned unchanged when SystemZVectorConstantInfo
// reports them as legal for the subtarget; otherwise an empty SDValue is
// returned. Non-constant vectors are first offered to tryBuildVectorShuffle,
// then to buildScalarToVector, and finally assembled by buildVector from the
// node's operands.
//
// NOTE(review): the embedded listing numbers skip 6616 and 6621, so the
// condition guarding the buildScalarToVector return and the declaration of
// Ops appear to be absent from this listing -- confirm against upstream.
6597SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
6598 SelectionDAG &DAG) const {
6599 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
6600 SDLoc DL(Op);
6601 EVT VT = Op.getValueType();
6602
6603 if (BVN->isConstant()) {
6604 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
6605 return Op;
6606
6607 // Fall back to loading it from memory.
6608 return SDValue();
6609 }
6610
6611 // See if we should use shuffles to construct the vector from other vectors.
6612 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
6613 return Res;
6614
6615 // Detect SCALAR_TO_VECTOR conversions.
6617 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
6618
6619 // Otherwise use buildVector to build the vector up from GPRs.
6620 unsigned NumElements = Op.getNumOperands();
6622 for (unsigned I = 0; I < NumElements; ++I)
6623 Ops[I] = Op.getOperand(I);
6624 return buildVector(DAG, DL, VT, Ops);
6625}
6626
// Custom-lower a VECTOR_SHUFFLE node.
//
// Splat shuffles become either a REPLICATE of the scalar operand (when the
// condition below finds it directly available) or a SPLAT of the first
// operand at the splat index. All other shuffles are fed element by element
// into a GeneralShuffle; if any element is rejected by GS.add, an empty
// SDValue is returned.
//
// NOTE(review): the embedded listing numbers skip 6641, so the second half
// of the "directly available as a scalar" condition appears to be absent
// from this listing -- confirm against upstream.
6627SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
6628 SelectionDAG &DAG) const {
6629 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
6630 SDLoc DL(Op);
6631 EVT VT = Op.getValueType();
6632 unsigned NumElements = VT.getVectorNumElements();
6633
6634 if (VSN->isSplat()) {
6635 SDValue Op0 = Op.getOperand(0);
6636 unsigned Index = VSN->getSplatIndex();
6637 assert(Index < VT.getVectorNumElements() &&
6638 "Splat index should be defined and in first operand");
6639 // See whether the value we're splatting is directly available as a scalar.
6640 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6642 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
6643 // Otherwise keep it as a vector-to-vector operation.
6644 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
6645 DAG.getTargetConstant(Index, DL, MVT::i32));
6646 }
6647
// Mask elements index the concatenation of both operands: Elt / NumElements
// selects the operand and Elt % NumElements the lane within it. Negative
// mask elements are recorded as undefined.
6648 GeneralShuffle GS(VT);
6649 for (unsigned I = 0; I < NumElements; ++I) {
6650 int Elt = VSN->getMaskElt(I);
6651 if (Elt < 0)
6652 GS.addUndef();
6653 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
6654 unsigned(Elt) % NumElements))
6655 return SDValue();
6656 }
6657 return GS.getNode(DAG, SDLoc(VSN));
6658}
6659
6660SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
6661 SelectionDAG &DAG) const {
6662 SDLoc DL(Op);
6663 // Just insert the scalar into element 0 of an undefined vector.
6664 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
6665 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
6666 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
6667}
6668
6669// Shift the lower 2 bytes of Op to the left in order to insert into the
6670// upper 2 bytes of the FP register.
6672 assert(Op.getSimpleValueType() == MVT::i64 &&
6673 "Expexted to convert i64 to f16.");
6674 SDLoc DL(Op);
6675 SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i64, Op,
6676 DAG.getConstant(48, DL, MVT::i64));
6677 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shft);
6678 SDValue F16Val =
6679 DAG.getTargetExtractSubreg(SystemZ::subreg_h16, DL, MVT::f16, BCast);
6680 return F16Val;
6681}
6682
6683// Extract Op into GPR and shift the 2 f16 bytes to the right.
6685 assert(Op.getSimpleValueType() == MVT::f16 &&
6686 "Expected to convert f16 to i64.");
6687 SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
6688 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, MVT::f64,
6689 SDValue(U32, 0), Op);
6690 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
6691 SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i64, BCast,
6692 DAG.getConstant(48, DL, MVT::i32));
6693 return Shft;
6694}
6695
6696SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6697 SelectionDAG &DAG) const {
6698 // Handle insertions of floating-point values.
6699 SDLoc DL(Op);
6700 SDValue Op0 = Op.getOperand(0);
6701 SDValue Op1 = Op.getOperand(1);
6702 SDValue Op2 = Op.getOperand(2);
6703 EVT VT = Op.getValueType();
6704
6705 // Insertions into constant indices of a v2f64 can be done using VPDI.
6706 // However, if the inserted value is a bitcast or a constant then it's
6707 // better to use GPRs, as below.
6708 if (VT == MVT::v2f64 &&
6709 Op1.getOpcode() != ISD::BITCAST &&
6710 Op1.getOpcode() != ISD::ConstantFP &&
6711 Op2.getOpcode() == ISD::Constant) {
6712 uint64_t Index = Op2->getAsZExtVal();
6713 unsigned Mask = VT.getVectorNumElements() - 1;
6714 if (Index <= Mask)
6715 return Op;
6716 }
6717
6718 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
6719 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
6720 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
6721 SDValue IntOp1 =
6722 VT == MVT::v8f16
6723 ? DAG.getZExtOrTrunc(convertFromF16(Op1, DL, DAG), DL, MVT::i32)
6724 : DAG.getNode(ISD::BITCAST, DL, IntVT, Op1);
6725 SDValue Res =
6726 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
6727 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), IntOp1, Op2);
6728 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6729}
6730
6731SDValue
6732SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6733 SelectionDAG &DAG) const {
6734 // Handle extractions of floating-point values.
6735 SDLoc DL(Op);
6736 SDValue Op0 = Op.getOperand(0);
6737 SDValue Op1 = Op.getOperand(1);
6738 EVT VT = Op.getValueType();
6739 EVT VecVT = Op0.getValueType();
6740
6741 // Extractions of constant indices can be done directly.
6742 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
6743 uint64_t Index = CIndexN->getZExtValue();
6744 unsigned Mask = VecVT.getVectorNumElements() - 1;
6745 if (Index <= Mask)
6746 return Op;
6747 }
6748
6749 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
6750 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
6751 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
6752 MVT ExtrVT = IntVT == MVT::i16 ? MVT::i32 : IntVT;
6753 SDValue Extr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrVT,
6754 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
6755 if (VT == MVT::f16)
6756 return convertToF16(DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Extr), DAG);
6757 return DAG.getNode(ISD::BITCAST, DL, VT, Extr);
6758}
6759
6760SDValue SystemZTargetLowering::
6761lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6762 SDValue PackedOp = Op.getOperand(0);
6763 EVT OutVT = Op.getValueType();
6764 EVT InVT = PackedOp.getValueType();
6765 unsigned ToBits = OutVT.getScalarSizeInBits();
6766 unsigned FromBits = InVT.getScalarSizeInBits();
6767 unsigned StartOffset = 0;
6768
6769 // If the input is a VECTOR_SHUFFLE, there are a number of important
6770 // cases where we can directly implement the sign-extension of the
6771 // original input lanes of the shuffle.
6772 if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
6773 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(PackedOp.getNode());
6774 ArrayRef<int> ShuffleMask = SVN->getMask();
6775 int OutNumElts = OutVT.getVectorNumElements();
6776
6777 // Recognize the special case where the sign-extension can be done
6778 // by the VSEG instruction. Handled via the default expander.
6779 if (ToBits == 64 && OutNumElts == 2) {
6780 int NumElem = ToBits / FromBits;
6781 if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1)
6782 return SDValue();
6783 }
6784
6785 // Recognize the special case where we can fold the shuffle by
6786 // replacing some of the UNPACK_HIGH with UNPACK_LOW.
6787 int StartOffsetCandidate = -1;
6788 for (int Elt = 0; Elt < OutNumElts; Elt++) {
6789 if (ShuffleMask[Elt] == -1)
6790 continue;
6791 if (ShuffleMask[Elt] % OutNumElts == Elt) {
6792 if (StartOffsetCandidate == -1)
6793 StartOffsetCandidate = ShuffleMask[Elt] - Elt;
6794 if (StartOffsetCandidate == ShuffleMask[Elt] - Elt)
6795 continue;
6796 }
6797 StartOffsetCandidate = -1;
6798 break;
6799 }
6800 if (StartOffsetCandidate != -1) {
6801 StartOffset = StartOffsetCandidate;
6802 PackedOp = PackedOp.getOperand(0);
6803 }
6804 }
6805
6806 do {
6807 FromBits *= 2;
6808 unsigned OutNumElts = SystemZ::VectorBits / FromBits;
6809 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), OutNumElts);
6810 unsigned Opcode = SystemZISD::UNPACK_HIGH;
6811 if (StartOffset >= OutNumElts) {
6812 Opcode = SystemZISD::UNPACK_LOW;
6813 StartOffset -= OutNumElts;
6814 }
6815 PackedOp = DAG.getNode(Opcode, SDLoc(PackedOp), OutVT, PackedOp);
6816 } while (FromBits != ToBits);
6817 return PackedOp;
6818}
6819
6820// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
6821SDValue SystemZTargetLowering::
6822lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6823 SDValue PackedOp = Op.getOperand(0);
6824 SDLoc DL(Op);
6825 EVT OutVT = Op.getValueType();
6826 EVT InVT = PackedOp.getValueType();
6827 unsigned InNumElts = InVT.getVectorNumElements();
6828 unsigned OutNumElts = OutVT.getVectorNumElements();
6829 unsigned NumInPerOut = InNumElts / OutNumElts;
6830
6831 SDValue ZeroVec =
6832 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
6833
6834 SmallVector<int, 16> Mask(InNumElts);
6835 unsigned ZeroVecElt = InNumElts;
6836 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6837 unsigned MaskElt = PackedElt * NumInPerOut;
6838 unsigned End = MaskElt + NumInPerOut - 1;
6839 for (; MaskElt < End; MaskElt++)
6840 Mask[MaskElt] = ZeroVecElt++;
6841 Mask[MaskElt] = PackedElt;
6842 }
6843 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6844 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6845}
6846
6847SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6848 unsigned ByScalar) const {
6849 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6850 SDValue Op0 = Op.getOperand(0);
6851 SDValue Op1 = Op.getOperand(1);
6852 SDLoc DL(Op);
6853 EVT VT = Op.getValueType();
6854 unsigned ElemBitSize = VT.getScalarSizeInBits();
6855
6856 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6857 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6858 APInt SplatBits, SplatUndef;
6859 unsigned SplatBitSize;
6860 bool HasAnyUndefs;
6861 // Check for constant splats. Use ElemBitSize as the minimum element
6862 // width and reject splats that need wider elements.
6863 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6864 ElemBitSize, true) &&
6865 SplatBitSize == ElemBitSize) {
6866 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6867 DL, MVT::i32);
6868 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6869 }
6870 // Check for variable splats.
6871 BitVector UndefElements;
6872 SDValue Splat = BVN->getSplatValue(&UndefElements);
6873 if (Splat) {
6874 // Since i32 is the smallest legal type, we either need a no-op
6875 // or a truncation.
6876 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6877 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6878 }
6879 }
6880
6881 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6882 // and the shift amount is directly available in a GPR.
6883 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6884 if (VSN->isSplat()) {
6885 SDValue VSNOp0 = VSN->getOperand(0);
6886 unsigned Index = VSN->getSplatIndex();
6887 assert(Index < VT.getVectorNumElements() &&
6888 "Splat index should be defined and in first operand");
6889 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6890 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6891 // Since i32 is the smallest legal type, we either need a no-op
6892 // or a truncation.
6893 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6894 VSNOp0.getOperand(Index));
6895 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6896 }
6897 }
6898 }
6899
6900 // Otherwise just treat the current form as legal.
6901 return Op;
6902}
6903
6904SDValue SystemZTargetLowering::lowerFSHL(SDValue Op, SelectionDAG &DAG) const {
6905 SDLoc DL(Op);
6906
6907 // i128 FSHL with a constant amount that is a multiple of 8 can be
6908 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6909 // facility, FSHL with a constant amount less than 8 can be implemented
6910 // via SHL_DOUBLE_BIT, and FSHL with other constant amounts by a
6911 // combination of the two.
6912 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6913 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6914 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6915 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6916 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6917 if (ShiftAmt > 120) {
6918 // For N in 121..128, fshl N == fshr (128 - N), and for 1 <= N < 8
6919 // SHR_DOUBLE_BIT emits fewer instructions.
6920 SDValue Val =
6921 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6922 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6923 return DAG.getBitcast(MVT::i128, Val);
6924 }
6925 SmallVector<int, 16> Mask(16);
6926 for (unsigned Elt = 0; Elt < 16; Elt++)
6927 Mask[Elt] = (ShiftAmt >> 3) + Elt;
6928 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6929 if ((ShiftAmt & 7) == 0)
6930 return DAG.getBitcast(MVT::i128, Shuf1);
6931 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op1, Op1, Mask);
6932 SDValue Val =
6933 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Shuf1, Shuf2,
6934 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6935 return DAG.getBitcast(MVT::i128, Val);
6936 }
6937 }
6938
6939 return SDValue();
6940}
6941
6942SDValue SystemZTargetLowering::lowerFSHR(SDValue Op, SelectionDAG &DAG) const {
6943 SDLoc DL(Op);
6944
6945 // i128 FSHR with a constant amount that is a multiple of 8 can be
6946 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6947 // facility, FSHR with a constant amount less than 8 can be implemented
6948 // via SHR_DOUBLE_BIT, and FSHR with other constant amounts by a
6949 // combination of the two.
6950 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6951 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6952 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6953 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6954 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6955 if (ShiftAmt > 120) {
6956 // For N in 121..128, fshr N == fshl (128 - N), and for 1 <= N < 8
6957 // SHL_DOUBLE_BIT emits fewer instructions.
6958 SDValue Val =
6959 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6960 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6961 return DAG.getBitcast(MVT::i128, Val);
6962 }
6963 SmallVector<int, 16> Mask(16);
6964 for (unsigned Elt = 0; Elt < 16; Elt++)
6965 Mask[Elt] = 16 - (ShiftAmt >> 3) + Elt;
6966 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6967 if ((ShiftAmt & 7) == 0)
6968 return DAG.getBitcast(MVT::i128, Shuf1);
6969 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op0, Mask);
6970 SDValue Val =
6971 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Shuf2, Shuf1,
6972 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6973 return DAG.getBitcast(MVT::i128, Val);
6974 }
6975 }
6976
6977 return SDValue();
6978}
6979
6981 SDLoc DL(Op);
6982 SDValue Src = Op.getOperand(0);
6983 MVT DstVT = Op.getSimpleValueType();
6984
6986 unsigned SrcAS = N->getSrcAddressSpace();
6987
6988 assert(SrcAS != N->getDestAddressSpace() &&
6989 "addrspacecast must be between different address spaces");
6990
6991 // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
6992 // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
6993 if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
6994 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Src,
6995 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6996 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6997 } else if (DstVT == MVT::i32) {
6998 Op = DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);
6999 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op,
7000 DAG.getConstant(0x7fffffff, DL, MVT::i32));
7001 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
7002 } else {
7003 report_fatal_error("Bad address space in addrspacecast");
7004 }
7005 return Op;
7006}
7007
7008SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op,
7009 SelectionDAG &DAG) const {
7010 SDValue In = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
7011 if (In.getSimpleValueType() != MVT::f16)
7012 return Op; // Legal
7013 return SDValue(); // Let legalizer emit the libcall.
7014}
7015
7017 MVT VT, SDValue Arg, SDLoc DL,
7018 SDValue Chain, bool IsStrict) const {
7019 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
7020 MakeLibCallOptions CallOptions;
7021 SDValue Result;
7022 std::tie(Result, Chain) =
7023 makeLibCall(DAG, LC, VT, Arg, CallOptions, DL, Chain);
7024 return IsStrict ? DAG.getMergeValues({Result, Chain}, DL) : Result;
7025}
7026
// Custom-lower FP_TO_SINT / FP_TO_UINT and their strict variants.
//
// Returns an empty SDValue (default expansion) for unsigned conversions when
// the subtarget lacks the FP extension, routes f16 inputs through
// LowerOperationWrapper, uses a libcall for i128 results, and otherwise
// returns Op unchanged as legal.
//
// NOTE(review): the embedded listing numbers skip 7048, so the declaration
// of Results appears to be absent from this listing -- confirm against
// upstream.
7027SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op,
7028 SelectionDAG &DAG) const {
7029 bool IsSigned = (Op->getOpcode() == ISD::FP_TO_SINT ||
7030 Op->getOpcode() == ISD::STRICT_FP_TO_SINT);
7031 bool IsStrict = Op->isStrictFPOpcode();
7032 SDLoc DL(Op);
7033 MVT VT = Op.getSimpleValueType();
// Strict nodes carry the chain as operand 0 and the FP input as operand 1.
7034 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
7035 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
7036 EVT InVT = InOp.getValueType();
7037
7038 // FP to unsigned is not directly supported on z10. Promoting an i32
7039 // result to (signed) i64 doesn't generate an inexact condition (fp
7040 // exception) for values that are outside the i32 range but in the i64
7041 // range, so use the default expansion.
7042 if (!Subtarget.hasFPExtension() && !IsSigned)
7043 // Expand i32/i64. F16 values will be recognized to fit and extended.
7044 return SDValue();
7045
7046 // Conversion from f16 is done via f32.
7047 if (InOp.getSimpleValueType() == MVT::f16) {
7049 LowerOperationWrapper(Op.getNode(), Results, DAG);
7050 return DAG.getMergeValues(Results, DL);
7051 }
7052
// i128 results are produced by a runtime library call.
7053 if (VT == MVT::i128) {
7054 RTLIB::Libcall LC =
7055 IsSigned ? RTLIB::getFPTOSINT(InVT, VT) : RTLIB::getFPTOUINT(InVT, VT);
7056 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
7057 }
7058
7059 return Op; // Legal
7060}
7061
// Custom-lower SINT_TO_FP / UINT_TO_FP and their strict variants.
//
// Routes f16 results through LowerOperationWrapper, returns an empty SDValue
// (default expansion) for unsigned conversions when the subtarget lacks the
// FP extension, uses a libcall for i128 inputs, and otherwise returns Op
// unchanged as legal.
//
// NOTE(review): the embedded listing numbers skip 7075, so the declaration
// of Results appears to be absent from this listing -- confirm against
// upstream.
7062SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op,
7063 SelectionDAG &DAG) const {
7064 bool IsSigned = (Op->getOpcode() == ISD::SINT_TO_FP ||
7065 Op->getOpcode() == ISD::STRICT_SINT_TO_FP);
7066 bool IsStrict = Op->isStrictFPOpcode();
7067 SDLoc DL(Op);
7068 MVT VT = Op.getSimpleValueType();
// Strict nodes carry the chain as operand 0 and the integer input as
// operand 1.
7069 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
7070 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
7071 EVT InVT = InOp.getValueType();
7072
7073 // Conversion to f16 is done via f32.
7074 if (VT == MVT::f16) {
7076 LowerOperationWrapper(Op.getNode(), Results, DAG);
7077 return DAG.getMergeValues(Results, DL);
7078 }
7079
7080 // Unsigned to fp is not directly supported on z10.
7081 if (!Subtarget.hasFPExtension() && !IsSigned)
7082 return SDValue(); // Expand i64.
7083
// i128 inputs are converted by a runtime library call.
7084 if (InVT == MVT::i128) {
7085 RTLIB::Libcall LC =
7086 IsSigned ? RTLIB::getSINTTOFP(InVT, VT) : RTLIB::getUINTTOFP(InVT, VT);
7087 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
7088 }
7089
7090 return Op; // Legal
7091}
7092
7093// Lower an f16 LOAD in case of no vector support.
7094SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
7095 SelectionDAG &DAG) const {
7096 EVT RegVT = Op.getValueType();
7097 assert(RegVT == MVT::f16 && "Expected to lower an f16 load.");
7098 (void)RegVT;
7099
7100 // Load as integer.
7101 SDLoc DL(Op);
7102 SDValue NewLd;
7103 if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) {
7104 assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load");
7105 NewLd = DAG.getAtomicLoad(ISD::EXTLOAD, DL, MVT::i16, MVT::i64,
7106 AtomicLd->getChain(), AtomicLd->getBasePtr(),
7107 AtomicLd->getMemOperand());
7108 } else {
7109 LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
7110 assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load");
7111 NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i64, Ld->getChain(),
7112 Ld->getBasePtr(), Ld->getPointerInfo(), MVT::i16,
7113 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
7114 }
7115 SDValue F16Val = convertToF16(NewLd, DAG);
7116 return DAG.getMergeValues({F16Val, NewLd.getValue(1)}, DL);
7117}
7118
7119// Lower an f16 STORE in case of no vector support.
7120SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op,
7121 SelectionDAG &DAG) const {
7122 SDLoc DL(Op);
7123 SDValue Shft = convertFromF16(Op->getOperand(1), DL, DAG);
7124
7125 if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Op.getNode()))
7126 return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MVT::i16, AtomicSt->getChain(),
7127 Shft, AtomicSt->getBasePtr(),
7128 AtomicSt->getMemOperand());
7129
7130 StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
7131 return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(), MVT::i16,
7132 St->getMemOperand());
7133}
7134
// Custom-lower IS_FPCLASS: translate the requested FP class bits (operand 1)
// into a TDC mask, emit a SystemZISD::TDC node on the argument with that
// mask, and return the value produced by getCCResult for it.
//
// NOTE(review): the embedded listing numbers skip every other line between
// 7143 and 7159, so the "TDCMask |= ..." statements belonging to the first
// eight "if" lines appear to be absent from this listing. As shown, those
// ifs would nest into each other -- confirm against upstream.
7135SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
7136 SelectionDAG &DAG) const {
7137 SDLoc DL(Op);
7138 MVT ResultVT = Op.getSimpleValueType();
7139 SDValue Arg = Op.getOperand(0);
7140 unsigned Check = Op.getConstantOperandVal(1);
7141
7142 unsigned TDCMask = 0;
7143 if (Check & fcSNan)
7145 if (Check & fcQNan)
7147 if (Check & fcPosInf)
7149 if (Check & fcNegInf)
7151 if (Check & fcPosNormal)
7153 if (Check & fcNegNormal)
7155 if (Check & fcPosSubnormal)
7157 if (Check & fcNegSubnormal)
7159 if (Check & fcPosZero)
7160 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
7161 if (Check & fcNegZero)
7162 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
7163 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
7164
7165 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
7166 return getCCResult(DAG, Intr);
7167}
7168
// Custom-lower READCYCLECOUNTER: emit a SystemZISD::STCKF memory intrinsic
// node that stores into an i64 stack temporary, then load the i64 value back
// from that slot, chained after the store.
//
// NOTE(review): the embedded listing numbers skip 7178, so the initializer
// of MPI appears to be absent from this listing -- confirm against upstream.
7169SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
7170 SelectionDAG &DAG) const {
7171 SDLoc DL(Op);
7172 SDValue Chain = Op.getOperand(0);
7173
7174 // STCKF only supports a memory operand, so we have to use a temporary.
7175 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
7176 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
7177 MachinePointerInfo MPI =
7179
7180 // Use STCKF to store the TOD clock into the temporary.
7181 SDValue StoreOps[] = {Chain, StackPtr};
7182 Chain = DAG.getMemIntrinsicNode(
7183 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
7184 MPI, MaybeAlign(), MachineMemOperand::MOStore);
7185
7186 // And read it back from there.
7187 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
7188}
7189
7191 SelectionDAG &DAG) const {
7192 switch (Op.getOpcode()) {
7193 case ISD::FRAMEADDR:
7194 return lowerFRAMEADDR(Op, DAG);
7195 case ISD::RETURNADDR:
7196 return lowerRETURNADDR(Op, DAG);
7197 case ISD::BR_CC:
7198 return lowerBR_CC(Op, DAG);
7199 case ISD::SELECT_CC:
7200 return lowerSELECT_CC(Op, DAG);
7201 case ISD::SETCC:
7202 return lowerSETCC(Op, DAG);
7203 case ISD::STRICT_FSETCC:
7204 return lowerSTRICT_FSETCC(Op, DAG, false);
7206 return lowerSTRICT_FSETCC(Op, DAG, true);
7207 case ISD::GlobalAddress:
7208 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
7210 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
7211 case ISD::BlockAddress:
7212 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
7213 case ISD::JumpTable:
7214 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
7215 case ISD::ConstantPool:
7216 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
7217 case ISD::BITCAST:
7218 return lowerBITCAST(Op, DAG);
7219 case ISD::VASTART:
7220 return lowerVASTART(Op, DAG);
7221 case ISD::VACOPY:
7222 return lowerVACOPY(Op, DAG);
7224 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7226 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
7227 case ISD::MULHS:
7228 return lowerMULH(Op, DAG, SystemZISD::SMUL_LOHI);
7229 case ISD::MULHU:
7230 return lowerMULH(Op, DAG, SystemZISD::UMUL_LOHI);
7231 case ISD::SMUL_LOHI:
7232 return lowerSMUL_LOHI(Op, DAG);
7233 case ISD::UMUL_LOHI:
7234 return lowerUMUL_LOHI(Op, DAG);
7235 case ISD::SDIVREM:
7236 return lowerSDIVREM(Op, DAG);
7237 case ISD::UDIVREM:
7238 return lowerUDIVREM(Op, DAG);
7239 case ISD::SADDO:
7240 case ISD::SSUBO:
7241 case ISD::UADDO:
7242 case ISD::USUBO:
7243 return lowerXALUO(Op, DAG);
7244 case ISD::UADDO_CARRY:
7245 case ISD::USUBO_CARRY:
7246 return lowerUADDSUBO_CARRY(Op, DAG);
7247 case ISD::OR:
7248 return lowerOR(Op, DAG);
7249 case ISD::CTPOP:
7250 return lowerCTPOP(Op, DAG);
7251 case ISD::VECREDUCE_ADD:
7252 return lowerVECREDUCE_ADD(Op, DAG);
7253 case ISD::ATOMIC_FENCE:
7254 return lowerATOMIC_FENCE(Op, DAG);
7255 case ISD::ATOMIC_SWAP:
7256 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
7257 case ISD::ATOMIC_STORE:
7258 return lowerATOMIC_STORE(Op, DAG);
7259 case ISD::ATOMIC_LOAD:
7260 return lowerATOMIC_LOAD(Op, DAG);
7262 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
7264 return lowerATOMIC_LOAD_SUB(Op, DAG);
7266 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
7268 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
7270 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
7272 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
7274 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
7276 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
7278 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
7280 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
7282 return lowerATOMIC_CMP_SWAP(Op, DAG);
7283 case ISD::STACKSAVE:
7284 return lowerSTACKSAVE(Op, DAG);
7285 case ISD::STACKRESTORE:
7286 return lowerSTACKRESTORE(Op, DAG);
7287 case ISD::PREFETCH:
7288 return lowerPREFETCH(Op, DAG);
7290 return lowerINTRINSIC_W_CHAIN(Op, DAG);
7292 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
7293 case ISD::BUILD_VECTOR:
7294 return lowerBUILD_VECTOR(Op, DAG);
7296 return lowerVECTOR_SHUFFLE(Op, DAG);
7298 return lowerSCALAR_TO_VECTOR(Op, DAG);
7300 return lowerINSERT_VECTOR_ELT(Op, DAG);
7302 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7304 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
7306 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
7307 case ISD::SHL:
7308 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
7309 case ISD::SRL:
7310 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
7311 case ISD::SRA:
7312 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
7313 case ISD::ADDRSPACECAST:
7314 return lowerAddrSpaceCast(Op, DAG);
7315 case ISD::ROTL:
7316 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
7317 case ISD::FSHL:
7318 return lowerFSHL(Op, DAG);
7319 case ISD::FSHR:
7320 return lowerFSHR(Op, DAG);
7321 case ISD::FP_EXTEND:
7323 return lowerFP_EXTEND(Op, DAG);
7324 case ISD::FP_TO_UINT:
7325 case ISD::FP_TO_SINT:
7328 return lower_FP_TO_INT(Op, DAG);
7329 case ISD::UINT_TO_FP:
7330 case ISD::SINT_TO_FP:
7333 return lower_INT_TO_FP(Op, DAG);
7334 case ISD::LOAD:
7335 return lowerLoadF16(Op, DAG);
7336 case ISD::STORE:
7337 return lowerStoreF16(Op, DAG);
7338 case ISD::IS_FPCLASS:
7339 return lowerIS_FPCLASS(Op, DAG);
7340 case ISD::GET_ROUNDING:
7341 return lowerGET_ROUNDING(Op, DAG);
7343 return lowerREADCYCLECOUNTER(Op, DAG);
7346 // These operations are legal on our platform, but we cannot actually
7347 // set the operation action to Legal as common code would treat this
7348 // as equivalent to Expand. Instead, we keep the operation action to
7349 // Custom and just leave them unchanged here.
7350 return Op;
7351
7352 default:
7353 llvm_unreachable("Unexpected node to lower");
7354 }
7355}
7356
7358 const SDLoc &SL) {
7359 // If i128 is legal, just use a normal bitcast.
7360 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7361 return DAG.getBitcast(MVT::f128, Src);
7362
7363 // Otherwise, f128 must live in FP128, so do a partwise move.
7365 &SystemZ::FP128BitRegClass);
7366
7367 SDValue Hi, Lo;
7368 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
7369
7370 Hi = DAG.getBitcast(MVT::f64, Hi);
7371 Lo = DAG.getBitcast(MVT::f64, Lo);
7372
7373 SDNode *Pair = DAG.getMachineNode(
7374 SystemZ::REG_SEQUENCE, SL, MVT::f128,
7375 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
7376 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
7377 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
7378 return SDValue(Pair, 0);
7379}
7380
7382 const SDLoc &SL) {
7383 // If i128 is legal, just use a normal bitcast.
7384 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7385 return DAG.getBitcast(MVT::i128, Src);
7386
7387 // Otherwise, f128 must live in FP128, so do a partwise move.
7389 &SystemZ::FP128BitRegClass);
7390
7391 SDValue LoFP =
7392 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
7393 SDValue HiFP =
7394 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
7395 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
7396 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
7397
7398 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
7399}
7400
7401// Lower operations with invalid operand or result types.
7402void
7405 SelectionDAG &DAG) const {
7406 switch (N->getOpcode()) {
7407 case ISD::ATOMIC_LOAD: {
7408 SDLoc DL(N);
7409 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
7410 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
7411 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7412 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
7413 DL, Tys, Ops, MVT::i128, MMO);
7414
7415 SDValue Lowered = lowerGR128ToI128(DAG, Res);
7416 if (N->getValueType(0) == MVT::f128)
7417 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
7418 Results.push_back(Lowered);
7419 Results.push_back(Res.getValue(1));
7420 break;
7421 }
7422 case ISD::ATOMIC_STORE: {
7423 SDLoc DL(N);
7424 SDVTList Tys = DAG.getVTList(MVT::Other);
7425 SDValue Val = N->getOperand(1);
7426 if (Val.getValueType() == MVT::f128)
7427 Val = expandBitCastF128ToI128(DAG, Val, DL);
7428 Val = lowerI128ToGR128(DAG, Val);
7429
7430 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
7431 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7432 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
7433 DL, Tys, Ops, MVT::i128, MMO);
7434 // We have to enforce sequential consistency by performing a
7435 // serialization operation after the store.
7436 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
7438 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
7439 MVT::Other, Res), 0);
7440 Results.push_back(Res);
7441 break;
7442 }
7444 SDLoc DL(N);
7445 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
7446 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
7447 lowerI128ToGR128(DAG, N->getOperand(2)),
7448 lowerI128ToGR128(DAG, N->getOperand(3)) };
7449 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7450 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
7451 DL, Tys, Ops, MVT::i128, MMO);
7452 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
7454 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
7455 Results.push_back(lowerGR128ToI128(DAG, Res));
7456 Results.push_back(Success);
7457 Results.push_back(Res.getValue(2));
7458 break;
7459 }
7460 case ISD::BITCAST: {
7461 if (useSoftFloat())
7462 return;
7463 SDLoc DL(N);
7464 SDValue Src = N->getOperand(0);
7465 EVT SrcVT = Src.getValueType();
7466 EVT ResVT = N->getValueType(0);
7467 if (ResVT == MVT::i128 && SrcVT == MVT::f128)
7468 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
7469 else if (SrcVT == MVT::i16 && ResVT == MVT::f16) {
7470 if (Subtarget.hasVector()) {
7471 SDValue In32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
7472 Results.push_back(SDValue(
7473 DAG.getMachineNode(SystemZ::LEFR_16, DL, MVT::f16, In32), 0));
7474 } else {
7475 SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Src);
7476 Results.push_back(convertToF16(In64, DAG));
7477 }
7478 } else if (SrcVT == MVT::f16 && ResVT == MVT::i16) {
7479 SDValue ExtractedVal =
7480 Subtarget.hasVector()
7481 ? SDValue(DAG.getMachineNode(SystemZ::LFER_16, DL, MVT::i32, Src),
7482 0)
7483 : convertFromF16(Src, DL, DAG);
7484 Results.push_back(DAG.getZExtOrTrunc(ExtractedVal, DL, ResVT));
7485 }
7486 break;
7487 }
7488 case ISD::UINT_TO_FP:
7489 case ISD::SINT_TO_FP:
7492 if (useSoftFloat())
7493 return;
7494 bool IsStrict = N->isStrictFPOpcode();
7495 SDLoc DL(N);
7496 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7497 EVT ResVT = N->getValueType(0);
7498 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7499 if (ResVT == MVT::f16) {
7500 if (!IsStrict) {
7501 SDValue OpF32 = DAG.getNode(N->getOpcode(), DL, MVT::f32, InOp);
7502 Results.push_back(DAG.getFPExtendOrRound(OpF32, DL, MVT::f16));
7503 } else {
7504 SDValue OpF32 =
7505 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
7506 {Chain, InOp});
7507 SDValue F16Res;
7508 std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound(
7509 OpF32, OpF32.getValue(1), DL, MVT::f16);
7510 Results.push_back(F16Res);
7511 Results.push_back(Chain);
7512 }
7513 }
7514 break;
7515 }
7516 case ISD::FP_TO_UINT:
7517 case ISD::FP_TO_SINT:
7520 if (useSoftFloat())
7521 return;
7522 bool IsStrict = N->isStrictFPOpcode();
7523 SDLoc DL(N);
7524 EVT ResVT = N->getValueType(0);
7525 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7526 EVT InVT = InOp->getValueType(0);
7527 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7528 if (InVT == MVT::f16) {
7529 if (!IsStrict) {
7530 SDValue InF32 = DAG.getFPExtendOrRound(InOp, DL, MVT::f32);
7531 Results.push_back(DAG.getNode(N->getOpcode(), DL, ResVT, InF32));
7532 } else {
7533 SDValue InF32;
7534 std::tie(InF32, Chain) =
7535 DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32);
7536 SDValue OpF32 =
7537 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other),
7538 {Chain, InF32});
7539 Results.push_back(OpF32);
7540 Results.push_back(OpF32.getValue(1));
7541 }
7542 }
7543 break;
7544 }
7545 default:
7546 llvm_unreachable("Unexpected node to lower");
7547 }
7548}
7549
7550void
7556
7557// Return true if VT is a vector whose elements are a whole number of bytes
7558// in width. Also check for presence of vector support.
7559bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
7560 if (!Subtarget.hasVector())
7561 return false;
7562
7563 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
7564}
7565
7566// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
7567// producing a result of type ResVT. Op is a possibly bitcast version
7568// of the input vector and Index is the index (based on type VecVT) that
7569// should be extracted. Return the new extraction if a simplification
7570// was possible or if Force is true.
7571SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
7572 EVT VecVT, SDValue Op,
7573 unsigned Index,
7574 DAGCombinerInfo &DCI,
7575 bool Force) const {
7576 SelectionDAG &DAG = DCI.DAG;
7577
7578 // The number of bytes being extracted.
7579 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7580
7581 for (;;) {
7582 unsigned Opcode = Op.getOpcode();
7583 if (Opcode == ISD::BITCAST)
7584 // Look through bitcasts.
7585 Op = Op.getOperand(0);
7586 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
7587 canTreatAsByteVector(Op.getValueType())) {
7588 // Get a VPERM-like permute mask and see whether the bytes covered
7589 // by the extracted element are a contiguous sequence from one
7590 // source operand.
7592 if (!getVPermMask(Op, Bytes))
7593 break;
7594 int First;
7595 if (!getShuffleInput(Bytes, Index * BytesPerElement,
7596 BytesPerElement, First))
7597 break;
7598 if (First < 0)
7599 return DAG.getUNDEF(ResVT);
7600 // Make sure the contiguous sequence starts at a multiple of the
7601 // original element size.
7602 unsigned Byte = unsigned(First) % Bytes.size();
7603 if (Byte % BytesPerElement != 0)
7604 break;
7605 // We can get the extracted value directly from an input.
7606 Index = Byte / BytesPerElement;
7607 Op = Op.getOperand(unsigned(First) / Bytes.size());
7608 Force = true;
7609 } else if (Opcode == ISD::BUILD_VECTOR &&
7610 canTreatAsByteVector(Op.getValueType())) {
7611 // We can only optimize this case if the BUILD_VECTOR elements are
7612 // at least as wide as the extracted value.
7613 EVT OpVT = Op.getValueType();
7614 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7615 if (OpBytesPerElement < BytesPerElement)
7616 break;
7617 // Make sure that the least-significant bit of the extracted value
7618 // is the least significant bit of an input.
7619 unsigned End = (Index + 1) * BytesPerElement;
7620 if (End % OpBytesPerElement != 0)
7621 break;
7622 // We're extracting the low part of one operand of the BUILD_VECTOR.
7623 Op = Op.getOperand(End / OpBytesPerElement - 1);
7624 if (!Op.getValueType().isInteger()) {
7625 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
7626 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
7627 DCI.AddToWorklist(Op.getNode());
7628 }
7629 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
7630 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7631 if (VT != ResVT) {
7632 DCI.AddToWorklist(Op.getNode());
7633 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
7634 }
7635 return Op;
7636 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7638 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
7639 canTreatAsByteVector(Op.getValueType()) &&
7640 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
7641 // Make sure that only the unextended bits are significant.
7642 EVT ExtVT = Op.getValueType();
7643 EVT OpVT = Op.getOperand(0).getValueType();
7644 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
7645 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7646 unsigned Byte = Index * BytesPerElement;
7647 unsigned SubByte = Byte % ExtBytesPerElement;
7648 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
7649 if (SubByte < MinSubByte ||
7650 SubByte + BytesPerElement > ExtBytesPerElement)
7651 break;
7652 // Get the byte offset of the unextended element
7653 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
7654 // ...then add the byte offset relative to that element.
7655 Byte += SubByte - MinSubByte;
7656 if (Byte % BytesPerElement != 0)
7657 break;
7658 Op = Op.getOperand(0);
7659 Index = Byte / BytesPerElement;
7660 Force = true;
7661 } else
7662 break;
7663 }
7664 if (Force) {
7665 if (Op.getValueType() != VecVT) {
7666 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
7667 DCI.AddToWorklist(Op.getNode());
7668 }
7669 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
7670 DAG.getConstant(Index, DL, MVT::i32));
7671 }
7672 return SDValue();
7673}
7674
7675// Optimize vector operations in scalar value Op on the basis that Op
7676// is truncated to TruncVT.
7677SDValue SystemZTargetLowering::combineTruncateExtract(
7678 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
7679 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
7680 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
7681 // of type TruncVT.
7682 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7683 TruncVT.getSizeInBits() % 8 == 0) {
7684 SDValue Vec = Op.getOperand(0);
7685 EVT VecVT = Vec.getValueType();
7686 if (canTreatAsByteVector(VecVT)) {
7687 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7688 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7689 unsigned TruncBytes = TruncVT.getStoreSize();
7690 if (BytesPerElement % TruncBytes == 0) {
7691 // Calculate the value of Y' in the above description. We are
7692 // splitting the original elements into Scale equal-sized pieces
7693 // and for truncation purposes want the last (least-significant)
7694 // of these pieces for IndexN. This is easiest to do by calculating
7695 // the start index of the following element and then subtracting 1.
7696 unsigned Scale = BytesPerElement / TruncBytes;
7697 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
7698
7699 // Defer the creation of the bitcast from X to combineExtract,
7700 // which might be able to optimize the extraction.
7701 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
7702 MVT::getIntegerVT(TruncBytes * 8),
7703 VecVT.getStoreSize() / TruncBytes);
7704 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
7705 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
7706 }
7707 }
7708 }
7709 }
7710 return SDValue();
7711}
7712
7713SDValue SystemZTargetLowering::combineZERO_EXTEND(
7714 SDNode *N, DAGCombinerInfo &DCI) const {
7715 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
7716 SelectionDAG &DAG = DCI.DAG;
7717 SDValue N0 = N->getOperand(0);
7718 EVT VT = N->getValueType(0);
7719 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
7720 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
7721 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7722 if (TrueOp && FalseOp) {
7723 SDLoc DL(N0);
7724 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
7725 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
7726 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
7727 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
7728 // If N0 has multiple uses, change other uses as well.
7729 if (!N0.hasOneUse()) {
7730 SDValue TruncSelect =
7731 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
7732 DCI.CombineTo(N0.getNode(), TruncSelect);
7733 }
7734 return NewSelect;
7735 }
7736 }
7737 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
7738 // of the result is smaller than the size of X and all the truncated bits
7739 // of X are already zero.
7740 if (N0.getOpcode() == ISD::XOR &&
7741 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
7742 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7743 N0.getOperand(1).getOpcode() == ISD::Constant) {
7744 SDValue X = N0.getOperand(0).getOperand(0);
7745 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
7746 KnownBits Known = DAG.computeKnownBits(X);
7747 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
7748 N0.getValueSizeInBits(),
7749 VT.getSizeInBits());
7750 if (TruncatedBits.isSubsetOf(Known.Zero)) {
7751 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7752 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
7753 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
7754 X, DAG.getConstant(Mask, SDLoc(N0), VT));
7755 }
7756 }
7757 }
7758 // Recognize patterns for VECTOR SUBTRACT COMPUTE BORROW INDICATION
7759 // and VECTOR ADD COMPUTE CARRY for i128:
7760 // (zext (setcc_uge X Y)) --> (VSCBI X Y)
7761 // (zext (setcc_ule Y X)) --> (VSCBI X Y)
7762 // (zext (setcc_ult (add X Y) X/Y) -> (VACC X Y)
7763 // (zext (setcc_ugt X/Y (add X Y)) -> (VACC X Y)
7764 // For vector types, these patterns are recognized in the .td file.
7765 if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT == MVT::i128 &&
7766 N0.getOperand(0).getValueType() == VT) {
7767 SDValue Op0 = N0.getOperand(0);
7768 SDValue Op1 = N0.getOperand(1);
7769 const ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7770 switch (CC) {
7771 case ISD::SETULE:
7772 std::swap(Op0, Op1);
7773 [[fallthrough]];
7774 case ISD::SETUGE:
7775 return DAG.getNode(SystemZISD::VSCBI, SDLoc(N0), VT, Op0, Op1);
7776 case ISD::SETUGT:
7777 std::swap(Op0, Op1);
7778 [[fallthrough]];
7779 case ISD::SETULT:
7780 if (Op0->hasOneUse() && Op0->getOpcode() == ISD::ADD &&
7781 (Op0->getOperand(0) == Op1 || Op0->getOperand(1) == Op1))
7782 return DAG.getNode(SystemZISD::VACC, SDLoc(N0), VT, Op0->getOperand(0),
7783 Op0->getOperand(1));
7784 break;
7785 default:
7786 break;
7787 }
7788 }
7789
7790 return SDValue();
7791}
7792
7793SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
7794 SDNode *N, DAGCombinerInfo &DCI) const {
7795 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
7796 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
7797 // into (select_cc LHS, RHS, -1, 0, COND)
7798 SelectionDAG &DAG = DCI.DAG;
7799 SDValue N0 = N->getOperand(0);
7800 EVT VT = N->getValueType(0);
7801 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7802 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
7803 N0 = N0.getOperand(0);
7804 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
7805 SDLoc DL(N0);
7806 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
7807 DAG.getAllOnesConstant(DL, VT),
7808 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
7809 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
7810 }
7811 return SDValue();
7812}
7813
7814SDValue SystemZTargetLowering::combineSIGN_EXTEND(
7815 SDNode *N, DAGCombinerInfo &DCI) const {
7816 // Convert (sext (ashr (shl X, C1), C2)) to
7817 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
7818 // cheap as narrower ones.
7819 SelectionDAG &DAG = DCI.DAG;
7820 SDValue N0 = N->getOperand(0);
7821 EVT VT = N->getValueType(0);
7822 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
7823 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7824 SDValue Inner = N0.getOperand(0);
7825 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
7826 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
7827 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7828 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
7829 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
7830 EVT ShiftVT = N0.getOperand(1).getValueType();
7831 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
7832 Inner.getOperand(0));
7833 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
7834 DAG.getConstant(NewShlAmt, SDLoc(Inner),
7835 ShiftVT));
7836 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
7837 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
7838 }
7839 }
7840 }
7841
7842 return SDValue();
7843}
7844
7845SDValue SystemZTargetLowering::combineMERGE(
7846 SDNode *N, DAGCombinerInfo &DCI) const {
7847 SelectionDAG &DAG = DCI.DAG;
7848 unsigned Opcode = N->getOpcode();
7849 SDValue Op0 = N->getOperand(0);
7850 SDValue Op1 = N->getOperand(1);
7851 if (Op0.getOpcode() == ISD::BITCAST)
7852 Op0 = Op0.getOperand(0);
7854 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
7855 // for v4f32.
7856 if (Op1 == N->getOperand(0))
7857 return Op1;
7858 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
7859 EVT VT = Op1.getValueType();
7860 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
7861 if (ElemBytes <= 4) {
7862 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
7863 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
7864 EVT InVT = VT.changeVectorElementTypeToInteger();
7865 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
7866 SystemZ::VectorBytes / ElemBytes / 2);
7867 if (VT != InVT) {
7868 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
7869 DCI.AddToWorklist(Op1.getNode());
7870 }
7871 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
7872 DCI.AddToWorklist(Op.getNode());
7873 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
7874 }
7875 }
7876 return SDValue();
7877}
7878
7879static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7880 SDNode *&HiPart) {
7881 LoPart = HiPart = nullptr;
7882
7883 // Scan through all users.
7884 for (SDUse &Use : LD->uses()) {
7885 // Skip the uses of the chain.
7886 if (Use.getResNo() != 0)
7887 continue;
7888
7889 // Verify every user is a TRUNCATE to i64 of the low or high half.
7890 SDNode *User = Use.getUser();
7891 bool IsLoPart = true;
7892 if (User->getOpcode() == ISD::SRL &&
7893 User->getOperand(1).getOpcode() == ISD::Constant &&
7894 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
7895 User = *User->user_begin();
7896 IsLoPart = false;
7897 }
7898 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
7899 return false;
7900
7901 if (IsLoPart) {
7902 if (LoPart)
7903 return false;
7904 LoPart = User;
7905 } else {
7906 if (HiPart)
7907 return false;
7908 HiPart = User;
7909 }
7910 }
7911 return true;
7912}
7913
7914static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7915 SDNode *&HiPart) {
7916 LoPart = HiPart = nullptr;
7917
7918 // Scan through all users.
7919 for (SDUse &Use : LD->uses()) {
7920 // Skip the uses of the chain.
7921 if (Use.getResNo() != 0)
7922 continue;
7923
7924 // Verify every user is an EXTRACT_SUBREG of the low or high half.
7925 SDNode *User = Use.getUser();
7926 if (!User->hasOneUse() || !User->isMachineOpcode() ||
7927 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
7928 return false;
7929
7930 switch (User->getConstantOperandVal(1)) {
7931 case SystemZ::subreg_l64:
7932 if (LoPart)
7933 return false;
7934 LoPart = User;
7935 break;
7936 case SystemZ::subreg_h64:
7937 if (HiPart)
7938 return false;
7939 HiPart = User;
7940 break;
7941 default:
7942 return false;
7943 }
7944 }
7945 return true;
7946}
7947
7948SDValue SystemZTargetLowering::combineLOAD(
7949 SDNode *N, DAGCombinerInfo &DCI) const {
7950 SelectionDAG &DAG = DCI.DAG;
7951 EVT LdVT = N->getValueType(0);
7952 if (auto *LN = dyn_cast<LoadSDNode>(N)) {
7953 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
7954 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7955 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
7956 if (PtrVT != LoadNodeVT) {
7957 SDLoc DL(LN);
7958 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
7959 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0);
7960 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0),
7961 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(),
7962 LN->getMemOperand());
7963 }
7964 }
7965 }
7966 SDLoc DL(N);
7967
7968 // Replace a 128-bit load that is used solely to move its value into GPRs
7969 // by separate loads of both halves.
7970 LoadSDNode *LD = cast<LoadSDNode>(N);
7971 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
7972 SDNode *LoPart, *HiPart;
7973 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
7974 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
7975 // Rewrite each extraction as an independent load.
7976 SmallVector<SDValue, 2> ArgChains;
7977 if (HiPart) {
7978 SDValue EltLoad = DAG.getLoad(
7979 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
7980 LD->getPointerInfo(), LD->getBaseAlign(),
7981 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7982
7983 DCI.CombineTo(HiPart, EltLoad, true);
7984 ArgChains.push_back(EltLoad.getValue(1));
7985 }
7986 if (LoPart) {
7987 SDValue EltLoad = DAG.getLoad(
7988 LoPart->getValueType(0), DL, LD->getChain(),
7989 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
7990 LD->getPointerInfo().getWithOffset(8), LD->getBaseAlign(),
7991 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7992
7993 DCI.CombineTo(LoPart, EltLoad, true);
7994 ArgChains.push_back(EltLoad.getValue(1));
7995 }
7996
7997 // Collect all chains via TokenFactor.
7998 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
7999 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
8000 DCI.AddToWorklist(Chain.getNode());
8001 return SDValue(N, 0);
8002 }
8003 }
8004
8005 if (LdVT.isVector() || LdVT.isInteger())
8006 return SDValue();
8007 // Transform a scalar load that is REPLICATEd as well as having other
8008 // use(s) to the form where the other use(s) use the first element of the
8009 // REPLICATE instead of the load. Otherwise instruction selection will not
8010 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
8011 // point loads.
8012
8013 SDValue Replicate;
8014 SmallVector<SDNode*, 8> OtherUses;
8015 for (SDUse &Use : N->uses()) {
8016 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) {
8017 if (Replicate)
8018 return SDValue(); // Should never happen
8019 Replicate = SDValue(Use.getUser(), 0);
8020 } else if (Use.getResNo() == 0)
8021 OtherUses.push_back(Use.getUser());
8022 }
8023 if (!Replicate || OtherUses.empty())
8024 return SDValue();
8025
8026 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
8027 Replicate, DAG.getConstant(0, DL, MVT::i32));
8028 // Update uses of the loaded Value while preserving old chains.
8029 for (SDNode *U : OtherUses) {
8031 for (SDValue Op : U->ops())
8032 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
8033 DAG.UpdateNodeOperands(U, Ops);
8034 }
8035 return SDValue(N, 0);
8036}
8037
8038bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
8039 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
8040 return true;
8041 if (Subtarget.hasVectorEnhancements2())
8042 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
8043 return true;
8044 return false;
8045}
8046
8048 if (!VT.isVector() || !VT.isSimple() ||
8049 VT.getSizeInBits() != 128 ||
8050 VT.getScalarSizeInBits() % 8 != 0)
8051 return false;
8052
8053 unsigned NumElts = VT.getVectorNumElements();
8054 for (unsigned i = 0; i < NumElts; ++i) {
8055 if (M[i] < 0) continue; // ignore UNDEF indices
8056 if ((unsigned) M[i] != NumElts - 1 - i)
8057 return false;
8058 }
8059
8060 return true;
8061}
8062
8063static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
8064 for (auto *U : StoredVal->users()) {
8065 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
8066 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
8067 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
8068 continue;
8069 } else if (isa<BuildVectorSDNode>(U)) {
8070 SDValue BuildVector = SDValue(U, 0);
8071 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
8072 isOnlyUsedByStores(BuildVector, DAG))
8073 continue;
8074 }
8075 return false;
8076 }
8077 return true;
8078}
8079
8080static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
8081 SDValue &HiPart) {
8082 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
8083 return false;
8084
8085 SDValue Op0 = Val.getOperand(0);
8086 SDValue Op1 = Val.getOperand(1);
8087
8088 if (Op0.getOpcode() == ISD::SHL)
8089 std::swap(Op0, Op1);
8090 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
8091 Op1.getOperand(1).getOpcode() != ISD::Constant ||
8092 Op1.getConstantOperandVal(1) != 64)
8093 return false;
8094 Op1 = Op1.getOperand(0);
8095
8096 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
8097 Op0.getOperand(0).getValueType() != MVT::i64)
8098 return false;
8099 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
8100 Op1.getOperand(0).getValueType() != MVT::i64)
8101 return false;
8102
8103 LoPart = Op0.getOperand(0);
8104 HiPart = Op1.getOperand(0);
8105 return true;
8106}
8107
8108static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
8109 SDValue &HiPart) {
8110 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
8111 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
8112 return false;
8113
8114 if (Val->getNumOperands() != 5 ||
8115 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
8116 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
8117 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
8118 return false;
8119
8120 LoPart = Val->getOperand(1);
8121 HiPart = Val->getOperand(3);
8122 return true;
8123}
8124
8125SDValue SystemZTargetLowering::combineSTORE(
8126 SDNode *N, DAGCombinerInfo &DCI) const {
8127 SelectionDAG &DAG = DCI.DAG;
8128 auto *SN = cast<StoreSDNode>(N);
8129 auto &Op1 = N->getOperand(1);
8130 EVT MemVT = SN->getMemoryVT();
8131
8132 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
8133 MVT PtrVT = getPointerTy(DAG.getDataLayout());
8134 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
8135 if (PtrVT != StoreNodeVT) {
8136 SDLoc DL(SN);
8137 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, SN->getBasePtr(),
8138 SYSTEMZAS::PTR32, 0);
8139 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast,
8140 SN->getPointerInfo(), SN->getBaseAlign(),
8141 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8142 }
8143 }
8144
8145 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
8146 // for the extraction to be done on a vMiN value, so that we can use VSTE.
8147 // If X has wider elements then convert it to:
8148 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
8149 if (MemVT.isInteger() && SN->isTruncatingStore()) {
8150 if (SDValue Value =
8151 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
8152 DCI.AddToWorklist(Value.getNode());
8153
8154 // Rewrite the store with the new form of stored value.
8155 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
8156 SN->getBasePtr(), SN->getMemoryVT(),
8157 SN->getMemOperand());
8158 }
8159 }
8160
8161 // combine STORE (LOAD_STACK_GUARD) into MOV_STACKGUARD_DAG
8162 if (Op1->isMachineOpcode() &&
8163 (Op1->getMachineOpcode() == SystemZ::LOAD_STACK_GUARD)) {
8164 // Obtain the frame index the store was targeting.
8165 int FI = cast<FrameIndexSDNode>(SN->getOperand(2))->getIndex();
8166 // Prepare operands of the MOV_STACKGUARD ISD Node - Chain and FrameIndex.
8167 SDValue Ops[] = {SN->getChain(), DAG.getTargetFrameIndex(FI, MVT::i64)};
8168 return DAG.getNode(SystemZISD::MOV_STACKGUARD, SDLoc(SN), MVT::Other, Ops);
8169 }
8170
8171 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
8172 if (!SN->isTruncatingStore() &&
8173 Op1.getOpcode() == ISD::BSWAP &&
8174 Op1.getNode()->hasOneUse() &&
8175 canLoadStoreByteSwapped(Op1.getValueType())) {
8176
8177 SDValue BSwapOp = Op1.getOperand(0);
8178
8179 if (BSwapOp.getValueType() == MVT::i16)
8180 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
8181
8182 SDValue Ops[] = {
8183 N->getOperand(0), BSwapOp, N->getOperand(2)
8184 };
8185
8186 return
8187 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
8188 Ops, MemVT, SN->getMemOperand());
8189 }
8190 // Combine STORE (element-swap) into VSTER
8191 if (!SN->isTruncatingStore() &&
8192 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
8193 Op1.getNode()->hasOneUse() &&
8194 Subtarget.hasVectorEnhancements2()) {
8195 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
8196 ArrayRef<int> ShuffleMask = SVN->getMask();
8197 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
8198 SDValue Ops[] = {
8199 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
8200 };
8201
8202 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
8203 DAG.getVTList(MVT::Other),
8204 Ops, MemVT, SN->getMemOperand());
8205 }
8206 }
8207
8208 // Combine STORE (READCYCLECOUNTER) into STCKF.
8209 if (!SN->isTruncatingStore() &&
8211 Op1.hasOneUse() &&
8212 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
8213 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
8214 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
8215 DAG.getVTList(MVT::Other),
8216 Ops, MemVT, SN->getMemOperand());
8217 }
8218
8219 // Transform a store of a 128-bit value moved from parts into two stores.
8220 if (SN->isSimple() && ISD::isNormalStore(SN)) {
8221 SDValue LoPart, HiPart;
8222 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
8223 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
8224 SDLoc DL(SN);
8225 SDValue Chain0 = DAG.getStore(
8226 SN->getChain(), DL, HiPart, SN->getBasePtr(), SN->getPointerInfo(),
8227 SN->getBaseAlign(), SN->getMemOperand()->getFlags(), SN->getAAInfo());
8228 SDValue Chain1 = DAG.getStore(
8229 SN->getChain(), DL, LoPart,
8230 DAG.getObjectPtrOffset(DL, SN->getBasePtr(), TypeSize::getFixed(8)),
8231 SN->getPointerInfo().getWithOffset(8), SN->getBaseAlign(),
8232 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8233
8234 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
8235 }
8236 }
8237
8238 // Replicate a reg or immediate with VREP instead of scalar multiply or
8239 // immediate load. It seems best to do this during the first DAGCombine as
8240 // it is straight-forward to handle the zero-extend node in the initial
8241 // DAG, and also not worry about the keeping the new MemVT legal (e.g. when
8242 // extracting an i16 element from a v16i8 vector).
8243 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
8244 isOnlyUsedByStores(Op1, DAG)) {
8245 SDValue Word = SDValue();
8246 EVT WordVT;
8247
8248 // Find a replicated immediate and return it if found in Word and its
8249 // type in WordVT.
8250 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
8251 // Some constants are better handled with a scalar store.
8252 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
8253 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
8254 return;
8255
8256 APInt Val = C->getAPIntValue();
8257 // Truncate Val in case of a truncating store.
8258 if (!llvm::isUIntN(TotBytes * 8, Val.getZExtValue())) {
8259 assert(SN->isTruncatingStore() &&
8260 "Non-truncating store and immediate value does not fit?");
8261 Val = Val.trunc(TotBytes * 8);
8262 }
8263
8264 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
8265 if (VCI.isVectorConstantLegal(Subtarget) &&
8266 VCI.Opcode == SystemZISD::REPLICATE) {
8267 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
8268 WordVT = VCI.VecVT.getScalarType();
8269 }
8270 };
8271
8272 // Find a replicated register and return it if found in Word and its type
8273 // in WordVT.
8274 auto FindReplicatedReg = [&](SDValue MulOp) {
8275 EVT MulVT = MulOp.getValueType();
8276 if (MulOp->getOpcode() == ISD::MUL &&
8277 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
8278 // Find a zero extended value and its type.
8279 SDValue LHS = MulOp->getOperand(0);
8280 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
8281 WordVT = LHS->getOperand(0).getValueType();
8282 else if (LHS->getOpcode() == ISD::AssertZext)
8283 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
8284 else
8285 return;
8286 // Find a replicating constant, e.g. 0x00010001.
8287 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
8288 SystemZVectorConstantInfo VCI(
8289 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
8290 if (VCI.isVectorConstantLegal(Subtarget) &&
8291 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
8292 WordVT == VCI.VecVT.getScalarType())
8293 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
8294 }
8295 }
8296 };
8297
8298 if (isa<BuildVectorSDNode>(Op1) &&
8299 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
8300 SDValue SplatVal = Op1->getOperand(0);
8301 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
8302 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
8303 else
8304 FindReplicatedReg(SplatVal);
8305 } else {
8306 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
8307 FindReplicatedImm(C, MemVT.getStoreSize());
8308 else
8309 FindReplicatedReg(Op1);
8310 }
8311
8312 if (Word != SDValue()) {
8313 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
8314 "Bad type handling");
8315 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
8316 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
8317 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
8318 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
8319 SN->getBasePtr(), SN->getMemOperand());
8320 }
8321 }
8322
8323 return SDValue();
8324}
8325
8326SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
8327 SDNode *N, DAGCombinerInfo &DCI) const {
8328 SelectionDAG &DAG = DCI.DAG;
8329 // Combine element-swap (LOAD) into VLER
8330 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8331 N->getOperand(0).hasOneUse() &&
8332 Subtarget.hasVectorEnhancements2()) {
8333 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
8334 ArrayRef<int> ShuffleMask = SVN->getMask();
8335 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
8336 SDValue Load = N->getOperand(0);
8337 LoadSDNode *LD = cast<LoadSDNode>(Load);
8338
8339 // Create the element-swapping load.
8340 SDValue Ops[] = {
8341 LD->getChain(), // Chain
8342 LD->getBasePtr() // Ptr
8343 };
8344 SDValue ESLoad =
8345 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
8346 DAG.getVTList(LD->getValueType(0), MVT::Other),
8347 Ops, LD->getMemoryVT(), LD->getMemOperand());
8348
8349 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
8350 // by the load dead.
8351 DCI.CombineTo(N, ESLoad);
8352
8353 // Next, combine the load away, we give it a bogus result value but a real
8354 // chain result. The result value is dead because the shuffle is dead.
8355 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
8356
8357 // Return N so it doesn't get rechecked!
8358 return SDValue(N, 0);
8359 }
8360 }
8361
8362 return SDValue();
8363}
8364
8365SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
8366 SDNode *N, DAGCombinerInfo &DCI) const {
8367 SelectionDAG &DAG = DCI.DAG;
8368
8369 if (!Subtarget.hasVector())
8370 return SDValue();
8371
8372 // Look through bitcasts that retain the number of vector elements.
8373 SDValue Op = N->getOperand(0);
8374 if (Op.getOpcode() == ISD::BITCAST &&
8375 Op.getValueType().isVector() &&
8376 Op.getOperand(0).getValueType().isVector() &&
8377 Op.getValueType().getVectorNumElements() ==
8378 Op.getOperand(0).getValueType().getVectorNumElements())
8379 Op = Op.getOperand(0);
8380
8381 // Pull BSWAP out of a vector extraction.
8382 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
8383 EVT VecVT = Op.getValueType();
8384 EVT EltVT = VecVT.getVectorElementType();
8385 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
8386 Op.getOperand(0), N->getOperand(1));
8387 DCI.AddToWorklist(Op.getNode());
8388 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
8389 if (EltVT != N->getValueType(0)) {
8390 DCI.AddToWorklist(Op.getNode());
8391 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
8392 }
8393 return Op;
8394 }
8395
8396 // Try to simplify a vector extraction.
8397 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
8398 SDValue Op0 = N->getOperand(0);
8399 EVT VecVT = Op0.getValueType();
8400 if (canTreatAsByteVector(VecVT))
8401 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
8402 IndexN->getZExtValue(), DCI, false);
8403 }
8404 return SDValue();
8405}
8406
8407SDValue SystemZTargetLowering::combineJOIN_DWORDS(
8408 SDNode *N, DAGCombinerInfo &DCI) const {
8409 SelectionDAG &DAG = DCI.DAG;
8410 // (join_dwords X, X) == (replicate X)
8411 if (N->getOperand(0) == N->getOperand(1))
8412 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
8413 N->getOperand(0));
8414 return SDValue();
8415}
8416
8418 SDValue Chain1 = N1->getOperand(0);
8419 SDValue Chain2 = N2->getOperand(0);
8420
8421 // Trivial case: both nodes take the same chain.
8422 if (Chain1 == Chain2)
8423 return Chain1;
8424
8425 // FIXME - we could handle more complex cases via TokenFactor,
8426 // assuming we can verify that this would not create a cycle.
8427 return SDValue();
8428}
8429
8430SDValue SystemZTargetLowering::combineFP_ROUND(
8431 SDNode *N, DAGCombinerInfo &DCI) const {
8432
8433 if (!Subtarget.hasVector())
8434 return SDValue();
8435
8436 // (fpround (extract_vector_elt X 0))
8437 // (fpround (extract_vector_elt X 1)) ->
8438 // (extract_vector_elt (VROUND X) 0)
8439 // (extract_vector_elt (VROUND X) 2)
8440 //
8441 // This is a special case since the target doesn't really support v2f32s.
8442 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8443 SelectionDAG &DAG = DCI.DAG;
8444 SDValue Op0 = N->getOperand(OpNo);
8445 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
8447 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
8448 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8449 Op0.getConstantOperandVal(1) == 0) {
8450 SDValue Vec = Op0.getOperand(0);
8451 for (auto *U : Vec->users()) {
8452 if (U != Op0.getNode() && U->hasOneUse() &&
8453 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8454 U->getOperand(0) == Vec &&
8455 U->getOperand(1).getOpcode() == ISD::Constant &&
8456 U->getConstantOperandVal(1) == 1) {
8457 SDValue OtherRound = SDValue(*U->user_begin(), 0);
8458 if (OtherRound.getOpcode() == N->getOpcode() &&
8459 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
8460 OtherRound.getValueType() == MVT::f32) {
8461 SDValue VRound, Chain;
8462 if (N->isStrictFPOpcode()) {
8463 Chain = MergeInputChains(N, OtherRound.getNode());
8464 if (!Chain)
8465 continue;
8466 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
8467 {MVT::v4f32, MVT::Other}, {Chain, Vec});
8468 Chain = VRound.getValue(1);
8469 } else
8470 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
8471 MVT::v4f32, Vec);
8472 DCI.AddToWorklist(VRound.getNode());
8473 SDValue Extract1 =
8474 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
8475 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
8476 DCI.AddToWorklist(Extract1.getNode());
8477 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
8478 if (Chain)
8479 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
8480 SDValue Extract0 =
8481 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
8482 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8483 if (Chain)
8484 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8485 N->getVTList(), Extract0, Chain);
8486 return Extract0;
8487 }
8488 }
8489 }
8490 }
8491 return SDValue();
8492}
8493
8494SDValue SystemZTargetLowering::combineFP_EXTEND(
8495 SDNode *N, DAGCombinerInfo &DCI) const {
8496
8497 if (!Subtarget.hasVector())
8498 return SDValue();
8499
8500 // (fpextend (extract_vector_elt X 0))
8501 // (fpextend (extract_vector_elt X 2)) ->
8502 // (extract_vector_elt (VEXTEND X) 0)
8503 // (extract_vector_elt (VEXTEND X) 1)
8504 //
8505 // This is a special case since the target doesn't really support v2f32s.
8506 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8507 SelectionDAG &DAG = DCI.DAG;
8508 SDValue Op0 = N->getOperand(OpNo);
8509 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
8511 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
8512 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8513 Op0.getConstantOperandVal(1) == 0) {
8514 SDValue Vec = Op0.getOperand(0);
8515 for (auto *U : Vec->users()) {
8516 if (U != Op0.getNode() && U->hasOneUse() &&
8517 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8518 U->getOperand(0) == Vec &&
8519 U->getOperand(1).getOpcode() == ISD::Constant &&
8520 U->getConstantOperandVal(1) == 2) {
8521 SDValue OtherExtend = SDValue(*U->user_begin(), 0);
8522 if (OtherExtend.getOpcode() == N->getOpcode() &&
8523 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
8524 OtherExtend.getValueType() == MVT::f64) {
8525 SDValue VExtend, Chain;
8526 if (N->isStrictFPOpcode()) {
8527 Chain = MergeInputChains(N, OtherExtend.getNode());
8528 if (!Chain)
8529 continue;
8530 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
8531 {MVT::v2f64, MVT::Other}, {Chain, Vec});
8532 Chain = VExtend.getValue(1);
8533 } else
8534 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
8535 MVT::v2f64, Vec);
8536 DCI.AddToWorklist(VExtend.getNode());
8537 SDValue Extract1 =
8538 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
8539 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
8540 DCI.AddToWorklist(Extract1.getNode());
8541 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
8542 if (Chain)
8543 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
8544 SDValue Extract0 =
8545 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
8546 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8547 if (Chain)
8548 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8549 N->getVTList(), Extract0, Chain);
8550 return Extract0;
8551 }
8552 }
8553 }
8554 }
8555 return SDValue();
8556}
8557
8558SDValue SystemZTargetLowering::combineINT_TO_FP(
8559 SDNode *N, DAGCombinerInfo &DCI) const {
8560 if (DCI.Level != BeforeLegalizeTypes)
8561 return SDValue();
8562 SelectionDAG &DAG = DCI.DAG;
8563 LLVMContext &Ctx = *DAG.getContext();
8564 unsigned Opcode = N->getOpcode();
8565 EVT OutVT = N->getValueType(0);
8566 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
8567 SDValue Op = N->getOperand(0);
8568 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
8569 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
8570
8571 // Insert an extension before type-legalization to avoid scalarization, e.g.:
8572 // v2f64 = uint_to_fp v2i16
8573 // =>
8574 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
8575 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
8576 OutScalarBits <= 64) {
8577 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
8578 EVT ExtVT = EVT::getVectorVT(
8579 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
8580 unsigned ExtOpcode =
8582 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
8583 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
8584 }
8585 return SDValue();
8586}
8587
8588SDValue SystemZTargetLowering::combineFCOPYSIGN(
8589 SDNode *N, DAGCombinerInfo &DCI) const {
8590 SelectionDAG &DAG = DCI.DAG;
8591 EVT VT = N->getValueType(0);
8592 SDValue ValOp = N->getOperand(0);
8593 SDValue SignOp = N->getOperand(1);
8594
8595 // Remove the rounding which is not needed.
8596 if (SignOp.getOpcode() == ISD::FP_ROUND) {
8597 SDValue WideOp = SignOp.getOperand(0);
8598 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, ValOp, WideOp);
8599 }
8600
8601 return SDValue();
8602}
8603
8604SDValue SystemZTargetLowering::combineBSWAP(
8605 SDNode *N, DAGCombinerInfo &DCI) const {
8606 SelectionDAG &DAG = DCI.DAG;
8607 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
8608 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8609 N->getOperand(0).hasOneUse() &&
8610 canLoadStoreByteSwapped(N->getValueType(0))) {
8611 SDValue Load = N->getOperand(0);
8612 LoadSDNode *LD = cast<LoadSDNode>(Load);
8613
8614 // Create the byte-swapping load.
8615 SDValue Ops[] = {
8616 LD->getChain(), // Chain
8617 LD->getBasePtr() // Ptr
8618 };
8619 EVT LoadVT = N->getValueType(0);
8620 if (LoadVT == MVT::i16)
8621 LoadVT = MVT::i32;
8622 SDValue BSLoad =
8623 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
8624 DAG.getVTList(LoadVT, MVT::Other),
8625 Ops, LD->getMemoryVT(), LD->getMemOperand());
8626
8627 // If this is an i16 load, insert the truncate.
8628 SDValue ResVal = BSLoad;
8629 if (N->getValueType(0) == MVT::i16)
8630 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
8631
8632 // First, combine the bswap away. This makes the value produced by the
8633 // load dead.
8634 DCI.CombineTo(N, ResVal);
8635
8636 // Next, combine the load away, we give it a bogus result value but a real
8637 // chain result. The result value is dead because the bswap is dead.
8638 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
8639
8640 // Return N so it doesn't get rechecked!
8641 return SDValue(N, 0);
8642 }
8643
8644 // Look through bitcasts that retain the number of vector elements.
8645 SDValue Op = N->getOperand(0);
8646 if (Op.getOpcode() == ISD::BITCAST &&
8647 Op.getValueType().isVector() &&
8648 Op.getOperand(0).getValueType().isVector() &&
8649 Op.getValueType().getVectorNumElements() ==
8650 Op.getOperand(0).getValueType().getVectorNumElements())
8651 Op = Op.getOperand(0);
8652
8653 // Push BSWAP into a vector insertion if at least one side then simplifies.
8654 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
8655 SDValue Vec = Op.getOperand(0);
8656 SDValue Elt = Op.getOperand(1);
8657 SDValue Idx = Op.getOperand(2);
8658
8660 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
8662 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
8663 (canLoadStoreByteSwapped(N->getValueType(0)) &&
8664 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
8665 EVT VecVT = N->getValueType(0);
8666 EVT EltVT = N->getValueType(0).getVectorElementType();
8667 if (VecVT != Vec.getValueType()) {
8668 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
8669 DCI.AddToWorklist(Vec.getNode());
8670 }
8671 if (EltVT != Elt.getValueType()) {
8672 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
8673 DCI.AddToWorklist(Elt.getNode());
8674 }
8675 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
8676 DCI.AddToWorklist(Vec.getNode());
8677 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
8678 DCI.AddToWorklist(Elt.getNode());
8679 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
8680 Vec, Elt, Idx);
8681 }
8682 }
8683
8684 // Push BSWAP into a vector shuffle if at least one side then simplifies.
8685 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
8686 if (SV && Op.hasOneUse()) {
8687 SDValue Op0 = Op.getOperand(0);
8688 SDValue Op1 = Op.getOperand(1);
8689
8691 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
8693 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
8694 EVT VecVT = N->getValueType(0);
8695 if (VecVT != Op0.getValueType()) {
8696 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
8697 DCI.AddToWorklist(Op0.getNode());
8698 }
8699 if (VecVT != Op1.getValueType()) {
8700 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
8701 DCI.AddToWorklist(Op1.getNode());
8702 }
8703 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
8704 DCI.AddToWorklist(Op0.getNode());
8705 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
8706 DCI.AddToWorklist(Op1.getNode());
8707 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
8708 }
8709 }
8710
8711 return SDValue();
8712}
8713
8714SDValue SystemZTargetLowering::combineSETCC(
8715 SDNode *N, DAGCombinerInfo &DCI) const {
8716 SelectionDAG &DAG = DCI.DAG;
8717 const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
8718 const SDValue LHS = N->getOperand(0);
8719 const SDValue RHS = N->getOperand(1);
8720 bool CmpNull = isNullConstant(RHS);
8721 bool CmpAllOnes = isAllOnesConstant(RHS);
8722 EVT VT = N->getValueType(0);
8723 SDLoc DL(N);
8724
8725 // Match icmp_eq/ne(bitcast(icmp(X,Y)),0/-1) reduction patterns, and
8726 // change the outer compare to a i128 compare. This will normally
8727 // allow the reduction to be recognized in adjustICmp128, and even if
8728 // not, the i128 compare will still generate better code.
8729 if ((CC == ISD::SETNE || CC == ISD::SETEQ) && (CmpNull || CmpAllOnes)) {
8731 if (Src.getOpcode() == ISD::SETCC &&
8732 Src.getValueType().isFixedLengthVector() &&
8733 Src.getValueType().getScalarType() == MVT::i1) {
8734 EVT CmpVT = Src.getOperand(0).getValueType();
8735 if (CmpVT.getSizeInBits() == 128) {
8736 EVT IntVT = CmpVT.changeVectorElementTypeToInteger();
8737 SDValue LHS =
8738 DAG.getBitcast(MVT::i128, DAG.getSExtOrTrunc(Src, DL, IntVT));
8739 SDValue RHS = CmpNull ? DAG.getConstant(0, DL, MVT::i128)
8740 : DAG.getAllOnesConstant(DL, MVT::i128);
8741 return DAG.getNode(ISD::SETCC, DL, VT, LHS, RHS, N->getOperand(2),
8742 N->getFlags());
8743 }
8744 }
8745 }
8746
8747 return SDValue();
8748}
8749
8750static std::pair<SDValue, int> findCCUse(const SDValue &Val,
8751 unsigned Depth = 0) {
8752 // Limit depth of potentially exponential walk.
8753 if (Depth > 5)
8754 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8755
8756 switch (Val.getOpcode()) {
8757 default:
8758 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8759 case SystemZISD::IPM:
8760 if (Val.getOperand(0).getOpcode() == SystemZISD::CLC ||
8761 Val.getOperand(0).getOpcode() == SystemZISD::STRCMP)
8762 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP);
8763 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY);
8764 case SystemZISD::SELECT_CCMASK: {
8765 SDValue Op4CCReg = Val.getOperand(4);
8766 if (Op4CCReg.getOpcode() == SystemZISD::ICMP ||
8767 Op4CCReg.getOpcode() == SystemZISD::TM) {
8768 auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0), Depth + 1);
8769 if (OpCC != SDValue())
8770 return std::make_pair(OpCC, OpCCValid);
8771 }
8772 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8773 if (!CCValid)
8774 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8775 int CCValidVal = CCValid->getZExtValue();
8776 return std::make_pair(Op4CCReg, CCValidVal);
8777 }
8778 case ISD::ADD:
8779 case ISD::AND:
8780 case ISD::OR:
8781 case ISD::XOR:
8782 case ISD::SHL:
8783 case ISD::SRA:
8784 case ISD::SRL:
8785 auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0), Depth + 1);
8786 if (Op0CC != SDValue())
8787 return std::make_pair(Op0CC, Op0CCValid);
8788 return findCCUse(Val.getOperand(1), Depth + 1);
8789 }
8790}
8791
8792static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8793 SelectionDAG &DAG);
8794
8796 SelectionDAG &DAG) {
8797 SDLoc DL(Val);
8798 auto Opcode = Val.getOpcode();
8799 switch (Opcode) {
8800 default:
8801 return {};
8802 case ISD::Constant:
8803 return {Val, Val, Val, Val};
8804 case SystemZISD::IPM: {
8805 SDValue IPMOp0 = Val.getOperand(0);
8806 if (IPMOp0 != CC)
8807 return {};
8808 SmallVector<SDValue, 4> ShiftedCCVals;
8809 for (auto CC : {0, 1, 2, 3})
8810 ShiftedCCVals.emplace_back(
8811 DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32));
8812 return ShiftedCCVals;
8813 }
8814 case SystemZISD::SELECT_CCMASK: {
8815 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
8816 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8817 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
8818 if (!CCValid || !CCMask)
8819 return {};
8820
8821 int CCValidVal = CCValid->getZExtValue();
8822 int CCMaskVal = CCMask->getZExtValue();
8823 // Pruning search tree early - Moving CC test and combineCCMask ahead of
8824 // recursive call to simplifyAssumingCCVal.
8825 SDValue Op4CCReg = Val.getOperand(4);
8826 if (Op4CCReg != CC)
8827 combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG);
8828 if (Op4CCReg != CC)
8829 return {};
8830 const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG);
8831 const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG);
8832 if (TrueSDVals.empty() || FalseSDVals.empty())
8833 return {};
8834 SmallVector<SDValue, 4> MergedSDVals;
8835 for (auto &CCVal : {0, 1, 2, 3})
8836 MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0)
8837 ? TrueSDVals[CCVal]
8838 : FalseSDVals[CCVal]);
8839 return MergedSDVals;
8840 }
8841 case ISD::ADD:
8842 case ISD::AND:
8843 case ISD::OR:
8844 case ISD::XOR:
8845 case ISD::SRA:
8846 // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA
8847 // would clobber CC).
8848 if (!Val.hasOneUse())
8849 return {};
8850 [[fallthrough]];
8851 case ISD::SHL:
8852 case ISD::SRL:
8853 SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1);
8854 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG);
8855 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG);
8856 if (Op0SDVals.empty() || Op1SDVals.empty())
8857 return {};
8858 SmallVector<SDValue, 4> BinaryOpSDVals;
8859 for (auto CCVal : {0, 1, 2, 3})
8860 BinaryOpSDVals.emplace_back(DAG.getNode(
8861 Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal]));
8862 return BinaryOpSDVals;
8863 }
8864}
8865
8866static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8867 SelectionDAG &DAG) {
8868 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
8869 // set by the CCReg instruction using the CCValid / CCMask masks,
8870 // If the CCReg instruction is itself a ICMP / TM testing the condition
8871 // code set by some other instruction, see whether we can directly
8872 // use that condition code.
8873 auto *CCNode = CCReg.getNode();
8874 if (!CCNode)
8875 return false;
8876
8877 if (CCNode->getOpcode() == SystemZISD::TM) {
8878 if (CCValid != SystemZ::CCMASK_TM)
8879 return false;
8880 auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) {
8881 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8882 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8883 if (!Op0Node || !Op1Node)
8884 return -1;
8885 auto Op0APVal = Op0Node->getAPIntValue();
8886 auto Op1APVal = Op1Node->getAPIntValue();
8887 auto Result = Op0APVal & Op1APVal;
8888 bool AllOnes = Result == Op1APVal;
8889 bool AllZeros = Result == 0;
8890 bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits() - 1] != 0;
8891 return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1;
8892 };
8893 SDValue Op0 = CCNode->getOperand(0);
8894 SDValue Op1 = CCNode->getOperand(1);
8895 auto [Op0CC, Op0CCValid] = findCCUse(Op0);
8896 if (Op0CC == SDValue())
8897 return false;
8898 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG);
8899 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG);
8900 if (Op0SDVals.empty() || Op1SDVals.empty())
8901 return false;
8902 int NewCCMask = 0;
8903 for (auto CC : {0, 1, 2, 3}) {
8904 auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]);
8905 if (CCVal < 0)
8906 return false;
8907 NewCCMask <<= 1;
8908 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8909 }
8910 NewCCMask &= Op0CCValid;
8911 CCReg = Op0CC;
8912 CCMask = NewCCMask;
8913 CCValid = Op0CCValid;
8914 return true;
8915 }
8916 if (CCNode->getOpcode() != SystemZISD::ICMP ||
8917 CCValid != SystemZ::CCMASK_ICMP)
8918 return false;
8919
8920 SDValue CmpOp0 = CCNode->getOperand(0);
8921 SDValue CmpOp1 = CCNode->getOperand(1);
8922 SDValue CmpOp2 = CCNode->getOperand(2);
8923 auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0);
8924 if (Op0CC != SDValue()) {
8925 const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG);
8926 const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG);
8927 if (Op0SDVals.empty() || Op1SDVals.empty())
8928 return false;
8929
8930 auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2);
8931 auto CmpTypeVal = CmpType->getZExtValue();
8932 const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
8933 const SDValue &Op1Val) {
8934 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8935 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8936 if (!Op0Node || !Op1Node)
8937 return -1;
8938 auto Op0APVal = Op0Node->getAPIntValue();
8939 auto Op1APVal = Op1Node->getAPIntValue();
8940 if (CmpTypeVal == SystemZICMP::SignedOnly)
8941 return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2;
8942 return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2;
8943 };
8944 int NewCCMask = 0;
8945 for (auto CC : {0, 1, 2, 3}) {
8946 auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
8947 if (CCVal < 0)
8948 return false;
8949 NewCCMask <<= 1;
8950 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8951 }
8952 NewCCMask &= Op0CCValid;
8953 CCMask = NewCCMask;
8954 CCReg = Op0CC;
8955 CCValid = Op0CCValid;
8956 return true;
8957 }
8958
8959 return false;
8960}
8961
8962// Merging versus split in multiple branches cost.
8965 const Value *Lhs,
8966 const Value *Rhs) const {
8967 const auto isFlagOutOpCC = [](const Value *V) {
8968 using namespace llvm::PatternMatch;
8969 const Value *RHSVal;
8970 const APInt *RHSC;
8971 if (const auto *I = dyn_cast<Instruction>(V)) {
8972 // PatternMatch.h provides concise tree-based pattern match of llvm IR.
8973 if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) ||
8974 match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) {
8975 if (const auto *CB = dyn_cast<CallBase>(RHSVal)) {
8976 if (CB->isInlineAsm()) {
8977 const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
8978 return IA && IA->getConstraintString().contains("{@cc}");
8979 }
8980 }
8981 }
8982 }
8983 return false;
8984 };
8985 // Pattern (ICmp %asm) or (ICmp (And %asm)).
8986 // Cost of longest dependency chain (ICmp, And) is 2. CostThreshold or
8987 // BaseCost can be set >=2. If cost of instruction <= CostThreshold
8988 // conditionals will be merged or else conditionals will be split.
8989 if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
8990 return {3, 0, -1};
8991 // Default.
8992 return {-1, -1, -1};
8993}
8994
8995SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
8996 DAGCombinerInfo &DCI) const {
8997 SelectionDAG &DAG = DCI.DAG;
8998
8999 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
9000 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
9001 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
9002 if (!CCValid || !CCMask)
9003 return SDValue();
9004
9005 int CCValidVal = CCValid->getZExtValue();
9006 int CCMaskVal = CCMask->getZExtValue();
9007 SDValue Chain = N->getOperand(0);
9008 SDValue CCReg = N->getOperand(4);
9009 // If combineCMask was able to merge or simplify ccvalid or ccmask, re-emit
9010 // the modified BR_CCMASK with the new values.
9011 // In order to avoid conditional branches with full or empty cc masks, do not
9012 // do this if ccmask is 0 or equal to ccvalid.
9013 if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG) && CCMaskVal != 0 &&
9014 CCMaskVal != CCValidVal)
9015 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
9016 Chain,
9017 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
9018 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
9019 N->getOperand(3), CCReg);
9020 return SDValue();
9021}
9022
9023SDValue SystemZTargetLowering::combineSELECT_CCMASK(
9024 SDNode *N, DAGCombinerInfo &DCI) const {
9025 SelectionDAG &DAG = DCI.DAG;
9026
9027 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
9028 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
9029 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
9030 if (!CCValid || !CCMask)
9031 return SDValue();
9032
9033 int CCValidVal = CCValid->getZExtValue();
9034 int CCMaskVal = CCMask->getZExtValue();
9035 SDValue CCReg = N->getOperand(4);
9036
9037 bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
9038
9039 // Populate SDVals vector for each condition code ccval for given Val, which
9040 // can again be another nested select_ccmask with the same CC.
9041 const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
9042 if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
9044 if (Val.getOperand(4) != CCReg)
9045 return SmallVector<SDValue, 4>{};
9046 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
9047 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
9048 if (!CCMask)
9049 return SmallVector<SDValue, 4>{};
9050
9051 int CCMaskVal = CCMask->getZExtValue();
9052 for (auto &CC : {0, 1, 2, 3})
9053 Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
9054 : FalseVal);
9055 return Res;
9056 }
9057 return SmallVector<SDValue, 4>{Val, Val, Val, Val};
9058 };
9059 // Attempting to optimize TrueVal/FalseVal in outermost select_ccmask either
9060 // with CCReg found by combineCCMask or original CCReg.
9061 SDValue TrueVal = N->getOperand(0);
9062 SDValue FalseVal = N->getOperand(1);
9063 auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
9064 auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
9065 // TrueSDVals/FalseSDVals might be empty in case of non-constant
9066 // TrueVal/FalseVal for select_ccmask, which can not be optimized further.
9067 if (TrueSDVals.empty())
9068 TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
9069 if (FalseSDVals.empty())
9070 FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
9071 if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
9072 SmallSet<SDValue, 4> MergedSDValsSet;
9073 // Ignoring CC values outside CCValiid.
9074 for (auto CC : {0, 1, 2, 3}) {
9075 if ((CCValidVal & ((1 << (3 - CC)))) != 0)
9076 MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
9077 ? TrueSDVals[CC]
9078 : FalseSDVals[CC]);
9079 }
9080 if (MergedSDValsSet.size() == 1)
9081 return *MergedSDValsSet.begin();
9082 if (MergedSDValsSet.size() == 2) {
9083 auto BeginIt = MergedSDValsSet.begin();
9084 SDValue NewTrueVal = *BeginIt, NewFalseVal = *next(BeginIt);
9085 if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
9086 std::swap(NewTrueVal, NewFalseVal);
9087 int NewCCMask = 0;
9088 for (auto CC : {0, 1, 2, 3}) {
9089 NewCCMask <<= 1;
9090 NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
9091 ? (TrueSDVals[CC] == NewTrueVal)
9092 : (FalseSDVals[CC] == NewTrueVal);
9093 }
9094 CCMaskVal = NewCCMask;
9095 CCMaskVal &= CCValidVal;
9096 TrueVal = NewTrueVal;
9097 FalseVal = NewFalseVal;
9098 IsCombinedCCReg = true;
9099 }
9100 }
9101 // If the condition is trivially false or trivially true after
9102 // combineCCMask, just collapse this SELECT_CCMASK to the indicated value
9103 // (possibly modified by constructCCSDValsFromSELECT).
9104 if (CCMaskVal == 0)
9105 return FalseVal;
9106 if (CCMaskVal == CCValidVal)
9107 return TrueVal;
9108
9109 if (IsCombinedCCReg)
9110 return DAG.getNode(
9111 SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
9112 FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
9113 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
9114
9115 return SDValue();
9116}
9117
9118SDValue SystemZTargetLowering::combineGET_CCMASK(
9119 SDNode *N, DAGCombinerInfo &DCI) const {
9120
9121 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
9122 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
9123 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
9124 if (!CCValid || !CCMask)
9125 return SDValue();
9126 int CCValidVal = CCValid->getZExtValue();
9127 int CCMaskVal = CCMask->getZExtValue();
9128
9129 SDValue Select = N->getOperand(0);
9130 if (Select->getOpcode() == ISD::TRUNCATE)
9131 Select = Select->getOperand(0);
9132 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
9133 return SDValue();
9134
9135 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
9136 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
9137 if (!SelectCCValid || !SelectCCMask)
9138 return SDValue();
9139 int SelectCCValidVal = SelectCCValid->getZExtValue();
9140 int SelectCCMaskVal = SelectCCMask->getZExtValue();
9141
9142 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
9143 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
9144 if (!TrueVal || !FalseVal)
9145 return SDValue();
9146 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
9147 ;
9148 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
9149 SelectCCMaskVal ^= SelectCCValidVal;
9150 else
9151 return SDValue();
9152
9153 if (SelectCCValidVal & ~CCValidVal)
9154 return SDValue();
9155 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
9156 return SDValue();
9157
9158 return Select->getOperand(4);
9159}
9160
9161SDValue SystemZTargetLowering::combineIntDIVREM(
9162 SDNode *N, DAGCombinerInfo &DCI) const {
9163 SelectionDAG &DAG = DCI.DAG;
9164 EVT VT = N->getValueType(0);
9165 // In the case where the divisor is a vector of constants a cheaper
9166 // sequence of instructions can replace the divide. BuildSDIV is called to
9167 // do this during DAG combining, but it only succeeds when it can build a
9168 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
9169 // since it is not Legal but Custom it can only happen before
9170 // legalization. Therefore we must scalarize this early before Combine
9171 // 1. For widened vectors, this is already the result of type legalization.
9172 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
9173 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
9174 return DAG.UnrollVectorOp(N);
9175 return SDValue();
9176}
9177
9178
9179// Transform a right shift of a multiply-and-add into a multiply-and-add-high.
9180// This is closely modeled after the common-code combineShiftToMULH.
9181SDValue SystemZTargetLowering::combineShiftToMulAddHigh(
9182 SDNode *N, DAGCombinerInfo &DCI) const {
9183 SelectionDAG &DAG = DCI.DAG;
9184 SDLoc DL(N);
9185
9186 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
9187 "SRL or SRA node is required here!");
9188
9189 if (!Subtarget.hasVector())
9190 return SDValue();
9191
9192 // Check the shift amount. Proceed with the transformation if the shift
9193 // amount is constant.
9194 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
9195 if (!ShiftAmtSrc)
9196 return SDValue();
9197
9198 // The operation feeding into the shift must be an add.
9199 SDValue ShiftOperand = N->getOperand(0);
9200 if (ShiftOperand.getOpcode() != ISD::ADD)
9201 return SDValue();
9202
9203 // One operand of the add must be a multiply.
9204 SDValue MulOp = ShiftOperand.getOperand(0);
9205 SDValue AddOp = ShiftOperand.getOperand(1);
9206 if (MulOp.getOpcode() != ISD::MUL) {
9207 if (AddOp.getOpcode() != ISD::MUL)
9208 return SDValue();
9209 std::swap(MulOp, AddOp);
9210 }
9211
9212 // All operands must be equivalent extend nodes.
9213 SDValue LeftOp = MulOp.getOperand(0);
9214 SDValue RightOp = MulOp.getOperand(1);
9215
9216 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9217 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9218
9219 if (!IsSignExt && !IsZeroExt)
9220 return SDValue();
9221
9222 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
9223 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
9224
9225 SDValue MulhRightOp;
9226 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
9227 unsigned ActiveBits = IsSignExt
9228 ? Constant->getAPIntValue().getSignificantBits()
9229 : Constant->getAPIntValue().getActiveBits();
9230 if (ActiveBits > NarrowVTSize)
9231 return SDValue();
9232 MulhRightOp = DAG.getConstant(
9233 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9234 NarrowVT);
9235 } else {
9236 if (LeftOp.getOpcode() != RightOp.getOpcode())
9237 return SDValue();
9238 // Check that the two extend nodes are the same type.
9239 if (NarrowVT != RightOp.getOperand(0).getValueType())
9240 return SDValue();
9241 MulhRightOp = RightOp.getOperand(0);
9242 }
9243
9244 SDValue MulhAddOp;
9245 if (ConstantSDNode *Constant = isConstOrConstSplat(AddOp)) {
9246 unsigned ActiveBits = IsSignExt
9247 ? Constant->getAPIntValue().getSignificantBits()
9248 : Constant->getAPIntValue().getActiveBits();
9249 if (ActiveBits > NarrowVTSize)
9250 return SDValue();
9251 MulhAddOp = DAG.getConstant(
9252 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9253 NarrowVT);
9254 } else {
9255 if (LeftOp.getOpcode() != AddOp.getOpcode())
9256 return SDValue();
9257 // Check that the two extend nodes are the same type.
9258 if (NarrowVT != AddOp.getOperand(0).getValueType())
9259 return SDValue();
9260 MulhAddOp = AddOp.getOperand(0);
9261 }
9262
9263 EVT WideVT = LeftOp.getValueType();
9264 // Proceed with the transformation if the wide types match.
9265 assert((WideVT == RightOp.getValueType()) &&
9266 "Cannot have a multiply node with two different operand types.");
9267 assert((WideVT == AddOp.getValueType()) &&
9268 "Cannot have an add node with two different operand types.");
9269
9270 // Proceed with the transformation if the wide type is twice as large
9271 // as the narrow type.
9272 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9273 return SDValue();
9274
9275 // Check the shift amount with the narrow type size.
9276 // Proceed with the transformation if the shift amount is the width
9277 // of the narrow type.
9278 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9279 if (ShiftAmt != NarrowVTSize)
9280 return SDValue();
9281
9282 // Proceed if we support the multiply-and-add-high operation.
9283 if (!(NarrowVT == MVT::v16i8 || NarrowVT == MVT::v8i16 ||
9284 NarrowVT == MVT::v4i32 ||
9285 (Subtarget.hasVectorEnhancements3() &&
9286 (NarrowVT == MVT::v2i64 || NarrowVT == MVT::i128))))
9287 return SDValue();
9288
9289 // Emit the VMAH (signed) or VMALH (unsigned) operation.
9290 SDValue Result = DAG.getNode(IsSignExt ? SystemZISD::VMAH : SystemZISD::VMALH,
9291 DL, NarrowVT, LeftOp.getOperand(0),
9292 MulhRightOp, MulhAddOp);
9293 bool IsSigned = N->getOpcode() == ISD::SRA;
9294 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
9295}
9296
9297// Op is an operand of a multiplication. Check whether this can be folded
9298// into an even/odd widening operation; if so, return the opcode to be used
9299// and update Op to the appropriate sub-operand. Note that the caller must
9300// verify that *both* operands of the multiplication support the operation.
9302 const SystemZSubtarget &Subtarget,
9303 SDValue &Op) {
9304 EVT VT = Op.getValueType();
9305
9306 // Check for (sign/zero_extend_vector_inreg (vector_shuffle)) corresponding
9307 // to selecting the even or odd vector elements.
9308 if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
9309 (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9310 Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)) {
9311 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
9312 unsigned NumElts = VT.getVectorNumElements();
9313 Op = Op.getOperand(0);
9314 if (Op.getValueType().getVectorNumElements() == 2 * NumElts &&
9315 Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
9317 ArrayRef<int> ShuffleMask = SVN->getMask();
9318 bool CanUseEven = true, CanUseOdd = true;
9319 for (unsigned Elt = 0; Elt < NumElts; Elt++) {
9320 if (ShuffleMask[Elt] == -1)
9321 continue;
9322 if (unsigned(ShuffleMask[Elt]) != 2 * Elt)
9323 CanUseEven = false;
9324 if (unsigned(ShuffleMask[Elt]) != 2 * Elt + 1)
9325 CanUseOdd = false;
9326 }
9327 Op = Op.getOperand(0);
9328 if (CanUseEven)
9329 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9330 if (CanUseOdd)
9331 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9332 }
9333 }
9334
9335 // For z17, we can also support the v2i64->i128 case, which looks like
9336 // (sign/zero_extend (extract_vector_elt X 0/1))
9337 if (VT == MVT::i128 && Subtarget.hasVectorEnhancements3() &&
9338 (Op.getOpcode() == ISD::SIGN_EXTEND ||
9339 Op.getOpcode() == ISD::ZERO_EXTEND)) {
9340 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND;
9341 Op = Op.getOperand(0);
9342 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9343 Op.getOperand(0).getValueType() == MVT::v2i64 &&
9344 Op.getOperand(1).getOpcode() == ISD::Constant) {
9345 unsigned Elem = Op.getConstantOperandVal(1);
9346 Op = Op.getOperand(0);
9347 if (Elem == 0)
9348 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9349 if (Elem == 1)
9350 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9351 }
9352 }
9353
9354 return 0;
9355}
9356
9357SDValue SystemZTargetLowering::combineMUL(
9358 SDNode *N, DAGCombinerInfo &DCI) const {
9359 SelectionDAG &DAG = DCI.DAG;
9360
9361 // Detect even/odd widening multiplication.
9362 SDValue Op0 = N->getOperand(0);
9363 SDValue Op1 = N->getOperand(1);
9364 unsigned OpcodeCand0 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op0);
9365 unsigned OpcodeCand1 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op1);
9366 if (OpcodeCand0 && OpcodeCand0 == OpcodeCand1)
9367 return DAG.getNode(OpcodeCand0, SDLoc(N), N->getValueType(0), Op0, Op1);
9368
9369 return SDValue();
9370}
9371
9372SDValue SystemZTargetLowering::combineINTRINSIC(
9373 SDNode *N, DAGCombinerInfo &DCI) const {
9374 SelectionDAG &DAG = DCI.DAG;
9375
9376 unsigned Id = N->getConstantOperandVal(1);
9377 switch (Id) {
9378 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
9379 // or larger is simply a vector load.
9380 case Intrinsic::s390_vll:
9381 case Intrinsic::s390_vlrl:
9382 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
9383 if (C->getZExtValue() >= 15)
9384 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
9385 N->getOperand(3), MachinePointerInfo());
9386 break;
9387 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
9388 case Intrinsic::s390_vstl:
9389 case Intrinsic::s390_vstrl:
9390 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
9391 if (C->getZExtValue() >= 15)
9392 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
9393 N->getOperand(4), MachinePointerInfo());
9394 break;
9395 }
9396
9397 return SDValue();
9398}
9399
9400SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
9401 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
9402 return N->getOperand(0);
9403 return N;
9404}
9405
9407 DAGCombinerInfo &DCI) const {
9408 switch(N->getOpcode()) {
9409 default: break;
9410 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
9411 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
9412 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
9413 case SystemZISD::MERGE_HIGH:
9414 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
9415 case ISD::LOAD: return combineLOAD(N, DCI);
9416 case ISD::STORE: return combineSTORE(N, DCI);
9417 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
9418 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
9419 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
9421 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
9423 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
9424 case ISD::SINT_TO_FP:
9425 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
9426 case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
9427 case ISD::BSWAP: return combineBSWAP(N, DCI);
9428 case ISD::SETCC: return combineSETCC(N, DCI);
9429 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
9430 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
9431 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
9432 case ISD::SRL:
9433 case ISD::SRA: return combineShiftToMulAddHigh(N, DCI);
9434 case ISD::MUL: return combineMUL(N, DCI);
9435 case ISD::SDIV:
9436 case ISD::UDIV:
9437 case ISD::SREM:
9438 case ISD::UREM: return combineIntDIVREM(N, DCI);
9440 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
9441 }
9442
9443 return SDValue();
9444}
9445
9446// Return the demanded elements for the OpNo source operand of Op. DemandedElts
9447// are for Op.
9448static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
9449 unsigned OpNo) {
9450 EVT VT = Op.getValueType();
9451 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
9452 APInt SrcDemE;
9453 unsigned Opcode = Op.getOpcode();
9454 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9455 unsigned Id = Op.getConstantOperandVal(0);
9456 switch (Id) {
9457 case Intrinsic::s390_vpksh: // PACKS
9458 case Intrinsic::s390_vpksf:
9459 case Intrinsic::s390_vpksg:
9460 case Intrinsic::s390_vpkshs: // PACKS_CC
9461 case Intrinsic::s390_vpksfs:
9462 case Intrinsic::s390_vpksgs:
9463 case Intrinsic::s390_vpklsh: // PACKLS
9464 case Intrinsic::s390_vpklsf:
9465 case Intrinsic::s390_vpklsg:
9466 case Intrinsic::s390_vpklshs: // PACKLS_CC
9467 case Intrinsic::s390_vpklsfs:
9468 case Intrinsic::s390_vpklsgs:
9469 // VECTOR PACK truncates the elements of two source vectors into one.
9470 SrcDemE = DemandedElts;
9471 if (OpNo == 2)
9472 SrcDemE.lshrInPlace(NumElts / 2);
9473 SrcDemE = SrcDemE.trunc(NumElts / 2);
9474 break;
9475 // VECTOR UNPACK extends half the elements of the source vector.
9476 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9477 case Intrinsic::s390_vuphh:
9478 case Intrinsic::s390_vuphf:
9479 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9480 case Intrinsic::s390_vuplhh:
9481 case Intrinsic::s390_vuplhf:
9482 SrcDemE = APInt(NumElts * 2, 0);
9483 SrcDemE.insertBits(DemandedElts, 0);
9484 break;
9485 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9486 case Intrinsic::s390_vuplhw:
9487 case Intrinsic::s390_vuplf:
9488 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9489 case Intrinsic::s390_vupllh:
9490 case Intrinsic::s390_vupllf:
9491 SrcDemE = APInt(NumElts * 2, 0);
9492 SrcDemE.insertBits(DemandedElts, NumElts);
9493 break;
9494 case Intrinsic::s390_vpdi: {
9495 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
9496 SrcDemE = APInt(NumElts, 0);
9497 if (!DemandedElts[OpNo - 1])
9498 break;
9499 unsigned Mask = Op.getConstantOperandVal(3);
9500 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
9501 // Demand input element 0 or 1, given by the mask bit value.
9502 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
9503 break;
9504 }
9505 case Intrinsic::s390_vsldb: {
9506 // VECTOR SHIFT LEFT DOUBLE BY BYTE
9507 assert(VT == MVT::v16i8 && "Unexpected type.");
9508 unsigned FirstIdx = Op.getConstantOperandVal(3);
9509 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
9510 unsigned NumSrc0Els = 16 - FirstIdx;
9511 SrcDemE = APInt(NumElts, 0);
9512 if (OpNo == 1) {
9513 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
9514 SrcDemE.insertBits(DemEls, FirstIdx);
9515 } else {
9516 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
9517 SrcDemE.insertBits(DemEls, 0);
9518 }
9519 break;
9520 }
9521 case Intrinsic::s390_vperm:
9522 SrcDemE = APInt::getAllOnes(NumElts);
9523 break;
9524 default:
9525 llvm_unreachable("Unhandled intrinsic.");
9526 break;
9527 }
9528 } else {
9529 switch (Opcode) {
9530 case SystemZISD::JOIN_DWORDS:
9531 // Scalar operand.
9532 SrcDemE = APInt(1, 1);
9533 break;
9534 case SystemZISD::SELECT_CCMASK:
9535 SrcDemE = DemandedElts;
9536 break;
9537 default:
9538 llvm_unreachable("Unhandled opcode.");
9539 break;
9540 }
9541 }
9542 return SrcDemE;
9543}
9544
9545static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
9546 const APInt &DemandedElts,
9547 const SelectionDAG &DAG, unsigned Depth,
9548 unsigned OpNo) {
9549 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9550 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9551 KnownBits LHSKnown =
9552 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9553 KnownBits RHSKnown =
9554 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9555 Known = LHSKnown.intersectWith(RHSKnown);
9556}
9557
9558void
9560 KnownBits &Known,
9561 const APInt &DemandedElts,
9562 const SelectionDAG &DAG,
9563 unsigned Depth) const {
9564 Known.resetAll();
9565
9566 // Intrinsic CC result is returned in the two low bits.
9567 unsigned Tmp0, Tmp1; // not used
9568 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, Tmp0, Tmp1)) {
9569 Known.Zero.setBitsFrom(2);
9570 return;
9571 }
9572 EVT VT = Op.getValueType();
9573 if (Op.getResNo() != 0 || VT == MVT::Untyped)
9574 return;
9575 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
9576 "KnownBits does not match VT in bitwidth");
9577 assert ((!VT.isVector() ||
9578 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
9579 "DemandedElts does not match VT number of elements");
9580 unsigned BitWidth = Known.getBitWidth();
9581 unsigned Opcode = Op.getOpcode();
9582 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9583 bool IsLogical = false;
9584 unsigned Id = Op.getConstantOperandVal(0);
9585 switch (Id) {
9586 case Intrinsic::s390_vpksh: // PACKS
9587 case Intrinsic::s390_vpksf:
9588 case Intrinsic::s390_vpksg:
9589 case Intrinsic::s390_vpkshs: // PACKS_CC
9590 case Intrinsic::s390_vpksfs:
9591 case Intrinsic::s390_vpksgs:
9592 case Intrinsic::s390_vpklsh: // PACKLS
9593 case Intrinsic::s390_vpklsf:
9594 case Intrinsic::s390_vpklsg:
9595 case Intrinsic::s390_vpklshs: // PACKLS_CC
9596 case Intrinsic::s390_vpklsfs:
9597 case Intrinsic::s390_vpklsgs:
9598 case Intrinsic::s390_vpdi:
9599 case Intrinsic::s390_vsldb:
9600 case Intrinsic::s390_vperm:
9601 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
9602 break;
9603 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9604 case Intrinsic::s390_vuplhh:
9605 case Intrinsic::s390_vuplhf:
9606 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9607 case Intrinsic::s390_vupllh:
9608 case Intrinsic::s390_vupllf:
9609 IsLogical = true;
9610 [[fallthrough]];
9611 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9612 case Intrinsic::s390_vuphh:
9613 case Intrinsic::s390_vuphf:
9614 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9615 case Intrinsic::s390_vuplhw:
9616 case Intrinsic::s390_vuplf: {
9617 SDValue SrcOp = Op.getOperand(1);
9618 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
9619 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
9620 if (IsLogical) {
9621 Known = Known.zext(BitWidth);
9622 } else
9623 Known = Known.sext(BitWidth);
9624 break;
9625 }
9626 default:
9627 break;
9628 }
9629 } else {
9630 switch (Opcode) {
9631 case SystemZISD::JOIN_DWORDS:
9632 case SystemZISD::SELECT_CCMASK:
9633 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
9634 break;
9635 case SystemZISD::REPLICATE: {
9636 SDValue SrcOp = Op.getOperand(0);
9637 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
9639 Known = Known.sext(BitWidth); // VREPI sign extends the immedate.
9640 break;
9641 }
9642 default:
9643 break;
9644 }
9645 }
9646
9647 // Known has the width of the source operand(s). Adjust if needed to match
9648 // the passed bitwidth.
9649 if (Known.getBitWidth() != BitWidth)
9650 Known = Known.anyextOrTrunc(BitWidth);
9651}
9652
9653static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
9654 const SelectionDAG &DAG, unsigned Depth,
9655 unsigned OpNo) {
9656 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9657 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9658 if (LHS == 1) return 1; // Early out.
9659 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9660 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9661 if (RHS == 1) return 1; // Early out.
9662 unsigned Common = std::min(LHS, RHS);
9663 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
9664 EVT VT = Op.getValueType();
9665 unsigned VTBits = VT.getScalarSizeInBits();
9666 if (SrcBitWidth > VTBits) { // PACK
9667 unsigned SrcExtraBits = SrcBitWidth - VTBits;
9668 if (Common > SrcExtraBits)
9669 return (Common - SrcExtraBits);
9670 return 1;
9671 }
9672 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
9673 return Common;
9674}
9675
9676unsigned
9678 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
9679 unsigned Depth) const {
9680 if (Op.getResNo() != 0)
9681 return 1;
9682 unsigned Opcode = Op.getOpcode();
9683 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9684 unsigned Id = Op.getConstantOperandVal(0);
9685 switch (Id) {
9686 case Intrinsic::s390_vpksh: // PACKS
9687 case Intrinsic::s390_vpksf:
9688 case Intrinsic::s390_vpksg:
9689 case Intrinsic::s390_vpkshs: // PACKS_CC
9690 case Intrinsic::s390_vpksfs:
9691 case Intrinsic::s390_vpksgs:
9692 case Intrinsic::s390_vpklsh: // PACKLS
9693 case Intrinsic::s390_vpklsf:
9694 case Intrinsic::s390_vpklsg:
9695 case Intrinsic::s390_vpklshs: // PACKLS_CC
9696 case Intrinsic::s390_vpklsfs:
9697 case Intrinsic::s390_vpklsgs:
9698 case Intrinsic::s390_vpdi:
9699 case Intrinsic::s390_vsldb:
9700 case Intrinsic::s390_vperm:
9701 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
9702 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9703 case Intrinsic::s390_vuphh:
9704 case Intrinsic::s390_vuphf:
9705 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9706 case Intrinsic::s390_vuplhw:
9707 case Intrinsic::s390_vuplf: {
9708 SDValue PackedOp = Op.getOperand(1);
9709 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
9710 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
9711 EVT VT = Op.getValueType();
9712 unsigned VTBits = VT.getScalarSizeInBits();
9713 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
9714 return Tmp;
9715 }
9716 default:
9717 break;
9718 }
9719 } else {
9720 switch (Opcode) {
9721 case SystemZISD::SELECT_CCMASK:
9722 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
9723 default:
9724 break;
9725 }
9726 }
9727
9728 return 1;
9729}
9730
9732 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
9733 UndefPoisonKind Kind, unsigned Depth) const {
9734 switch (Op->getOpcode()) {
9735 case SystemZISD::PCREL_WRAPPER:
9736 case SystemZISD::PCREL_OFFSET:
9737 return true;
9738 }
9739 return false;
9740}
9741
9742unsigned
9744 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9745 unsigned StackAlign = TFI->getStackAlignment();
9746 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
9747 "Unexpected stack alignment");
9748 // The default stack probe size is 4096 if the function has no
9749 // stack-probe-size attribute.
9750 unsigned StackProbeSize =
9751 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
9752 // Round down to the stack alignment.
9753 StackProbeSize &= ~(StackAlign - 1);
9754 return StackProbeSize ? StackProbeSize : StackAlign;
9755}
9756
9757//===----------------------------------------------------------------------===//
9758// Custom insertion
9759//===----------------------------------------------------------------------===//
9760
9761// Force base value Base into a register before MI. Return the register.
9763 const SystemZInstrInfo *TII) {
9764 MachineBasicBlock *MBB = MI.getParent();
9765 MachineFunction &MF = *MBB->getParent();
9766 MachineRegisterInfo &MRI = MF.getRegInfo();
9767
9768 if (Base.isReg()) {
9769 // Copy Base into a new virtual register to help register coalescing in
9770 // cases with multiple uses.
9771 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9772 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
9773 .add(Base);
9774 return Reg;
9775 }
9776
9777 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9778 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
9779 .add(Base)
9780 .addImm(0)
9781 .addReg(0);
9782 return Reg;
9783}
9784
9785// The CC operand of MI might be missing a kill marker because there
9786// were multiple uses of CC, and ISel didn't know which to mark.
9787// Figure out whether MI should have had a kill marker.
9789 // Scan forward through BB for a use/def of CC.
9791 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
9792 const MachineInstr &MI = *miI;
9793 if (MI.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
9794 return false;
9795 if (MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
9796 break; // Should have kill-flag - update below.
9797 }
9798
9799 // If we hit the end of the block, check whether CC is live into a
9800 // successor.
9801 if (miI == MBB->end()) {
9802 for (const MachineBasicBlock *Succ : MBB->successors())
9803 if (Succ->isLiveIn(SystemZ::CC))
9804 return false;
9805 }
9806
9807 return true;
9808}
9809
9810// Return true if it is OK for this Select pseudo-opcode to be cascaded
9811// together with other Select pseudo-opcodes into a single basic-block with
9812// a conditional jump around it.
9814 switch (MI.getOpcode()) {
9815 case SystemZ::Select32:
9816 case SystemZ::Select64:
9817 case SystemZ::Select128:
9818 case SystemZ::SelectF32:
9819 case SystemZ::SelectF64:
9820 case SystemZ::SelectF128:
9821 case SystemZ::SelectVR32:
9822 case SystemZ::SelectVR64:
9823 case SystemZ::SelectVR128:
9824 return true;
9825
9826 default:
9827 return false;
9828 }
9829}
9830
9831// Helper function, which inserts PHI functions into SinkMBB:
9832// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
9833// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
9835 MachineBasicBlock *TrueMBB,
9836 MachineBasicBlock *FalseMBB,
9837 MachineBasicBlock *SinkMBB) {
9838 MachineFunction *MF = TrueMBB->getParent();
9840
9841 MachineInstr *FirstMI = Selects.front();
9842 unsigned CCValid = FirstMI->getOperand(3).getImm();
9843 unsigned CCMask = FirstMI->getOperand(4).getImm();
9844
9845 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
9846
9847 // As we are creating the PHIs, we have to be careful if there is more than
9848 // one. Later Selects may reference the results of earlier Selects, but later
9849 // PHIs have to reference the individual true/false inputs from earlier PHIs.
9850 // That also means that PHI construction must work forward from earlier to
9851 // later, and that the code must maintain a mapping from earlier PHI's
9852 // destination registers, and the registers that went into the PHI.
9854
9855 for (auto *MI : Selects) {
9856 Register DestReg = MI->getOperand(0).getReg();
9857 Register TrueReg = MI->getOperand(1).getReg();
9858 Register FalseReg = MI->getOperand(2).getReg();
9859
9860 // If this Select we are generating is the opposite condition from
9861 // the jump we generated, then we have to swap the operands for the
9862 // PHI that is going to be generated.
9863 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
9864 std::swap(TrueReg, FalseReg);
9865
9866 if (auto It = RegRewriteTable.find(TrueReg); It != RegRewriteTable.end())
9867 TrueReg = It->second.first;
9868
9869 if (auto It = RegRewriteTable.find(FalseReg); It != RegRewriteTable.end())
9870 FalseReg = It->second.second;
9871
9872 DebugLoc DL = MI->getDebugLoc();
9873 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
9874 .addReg(TrueReg).addMBB(TrueMBB)
9875 .addReg(FalseReg).addMBB(FalseMBB);
9876
9877 // Add this PHI to the rewrite table.
9878 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
9879 }
9880
9881 MF->getProperties().resetNoPHIs();
9882}
9883
9885SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
9886 MachineBasicBlock *BB) const {
9887 MachineFunction &MF = *BB->getParent();
9888 MachineFrameInfo &MFI = MF.getFrameInfo();
9889 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
9890 assert(TFL->hasReservedCallFrame(MF) &&
9891 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
9892 (void)TFL;
9893 // Get the MaxCallFrameSize value and erase MI since it serves no further
9894 // purpose as the call frame is statically reserved in the prolog. Set
9895 // AdjustsStack as MI is *not* mapped as a frame instruction.
9896 uint32_t NumBytes = MI.getOperand(0).getImm();
9897 if (NumBytes > MFI.getMaxCallFrameSize())
9898 MFI.setMaxCallFrameSize(NumBytes);
9899 MFI.setAdjustsStack(true);
9900
9901 MI.eraseFromParent();
9902 return BB;
9903}
9904
9905// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
9907SystemZTargetLowering::emitSelect(MachineInstr &MI,
9908 MachineBasicBlock *MBB) const {
9909 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
9910 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9911
9912 unsigned CCValid = MI.getOperand(3).getImm();
9913 unsigned CCMask = MI.getOperand(4).getImm();
9914
9915 // If we have a sequence of Select* pseudo instructions using the
9916 // same condition code value, we want to expand all of them into
9917 // a single pair of basic blocks using the same condition.
9918 SmallVector<MachineInstr*, 8> Selects;
9919 SmallVector<MachineInstr*, 8> DbgValues;
9920 Selects.push_back(&MI);
9921 unsigned Count = 0;
9922 for (MachineInstr &NextMI : llvm::make_range(
9923 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
9924 if (isSelectPseudo(NextMI)) {
9925 assert(NextMI.getOperand(3).getImm() == CCValid &&
9926 "Bad CCValid operands since CC was not redefined.");
9927 if (NextMI.getOperand(4).getImm() == CCMask ||
9928 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
9929 Selects.push_back(&NextMI);
9930 continue;
9931 }
9932 break;
9933 }
9934 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9935 NextMI.usesCustomInsertionHook())
9936 break;
9937 bool User = false;
9938 for (auto *SelMI : Selects)
9939 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
9940 User = true;
9941 break;
9942 }
9943 if (NextMI.isDebugInstr()) {
9944 if (User) {
9945 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
9946 DbgValues.push_back(&NextMI);
9947 }
9948 } else if (User || ++Count > 20)
9949 break;
9950 }
9951
9952 MachineInstr *LastMI = Selects.back();
9953 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9954 checkCCKill(*LastMI, MBB));
9955 MachineBasicBlock *StartMBB = MBB;
9956 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
9957 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9958
9959 // Unless CC was killed in the last Select instruction, mark it as
9960 // live-in to both FalseMBB and JoinMBB.
9961 if (!CCKilled) {
9962 FalseMBB->addLiveIn(SystemZ::CC);
9963 JoinMBB->addLiveIn(SystemZ::CC);
9964 }
9965
9966 // StartMBB:
9967 // BRC CCMask, JoinMBB
9968 // # fallthrough to FalseMBB
9969 MBB = StartMBB;
9970 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
9971 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9972 MBB->addSuccessor(JoinMBB);
9973 MBB->addSuccessor(FalseMBB);
9974
9975 // FalseMBB:
9976 // # fallthrough to JoinMBB
9977 MBB = FalseMBB;
9978 MBB->addSuccessor(JoinMBB);
9979
9980 // JoinMBB:
9981 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
9982 // ...
9983 MBB = JoinMBB;
9984 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
9985 for (auto *SelMI : Selects)
9986 SelMI->eraseFromParent();
9987
9989 for (auto *DbgMI : DbgValues)
9990 MBB->splice(InsertPos, StartMBB, DbgMI);
9991
9992 return JoinMBB;
9993}
9994
9995// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
9996// StoreOpcode is the store to use and Invert says whether the store should
9997// happen when the condition is false rather than true. If a STORE ON
9998// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
9999MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
10001 unsigned StoreOpcode,
10002 unsigned STOCOpcode,
10003 bool Invert) const {
10004 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10005
10006 Register SrcReg = MI.getOperand(0).getReg();
10007 MachineOperand Base = MI.getOperand(1);
10008 int64_t Disp = MI.getOperand(2).getImm();
10009 Register IndexReg = MI.getOperand(3).getReg();
10010 unsigned CCValid = MI.getOperand(4).getImm();
10011 unsigned CCMask = MI.getOperand(5).getImm();
10012 DebugLoc DL = MI.getDebugLoc();
10013
10014 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
10015
10016 // ISel pattern matching also adds a load memory operand of the same
10017 // address, so take special care to find the storing memory operand.
10018 MachineMemOperand *MMO = nullptr;
10019 for (auto *I : MI.memoperands())
10020 if (I->isStore()) {
10021 MMO = I;
10022 break;
10023 }
10024
10025 // Use STOCOpcode if possible. We could use different store patterns in
10026 // order to avoid matching the index register, but the performance trade-offs
10027 // might be more complicated in that case.
10028 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
10029 if (Invert)
10030 CCMask ^= CCValid;
10031
10032 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
10033 .addReg(SrcReg)
10034 .add(Base)
10035 .addImm(Disp)
10036 .addImm(CCValid)
10037 .addImm(CCMask)
10038 .addMemOperand(MMO);
10039
10040 MI.eraseFromParent();
10041 return MBB;
10042 }
10043
10044 // Get the condition needed to branch around the store.
10045 if (!Invert)
10046 CCMask ^= CCValid;
10047
10048 MachineBasicBlock *StartMBB = MBB;
10049 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
10050 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
10051
10052 // Unless CC was killed in the CondStore instruction, mark it as
10053 // live-in to both FalseMBB and JoinMBB.
10054 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
10055 !checkCCKill(MI, JoinMBB)) {
10056 FalseMBB->addLiveIn(SystemZ::CC);
10057 JoinMBB->addLiveIn(SystemZ::CC);
10058 }
10059
10060 // StartMBB:
10061 // BRC CCMask, JoinMBB
10062 // # fallthrough to FalseMBB
10063 MBB = StartMBB;
10064 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10065 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
10066 MBB->addSuccessor(JoinMBB);
10067 MBB->addSuccessor(FalseMBB);
10068
10069 // FalseMBB:
10070 // store %SrcReg, %Disp(%Index,%Base)
10071 // # fallthrough to JoinMBB
10072 MBB = FalseMBB;
10073 BuildMI(MBB, DL, TII->get(StoreOpcode))
10074 .addReg(SrcReg)
10075 .add(Base)
10076 .addImm(Disp)
10077 .addReg(IndexReg)
10078 .addMemOperand(MMO);
10079 MBB->addSuccessor(JoinMBB);
10080
10081 MI.eraseFromParent();
10082 return JoinMBB;
10083}
10084
10085// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
10087SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
10089 bool Unsigned) const {
10090 MachineFunction &MF = *MBB->getParent();
10091 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10092 MachineRegisterInfo &MRI = MF.getRegInfo();
10093
10094 // Synthetic instruction to compare 128-bit values.
10095 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
10096 Register Op0 = MI.getOperand(0).getReg();
10097 Register Op1 = MI.getOperand(1).getReg();
10098
10099 MachineBasicBlock *StartMBB = MBB;
10100 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
10101 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
10102
10103 // StartMBB:
10104 //
10105 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
10106 // Swap the inputs to get:
10107 // CC 1 if high(Op0) > high(Op1)
10108 // CC 2 if high(Op0) < high(Op1)
10109 // CC 0 if high(Op0) == high(Op1)
10110 //
10111 // If CC != 0, we'd done, so jump over the next instruction.
10112 //
10113 // VEC[L]G Op1, Op0
10114 // JNE JoinMBB
10115 // # fallthrough to HiEqMBB
10116 MBB = StartMBB;
10117 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
10118 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
10119 .addReg(Op1).addReg(Op0);
10120 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
10122 MBB->addSuccessor(JoinMBB);
10123 MBB->addSuccessor(HiEqMBB);
10124
10125 // HiEqMBB:
10126 //
10127 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
10128 // Since we already know the high parts are equal, the CC
10129 // result will only depend on the low parts:
10130 // CC 1 if low(Op0) > low(Op1)
10131 // CC 3 if low(Op0) <= low(Op1)
10132 //
10133 // VCHLGS Tmp, Op0, Op1
10134 // # fallthrough to JoinMBB
10135 MBB = HiEqMBB;
10136 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
10137 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
10138 .addReg(Op0).addReg(Op1);
10139 MBB->addSuccessor(JoinMBB);
10140
10141 // Mark CC as live-in to JoinMBB.
10142 JoinMBB->addLiveIn(SystemZ::CC);
10143
10144 MI.eraseFromParent();
10145 return JoinMBB;
10146}
10147
10148// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
10149// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
10150// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
10151// whether the field should be inverted after performing BinOpcode (e.g. for
10152// NAND).
10153MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
10154 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
10155 bool Invert) const {
10156 MachineFunction &MF = *MBB->getParent();
10157 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10158 MachineRegisterInfo &MRI = MF.getRegInfo();
10159
10160 // Extract the operands. Base can be a register or a frame index.
10161 // Src2 can be a register or immediate.
10162 Register Dest = MI.getOperand(0).getReg();
10163 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10164 int64_t Disp = MI.getOperand(2).getImm();
10165 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
10166 Register BitShift = MI.getOperand(4).getReg();
10167 Register NegBitShift = MI.getOperand(5).getReg();
10168 unsigned BitSize = MI.getOperand(6).getImm();
10169 DebugLoc DL = MI.getDebugLoc();
10170
10171 // Get the right opcodes for the displacement.
10172 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10173 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10174 assert(LOpcode && CSOpcode && "Displacement out of range");
10175
10176 // Create virtual registers for temporary results.
10177 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10178 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10179 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10180 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10181 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10182
10183 // Insert a basic block for the main loop.
10184 MachineBasicBlock *StartMBB = MBB;
10185 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10186 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10187
10188 // StartMBB:
10189 // ...
10190 // %OrigVal = L Disp(%Base)
10191 // # fall through to LoopMBB
10192 MBB = StartMBB;
10193 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10194 MBB->addSuccessor(LoopMBB);
10195
10196 // LoopMBB:
10197 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
10198 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10199 // %RotatedNewVal = OP %RotatedOldVal, %Src2
10200 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10201 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10202 // JNE LoopMBB
10203 // # fall through to DoneMBB
10204 MBB = LoopMBB;
10205 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10206 .addReg(OrigVal).addMBB(StartMBB)
10207 .addReg(Dest).addMBB(LoopMBB);
10208 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10209 .addReg(OldVal).addReg(BitShift).addImm(0);
10210 if (Invert) {
10211 // Perform the operation normally and then invert every bit of the field.
10212 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10213 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
10214 // XILF with the upper BitSize bits set.
10215 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
10216 .addReg(Tmp).addImm(-1U << (32 - BitSize));
10217 } else if (BinOpcode)
10218 // A simply binary operation.
10219 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
10220 .addReg(RotatedOldVal)
10221 .add(Src2);
10222 else
10223 // Use RISBG to rotate Src2 into position and use it to replace the
10224 // field in RotatedOldVal.
10225 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
10226 .addReg(RotatedOldVal).addReg(Src2.getReg())
10227 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
10228 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10229 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10230 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10231 .addReg(OldVal)
10232 .addReg(NewVal)
10233 .add(Base)
10234 .addImm(Disp);
10235 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10237 MBB->addSuccessor(LoopMBB);
10238 MBB->addSuccessor(DoneMBB);
10239
10240 MI.eraseFromParent();
10241 return DoneMBB;
10242}
10243
10244// Implement EmitInstrWithCustomInserter for subword pseudo
10245// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
10246// instruction that should be used to compare the current field with the
10247// minimum or maximum value. KeepOldMask is the BRC condition-code mask
10248// for when the current field should be kept.
10249MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
10250 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
10251 unsigned KeepOldMask) const {
10252 MachineFunction &MF = *MBB->getParent();
10253 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10254 MachineRegisterInfo &MRI = MF.getRegInfo();
10255
10256 // Extract the operands. Base can be a register or a frame index.
10257 Register Dest = MI.getOperand(0).getReg();
10258 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10259 int64_t Disp = MI.getOperand(2).getImm();
10260 Register Src2 = MI.getOperand(3).getReg();
10261 Register BitShift = MI.getOperand(4).getReg();
10262 Register NegBitShift = MI.getOperand(5).getReg();
10263 unsigned BitSize = MI.getOperand(6).getImm();
10264 DebugLoc DL = MI.getDebugLoc();
10265
10266 // Get the right opcodes for the displacement.
10267 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10268 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10269 assert(LOpcode && CSOpcode && "Displacement out of range");
10270
10271 // Create virtual registers for temporary results.
10272 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10273 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10274 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10275 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10276 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10277 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10278
10279 // Insert 3 basic blocks for the loop.
10280 MachineBasicBlock *StartMBB = MBB;
10281 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10282 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10283 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
10284 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
10285
10286 // StartMBB:
10287 // ...
10288 // %OrigVal = L Disp(%Base)
10289 // # fall through to LoopMBB
10290 MBB = StartMBB;
10291 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10292 MBB->addSuccessor(LoopMBB);
10293
10294 // LoopMBB:
10295 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
10296 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10297 // CompareOpcode %RotatedOldVal, %Src2
10298 // BRC KeepOldMask, UpdateMBB
10299 MBB = LoopMBB;
10300 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10301 .addReg(OrigVal).addMBB(StartMBB)
10302 .addReg(Dest).addMBB(UpdateMBB);
10303 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10304 .addReg(OldVal).addReg(BitShift).addImm(0);
10305 BuildMI(MBB, DL, TII->get(CompareOpcode))
10306 .addReg(RotatedOldVal).addReg(Src2);
10307 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10308 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
10309 MBB->addSuccessor(UpdateMBB);
10310 MBB->addSuccessor(UseAltMBB);
10311
10312 // UseAltMBB:
10313 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
10314 // # fall through to UpdateMBB
10315 MBB = UseAltMBB;
10316 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
10317 .addReg(RotatedOldVal).addReg(Src2)
10318 .addImm(32).addImm(31 + BitSize).addImm(0);
10319 MBB->addSuccessor(UpdateMBB);
10320
10321 // UpdateMBB:
10322 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
10323 // [ %RotatedAltVal, UseAltMBB ]
10324 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10325 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10326 // JNE LoopMBB
10327 // # fall through to DoneMBB
10328 MBB = UpdateMBB;
10329 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
10330 .addReg(RotatedOldVal).addMBB(LoopMBB)
10331 .addReg(RotatedAltVal).addMBB(UseAltMBB);
10332 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10333 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10334 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10335 .addReg(OldVal)
10336 .addReg(NewVal)
10337 .add(Base)
10338 .addImm(Disp);
10339 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10341 MBB->addSuccessor(LoopMBB);
10342 MBB->addSuccessor(DoneMBB);
10343
10344 MI.eraseFromParent();
10345 return DoneMBB;
10346}
10347
10348// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
10349// instruction MI.
10351SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
10352 MachineBasicBlock *MBB) const {
10353 MachineFunction &MF = *MBB->getParent();
10354 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10355 MachineRegisterInfo &MRI = MF.getRegInfo();
10356
10357 // Extract the operands. Base can be a register or a frame index.
10358 Register Dest = MI.getOperand(0).getReg();
10359 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10360 int64_t Disp = MI.getOperand(2).getImm();
10361 Register CmpVal = MI.getOperand(3).getReg();
10362 Register OrigSwapVal = MI.getOperand(4).getReg();
10363 Register BitShift = MI.getOperand(5).getReg();
10364 Register NegBitShift = MI.getOperand(6).getReg();
10365 int64_t BitSize = MI.getOperand(7).getImm();
10366 DebugLoc DL = MI.getDebugLoc();
10367
10368 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
10369
10370 // Get the right opcodes for the displacement and zero-extension.
10371 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10372 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10373 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
10374 assert(LOpcode && CSOpcode && "Displacement out of range");
10375
10376 // Create virtual registers for temporary results.
10377 Register OrigOldVal = MRI.createVirtualRegister(RC);
10378 Register OldVal = MRI.createVirtualRegister(RC);
10379 Register SwapVal = MRI.createVirtualRegister(RC);
10380 Register StoreVal = MRI.createVirtualRegister(RC);
10381 Register OldValRot = MRI.createVirtualRegister(RC);
10382 Register RetryOldVal = MRI.createVirtualRegister(RC);
10383 Register RetrySwapVal = MRI.createVirtualRegister(RC);
10384
10385 // Insert 2 basic blocks for the loop.
10386 MachineBasicBlock *StartMBB = MBB;
10387 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10388 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10389 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
10390
10391 // StartMBB:
10392 // ...
10393 // %OrigOldVal = L Disp(%Base)
10394 // # fall through to LoopMBB
10395 MBB = StartMBB;
10396 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
10397 .add(Base)
10398 .addImm(Disp)
10399 .addReg(0);
10400 MBB->addSuccessor(LoopMBB);
10401
10402 // LoopMBB:
10403 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
10404 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
10405 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
10406 // ^^ The low BitSize bits contain the field
10407 // of interest.
10408 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
10409 // ^^ Replace the upper 32-BitSize bits of the
10410 // swap value with those that we loaded and rotated.
10411 // %Dest = LL[CH] %OldValRot
10412 // CR %Dest, %CmpVal
10413 // JNE DoneMBB
10414 // # Fall through to SetMBB
10415 MBB = LoopMBB;
10416 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10417 .addReg(OrigOldVal).addMBB(StartMBB)
10418 .addReg(RetryOldVal).addMBB(SetMBB);
10419 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
10420 .addReg(OrigSwapVal).addMBB(StartMBB)
10421 .addReg(RetrySwapVal).addMBB(SetMBB);
10422 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
10423 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
10424 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
10425 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
10426 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
10427 .addReg(OldValRot);
10428 BuildMI(MBB, DL, TII->get(SystemZ::CR))
10429 .addReg(Dest).addReg(CmpVal);
10430 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10433 MBB->addSuccessor(DoneMBB);
10434 MBB->addSuccessor(SetMBB);
10435
10436 // SetMBB:
10437 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
10438 // ^^ Rotate the new field to its proper position.
10439 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
10440 // JNE LoopMBB
10441 // # fall through to ExitMBB
10442 MBB = SetMBB;
10443 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
10444 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
10445 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
10446 .addReg(OldVal)
10447 .addReg(StoreVal)
10448 .add(Base)
10449 .addImm(Disp);
10450 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10452 MBB->addSuccessor(LoopMBB);
10453 MBB->addSuccessor(DoneMBB);
10454
10455 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
10456 // to the block after the loop. At this point, CC may have been defined
10457 // either by the CR in LoopMBB or by the CS in SetMBB.
10458 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
10459 DoneMBB->addLiveIn(SystemZ::CC);
10460
10461 MI.eraseFromParent();
10462 return DoneMBB;
10463}
10464
10465// Emit a move from two GR64s to a GR128.
10467SystemZTargetLowering::emitPair128(MachineInstr &MI,
10468 MachineBasicBlock *MBB) const {
10469 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10470 const DebugLoc &DL = MI.getDebugLoc();
10471
10472 Register Dest = MI.getOperand(0).getReg();
10473 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
10474 .add(MI.getOperand(1))
10475 .addImm(SystemZ::subreg_h64)
10476 .add(MI.getOperand(2))
10477 .addImm(SystemZ::subreg_l64);
10478 MI.eraseFromParent();
10479 return MBB;
10480}
10481
10482// Emit an extension from a GR64 to a GR128. ClearEven is true
10483// if the high register of the GR128 value must be cleared or false if
10484// it's "don't care".
10485MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
10487 bool ClearEven) const {
10488 MachineFunction &MF = *MBB->getParent();
10489 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10490 MachineRegisterInfo &MRI = MF.getRegInfo();
10491 DebugLoc DL = MI.getDebugLoc();
10492
10493 Register Dest = MI.getOperand(0).getReg();
10494 Register Src = MI.getOperand(1).getReg();
10495 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10496
10497 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
10498 if (ClearEven) {
10499 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10500 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10501
10502 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
10503 .addImm(0);
10504 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
10505 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
10506 In128 = NewIn128;
10507 }
10508 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
10509 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
10510
10511 MI.eraseFromParent();
10512 return MBB;
10513}
10514
10516SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
10518 unsigned Opcode, bool IsMemset) const {
10519 MachineFunction &MF = *MBB->getParent();
10520 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10521 MachineRegisterInfo &MRI = MF.getRegInfo();
10522 DebugLoc DL = MI.getDebugLoc();
10523
10524 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
10525 uint64_t DestDisp = MI.getOperand(1).getImm();
10526 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
10527 uint64_t SrcDisp;
10528
10529 // Fold the displacement Disp if it is out of range.
10530 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
10531 if (!isUInt<12>(Disp)) {
10532 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10533 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
10534 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
10535 .add(Base).addImm(Disp).addReg(0);
10537 Disp = 0;
10538 }
10539 };
10540
10541 if (!IsMemset) {
10542 SrcBase = earlyUseOperand(MI.getOperand(2));
10543 SrcDisp = MI.getOperand(3).getImm();
10544 } else {
10545 SrcBase = DestBase;
10546 SrcDisp = DestDisp++;
10547 foldDisplIfNeeded(DestBase, DestDisp);
10548 }
10549
10550 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
10551 bool IsImmForm = LengthMO.isImm();
10552 bool IsRegForm = !IsImmForm;
10553
10554 // Build and insert one Opcode of Length, with special treatment for memset.
10555 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
10557 MachineOperand DBase, uint64_t DDisp,
10558 MachineOperand SBase, uint64_t SDisp,
10559 unsigned Length) -> void {
10560 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
10561 if (IsMemset) {
10562 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
10563 if (ByteMO.isImm())
10564 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
10565 .add(SBase).addImm(SDisp).add(ByteMO);
10566 else
10567 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
10568 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
10569 if (--Length == 0)
10570 return;
10571 }
10572 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
10573 .add(DBase).addImm(DDisp).addImm(Length)
10574 .add(SBase).addImm(SDisp)
10575 .setMemRefs(MI.memoperands());
10576 };
10577
10578 bool NeedsLoop = false;
10579 uint64_t ImmLength = 0;
10580 Register LenAdjReg = SystemZ::NoRegister;
10581 if (IsImmForm) {
10582 ImmLength = LengthMO.getImm();
10583 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
10584 if (ImmLength == 0) {
10585 MI.eraseFromParent();
10586 return MBB;
10587 }
10588 if (Opcode == SystemZ::CLC) {
10589 if (ImmLength > 3 * 256)
10590 // A two-CLC sequence is a clear win over a loop, not least because
10591 // it needs only one branch. A three-CLC sequence needs the same
10592 // number of branches as a loop (i.e. 2), but is shorter. That
10593 // brings us to lengths greater than 768 bytes. It seems relatively
10594 // likely that a difference will be found within the first 768 bytes,
10595 // so we just optimize for the smallest number of branch
10596 // instructions, in order to avoid polluting the prediction buffer
10597 // too much.
10598 NeedsLoop = true;
10599 } else if (ImmLength > 6 * 256)
10600 // The heuristic we use is to prefer loops for anything that would
10601 // require 7 or more MVCs. With these kinds of sizes there isn't much
10602 // to choose between straight-line code and looping code, since the
10603 // time will be dominated by the MVCs themselves.
10604 NeedsLoop = true;
10605 } else {
10606 NeedsLoop = true;
10607 LenAdjReg = LengthMO.getReg();
10608 }
10609
10610 // When generating more than one CLC, all but the last will need to
10611 // branch to the end when a difference is found.
10612 MachineBasicBlock *EndMBB =
10613 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
10615 : nullptr);
10616
10617 if (NeedsLoop) {
10618 Register StartCountReg =
10619 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10620 if (IsImmForm) {
10621 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
10622 ImmLength &= 255;
10623 } else {
10624 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
10625 .addReg(LenAdjReg)
10626 .addReg(0)
10627 .addImm(8);
10628 }
10629
10630 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
10631 auto loadZeroAddress = [&]() -> MachineOperand {
10632 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10633 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
10634 return MachineOperand::CreateReg(Reg, false);
10635 };
10636 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
10637 DestBase = loadZeroAddress();
10638 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
10639 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
10640
10641 MachineBasicBlock *StartMBB = nullptr;
10642 MachineBasicBlock *LoopMBB = nullptr;
10643 MachineBasicBlock *NextMBB = nullptr;
10644 MachineBasicBlock *DoneMBB = nullptr;
10645 MachineBasicBlock *AllDoneMBB = nullptr;
10646
10647 Register StartSrcReg = forceReg(MI, SrcBase, TII);
10648 Register StartDestReg =
10649 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
10650
10651 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
10652 Register ThisSrcReg = MRI.createVirtualRegister(RC);
10653 Register ThisDestReg =
10654 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
10655 Register NextSrcReg = MRI.createVirtualRegister(RC);
10656 Register NextDestReg =
10657 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
10658 RC = &SystemZ::GR64BitRegClass;
10659 Register ThisCountReg = MRI.createVirtualRegister(RC);
10660 Register NextCountReg = MRI.createVirtualRegister(RC);
10661
10662 if (IsRegForm) {
10663 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10664 StartMBB = SystemZ::emitBlockAfter(MBB);
10665 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10666 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10667 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
10668
10669 // MBB:
10670 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
10671 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10672 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
10673 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10675 .addMBB(AllDoneMBB);
10676 MBB->addSuccessor(AllDoneMBB);
10677 if (!IsMemset)
10678 MBB->addSuccessor(StartMBB);
10679 else {
10680 // MemsetOneCheckMBB:
10681 // # Jump to MemsetOneMBB for a memset of length 1, or
10682 // # fall thru to StartMBB.
10683 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
10684 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
10685 MBB->addSuccessor(MemsetOneCheckMBB);
10686 MBB = MemsetOneCheckMBB;
10687 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10688 .addReg(LenAdjReg).addImm(-1);
10689 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10691 .addMBB(MemsetOneMBB);
10692 MBB->addSuccessor(MemsetOneMBB, {10, 100});
10693 MBB->addSuccessor(StartMBB, {90, 100});
10694
10695 // MemsetOneMBB:
10696 // # Jump back to AllDoneMBB after a single MVI or STC.
10697 MBB = MemsetOneMBB;
10698 insertMemMemOp(MBB, MBB->end(),
10699 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
10700 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
10701 1);
10702 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
10703 MBB->addSuccessor(AllDoneMBB);
10704 }
10705
10706 // StartMBB:
10707 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
10708 MBB = StartMBB;
10709 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10710 .addReg(StartCountReg).addImm(0);
10711 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10713 .addMBB(DoneMBB);
10714 MBB->addSuccessor(DoneMBB);
10715 MBB->addSuccessor(LoopMBB);
10716 }
10717 else {
10718 StartMBB = MBB;
10719 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10720 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10721 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10722
10723 // StartMBB:
10724 // # fall through to LoopMBB
10725 MBB->addSuccessor(LoopMBB);
10726
10727 DestBase = MachineOperand::CreateReg(NextDestReg, false);
10728 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
10729 if (EndMBB && !ImmLength)
10730 // If the loop handled the whole CLC range, DoneMBB will be empty with
10731 // CC live-through into EndMBB, so add it as live-in.
10732 DoneMBB->addLiveIn(SystemZ::CC);
10733 }
10734
10735 // LoopMBB:
10736 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
10737 // [ %NextDestReg, NextMBB ]
10738 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
10739 // [ %NextSrcReg, NextMBB ]
10740 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
10741 // [ %NextCountReg, NextMBB ]
10742 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
10743 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
10744 // ( JLH EndMBB )
10745 //
10746 // The prefetch is used only for MVC. The JLH is used only for CLC.
10747 MBB = LoopMBB;
10748 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
10749 .addReg(StartDestReg).addMBB(StartMBB)
10750 .addReg(NextDestReg).addMBB(NextMBB);
10751 if (!HaveSingleBase)
10752 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
10753 .addReg(StartSrcReg).addMBB(StartMBB)
10754 .addReg(NextSrcReg).addMBB(NextMBB);
10755 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
10756 .addReg(StartCountReg).addMBB(StartMBB)
10757 .addReg(NextCountReg).addMBB(NextMBB);
10758 if (Opcode == SystemZ::MVC)
10759 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
10761 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
10762 insertMemMemOp(MBB, MBB->end(),
10763 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
10764 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
10765 if (EndMBB) {
10766 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10768 .addMBB(EndMBB);
10769 MBB->addSuccessor(EndMBB);
10770 MBB->addSuccessor(NextMBB);
10771 }
10772
10773 // NextMBB:
10774 // %NextDestReg = LA 256(%ThisDestReg)
10775 // %NextSrcReg = LA 256(%ThisSrcReg)
10776 // %NextCountReg = AGHI %ThisCountReg, -1
10777 // CGHI %NextCountReg, 0
10778 // JLH LoopMBB
10779 // # fall through to DoneMBB
10780 //
10781 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
10782 MBB = NextMBB;
10783 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
10784 .addReg(ThisDestReg).addImm(256).addReg(0);
10785 if (!HaveSingleBase)
10786 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
10787 .addReg(ThisSrcReg).addImm(256).addReg(0);
10788 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
10789 .addReg(ThisCountReg).addImm(-1);
10790 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10791 .addReg(NextCountReg).addImm(0);
10792 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10794 .addMBB(LoopMBB);
10795 MBB->addSuccessor(LoopMBB);
10796 MBB->addSuccessor(DoneMBB);
10797
10798 MBB = DoneMBB;
10799 if (IsRegForm) {
10800 // DoneMBB:
10801 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
10802 // # Use EXecute Relative Long for the remainder of the bytes. The target
10803 // instruction of the EXRL will have a length field of 1 since 0 is an
10804 // illegal value. The number of bytes processed becomes (%LenAdjReg &
10805 // 0xff) + 1.
10806 // # Fall through to AllDoneMBB.
10807 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10808 Register RemDestReg = HaveSingleBase ? RemSrcReg
10809 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10810 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
10811 .addReg(StartDestReg).addMBB(StartMBB)
10812 .addReg(NextDestReg).addMBB(NextMBB);
10813 if (!HaveSingleBase)
10814 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
10815 .addReg(StartSrcReg).addMBB(StartMBB)
10816 .addReg(NextSrcReg).addMBB(NextMBB);
10817 if (IsMemset)
10818 insertMemMemOp(MBB, MBB->end(),
10819 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
10820 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
10821 MachineInstrBuilder EXRL_MIB =
10822 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
10823 .addImm(Opcode)
10824 .addReg(LenAdjReg)
10825 .addReg(RemDestReg).addImm(DestDisp)
10826 .addReg(RemSrcReg).addImm(SrcDisp);
10827 MBB->addSuccessor(AllDoneMBB);
10828 MBB = AllDoneMBB;
10829 if (Opcode != SystemZ::MVC) {
10830 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
10831 if (EndMBB)
10832 MBB->addLiveIn(SystemZ::CC);
10833 }
10834 }
10835 MF.getProperties().resetNoPHIs();
10836 }
10837
10838 // Handle any remaining bytes with straight-line code.
10839 while (ImmLength > 0) {
10840 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
10841 // The previous iteration might have created out-of-range displacements.
10842 // Apply them using LA/LAY if so.
10843 foldDisplIfNeeded(DestBase, DestDisp);
10844 foldDisplIfNeeded(SrcBase, SrcDisp);
10845 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
10846 DestDisp += ThisLength;
10847 SrcDisp += ThisLength;
10848 ImmLength -= ThisLength;
10849 // If there's another CLC to go, branch to the end if a difference
10850 // was found.
10851 if (EndMBB && ImmLength > 0) {
10852 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
10853 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10855 .addMBB(EndMBB);
10856 MBB->addSuccessor(EndMBB);
10857 MBB->addSuccessor(NextMBB);
10858 MBB = NextMBB;
10859 }
10860 }
10861 if (EndMBB) {
10862 MBB->addSuccessor(EndMBB);
10863 MBB = EndMBB;
10864 MBB->addLiveIn(SystemZ::CC);
10865 }
10866
10867 MI.eraseFromParent();
10868 return MBB;
10869}
10870
10871// Decompose string pseudo-instruction MI into a loop that continually performs
10872// Opcode until CC != 3.
10873MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
10874 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10875 MachineFunction &MF = *MBB->getParent();
10876 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10877 MachineRegisterInfo &MRI = MF.getRegInfo();
10878 DebugLoc DL = MI.getDebugLoc();
10879
10880 uint64_t End1Reg = MI.getOperand(0).getReg();
10881 uint64_t Start1Reg = MI.getOperand(1).getReg();
10882 uint64_t Start2Reg = MI.getOperand(2).getReg();
10883 uint64_t CharReg = MI.getOperand(3).getReg();
10884
10885 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
10886 uint64_t This1Reg = MRI.createVirtualRegister(RC);
10887 uint64_t This2Reg = MRI.createVirtualRegister(RC);
10888 uint64_t End2Reg = MRI.createVirtualRegister(RC);
10889
10890 MachineBasicBlock *StartMBB = MBB;
10891 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10892 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10893
10894 // StartMBB:
10895 // # fall through to LoopMBB
10896 MBB->addSuccessor(LoopMBB);
10897
10898 // LoopMBB:
10899 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
10900 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
10901 // R0L = %CharReg
10902 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
10903 // JO LoopMBB
10904 // # fall through to DoneMBB
10905 //
10906 // The load of R0L can be hoisted by post-RA LICM.
10907 MBB = LoopMBB;
10908
10909 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
10910 .addReg(Start1Reg).addMBB(StartMBB)
10911 .addReg(End1Reg).addMBB(LoopMBB);
10912 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
10913 .addReg(Start2Reg).addMBB(StartMBB)
10914 .addReg(End2Reg).addMBB(LoopMBB);
10915 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
10916 BuildMI(MBB, DL, TII->get(Opcode))
10917 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
10918 .addReg(This1Reg).addReg(This2Reg);
10919 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10921 MBB->addSuccessor(LoopMBB);
10922 MBB->addSuccessor(DoneMBB);
10923
10924 DoneMBB->addLiveIn(SystemZ::CC);
10925
10926 MI.eraseFromParent();
10927 return DoneMBB;
10928}
10929
10930// Update TBEGIN instruction with final opcode and register clobbers.
10931MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
10932 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
10933 bool NoFloat) const {
10934 MachineFunction &MF = *MBB->getParent();
10935 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
10936 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10937
10938 // Update opcode.
10939 MI.setDesc(TII->get(Opcode));
10940
10941 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
10942 // Make sure to add the corresponding GRSM bits if they are missing.
10943 uint64_t Control = MI.getOperand(2).getImm();
10944 static const unsigned GPRControlBit[16] = {
10945 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
10946 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
10947 };
10948 Control |= GPRControlBit[15];
10949 if (TFI->hasFP(MF))
10950 Control |= GPRControlBit[11];
10951 MI.getOperand(2).setImm(Control);
10952
10953 // Add GPR clobbers.
10954 for (int I = 0; I < 16; I++) {
10955 if ((Control & GPRControlBit[I]) == 0) {
10956 unsigned Reg = SystemZMC::GR64Regs[I];
10957 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10958 }
10959 }
10960
10961 // Add FPR/VR clobbers.
10962 if (!NoFloat && (Control & 4) != 0) {
10963 if (Subtarget.hasVector()) {
10964 for (unsigned Reg : SystemZMC::VR128Regs) {
10965 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10966 }
10967 } else {
10968 for (unsigned Reg : SystemZMC::FP64Regs) {
10969 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10970 }
10971 }
10972 }
10973
10974 return MBB;
10975}
10976
10977MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
10978 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10979 MachineFunction &MF = *MBB->getParent();
10980 MachineRegisterInfo *MRI = &MF.getRegInfo();
10981 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10982 DebugLoc DL = MI.getDebugLoc();
10983
10984 Register SrcReg = MI.getOperand(0).getReg();
10985
10986 // Create new virtual register of the same class as source.
10987 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
10988 Register DstReg = MRI->createVirtualRegister(RC);
10989
10990 // Replace pseudo with a normal load-and-test that models the def as
10991 // well.
10992 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
10993 .addReg(SrcReg)
10994 .setMIFlags(MI.getFlags());
10995 MI.eraseFromParent();
10996
10997 return MBB;
10998}
10999
// Expand the probed-alloca pseudo MI into a loop that lowers R15D in steps of
// ProbeSize, touching the stack after each step, followed by a tail that
// handles the remaining size. Operand 0 of MI receives the final R15D value
// and operand 2 supplies the requested size. Returns DoneMBB, the block
// produced by SystemZ::splitBlockAfter(MI, MBB).
// NOTE(review): the parameter list of this signature is not visible in this
// listing; the body uses MI and MBB.
11000MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
11002 MachineFunction &MF = *MBB->getParent();
11003 MachineRegisterInfo *MRI = &MF.getRegInfo();
11004 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
11005 DebugLoc DL = MI.getDebugLoc();
11006 const unsigned ProbeSize = getStackProbeSize(MF);
11007 Register DstReg = MI.getOperand(0).getReg();
11008 Register SizeReg = MI.getOperand(2).getReg();
11009
// Block layout after StartMBB: LoopTestMBB, LoopBodyMBB, TailTestMBB, TailMBB.
11010 MachineBasicBlock *StartMBB = MBB;
11011 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
11012 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
11013 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
11014 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
11015 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
11016
// Memory operand attached to both probing compares below.
// NOTE(review): the remaining arguments of this call are not visible in this
// listing; the variable name suggests a volatile load -- confirm.
11017 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
11019
// PHIReg carries the size still to be allocated; IncReg is its value after
// one ProbeSize step.
11020 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
11021 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
11022
11023 // LoopTestMBB
11024 // BRC TailTestMBB
11025 // # fallthrough to LoopBodyMBB
11026 StartMBB->addSuccessor(LoopTestMBB);
11027 MBB = LoopTestMBB;
11028 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
11029 .addReg(SizeReg)
11030 .addMBB(StartMBB)
11031 .addReg(IncReg)
11032 .addMBB(LoopBodyMBB);
// Compare the remaining size with ProbeSize and branch to the tail test.
// NOTE(review): the BRC condition operands are not visible in this listing.
11033 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
11034 .addReg(PHIReg)
11035 .addImm(ProbeSize);
11036 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
11038 .addMBB(TailTestMBB);
11039 MBB->addSuccessor(LoopBodyMBB);
11040 MBB->addSuccessor(TailTestMBB);
11041
11042 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
11043 // J LoopTestMBB
11044 MBB = LoopBodyMBB;
// Subtract ProbeSize from both the remaining size and R15D, then issue the
// CG tagged with VolLdMMO.
11045 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
11046 .addReg(PHIReg)
11047 .addImm(ProbeSize);
11048 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
11049 .addReg(SystemZ::R15D)
11050 .addImm(ProbeSize);
11051 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
11052 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
11053 .setMemRefs(VolLdMMO);
11054 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
11055 MBB->addSuccessor(LoopTestMBB);
11056
11057 // TailTestMBB
11058 // BRC DoneMBB
11059 // # fallthrough to TailMBB
11060 MBB = TailTestMBB;
// Compare the remaining size with zero and branch to DoneMBB.
// NOTE(review): the BRC condition operands are not visible in this listing;
// presumably the branch is taken when nothing remains -- confirm.
11061 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
11062 .addReg(PHIReg)
11063 .addImm(0);
11064 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
11066 .addMBB(DoneMBB);
11067 MBB->addSuccessor(TailMBB);
11068 MBB->addSuccessor(DoneMBB);
11069
11070 // TailMBB
11071 // # fallthrough to DoneMBB
11072 MBB = TailMBB;
// Subtract the remaining size from R15D and issue one more CG tagged with
// VolLdMMO, this time with PHIReg as an additional address register.
11073 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
11074 .addReg(SystemZ::R15D)
11075 .addReg(PHIReg);
11076 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
11077 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
11078 .setMemRefs(VolLdMMO);
11079 MBB->addSuccessor(DoneMBB);
11080
11081 // DoneMBB
11082 MBB = DoneMBB;
// The result of the pseudo is the final value of R15D.
11083 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
11084 .addReg(SystemZ::R15D);
11085
11086 MI.eraseFromParent();
11087 return DoneMBB;
11088}
11089
11090SDValue SystemZTargetLowering::
11091getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
11092 MachineFunction &MF = DAG.getMachineFunction();
11093 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
11094 SDLoc DL(SP);
11095 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
11096 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
11097}
11098
11099// Replace a _STACKGUARD_DAG pseudo with a _STACKGUARD pseudo, adding
11100// a dead early-clobber def reg that will be used as a scratch register
11101// when the pseudo is expanded.
11102MachineBasicBlock *SystemZTargetLowering::emitStackGuardPseudo(
11103 MachineInstr &MI, MachineBasicBlock *MBB, unsigned PseudoOp) const {
11104 MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();
11105 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
11106 DebugLoc DL = MI.getDebugLoc();
11107 Register AddrReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
11108 BuildMI(*MBB, MI, DL, TII->get(PseudoOp), AddrReg)
11109 .addFrameIndex(MI.getOperand(0).getIndex())
11110 .addImm(MI.getOperand(1).getImm());
11111 MI.eraseFromParent();
11112 return MBB;
11113}
11114
// Dispatch on the opcode of MI to the matching emit* expansion helper and
// return whatever block that helper returns. Opcodes not listed here hit
// llvm_unreachable.
// NOTE(review): the function signature that opens this body is not visible
// in this listing; the body uses MI and MBB.
11117 switch (MI.getOpcode()) {
// Call-frame adjustment pseudos.
11118 case SystemZ::ADJCALLSTACKDOWN:
11119 case SystemZ::ADJCALLSTACKUP:
11120 return emitAdjCallStack(MI, MBB);
11121
// Select pseudos of every register class share one expansion.
11122 case SystemZ::Select32:
11123 case SystemZ::Select64:
11124 case SystemZ::Select128:
11125 case SystemZ::SelectF32:
11126 case SystemZ::SelectF64:
11127 case SystemZ::SelectF128:
11128 case SystemZ::SelectVR32:
11129 case SystemZ::SelectVR64:
11130 case SystemZ::SelectVR128:
11131 return emitSelect(MI, MBB);
11132
// Conditional stores. The third argument is the plain store opcode; the
// *Inv variants pass true as the last argument.
// NOTE(review): presumably the fourth argument is the store-on-condition
// opcode, with 0 meaning none exists -- confirm against emitCondStore.
11133 case SystemZ::CondStore8Mux:
11134 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
11135 case SystemZ::CondStore8MuxInv:
11136 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
11137 case SystemZ::CondStore16Mux:
11138 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
11139 case SystemZ::CondStore16MuxInv:
11140 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
11141 case SystemZ::CondStore32Mux:
11142 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
11143 case SystemZ::CondStore32MuxInv:
11144 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
11145 case SystemZ::CondStore8:
11146 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
11147 case SystemZ::CondStore8Inv:
11148 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
11149 case SystemZ::CondStore16:
11150 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
11151 case SystemZ::CondStore16Inv:
11152 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
11153 case SystemZ::CondStore32:
11154 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
11155 case SystemZ::CondStore32Inv:
11156 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
11157 case SystemZ::CondStore64:
11158 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
11159 case SystemZ::CondStore64Inv:
11160 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
11161 case SystemZ::CondStoreF32:
11162 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
11163 case SystemZ::CondStoreF32Inv:
11164 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
11165 case SystemZ::CondStoreF64:
11166 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
11167 case SystemZ::CondStoreF64Inv:
11168 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
11169
// 128-bit integer compares: false for the signed pseudo, true for unsigned.
11170 case SystemZ::SCmp128Hi:
11171 return emitICmp128Hi(MI, MBB, false);
11172 case SystemZ::UCmp128Hi:
11173 return emitICmp128Hi(MI, MBB, true);
11174
// 128-bit pair construction and extension pseudos; ZEXT128 passes true.
11175 case SystemZ::PAIR128:
11176 return emitPair128(MI, MBB);
11177 case SystemZ::AEXT128:
11178 return emitExt128(MI, MBB, false);
11179 case SystemZ::ZEXT128:
11180 return emitExt128(MI, MBB, true);
11181
// ATOMIC_*W read-modify-write pseudos. The third argument is the binary
// opcode to apply (0 for a plain swap); the NRi/NILHi variants pass an
// extra true.
11182 case SystemZ::ATOMIC_SWAPW:
11183 return emitAtomicLoadBinary(MI, MBB, 0);
11184
11185 case SystemZ::ATOMIC_LOADW_AR:
11186 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
11187 case SystemZ::ATOMIC_LOADW_AFI:
11188 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
11189
11190 case SystemZ::ATOMIC_LOADW_SR:
11191 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
11192
11193 case SystemZ::ATOMIC_LOADW_NR:
11194 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
11195 case SystemZ::ATOMIC_LOADW_NILH:
11196 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
11197
11198 case SystemZ::ATOMIC_LOADW_OR:
11199 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
11200 case SystemZ::ATOMIC_LOADW_OILH:
11201 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
11202
11203 case SystemZ::ATOMIC_LOADW_XR:
11204 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
11205 case SystemZ::ATOMIC_LOADW_XILF:
11206 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
11207
11208 case SystemZ::ATOMIC_LOADW_NRi:
11209 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
11210 case SystemZ::ATOMIC_LOADW_NILHi:
11211 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
11212
// Atomic min/max: signed variants compare with CR, unsigned with CLR; MIN
// passes CCMASK_CMP_LE and MAX passes CCMASK_CMP_GE.
11213 case SystemZ::ATOMIC_LOADW_MIN:
11214 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
11215 case SystemZ::ATOMIC_LOADW_MAX:
11216 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
11217 case SystemZ::ATOMIC_LOADW_UMIN:
11218 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
11219 case SystemZ::ATOMIC_LOADW_UMAX:
11220 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
11221
11222 case SystemZ::ATOMIC_CMP_SWAPW:
11223 return emitAtomicCmpSwapW(MI, MBB);
// Memory-to-memory pseudos, expanded around the named storage-to-storage
// opcode. The memset pseudos use MVC with IsMemset set.
11224 case SystemZ::MVCImm:
11225 case SystemZ::MVCReg:
11226 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
11227 case SystemZ::NCImm:
11228 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
11229 case SystemZ::OCImm:
11230 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
11231 case SystemZ::XCImm:
11232 case SystemZ::XCReg:
11233 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
11234 case SystemZ::CLCImm:
11235 case SystemZ::CLCReg:
11236 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
11237 case SystemZ::MemsetImmImm:
11238 case SystemZ::MemsetImmReg:
11239 case SystemZ::MemsetRegImm:
11240 case SystemZ::MemsetRegReg:
11241 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
// String-instruction loop pseudos.
11242 case SystemZ::CLSTLoop:
11243 return emitStringWrapper(MI, MBB, SystemZ::CLST);
11244 case SystemZ::MVSTLoop:
11245 return emitStringWrapper(MI, MBB, SystemZ::MVST);
11246 case SystemZ::SRSTLoop:
11247 return emitStringWrapper(MI, MBB, SystemZ::SRST);
// Transaction begin: the last argument is NoFloat.
11248 case SystemZ::TBEGIN:
11249 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
11250 case SystemZ::TBEGIN_nofloat:
11251 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
11252 case SystemZ::TBEGINC:
11253 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
// Load-and-test compare pseudos map to LTEBR, LTDBR and LTXBR.
11254 case SystemZ::LTEBRCompare_Pseudo:
11255 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
11256 case SystemZ::LTDBRCompare_Pseudo:
11257 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
11258 case SystemZ::LTXBRCompare_Pseudo:
11259 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
11260
11261 case SystemZ::PROBED_ALLOCA:
11262 return emitProbedAlloca(MI, MBB);
11263 case SystemZ::EH_SjLj_SetJmp:
11264 return emitEHSjLjSetJmp(MI, MBB);
11265 case SystemZ::EH_SjLj_LongJmp:
11266 return emitEHSjLjLongJmp(MI, MBB);
11267
// Target-independent stackmap/patchpoint opcodes.
11268 case TargetOpcode::STACKMAP:
11269 case TargetOpcode::PATCHPOINT:
11270 return emitPatchPoint(MI, MBB);
11271
// Stack-guard DAG pseudos are rewritten to their non-DAG counterparts.
11272 case SystemZ::MOV_STACKGUARD_DAG:
11273 return emitStackGuardPseudo(MI, MBB, SystemZ::MOV_STACKGUARD);
11274
11275 case SystemZ::CMP_STACKGUARD_DAG:
11276 return emitStackGuardPseudo(MI, MBB, SystemZ::CMP_STACKGUARD);
11277
11278 default:
11279 llvm_unreachable("Unexpected instr type to insert");
11280 }
11281}
11282
11283// This is only used by the isel schedulers, and is needed only to prevent
11284// compiler from crashing when list-ilp is used.
11285const TargetRegisterClass *
11286SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
11287 if (VT == MVT::Untyped)
11288 return &SystemZ::ADDR128BitRegClass;
11290}
11291
11292SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
11293 SelectionDAG &DAG) const {
11294 SDLoc dl(Op);
11295 /*
11296 The rounding method is in FPC Byte 3 bits 6-7, and has the following
11297 settings:
11298 00 Round to nearest
11299 01 Round to 0
11300 10 Round to +inf
11301 11 Round to -inf
11302
11303 FLT_ROUNDS, on the other hand, expects the following:
11304 -1 Undefined
11305 0 Round to 0
11306 1 Round to nearest
11307 2 Round to +inf
11308 3 Round to -inf
11309 */
11310
11311 // Save FPC to register.
11312 SDValue Chain = Op.getOperand(0);
11313 SDValue EFPC(
11314 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
11315 Chain = EFPC.getValue(1);
11316
11317 // Transform as necessary
11318 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
11319 DAG.getConstant(3, dl, MVT::i32));
11320 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
11321 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
11322 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
11323 DAG.getConstant(1, dl, MVT::i32)));
11324
11325 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
11326 DAG.getConstant(1, dl, MVT::i32));
11327 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
11328
11329 return DAG.getMergeValues({RetVal, Chain}, dl);
11330}
11331
11332SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
11333 SelectionDAG &DAG) const {
11334 EVT VT = Op.getValueType();
11335 Op = Op.getOperand(0);
11336 EVT OpVT = Op.getValueType();
11337
11338 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
11339
11340 SDLoc DL(Op);
11341
11342 // load a 0 vector for the third operand of VSUM.
11343 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
11344
11345 // execute VSUM.
11346 switch (OpVT.getScalarSizeInBits()) {
11347 case 8:
11348 case 16:
11349 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
11350 [[fallthrough]];
11351 case 32:
11352 case 64:
11353 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
11354 DAG.getBitcast(Op.getValueType(), Zero));
11355 break;
11356 case 128:
11357 break; // VSUM over v1i128 should not happen and would be a noop
11358 default:
11359 llvm_unreachable("Unexpected scalar size.");
11360 }
11361 // Cast to original vector type, retrieve last element.
11362 return DAG.getNode(
11363 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
11364 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
11365}
11366
11368 FunctionType *FT = F->getFunctionType();
11369 const AttributeList &Attrs = F->getAttributes();
11370 if (Attrs.hasRetAttrs())
11371 OS << Attrs.getAsString(AttributeList::ReturnIndex) << " ";
11372 OS << *F->getReturnType() << " @" << F->getName() << "(";
11373 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
11374 if (I)
11375 OS << ", ";
11376 OS << *FT->getParamType(I);
11377 AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
11378 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt})
11379 if (ArgAttrs.hasAttribute(A))
11380 OS << " " << Attribute::getNameFromAttrKind(A);
11381 }
11382 OS << ")\n";
11383}
11384
11385bool SystemZTargetLowering::isInternal(const Function *Fn) const {
11386 std::map<const Function *, bool>::iterator Itr = IsInternalCache.find(Fn);
11387 if (Itr == IsInternalCache.end())
11388 Itr = IsInternalCache
11389 .insert(std::pair<const Function *, bool>(
11390 Fn, (Fn->hasLocalLinkage() && !Fn->hasAddressTaken())))
11391 .first;
11392 return Itr->second;
11393}
11394
11395void SystemZTargetLowering::
11396verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
11397 const Function *F, SDValue Callee) const {
11398 // Temporarily only do the check when explicitly requested, until it can be
11399 // enabled by default.
11401 return;
11402
11403 bool IsInternal = false;
11404 const Function *CalleeFn = nullptr;
11405 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
11406 if ((CalleeFn = dyn_cast<Function>(G->getGlobal())))
11407 IsInternal = isInternal(CalleeFn);
11408 if (!IsInternal && !verifyNarrowIntegerArgs(Outs)) {
11409 errs() << "ERROR: Missing extension attribute of passed "
11410 << "value in call to function:\n" << "Callee: ";
11411 if (CalleeFn != nullptr)
11412 printFunctionArgExts(CalleeFn, errs());
11413 else
11414 errs() << "-\n";
11415 errs() << "Caller: ";
11417 llvm_unreachable("");
11418 }
11419}
11420
11421void SystemZTargetLowering::
11422verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
11423 const Function *F) const {
11424 // Temporarily only do the check when explicitly requested, until it can be
11425 // enabled by default.
11427 return;
11428
11429 if (!isInternal(F) && !verifyNarrowIntegerArgs(Outs)) {
11430 errs() << "ERROR: Missing extension attribute of returned "
11431 << "value from function:\n";
11433 llvm_unreachable("");
11434 }
11435}
11436
11437// Verify that narrow integer arguments are extended as required by the ABI.
11438// Return false if an error is found.
// The check passes unconditionally for non-ELF targets and when
// Options.VerifyArgABICompliance is off in the branch shown below. Otherwise
// every i32 entry of Outs must carry one of the SExt, ZExt or NoExt flags.
11439bool SystemZTargetLowering::verifyNarrowIntegerArgs(
11440 const SmallVectorImpl<ISD::OutputArg> &Outs) const {
11441 if (!Subtarget.isTargetELF())
11442 return true;
11443
// NOTE(review): the condition(s) opening this if/else chain are not visible
// in this listing; presumably they consult the argext-abi-check command-line
// option before falling back to the target-machine option -- confirm.
11446 return true;
11447 } else if (!getTargetMachine().Options.VerifyArgABICompliance)
11448 return true;
11449
11450 for (unsigned i = 0; i < Outs.size(); ++i) {
11451 MVT VT = Outs[i].VT;
11452 ISD::ArgFlagsTy Flags = Outs[i].Flags;
11453 if (VT.isInteger()) {
// Integer values seen here are expected to be i32 or at least 64 bits wide.
11454 assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
11455 "Unexpected integer argument VT.");
// An i32 with none of the three extension flags is reported as an error.
11456 if (VT == MVT::i32 &&
11457 !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
11458 return false;
11459 }
11460 }
11461
11462 return true;
11463}
11464
// Insert stack-protector declarations into module M depending on the
// module's stack-protector guard mode.
// NOTE(review): the declarator line naming this function is not visible in
// this listing; only the tail of its parameter list is shown.
11466 Module &M, const LibcallLoweringInfo &Libcalls) const {
11467 StringRef GuardMode = M.getStackProtectorGuard();
11468
11469 // In the TLS case, no symbol needs to be inserted.
// An empty guard-mode string is treated the same as "tls".
11470 if (GuardMode == "tls" || GuardMode.empty())
11471 return;
11472
11473 // Otherwise (in the global case), insert the appropriate global variable.
// NOTE(review): the statement that performs the insertion is not visible in
// this listing.
11475}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
#define Check(C,...)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static bool isSelectPseudo(MachineInstr &MI)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t High
uint64_t IntrinsicInst * II
#define P(N)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file defines the SmallSet class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL, Comparison C, SDValue TrueOp, SDValue FalseOp)
static SmallVector< SDValue, 4 > simplifyAssumingCCVal(SDValue &Val, SDValue &CC, SelectionDAG &DAG)
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static cl::opt< bool > EnableIntArgExtCheck("argext-abi-check", cl::init(false), cl::desc("Verify that narrow int args are properly extended per the " "SystemZ ABI."))
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static SDValue mergeHighParts(SelectionDAG &DAG, const SDLoc &DL, unsigned MergedBits, EVT VT, SDValue Op0, SDValue Op1)
static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static void adjustForStackGuardCompare(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static SDValue convertToF16(SDValue Op, SelectionDAG &DAG)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, SelectionDAG &DAG)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static std::pair< SDValue, int > findCCUse(const SDValue &Val, unsigned Depth=0)
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool analyzeArgSplit(const SmallVectorImpl< ArgTy > &Args, SmallVector< CCValAssign, 16 > &ArgLocs, unsigned I, MVT &PartVT, unsigned &NumParts)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG, const SystemZSubtarget &Subtarget, SDValue &Op)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static SDValue buildFPVecFromScalars4(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SmallVectorImpl< SDValue > &Elems, unsigned Pos)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1563
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1408
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1535
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:968
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1353
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1511
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition APInt.h:323
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:398
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:865
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:858
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:407
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind)
LLVM Basic Block Representation.
Definition BasicBlock.h:62
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
LLVM_ABI bool isConstant() const
CCState - This class holds information needed while lowering arguments and return values.
LLVM_ABI void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
MachineConstantPoolValue * getMachineCPVal() const
const Constant * getConstVal() const
uint64_t getZExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:85
bool hasAddressTaken(const User **=nullptr, bool IgnoreCallbackUses=false, bool IgnoreAssumeLikeCalls=true, bool IngoreLLVMUsed=false, bool IgnoreARCAttachedCall=false, bool IgnoreCastedDirectCall=false) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition Function.cpp:938
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:763
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:775
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition Function.h:272
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:714
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
bool hasInternalLinkage() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
Tracks which library functions to use for a particular subtarget.
An instruction for reading from memory.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setMaxCallFrameSize(uint64_t S)
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void addLiveIn(MCRegister Reg, Register vreg=Register())
addLiveIn - Add the specified register as a live-in.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:68
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
SDValue getTargetFrameIndex(int FI, EVT VT)
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
const_iterator begin() const
Definition SmallSet.h:216
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
size_type size() const
Definition SmallSet.h:171
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:490
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:714
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
iterator end() const
Definition StringRef.h:116
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
A SystemZ-specific class detailing special use registers particular for calling conventions.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, const SDLoc &DL, const AsmOperandInfo &Constraint, SelectionDAG &DAG) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CondMergingParams getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const override
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
void insertSSPDeclarations(Module &M, const LibcallLoweringInfo &Libcalls) const override
Insert SSP declaration if global stack protector is used.
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, UndefPoisonKind Kind, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, Kind can be used to track poison ...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
SDValue useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, MVT VT, SDValue Arg, SDLoc DL, SDValue Chain, bool IsStrict) const
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B. This can be used to simplify se...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers. Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
virtual void insertSSPDeclarations(Module &M, const LibcallLoweringInfo &Libcalls) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
user_iterator user_begin()
Definition Value.h:402
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
A raw_ostream that writes to a file descriptor.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:168
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:783
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
@ STRICT_FMINIMUM
Definition ISDOpcodes.h:471
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:914
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ CTLZ_ZERO_POISON
Definition ISDOpcodes.h:792
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BR_JT
BR_JT - Jumptable branch.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:800
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ GET_ROUNDING
Returns current rounding mode: -1 = Undefined; 0 = Round to 0; 1 = Round to nearest, ties to even; 2 = Round to ...
Definition ISDOpcodes.h:974
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ STRICT_FMAXIMUM
Definition ISDOpcodes.h:470
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:815
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:903
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:809
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:464
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ CTTZ_ZERO_POISON
Bit counting operators with a poisoned result for zero inputs.
Definition ISDOpcodes.h:791
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:925
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:458
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:162
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:837
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is the same as FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
auto m_Value()
Match an arbitrary value and ignore it.
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned VR16Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned FP16Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition SystemZ.h:41
static bool isImmHH(uint64_t Val)
Definition SystemZ.h:177
const unsigned CCMASK_TEND
Definition SystemZ.h:98
const unsigned CCMASK_CS_EQ
Definition SystemZ.h:68
const unsigned CCMASK_TBEGIN
Definition SystemZ.h:93
const unsigned CCMASK_0
Definition SystemZ.h:28
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition SystemZ.h:83
const unsigned CCMASK_LOGICAL_CARRY
Definition SystemZ.h:61
const unsigned TDCMASK_NORMAL_MINUS
Definition SystemZ.h:123
const unsigned CCMASK_TDC
Definition SystemZ.h:110
const unsigned CCMASK_FCMP
Definition SystemZ.h:49
const unsigned CCMASK_TM_SOME_0
Definition SystemZ.h:82
static bool isImmHL(uint64_t Val)
Definition SystemZ.h:172
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition SystemZ.h:125
const unsigned PFD_READ
Definition SystemZ.h:116
const unsigned CCMASK_1
Definition SystemZ.h:29
const unsigned TDCMASK_NORMAL_PLUS
Definition SystemZ.h:122
const unsigned PFD_WRITE
Definition SystemZ.h:117
const unsigned CCMASK_CMP_GT
Definition SystemZ.h:38
const unsigned TDCMASK_QNAN_MINUS
Definition SystemZ.h:129
const unsigned CCMASK_CS
Definition SystemZ.h:70
const unsigned CCMASK_ANY
Definition SystemZ.h:32
const unsigned CCMASK_ARITH
Definition SystemZ.h:56
const unsigned CCMASK_TM_MIXED_MSB_0
Definition SystemZ.h:79
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition SystemZ.h:124
static bool isImmLL(uint64_t Val)
Definition SystemZ.h:162
const unsigned VectorBits
Definition SystemZ.h:155
static bool isImmLH(uint64_t Val)
Definition SystemZ.h:167
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition SystemZ.h:126
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition SystemZ.h:78
const unsigned IPM_CC
Definition SystemZ.h:113
const unsigned CCMASK_CMP_LE
Definition SystemZ.h:40
const unsigned CCMASK_CMP_O
Definition SystemZ.h:45
const unsigned CCMASK_CMP_EQ
Definition SystemZ.h:36
const unsigned VectorBytes
Definition SystemZ.h:159
const unsigned TDCMASK_INFINITY_MINUS
Definition SystemZ.h:127
const unsigned CCMASK_ICMP
Definition SystemZ.h:48
const unsigned CCMASK_VCMP_ALL
Definition SystemZ.h:102
const unsigned CCMASK_VCMP_NONE
Definition SystemZ.h:104
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition SystemZ.h:105
const unsigned CCMASK_TM_MIXED_MSB_1
Definition SystemZ.h:80
const unsigned CCMASK_TM_MSB_0
Definition SystemZ.h:84
const unsigned CCMASK_ARITH_OVERFLOW
Definition SystemZ.h:55
const unsigned CCMASK_CS_NE
Definition SystemZ.h:69
const unsigned TDCMASK_SNAN_PLUS
Definition SystemZ.h:130
const unsigned CCMASK_TM
Definition SystemZ.h:86
const unsigned CCMASK_3
Definition SystemZ.h:31
const unsigned CCMASK_NONE
Definition SystemZ.h:27
const unsigned CCMASK_CMP_LT
Definition SystemZ.h:37
const unsigned CCMASK_CMP_NE
Definition SystemZ.h:39
const unsigned TDCMASK_ZERO_PLUS
Definition SystemZ.h:120
const unsigned TDCMASK_QNAN_PLUS
Definition SystemZ.h:128
const unsigned TDCMASK_ZERO_MINUS
Definition SystemZ.h:121
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition SystemZ.h:81
const unsigned CCMASK_LOGICAL_BORROW
Definition SystemZ.h:63
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition SystemZ.h:44
const unsigned CCMASK_LOGICAL
Definition SystemZ.h:65
const unsigned CCMASK_TM_MSB_1
Definition SystemZ.h:85
const unsigned TDCMASK_SNAN_MINUS
Definition SystemZ.h:131
initializer< Ty > init(const Ty &Val)
support::ulittle32_t Word
Definition IRSymtab.h:53
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:558
@ Length
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Define
Register definition.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
@ Done
Definition Threading.h:60
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr T maskLeadingOnes(unsigned N)
Create a bitmask with the N left-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:88
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert ‘Bytes’ to a hex string and output to ‘OS’.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:362
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:263
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
UndefPoisonKind
Enumeration to track whether we are interested in Undef, Poison, or both.
Definition UndefPoison.h:20
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:347
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
#define EQ(a, b)
Definition regexec.c:65
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:418
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:408
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:346
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:271
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
bool isVectorOf(EVT EltVT) const
Return true if this is a vector with matching element type.
Definition ValueTypes.h:181
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:165
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:190
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:176
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:72
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:325
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:184
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
This structure is used to pass arguments to makeLibCall function.