1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
18#include "llvm/ADT/SmallSet.h"
24#include "llvm/IR/GlobalAlias.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/IntrinsicsS390.h"
32#include <cctype>
33#include <optional>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "systemz-lower"
38
39// Temporarily let this be disabled by default until all known problems
40// related to argument extensions are fixed.
42 "argext-abi-check", cl::init(false),
43 cl::desc("Verify that narrow int args are properly extended per the "
44 "SystemZ ABI."));
45
46namespace {
47// Represents information about a comparison.
48struct Comparison {
49 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
50 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
51 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
52
53 // The operands to the comparison.
54 SDValue Op0, Op1;
55
56 // Chain if this is a strict floating-point comparison.
57 SDValue Chain;
58
59 // The opcode that should be used to compare Op0 and Op1.
60 unsigned Opcode;
61
62 // A SystemZICMP value. Only used for integer comparisons.
63 unsigned ICmpType;
64
65 // The mask of CC values that Opcode can produce.
66 unsigned CCValid;
67
68 // The mask of CC values for which the original condition is true.
69 unsigned CCMask;
70};
71} // end anonymous namespace
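// Illustrative sketch (not part of the original file): the comparison-lowering
// helpers further down in this file typically fill in a Comparison for an
// integer equality test roughly like this; the exact enum members named below
// are assumptions, see those helpers for the authoritative logic.
//   Comparison C(Op0, Op1, SDValue());
//   C.Opcode   = SystemZISD::ICMP;          // compare node to emit
//   C.ICmpType = SystemZICMP::Any;          // signedness irrelevant for ==
//   C.CCValid  = SystemZ::CCMASK_ICMP;      // CC values ICMP can produce
//   C.CCMask   = SystemZ::CCMASK_CMP_EQ;    // CC values meaning "equal"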
72
73// Classify VT as either 32 or 64 bit.
74static bool is32Bit(EVT VT) {
75 switch (VT.getSimpleVT().SimpleTy) {
76 case MVT::i32:
77 return true;
78 case MVT::i64:
79 return false;
80 default:
81 llvm_unreachable("Unsupported type");
82 }
83}
84
85 // Return a version of MachineOperand that can be safely used before the
86 // operand's final use, i.e. with any kill flag cleared.
88 if (Op.isReg())
89 Op.setIsKill(false);
90 return Op;
91}
92
94 const SystemZSubtarget &STI)
95 : TargetLowering(TM, STI), Subtarget(STI) {
96 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
97
98 auto *Regs = STI.getSpecialRegisters();
99
100 // Set up the register classes.
101 if (Subtarget.hasHighWord())
102 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
103 else
104 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
105 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
106 if (!useSoftFloat()) {
107 if (Subtarget.hasVector()) {
108 addRegisterClass(MVT::f16, &SystemZ::VR16BitRegClass);
109 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
110 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
111 } else {
112 addRegisterClass(MVT::f16, &SystemZ::FP16BitRegClass);
113 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
114 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
115 }
116 if (Subtarget.hasVectorEnhancements1())
117 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
118 else
119 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
120
121 if (Subtarget.hasVector()) {
122 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
123 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
124 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
125 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
126 addRegisterClass(MVT::v8f16, &SystemZ::VR128BitRegClass);
127 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
128 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
129 }
130
131 if (Subtarget.hasVector())
132 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
133 }
134
135 // Compute derived properties from the register classes
136 computeRegisterProperties(Subtarget.getRegisterInfo());
137
138 // Set up special registers.
139 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
140
141 // TODO: It may be better to default to latency-oriented scheduling; however,
142 // LLVM's current latency-oriented scheduler can't handle physreg definitions
143 // such as SystemZ's CC, so set this to the register-pressure
144 // scheduler, because it can.
146
149
151
152 // Instructions are strings of 2-byte aligned 2-byte values.
154 // For performance reasons we prefer 16-byte alignment.
156
157 // Handle operations that are handled in a similar way for all types.
158 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
159 I <= MVT::LAST_FP_VALUETYPE;
160 ++I) {
162 if (isTypeLegal(VT)) {
163 // Lower SET_CC into an IPM-based sequence.
167
168 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
170
171 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
174 }
175 }
176
177 // Expand jump table branches as address arithmetic followed by an
178 // indirect jump.
180
181 // Expand BRCOND into a BR_CC (see above).
183
184 // Handle integer types except i128.
185 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
186 I <= MVT::LAST_INTEGER_VALUETYPE;
187 ++I) {
189 if (isTypeLegal(VT) && VT != MVT::i128) {
191
192 // Expand individual DIV and REMs into DIVREMs.
199
200 // Support addition/subtraction with overflow.
203
204 // Support addition/subtraction with carry.
207
208 // Support carry in as value rather than glue.
211
212 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
213 // available, or if the operand is constant.
215
216 // Use POPCNT on z196 and above.
217 if (Subtarget.hasPopulationCount())
219 else
221
222 // No special instructions for these.
225
226 // Use *MUL_LOHI where possible instead of MULH*.
231
232 // The fp<=>i32/i64 conversions are all Legal except for f16 and for
233 // unsigned on z10 (only z196 and above have native support for
234 // unsigned conversions).
241 // Handle unsigned 32-bit input types as signed 64-bit types on z10.
242 auto OpAction =
243 (!Subtarget.hasFPExtension() && VT == MVT::i32) ? Promote : Custom;
244 setOperationAction(Op, VT, OpAction);
245 }
246 }
247 }
248
249 // Handle i128 if legal.
250 if (isTypeLegal(MVT::i128)) {
251 // No special instructions for these.
258
259 // We may be able to use VSLDB/VSLD/VSRD for these.
262
263 // No special instructions for these before z17.
264 if (!Subtarget.hasVectorEnhancements3()) {
274 } else {
275 // Even if we do have a legal 128-bit multiply, we do not
276 // want 64-bit multiply-high operations to use it.
279 }
280
281 // Support addition/subtraction with carry.
286
287 // Use VPOPCT and add up partial results.
289
290 // Additional instructions available with z17.
291 if (Subtarget.hasVectorEnhancements3()) {
292 setOperationAction(ISD::ABS, MVT::i128, Legal);
293
295 MVT::i128, Legal);
296 }
297 }
298
299 // These need custom handling in order to handle the f16 conversions.
308
309 // Type legalization will convert 8- and 16-bit atomic operations into
310 // forms that operate on i32s (but still keeping the original memory VT).
311 // Lower them into full i32 operations.
323
324 // Whether or not i128 is a legal type, we need to custom lower
325 // the atomic operations in order to exploit SystemZ instructions.
330
331 // Mark sign/zero extending atomic loads as legal, which will make
332 // DAGCombiner fold extensions into atomic loads if possible.
334 {MVT::i8, MVT::i16, MVT::i32}, Legal);
336 {MVT::i8, MVT::i16}, Legal);
338 MVT::i8, Legal);
339
340 // We can use the CC result of compare-and-swap to implement
341 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
345
347
348 // Traps are legal, as we will convert them to "j .+2".
349 setOperationAction(ISD::TRAP, MVT::Other, Legal);
350
351 // We have native support for a 64-bit CTLZ, via FLOGR.
355
356 // On z17 we have native support for a 64-bit CTTZ.
357 if (Subtarget.hasMiscellaneousExtensions4()) {
361 }
362
363 // On z15 we have native support for a 64-bit CTPOP.
364 if (Subtarget.hasMiscellaneousExtensions3()) {
367 }
368
369 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
371
372 // Expand 128 bit shifts without using a libcall.
376
377 // Also expand 256 bit shifts if i128 is a legal type.
378 if (isTypeLegal(MVT::i128)) {
382 }
383
384 // Handle bitcast from fp128 to i128.
385 if (!isTypeLegal(MVT::i128))
387
388 // We have native instructions for i8, i16 and i32 extensions, but not i1.
390 for (MVT VT : MVT::integer_valuetypes()) {
394 }
395
396 // Handle the various types of symbolic address.
402
403 // We need to handle dynamic allocations specially because of the
404 // 160-byte area at the bottom of the stack.
407
410
411 // Handle prefetches with PFD or PFDRL.
413
414 // Handle readcyclecounter with STCKF.
416
418 // Assume by default that all vector operations need to be expanded.
419 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
420 if (getOperationAction(Opcode, VT) == Legal)
421 setOperationAction(Opcode, VT, Expand);
422
423 // Likewise all truncating stores and extending loads.
424 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
425 setTruncStoreAction(VT, InnerVT, Expand);
428 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
429 }
430
431 if (isTypeLegal(VT)) {
432 // These operations are legal for anything that can be stored in a
433 // vector register, even if there is no native support for the format
434 // as such. In particular, we can do these for v4f32 even though there
435 // are no specific instructions for that format.
441
442 // Likewise, except that we need to replace the nodes with something
443 // more specific.
446 }
447 }
448
449 // Handle integer vector types.
451 if (isTypeLegal(VT)) {
452 // These operations have direct equivalents.
457 if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3()) {
461 }
462 if (Subtarget.hasVectorEnhancements3() &&
463 VT != MVT::v16i8 && VT != MVT::v8i16) {
468 }
473 if (Subtarget.hasVectorEnhancements1())
475 else
479
480 // Convert a GPR scalar to a vector by inserting it into element 0.
482
483 // Use a series of unpacks for extensions.
486
487 // Detect shifts/rotates by a scalar amount and convert them into
488 // V*_BY_SCALAR.
493
494 // Add ISD::VECREDUCE_ADD as custom in order to implement
495 // it with VZERO+VSUM
497
498 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
499 // and inverting the result as necessary.
501
503 Legal);
504 }
505 }
506
507 if (Subtarget.hasVector()) {
508 // There should be no need to check for float types other than v2f64
509 // since <2 x f32> isn't a legal type.
518
527 }
528
529 if (Subtarget.hasVectorEnhancements2()) {
538
547 }
548
549 // Handle floating-point types.
550 if (!useSoftFloat()) {
551 // Promote all f16 operations to float, with some exceptions below.
552 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
553 setOperationAction(Opc, MVT::f16, Promote);
555 for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
556 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
557 setTruncStoreAction(VT, MVT::f16, Expand);
558 }
560 setOperationAction(Op, MVT::f16, Subtarget.hasVector() ? Legal : Custom);
565 for (auto Op : {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN})
566 setOperationAction(Op, MVT::f16, Legal);
567 }
568
569 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
570 I <= MVT::LAST_FP_VALUETYPE;
571 ++I) {
573 if (isTypeLegal(VT) && VT != MVT::f16) {
574 // We can use FI for FRINT.
576
577 // We can use the extended form of FI for other rounding operations.
578 if (Subtarget.hasFPExtension()) {
585 }
586
587 // No special instructions for these.
593
594 // Special treatment.
596
597 // Handle constrained floating-point operations.
606 if (Subtarget.hasFPExtension()) {
613 }
614
615 // Extension from f16 needs libcall.
618 }
619 }
620
621 // Handle floating-point vector types.
622 if (Subtarget.hasVector()) {
623 // Scalar-to-vector conversion is just a subreg.
627
628 // Some insertions and extractions can be done directly but others
629 // need to go via integers.
636
637 // These operations have direct equivalents.
638 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
639 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
640 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
641 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
642 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
643 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
644 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
645 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
646 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
649 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
653
654 // Handle constrained floating-point operations.
668
673 if (Subtarget.hasVectorEnhancements1()) {
676 }
677 }
678
679 // The vector enhancements facility 1 has instructions for these.
680 if (Subtarget.hasVectorEnhancements1()) {
681 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
682 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
683 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
684 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
685 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
686 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
687 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
688 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
689 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
692 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
696
701
706
711
716
721
722 // Handle constrained floating-point operations.
736 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
737 MVT::v4f32, MVT::v2f64 }) {
742 }
743 }
744
745 // We only have fused f128 multiply-addition on vector registers.
746 if (!Subtarget.hasVectorEnhancements1()) {
749 }
750
751 // We don't have a copysign instruction on vector registers.
752 if (Subtarget.hasVectorEnhancements1())
754
755 // Needed so that we don't try to implement f128 constant loads using
756 // a load-and-extend of an f80 constant (in cases where the constant
757 // would fit in an f80).
758 for (MVT VT : MVT::fp_valuetypes())
759 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
760
762 // We don't have extending load instructions on vector registers.
762 if (Subtarget.hasVectorEnhancements1()) {
763 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
764 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
765 }
766
767 // Floating-point truncation and stores need to be done separately.
768 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
769 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
770 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
771
772 // We have 64-bit FPR<->GPR moves, but need special handling for
773 // 32-bit forms.
774 if (!Subtarget.hasVector()) {
777 }
778
779 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
780 // structure, but VAEND is a no-op.
784
785 if (Subtarget.isTargetzOS()) {
786 // Handle address space casts between mixed sized pointers.
789 }
790
792
793 // Codes for which we want to perform some z-specific combinations.
797 ISD::LOAD,
810 ISD::SRL,
811 ISD::SRA,
812 ISD::MUL,
813 ISD::SDIV,
814 ISD::UDIV,
815 ISD::SREM,
816 ISD::UREM,
819
820 // Handle intrinsics.
823
824 // We're not using SJLJ for exception handling, but these nodes are
825 // implemented solely to support use of __builtin_setjmp / __builtin_longjmp.
828
829 // We want to use MVC in preference to even a single load/store pair.
830 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
832
833 // The main memset sequence is a byte store followed by an MVC.
834 // Two STC or MV..I stores win over that, but the kind of fused stores
835 // generated by target-independent code don't when the byte value is
836 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
837 // than "STC;MVC". Handle the choice in target-specific code instead.
838 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
840
841 // Default to having -disable-strictnode-mutation on
842 IsStrictFPEnabled = true;
843}
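// Reader's note (illustrative, not part of the original file): the constructor
// above is essentially a large table of setOperationAction() calls.  The
// action values carry the usual LLVM meanings, for example:
//   setOperationAction(ISD::CTPOP, MVT::i64, Legal);   // select a native instruction
//   setOperationAction(ISD::SDIV,  MVT::i32, Expand);  // let generic legalization rewrite it
//   setOperationAction(ISD::OR,    MVT::i64, Custom);  // route through LowerOperation()
//   setOperationAction(ISD::FADD,  MVT::f16, Promote); // legalize via a wider type (f32)
// The (opcode, type, action) triples shown here are examples only; the
// authoritative choices are the calls in the constructor itself.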
844
846 return Subtarget.hasSoftFloat();
847}
848
850 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
851 unsigned &NumIntermediates, MVT &RegisterVT) const {
852 // Pass fp16 vectors in VR(s).
853 if (Subtarget.hasVector() && VT.isVector() && VT.getScalarType() == MVT::f16) {
854 IntermediateVT = RegisterVT = MVT::v8f16;
855 return NumIntermediates =
857 }
859 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
860}
861
864 EVT VT) const {
865 // 128-bit single-element vector types are passed like other vectors,
866 // not like their element type.
867 if (VT.isVector() && VT.getSizeInBits() == 128 &&
868 VT.getVectorNumElements() == 1)
869 return MVT::v16i8;
870 // Pass fp16 vectors in VR(s).
871 if (Subtarget.hasVector() && VT.isVector() && VT.getScalarType() == MVT::f16)
872 return MVT::v8f16;
873 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
874}
875
877 LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
878 // Pass fp16 vectors in VR(s).
879 if (Subtarget.hasVector() && VT.isVector() && VT.getScalarType() == MVT::f16)
881 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
882}
883
885 LLVMContext &, EVT VT) const {
886 if (!VT.isVector())
887 return MVT::i32;
889}
890
892 const MachineFunction &MF, EVT VT) const {
893 if (useSoftFloat())
894 return false;
895
896 VT = VT.getScalarType();
897
898 if (!VT.isSimple())
899 return false;
900
901 switch (VT.getSimpleVT().SimpleTy) {
902 case MVT::f32:
903 case MVT::f64:
904 return true;
905 case MVT::f128:
906 return Subtarget.hasVectorEnhancements1();
907 default:
908 break;
909 }
910
911 return false;
912}
913
914// Return true if the constant can be generated with a vector instruction,
915// such as VGM, VGMB or VREPI.
917 const SystemZSubtarget &Subtarget) {
918 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
919 if (!Subtarget.hasVector() ||
920 (isFP128 && !Subtarget.hasVectorEnhancements1()))
921 return false;
922
923 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
924 // preferred way of creating all-zero and all-one vectors so give it
925 // priority over other methods below.
926 unsigned Mask = 0;
927 unsigned I = 0;
928 for (; I < SystemZ::VectorBytes; ++I) {
929 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
930 if (Byte == 0xff)
931 Mask |= 1ULL << I;
932 else if (Byte != 0)
933 break;
934 }
935 if (I == SystemZ::VectorBytes) {
936 Opcode = SystemZISD::BYTE_MASK;
937 OpVals.push_back(Mask);
939 return true;
940 }
941
942 if (SplatBitSize > 64)
943 return false;
944
945 auto TryValue = [&](uint64_t Value) -> bool {
946 // Try VECTOR REPLICATE IMMEDIATE
947 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
948 if (isInt<16>(SignedValue)) {
949 OpVals.push_back(((unsigned) SignedValue));
950 Opcode = SystemZISD::REPLICATE;
952 SystemZ::VectorBits / SplatBitSize);
953 return true;
954 }
955 // Try VECTOR GENERATE MASK
956 unsigned Start, End;
957 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
958 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
959 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
960 // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1).
961 OpVals.push_back(Start - (64 - SplatBitSize));
962 OpVals.push_back(End - (64 - SplatBitSize));
963 Opcode = SystemZISD::ROTATE_MASK;
965 SystemZ::VectorBits / SplatBitSize);
966 return true;
967 }
968 return false;
969 };
970
971 // First try assuming that any undefined bits above the highest set bit
972 // and below the lowest set bit are 1s. This increases the likelihood of
973 // being able to use a sign-extended element value in VECTOR REPLICATE
974 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
975 uint64_t SplatBitsZ = SplatBits.getZExtValue();
976 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
977 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
978 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
979 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
980 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
981 if (TryValue(SplatBitsZ | Upper | Lower))
982 return true;
983
984 // Now try assuming that any undefined bits between the first and
985 // last defined set bits are set. This increases the chances of
986 // using a non-wraparound mask.
987 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
988 return TryValue(SplatBitsZ | Middle);
989}
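// Worked examples (illustrative, hand-derived from the logic above; not from
// the original file):
//  * A v4i32 splat of 0x0000FFFF: every byte of the 128-bit value is 0x00 or
//    0xFF, so the BYTE_MASK path applies with Mask = 0x3333 (one bit per 0xFF
//    byte, as computed by the loop above).
//  * A v8i16 splat of -5: the value fits a signed 16-bit immediate, so the
//    REPLICATE path applies with VecVT = v8i16.
//  * A v4i32 splat of 0x000FF000: not all bytes are 0x00/0xFF, the value does
//    not fit a signed 16-bit immediate, but the ones form a single contiguous
//    run, so the ROTATE_MASK path applies with OpVals = { 12, 19 } after the
//    bit-number conversion described above.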
990
992 if (IntImm.isSingleWord()) {
993 IntBits = APInt(128, IntImm.getZExtValue());
994 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
995 } else
996 IntBits = IntImm;
997 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
998
999 // Find the smallest splat.
1000 SplatBits = IntImm;
1001 unsigned Width = SplatBits.getBitWidth();
1002 while (Width > 8) {
1003 unsigned HalfSize = Width / 2;
1004 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
1005 APInt LowValue = SplatBits.trunc(HalfSize);
1006
1007 // If the two halves do not match, stop here.
1008 if (HighValue != LowValue || 8 > HalfSize)
1009 break;
1010
1011 SplatBits = HighValue;
1012 Width = HalfSize;
1013 }
1014 SplatUndef = 0;
1015 SplatBitSize = Width;
1016}
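// Illustrative example of the reduction above (not from the original file):
// for a 128-bit immediate made of eight copies of 0x0001, the two halves keep
// matching, so the loop halves the width 128 -> 64 -> 32 -> 16 and stops with
// SplatBits = 0x0001 and SplatBitSize = 16 (at width 16 the 8-bit halves 0x00
// and 0x01 differ, so the reduction ends).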
1017
1019 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
1020 bool HasAnyUndefs;
1021
1022 // Get IntBits by finding the 128 bit splat.
1023 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
1024 true);
1025
1026 // Get SplatBits by finding the 8 bit or greater splat.
1027 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
1028 true);
1029}
1030
1032 bool ForCodeSize) const {
1033 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
1034 if (Imm.isZero() || Imm.isNegZero())
1035 return true;
1036
1037 return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
1038}
1039
1042 MachineBasicBlock *MBB) const {
1043 DebugLoc DL = MI.getDebugLoc();
1044 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1045 const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo();
1046
1047 MachineFunction *MF = MBB->getParent();
1049
1050 const BasicBlock *BB = MBB->getBasicBlock();
1051 MachineFunction::iterator I = ++MBB->getIterator();
1052
1053 Register DstReg = MI.getOperand(0).getReg();
1054 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
1055 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
1056 (void)TRI;
1057 Register MainDstReg = MRI.createVirtualRegister(RC);
1058 Register RestoreDstReg = MRI.createVirtualRegister(RC);
1059
1060 MVT PVT = getPointerTy(MF->getDataLayout());
1061 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1062 // For v = setjmp(buf), we generate the following.
1063 // Algorithm:
1064 //
1065 // ---------
1066 // | thisMBB |
1067 // ---------
1068 // |
1069 // ------------------------
1070 // | |
1071 // ---------- ---------------
1072 // | mainMBB | | restoreMBB |
1073 // | v = 0 | | v = 1 |
1074 // ---------- ---------------
1075 // | |
1076 // -------------------------
1077 // |
1078 // -----------------------------
1079 // | sinkMBB |
1080 // | phi(v_mainMBB,v_restoreMBB) |
1081 // -----------------------------
1082 // thisMBB:
1083 // buf[FPOffset] = Frame Pointer if hasFP.
1084 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB.
1085 // buf[BCOffset] = Backchain value if building with -mbackchain.
1086 // buf[SPOffset] = Stack Pointer.
1087 // buf[LPOffset] = We never write this slot; gcc's setjmp always stores R13 here.
1088 // SjLjSetup restoreMBB
1089 // mainMBB:
1090 // v_main = 0
1091 // sinkMBB:
1092 // v = phi(v_main, v_restore)
1093 // restoreMBB:
1094 // v_restore = 1
1095
1096 MachineBasicBlock *ThisMBB = MBB;
1097 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
1098 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
1099 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
1100
1101 MF->insert(I, MainMBB);
1102 MF->insert(I, SinkMBB);
1103 MF->push_back(RestoreMBB);
1104 RestoreMBB->setMachineBlockAddressTaken();
1105
1107
1108 // Transfer the remainder of BB and its successor edges to sinkMBB.
1109 SinkMBB->splice(SinkMBB->begin(), MBB,
1110 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
1112
1113 // thisMBB:
1114 const int64_t FPOffset = 0; // Slot 1.
1115 const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2.
1116 const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3.
1117 const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4.
1118
1119 // Buf address.
1120 Register BufReg = MI.getOperand(1).getReg();
1121
1122 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
1123 Register LabelReg = MRI.createVirtualRegister(PtrRC);
1124
1125 // Prepare IP for longjmp.
1126 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LARL), LabelReg)
1127 .addMBB(RestoreMBB);
1128 // Store IP for return from jmp, slot 2, offset = 1.
1129 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1130 .addReg(LabelReg)
1131 .addReg(BufReg)
1132 .addImm(LabelOffset)
1133 .addReg(0);
1134
1135 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1136 bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF);
1137 if (HasFP) {
1138 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1139 .addReg(SpecialRegs->getFramePointerRegister())
1140 .addReg(BufReg)
1141 .addImm(FPOffset)
1142 .addReg(0);
1143 }
1144
1145 // Store SP.
1146 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1147 .addReg(SpecialRegs->getStackPointerRegister())
1148 .addReg(BufReg)
1149 .addImm(SPOffset)
1150 .addReg(0);
1151
1152 // Slot 3 (offset 2): Backchain value (if building with -mbackchain).
1153 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1154 if (BackChain) {
1155 Register BCReg = MRI.createVirtualRegister(PtrRC);
1156 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1157 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1158 .addReg(SpecialRegs->getStackPointerRegister())
1159 .addImm(TFL->getBackchainOffset(*MF))
1160 .addReg(0);
1161
1162 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1163 .addReg(BCReg)
1164 .addReg(BufReg)
1165 .addImm(BCOffset)
1166 .addReg(0);
1167 }
1168
1169 // Setup.
1170 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::EH_SjLj_Setup))
1171 .addMBB(RestoreMBB);
1172
1173 const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1174 MIB.addRegMask(RegInfo->getNoPreservedMask());
1175
1176 ThisMBB->addSuccessor(MainMBB);
1177 ThisMBB->addSuccessor(RestoreMBB);
1178
1179 // mainMBB:
1180 BuildMI(MainMBB, DL, TII->get(SystemZ::LHI), MainDstReg).addImm(0);
1181 MainMBB->addSuccessor(SinkMBB);
1182
1183 // sinkMBB:
1184 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(SystemZ::PHI), DstReg)
1185 .addReg(MainDstReg)
1186 .addMBB(MainMBB)
1187 .addReg(RestoreDstReg)
1188 .addMBB(RestoreMBB);
1189
1190 // restoreMBB.
1191 BuildMI(RestoreMBB, DL, TII->get(SystemZ::LHI), RestoreDstReg).addImm(1);
1192 BuildMI(RestoreMBB, DL, TII->get(SystemZ::J)).addMBB(SinkMBB);
1193 RestoreMBB->addSuccessor(SinkMBB);
1194
1195 MI.eraseFromParent();
1196
1197 return SinkMBB;
1198}
1199
1202 MachineBasicBlock *MBB) const {
1203
1204 DebugLoc DL = MI.getDebugLoc();
1205 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1206
1207 MachineFunction *MF = MBB->getParent();
1209
1210 MVT PVT = getPointerTy(MF->getDataLayout());
1211 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1212 Register BufReg = MI.getOperand(0).getReg();
1213 const TargetRegisterClass *RC = MRI.getRegClass(BufReg);
1214 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1215
1216 Register Tmp = MRI.createVirtualRegister(RC);
1217 Register BCReg = MRI.createVirtualRegister(RC);
1218
1220
1221 const int64_t FPOffset = 0;
1222 const int64_t LabelOffset = 1 * PVT.getStoreSize();
1223 const int64_t BCOffset = 2 * PVT.getStoreSize();
1224 const int64_t SPOffset = 3 * PVT.getStoreSize();
1225 const int64_t LPOffset = 4 * PVT.getStoreSize();
1226
1227 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), Tmp)
1228 .addReg(BufReg)
1229 .addImm(LabelOffset)
1230 .addReg(0);
1231
1232 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1233 SpecialRegs->getFramePointerRegister())
1234 .addReg(BufReg)
1235 .addImm(FPOffset)
1236 .addReg(0);
1237
1238 // We restore R13 even though our setjmp never stored it, because gcc's
1239 // __builtin_setjmp always stores R13. The code may mix a gcc setjmp with
1240 // an llvm longjmp.
1241 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), SystemZ::R13D)
1242 .addReg(BufReg)
1243 .addImm(LPOffset)
1244 .addReg(0);
1245
1246 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1247 if (BackChain) {
1248 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1249 .addReg(BufReg)
1250 .addImm(BCOffset)
1251 .addReg(0);
1252 }
1253
1254 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1255 SpecialRegs->getStackPointerRegister())
1256 .addReg(BufReg)
1257 .addImm(SPOffset)
1258 .addReg(0);
1259
1260 if (BackChain) {
1261 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1262 BuildMI(*MBB, MI, DL, TII->get(SystemZ::STG))
1263 .addReg(BCReg)
1264 .addReg(SpecialRegs->getStackPointerRegister())
1265 .addImm(TFL->getBackchainOffset(*MF))
1266 .addReg(0);
1267 }
1268
1269 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BR)).addReg(Tmp);
1270
1271 MI.eraseFromParent();
1272 return MBB;
1273}
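// User-side sketch (illustrative only, not part of the original file): the two
// emitters above back the GCC-style builtins, roughly
//   void *Buf[5];
//   if (__builtin_setjmp(Buf) == 0) {
//     ...                       // first return: v == 0 (mainMBB path)
//     __builtin_longjmp(Buf, 1);
//   } else {
//     ...                       // second return: v == 1 (restoreMBB path)
//   }
// These builtins are not a general setjmp/longjmp replacement; see the comment
// next to the SJLJ setOperationAction calls in the constructor.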
1274
1275/// Returns true if stack probing through inline assembly is requested.
1277 // If the function specifically requests inline stack probes, emit them.
1278 if (MF.getFunction().hasFnAttribute("probe-stack"))
1279 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
1280 "inline-asm";
1281 return false;
1282}
1283
1288
1293
1296 const AtomicRMWInst *RMW) const {
1297 // Don't expand subword operations as they require special treatment.
1298 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
1300
1301 // Don't expand if there is a target instruction available.
1302 if (Subtarget.hasInterlockedAccess1() &&
1303 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
1310
1312}
1313
1315 // We can use CGFI or CLGFI.
1316 return isInt<32>(Imm) || isUInt<32>(Imm);
1317}
1318
1320 // We can use ALGFI or SLGFI.
1321 return isUInt<32>(Imm) || isUInt<32>(-Imm);
1322}
1323
1325 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
1326 // Unaligned accesses should never be slower than the expanded version.
1327 // We check specifically for aligned accesses in the few cases where
1328 // they are required.
1329 if (Fast)
1330 *Fast = 1;
1331 return true;
1332}
1333
1335 EVT VT = Y.getValueType();
1336
1337 // We can use NC(G)RK for types in GPRs ...
1338 if (VT == MVT::i32 || VT == MVT::i64)
1339 return Subtarget.hasMiscellaneousExtensions3();
1340
1341 // ... or VNC for types in VRs.
1342 if (VT.isVector() || VT == MVT::i128)
1343 return Subtarget.hasVector();
1344
1345 return false;
1346}
1347
1348// Information about the addressing mode for a memory access.
1350 // True if a long displacement is supported.
1352
1353 // True if use of index register is supported.
1355
1356 AddressingMode(bool LongDispl, bool IdxReg) :
1357 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1358};
1359
1360// Return the desired addressing mode for a Load which has only one use (in
1361// the same block) which is a Store.
1363 Type *Ty) {
1364 // With vector support a Load->Store combination may be combined to either
1365 // an MVC or vector operations and it seems to work best to allow the
1366 // vector addressing mode.
1367 if (HasVector)
1368 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1369
1370 // Otherwise only the MVC case is special.
1371 bool MVC = Ty->isIntegerTy(8);
1372 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1373}
1374
1375// Return the addressing mode which seems most desirable given an LLVM
1376// Instruction pointer.
1377static AddressingMode
1380 switch (II->getIntrinsicID()) {
1381 default: break;
1382 case Intrinsic::memset:
1383 case Intrinsic::memmove:
1384 case Intrinsic::memcpy:
1385 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1386 }
1387 }
1388
1389 if (isa<LoadInst>(I) && I->hasOneUse()) {
1390 auto *SingleUser = cast<Instruction>(*I->user_begin());
1391 if (SingleUser->getParent() == I->getParent()) {
1392 if (isa<ICmpInst>(SingleUser)) {
1393 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1394 if (C->getBitWidth() <= 64 &&
1395 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1396 // Comparison of memory with 16 bit signed / unsigned immediate
1397 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1398 } else if (isa<StoreInst>(SingleUser))
1399 // Load->Store
1400 return getLoadStoreAddrMode(HasVector, I->getType());
1401 }
1402 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1403 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1404 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1405 // Load->Store
1406 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1407 }
1408
1409 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1410
1411 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1412 // dependencies (LDE only supports small offsets).
1413 // * Utilize the vector registers to hold floating point
1414 // values (vector load / store instructions only support small
1415 // offsets).
1416
1417 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1418 I->getOperand(0)->getType());
1419 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1420 bool IsVectorAccess = MemAccessTy->isVectorTy();
1421
1422 // A store of an extracted vector element will be combined into a VSTE type
1423 // instruction.
1424 if (!IsVectorAccess && isa<StoreInst>(I)) {
1425 Value *DataOp = I->getOperand(0);
1426 if (isa<ExtractElementInst>(DataOp))
1427 IsVectorAccess = true;
1428 }
1429
1430 // A load which gets inserted into a vector element will be combined into a
1431 // VLE type instruction.
1432 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1433 User *LoadUser = *I->user_begin();
1434 if (isa<InsertElementInst>(LoadUser))
1435 IsVectorAccess = true;
1436 }
1437
1438 if (IsFPAccess || IsVectorAccess)
1439 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1440 }
1441
1442 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1443}
1444
1446 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1447 // Punt on globals for now, although they can be used in limited
1448 // RELATIVE LONG cases.
1449 if (AM.BaseGV)
1450 return false;
1451
1452 // Require a 20-bit signed offset.
1453 if (!isInt<20>(AM.BaseOffs))
1454 return false;
1455
1456 bool RequireD12 =
1457 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1458 AddressingMode SupportedAM(!RequireD12, true);
1459 if (I != nullptr)
1460 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1461
1462 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1463 return false;
1464
1465 if (!SupportedAM.IndexReg)
1466 // No indexing allowed.
1467 return AM.Scale == 0;
1468 else
1469 // Indexing is OK but no scale factor can be applied.
1470 return AM.Scale == 0 || AM.Scale == 1;
1471}
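// Illustrative examples of the checks above (addresses are base + BaseOffs +
// Scale * index, as in TargetLowering::AddrMode; not from the original file):
//  * { BaseOffs = 5000, Scale = 1 } for an i64 access: legal, since 5000 fits
//    a signed 20-bit displacement and plain (unscaled) indexing is allowed.
//  * The same mode for a v4i32 access on a vector-capable subtarget:
//    rejected, because vector and i128 accesses require an unsigned 12-bit
//    displacement.
//  * { Scale = 2 }: rejected, SystemZ has no scaled index registers.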
1472
1474 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
1475 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
1476 const AttributeList &FuncAttributes, EVT *LargestVT) const {
1477 const int MVCFastLen = 16;
1478
1479 if (Limit != ~unsigned(0)) {
1480 // Don't expand Op into scalar loads/stores in these cases:
1481 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1482 return false; // Small memcpy: Use MVC
1483 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1484 return false; // Small memset (first byte with STC/MVI): Use MVC
1485 if (Op.isZeroMemset())
1486 return false; // Memset zero: Use XC
1487 }
1488
1490 Context, MemOps, Limit, Op, DstAS, SrcAS, FuncAttributes, LargestVT);
1491}
1492
1494 LLVMContext &Context, const MemOp &Op,
1495 const AttributeList &FuncAttributes) const {
1496 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1497}
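// Illustrative consequence of the two hooks above (not from the original
// file): a short copy such as
//   memcpy(Dst, Src, 12);   // 12 <= MVCFastLen (16)
// is deliberately not expanded into scalar loads/stores here and typically
// ends up as a single MVC, while larger copies use the generic expansion with
// v2i64 elements when vector registers are available (per getOptimalMemOpType)
// or MVT::Other otherwise.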
1498
1499bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1500 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1501 return false;
1502 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1503 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1504 return FromBits > ToBits;
1505}
1506
1508 if (!FromVT.isInteger() || !ToVT.isInteger())
1509 return false;
1510 unsigned FromBits = FromVT.getFixedSizeInBits();
1511 unsigned ToBits = ToVT.getFixedSizeInBits();
1512 return FromBits > ToBits;
1513}
1514
1515//===----------------------------------------------------------------------===//
1516// Inline asm support
1517//===----------------------------------------------------------------------===//
1518
1521 if (Constraint.size() == 1) {
1522 switch (Constraint[0]) {
1523 case 'a': // Address register
1524 case 'd': // Data register (equivalent to 'r')
1525 case 'f': // Floating-point register
1526 case 'h': // High-part register
1527 case 'r': // General-purpose register
1528 case 'v': // Vector register
1529 return C_RegisterClass;
1530
1531 case 'Q': // Memory with base and unsigned 12-bit displacement
1532 case 'R': // Likewise, plus an index
1533 case 'S': // Memory with base and signed 20-bit displacement
1534 case 'T': // Likewise, plus an index
1535 case 'm': // Equivalent to 'T'.
1536 return C_Memory;
1537
1538 case 'I': // Unsigned 8-bit constant
1539 case 'J': // Unsigned 12-bit constant
1540 case 'K': // Signed 16-bit constant
1541 case 'L': // Signed 20-bit displacement (on all targets we support)
1542 case 'M': // 0x7fffffff
1543 return C_Immediate;
1544
1545 default:
1546 break;
1547 }
1548 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1549 switch (Constraint[1]) {
1550 case 'Q': // Address with base and unsigned 12-bit displacement
1551 case 'R': // Likewise, plus an index
1552 case 'S': // Address with base and signed 20-bit displacement
1553 case 'T': // Likewise, plus an index
1554 return C_Address;
1555
1556 default:
1557 break;
1558 }
1559 } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
1560 if (StringRef("{@cc}").compare(Constraint) == 0)
1561 return C_Other;
1562 }
1563 return TargetLowering::getConstraintType(Constraint);
1564}
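// User-side illustration (not part of the original file) of the constraint
// letters classified above, as they might appear in C/C++ inline assembly;
// the mnemonics and operands are examples only:
//   asm("ar %0,%1"  : "+d"(Sum) : "d"(B));          // 'd': general-purpose register
//   asm("mvi %0,42" : "=Q"(*Ptr));                  // 'Q': base + 12-bit displacement memory
//   asm("ahi %0,%1" : "+d"(Sum) : "K"(-100));       // 'K': signed 16-bit constant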
1565
1568 AsmOperandInfo &Info, const char *Constraint) const {
1570 Value *CallOperandVal = Info.CallOperandVal;
1571 // If we don't have a value, we can't do a match,
1572 // but allow it at the lowest weight.
1573 if (!CallOperandVal)
1574 return CW_Default;
1575 Type *type = CallOperandVal->getType();
1576 // Look at the constraint type.
1577 switch (*Constraint) {
1578 default:
1579 Weight = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint);
1580 break;
1581
1582 case 'a': // Address register
1583 case 'd': // Data register (equivalent to 'r')
1584 case 'h': // High-part register
1585 case 'r': // General-purpose register
1586 Weight =
1587 CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1588 break;
1589
1590 case 'f': // Floating-point register
1591 if (!useSoftFloat())
1592 Weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1593 break;
1594
1595 case 'v': // Vector register
1596 if (Subtarget.hasVector())
1597 Weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1598 : CW_Default;
1599 break;
1600
1601 case 'I': // Unsigned 8-bit constant
1602 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1603 if (isUInt<8>(C->getZExtValue()))
1604 Weight = CW_Constant;
1605 break;
1606
1607 case 'J': // Unsigned 12-bit constant
1608 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1609 if (isUInt<12>(C->getZExtValue()))
1610 Weight = CW_Constant;
1611 break;
1612
1613 case 'K': // Signed 16-bit constant
1614 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1615 if (isInt<16>(C->getSExtValue()))
1616 Weight = CW_Constant;
1617 break;
1618
1619 case 'L': // Signed 20-bit displacement (on all targets we support)
1620 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1621 if (isInt<20>(C->getSExtValue()))
1622 Weight = CW_Constant;
1623 break;
1624
1625 case 'M': // 0x7fffffff
1626 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1627 if (C->getZExtValue() == 0x7fffffff)
1628 Weight = CW_Constant;
1629 break;
1630 }
1631 return Weight;
1632}
1633
1634// Parse a "{tNNN}" register constraint for which the register type "t"
1635 // has already been verified. RC is the class associated with "t" and
1636// Map maps 0-based register numbers to LLVM register numbers.
1637static std::pair<unsigned, const TargetRegisterClass *>
1639 const unsigned *Map, unsigned Size) {
1640 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1641 if (isdigit(Constraint[2])) {
1642 unsigned Index;
1643 bool Failed =
1644 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1645 if (!Failed && Index < Size && Map[Index])
1646 return std::make_pair(Map[Index], RC);
1647 }
1648 return std::make_pair(0U, nullptr);
1649}
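// Example (illustrative, not from the original file): for the constraint
// "{r5}" with the 64-bit GPR class, the digits parse to Index = 5 and the
// helper returns the LLVM register Map[5] (i.e. %r5) paired with that class.
// The callers below supply the appropriate Map tables for GPRs, FPRs and VRs.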
1650
1651std::pair<unsigned, const TargetRegisterClass *>
1653 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1654 if (Constraint.size() == 1) {
1655 // GCC Constraint Letters
1656 switch (Constraint[0]) {
1657 default: break;
1658 case 'd': // Data register (equivalent to 'r')
1659 case 'r': // General-purpose register
1660 if (VT.getSizeInBits() == 64)
1661 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1662 else if (VT.getSizeInBits() == 128)
1663 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1664 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1665
1666 case 'a': // Address register
1667 if (VT == MVT::i64)
1668 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1669 else if (VT == MVT::i128)
1670 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1671 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1672
1673 case 'h': // High-part register (an LLVM extension)
1674 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1675
1676 case 'f': // Floating-point register
1677 if (!useSoftFloat()) {
1678 if (VT.getSizeInBits() == 16)
1679 return std::make_pair(0U, &SystemZ::FP16BitRegClass);
1680 else if (VT.getSizeInBits() == 64)
1681 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1682 else if (VT.getSizeInBits() == 128)
1683 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1684 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1685 }
1686 break;
1687
1688 case 'v': // Vector register
1689 if (Subtarget.hasVector()) {
1690 if (VT.getSizeInBits() == 16)
1691 return std::make_pair(0U, &SystemZ::VR16BitRegClass);
1692 if (VT.getSizeInBits() == 32)
1693 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1694 if (VT.getSizeInBits() == 64)
1695 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1696 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1697 }
1698 break;
1699 }
1700 }
1701 if (Constraint.starts_with("{")) {
1702
1703 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1704 // to check the size on.
1705 auto getVTSizeInBits = [&VT]() {
1706 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1707 };
1708
1709 // We need to override the default register parsing for GPRs and FPRs
1710 // because the interpretation depends on VT. The internal names of
1711 // the registers are also different from the external names
1712 // (F0D and F0S instead of F0, etc.).
1713 if (Constraint[1] == 'r') {
1714 if (getVTSizeInBits() == 32)
1715 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1717 if (getVTSizeInBits() == 128)
1718 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1720 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1722 }
1723 if (Constraint[1] == 'f') {
1724 if (useSoftFloat())
1725 return std::make_pair(
1726 0u, static_cast<const TargetRegisterClass *>(nullptr));
1727 if (getVTSizeInBits() == 16)
1728 return parseRegisterNumber(Constraint, &SystemZ::FP16BitRegClass,
1730 if (getVTSizeInBits() == 32)
1731 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1733 if (getVTSizeInBits() == 128)
1734 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1736 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1738 }
1739 if (Constraint[1] == 'v') {
1740 if (!Subtarget.hasVector())
1741 return std::make_pair(
1742 0u, static_cast<const TargetRegisterClass *>(nullptr));
1743 if (getVTSizeInBits() == 16)
1744 return parseRegisterNumber(Constraint, &SystemZ::VR16BitRegClass,
1746 if (getVTSizeInBits() == 32)
1747 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1749 if (getVTSizeInBits() == 64)
1750 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1752 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1754 }
1755 if (Constraint[1] == '@') {
1756 if (StringRef("{@cc}").compare(Constraint) == 0)
1757 return std::make_pair(SystemZ::CC, &SystemZ::CCRRegClass);
1758 }
1759 }
1760 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1761}
1762
1763// FIXME? Maybe this could be a TableGen attribute on some registers and
1764// this table could be generated automatically from RegInfo.
1767 const MachineFunction &MF) const {
1768 Register Reg =
1770 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D
1771 : SystemZ::NoRegister)
1772 .Case("r15",
1773 Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
1774 .Default(Register());
1775
1776 return Reg;
1777}
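// User-side illustration (not part of the original file): the named registers
// above back IR such as
//   !0 = !{!"r15"}
//   %sp = call i64 @llvm.read_register.i64(metadata !0)
// which on ELF targets reads the stack pointer (R15D); "r4" is only accepted
// on XPLINK64. Any other name yields no register here and is rejected by the
// caller.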
1778
1780 const Constant *PersonalityFn) const {
1781 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1782}
1783
1785 const Constant *PersonalityFn) const {
1786 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1787}
1788
1789// Convert condition code in CCReg to an i32 value.
1791 SDLoc DL(CCReg);
1792 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
1793 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
1794 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
1795}
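// Worked example (illustrative): IPM places the 2-bit condition code in bits
// 29:28 of its result, so with SystemZ::IPM_CC == 28 the SRL above maps a
// condition code of 2 (IPM result 0x20000000) to the i32 value 2.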
1796
1797// Lower @cc targets via setcc.
1799 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
1800 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
1801 if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
1802 return SDValue();
1803
1804 // Check that return type is valid.
1805 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
1806 OpInfo.ConstraintVT.getSizeInBits() < 8)
1807 report_fatal_error("Glue output operand is of invalid type");
1808
1809 if (Glue.getNode()) {
1810 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
1811 Chain = Glue.getValue(1);
1812 } else
1813 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
1814 return getCCResult(DAG, Glue);
1815}
1816
1818 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1819 SelectionDAG &DAG) const {
1820 // Only support length 1 constraints for now.
1821 if (Constraint.size() == 1) {
1822 switch (Constraint[0]) {
1823 case 'I': // Unsigned 8-bit constant
1824 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1825 if (isUInt<8>(C->getZExtValue()))
1826 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1827 Op.getValueType()));
1828 return;
1829
1830 case 'J': // Unsigned 12-bit constant
1831 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1832 if (isUInt<12>(C->getZExtValue()))
1833 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1834 Op.getValueType()));
1835 return;
1836
1837 case 'K': // Signed 16-bit constant
1838 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1839 if (isInt<16>(C->getSExtValue()))
1840 Ops.push_back(DAG.getSignedTargetConstant(
1841 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1842 return;
1843
1844 case 'L': // Signed 20-bit displacement (on all targets we support)
1845 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1846 if (isInt<20>(C->getSExtValue()))
1847 Ops.push_back(DAG.getSignedTargetConstant(
1848 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1849 return;
1850
1851 case 'M': // 0x7fffffff
1852 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1853 if (C->getZExtValue() == 0x7fffffff)
1854 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1855 Op.getValueType()));
1856 return;
1857 }
1858 }
1860}
1861
1862//===----------------------------------------------------------------------===//
1863// Calling conventions
1864//===----------------------------------------------------------------------===//
1865
1866#include "SystemZGenCallingConv.inc"
1867
1869 CallingConv::ID) const {
1870 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1871 SystemZ::R14D, 0 };
1872 return ScratchRegs;
1873}
1874
1876 Type *ToType) const {
1877 return isTruncateFree(FromType, ToType);
1878}
1879
1881 return CI->isTailCall();
1882}
1883
1884// Value is a value that has been passed to us in the location described by VA
1885// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1886// any loads onto Chain.
1888 CCValAssign &VA, SDValue Chain,
1889 SDValue Value) {
1890 // If the argument has been promoted from a smaller type, insert an
1891 // assertion to capture this.
1892 if (VA.getLocInfo() == CCValAssign::SExt)
1894 DAG.getValueType(VA.getValVT()));
1895 else if (VA.getLocInfo() == CCValAssign::ZExt)
1897 DAG.getValueType(VA.getValVT()));
1898
1899 if (VA.isExtInLoc())
1900 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1901 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1902 // If this is a short vector argument loaded from the stack,
1903 // extend from i64 to full vector size and then bitcast.
1904 assert(VA.getLocVT() == MVT::i64);
1905 assert(VA.getValVT().isVector());
1906 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1907 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1908 } else
1909 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1910 return Value;
1911}
1912
1913// Value is a value of type VA.getValVT() that we need to copy into
1914// the location described by VA. Return a copy of Value converted to
1915// VA.getValVT(). The caller is responsible for handling indirect values.
1917 CCValAssign &VA, SDValue Value) {
1918 switch (VA.getLocInfo()) {
1919 case CCValAssign::SExt:
1920 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1921 case CCValAssign::ZExt:
1922 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1923 case CCValAssign::AExt:
1924 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1925 case CCValAssign::BCvt: {
1926 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1927 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1928 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1929 // For an f32 vararg we need to first promote it to an f64 and then
1930 // bitcast it to an i64.
1931 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1932 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1933 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1934 ? MVT::v2i64
1935 : VA.getLocVT();
1936 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1937 // For ELF, this is a short vector argument to be stored to the stack,
1938 // bitcast to v2i64 and then extract first element.
1939 if (BitCastToType == MVT::v2i64)
1940 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1941 DAG.getConstant(0, DL, MVT::i32));
1942 return Value;
1943 }
1944 case CCValAssign::Full:
1945 return Value;
1946 default:
1947 llvm_unreachable("Unhandled getLocInfo()");
1948 }
1949}
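// Illustrative examples of the BCvt case above (not from the original file):
//  * An f32 vararg assigned an i64 location is first extended to f64 and then
//    bitcast to i64, so it travels in a 64-bit GPR or stack slot.
//  * A short vector argument with an i64 location is bitcast to v2i64 and
//    element 0 extracted, matching the ELF convention described in the
//    comment above.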
1950
1952 SDLoc DL(In);
1953 SDValue Lo, Hi;
1954 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1955 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1956 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1957 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1958 DAG.getConstant(64, DL, MVT::i32)));
1959 } else {
1960 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1961 }
1962
1963 // FIXME: If v2i64 were a legal type, we could use it instead of
1964 // Untyped here. This might enable improved folding.
1965 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1966 MVT::Untyped, Hi, Lo);
1967 return SDValue(Pair, 0);
1968}
1969
1971 SDLoc DL(In);
1972 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1973 DL, MVT::i64, In);
1974 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1975 DL, MVT::i64, In);
1976
1977 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1978 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1979 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1980 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1981 DAG.getConstant(64, DL, MVT::i32));
1982 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1983 } else {
1984 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1985 }
1986}
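// Illustrative round trip of the two helpers above (not from the original
// file): an i128 value with high half 0xAA and low half 0xBB is split into
// Hi = 0xAA and Lo = 0xBB, packed into an untyped 128-bit register pair via
// PAIR128, and lowerGR128ToI128 rebuilds the same i128 as (Hi << 64) | Lo.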
1987
1989 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1990 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1991 EVT ValueVT = Val.getValueType();
1992 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1993 // Inline assembly operand.
1994 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1995 return true;
1996 }
1997
1998 return false;
1999}
2000
2002 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
2003 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
2004 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
2005 // Inline assembly operand.
2006 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
2007 return DAG.getBitcast(ValueVT, Res);
2008 }
2009
2010 return SDValue();
2011}
2012
2013// The first part of a split stack argument is at index I in Args (and
2014// ArgLocs). Return the type of a part and the number of them by reference.
2015template <class ArgTy>
2017 SmallVector<CCValAssign, 16> &ArgLocs, unsigned I,
2018 MVT &PartVT, unsigned &NumParts) {
2019 if (!Args[I].Flags.isSplit())
2020 return false;
2021 assert(I < ArgLocs.size() && ArgLocs.size() == Args.size() &&
2022 "ArgLocs havoc.");
2023 PartVT = ArgLocs[I].getValVT();
2024 NumParts = 1;
2025 for (unsigned PartIdx = I + 1;; ++PartIdx) {
2026 assert(PartIdx != ArgLocs.size() && "SplitEnd not found.");
2027 assert(ArgLocs[PartIdx].getValVT() == PartVT && "Unsupported split.");
2028 ++NumParts;
2029 if (Args[PartIdx].Flags.isSplitEnd())
2030 break;
2031 }
2032 return true;
2033}
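// Example (illustrative, not from the original file): an i128 argument on a
// subtarget where i128 is not a legal type is split by the type legalizer into
// two i64 pieces that appear as consecutive entries in Args/ArgLocs; for the
// first piece this helper reports PartVT = i64 and NumParts = 2.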
2034
2036 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
2037 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2038 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2040 MachineFrameInfo &MFI = MF.getFrameInfo();
2042 SystemZMachineFunctionInfo *FuncInfo =
2044 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
2045 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2046
2047 // Assign locations to all of the incoming arguments.
2049 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2050 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
2051 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
2052
2053 unsigned NumFixedGPRs = 0;
2054 unsigned NumFixedFPRs = 0;
2055 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2056 SDValue ArgValue;
2057 CCValAssign &VA = ArgLocs[I];
2058 EVT LocVT = VA.getLocVT();
2059 if (VA.isRegLoc()) {
2060 // Arguments passed in registers
2061 const TargetRegisterClass *RC;
2062 switch (LocVT.getSimpleVT().SimpleTy) {
2063 default:
2064 // Integers smaller than i64 should be promoted to i64.
2065 llvm_unreachable("Unexpected argument type");
2066 case MVT::i32:
2067 NumFixedGPRs += 1;
2068 RC = &SystemZ::GR32BitRegClass;
2069 break;
2070 case MVT::i64:
2071 NumFixedGPRs += 1;
2072 RC = &SystemZ::GR64BitRegClass;
2073 break;
2074 case MVT::f16:
2075 NumFixedFPRs += 1;
2076 RC = &SystemZ::FP16BitRegClass;
2077 break;
2078 case MVT::f32:
2079 NumFixedFPRs += 1;
2080 RC = &SystemZ::FP32BitRegClass;
2081 break;
2082 case MVT::f64:
2083 NumFixedFPRs += 1;
2084 RC = &SystemZ::FP64BitRegClass;
2085 break;
2086 case MVT::f128:
2087 NumFixedFPRs += 2;
2088 RC = &SystemZ::FP128BitRegClass;
2089 break;
2090 case MVT::v16i8:
2091 case MVT::v8i16:
2092 case MVT::v4i32:
2093 case MVT::v2i64:
2094 case MVT::v8f16:
2095 case MVT::v4f32:
2096 case MVT::v2f64:
2097 RC = &SystemZ::VR128BitRegClass;
2098 break;
2099 }
2100
2101 Register VReg = MRI.createVirtualRegister(RC);
2102 MRI.addLiveIn(VA.getLocReg(), VReg);
2103 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2104 } else {
2105 assert(VA.isMemLoc() && "Argument not register or memory");
2106
2107 // Create the frame index object for this incoming parameter.
2108 // FIXME: Pre-include call frame size in the offset, should not
2109 // need to manually add it here.
2110 int64_t ArgSPOffset = VA.getLocMemOffset();
2111 if (Subtarget.isTargetXPLINK64()) {
2112 auto &XPRegs =
2113 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
2114 ArgSPOffset += XPRegs.getCallFrameSize();
2115 }
2116 int FI =
2117 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
2118
2119 // Create the SelectionDAG nodes corresponding to a load
2120 // from this parameter. Unpromoted ints and floats are
2121 // passed as right-justified 8-byte values.
2122 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2123 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32 ||
2124 VA.getLocVT() == MVT::f16) {
2125 unsigned SlotOffs = VA.getLocVT() == MVT::f16 ? 6 : 4;
2126 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
2127 DAG.getIntPtrConstant(SlotOffs, DL));
2128 }
2129 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
2131 }
2132
2133 // Convert the value of the argument register into the value that's
2134 // being passed.
2135 if (VA.getLocInfo() == CCValAssign::Indirect) {
2136 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2138 // If the original argument was split (e.g. i128), we need
2139 // to load all parts of it here (using the same address).
2140 MVT PartVT;
2141 unsigned NumParts;
2142 if (analyzeArgSplit(Ins, ArgLocs, I, PartVT, NumParts)) {
2143 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
2144 ++I;
2145 CCValAssign &PartVA = ArgLocs[I];
2146 unsigned PartOffset = Ins[I].PartOffset;
2147 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2148 DAG.getIntPtrConstant(PartOffset, DL));
2149 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2151 assert(PartOffset && "Offset should be non-zero.");
2152 }
2153 }
2154 } else
2155 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
2156 }
2157
2158 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
2159 // Save the number of non-varargs registers for later use by va_start, etc.
2160 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2161 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2162
2163 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2164 Subtarget.getSpecialRegisters());
2165
2166 // Likewise the address (in the form of a frame index) of where the
2167 // first stack vararg would be. The 1-byte size here is arbitrary.
2168 // FIXME: Pre-include call frame size in the offset, should not
2169 // need to manually add it here.
2170 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
2171 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
2172 FuncInfo->setVarArgsFrameIndex(FI);
2173 }
2174
2175 if (IsVarArg && Subtarget.isTargetELF()) {
2176 // Save the number of non-varargs registers for later use by va_start, etc.
2177 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2178 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2179
2180 // Likewise the address (in the form of a frame index) of where the
2181 // first stack vararg would be. The 1-byte size here is arbitrary.
2182 int64_t VarArgsOffset = CCInfo.getStackSize();
2183 FuncInfo->setVarArgsFrameIndex(
2184 MFI.CreateFixedObject(1, VarArgsOffset, true));
2185
2186 // ...and a similar frame index for the caller-allocated save area
2187 // that will be used to store the incoming registers.
2188 int64_t RegSaveOffset =
2189 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
2190 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
2191 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
2192
2193 // Store the FPR varargs in the reserved frame slots. (We store the
2194 // GPRs as part of the prologue.)
2195 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
2197 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
2198 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
2199 int FI =
2201 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2203 &SystemZ::FP64BitRegClass);
2204 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
2205 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
2207 }
2208 // Join the stores, which are independent of one another.
2209 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2210 ArrayRef(&MemOps[NumFixedFPRs],
2211 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
2212 }
2213 }
2214
2215 if (Subtarget.isTargetXPLINK64()) {
2216 // Create virtual register for handling incoming "ADA" special register (R5)
2217 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
2218 Register ADAvReg = MRI.createVirtualRegister(RC);
2219 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2220 Subtarget.getSpecialRegisters());
2221 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
2222 FuncInfo->setADAVirtualRegister(ADAvReg);
2223 }
2224 return Chain;
2225}
2226
2227static bool canUseSiblingCall(const CCState &ArgCCInfo,
2228 SmallVectorImpl<CCValAssign> &ArgLocs,
2229 SmallVectorImpl<ISD::OutputArg> &Outs) {
2230 // Punt if there are any indirect or stack arguments, or if the call
2231 // needs the callee-saved argument register R6, or if the call uses
2232 // the callee-saved register arguments SwiftSelf and SwiftError.
2233 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2234 CCValAssign &VA = ArgLocs[I];
2236 return false;
2237 if (!VA.isRegLoc())
2238 return false;
2239 Register Reg = VA.getLocReg();
2240 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
2241 return false;
2242 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
2243 return false;
2244 }
2245 return true;
2246}
2247
2249 unsigned Offset, bool LoadAdr = false) {
2252 Register ADAvReg = MFI->getADAVirtualRegister();
2254
2255 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
2256 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
2257
2258 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
2259 if (!LoadAdr)
2260 Result = DAG.getLoad(
2261 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
2263
2264 return Result;
2265}
2266
2267// ADA access using Global value
2268// Note: for functions, address of descriptor is returned
2270 EVT PtrVT) {
2271 unsigned ADAtype;
2272 bool LoadAddr = false;
2273 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
2274 bool IsFunction =
2275 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
2276 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
2277
2278 if (IsFunction) {
2279 if (IsInternal) {
2281 LoadAddr = true;
2282 } else
2284 } else {
2286 }
2287 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
2288
2289 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
2290}
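// Illustrative summary of the cases above: internal functions take the
// LoadAddr == true path, so getADAEntry returns the ADA_ENTRY address
// directly, while external functions and data symbols take the default path,
// where the selected ADA slot is additionally dereferenced with an 8-byte
// aligned load to obtain the final address.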
2291
2292static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
2293 SDLoc &DL, SDValue &Chain) {
2294 unsigned ADADelta = 0; // ADA offset in desc.
2295 unsigned EPADelta = 8; // EPA offset in desc.
2298
2299 // XPLink calling convention.
2300 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2301 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
2302 G->getGlobal()->hasPrivateLinkage());
2303 if (IsInternal) {
2306 Register ADAvReg = MFI->getADAVirtualRegister();
2307 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
2308 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2309 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2310 return true;
2311 } else {
2313 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2314 ADA = getADAEntry(DAG, GA, DL, ADADelta);
2315 Callee = getADAEntry(DAG, GA, DL, EPADelta);
2316 }
2317 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2319 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2320 ADA = getADAEntry(DAG, ES, DL, ADADelta);
2321 Callee = getADAEntry(DAG, ES, DL, EPADelta);
2322 } else {
2323 // Function pointer case
2324 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2325 DAG.getConstant(ADADelta, DL, PtrVT));
2326 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
2328 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2329 DAG.getConstant(EPADelta, DL, PtrVT));
2330 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
2332 }
2333 return false;
2334}
2335
2336SDValue
2337SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
2338 SmallVectorImpl<SDValue> &InVals) const {
2339 SelectionDAG &DAG = CLI.DAG;
2340 SDLoc &DL = CLI.DL;
2342 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2344 SDValue Chain = CLI.Chain;
2345 SDValue Callee = CLI.Callee;
2346 bool &IsTailCall = CLI.IsTailCall;
2347 CallingConv::ID CallConv = CLI.CallConv;
2348 bool IsVarArg = CLI.IsVarArg;
2350 EVT PtrVT = getPointerTy(MF.getDataLayout());
2351 LLVMContext &Ctx = *DAG.getContext();
2352 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
2353
2354 // FIXME: z/OS support to be added in later.
2355 if (Subtarget.isTargetXPLINK64())
2356 IsTailCall = false;
2357
2358 // Integer args <=32 bits should have an extension attribute.
2359 verifyNarrowIntegerArgs_Call(Outs, &MF.getFunction(), Callee);
2360
2361 // Analyze the operands of the call, assigning locations to each operand.
2363 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
2364 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
2365
2366 // We don't support GuaranteedTailCallOpt, only automatically-detected
2367 // sibling calls.
2368 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
2369 IsTailCall = false;
2370
2371 // Get a count of how many bytes are to be pushed on the stack.
2372 unsigned NumBytes = ArgCCInfo.getStackSize();
2373
2374 // Mark the start of the call.
2375 if (!IsTailCall)
2376 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
2377
2378 // Copy argument values to their designated locations.
2380 SmallVector<SDValue, 8> MemOpChains;
2381 SDValue StackPtr;
2382 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2383 CCValAssign &VA = ArgLocs[I];
2384 SDValue ArgValue = OutVals[I];
2385
2386 if (VA.getLocInfo() == CCValAssign::Indirect) {
2387 // Store the argument in a stack slot and pass its address.
2388 EVT SlotVT;
2389 MVT PartVT;
2390 unsigned NumParts = 1;
2391 if (analyzeArgSplit(Outs, ArgLocs, I, PartVT, NumParts))
2392 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * NumParts);
2393 else
2394 SlotVT = Outs[I].VT;
2395 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
2396 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2397
2398 MachinePointerInfo StackPtrInfo =
2400 MemOpChains.push_back(
2401 DAG.getStore(Chain, DL, ArgValue, SpillSlot, StackPtrInfo));
2402 // If the original argument was split (e.g. i128), we need
2403 // to store all parts of it here (and pass just one address).
2404 assert(Outs[I].PartOffset == 0);
2405 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
2406 ++I;
2407 SDValue PartValue = OutVals[I];
2408 unsigned PartOffset = Outs[I].PartOffset;
2409 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2410 DAG.getIntPtrConstant(PartOffset, DL));
2411 MemOpChains.push_back(
2412 DAG.getStore(Chain, DL, PartValue, Address,
2413 StackPtrInfo.getWithOffset(PartOffset)));
2414 assert(PartOffset && "Offset should be non-zero.");
2415 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
2416 SlotVT.getStoreSize()) && "Not enough space for argument part!");
2417 }
2418 ArgValue = SpillSlot;
2419 } else
2420 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
2421
2422 if (VA.isRegLoc()) {
2423 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
2424 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
2425 // and low values.
2426 if (VA.getLocVT() == MVT::i128)
2427 ArgValue = lowerI128ToGR128(DAG, ArgValue);
2428 // Queue up the argument copies and emit them at the end.
2429 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2430 } else {
2431 assert(VA.isMemLoc() && "Argument not register or memory");
2432
2433 // Work out the address of the stack slot. Unpromoted ints and
2434 // floats are passed as right-justified 8-byte values.
2435 if (!StackPtr.getNode())
2436 StackPtr = DAG.getCopyFromReg(Chain, DL,
2437 Regs->getStackPointerRegister(), PtrVT);
2438 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2439 VA.getLocMemOffset();
2440 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2441 Offset += 4;
2442 else if (VA.getLocVT() == MVT::f16)
2443 Offset += 6;
2444 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2446
2447 // Emit the store.
2448 MemOpChains.push_back(
2449 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2450
2451 // Although long doubles or vectors are passed through the stack when
2452 // they are vararg (non-fixed arguments), if a long double or vector
2453 // occupies the third and fourth slot of the argument list GPR3 should
2454 // still shadow the third slot of the argument list.
2455 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2456 SDValue ShadowArgValue =
2457 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2458 DAG.getIntPtrConstant(1, DL));
2459 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2460 }
2461 }
2462 }
2463
2464 // Join the stores, which are independent of one another.
2465 if (!MemOpChains.empty())
2466 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2467
2468 // Accept direct calls by converting symbolic call addresses to the
2469 // associated Target* opcodes. Force %r1 to be used for indirect
2470 // tail calls.
2471 SDValue Glue;
2472
2473 if (Subtarget.isTargetXPLINK64()) {
2474 SDValue ADA;
2475 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2476 if (!IsBRASL) {
2477 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2478 ->getAddressOfCalleeRegister();
2479 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2480 Glue = Chain.getValue(1);
2481 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2482 }
2483 RegsToPass.push_back(std::make_pair(
2484 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2485 } else {
2486 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2487 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2488 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2489 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2490 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2491 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2492 } else if (IsTailCall) {
2493 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2494 Glue = Chain.getValue(1);
2495 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2496 }
2497 }
2498
2499 // Build a sequence of copy-to-reg nodes, chained and glued together.
2500 for (const auto &[Reg, N] : RegsToPass) {
2501 Chain = DAG.getCopyToReg(Chain, DL, Reg, N, Glue);
2502 Glue = Chain.getValue(1);
2503 }
2504
2505 // The first call operand is the chain and the second is the target address.
2507 Ops.push_back(Chain);
2508 Ops.push_back(Callee);
2509
2510 // Add argument registers to the end of the list so that they are
2511 // known live into the call.
2512 for (const auto &[Reg, N] : RegsToPass)
2513 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2514
2515 // Add a register mask operand representing the call-preserved registers.
2516 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2517 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2518 assert(Mask && "Missing call preserved mask for calling convention");
2519 Ops.push_back(DAG.getRegisterMask(Mask));
2520
2521 // Glue the call to the argument copies, if any.
2522 if (Glue.getNode())
2523 Ops.push_back(Glue);
2524
2525 // Emit the call.
2526 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2527 if (IsTailCall) {
2528 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2529 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2530 return Ret;
2531 }
2532 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2533 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2534 Glue = Chain.getValue(1);
2535
2536 // Mark the end of the call, which is glued to the call itself.
2537 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2538 Glue = Chain.getValue(1);
2539
2540 // Assign locations to each value returned by this call.
2542 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2543 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2544
2545 // Copy all of the result registers out of their specified physreg.
2546 for (CCValAssign &VA : RetLocs) {
2547 // Copy the value out, gluing the copy to the end of the call sequence.
2548 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2549 VA.getLocVT(), Glue);
2550 Chain = RetValue.getValue(1);
2551 Glue = RetValue.getValue(2);
2552
2553 // Convert the value of the return register into the value that's
2554 // being returned.
2555 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2556 }
2557
2558 return Chain;
2559}
2560
2561// Generate a call taking the given operands as arguments and returning a
2562// result of type RetVT.
2564 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2565 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2566 bool DoesNotReturn, bool IsReturnValueUsed) const {
2568 Args.reserve(Ops.size());
2569
2570 for (SDValue Op : Ops) {
2572 Op, Op.getValueType().getTypeForEVT(*DAG.getContext()));
2573 Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned);
2574 Entry.IsZExt = !Entry.IsSExt;
2575 Args.push_back(Entry);
2576 }
2577
2578 SDValue Callee =
2579 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2580
2581 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2583 bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned);
2584 CLI.setDebugLoc(DL)
2585 .setChain(Chain)
2586 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2587 .setNoReturn(DoesNotReturn)
2588 .setDiscardResult(!IsReturnValueUsed)
2589 .setSExtResult(SignExtend)
2590 .setZExtResult(!SignExtend);
2591 return LowerCallTo(CLI);
2592}
2593
2594bool SystemZTargetLowering::CanLowerReturn(
2595 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2596 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
2597 const Type *RetTy) const {
2598 // Special case that we cannot easily detect in RetCC_SystemZ since
2599 // i128 may not be a legal type.
2600 for (auto &Out : Outs)
2601 if (Out.ArgVT.isScalarInteger() && Out.ArgVT.getSizeInBits() > 64)
2602 return false;
2603
2605 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Context);
2606 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2607}
2608
2609SDValue
2610SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2611 bool IsVarArg,
2612 const SmallVectorImpl<ISD::OutputArg> &Outs,
2613 const SmallVectorImpl<SDValue> &OutVals,
2614 const SDLoc &DL, SelectionDAG &DAG) const {
2615 MachineFunction &MF = DAG.getMachineFunction();
2616
2617 // Integer args <=32 bits should have an extension attribute.
2618 verifyNarrowIntegerArgs_Ret(Outs, &MF.getFunction());
2619
2620 // Assign locations to each returned value.
2622 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2623 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2624
2625 // Quick exit for void returns
2626 if (RetLocs.empty())
2627 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2628
2629 if (CallConv == CallingConv::GHC)
2630 report_fatal_error("GHC functions return void only");
2631
2632 // Copy the result values into the output registers.
2633 SDValue Glue;
2635 RetOps.push_back(Chain);
2636 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2637 CCValAssign &VA = RetLocs[I];
2638 SDValue RetValue = OutVals[I];
2639
2640 // Make the return register live on exit.
2641 assert(VA.isRegLoc() && "Can only return in registers!");
2642
2643 // Promote the value as required.
2644 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2645
2646 // Chain and glue the copies together.
2647 Register Reg = VA.getLocReg();
2648 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2649 Glue = Chain.getValue(1);
2650 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2651 }
2652
2653 // Update chain and glue.
2654 RetOps[0] = Chain;
2655 if (Glue.getNode())
2656 RetOps.push_back(Glue);
2657
2658 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2659}
2660
2661// Return true if Op is an intrinsic node with chain that returns the CC value
2662// as its only (other) argument. Provide the associated SystemZISD opcode and
2663// the mask of valid CC values if so.
2664static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2665 unsigned &CCValid) {
2666 unsigned Id = Op.getConstantOperandVal(1);
2667 switch (Id) {
2668 case Intrinsic::s390_tbegin:
2669 Opcode = SystemZISD::TBEGIN;
2670 CCValid = SystemZ::CCMASK_TBEGIN;
2671 return true;
2672
2673 case Intrinsic::s390_tbegin_nofloat:
2674 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2675 CCValid = SystemZ::CCMASK_TBEGIN;
2676 return true;
2677
2678 case Intrinsic::s390_tend:
2679 Opcode = SystemZISD::TEND;
2680 CCValid = SystemZ::CCMASK_TEND;
2681 return true;
2682
2683 default:
2684 return false;
2685 }
2686}
2687
2688// Return true if Op is an intrinsic node without chain that returns the
2689// CC value as its final argument. Provide the associated SystemZISD
2690// opcode and the mask of valid CC values if so.
2691static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2692 unsigned Id = Op.getConstantOperandVal(0);
2693 switch (Id) {
2694 case Intrinsic::s390_vpkshs:
2695 case Intrinsic::s390_vpksfs:
2696 case Intrinsic::s390_vpksgs:
2697 Opcode = SystemZISD::PACKS_CC;
2698 CCValid = SystemZ::CCMASK_VCMP;
2699 return true;
2700
2701 case Intrinsic::s390_vpklshs:
2702 case Intrinsic::s390_vpklsfs:
2703 case Intrinsic::s390_vpklsgs:
2704 Opcode = SystemZISD::PACKLS_CC;
2705 CCValid = SystemZ::CCMASK_VCMP;
2706 return true;
2707
2708 case Intrinsic::s390_vceqbs:
2709 case Intrinsic::s390_vceqhs:
2710 case Intrinsic::s390_vceqfs:
2711 case Intrinsic::s390_vceqgs:
2712 case Intrinsic::s390_vceqqs:
2713 Opcode = SystemZISD::VICMPES;
2714 CCValid = SystemZ::CCMASK_VCMP;
2715 return true;
2716
2717 case Intrinsic::s390_vchbs:
2718 case Intrinsic::s390_vchhs:
2719 case Intrinsic::s390_vchfs:
2720 case Intrinsic::s390_vchgs:
2721 case Intrinsic::s390_vchqs:
2722 Opcode = SystemZISD::VICMPHS;
2723 CCValid = SystemZ::CCMASK_VCMP;
2724 return true;
2725
2726 case Intrinsic::s390_vchlbs:
2727 case Intrinsic::s390_vchlhs:
2728 case Intrinsic::s390_vchlfs:
2729 case Intrinsic::s390_vchlgs:
2730 case Intrinsic::s390_vchlqs:
2731 Opcode = SystemZISD::VICMPHLS;
2732 CCValid = SystemZ::CCMASK_VCMP;
2733 return true;
2734
2735 case Intrinsic::s390_vtm:
2736 Opcode = SystemZISD::VTM;
2737 CCValid = SystemZ::CCMASK_VCMP;
2738 return true;
2739
2740 case Intrinsic::s390_vfaebs:
2741 case Intrinsic::s390_vfaehs:
2742 case Intrinsic::s390_vfaefs:
2743 Opcode = SystemZISD::VFAE_CC;
2744 CCValid = SystemZ::CCMASK_ANY;
2745 return true;
2746
2747 case Intrinsic::s390_vfaezbs:
2748 case Intrinsic::s390_vfaezhs:
2749 case Intrinsic::s390_vfaezfs:
2750 Opcode = SystemZISD::VFAEZ_CC;
2751 CCValid = SystemZ::CCMASK_ANY;
2752 return true;
2753
2754 case Intrinsic::s390_vfeebs:
2755 case Intrinsic::s390_vfeehs:
2756 case Intrinsic::s390_vfeefs:
2757 Opcode = SystemZISD::VFEE_CC;
2758 CCValid = SystemZ::CCMASK_ANY;
2759 return true;
2760
2761 case Intrinsic::s390_vfeezbs:
2762 case Intrinsic::s390_vfeezhs:
2763 case Intrinsic::s390_vfeezfs:
2764 Opcode = SystemZISD::VFEEZ_CC;
2765 CCValid = SystemZ::CCMASK_ANY;
2766 return true;
2767
2768 case Intrinsic::s390_vfenebs:
2769 case Intrinsic::s390_vfenehs:
2770 case Intrinsic::s390_vfenefs:
2771 Opcode = SystemZISD::VFENE_CC;
2772 CCValid = SystemZ::CCMASK_ANY;
2773 return true;
2774
2775 case Intrinsic::s390_vfenezbs:
2776 case Intrinsic::s390_vfenezhs:
2777 case Intrinsic::s390_vfenezfs:
2778 Opcode = SystemZISD::VFENEZ_CC;
2779 CCValid = SystemZ::CCMASK_ANY;
2780 return true;
2781
2782 case Intrinsic::s390_vistrbs:
2783 case Intrinsic::s390_vistrhs:
2784 case Intrinsic::s390_vistrfs:
2785 Opcode = SystemZISD::VISTR_CC;
2786 CCValid = SystemZ::CCMASK_ANY;
2787 return true;
2788
2789 case Intrinsic::s390_vstrcbs:
2790 case Intrinsic::s390_vstrchs:
2791 case Intrinsic::s390_vstrcfs:
2792 Opcode = SystemZISD::VSTRC_CC;
2793 CCValid = SystemZ::CCMASK_ANY;
2794 return true;
2795
2796 case Intrinsic::s390_vstrczbs:
2797 case Intrinsic::s390_vstrczhs:
2798 case Intrinsic::s390_vstrczfs:
2799 Opcode = SystemZISD::VSTRCZ_CC;
2800 CCValid = SystemZ::CCMASK_ANY;
2801 return true;
2802
2803 case Intrinsic::s390_vstrsb:
2804 case Intrinsic::s390_vstrsh:
2805 case Intrinsic::s390_vstrsf:
2806 Opcode = SystemZISD::VSTRS_CC;
2807 CCValid = SystemZ::CCMASK_ANY;
2808 return true;
2809
2810 case Intrinsic::s390_vstrszb:
2811 case Intrinsic::s390_vstrszh:
2812 case Intrinsic::s390_vstrszf:
2813 Opcode = SystemZISD::VSTRSZ_CC;
2814 CCValid = SystemZ::CCMASK_ANY;
2815 return true;
2816
2817 case Intrinsic::s390_vfcedbs:
2818 case Intrinsic::s390_vfcesbs:
2819 Opcode = SystemZISD::VFCMPES;
2820 CCValid = SystemZ::CCMASK_VCMP;
2821 return true;
2822
2823 case Intrinsic::s390_vfchdbs:
2824 case Intrinsic::s390_vfchsbs:
2825 Opcode = SystemZISD::VFCMPHS;
2826 CCValid = SystemZ::CCMASK_VCMP;
2827 return true;
2828
2829 case Intrinsic::s390_vfchedbs:
2830 case Intrinsic::s390_vfchesbs:
2831 Opcode = SystemZISD::VFCMPHES;
2832 CCValid = SystemZ::CCMASK_VCMP;
2833 return true;
2834
2835 case Intrinsic::s390_vftcidb:
2836 case Intrinsic::s390_vftcisb:
2837 Opcode = SystemZISD::VFTCI;
2838 CCValid = SystemZ::CCMASK_VCMP;
2839 return true;
2840
2841 case Intrinsic::s390_tdc:
2842 Opcode = SystemZISD::TDC;
2843 CCValid = SystemZ::CCMASK_TDC;
2844 return true;
2845
2846 default:
2847 return false;
2848 }
2849}
2850
2851// Emit an intrinsic with chain and an explicit CC register result.
2852static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2853 unsigned Opcode) {
2854 // Copy all operands except the intrinsic ID.
2855 unsigned NumOps = Op.getNumOperands();
2857 Ops.reserve(NumOps - 1);
2858 Ops.push_back(Op.getOperand(0));
2859 for (unsigned I = 2; I < NumOps; ++I)
2860 Ops.push_back(Op.getOperand(I));
2861
2862 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2863 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2864 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2865 SDValue OldChain = SDValue(Op.getNode(), 1);
2866 SDValue NewChain = SDValue(Intr.getNode(), 1);
2867 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2868 return Intr.getNode();
2869}
2870
2871// Emit an intrinsic with an explicit CC register result.
2872static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2873 unsigned Opcode) {
2874 // Copy all operands except the intrinsic ID.
2875 SDLoc DL(Op);
2876 unsigned NumOps = Op.getNumOperands();
2878 Ops.reserve(NumOps - 1);
2879 for (unsigned I = 1; I < NumOps; ++I) {
2880 SDValue CurrOper = Op.getOperand(I);
2881 if (CurrOper.getValueType() == MVT::f16) {
2882 assert((Op.getConstantOperandVal(0) == Intrinsic::s390_tdc && I == 1) &&
2883 "Unhandled intrinsic with f16 operand.");
2884 CurrOper = DAG.getFPExtendOrRound(CurrOper, DL, MVT::f32);
2885 }
2886 Ops.push_back(CurrOper);
2887 }
2888
2889 SDValue Intr = DAG.getNode(Opcode, DL, Op->getVTList(), Ops);
2890 return Intr.getNode();
2891}
2892
2893// CC is a comparison that will be implemented using an integer or
2894// floating-point comparison. Return the condition code mask for
2895// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2896// unsigned comparisons and clear for signed ones. In the floating-point
2897// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2898static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2899#define CONV(X) \
2900 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2901 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2902 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2903
2904 switch (CC) {
2905 default:
2906 llvm_unreachable("Invalid integer condition!");
2907
2908 CONV(EQ);
2909 CONV(NE);
2910 CONV(GT);
2911 CONV(GE);
2912 CONV(LT);
2913 CONV(LE);
2914
2915 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2916 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2917 }
2918#undef CONV
2919}
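// Worked example (illustrative): via the CONV macro, ISD::SETGT and
// ISD::SETOGT both map to SystemZ::CCMASK_CMP_GT, while ISD::SETUGT maps to
// SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_GT -- "unordered" for a
// floating-point compare and simply "unsigned" for an integer compare, as
// described in the comment above.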
2920
2921// If C can be converted to a comparison against zero, adjust the operands
2922// as necessary.
2923static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2924 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2925 return;
2926
2927 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2928 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2929 return;
2930
2931 int64_t Value = ConstOp1->getSExtValue();
2932 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2933 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2934 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2935 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2936 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2937 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2938 }
2939}
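// Worked example (illustrative): a signed test "x > -1" arrives with
// Value == -1 and CCMask == CCMASK_CMP_GT, so the code above xors in the
// CCMASK_CMP_EQ bit, turning "greater than" into "greater than or equal",
// and rewrites the comparison as "x >= 0", i.e. a comparison against zero.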
2940
2941// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2942// adjust the operands as necessary.
2943static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2944 Comparison &C) {
2945 // For us to make any changes, it must be a comparison between a single-use
2946 // load and a constant.
2947 if (!C.Op0.hasOneUse() ||
2948 C.Op0.getOpcode() != ISD::LOAD ||
2949 C.Op1.getOpcode() != ISD::Constant)
2950 return;
2951
2952 // We must have an 8- or 16-bit load.
2953 auto *Load = cast<LoadSDNode>(C.Op0);
2954 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2955 if ((NumBits != 8 && NumBits != 16) ||
2956 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2957 return;
2958
2959 // The load must be an extending one and the constant must be within the
2960 // range of the unextended value.
2961 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2962 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2963 return;
2964 uint64_t Value = ConstOp1->getZExtValue();
2965 uint64_t Mask = (1 << NumBits) - 1;
2966 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2967 // Make sure that ConstOp1 is in range of C.Op0.
2968 int64_t SignedValue = ConstOp1->getSExtValue();
2969 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2970 return;
2971 if (C.ICmpType != SystemZICMP::SignedOnly) {
2972 // Unsigned comparison between two sign-extended values is equivalent
2973 // to unsigned comparison between two zero-extended values.
2974 Value &= Mask;
2975 } else if (NumBits == 8) {
2976 // Try to treat the comparison as unsigned, so that we can use CLI.
2977 // Adjust CCMask and Value as necessary.
2978 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2979 // Test whether the high bit of the byte is set.
2980 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2981 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2982 // Test whether the high bit of the byte is clear.
2983 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2984 else
2985 // No instruction exists for this combination.
2986 return;
2987 C.ICmpType = SystemZICMP::UnsignedOnly;
2988 }
2989 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2990 if (Value > Mask)
2991 return;
2992 // If the constant is in range, we can use any comparison.
2993 C.ICmpType = SystemZICMP::Any;
2994 } else
2995 return;
2996
2997 // Make sure that the first operand is an i32 of the right extension type.
2998 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
3001 if (C.Op0.getValueType() != MVT::i32 ||
3002 Load->getExtensionType() != ExtType) {
3003 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
3004 Load->getBasePtr(), Load->getPointerInfo(),
3005 Load->getMemoryVT(), Load->getAlign(),
3006 Load->getMemOperand()->getFlags());
3007 // Update the chain uses.
3008 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
3009 }
3010
3011 // Make sure that the second operand is an i32 with the right value.
3012 if (C.Op1.getValueType() != MVT::i32 ||
3013 Value != ConstOp1->getZExtValue())
3014 C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32);
3015}
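// Worked example (illustrative): for "(i32)sext_i8_load(p) < 0" the constant
// is 0, the mask is 0xff and the comparison is signed-only, so the byte case
// above rewrites it as the unsigned test "zext_i8_load(p) > 127", i.e. a test
// of the sign bit of the loaded byte, which CLI can encode directly.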
3016
3017// Return true if Op is either an unextended load, or a load suitable
3018// for integer register-memory comparisons of type ICmpType.
3019static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
3020 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
3021 if (Load) {
3022 // There are no instructions to compare a register with a memory byte.
3023 if (Load->getMemoryVT() == MVT::i8)
3024 return false;
3025 // Otherwise decide on extension type.
3026 switch (Load->getExtensionType()) {
3027 case ISD::NON_EXTLOAD:
3028 return true;
3029 case ISD::SEXTLOAD:
3030 return ICmpType != SystemZICMP::UnsignedOnly;
3031 case ISD::ZEXTLOAD:
3032 return ICmpType != SystemZICMP::SignedOnly;
3033 default:
3034 break;
3035 }
3036 }
3037 return false;
3038}
3039
3040// Return true if it is better to swap the operands of C.
3041static bool shouldSwapCmpOperands(const Comparison &C) {
3042 // Leave i128 and f128 comparisons alone, since they have no memory forms.
3043 if (C.Op0.getValueType() == MVT::i128)
3044 return false;
3045 if (C.Op0.getValueType() == MVT::f128)
3046 return false;
3047
3048 // Always keep a floating-point constant second, since comparisons with
3049 // zero can use LOAD TEST and comparisons with other constants make a
3050 // natural memory operand.
3051 if (isa<ConstantFPSDNode>(C.Op1))
3052 return false;
3053
3054 // Never swap comparisons with zero since there are many ways to optimize
3055 // those later.
3056 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3057 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
3058 return false;
3059
3060 // Also keep natural memory operands second if the loaded value is
3061 // only used here. Several comparisons have memory forms.
3062 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
3063 return false;
3064
3065 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
3066 // In that case we generally prefer the memory to be second.
3067 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
3068 // The only exceptions are when the second operand is a constant and
3069 // we can use things like CHHSI.
3070 if (!ConstOp1)
3071 return true;
3072 // The unsigned memory-immediate instructions can handle 16-bit
3073 // unsigned integers.
3074 if (C.ICmpType != SystemZICMP::SignedOnly &&
3075 isUInt<16>(ConstOp1->getZExtValue()))
3076 return false;
3077 // The signed memory-immediate instructions can handle 16-bit
3078 // signed integers.
3079 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
3080 isInt<16>(ConstOp1->getSExtValue()))
3081 return false;
3082 return true;
3083 }
3084
3085 // Try to promote the use of CGFR and CLGFR.
3086 unsigned Opcode0 = C.Op0.getOpcode();
3087 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
3088 return true;
3089 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
3090 return true;
3091 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
3092 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
3093 C.Op0.getConstantOperandVal(1) == 0xffffffff)
3094 return true;
3095
3096 return false;
3097}
3098
3099// Check whether C tests for equality between X and Y and whether X - Y
3100// or Y - X is also computed. In that case it's better to compare the
3101// result of the subtraction against zero.
3102static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
3103 Comparison &C) {
3104 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3105 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3106 for (SDNode *N : C.Op0->users()) {
3107 if (N->getOpcode() == ISD::SUB &&
3108 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
3109 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
3110 // Disable the nsw and nuw flags: the backend needs to handle
3111 // overflow as well during comparison elimination.
3112 N->dropFlags(SDNodeFlags::NoWrap);
3113 C.Op0 = SDValue(N, 0);
3114 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
3115 return;
3116 }
3117 }
3118 }
3119}
3120
3121// Check whether C compares a floating-point value with zero and if that
3122// floating-point value is also negated. In this case we can use the
3123// negation to set CC, so avoiding separate LOAD AND TEST and
3124// LOAD (NEGATIVE/COMPLEMENT) instructions.
3125static void adjustForFNeg(Comparison &C) {
3126 // This optimization is invalid for strict comparisons, since FNEG
3127 // does not raise any exceptions.
3128 if (C.Chain)
3129 return;
3130 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
3131 if (C1 && C1->isZero()) {
3132 for (SDNode *N : C.Op0->users()) {
3133 if (N->getOpcode() == ISD::FNEG) {
3134 C.Op0 = SDValue(N, 0);
3135 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3136 return;
3137 }
3138 }
3139 }
3140}
3141
3142// Check whether C compares (shl X, 32) with 0 and whether X is
3143// also sign-extended. In that case it is better to test the result
3144// of the sign extension using LTGFR.
3145//
3146// This case is important because InstCombine transforms a comparison
3147// with (sext (trunc X)) into a comparison with (shl X, 32).
3148static void adjustForLTGFR(Comparison &C) {
3149 // Check for a comparison between (shl X, 32) and 0.
3150 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
3151 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
3152 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3153 if (C1 && C1->getZExtValue() == 32) {
3154 SDValue ShlOp0 = C.Op0.getOperand(0);
3155 // See whether X has any SIGN_EXTEND_INREG uses.
3156 for (SDNode *N : ShlOp0->users()) {
3157 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
3158 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
3159 C.Op0 = SDValue(N, 0);
3160 return;
3161 }
3162 }
3163 }
3164 }
3165}
3166
3167// If C compares the truncation of an extending load, try to compare
3168// the untruncated value instead. This exposes more opportunities to
3169// reuse CC.
3170static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
3171 Comparison &C) {
3172 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
3173 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
3174 C.Op1.getOpcode() == ISD::Constant &&
3175 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3176 C.Op1->getAsZExtVal() == 0) {
3177 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
3178 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
3179 C.Op0.getValueSizeInBits().getFixedValue()) {
3180 unsigned Type = L->getExtensionType();
3181 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
3182 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
3183 C.Op0 = C.Op0.getOperand(0);
3184 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
3185 }
3186 }
3187 }
3188}
3189
3190// Return true if shift operation N has an in-range constant shift value.
3191// Store it in ShiftVal if so.
3192static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
3193 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
3194 if (!Shift)
3195 return false;
3196
3197 uint64_t Amount = Shift->getZExtValue();
3198 if (Amount >= N.getValueSizeInBits())
3199 return false;
3200
3201 ShiftVal = Amount;
3202 return true;
3203}
3204
3205// Check whether an AND with Mask is suitable for a TEST UNDER MASK
3206// instruction and whether the CC value is descriptive enough to handle
3207// a comparison of type Opcode between the AND result and CmpVal.
3208// CCMask says which comparison result is being tested and BitSize is
3209// the number of bits in the operands. If TEST UNDER MASK can be used,
3210// return the corresponding CC mask, otherwise return 0.
3211static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
3212 uint64_t Mask, uint64_t CmpVal,
3213 unsigned ICmpType) {
3214 assert(Mask != 0 && "ANDs with zero should have been removed by now");
3215
3216 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
3217 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
3218 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
3219 return 0;
3220
3221 // Work out the masks for the lowest and highest bits.
3222 uint64_t High = uint64_t(1) << (63 - llvm::countl_zero(Mask));
3223 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
3224
3225 // Signed ordered comparisons are effectively unsigned if the sign
3226 // bit is dropped.
3227 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
3228
3229 // Check for equality comparisons with 0, or the equivalent.
3230 if (CmpVal == 0) {
3231 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3233 if (CCMask == SystemZ::CCMASK_CMP_NE)
3235 }
3236 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
3237 if (CCMask == SystemZ::CCMASK_CMP_LT)
3239 if (CCMask == SystemZ::CCMASK_CMP_GE)
3241 }
3242 if (EffectivelyUnsigned && CmpVal < Low) {
3243 if (CCMask == SystemZ::CCMASK_CMP_LE)
3245 if (CCMask == SystemZ::CCMASK_CMP_GT)
3247 }
3248
3249 // Check for equality comparisons with the mask, or the equivalent.
3250 if (CmpVal == Mask) {
3251 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3253 if (CCMask == SystemZ::CCMASK_CMP_NE)
3255 }
3256 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
3257 if (CCMask == SystemZ::CCMASK_CMP_GT)
3259 if (CCMask == SystemZ::CCMASK_CMP_LE)
3261 }
3262 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
3263 if (CCMask == SystemZ::CCMASK_CMP_GE)
3265 if (CCMask == SystemZ::CCMASK_CMP_LT)
3267 }
3268
3269 // Check for ordered comparisons with the top bit.
3270 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
3271 if (CCMask == SystemZ::CCMASK_CMP_LE)
3273 if (CCMask == SystemZ::CCMASK_CMP_GT)
3275 }
3276 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
3277 if (CCMask == SystemZ::CCMASK_CMP_LT)
3279 if (CCMask == SystemZ::CCMASK_CMP_GE)
3281 }
3282
3283 // If there are just two bits, we can do equality checks for Low and High
3284 // as well.
3285 if (Mask == Low + High) {
3286 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
3288 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
3290 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
3292 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
3294 }
3295
3296 // Looks like we've exhausted our options.
3297 return 0;
3298}
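// Worked example (illustrative): for "(x & 0x8000) == 0" the mask satisfies
// SystemZ::isImmLL, CmpVal is 0 and CCMask is CCMASK_CMP_EQ, so the first
// block above applies and the comparison can be implemented as a single
// TEST UNDER MASK (TMLL) that branches on the "all selected bits zero"
// condition.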
3299
3300// See whether C can be implemented as a TEST UNDER MASK instruction.
3301// Update the arguments with the TM version if so.
3302static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
3303 Comparison &C) {
3304 // Use VECTOR TEST UNDER MASK for i128 operations.
3305 if (C.Op0.getValueType() == MVT::i128) {
3306 // We can use VTM for EQ/NE comparisons of x & y against 0.
3307 if (C.Op0.getOpcode() == ISD::AND &&
3308 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3309 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
3310 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
3311 if (Mask && Mask->getAPIntValue() == 0) {
3312 C.Opcode = SystemZISD::VTM;
3313 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
3314 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
3315 C.CCValid = SystemZ::CCMASK_VCMP;
3316 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3317 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3318 else
3319 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3320 }
3321 }
3322 return;
3323 }
3324
3325 // Check that we have a comparison with a constant.
3326 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3327 if (!ConstOp1)
3328 return;
3329 uint64_t CmpVal = ConstOp1->getZExtValue();
3330
3331 // Check whether the nonconstant input is an AND with a constant mask.
3332 Comparison NewC(C);
3333 uint64_t MaskVal;
3334 ConstantSDNode *Mask = nullptr;
3335 if (C.Op0.getOpcode() == ISD::AND) {
3336 NewC.Op0 = C.Op0.getOperand(0);
3337 NewC.Op1 = C.Op0.getOperand(1);
3338 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
3339 if (!Mask)
3340 return;
3341 MaskVal = Mask->getZExtValue();
3342 } else {
3343 // There is no instruction to compare with a 64-bit immediate
3344 // so use TMHH instead if possible. We need an unsigned ordered
3345 // comparison with an i64 immediate.
3346 if (NewC.Op0.getValueType() != MVT::i64 ||
3347 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
3348 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
3349 NewC.ICmpType == SystemZICMP::SignedOnly)
3350 return;
3351 // Convert LE and GT comparisons into LT and GE.
3352 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
3353 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
3354 if (CmpVal == uint64_t(-1))
3355 return;
3356 CmpVal += 1;
3357 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
3358 }
3359 // If the low N bits of Op1 are zero then the low N bits of Op0 can
3360 // be masked off without changing the result.
3361 MaskVal = -(CmpVal & -CmpVal);
3362 NewC.ICmpType = SystemZICMP::UnsignedOnly;
3363 }
3364 if (!MaskVal)
3365 return;
3366
3367 // Check whether the combination of mask, comparison value and comparison
3368 // type are suitable.
3369 unsigned BitSize = NewC.Op0.getValueSizeInBits();
3370 unsigned NewCCMask, ShiftVal;
3371 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3372 NewC.Op0.getOpcode() == ISD::SHL &&
3373 isSimpleShift(NewC.Op0, ShiftVal) &&
3374 (MaskVal >> ShiftVal != 0) &&
3375 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
3376 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3377 MaskVal >> ShiftVal,
3378 CmpVal >> ShiftVal,
3379 SystemZICMP::Any))) {
3380 NewC.Op0 = NewC.Op0.getOperand(0);
3381 MaskVal >>= ShiftVal;
3382 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3383 NewC.Op0.getOpcode() == ISD::SRL &&
3384 isSimpleShift(NewC.Op0, ShiftVal) &&
3385 (MaskVal << ShiftVal != 0) &&
3386 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
3387 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3388 MaskVal << ShiftVal,
3389 CmpVal << ShiftVal,
3391 NewC.Op0 = NewC.Op0.getOperand(0);
3392 MaskVal <<= ShiftVal;
3393 } else {
3394 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
3395 NewC.ICmpType);
3396 if (!NewCCMask)
3397 return;
3398 }
3399
3400 // Go ahead and make the change.
3401 C.Opcode = SystemZISD::TM;
3402 C.Op0 = NewC.Op0;
3403 if (Mask && Mask->getZExtValue() == MaskVal)
3404 C.Op1 = SDValue(Mask, 0);
3405 else
3406 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
3407 C.CCValid = SystemZ::CCMASK_TM;
3408 C.CCMask = NewCCMask;
3409}
3410
3411// Implement i128 comparison in vector registers.
3412static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
3413 Comparison &C) {
3414 if (C.Opcode != SystemZISD::ICMP)
3415 return;
3416 if (C.Op0.getValueType() != MVT::i128)
3417 return;
3418
3419 // Recognize vector comparison reductions.
3420 if ((C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3421 C.CCMask == SystemZ::CCMASK_CMP_NE) &&
3422 (isNullConstant(C.Op1) || isAllOnesConstant(C.Op1))) {
3423 bool CmpEq = C.CCMask == SystemZ::CCMASK_CMP_EQ;
3424 bool CmpNull = isNullConstant(C.Op1);
3425 SDValue Src = peekThroughBitcasts(C.Op0);
3426 if (Src.hasOneUse() && isBitwiseNot(Src)) {
3427 Src = Src.getOperand(0);
3428 CmpNull = !CmpNull;
3429 }
3430 unsigned Opcode = 0;
3431 if (Src.hasOneUse()) {
3432 switch (Src.getOpcode()) {
3433 case SystemZISD::VICMPE: Opcode = SystemZISD::VICMPES; break;
3434 case SystemZISD::VICMPH: Opcode = SystemZISD::VICMPHS; break;
3435 case SystemZISD::VICMPHL: Opcode = SystemZISD::VICMPHLS; break;
3436 case SystemZISD::VFCMPE: Opcode = SystemZISD::VFCMPES; break;
3437 case SystemZISD::VFCMPH: Opcode = SystemZISD::VFCMPHS; break;
3438 case SystemZISD::VFCMPHE: Opcode = SystemZISD::VFCMPHES; break;
3439 default: break;
3440 }
3441 }
3442 if (Opcode) {
3443 C.Opcode = Opcode;
3444 C.Op0 = Src->getOperand(0);
3445 C.Op1 = Src->getOperand(1);
3446 C.CCValid = SystemZ::CCMASK_VCMP;
3447 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3448 if (!CmpEq)
3449 C.CCMask ^= C.CCValid;
3450 return;
3451 }
3452 }
3453
3454 // Everything below here is not useful if we have native i128 compares.
3455 if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
3456 return;
3457
3458 // (In-)Equality comparisons can be implemented via VCEQGS.
3459 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3460 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3461 C.Opcode = SystemZISD::VICMPES;
3462 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
3463 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
3464 C.CCValid = SystemZ::CCMASK_VCMP;
3465 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3466 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3467 else
3468 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3469 return;
3470 }
3471
3472 // Normalize other comparisons to GT.
3473 bool Swap = false, Invert = false;
3474 switch (C.CCMask) {
3475 case SystemZ::CCMASK_CMP_GT: break;
3476 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
3477 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
3478 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
3479 default: llvm_unreachable("Invalid integer condition!");
3480 }
3481 if (Swap)
3482 std::swap(C.Op0, C.Op1);
3483
3484 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3485 C.Opcode = SystemZISD::UCMP128HI;
3486 else
3487 C.Opcode = SystemZISD::SCMP128HI;
3488 C.CCValid = SystemZ::CCMASK_ANY;
3489 C.CCMask = SystemZ::CCMASK_1;
3490
3491 if (Invert)
3492 C.CCMask ^= C.CCValid;
3493}
3494
3495// See whether the comparison argument contains a redundant AND
3496// and remove it if so. This sometimes happens due to the generic
3497// BRCOND expansion.
3498static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3499 Comparison &C) {
3500 if (C.Op0.getOpcode() != ISD::AND)
3501 return;
3502 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3503 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3504 return;
3505 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3506 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3507 return;
3508
3509 C.Op0 = C.Op0.getOperand(0);
3510}
3511
3512// Return a Comparison that tests the condition-code result of intrinsic
3513// node Call against constant integer CC using comparison code Cond.
3514// Opcode is the opcode of the SystemZISD operation for the intrinsic
3515// and CCValid is the set of possible condition-code results.
3516static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3517 SDValue Call, unsigned CCValid, uint64_t CC,
3518 ISD::CondCode Cond) {
3519 Comparison C(Call, SDValue(), SDValue());
3520 C.Opcode = Opcode;
3521 C.CCValid = CCValid;
3522 if (Cond == ISD::SETEQ)
3523 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3524 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3525 else if (Cond == ISD::SETNE)
3526 // ...and the inverse of that.
3527 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3528 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3529 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3530 // always true for CC>3.
3531 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3532 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3533 // ...and the inverse of that.
3534 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3535 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3536 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3537 // always true for CC>3.
3538 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3539 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3540 // ...and the inverse of that.
3541 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3542 else
3543 llvm_unreachable("Unexpected integer comparison type");
3544 C.CCMask &= CCValid;
3545 return C;
3546}
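// Worked example (illustrative): the hardware CC value n is represented by
// mask bit (3 - n), so for Cond == ISD::SETEQ and CC == 2 the code above
// produces C.CCMask == 1 << 1, i.e. exactly the "CC is 2" outcome, while
// ISD::SETNE produces the complement of that bit (restricted to CCValid).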
3547
3548// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3549static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3550 ISD::CondCode Cond, const SDLoc &DL,
3551 SDValue Chain = SDValue(),
3552 bool IsSignaling = false) {
3553 if (CmpOp1.getOpcode() == ISD::Constant) {
3554 assert(!Chain);
3555 unsigned Opcode, CCValid;
3556 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3557 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3558 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3559 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3560 CmpOp1->getAsZExtVal(), Cond);
3561 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3562 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3563 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3564 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3565 CmpOp1->getAsZExtVal(), Cond);
3566 }
3567 Comparison C(CmpOp0, CmpOp1, Chain);
3568 C.CCMask = CCMaskForCondCode(Cond);
3569 if (C.Op0.getValueType().isFloatingPoint()) {
3570 C.CCValid = SystemZ::CCMASK_FCMP;
3571 if (!C.Chain)
3572 C.Opcode = SystemZISD::FCMP;
3573 else if (!IsSignaling)
3574 C.Opcode = SystemZISD::STRICT_FCMP;
3575 else
3576 C.Opcode = SystemZISD::STRICT_FCMPS;
3577 adjustForFNeg(C);
3578 } else {
3579 assert(!C.Chain);
3580 C.CCValid = SystemZ::CCMASK_ICMP;
3581 C.Opcode = SystemZISD::ICMP;
3582 // Choose the type of comparison. Equality and inequality tests can
3583 // use either signed or unsigned comparisons. The choice also doesn't
3584 // matter if both sign bits are known to be clear. In those cases we
3585 // want to give the main isel code the freedom to choose whichever
3586 // form fits best.
3587 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3588 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3589 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3590 C.ICmpType = SystemZICMP::Any;
3591 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3592 C.ICmpType = SystemZICMP::UnsignedOnly;
3593 else
3594 C.ICmpType = SystemZICMP::SignedOnly;
3595 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3596 adjustForRedundantAnd(DAG, DL, C);
3597 adjustZeroCmp(DAG, DL, C);
3598 adjustSubwordCmp(DAG, DL, C);
3599 adjustForSubtraction(DAG, DL, C);
3600 adjustForLTGFR(C);
3601 adjustICmpTruncate(DAG, DL, C);
3602 }
3603
3604 if (shouldSwapCmpOperands(C)) {
3605 std::swap(C.Op0, C.Op1);
3606 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3607 }
3608
3609 adjustForTestUnderMask(DAG, DL, C);
3610 adjustICmp128(DAG, DL, C);
3611 return C;
3612}
3613
3614// Emit the comparison instruction described by C.
3615static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3616 if (!C.Op1.getNode()) {
3617 SDNode *Node;
3618 switch (C.Op0.getOpcode()) {
3619 case ISD::INTRINSIC_W_CHAIN:
3620 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3621 return SDValue(Node, 0);
3622 case ISD::INTRINSIC_WO_CHAIN:
3623 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3624 return SDValue(Node, Node->getNumValues() - 1);
3625 default:
3626 llvm_unreachable("Invalid comparison operands");
3627 }
3628 }
3629 if (C.Opcode == SystemZISD::ICMP)
3630 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3631 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3632 if (C.Opcode == SystemZISD::TM) {
3633 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3635 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3636 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3637 }
3638 if (C.Opcode == SystemZISD::VICMPES ||
3639 C.Opcode == SystemZISD::VICMPHS ||
3640 C.Opcode == SystemZISD::VICMPHLS ||
3641 C.Opcode == SystemZISD::VFCMPES ||
3642 C.Opcode == SystemZISD::VFCMPHS ||
3643 C.Opcode == SystemZISD::VFCMPHES) {
3644 EVT IntVT = C.Op0.getValueType().changeVectorElementTypeToInteger();
3645 SDVTList VTs = DAG.getVTList(IntVT, MVT::i32);
3646 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3647 return SDValue(Val.getNode(), 1);
3648 }
3649 if (C.Chain) {
3650 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3651 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3652 }
3653 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3654}
3655
3656// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3657// 64 bits. Extend is the extension type to use. Store the high part
3658// in Hi and the low part in Lo.
3659static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3660 SDValue Op0, SDValue Op1, SDValue &Hi,
3661 SDValue &Lo) {
3662 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3663 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3664 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3665 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3666 DAG.getConstant(32, DL, MVT::i64));
3667 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3668 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3669}
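// Usage sketch (illustrative): a 32-bit ISD::UMUL_LOHI would be expanded as
//   lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op0, Op1, Hi, Lo);
// and a 32-bit ISD::SMUL_LOHI with ISD::SIGN_EXTEND instead, after which Hi
// and Lo hold the upper and lower 32 bits of the 64-bit product.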
3670
3671// Lower a binary operation that produces two VT results, one in each
3672// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3673// and Opcode performs the GR128 operation. Store the even register result
3674// in Even and the odd register result in Odd.
3675static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3676 unsigned Opcode, SDValue Op0, SDValue Op1,
3677 SDValue &Even, SDValue &Odd) {
3678 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3679 bool Is32Bit = is32Bit(VT);
3680 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3681 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3682}
3683
3684// Return an i32 value that is 1 if the CC value produced by CCReg is
3685// in the mask CCMask and 0 otherwise. CC is known to have a value
3686// in CCValid, so other values can be ignored.
3687static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3688 unsigned CCValid, unsigned CCMask) {
3689 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3690 DAG.getConstant(0, DL, MVT::i32),
3691 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3692 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3693 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3694}
3695
3696 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3697// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3698// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3699// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3700// floating-point comparisons.
3702 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3703 switch (CC) {
3704 case ISD::SETOEQ:
3705 case ISD::SETEQ:
3706 switch (Mode) {
3707 case CmpMode::Int: return SystemZISD::VICMPE;
3708 case CmpMode::FP: return SystemZISD::VFCMPE;
3709 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3710 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3711 }
3712 llvm_unreachable("Bad mode");
3713
3714 case ISD::SETOGE:
3715 case ISD::SETGE:
3716 switch (Mode) {
3717 case CmpMode::Int: return 0;
3718 case CmpMode::FP: return SystemZISD::VFCMPHE;
3719 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3720 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3721 }
3722 llvm_unreachable("Bad mode");
3723
3724 case ISD::SETOGT:
3725 case ISD::SETGT:
3726 switch (Mode) {
3727 case CmpMode::Int: return SystemZISD::VICMPH;
3728 case CmpMode::FP: return SystemZISD::VFCMPH;
3729 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3730 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3731 }
3732 llvm_unreachable("Bad mode");
3733
3734 case ISD::SETUGT:
3735 switch (Mode) {
3736 case CmpMode::Int: return SystemZISD::VICMPHL;
3737 case CmpMode::FP: return 0;
3738 case CmpMode::StrictFP: return 0;
3739 case CmpMode::SignalingFP: return 0;
3740 }
3741 llvm_unreachable("Bad mode");
3742
3743 default:
3744 return 0;
3745 }
3746}
3747
3748// Return the SystemZISD vector comparison operation for CC or its inverse,
3749// or 0 if neither can be done directly. Indicate in Invert whether the
3750// result is for the inverse of CC. Mode is as above.
3751 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3752 bool &Invert) {
3753 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3754 Invert = false;
3755 return Opcode;
3756 }
3757
3758 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3759 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3760 Invert = true;
3761 return Opcode;
3762 }
3763
3764 return 0;
3765}
3766
3767// Return a v2f64 that contains the extended form of elements Start and Start+1
3768// of v4f32 value Op. If Chain is nonnull, return the strict form.
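// The shuffle below places elements Start and Start+1 into the even lanes
// (indices 0 and 2), which are the lanes that VEXTEND widens to f64.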
3769static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3770 SDValue Op, SDValue Chain) {
3771 int Mask[] = { Start, -1, Start + 1, -1 };
3772 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3773 if (Chain) {
3774 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3775 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3776 }
3777 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3778}
3779
3780// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3781// producing a result of type VT. If Chain is nonnull, return the strict form.
3782SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3783 const SDLoc &DL, EVT VT,
3784 SDValue CmpOp0,
3785 SDValue CmpOp1,
3786 SDValue Chain) const {
3787 // There is no hardware support for v4f32 (unless we have the vector
3788 // enhancements facility 1), so extend the vector into two v2f64s
3789 // and compare those.
3790 if (CmpOp0.getValueType() == MVT::v4f32 &&
3791 !Subtarget.hasVectorEnhancements1()) {
3792 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3793 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3794 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3795 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3796 if (Chain) {
3797 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3798 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3799 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3800 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3801 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3802 H1.getValue(1), L1.getValue(1),
3803 HRes.getValue(1), LRes.getValue(1) };
3804 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3805 SDValue Ops[2] = { Res, NewChain };
3806 return DAG.getMergeValues(Ops, DL);
3807 }
3808 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3809 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3810 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3811 }
3812 if (Chain) {
3813 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3814 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3815 }
3816 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3817}
3818
3819// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3820// an integer mask of type VT. If Chain is nonnull, we have a strict
3821// floating-point comparison. If in addition IsSignaling is true, we have
3822// a strict signaling floating-point comparison.
3823SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3824 const SDLoc &DL, EVT VT,
3825 ISD::CondCode CC,
3826 SDValue CmpOp0,
3827 SDValue CmpOp1,
3828 SDValue Chain,
3829 bool IsSignaling) const {
3830 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3831 assert (!Chain || IsFP);
3832 assert (!IsSignaling || Chain);
3833 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3834 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3835 bool Invert = false;
3836 SDValue Cmp;
3837 switch (CC) {
3838 // Handle tests for order using (or (ogt y x) (oge x y)).
3839 case ISD::SETUO:
3840 Invert = true;
3841 [[fallthrough]];
3842 case ISD::SETO: {
3843 assert(IsFP && "Unexpected integer comparison");
3844 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3845 DL, VT, CmpOp1, CmpOp0, Chain);
3846 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3847 DL, VT, CmpOp0, CmpOp1, Chain);
3848 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3849 if (Chain)
3850 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3851 LT.getValue(1), GE.getValue(1));
3852 break;
3853 }
3854
3855 // Handle <> tests using (or (ogt y x) (ogt x y)).
3856 case ISD::SETUEQ:
3857 Invert = true;
3858 [[fallthrough]];
3859 case ISD::SETONE: {
3860 assert(IsFP && "Unexpected integer comparison");
3861 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3862 DL, VT, CmpOp1, CmpOp0, Chain);
3863 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3864 DL, VT, CmpOp0, CmpOp1, Chain);
3865 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3866 if (Chain)
3867 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3868 LT.getValue(1), GT.getValue(1));
3869 break;
3870 }
3871
3872 // Otherwise a single comparison is enough. It doesn't really
3873 // matter whether we try the inversion or the swap first, since
3874 // there are no cases where both work.
3875 default:
3876 // Optimize sign-bit comparisons to signed compares.
3877 if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
3878 ISD::isBuildVectorAllZeros(CmpOp1.getNode())) {
3879 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3880 APInt Mask;
3881 if (CmpOp0.getOpcode() == ISD::AND
3882 && ISD::isConstantSplatVector(CmpOp0.getOperand(1).getNode(), Mask)
3883 && Mask == APInt::getSignMask(EltSize)) {
3884 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
3885 CmpOp0 = CmpOp0.getOperand(0);
3886 }
3887 }
3888 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3889 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3890 else {
3891 CC = ISD::getSetCCSwappedOperands(CC);
3892 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3893 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3894 else
3895 llvm_unreachable("Unhandled comparison");
3896 }
3897 if (Chain)
3898 Chain = Cmp.getValue(1);
3899 break;
3900 }
3901 if (Invert) {
3902 SDValue Mask =
3903 DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64));
3904 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3905 }
3906 if (Chain && Chain.getNode() != Cmp.getNode()) {
3907 SDValue Ops[2] = { Cmp, Chain };
3908 Cmp = DAG.getMergeValues(Ops, DL);
3909 }
3910 return Cmp;
3911}
3912
3913SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3914 SelectionDAG &DAG) const {
3915 SDValue CmpOp0 = Op.getOperand(0);
3916 SDValue CmpOp1 = Op.getOperand(1);
3917 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3918 SDLoc DL(Op);
3919 EVT VT = Op.getValueType();
3920 if (VT.isVector())
3921 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3922
3923 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3924 SDValue CCReg = emitCmp(DAG, DL, C);
3925 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3926}
3927
3928SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3929 SelectionDAG &DAG,
3930 bool IsSignaling) const {
3931 SDValue Chain = Op.getOperand(0);
3932 SDValue CmpOp0 = Op.getOperand(1);
3933 SDValue CmpOp1 = Op.getOperand(2);
3934 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3935 SDLoc DL(Op);
3936 EVT VT = Op.getNode()->getValueType(0);
3937 if (VT.isVector()) {
3938 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3939 Chain, IsSignaling);
3940 return Res.getValue(Op.getResNo());
3941 }
3942
3943 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3944 SDValue CCReg = emitCmp(DAG, DL, C);
3945 CCReg->setFlags(Op->getFlags());
3946 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3947 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3948 return DAG.getMergeValues(Ops, DL);
3949}
3950
3951SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3952 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3953 SDValue CmpOp0 = Op.getOperand(2);
3954 SDValue CmpOp1 = Op.getOperand(3);
3955 SDValue Dest = Op.getOperand(4);
3956 SDLoc DL(Op);
3957
3958 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3959 SDValue CCReg = emitCmp(DAG, DL, C);
3960 return DAG.getNode(
3961 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3962 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3963 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3964}
3965
3966// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3967// allowing Pos and Neg to be wider than CmpOp.
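// In DAG terms, Neg must be (sub 0, Pos) and Pos must be either CmpOp
// itself or (sign_extend CmpOp).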
3968static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3969 return (Neg.getOpcode() == ISD::SUB &&
3970 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3971 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3972 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3973 Pos.getOperand(0) == CmpOp)));
3974}
3975
3976// Return the absolute or negative absolute of Op; IsNegative decides which.
3977 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3978 bool IsNegative) {
3979 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3980 if (IsNegative)
3981 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3982 DAG.getConstant(0, DL, Op.getValueType()), Op);
3983 return Op;
3984}
3985
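// Lower an i128 SELECT_CC (guarded by vector-enhancements facility 3 at the
// call site) by emitting a 128-bit vector compare whose result is an
// all-ones or all-zeros mask and using it to merge TrueOp and FalseOp.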
3986 static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL,
3987 Comparison C, SDValue TrueOp, SDValue FalseOp) {
3988 EVT VT = MVT::i128;
3989 unsigned Op;
3990
3991 if (C.CCMask == SystemZ::CCMASK_CMP_NE ||
3992 C.CCMask == SystemZ::CCMASK_CMP_GE ||
3993 C.CCMask == SystemZ::CCMASK_CMP_LE) {
3994 std::swap(TrueOp, FalseOp);
3995 C.CCMask ^= C.CCValid;
3996 }
3997 if (C.CCMask == SystemZ::CCMASK_CMP_LT) {
3998 std::swap(C.Op0, C.Op1);
3999 C.CCMask = SystemZ::CCMASK_CMP_GT;
4000 }
4001 switch (C.CCMask) {
4002 case SystemZ::CCMASK_CMP_EQ:
4003 Op = SystemZISD::VICMPE;
4004 break;
4005 case SystemZ::CCMASK_CMP_GT:
4006 if (C.ICmpType == SystemZICMP::UnsignedOnly)
4007 Op = SystemZISD::VICMPHL;
4008 else
4009 Op = SystemZISD::VICMPH;
4010 break;
4011 default:
4012 llvm_unreachable("Unhandled comparison");
4013 break;
4014 }
4015
4016 SDValue Mask = DAG.getNode(Op, DL, VT, C.Op0, C.Op1);
4017 TrueOp = DAG.getNode(ISD::AND, DL, VT, TrueOp, Mask);
4018 FalseOp = DAG.getNode(ISD::AND, DL, VT, FalseOp, DAG.getNOT(DL, Mask, VT));
4019 return DAG.getNode(ISD::OR, DL, VT, TrueOp, FalseOp);
4020}
4021
4022SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
4023 SelectionDAG &DAG) const {
4024 SDValue CmpOp0 = Op.getOperand(0);
4025 SDValue CmpOp1 = Op.getOperand(1);
4026 SDValue TrueOp = Op.getOperand(2);
4027 SDValue FalseOp = Op.getOperand(3);
4028 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4029 SDLoc DL(Op);
4030
4031 // SELECT_CC involving f16 will not have the cmp-ops promoted by the
4032 // legalizer, as it will be handled according to the type of the resulting
4033 // value. Extend them here if needed.
4034 if (CmpOp0.getSimpleValueType() == MVT::f16) {
4035 CmpOp0 = DAG.getFPExtendOrRound(CmpOp0, SDLoc(CmpOp0), MVT::f32);
4036 CmpOp1 = DAG.getFPExtendOrRound(CmpOp1, SDLoc(CmpOp1), MVT::f32);
4037 }
4038
4039 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
4040
4041 // Check for absolute and negative-absolute selections, including those
4042 // where the comparison value is sign-extended (for LPGFR and LNGFR).
4043 // This check supplements the one in DAGCombiner.
4044 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
4045 C.CCMask != SystemZ::CCMASK_CMP_NE &&
4046 C.Op1.getOpcode() == ISD::Constant &&
4047 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
4048 C.Op1->getAsZExtVal() == 0) {
4049 if (isAbsolute(C.Op0, TrueOp, FalseOp))
4050 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
4051 if (isAbsolute(C.Op0, FalseOp, TrueOp))
4052 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
4053 }
4054
4055 if (Subtarget.hasVectorEnhancements3() &&
4056 C.Opcode == SystemZISD::ICMP &&
4057 C.Op0.getValueType() == MVT::i128 &&
4058 TrueOp.getValueType() == MVT::i128) {
4059 return getI128Select(DAG, DL, C, TrueOp, FalseOp);
4060 }
4061
4062 SDValue CCReg = emitCmp(DAG, DL, C);
4063 SDValue Ops[] = {TrueOp, FalseOp,
4064 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
4065 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
4066
4067 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
4068}
4069
4070SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
4071 SelectionDAG &DAG) const {
4072 SDLoc DL(Node);
4073 const GlobalValue *GV = Node->getGlobal();
4074 int64_t Offset = Node->getOffset();
4075 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4076 SDValue Result;
4077
4078 CodeModel::Model CM = DAG.getTarget().getCodeModel();
4079 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
4080 if (isInt<32>(Offset)) {
4081 // Assign anchors at 1<<12 byte boundaries.
4082 uint64_t Anchor = Offset & ~uint64_t(0xfff);
4083 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
4084 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4085
4086 // The offset can be folded into the address if it is aligned to a
4087 // halfword.
4088 Offset -= Anchor;
4089 if (Offset != 0 && (Offset & 1) == 0) {
4090 SDValue Full =
4091 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
4092 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
4093 Offset = 0;
4094 }
4095 } else {
4096 // Conservatively load a constant offset greater than 32 bits into a
4097 // register below.
4098 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
4099 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4100 }
4101 } else if (Subtarget.isTargetELF()) {
4102 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
4103 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4104 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4105 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4106 } else if (Subtarget.isTargetzOS()) {
4107 Result = getADAEntry(DAG, GV, DL, PtrVT);
4108 } else
4109 llvm_unreachable("Unexpected Subtarget");
4110
4111 // If there was a non-zero offset that we didn't fold, create an explicit
4112 // addition for it.
4113 if (Offset != 0)
4114 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4115 DAG.getSignedConstant(Offset, DL, PtrVT));
4116
4117 return Result;
4118}
4119
4120SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
4121 SelectionDAG &DAG,
4122 unsigned Opcode,
4123 SDValue GOTOffset) const {
4124 SDLoc DL(Node);
4125 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4126 SDValue Chain = DAG.getEntryNode();
4127 SDValue Glue;
4128
4129 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4130 CallingConv::GHC)
4131 report_fatal_error("In GHC calling convention TLS is not supported");
4132
4133 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
4134 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
4135 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
4136 Glue = Chain.getValue(1);
4137 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
4138 Glue = Chain.getValue(1);
4139
4140 // The first call operand is the chain and the second is the TLS symbol.
4141 SmallVector<SDValue, 8> Ops;
4142 Ops.push_back(Chain);
4143 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
4144 Node->getValueType(0),
4145 0, 0));
4146
4147 // Add argument registers to the end of the list so that they are
4148 // known live into the call.
4149 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
4150 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
4151
4152 // Add a register mask operand representing the call-preserved registers.
4153 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4154 const uint32_t *Mask =
4155 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4156 assert(Mask && "Missing call preserved mask for calling convention");
4157 Ops.push_back(DAG.getRegisterMask(Mask));
4158
4159 // Glue the call to the argument copies.
4160 Ops.push_back(Glue);
4161
4162 // Emit the call.
4163 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4164 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
4165 Glue = Chain.getValue(1);
4166
4167 // Copy the return value from %r2.
4168 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
4169}
4170
4171SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
4172 SelectionDAG &DAG) const {
4173 SDValue Chain = DAG.getEntryNode();
4174 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4175
4176 // The high part of the thread pointer is in access register 0.
4177 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
4178 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
4179
4180 // The low part of the thread pointer is in access register 1.
4181 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
4182 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
4183
4184 // Merge them into a single 64-bit address.
4185 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
4186 DAG.getConstant(32, DL, PtrVT));
4187 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
4188}
4189
4190SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
4191 SelectionDAG &DAG) const {
4192 if (DAG.getTarget().useEmulatedTLS())
4193 return LowerToTLSEmulatedModel(Node, DAG);
4194 SDLoc DL(Node);
4195 const GlobalValue *GV = Node->getGlobal();
4196 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4197 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
4198
4199 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4200 CallingConv::GHC)
4201 report_fatal_error("In GHC calling convention TLS is not supported");
4202
4203 SDValue TP = lowerThreadPointer(DL, DAG);
4204
4205 // Get the offset of GA from the thread pointer, based on the TLS model.
4206 SDValue Offset;
4207 switch (model) {
4208 case TLSModel::GeneralDynamic: {
4209 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
4210 SystemZConstantPoolValue *CPV =
4211 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
4212
4213 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4214 Offset = DAG.getLoad(
4215 PtrVT, DL, DAG.getEntryNode(), Offset,
4216 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4217
4218 // Call __tls_get_offset to retrieve the offset.
4219 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
4220 break;
4221 }
4222
4223 case TLSModel::LocalDynamic: {
4224 // Load the GOT offset of the module ID.
4225 SystemZConstantPoolValue *CPV =
4226 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
4227
4228 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4229 Offset = DAG.getLoad(
4230 PtrVT, DL, DAG.getEntryNode(), Offset,
4231 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4232
4233 // Call __tls_get_offset to retrieve the module base offset.
4234 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
4235
4236 // Note: The SystemZLDCleanupPass will remove redundant computations
4237 // of the module base offset. Count total number of local-dynamic
4238 // accesses to trigger execution of that pass.
4239 SystemZMachineFunctionInfo* MFI =
4240 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
4241 MFI->incNumLocalDynamicTLSAccesses();
4242
4243 // Add the per-symbol offset.
4244 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
4245
4246 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4247 DTPOffset = DAG.getLoad(
4248 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
4249 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4250
4251 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
4252 break;
4253 }
4254
4255 case TLSModel::InitialExec: {
4256 // Load the offset from the GOT.
4257 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
4258 SystemZII::MO_INDNTPOFF);
4259 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
4260 Offset =
4261 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
4262 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4263 break;
4264 }
4265
4266 case TLSModel::LocalExec: {
4267 // Force the offset into the constant pool and load it from there.
4268 SystemZConstantPoolValue *CPV =
4269 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
4270
4271 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4272 Offset = DAG.getLoad(
4273 PtrVT, DL, DAG.getEntryNode(), Offset,
4274 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4275 break;
4276 }
4277 }
4278
4279 // Add the base and offset together.
4280 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
4281}
4282
4283SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
4284 SelectionDAG &DAG) const {
4285 SDLoc DL(Node);
4286 const BlockAddress *BA = Node->getBlockAddress();
4287 int64_t Offset = Node->getOffset();
4288 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4289
4290 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
4291 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4292 return Result;
4293}
4294
4295SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
4296 SelectionDAG &DAG) const {
4297 SDLoc DL(JT);
4298 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4299 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
4300
4301 // Use LARL to load the address of the table.
4302 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4303}
4304
4305SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
4306 SelectionDAG &DAG) const {
4307 SDLoc DL(CP);
4308 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4309
4310 SDValue Result;
4311 if (CP->isMachineConstantPoolEntry())
4312 Result =
4313 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
4314 else
4315 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
4316 CP->getOffset());
4317
4318 // Use LARL to load the address of the constant pool entry.
4319 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4320}
4321
4322SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
4323 SelectionDAG &DAG) const {
4324 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4325 MachineFunction &MF = DAG.getMachineFunction();
4326 MachineFrameInfo &MFI = MF.getFrameInfo();
4327 MFI.setFrameAddressIsTaken(true);
4328
4329 SDLoc DL(Op);
4330 unsigned Depth = Op.getConstantOperandVal(0);
4331 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4332
4333 // By definition, the frame address is the address of the back chain. (In
4334 // the case of packed stack without backchain, return the address where the
4335 // backchain would have been stored. This will either be an unused space or
4336 // contain a saved register).
4337 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
4338 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
4339
4340 if (Depth > 0) {
4341 // FIXME The frontend should detect this case.
4342 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4343 report_fatal_error("Unsupported stack frame traversal count");
4344
4345 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
4346 while (Depth--) {
4347 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
4348 MachinePointerInfo());
4349 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
4350 }
4351 }
4352
4353 return BackChain;
4354}
4355
4356SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
4357 SelectionDAG &DAG) const {
4358 MachineFunction &MF = DAG.getMachineFunction();
4359 MachineFrameInfo &MFI = MF.getFrameInfo();
4360 MFI.setReturnAddressIsTaken(true);
4361
4362 SDLoc DL(Op);
4363 unsigned Depth = Op.getConstantOperandVal(0);
4364 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4365
4366 if (Depth > 0) {
4367 // FIXME The frontend should detect this case.
4368 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4369 report_fatal_error("Unsupported stack frame traversal count");
4370
4371 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
4372 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4373 int Offset = TFL->getReturnAddressOffset(MF);
4374 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
4375 DAG.getSignedConstant(Offset, DL, PtrVT));
4376 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
4377 MachinePointerInfo());
4378 }
4379
4380 // Return R14D (ELF) / R7D (XPLINK), which has the return address. Mark it an
4381 // implicit live-in.
4382 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
4384 &SystemZ::GR64BitRegClass);
4385 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
4386}
4387
4388SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
4389 SelectionDAG &DAG) const {
4390 SDLoc DL(Op);
4391 SDValue In = Op.getOperand(0);
4392 EVT InVT = In.getValueType();
4393 EVT ResVT = Op.getValueType();
4394
4395 // Convert loads directly. This is normally done by DAGCombiner,
4396 // but we need this case for bitcasts that are created during lowering
4397 // and which are then lowered themselves.
4398 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
4399 if (ISD::isNormalLoad(LoadN)) {
4400 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
4401 LoadN->getBasePtr(), LoadN->getMemOperand());
4402 // Update the chain uses.
4403 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
4404 return NewLoad;
4405 }
4406
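// For i32 <-> f32 bitcasts, go via a 64-bit value: an f32 occupies the high
// 32 bits of a 64-bit FP register, so insert or extract the high word,
// using the high-word GR32 subregister when available and a 32-bit shift
// otherwise.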
4407 if (InVT == MVT::i32 && ResVT == MVT::f32) {
4408 SDValue In64;
4409 if (Subtarget.hasHighWord()) {
4410 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
4411 MVT::i64);
4412 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4413 MVT::i64, SDValue(U64, 0), In);
4414 } else {
4415 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
4416 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
4417 DAG.getConstant(32, DL, MVT::i64));
4418 }
4419 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
4420 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
4421 DL, MVT::f32, Out64);
4422 }
4423 if (InVT == MVT::f32 && ResVT == MVT::i32) {
4424 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
4425 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4426 MVT::f64, SDValue(U64, 0), In);
4427 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
4428 if (Subtarget.hasHighWord())
4429 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
4430 MVT::i32, Out64);
4431 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
4432 DAG.getConstant(32, DL, MVT::i64));
4433 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
4434 }
4435 llvm_unreachable("Unexpected bitcast combination");
4436}
4437
4438SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
4439 SelectionDAG &DAG) const {
4440
4441 if (Subtarget.isTargetXPLINK64())
4442 return lowerVASTART_XPLINK(Op, DAG);
4443 else
4444 return lowerVASTART_ELF(Op, DAG);
4445}
4446
4447SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
4448 SelectionDAG &DAG) const {
4449 MachineFunction &MF = DAG.getMachineFunction();
4450 SystemZMachineFunctionInfo *FuncInfo =
4451 MF.getInfo<SystemZMachineFunctionInfo>();
4452
4453 SDLoc DL(Op);
4454
4455 // vastart just stores the address of the VarArgsFrameIndex slot into the
4456 // memory location argument.
4457 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4458 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4459 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4460 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4461 MachinePointerInfo(SV));
4462}
4463
4464SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
4465 SelectionDAG &DAG) const {
4466 MachineFunction &MF = DAG.getMachineFunction();
4467 SystemZMachineFunctionInfo *FuncInfo =
4468 MF.getInfo<SystemZMachineFunctionInfo>();
4469 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4470
4471 SDValue Chain = Op.getOperand(0);
4472 SDValue Addr = Op.getOperand(1);
4473 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4474 SDLoc DL(Op);
4475
4476 // The initial values of each field.
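// These presumably correspond to the four fields of the ELF s390x va_list:
// __gpr, __fpr, __overflow_arg_area and __reg_save_area.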
4477 const unsigned NumFields = 4;
4478 SDValue Fields[NumFields] = {
4479 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
4480 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
4481 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
4482 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
4483 };
4484
4485 // Store each field into its respective slot.
4486 SDValue MemOps[NumFields];
4487 unsigned Offset = 0;
4488 for (unsigned I = 0; I < NumFields; ++I) {
4489 SDValue FieldAddr = Addr;
4490 if (Offset != 0)
4491 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
4492 DAG.getIntPtrConstant(Offset, DL));
4493 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
4494 MachinePointerInfo(SV, Offset));
4495 Offset += 8;
4496 }
4497 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4498}
4499
4500SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
4501 SelectionDAG &DAG) const {
4502 SDValue Chain = Op.getOperand(0);
4503 SDValue DstPtr = Op.getOperand(1);
4504 SDValue SrcPtr = Op.getOperand(2);
4505 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4506 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4507 SDLoc DL(Op);
4508
4509 uint32_t Sz =
4510 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
4511 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
4512 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
4513 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
4514 MachinePointerInfo(SrcSV));
4515}
4516
4517SDValue
4518SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
4519 SelectionDAG &DAG) const {
4520 if (Subtarget.isTargetXPLINK64())
4521 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
4522 else
4523 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
4524}
4525
4526SDValue
4527SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
4528 SelectionDAG &DAG) const {
4529 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4530 MachineFunction &MF = DAG.getMachineFunction();
4531 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4532 SDValue Chain = Op.getOperand(0);
4533 SDValue Size = Op.getOperand(1);
4534 SDValue Align = Op.getOperand(2);
4535 SDLoc DL(Op);
4536
4537 // If user has set the no alignment function attribute, ignore
4538 // alloca alignments.
4539 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4540
4541 uint64_t StackAlign = TFI->getStackAlignment();
4542 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4543 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4544
4545 SDValue NeededSpace = Size;
4546
4547 // Add extra space for alignment if needed.
4548 EVT PtrVT = getPointerTy(MF.getDataLayout());
4549 if (ExtraAlignSpace)
4550 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
4551 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4552
4553 bool IsSigned = false;
4554 bool DoesNotReturn = false;
4555 bool IsReturnValueUsed = false;
4556 EVT VT = Op.getValueType();
4557 SDValue AllocaCall =
4558 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4559 CallingConv::C, IsSigned, DL, DoesNotReturn,
4560 IsReturnValueUsed)
4561 .first;
4562
4563 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4564 // to end of call in order to ensure it isn't broken up from the call
4565 // sequence.
4566 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4567 Register SPReg = Regs.getStackPointerRegister();
4568 Chain = AllocaCall.getValue(1);
4569 SDValue Glue = AllocaCall.getValue(2);
4570 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4571 Chain = NewSPRegNode.getValue(1);
4572
4573 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4574 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4575 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4576
4577 // Dynamically realign if needed.
4578 if (ExtraAlignSpace) {
4579 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4580 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4581 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4582 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4583 }
4584
4585 SDValue Ops[2] = {Result, Chain};
4586 return DAG.getMergeValues(Ops, DL);
4587}
4588
4589SDValue
4590SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4591 SelectionDAG &DAG) const {
4592 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4593 MachineFunction &MF = DAG.getMachineFunction();
4594 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4595 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4596
4597 SDValue Chain = Op.getOperand(0);
4598 SDValue Size = Op.getOperand(1);
4599 SDValue Align = Op.getOperand(2);
4600 SDLoc DL(Op);
4601
4602 // If user has set the no alignment function attribute, ignore
4603 // alloca alignments.
4604 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4605
4606 uint64_t StackAlign = TFI->getStackAlignment();
4607 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4608 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4609
4610 Register SPReg = getStackPointerRegisterToSaveRestore();
4611 SDValue NeededSpace = Size;
4612
4613 // Get a reference to the stack pointer.
4614 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4615
4616 // If we need a backchain, save it now.
4617 SDValue Backchain;
4618 if (StoreBackchain)
4619 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4620 MachinePointerInfo());
4621
4622 // Add extra space for alignment if needed.
4623 if (ExtraAlignSpace)
4624 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4625 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4626
4627 // Get the new stack pointer value.
4628 SDValue NewSP;
4629 if (hasInlineStackProbe(MF)) {
4630 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4631 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4632 Chain = NewSP.getValue(1);
4633 }
4634 else {
4635 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4636 // Copy the new stack pointer back.
4637 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4638 }
4639
4640 // The allocated data lives above the 160 bytes allocated for the standard
4641 // frame, plus any outgoing stack arguments. We don't know how much that
4642 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4643 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4644 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4645
4646 // Dynamically realign if needed.
4647 if (RequiredAlign > StackAlign) {
4648 Result =
4649 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4650 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4651 Result =
4652 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4653 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4654 }
4655
4656 if (StoreBackchain)
4657 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4658 MachinePointerInfo());
4659
4660 SDValue Ops[2] = { Result, Chain };
4661 return DAG.getMergeValues(Ops, DL);
4662}
4663
4664SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4665 SDValue Op, SelectionDAG &DAG) const {
4666 SDLoc DL(Op);
4667
4668 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4669}
4670
4671SDValue SystemZTargetLowering::lowerMULH(SDValue Op,
4672 SelectionDAG &DAG,
4673 unsigned Opcode) const {
4674 EVT VT = Op.getValueType();
4675 SDLoc DL(Op);
4676 SDValue Even, Odd;
4677
4678 // This custom expander is only used on z17 and later for 64-bit types.
4679 assert(!is32Bit(VT));
4680 assert(Subtarget.hasMiscellaneousExtensions2());
4681
4682 // SystemZISD::xMUL_LOHI returns the low result in the odd register and
4683 // the high result in the even register. Return the latter.
4684 lowerGR128Binary(DAG, DL, VT, Opcode,
4685 Op.getOperand(0), Op.getOperand(1), Even, Odd);
4686 return Even;
4687}
4688
4689SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4690 SelectionDAG &DAG) const {
4691 EVT VT = Op.getValueType();
4692 SDLoc DL(Op);
4693 SDValue Ops[2];
4694 if (is32Bit(VT))
4695 // Just do a normal 64-bit multiplication and extract the results.
4696 // We define this so that it can be used for constant division.
4697 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4698 Op.getOperand(1), Ops[1], Ops[0]);
4699 else if (Subtarget.hasMiscellaneousExtensions2())
4700 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4701 // the high result in the even register. ISD::SMUL_LOHI is defined to
4702 // return the low half first, so the results are in reverse order.
4703 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4704 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4705 else {
4706 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4707 //
4708 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4709 //
4710 // but using the fact that the upper halves are either all zeros
4711 // or all ones:
4712 //
4713 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4714 //
4715 // and grouping the right terms together since they are quicker than the
4716 // multiplication:
4717 //
4718 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
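// This works because lh and rh are each either 0 or all ones: if lh is
// all ones then (lh * rl) == -rl == -(lh & rl), and if lh is zero both
// sides are zero; the same holds for the (ll, rh) term.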
4719 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4720 SDValue LL = Op.getOperand(0);
4721 SDValue RL = Op.getOperand(1);
4722 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4723 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4724 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4725 // the high result in the even register. ISD::SMUL_LOHI is defined to
4726 // return the low half first, so the results are in reverse order.
4727 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4728 LL, RL, Ops[1], Ops[0]);
4729 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4730 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4731 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4732 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4733 }
4734 return DAG.getMergeValues(Ops, DL);
4735}
4736
4737SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4738 SelectionDAG &DAG) const {
4739 EVT VT = Op.getValueType();
4740 SDLoc DL(Op);
4741 SDValue Ops[2];
4742 if (is32Bit(VT))
4743 // Just do a normal 64-bit multiplication and extract the results.
4744 // We define this so that it can be used for constant division.
4745 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4746 Op.getOperand(1), Ops[1], Ops[0]);
4747 else
4748 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4749 // the high result in the even register. ISD::UMUL_LOHI is defined to
4750 // return the low half first, so the results are in reverse order.
4751 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4752 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4753 return DAG.getMergeValues(Ops, DL);
4754}
4755
4756SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4757 SelectionDAG &DAG) const {
4758 SDValue Op0 = Op.getOperand(0);
4759 SDValue Op1 = Op.getOperand(1);
4760 EVT VT = Op.getValueType();
4761 SDLoc DL(Op);
4762
4763 // We use DSGF for 32-bit division. This means the first operand must
4764 // always be 64-bit, and the second operand should be 32-bit whenever
4765 // that is possible, to improve performance.
4766 if (is32Bit(VT))
4767 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4768 else if (DAG.ComputeNumSignBits(Op1) > 32)
4769 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4770
4771 // DSG(F) returns the remainder in the even register and the
4772 // quotient in the odd register.
4773 SDValue Ops[2];
4774 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4775 return DAG.getMergeValues(Ops, DL);
4776}
4777
4778SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4779 SelectionDAG &DAG) const {
4780 EVT VT = Op.getValueType();
4781 SDLoc DL(Op);
4782
4783 // DL(G) returns the remainder in the even register and the
4784 // quotient in the odd register.
4785 SDValue Ops[2];
4786 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4787 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4788 return DAG.getMergeValues(Ops, DL);
4789}
4790
4791SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4792 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4793
4794 // Get the known-zero masks for each operand.
4795 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4796 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4797 DAG.computeKnownBits(Ops[1])};
4798
4799 // See if the upper 32 bits of one operand and the lower 32 bits of the
4800 // other are known zero. They are the low and high operands respectively.
4801 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4802 Known[1].Zero.getZExtValue() };
4803 unsigned High, Low;
4804 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4805 High = 1, Low = 0;
4806 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4807 High = 0, Low = 1;
4808 else
4809 return Op;
4810
4811 SDValue LowOp = Ops[Low];
4812 SDValue HighOp = Ops[High];
4813
4814 // If the high part is a constant, we're better off using IILH.
4815 if (HighOp.getOpcode() == ISD::Constant)
4816 return Op;
4817
4818 // If the low part is a constant that is outside the range of LHI,
4819 // then we're better off using IILF.
4820 if (LowOp.getOpcode() == ISD::Constant) {
4821 int64_t Value = int32_t(LowOp->getAsZExtVal());
4822 if (!isInt<16>(Value))
4823 return Op;
4824 }
4825
4826 // Check whether the high part is an AND that doesn't change the
4827 // high 32 bits and just masks out low bits. We can skip it if so.
4828 if (HighOp.getOpcode() == ISD::AND &&
4829 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4830 SDValue HighOp0 = HighOp.getOperand(0);
4831 uint64_t Mask = HighOp.getConstantOperandVal(1);
4832 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4833 HighOp = HighOp0;
4834 }
4835
4836 // Take advantage of the fact that all GR32 operations only change the
4837 // low 32 bits by truncating Low to an i32 and inserting it directly
4838 // using a subreg. The interesting cases are those where the truncation
4839 // can be folded.
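// For example, (or (and x, 0xffffffff00000000), (zext y)) becomes an
// insertion of y into the low 32 bits of x.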
4840 SDLoc DL(Op);
4841 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4842 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4843 MVT::i64, HighOp, Low32);
4844}
4845
4846// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4847SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4848 SelectionDAG &DAG) const {
4849 SDNode *N = Op.getNode();
4850 SDValue LHS = N->getOperand(0);
4851 SDValue RHS = N->getOperand(1);
4852 SDLoc DL(N);
4853
4854 if (N->getValueType(0) == MVT::i128) {
4855 unsigned BaseOp = 0;
4856 unsigned FlagOp = 0;
4857 bool IsBorrow = false;
4858 switch (Op.getOpcode()) {
4859 default: llvm_unreachable("Unknown instruction!");
4860 case ISD::UADDO:
4861 BaseOp = ISD::ADD;
4862 FlagOp = SystemZISD::VACC;
4863 break;
4864 case ISD::USUBO:
4865 BaseOp = ISD::SUB;
4866 FlagOp = SystemZISD::VSCBI;
4867 IsBorrow = true;
4868 break;
4869 }
4870 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4871 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4872 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4873 DAG.getValueType(MVT::i1));
4874 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4875 if (IsBorrow)
4876 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4877 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4878 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4879 }
4880
4881 unsigned BaseOp = 0;
4882 unsigned CCValid = 0;
4883 unsigned CCMask = 0;
4884
4885 switch (Op.getOpcode()) {
4886 default: llvm_unreachable("Unknown instruction!");
4887 case ISD::SADDO:
4888 BaseOp = SystemZISD::SADDO;
4889 CCValid = SystemZ::CCMASK_ARITH;
4890 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4891 break;
4892 case ISD::SSUBO:
4893 BaseOp = SystemZISD::SSUBO;
4894 CCValid = SystemZ::CCMASK_ARITH;
4895 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4896 break;
4897 case ISD::UADDO:
4898 BaseOp = SystemZISD::UADDO;
4899 CCValid = SystemZ::CCMASK_LOGICAL;
4900 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4901 break;
4902 case ISD::USUBO:
4903 BaseOp = SystemZISD::USUBO;
4904 CCValid = SystemZ::CCMASK_LOGICAL;
4905 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4906 break;
4907 }
4908
4909 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4910 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4911
4912 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4913 if (N->getValueType(1) == MVT::i1)
4914 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4915
4916 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4917}
4918
4919static bool isAddCarryChain(SDValue Carry) {
4920 while (Carry.getOpcode() == ISD::UADDO_CARRY &&
4921 Carry->getValueType(0) != MVT::i128)
4922 Carry = Carry.getOperand(2);
4923 return Carry.getOpcode() == ISD::UADDO &&
4924 Carry->getValueType(0) != MVT::i128;
4925}
4926
4927static bool isSubBorrowChain(SDValue Carry) {
4928 while (Carry.getOpcode() == ISD::USUBO_CARRY &&
4929 Carry->getValueType(0) != MVT::i128)
4930 Carry = Carry.getOperand(2);
4931 return Carry.getOpcode() == ISD::USUBO &&
4932 Carry->getValueType(0) != MVT::i128;
4933}
4934
4935// Lower UADDO_CARRY/USUBO_CARRY nodes.
4936SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4937 SelectionDAG &DAG) const {
4938
4939 SDNode *N = Op.getNode();
4940 MVT VT = N->getSimpleValueType(0);
4941
4942 // Let legalize expand this if it isn't a legal type yet.
4943 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4944 return SDValue();
4945
4946 SDValue LHS = N->getOperand(0);
4947 SDValue RHS = N->getOperand(1);
4948 SDValue Carry = Op.getOperand(2);
4949 SDLoc DL(N);
4950
4951 if (VT == MVT::i128) {
4952 unsigned BaseOp = 0;
4953 unsigned FlagOp = 0;
4954 bool IsBorrow = false;
4955 switch (Op.getOpcode()) {
4956 default: llvm_unreachable("Unknown instruction!");
4957 case ISD::UADDO_CARRY:
4958 BaseOp = SystemZISD::VAC;
4959 FlagOp = SystemZISD::VACCC;
4960 break;
4961 case ISD::USUBO_CARRY:
4962 BaseOp = SystemZISD::VSBI;
4963 FlagOp = SystemZISD::VSBCBI;
4964 IsBorrow = true;
4965 break;
4966 }
4967 if (IsBorrow)
4968 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4969 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4970 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4971 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4972 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4973 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4974 DAG.getValueType(MVT::i1));
4975 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4976 if (IsBorrow)
4977 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4978 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4979 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4980 }
4981
4982 unsigned BaseOp = 0;
4983 unsigned CCValid = 0;
4984 unsigned CCMask = 0;
4985
4986 switch (Op.getOpcode()) {
4987 default: llvm_unreachable("Unknown instruction!");
4988 case ISD::UADDO_CARRY:
4989 if (!isAddCarryChain(Carry))
4990 return SDValue();
4991
4992 BaseOp = SystemZISD::ADDCARRY;
4993 CCValid = SystemZ::CCMASK_LOGICAL;
4994 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4995 break;
4996 case ISD::USUBO_CARRY:
4997 if (!isSubBorrowChain(Carry))
4998 return SDValue();
4999
5000 BaseOp = SystemZISD::SUBCARRY;
5001 CCValid = SystemZ::CCMASK_LOGICAL;
5002 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
5003 break;
5004 }
5005
5006 // Set the condition code from the carry flag.
5007 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
5008 DAG.getConstant(CCValid, DL, MVT::i32),
5009 DAG.getConstant(CCMask, DL, MVT::i32));
5010
5011 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
5012 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
5013
5014 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
5015 if (N->getValueType(1) == MVT::i1)
5016 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
5017
5018 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
5019}
5020
5021SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
5022 SelectionDAG &DAG) const {
5023 EVT VT = Op.getValueType();
5024 SDLoc DL(Op);
5025 Op = Op.getOperand(0);
5026
5027 if (VT.getScalarSizeInBits() == 128) {
5028 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
5029 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
5030 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
5031 DAG.getConstant(0, DL, MVT::i64));
5032 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5033 return Op;
5034 }
5035
5036 // Handle vector types via VPOPCT.
5037 if (VT.isVector()) {
5038 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
5039 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
5040 switch (VT.getScalarSizeInBits()) {
5041 case 8:
5042 break;
5043 case 16: {
5044 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
5045 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
5046 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
5047 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
5048 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
5049 break;
5050 }
5051 case 32: {
5052 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
5053 DAG.getConstant(0, DL, MVT::i32));
5054 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5055 break;
5056 }
5057 case 64: {
5058 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
5059 DAG.getConstant(0, DL, MVT::i32));
5060 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
5061 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5062 break;
5063 }
5064 default:
5065 llvm_unreachable("Unexpected type");
5066 }
5067 return Op;
5068 }
5069
5070 // Get the known-zero mask for the operand.
5071 KnownBits Known = DAG.computeKnownBits(Op);
5072 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
5073 if (NumSignificantBits == 0)
5074 return DAG.getConstant(0, DL, VT);
5075
5076 // Skip known-zero high parts of the operand.
5077 int64_t OrigBitSize = VT.getSizeInBits();
5078 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
5079 BitSize = std::min(BitSize, OrigBitSize);
5080
5081 // The POPCNT instruction counts the number of bits in each byte.
5082 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
5083 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
5084 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
5085
5086 // Add up per-byte counts in a binary tree. All bits of Op at
5087 // position larger than BitSize remain zero throughout.
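// For example, with BitSize == 32 the loop adds (Op << 16) and then
// (Op << 8), after which the top byte holds the sum of all four byte
// counts; the final shift below extracts it.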
5088 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
5089 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
5090 if (BitSize != OrigBitSize)
5091 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
5092 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
5093 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
5094 }
5095
5096 // Extract overall result from high byte.
5097 if (BitSize > 8)
5098 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5099 DAG.getConstant(BitSize - 8, DL, VT));
5100
5101 return Op;
5102}
5103
5104SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
5105 SelectionDAG &DAG) const {
5106 SDLoc DL(Op);
5107 AtomicOrdering FenceOrdering =
5108 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5109 SyncScope::ID FenceSSID =
5110 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5111
5112 // The only fence that needs an instruction is a sequentially-consistent
5113 // cross-thread fence.
5114 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5115 FenceSSID == SyncScope::System) {
5116 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
5117 Op.getOperand(0)),
5118 0);
5119 }
5120
5121 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5122 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
5123}
5124
5125SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
5126 SelectionDAG &DAG) const {
5127 EVT RegVT = Op.getValueType();
5128 if (RegVT.getSizeInBits() == 128)
5129 return lowerATOMIC_LDST_I128(Op, DAG);
5130 return lowerLoadF16(Op, DAG);
5131}
5132
5133SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
5134 SelectionDAG &DAG) const {
5135 auto *Node = cast<AtomicSDNode>(Op.getNode());
5136 if (Node->getMemoryVT().getSizeInBits() == 128)
5137 return lowerATOMIC_LDST_I128(Op, DAG);
5138 return lowerStoreF16(Op, DAG);
5139}
5140
5141SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
5142 SelectionDAG &DAG) const {
5143 auto *Node = cast<AtomicSDNode>(Op.getNode());
5144 assert(
5145 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
5146 "Only custom lowering i128 or f128.");
5147 // Use same code to handle both legal and non-legal i128 types.
5148 SmallVector<SDValue, 2> Results;
5149 LowerOperationWrapper(Node, Results, DAG);
5150 return DAG.getMergeValues(Results, SDLoc(Op));
5151}
5152
5153// Prepare for a Compare And Swap for a subword operation. This needs to be
5154// done in memory with 4 bytes at natural alignment.
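// For example, for a byte at offset 1 within its aligned word, rotating the
// word left by 8 bits (the low bits of BitShift) brings that byte to the
// top of the GR32, and NegBitShift rotates it back.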
5155 static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
5156 SDValue &AlignedAddr, SDValue &BitShift,
5157 SDValue &NegBitShift) {
5158 EVT PtrVT = Addr.getValueType();
5159 EVT WideVT = MVT::i32;
5160
5161 // Get the address of the containing word.
5162 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
5163 DAG.getSignedConstant(-4, DL, PtrVT));
5164
5165 // Get the number of bits that the word must be rotated left in order
5166 // to bring the field to the top bits of a GR32.
5167 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
5168 DAG.getConstant(3, DL, PtrVT));
5169 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
5170
5171 // Get the complementing shift amount, for rotating a field in the top
5172 // bits back to its proper position.
5173 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
5174 DAG.getConstant(0, DL, WideVT), BitShift);
5175
5176}
5177
5178// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
5179// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
5180SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
5181 SelectionDAG &DAG,
5182 unsigned Opcode) const {
5183 auto *Node = cast<AtomicSDNode>(Op.getNode());
5184
5185 // 32-bit operations need no special handling.
5186 EVT NarrowVT = Node->getMemoryVT();
5187 EVT WideVT = MVT::i32;
5188 if (NarrowVT == WideVT)
5189 return Op;
5190
5191 int64_t BitSize = NarrowVT.getSizeInBits();
5192 SDValue ChainIn = Node->getChain();
5193 SDValue Addr = Node->getBasePtr();
5194 SDValue Src2 = Node->getVal();
5195 MachineMemOperand *MMO = Node->getMemOperand();
5196 SDLoc DL(Node);
5197
5198 // Convert atomic subtracts of constants into additions.
5199 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
5200 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
5201 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
5202 Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL,
5203 Src2.getValueType());
5204 }
5205
5206 SDValue AlignedAddr, BitShift, NegBitShift;
5207 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5208
5209 // Extend the source operand to 32 bits and prepare it for the inner loop.
5210 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
5211 // operations require the source to be shifted in advance. (This shift
5212 // can be folded if the source is constant.) For AND and NAND, the lower
5213 // bits must be set, while for other opcodes they should be left clear.
5214 if (Opcode != SystemZISD::ATOMIC_SWAPW)
5215 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
5216 DAG.getConstant(32 - BitSize, DL, WideVT));
5217 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
5218 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
5219 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
5220 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
5221
5222 // Construct the ATOMIC_LOADW_* node.
5223 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
5224 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
5225 DAG.getConstant(BitSize, DL, WideVT) };
5226 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
5227 NarrowVT, MMO);
5228
5229 // Rotate the result of the final CS so that the field is in the lower
5230 // bits of a GR32, then truncate it.
5231 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
5232 DAG.getConstant(BitSize, DL, WideVT));
5233 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
5234
5235 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
5236 return DAG.getMergeValues(RetOps, DL);
5237}
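// Illustrative sketch (editorial addition, hypothetical helper name): the
// operand preparation above, specialised to an 8-bit atomic AND. The value
// is moved into the top byte of a GR32 and, because this is AND/NAND, the
// low 24 bits are filled with ones so the rest of the containing word is
// preserved inside the ATOMIC_LOADW_AND loop.
static inline uint32_t exampleSubwordAndOperand(uint8_t Src2) {
  const unsigned BitSize = 8;
  uint32_t Wide = uint32_t(Src2) << (32 - BitSize); // field in the top bits
  Wide |= uint32_t(-1) >> BitSize;                  // ones below the field
  return Wide;
}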
5238
5239// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
5240// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
5241SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
5242 SelectionDAG &DAG) const {
5243 auto *Node = cast<AtomicSDNode>(Op.getNode());
5244 EVT MemVT = Node->getMemoryVT();
5245 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
5246 // A full-width operation: negate and use LAA(G).
5247 assert(Op.getValueType() == MemVT && "Mismatched VTs");
5248 assert(Subtarget.hasInterlockedAccess1() &&
5249 "Should have been expanded by AtomicExpand pass.");
5250 SDValue Src2 = Node->getVal();
5251 SDLoc DL(Src2);
5252 SDValue NegSrc2 =
5253 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
5254 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
5255 Node->getChain(), Node->getBasePtr(), NegSrc2,
5256 Node->getMemOperand());
5257 }
5258
5259 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
5260}
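// For example (illustrative): an i32 "atomicrmw sub" of the constant 7 is
// rewritten above as an ATOMIC_LOAD_ADD of -7, which the interlocked-access
// facility can implement with LAA, while an i8 or i16 subtraction falls
// through to the ATOMIC_LOADW_SUB compare-and-swap loop.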
5261
5262// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
5263SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
5264 SelectionDAG &DAG) const {
5265 auto *Node = cast<AtomicSDNode>(Op.getNode());
5266 SDValue ChainIn = Node->getOperand(0);
5267 SDValue Addr = Node->getOperand(1);
5268 SDValue CmpVal = Node->getOperand(2);
5269 SDValue SwapVal = Node->getOperand(3);
5270 MachineMemOperand *MMO = Node->getMemOperand();
5271 SDLoc DL(Node);
5272
5273 if (Node->getMemoryVT() == MVT::i128) {
5274 // Use the same code to handle both legal and non-legal i128 types.
5275 SmallVector<SDValue, 2> Results;
5276 LowerOperationWrapper(Node, Results, DAG);
5277 return DAG.getMergeValues(Results, DL);
5278 }
5279
5280 // We have native support for 32-bit and 64-bit compare and swap, but we
5281 // still need to expand extracting the "success" result from the CC.
5282 EVT NarrowVT = Node->getMemoryVT();
5283 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
5284 if (NarrowVT == WideVT) {
5285 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5286 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
5287 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
5288 DL, Tys, Ops, NarrowVT, MMO);
5289 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5290 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5291
5292 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
5293 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5294 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5295 return SDValue();
5296 }
5297
5298 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
5299 // via a fullword ATOMIC_CMP_SWAPW operation.
5300 int64_t BitSize = NarrowVT.getSizeInBits();
5301
5302 SDValue AlignedAddr, BitShift, NegBitShift;
5303 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5304
5305 // Construct the ATOMIC_CMP_SWAPW node.
5306 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5307 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
5308 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
5309 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
5310 VTList, Ops, NarrowVT, MMO);
5311 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5312 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5313
5314 // emitAtomicCmpSwapW() will zero extend the result (original value).
5315 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
5316 DAG.getValueType(NarrowVT));
5317 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
5318 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5319 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5320 return SDValue();
5321}
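// Note (illustrative): for an i8 or i16 cmpxchg the loop above operates on
// the containing aligned word, and the AssertZext records that the returned
// original value has already been zero-extended from the narrow width.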
5322
5323MachineMemOperand::Flags
5324SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
5325 // Because of how we convert atomic_load and atomic_store to normal loads and
5326 // stores in the DAG, we need to ensure that the MMOs are marked volatile
5327 // since DAGCombine hasn't been updated to account for atomic, but
5328 // non-volatile loads. (See D57601)
5329 if (auto *SI = dyn_cast<StoreInst>(&I))
5330 if (SI->isAtomic())
5331 return MachineMemOperand::MOVolatile;
5332 if (auto *LI = dyn_cast<LoadInst>(&I))
5333 if (LI->isAtomic())
5334 return MachineMemOperand::MOVolatile;
5335 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
5336 if (AI->isAtomic())
5337 return MachineMemOperand::MOVolatile;
5338 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
5339 if (AI->isAtomic())
5340 return MachineMemOperand::MOVolatile;
5341 return MachineMemOperand::MONone;
5342}
5343
5344SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
5345 SelectionDAG &DAG) const {
5346 MachineFunction &MF = DAG.getMachineFunction();
5347 auto *Regs = Subtarget.getSpecialRegisters();
5348 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5349 report_fatal_error("Variable-sized stack allocations are not supported "
5350 "in GHC calling convention");
5351 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
5352 Regs->getStackPointerRegister(), Op.getValueType());
5353}
5354
5355SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
5356 SelectionDAG &DAG) const {
5357 MachineFunction &MF = DAG.getMachineFunction();
5358 auto *Regs = Subtarget.getSpecialRegisters();
5359 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
5360
5361 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5362 report_fatal_error("Variable-sized stack allocations are not supported "
5363 "in GHC calling convention");
5364
5365 SDValue Chain = Op.getOperand(0);
5366 SDValue NewSP = Op.getOperand(1);
5367 SDValue Backchain;
5368 SDLoc DL(Op);
5369
5370 if (StoreBackchain) {
5371 SDValue OldSP = DAG.getCopyFromReg(
5372 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
5373 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
5374 MachinePointerInfo());
5375 }
5376
5377 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
5378
5379 if (StoreBackchain)
5380 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
5381 MachinePointerInfo());
5382
5383 return Chain;
5384}
5385
5386SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
5387 SelectionDAG &DAG) const {
5388 bool IsData = Op.getConstantOperandVal(4);
5389 if (!IsData)
5390 // Just preserve the chain.
5391 return Op.getOperand(0);
5392
5393 SDLoc DL(Op);
5394 bool IsWrite = Op.getConstantOperandVal(2);
5395 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
5396 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
5397 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
5398 Op.getOperand(1)};
5399 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
5400 Node->getVTList(), Ops,
5401 Node->getMemoryVT(), Node->getMemOperand());
5402}
5403
5404SDValue
5405SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
5406 SelectionDAG &DAG) const {
5407 unsigned Opcode, CCValid;
5408 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
5409 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
5410 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
5411 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
5412 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
5413 return SDValue();
5414 }
5415
5416 return SDValue();
5417}
5418
5419SDValue
5420SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
5421 SelectionDAG &DAG) const {
5422 unsigned Opcode, CCValid;
5423 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
5424 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
5425 if (Op->getNumValues() == 1)
5426 return getCCResult(DAG, SDValue(Node, 0));
5427 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
5428 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
5429 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
5430 }
5431
5432 unsigned Id = Op.getConstantOperandVal(0);
5433 switch (Id) {
5434 case Intrinsic::thread_pointer:
5435 return lowerThreadPointer(SDLoc(Op), DAG);
5436
5437 case Intrinsic::s390_vpdi:
5438 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
5439 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5440
5441 case Intrinsic::s390_vperm:
5442 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
5443 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5444
5445 case Intrinsic::s390_vuphb:
5446 case Intrinsic::s390_vuphh:
5447 case Intrinsic::s390_vuphf:
5448 case Intrinsic::s390_vuphg:
5449 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
5450 Op.getOperand(1));
5451
5452 case Intrinsic::s390_vuplhb:
5453 case Intrinsic::s390_vuplhh:
5454 case Intrinsic::s390_vuplhf:
5455 case Intrinsic::s390_vuplhg:
5456 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
5457 Op.getOperand(1));
5458
5459 case Intrinsic::s390_vuplb:
5460 case Intrinsic::s390_vuplhw:
5461 case Intrinsic::s390_vuplf:
5462 case Intrinsic::s390_vuplg:
5463 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
5464 Op.getOperand(1));
5465
5466 case Intrinsic::s390_vupllb:
5467 case Intrinsic::s390_vupllh:
5468 case Intrinsic::s390_vupllf:
5469 case Intrinsic::s390_vupllg:
5470 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
5471 Op.getOperand(1));
5472
5473 case Intrinsic::s390_vsumb:
5474 case Intrinsic::s390_vsumh:
5475 case Intrinsic::s390_vsumgh:
5476 case Intrinsic::s390_vsumgf:
5477 case Intrinsic::s390_vsumqf:
5478 case Intrinsic::s390_vsumqg:
5479 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
5480 Op.getOperand(1), Op.getOperand(2));
5481
5482 case Intrinsic::s390_vaq:
5483 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5484 Op.getOperand(1), Op.getOperand(2));
5485 case Intrinsic::s390_vaccb:
5486 case Intrinsic::s390_vacch:
5487 case Intrinsic::s390_vaccf:
5488 case Intrinsic::s390_vaccg:
5489 case Intrinsic::s390_vaccq:
5490 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
5491 Op.getOperand(1), Op.getOperand(2));
5492 case Intrinsic::s390_vacq:
5493 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
5494 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5495 case Intrinsic::s390_vacccq:
5496 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
5497 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5498
5499 case Intrinsic::s390_vsq:
5500 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
5501 Op.getOperand(1), Op.getOperand(2));
5502 case Intrinsic::s390_vscbib:
5503 case Intrinsic::s390_vscbih:
5504 case Intrinsic::s390_vscbif:
5505 case Intrinsic::s390_vscbig:
5506 case Intrinsic::s390_vscbiq:
5507 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
5508 Op.getOperand(1), Op.getOperand(2));
5509 case Intrinsic::s390_vsbiq:
5510 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
5511 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5512 case Intrinsic::s390_vsbcbiq:
5513 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
5514 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5515
5516 case Intrinsic::s390_vmhb:
5517 case Intrinsic::s390_vmhh:
5518 case Intrinsic::s390_vmhf:
5519 case Intrinsic::s390_vmhg:
5520 case Intrinsic::s390_vmhq:
5521 return DAG.getNode(ISD::MULHS, SDLoc(Op), Op.getValueType(),
5522 Op.getOperand(1), Op.getOperand(2));
5523 case Intrinsic::s390_vmlhb:
5524 case Intrinsic::s390_vmlhh:
5525 case Intrinsic::s390_vmlhf:
5526 case Intrinsic::s390_vmlhg:
5527 case Intrinsic::s390_vmlhq:
5528 return DAG.getNode(ISD::MULHU, SDLoc(Op), Op.getValueType(),
5529 Op.getOperand(1), Op.getOperand(2));
5530
5531 case Intrinsic::s390_vmahb:
5532 case Intrinsic::s390_vmahh:
5533 case Intrinsic::s390_vmahf:
5534 case Intrinsic::s390_vmahg:
5535 case Intrinsic::s390_vmahq:
5536 return DAG.getNode(SystemZISD::VMAH, SDLoc(Op), Op.getValueType(),
5537 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5538 case Intrinsic::s390_vmalhb:
5539 case Intrinsic::s390_vmalhh:
5540 case Intrinsic::s390_vmalhf:
5541 case Intrinsic::s390_vmalhg:
5542 case Intrinsic::s390_vmalhq:
5543 return DAG.getNode(SystemZISD::VMALH, SDLoc(Op), Op.getValueType(),
5544 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5545
5546 case Intrinsic::s390_vmeb:
5547 case Intrinsic::s390_vmeh:
5548 case Intrinsic::s390_vmef:
5549 case Intrinsic::s390_vmeg:
5550 return DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5551 Op.getOperand(1), Op.getOperand(2));
5552 case Intrinsic::s390_vmleb:
5553 case Intrinsic::s390_vmleh:
5554 case Intrinsic::s390_vmlef:
5555 case Intrinsic::s390_vmleg:
5556 return DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5557 Op.getOperand(1), Op.getOperand(2));
5558 case Intrinsic::s390_vmob:
5559 case Intrinsic::s390_vmoh:
5560 case Intrinsic::s390_vmof:
5561 case Intrinsic::s390_vmog:
5562 return DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5563 Op.getOperand(1), Op.getOperand(2));
5564 case Intrinsic::s390_vmlob:
5565 case Intrinsic::s390_vmloh:
5566 case Intrinsic::s390_vmlof:
5567 case Intrinsic::s390_vmlog:
5568 return DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5569 Op.getOperand(1), Op.getOperand(2));
5570
5571 case Intrinsic::s390_vmaeb:
5572 case Intrinsic::s390_vmaeh:
5573 case Intrinsic::s390_vmaef:
5574 case Intrinsic::s390_vmaeg:
5575 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5576 DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5577 Op.getOperand(1), Op.getOperand(2)),
5578 Op.getOperand(3));
5579 case Intrinsic::s390_vmaleb:
5580 case Intrinsic::s390_vmaleh:
5581 case Intrinsic::s390_vmalef:
5582 case Intrinsic::s390_vmaleg:
5583 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5584 DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5585 Op.getOperand(1), Op.getOperand(2)),
5586 Op.getOperand(3));
5587 case Intrinsic::s390_vmaob:
5588 case Intrinsic::s390_vmaoh:
5589 case Intrinsic::s390_vmaof:
5590 case Intrinsic::s390_vmaog:
5591 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5592 DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5593 Op.getOperand(1), Op.getOperand(2)),
5594 Op.getOperand(3));
5595 case Intrinsic::s390_vmalob:
5596 case Intrinsic::s390_vmaloh:
5597 case Intrinsic::s390_vmalof:
5598 case Intrinsic::s390_vmalog:
5599 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5600 DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5601 Op.getOperand(1), Op.getOperand(2)),
5602 Op.getOperand(3));
5603 }
5604
5605 return SDValue();
5606}
5607
5608namespace {
5609// Says that SystemZISD operation Opcode can be used to perform the equivalent
5610// of a VPERM with permute vector Bytes. If Opcode takes three operands,
5611// Operand is the constant third operand, otherwise it is the number of
5612// bytes in each element of the result.
5613struct Permute {
5614 unsigned Opcode;
5615 unsigned Operand;
5616 unsigned char Bytes[SystemZ::VectorBytes];
5617};
5618}
5619
5620static const Permute PermuteForms[] = {
5621 // VMRHG
5622 { SystemZISD::MERGE_HIGH, 8,
5623 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
5624 // VMRHF
5625 { SystemZISD::MERGE_HIGH, 4,
5626 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
5627 // VMRHH
5628 { SystemZISD::MERGE_HIGH, 2,
5629 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
5630 // VMRHB
5631 { SystemZISD::MERGE_HIGH, 1,
5632 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
5633 // VMRLG
5634 { SystemZISD::MERGE_LOW, 8,
5635 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
5636 // VMRLF
5637 { SystemZISD::MERGE_LOW, 4,
5638 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
5639 // VMRLH
5640 { SystemZISD::MERGE_LOW, 2,
5641 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
5642 // VMRLB
5643 { SystemZISD::MERGE_LOW, 1,
5644 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
5645 // VPKG
5646 { SystemZISD::PACK, 4,
5647 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
5648 // VPKF
5649 { SystemZISD::PACK, 2,
5650 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
5651 // VPKH
5652 { SystemZISD::PACK, 1,
5653 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
5654 // VPDI V1, V2, 4 (low half of V1, high half of V2)
5655 { SystemZISD::PERMUTE_DWORDS, 4,
5656 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
5657 // VPDI V1, V2, 1 (high half of V1, low half of V2)
5658 { SystemZISD::PERMUTE_DWORDS, 1,
5659 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
5660};
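// Reading the table (illustrative): the VMRHB entry says that a VPERM whose
// byte selectors are { 0, 16, 1, 17, ... } - interleaving the high halves of
// the two inputs byte by byte - can instead be emitted as a MERGE_HIGH with
// 1-byte elements. Selectors 0-15 refer to bytes of operand 0 and 16-31 to
// bytes of operand 1.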
5661
5662// Called after matching a vector shuffle against a particular pattern.
5663// Both the original shuffle and the pattern have two vector operands.
5664// OpNos[0] is the operand of the original shuffle that should be used for
5665// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
5666// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
5667// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
5668// for operands 0 and 1 of the pattern.
5669static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
5670 if (OpNos[0] < 0) {
5671 if (OpNos[1] < 0)
5672 return false;
5673 OpNo0 = OpNo1 = OpNos[1];
5674 } else if (OpNos[1] < 0) {
5675 OpNo0 = OpNo1 = OpNos[0];
5676 } else {
5677 OpNo0 = OpNos[0];
5678 OpNo1 = OpNos[1];
5679 }
5680 return true;
5681}
5682
5683// Bytes is a VPERM-like permute vector, except that -1 is used for
5684// undefined bytes. Return true if the VPERM can be implemented using P.
5685// When returning true set OpNo0 to the VPERM operand that should be
5686// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5687//
5688// For example, if swapping the VPERM operands allows P to match, OpNo0
5689// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5690// operand, but rewriting it to use two duplicated operands allows it to
5691// match P, then OpNo0 and OpNo1 will be the same.
5692static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5693 unsigned &OpNo0, unsigned &OpNo1) {
5694 int OpNos[] = { -1, -1 };
5695 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5696 int Elt = Bytes[I];
5697 if (Elt >= 0) {
5698 // Make sure that the two permute vectors use the same suboperand
5699 // byte number. Only the operand numbers (the high bits) are
5700 // allowed to differ.
5701 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5702 return false;
5703 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5704 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5705 // Make sure that the operand mappings are consistent with previous
5706 // elements.
5707 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5708 return false;
5709 OpNos[ModelOpNo] = RealOpNo;
5710 }
5711 }
5712 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5713}
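// For example (illustrative): Bytes = { 16, 0, 17, 1, ... } matches the VMRHB
// pattern { 0, 16, 1, 17, ... } with the operands swapped, so this returns
// true with OpNo0 = 1 and OpNo1 = 0. If Bytes only ever references one input,
// the unused slot stays at -1 and chooseShuffleOpNos duplicates the
// referenced operand.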
5714
5715// As above, but search for a matching permute.
5716static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5717 unsigned &OpNo0, unsigned &OpNo1) {
5718 for (auto &P : PermuteForms)
5719 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5720 return &P;
5721 return nullptr;
5722}
5723
5724// Bytes is a VPERM-like permute vector, except that -1 is used for
5725// undefined bytes. This permute is an operand of an outer permute.
5726// See whether redistributing the -1 bytes gives a shuffle that can be
5727// implemented using P. If so, set Transform to a VPERM-like permute vector
5728// that, when applied to the result of P, gives the original permute in Bytes.
5729static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5730 const Permute &P,
5731 SmallVectorImpl<int> &Transform) {
5732 unsigned To = 0;
5733 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5734 int Elt = Bytes[From];
5735 if (Elt < 0)
5736 // Byte number From of the result is undefined.
5737 Transform[From] = -1;
5738 else {
5739 while (P.Bytes[To] != Elt) {
5740 To += 1;
5741 if (To == SystemZ::VectorBytes)
5742 return false;
5743 }
5744 Transform[From] = To;
5745 }
5746 }
5747 return true;
5748}
5749
5750// As above, but search for a matching permute.
5751static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5752 SmallVectorImpl<int> &Transform) {
5753 for (auto &P : PermuteForms)
5754 if (matchDoublePermute(Bytes, P, Transform))
5755 return &P;
5756 return nullptr;
5757}
5758
5759// Convert the mask of the given shuffle op into a byte-level mask,
5760// as if it had type vNi8.
5761static bool getVPermMask(SDValue ShuffleOp,
5762 SmallVectorImpl<int> &Bytes) {
5763 EVT VT = ShuffleOp.getValueType();
5764 unsigned NumElements = VT.getVectorNumElements();
5765 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5766
5767 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5768 Bytes.resize(NumElements * BytesPerElement, -1);
5769 for (unsigned I = 0; I < NumElements; ++I) {
5770 int Index = VSN->getMaskElt(I);
5771 if (Index >= 0)
5772 for (unsigned J = 0; J < BytesPerElement; ++J)
5773 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5774 }
5775 return true;
5776 }
5777 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5778 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5779 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5780 Bytes.resize(NumElements * BytesPerElement, -1);
5781 for (unsigned I = 0; I < NumElements; ++I)
5782 for (unsigned J = 0; J < BytesPerElement; ++J)
5783 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5784 return true;
5785 }
5786 return false;
5787}
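// For example (illustrative): a <4 x i32> shuffle mask { 1, 5, -1, 3 } expands
// to the byte mask { 4,5,6,7, 20,21,22,23, -1,-1,-1,-1, 12,13,14,15 }, where
// selectors 16-31 denote bytes of the second operand and -1 marks undefined
// bytes.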
5788
5789// Bytes is a VPERM-like permute vector, except that -1 is used for
5790// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5791// the result come from a contiguous sequence of bytes from one input.
5792// Set Base to the selector for the first byte if so.
5793static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5794 unsigned BytesPerElement, int &Base) {
5795 Base = -1;
5796 for (unsigned I = 0; I < BytesPerElement; ++I) {
5797 if (Bytes[Start + I] >= 0) {
5798 unsigned Elem = Bytes[Start + I];
5799 if (Base < 0) {
5800 Base = Elem - I;
5801 // Make sure the bytes would come from one input operand.
5802 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5803 return false;
5804 } else if (unsigned(Base) != Elem - I)
5805 return false;
5806 }
5807 }
5808 return true;
5809}
5810
5811// Bytes is a VPERM-like permute vector, except that -1 is used for
5812// undefined bytes. Return true if it can be performed using VSLDB.
5813// When returning true, set StartIndex to the shift amount and OpNo0
5814// and OpNo1 to the VPERM operands that should be used as the first
5815// and second shift operand respectively.
5816static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5817 unsigned &StartIndex, unsigned &OpNo0,
5818 unsigned &OpNo1) {
5819 int OpNos[] = { -1, -1 };
5820 int Shift = -1;
5821 for (unsigned I = 0; I < 16; ++I) {
5822 int Index = Bytes[I];
5823 if (Index >= 0) {
5824 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5825 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5826 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5827 if (Shift < 0)
5828 Shift = ExpectedShift;
5829 else if (Shift != ExpectedShift)
5830 return false;
5831 // Make sure that the operand mappings are consistent with previous
5832 // elements.
5833 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5834 return false;
5835 OpNos[ModelOpNo] = RealOpNo;
5836 }
5837 }
5838 StartIndex = Shift;
5839 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5840}
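// For example (illustrative): Bytes = { 1, 2, ..., 15, 16 } shifts every
// result byte by one position, taking the first 15 bytes from operand 0 and
// the last byte from operand 1, so it matches VSLDB with StartIndex = 1,
// OpNo0 = 0 and OpNo1 = 1.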
5841
5842// Create a node that performs P on operands Op0 and Op1, casting the
5843// operands to the appropriate type. The type of the result is determined by P.
5844static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5845 const Permute &P, SDValue Op0, SDValue Op1) {
5846 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5847 // elements of a PACK are twice as wide as the outputs.
5848 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5849 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5850 P.Operand);
5851 // Cast both operands to the appropriate type.
5852 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5853 SystemZ::VectorBytes / InBytes);
5854 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5855 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5856 SDValue Op;
5857 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5858 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5859 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5860 } else if (P.Opcode == SystemZISD::PACK) {
5861 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5862 SystemZ::VectorBytes / P.Operand);
5863 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5864 } else {
5865 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5866 }
5867 return Op;
5868}
5869
5870static bool isZeroVector(SDValue N) {
5871 if (N->getOpcode() == ISD::BITCAST)
5872 N = N->getOperand(0);
5873 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5874 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5875 return Op->getZExtValue() == 0;
5876 return ISD::isBuildVectorAllZeros(N.getNode());
5877}
5878
5879// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5880static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5881 for (unsigned I = 0; I < Num ; I++)
5882 if (isZeroVector(Ops[I]))
5883 return I;
5884 return UINT32_MAX;
5885}
5886
5887// Bytes is a VPERM-like permute vector, except that -1 is used for
5888// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5889// VSLDB or VPERM.
5890static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5891 SDValue *Ops,
5892 const SmallVectorImpl<int> &Bytes) {
5893 for (unsigned I = 0; I < 2; ++I)
5894 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5895
5896 // First see whether VSLDB can be used.
5897 unsigned StartIndex, OpNo0, OpNo1;
5898 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5899 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5900 Ops[OpNo1],
5901 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5902
5903 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5904 // eliminate a zero vector by reusing any zero index in the permute vector.
5905 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5906 if (ZeroVecIdx != UINT32_MAX) {
5907 bool MaskFirst = true;
5908 int ZeroIdx = -1;
5909 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5910 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5911 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5912 if (OpNo == ZeroVecIdx && I == 0) {
5913 // If the first byte is zero, use mask as first operand.
5914 ZeroIdx = 0;
5915 break;
5916 }
5917 if (OpNo != ZeroVecIdx && Byte == 0) {
5918 // If mask contains a zero, use it by placing that vector first.
5919 ZeroIdx = I + SystemZ::VectorBytes;
5920 MaskFirst = false;
5921 break;
5922 }
5923 }
5924 if (ZeroIdx != -1) {
5925 SDValue IndexNodes[SystemZ::VectorBytes];
5926 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5927 if (Bytes[I] >= 0) {
5928 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5929 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5930 if (OpNo == ZeroVecIdx)
5931 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5932 else {
5933 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5934 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5935 }
5936 } else
5937 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5938 }
5939 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5940 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5941 if (MaskFirst)
5942 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5943 Mask);
5944 else
5945 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5946 Mask);
5947 }
5948 }
5949
5950 SDValue IndexNodes[SystemZ::VectorBytes];
5951 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5952 if (Bytes[I] >= 0)
5953 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5954 else
5955 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5956 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5957 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5958 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5959}
5960
5961namespace {
5962// Describes a general N-operand vector shuffle.
5963struct GeneralShuffle {
5964 GeneralShuffle(EVT vt)
5965 : VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {}
5966 void addUndef();
5967 bool add(SDValue, unsigned);
5968 SDValue getNode(SelectionDAG &, const SDLoc &);
5969 void tryPrepareForUnpack();
5970 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5971 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5972
5973 // The operands of the shuffle.
5974 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5975
5976 // Index I is -1 if byte I of the result is undefined. Otherwise the
5977 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5978 // Bytes[I] / SystemZ::VectorBytes.
5979 SmallVector<int, SystemZ::VectorBytes> Bytes;
5980
5981 // The type of the shuffle result.
5982 EVT VT;
5983
5984 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5985 unsigned UnpackFromEltSize;
5986 // True if the final unpack uses the low half.
5987 bool UnpackLow;
5988};
5989} // namespace
5990
5991// Add an extra undefined element to the shuffle.
5992void GeneralShuffle::addUndef() {
5993 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5994 for (unsigned I = 0; I < BytesPerElement; ++I)
5995 Bytes.push_back(-1);
5996}
5997
5998// Add an extra element to the shuffle, taking it from element Elem of Op.
5999// A null Op indicates a vector input whose value will be calculated later;
6000// there is at most one such input per shuffle and it always has the same
6001// type as the result. Aborts and returns false if the source vector elements
6002// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
6003// LLVM they become implicitly extended, but this is rare and not optimized.
6004bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
6005 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
6006
6007 // The source vector can have wider elements than the result,
6008 // either through an explicit TRUNCATE or because of type legalization.
6009 // We want the least significant part.
6010 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
6011 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
6012
6013 // Return false if the source elements are smaller than their destination
6014 // elements.
6015 if (FromBytesPerElement < BytesPerElement)
6016 return false;
6017
6018 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
6019 (FromBytesPerElement - BytesPerElement));
6020
6021 // Look through things like shuffles and bitcasts.
6022 while (Op.getNode()) {
6023 if (Op.getOpcode() == ISD::BITCAST)
6024 Op = Op.getOperand(0);
6025 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
6026 // See whether the bytes we need come from a contiguous part of one
6027 // operand.
6028 SmallVector<int, SystemZ::VectorBytes> OpBytes;
6029 if (!getVPermMask(Op, OpBytes))
6030 break;
6031 int NewByte;
6032 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
6033 break;
6034 if (NewByte < 0) {
6035 addUndef();
6036 return true;
6037 }
6038 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
6039 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
6040 } else if (Op.isUndef()) {
6041 addUndef();
6042 return true;
6043 } else
6044 break;
6045 }
6046
6047 // Make sure that the source of the extraction is in Ops.
6048 unsigned OpNo = 0;
6049 for (; OpNo < Ops.size(); ++OpNo)
6050 if (Ops[OpNo] == Op)
6051 break;
6052 if (OpNo == Ops.size())
6053 Ops.push_back(Op);
6054
6055 // Add the element to Bytes.
6056 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
6057 for (unsigned I = 0; I < BytesPerElement; ++I)
6058 Bytes.push_back(Base + I);
6059
6060 return true;
6061}
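// Worked example (illustrative): adding element 2 of a v8i16 operand to a
// shuffle with a v16i8 result uses FromBytesPerElement = 2 and
// BytesPerElement = 1, giving Byte = (2 * 2) % 16 + (2 - 1) = 5, i.e. the
// least significant byte of that 16-bit element, as required above.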
6062
6063// Return SDNodes for the completed shuffle.
6064SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
6065 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
6066
6067 if (Ops.size() == 0)
6068 return DAG.getUNDEF(VT);
6069
6070 // Use a single unpack if possible as the last operation.
6071 tryPrepareForUnpack();
6072
6073 // Make sure that there are at least two shuffle operands.
6074 if (Ops.size() == 1)
6075 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
6076
6077 // Create a tree of shuffles, deferring root node until after the loop.
6078 // Try to redistribute the undefined elements of non-root nodes so that
6079 // the non-root shuffles match something like a pack or merge, then adjust
6080 // the parent node's permute vector to compensate for the new order.
6081 // Among other things, this copes with vectors like <2 x i16> that were
6082 // padded with undefined elements during type legalization.
6083 //
6084 // In the best case this redistribution will lead to the whole tree
6085 // using packs and merges. It should rarely be a loss in other cases.
6086 unsigned Stride = 1;
6087 for (; Stride * 2 < Ops.size(); Stride *= 2) {
6088 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
6089 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
6090
6091 // Create a mask for just these two operands.
6092 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
6093 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6094 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
6095 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
6096 if (OpNo == I)
6097 NewBytes[J] = Byte;
6098 else if (OpNo == I + Stride)
6099 NewBytes[J] = SystemZ::VectorBytes + Byte;
6100 else
6101 NewBytes[J] = -1;
6102 }
6103 // See if it would be better to reorganize NewBytes to avoid using VPERM.
6104 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
6105 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
6106 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
6107 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
6108 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6109 if (NewBytes[J] >= 0) {
6110 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
6111 "Invalid double permute");
6112 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
6113 } else
6114 assert(NewBytesMap[J] < 0 && "Invalid double permute");
6115 }
6116 } else {
6117 // Just use NewBytes on the operands.
6118 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
6119 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
6120 if (NewBytes[J] >= 0)
6121 Bytes[J] = I * SystemZ::VectorBytes + J;
6122 }
6123 }
6124 }
6125
6126 // Now we just have 2 inputs. Put the second operand in Ops[1].
6127 if (Stride > 1) {
6128 Ops[1] = Ops[Stride];
6129 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6130 if (Bytes[I] >= int(SystemZ::VectorBytes))
6131 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
6132 }
6133
6134 // Look for an instruction that can do the permute without resorting
6135 // to VPERM.
6136 unsigned OpNo0, OpNo1;
6137 SDValue Op;
6138 if (unpackWasPrepared() && Ops[1].isUndef())
6139 Op = Ops[0];
6140 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
6141 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
6142 else
6143 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
6144
6145 Op = insertUnpackIfPrepared(DAG, DL, Op);
6146
6147 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6148}
6149
6150#ifndef NDEBUG
6151static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
6152 dbgs() << Msg.c_str() << " { ";
6153 for (unsigned I = 0; I < Bytes.size(); I++)
6154 dbgs() << Bytes[I] << " ";
6155 dbgs() << "}\n";
6156}
6157#endif
6158
6159// If the Bytes vector matches an unpack operation, prepare to do the unpack
6160// after all else by removing the zero vector and the effect of the unpack on
6161// Bytes.
6162void GeneralShuffle::tryPrepareForUnpack() {
6163 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
6164 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
6165 return;
6166
6167 // Only do this if removing the zero vector reduces the depth, otherwise
6168 // the critical path will increase with the final unpack.
6169 if (Ops.size() > 2 &&
6170 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
6171 return;
6172
6173 // Find an unpack that would allow removing the zero vector from Ops.
6174 UnpackFromEltSize = 1;
6175 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
6176 bool MatchUnpack = true;
6177 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
6178 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
6179 unsigned ToEltSize = UnpackFromEltSize * 2;
6180 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
6181 if (!IsZextByte)
6182 SrcBytes.push_back(Bytes[Elt]);
6183 if (Bytes[Elt] != -1) {
6184 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
6185 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
6186 MatchUnpack = false;
6187 break;
6188 }
6189 }
6190 }
6191 if (MatchUnpack) {
6192 if (Ops.size() == 2) {
6193 // Don't use unpack if a single source operand needs rearrangement.
6194 bool CanUseUnpackLow = true, CanUseUnpackHigh = true;
6195 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) {
6196 if (SrcBytes[i] == -1)
6197 continue;
6198 if (SrcBytes[i] % 16 != int(i))
6199 CanUseUnpackHigh = false;
6200 if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2))
6201 CanUseUnpackLow = false;
6202 if (!CanUseUnpackLow && !CanUseUnpackHigh) {
6203 UnpackFromEltSize = UINT_MAX;
6204 return;
6205 }
6206 }
6207 if (!CanUseUnpackHigh)
6208 UnpackLow = true;
6209 }
6210 break;
6211 }
6212 }
6213 if (UnpackFromEltSize > 4)
6214 return;
6215
6216 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
6217 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
6218 << ".\n";
6219 dumpBytes(Bytes, "Original Bytes vector:"););
6220
6221 // Apply the unpack in reverse to the Bytes array.
6222 unsigned B = 0;
6223 if (UnpackLow) {
6224 while (B < SystemZ::VectorBytes / 2)
6225 Bytes[B++] = -1;
6226 }
6227 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
6228 Elt += UnpackFromEltSize;
6229 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
6230 Bytes[B] = Bytes[Elt];
6231 }
6232 if (!UnpackLow) {
6233 while (B < SystemZ::VectorBytes)
6234 Bytes[B++] = -1;
6235 }
6236
6237 // Remove the zero vector from Ops
6238 Ops.erase(&Ops[ZeroVecOpNo]);
6239 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6240 if (Bytes[I] >= 0) {
6241 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
6242 if (OpNo > ZeroVecOpNo)
6243 Bytes[I] -= SystemZ::VectorBytes;
6244 }
6245
6246 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
6247 dbgs() << "\n";);
6248}
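// Worked example (illustrative): a zero extension of the first eight bytes of
// a v16i8 operand shows up here as Bytes = { Z, 0, Z, 1, ..., Z, 7 } with Z
// selecting bytes of the zero vector. UnpackFromEltSize = 1 matches, the zero
// vector is dropped, Bytes collapses to { 0, 1, ..., 7, -1, ..., -1 }, and
// getNode() finishes with a single zero-extending UNPACKL_HIGH.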
6249
6250SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
6251 const SDLoc &DL,
6252 SDValue Op) {
6253 if (!unpackWasPrepared())
6254 return Op;
6255 unsigned InBits = UnpackFromEltSize * 8;
6256 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
6257 SystemZ::VectorBits / InBits);
6258 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
6259 unsigned OutBits = InBits * 2;
6260 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
6261 SystemZ::VectorBits / OutBits);
6262 return DAG.getNode(UnpackLow ? SystemZISD::UNPACKL_LOW
6263 : SystemZISD::UNPACKL_HIGH,
6264 DL, OutVT, PackedOp);
6265}
6266
6267// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
6268static bool isScalarToVector(SDValue Op) {
6269 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
6270 if (!Op.getOperand(I).isUndef())
6271 return false;
6272 return true;
6273}
6274
6275// Return a vector of type VT that contains Value in the first element.
6276// The other elements don't matter.
6277static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6278 SDValue Value) {
6279 // If we have a constant, replicate it to all elements and let the
6280 // BUILD_VECTOR lowering take care of it.
6281 if (Value.getOpcode() == ISD::Constant ||
6282 Value.getOpcode() == ISD::ConstantFP) {
6283 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
6284 return DAG.getBuildVector(VT, DL, Ops);
6285 }
6286 if (Value.isUndef())
6287 return DAG.getUNDEF(VT);
6288 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
6289}
6290
6291// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
6292// element 1. Used for cases in which replication is cheap.
6293static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6294 SDValue Op0, SDValue Op1) {
6295 if (Op0.isUndef()) {
6296 if (Op1.isUndef())
6297 return DAG.getUNDEF(VT);
6298 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
6299 }
6300 if (Op1.isUndef())
6301 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
6302 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
6303 buildScalarToVector(DAG, DL, VT, Op0),
6304 buildScalarToVector(DAG, DL, VT, Op1));
6305}
6306
6307// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
6308// vector for them.
6309static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
6310 SDValue Op1) {
6311 if (Op0.isUndef() && Op1.isUndef())
6312 return DAG.getUNDEF(MVT::v2i64);
6313 // If one of the two inputs is undefined then replicate the other one,
6314 // in order to avoid using another register unnecessarily.
6315 if (Op0.isUndef())
6316 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6317 else if (Op1.isUndef())
6318 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6319 else {
6320 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6321 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6322 }
6323 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
6324}
6325
6326// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
6327// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
6328// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
6329// would benefit from this representation and return it if so.
6330static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
6331 BuildVectorSDNode *BVN) {
6332 EVT VT = BVN->getValueType(0);
6333 unsigned NumElements = VT.getVectorNumElements();
6334
6335 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
6336 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
6337 // need a BUILD_VECTOR, add an additional placeholder operand for that
6338 // BUILD_VECTOR and store its operands in ResidueOps.
6339 GeneralShuffle GS(VT);
6340 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
6341 bool FoundOne = false;
6342 for (unsigned I = 0; I < NumElements; ++I) {
6343 SDValue Op = BVN->getOperand(I);
6344 if (Op.getOpcode() == ISD::TRUNCATE)
6345 Op = Op.getOperand(0);
6346 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6347 Op.getOperand(1).getOpcode() == ISD::Constant) {
6348 unsigned Elem = Op.getConstantOperandVal(1);
6349 if (!GS.add(Op.getOperand(0), Elem))
6350 return SDValue();
6351 FoundOne = true;
6352 } else if (Op.isUndef()) {
6353 GS.addUndef();
6354 } else {
6355 if (!GS.add(SDValue(), ResidueOps.size()))
6356 return SDValue();
6357 ResidueOps.push_back(BVN->getOperand(I));
6358 }
6359 }
6360
6361 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
6362 if (!FoundOne)
6363 return SDValue();
6364
6365 // Create the BUILD_VECTOR for the remaining elements, if any.
6366 if (!ResidueOps.empty()) {
6367 while (ResidueOps.size() < NumElements)
6368 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
6369 for (auto &Op : GS.Ops) {
6370 if (!Op.getNode()) {
6371 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
6372 break;
6373 }
6374 }
6375 }
6376 return GS.getNode(DAG, SDLoc(BVN));
6377}
6378
6379bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
6380 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
6381 return true;
6382 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
6383 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
6384 return true;
6385 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
6386 return true;
6387 return false;
6388}
6389
6390static SDValue mergeHighParts(SelectionDAG &DAG, const SDLoc &DL,
6391 unsigned MergedBits, EVT VT, SDValue Op0,
6392 SDValue Op1) {
6393 MVT IntVecVT = MVT::getVectorVT(MVT::getIntegerVT(MergedBits),
6394 SystemZ::VectorBits / MergedBits);
6395 assert(VT.getSizeInBits() == 128 && IntVecVT.getSizeInBits() == 128 &&
6396 "Handling full vectors only.");
6397 Op0 = DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0);
6398 Op1 = DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op1);
6399 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, DL, IntVecVT, Op0, Op1);
6400 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6401}
6402
6403static SDValue buildFPVecFromScalars4(SelectionDAG &DAG, const SDLoc &DL,
6404 EVT VT, SmallVectorImpl<SDValue> &Elems,
6405 unsigned Pos) {
6406 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[Pos + 0], Elems[Pos + 1]);
6407 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[Pos + 2], Elems[Pos + 3]);
6408 // Avoid unnecessary undefs by reusing the other operand.
6409 if (Op01.isUndef()) {
6410 if (Op23.isUndef())
6411 return Op01;
6412 Op01 = Op23;
6413 } else if (Op23.isUndef())
6414 Op23 = Op01;
6415 // Merging identical replications is a no-op.
6416 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
6417 return Op01;
6418 unsigned MergedBits = VT.getSimpleVT().getScalarSizeInBits() * 2;
6419 return mergeHighParts(DAG, DL, MergedBits, VT, Op01, Op23);
6420}
6421
6422// Combine GPR scalar values Elems into a vector of type VT.
6423SDValue
6424SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6425 SmallVectorImpl<SDValue> &Elems) const {
6426 // See whether there is a single replicated value.
6427 SDValue Single;
6428 unsigned int NumElements = Elems.size();
6429 unsigned int Count = 0;
6430 for (auto Elem : Elems) {
6431 if (!Elem.isUndef()) {
6432 if (!Single.getNode())
6433 Single = Elem;
6434 else if (Elem != Single) {
6435 Single = SDValue();
6436 break;
6437 }
6438 Count += 1;
6439 }
6440 }
6441 // There are three cases here:
6442 //
6443 // - if the only defined element is a loaded one, the best sequence
6444 // is a replicating load.
6445 //
6446 // - otherwise, if the only defined element is an i64 value, we will
6447 // end up with the same VLVGP sequence regardless of whether we short-cut
6448 // for replication or fall through to the later code.
6449 //
6450 // - otherwise, if the only defined element is an i32 or smaller value,
6451 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
6452 // This is only a win if the single defined element is used more than once.
6453 // In other cases we're better off using a single VLVGx.
6454 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
6455 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
6456
6457 // If all elements are loads, use VLREP/VLEs (below).
6458 bool AllLoads = true;
6459 for (auto Elem : Elems)
6460 if (!isVectorElementLoad(Elem)) {
6461 AllLoads = false;
6462 break;
6463 }
6464
6465 // The best way of building a v2i64 from two i64s is to use VLVGP.
6466 if (VT == MVT::v2i64 && !AllLoads)
6467 return joinDwords(DAG, DL, Elems[0], Elems[1]);
6468
6469 // Use a 64-bit merge high to combine two doubles.
6470 if (VT == MVT::v2f64 && !AllLoads)
6471 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6472
6473 // Build v4f32 values directly from the FPRs:
6474 //
6475 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
6476 // V V VMRHF
6477 // <ABxx> <CDxx>
6478 // V VMRHG
6479 // <ABCD>
6480 if (VT == MVT::v4f32 && !AllLoads)
6481 return buildFPVecFromScalars4(DAG, DL, VT, Elems, 0);
6482
6483 // Same for v8f16.
6484 if (VT == MVT::v8f16 && !AllLoads) {
6485 SDValue Op0123 = buildFPVecFromScalars4(DAG, DL, VT, Elems, 0);
6486 SDValue Op4567 = buildFPVecFromScalars4(DAG, DL, VT, Elems, 4);
6487 // Avoid unnecessary undefs by reusing the other operand.
6488 if (Op0123.isUndef())
6489 Op0123 = Op4567;
6490 else if (Op4567.isUndef())
6491 Op4567 = Op0123;
6492 // Merging identical replications is a no-op.
6493 if (Op0123.getOpcode() == SystemZISD::REPLICATE && Op0123 == Op4567)
6494 return Op0123;
6495 return mergeHighParts(DAG, DL, 64, VT, Op0123, Op4567);
6496 }
6497
6498 // Collect the constant terms.
6499 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
6500 SmallVector<bool, 16> Done(NumElements, false);
6501
6502 unsigned NumConstants = 0;
6503 for (unsigned I = 0; I < NumElements; ++I) {
6504 SDValue Elem = Elems[I];
6505 if (Elem.getOpcode() == ISD::Constant ||
6506 Elem.getOpcode() == ISD::ConstantFP) {
6507 NumConstants += 1;
6508 Constants[I] = Elem;
6509 Done[I] = true;
6510 }
6511 }
6512 // If there was at least one constant, fill in the other elements of
6513 // Constants with undefs to get a full vector constant and use that
6514 // as the starting point.
6515 SDValue Result;
6516 SDValue ReplicatedVal;
6517 if (NumConstants > 0) {
6518 for (unsigned I = 0; I < NumElements; ++I)
6519 if (!Constants[I].getNode())
6520 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
6521 Result = DAG.getBuildVector(VT, DL, Constants);
6522 } else {
6523 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
6524 // avoid a false dependency on any previous contents of the vector
6525 // register.
6526
6527 // Use a VLREP if at least one element is a load. Make sure to replicate
6528 // the load with the most elements having its value.
6529 std::map<const SDNode*, unsigned> UseCounts;
6530 SDNode *LoadMaxUses = nullptr;
6531 for (unsigned I = 0; I < NumElements; ++I)
6532 if (isVectorElementLoad(Elems[I])) {
6533 SDNode *Ld = Elems[I].getNode();
6534 unsigned Count = ++UseCounts[Ld];
6535 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < Count)
6536 LoadMaxUses = Ld;
6537 }
6538 if (LoadMaxUses != nullptr) {
6539 ReplicatedVal = SDValue(LoadMaxUses, 0);
6540 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
6541 } else {
6542 // Try to use VLVGP.
6543 unsigned I1 = NumElements / 2 - 1;
6544 unsigned I2 = NumElements - 1;
6545 bool Def1 = !Elems[I1].isUndef();
6546 bool Def2 = !Elems[I2].isUndef();
6547 if (Def1 || Def2) {
6548 SDValue Elem1 = Elems[Def1 ? I1 : I2];
6549 SDValue Elem2 = Elems[Def2 ? I2 : I1];
6550 Result = DAG.getNode(ISD::BITCAST, DL, VT,
6551 joinDwords(DAG, DL, Elem1, Elem2));
6552 Done[I1] = true;
6553 Done[I2] = true;
6554 } else
6555 Result = DAG.getUNDEF(VT);
6556 }
6557 }
6558
6559 // Use VLVGx to insert the other elements.
6560 for (unsigned I = 0; I < NumElements; ++I)
6561 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
6562 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
6563 DAG.getConstant(I, DL, MVT::i32));
6564 return Result;
6565}
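// Note (illustrative): for a v4i32 with no constants and no loaded elements,
// I1 = 1 and I2 = 3 above, so those two elements are combined with a single
// VLVGP (they land in the low word of each doubleword) and any remaining
// defined elements are inserted afterwards with VLVGx.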
6566
6567SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
6568 SelectionDAG &DAG) const {
6569 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
6570 SDLoc DL(Op);
6571 EVT VT = Op.getValueType();
6572
6573 if (BVN->isConstant()) {
6574 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
6575 return Op;
6576
6577 // Fall back to loading it from memory.
6578 return SDValue();
6579 }
6580
6581 // See if we should use shuffles to construct the vector from other vectors.
6582 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
6583 return Res;
6584
6585 // Detect SCALAR_TO_VECTOR conversions.
6586 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
6587 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
6588
6589 // Otherwise use buildVector to build the vector up from GPRs.
6590 unsigned NumElements = Op.getNumOperands();
6591 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
6592 for (unsigned I = 0; I < NumElements; ++I)
6593 Ops[I] = Op.getOperand(I);
6594 return buildVector(DAG, DL, VT, Ops);
6595}
6596
6597SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
6598 SelectionDAG &DAG) const {
6599 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
6600 SDLoc DL(Op);
6601 EVT VT = Op.getValueType();
6602 unsigned NumElements = VT.getVectorNumElements();
6603
6604 if (VSN->isSplat()) {
6605 SDValue Op0 = Op.getOperand(0);
6606 unsigned Index = VSN->getSplatIndex();
6607 assert(Index < VT.getVectorNumElements() &&
6608 "Splat index should be defined and in first operand");
6609 // See whether the value we're splatting is directly available as a scalar.
6610 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6611 Op0.getOpcode() == ISD::BUILD_VECTOR)
6612 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
6613 // Otherwise keep it as a vector-to-vector operation.
6614 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
6615 DAG.getTargetConstant(Index, DL, MVT::i32));
6616 }
6617
6618 GeneralShuffle GS(VT);
6619 for (unsigned I = 0; I < NumElements; ++I) {
6620 int Elt = VSN->getMaskElt(I);
6621 if (Elt < 0)
6622 GS.addUndef();
6623 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
6624 unsigned(Elt) % NumElements))
6625 return SDValue();
6626 }
6627 return GS.getNode(DAG, SDLoc(VSN));
6628}
6629
6630SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
6631 SelectionDAG &DAG) const {
6632 SDLoc DL(Op);
6633 // Just insert the scalar into element 0 of an undefined vector.
6634 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
6635 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
6636 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
6637}
6638
6639// Shift the lower 2 bytes of Op to the left in order to insert into the
6640// upper 2 bytes of the FP register.
6641static SDValue convertToF16(SDValue Op, SelectionDAG &DAG) {
6642 assert(Op.getSimpleValueType() == MVT::i64 &&
6643 "Expected to convert i64 to f16.");
6644 SDLoc DL(Op);
6645 SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i64, Op,
6646 DAG.getConstant(48, DL, MVT::i64));
6647 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shft);
6648 SDValue F16Val =
6649 DAG.getTargetExtractSubreg(SystemZ::subreg_h16, DL, MVT::f16, BCast);
6650 return F16Val;
6651}
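// Layout note (illustrative): an f16 value occupies the leftmost two bytes of
// its FP register, so the i64 bit pattern is shifted left by 48 bits, bitcast
// to f64, and the half value is then extracted as the subreg_h16 subregister
// of that f64.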
6652
6653// Extract Op into GPR and shift the 2 f16 bytes to the right.
6654static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG) {
6655 assert(Op.getSimpleValueType() == MVT::f16 &&
6656 "Expected to convert f16 to i64.");
6657 SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
6658 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, MVT::f64,
6659 SDValue(U32, 0), Op);
6660 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
6661 SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i64, BCast,
6662 DAG.getConstant(48, DL, MVT::i32));
6663 return Shft;
6664}
6665
6666SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6667 SelectionDAG &DAG) const {
6668 // Handle insertions of floating-point values.
6669 SDLoc DL(Op);
6670 SDValue Op0 = Op.getOperand(0);
6671 SDValue Op1 = Op.getOperand(1);
6672 SDValue Op2 = Op.getOperand(2);
6673 EVT VT = Op.getValueType();
6674
6675 // Insertions into constant indices of a v2f64 can be done using VPDI.
6676 // However, if the inserted value is a bitcast or a constant then it's
6677 // better to use GPRs, as below.
6678 if (VT == MVT::v2f64 &&
6679 Op1.getOpcode() != ISD::BITCAST &&
6680 Op1.getOpcode() != ISD::ConstantFP &&
6681 Op2.getOpcode() == ISD::Constant) {
6682 uint64_t Index = Op2->getAsZExtVal();
6683 unsigned Mask = VT.getVectorNumElements() - 1;
6684 if (Index <= Mask)
6685 return Op;
6686 }
6687
6688 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
6689 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
6690 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
6691 SDValue IntOp1 =
6692 VT == MVT::v8f16
6693 ? DAG.getZExtOrTrunc(convertFromF16(Op1, DL, DAG), DL, MVT::i32)
6694 : DAG.getNode(ISD::BITCAST, DL, IntVT, Op1);
6695 SDValue Res =
6696 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
6697 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), IntOp1, Op2);
6698 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6699}
6700
6701SDValue
6702SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6703 SelectionDAG &DAG) const {
6704 // Handle extractions of floating-point values.
6705 SDLoc DL(Op);
6706 SDValue Op0 = Op.getOperand(0);
6707 SDValue Op1 = Op.getOperand(1);
6708 EVT VT = Op.getValueType();
6709 EVT VecVT = Op0.getValueType();
6710
6711 // Extractions of constant indices can be done directly.
6712 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
6713 uint64_t Index = CIndexN->getZExtValue();
6714 unsigned Mask = VecVT.getVectorNumElements() - 1;
6715 if (Index <= Mask)
6716 return Op;
6717 }
6718
6719 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
6720 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
6721 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
6722 MVT ExtrVT = IntVT == MVT::i16 ? MVT::i32 : IntVT;
6723 SDValue Extr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrVT,
6724 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
6725 if (VT == MVT::f16)
6726 return convertToF16(DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Extr), DAG);
6727 return DAG.getNode(ISD::BITCAST, DL, VT, Extr);
6728}
6729
6730SDValue SystemZTargetLowering::
6731lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6732 SDValue PackedOp = Op.getOperand(0);
6733 EVT OutVT = Op.getValueType();
6734 EVT InVT = PackedOp.getValueType();
6735 unsigned ToBits = OutVT.getScalarSizeInBits();
6736 unsigned FromBits = InVT.getScalarSizeInBits();
6737 unsigned StartOffset = 0;
6738
6739 // If the input is a VECTOR_SHUFFLE, there are a number of important
6740 // cases where we can directly implement the sign-extension of the
6741 // original input lanes of the shuffle.
6742 if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
6743 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(PackedOp.getNode());
6744 ArrayRef<int> ShuffleMask = SVN->getMask();
6745 int OutNumElts = OutVT.getVectorNumElements();
6746
6747 // Recognize the special case where the sign-extension can be done
6748 // by the VSEG instruction. Handled via the default expander.
6749 if (ToBits == 64 && OutNumElts == 2) {
6750 int NumElem = ToBits / FromBits;
6751 if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1)
6752 return SDValue();
6753 }
6754
6755 // Recognize the special case where we can fold the shuffle by
6756 // replacing some of the UNPACK_HIGH with UNPACK_LOW.
6757 int StartOffsetCandidate = -1;
6758 for (int Elt = 0; Elt < OutNumElts; Elt++) {
6759 if (ShuffleMask[Elt] == -1)
6760 continue;
6761 if (ShuffleMask[Elt] % OutNumElts == Elt) {
6762 if (StartOffsetCandidate == -1)
6763 StartOffsetCandidate = ShuffleMask[Elt] - Elt;
6764 if (StartOffsetCandidate == ShuffleMask[Elt] - Elt)
6765 continue;
6766 }
6767 StartOffsetCandidate = -1;
6768 break;
6769 }
6770 if (StartOffsetCandidate != -1) {
6771 StartOffset = StartOffsetCandidate;
6772 PackedOp = PackedOp.getOperand(0);
6773 }
6774 }
6775
6776 do {
6777 FromBits *= 2;
6778 unsigned OutNumElts = SystemZ::VectorBits / FromBits;
6779 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), OutNumElts);
6780 unsigned Opcode = SystemZISD::UNPACK_HIGH;
6781 if (StartOffset >= OutNumElts) {
6782 Opcode = SystemZISD::UNPACK_LOW;
6783 StartOffset -= OutNumElts;
6784 }
6785 PackedOp = DAG.getNode(Opcode, SDLoc(PackedOp), OutVT, PackedOp);
6786 } while (FromBits != ToBits);
6787 return PackedOp;
6788}
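// Illustrative example: sign-extending v16i8 input lanes to a v2i64 result
// walks the do/while loop above three times, doubling the element width on
// each pass: v16i8 -> v8i16 -> v4i32 -> v2i64. With StartOffset == 0 every
// step uses UNPACK_HIGH; once StartOffset reaches or exceeds the current
// output element count, that step switches to UNPACK_LOW instead.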
6789
6790// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
6791SDValue SystemZTargetLowering::
6792lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6793 SDValue PackedOp = Op.getOperand(0);
6794 SDLoc DL(Op);
6795 EVT OutVT = Op.getValueType();
6796 EVT InVT = PackedOp.getValueType();
6797 unsigned InNumElts = InVT.getVectorNumElements();
6798 unsigned OutNumElts = OutVT.getVectorNumElements();
6799 unsigned NumInPerOut = InNumElts / OutNumElts;
6800
6801 SDValue ZeroVec =
6802 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
6803
6804 SmallVector<int, 16> Mask(InNumElts);
6805 unsigned ZeroVecElt = InNumElts;
6806 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6807 unsigned MaskElt = PackedElt * NumInPerOut;
6808 unsigned End = MaskElt + NumInPerOut - 1;
6809 for (; MaskElt < End; MaskElt++)
6810 Mask[MaskElt] = ZeroVecElt++;
6811 Mask[MaskElt] = PackedElt;
6812 }
6813 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6814 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6815}
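// Worked example (illustrative): for InVT = v16i8 and OutVT = v2i64,
// NumInPerOut is 8 and the mask built above is
//   { 16,17,18,19,20,21,22,0, 23,24,25,26,27,28,29,1 }
// i.e. each 8-byte output element takes seven zero bytes from ZeroVec
// followed by one input byte, which lands in the least-significant
// position of the widened element on this big-endian target.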
6816
6817SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6818 unsigned ByScalar) const {
6819 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6820 SDValue Op0 = Op.getOperand(0);
6821 SDValue Op1 = Op.getOperand(1);
6822 SDLoc DL(Op);
6823 EVT VT = Op.getValueType();
6824 unsigned ElemBitSize = VT.getScalarSizeInBits();
6825
6826 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6827 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6828 APInt SplatBits, SplatUndef;
6829 unsigned SplatBitSize;
6830 bool HasAnyUndefs;
6831 // Check for constant splats. Use ElemBitSize as the minimum element
6832 // width and reject splats that need wider elements.
6833 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6834 ElemBitSize, true) &&
6835 SplatBitSize == ElemBitSize) {
6836 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6837 DL, MVT::i32);
6838 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6839 }
6840 // Check for variable splats.
6841 BitVector UndefElements;
6842 SDValue Splat = BVN->getSplatValue(&UndefElements);
6843 if (Splat) {
6844 // Since i32 is the smallest legal type, we either need a no-op
6845 // or a truncation.
6846 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6847 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6848 }
6849 }
6850
6851 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6852 // and the shift amount is directly available in a GPR.
6853 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6854 if (VSN->isSplat()) {
6855 SDValue VSNOp0 = VSN->getOperand(0);
6856 unsigned Index = VSN->getSplatIndex();
6857 assert(Index < VT.getVectorNumElements() &&
6858 "Splat index should be defined and in first operand");
6859 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6860 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6861 // Since i32 is the smallest legal type, we either need a no-op
6862 // or a truncation.
6863 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6864 VSNOp0.getOperand(Index));
6865 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6866 }
6867 }
6868 }
6869
6870 // Otherwise just treat the current form as legal.
6871 return Op;
6872}
6873
6874SDValue SystemZTargetLowering::lowerFSHL(SDValue Op, SelectionDAG &DAG) const {
6875 SDLoc DL(Op);
6876
6877 // i128 FSHL with a constant amount that is a multiple of 8 can be
6878 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6879 // facility, FSHL with a constant amount less than 8 can be implemented
6880 // via SHL_DOUBLE_BIT, and FSHL with other constant amounts by a
6881 // combination of the two.
6882 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6883 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6884 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6885 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6886 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6887 if (ShiftAmt > 120) {
6888 // For N in 121..128, fshl N == fshr (128 - N), and for 1 <= N < 8
6889 // SHR_DOUBLE_BIT emits fewer instructions.
6890 SDValue Val =
6891 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6892 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6893 return DAG.getBitcast(MVT::i128, Val);
6894 }
6895 SmallVector<int, 16> Mask(16);
6896 for (unsigned Elt = 0; Elt < 16; Elt++)
6897 Mask[Elt] = (ShiftAmt >> 3) + Elt;
6898 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6899 if ((ShiftAmt & 7) == 0)
6900 return DAG.getBitcast(MVT::i128, Shuf1);
6901 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op1, Op1, Mask);
6902 SDValue Val =
6903 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Shuf1, Shuf2,
6904 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6905 return DAG.getBitcast(MVT::i128, Val);
6906 }
6907 }
6908
6909 return SDValue();
6910}
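// Worked example (illustrative): for an i128 FSHL with ShiftAmt = 12 the
// byte part is 12 >> 3 = 1 and the bit part is 12 & 7 = 4. Shuf1 selects
// bytes 1..16 of the Op0:Op1 concatenation, Shuf2 selects bytes 1..16 of
// Op1:Op1, and SHL_DOUBLE_BIT then shifts the (Shuf1, Shuf2) pair left by
// the remaining 4 bits to produce the final value.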
6911
6912SDValue SystemZTargetLowering::lowerFSHR(SDValue Op, SelectionDAG &DAG) const {
6913 SDLoc DL(Op);
6914
6915 // i128 FSHR with a constant amount that is a multiple of 8 can be
6916 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6917 // facility, FSHR with a constant amount less than 8 can be implemented
6918 // via SHR_DOUBLE_BIT, and FSHR with other constant amounts by a
6919 // combination of the two.
6920 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6921 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6922 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6923 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6924 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6925 if (ShiftAmt > 120) {
6926 // For N in 121..128, fshr N == fshl (128 - N), and for 1 <= N < 8
6927 // SHL_DOUBLE_BIT emits fewer instructions.
6928 SDValue Val =
6929 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6930 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6931 return DAG.getBitcast(MVT::i128, Val);
6932 }
6933 SmallVector<int, 16> Mask(16);
6934 for (unsigned Elt = 0; Elt < 16; Elt++)
6935 Mask[Elt] = 16 - (ShiftAmt >> 3) + Elt;
6936 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6937 if ((ShiftAmt & 7) == 0)
6938 return DAG.getBitcast(MVT::i128, Shuf1);
6939 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op0, Mask);
6940 SDValue Val =
6941 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Shuf2, Shuf1,
6942 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6943 return DAG.getBitcast(MVT::i128, Val);
6944 }
6945 }
6946
6947 return SDValue();
6948}
6949
6950SDValue SystemZTargetLowering::lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG) const {
6951 SDLoc DL(Op);
6952 SDValue Src = Op.getOperand(0);
6953 MVT DstVT = Op.getSimpleValueType();
6954
6955 AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op);
6956 unsigned SrcAS = N->getSrcAddressSpace();
6957
6958 assert(SrcAS != N->getDestAddressSpace() &&
6959 "addrspacecast must be between different address spaces");
6960
6961 // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
6962 // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
6963 if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
6964 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Src,
6965 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6966 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6967 } else if (DstVT == MVT::i32) {
6968 Op = DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);
6969 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op,
6970 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6971 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6972 } else {
6973 report_fatal_error("Bad address space in addrspacecast");
6974 }
6975 return Op;
6976}
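// Illustrative example: casting a ptr32 (address space 1) value of
// 0x80001234 to a 64-bit pointer masks off the unused high bit and
// zero-extends, yielding 0x0000000000001234; the opposite direction
// truncates to 32 bits and applies the same 0x7fffffff mask.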
6977
6978SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op,
6979 SelectionDAG &DAG) const {
6980 SDValue In = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
6981 if (In.getSimpleValueType() != MVT::f16)
6982 return Op; // Legal
6983 return SDValue(); // Let legalizer emit the libcall.
6984}
6985
6986SDValue SystemZTargetLowering::useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
6987 MVT VT, SDValue Arg, SDLoc DL,
6988 SDValue Chain, bool IsStrict) const {
6989 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
6990 MakeLibCallOptions CallOptions;
6991 SDValue Result;
6992 std::tie(Result, Chain) =
6993 makeLibCall(DAG, LC, VT, Arg, CallOptions, DL, Chain);
6994 return IsStrict ? DAG.getMergeValues({Result, Chain}, DL) : Result;
6995}
6996
6997SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op,
6998 SelectionDAG &DAG) const {
6999 bool IsSigned = (Op->getOpcode() == ISD::FP_TO_SINT ||
7000 Op->getOpcode() == ISD::STRICT_FP_TO_SINT);
7001 bool IsStrict = Op->isStrictFPOpcode();
7002 SDLoc DL(Op);
7003 MVT VT = Op.getSimpleValueType();
7004 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
7005 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
7006 EVT InVT = InOp.getValueType();
7007
7008 // FP to unsigned is not directly supported on z10. Promoting an i32
7009 // result to (signed) i64 doesn't generate an inexact condition (fp
7010 // exception) for values that are outside the i32 range but in the i64
7011 // range, so use the default expansion.
7012 if (!Subtarget.hasFPExtension() && !IsSigned)
7013 // Expand i32/i64. F16 values will be recognized to fit and extended.
7014 return SDValue();
7015
7016 // Conversion from f16 is done via f32.
7017 if (InOp.getSimpleValueType() == MVT::f16) {
7018 SmallVector<SDValue, 1> Results;
7019 LowerOperationWrapper(Op.getNode(), Results, DAG);
7020 return DAG.getMergeValues(Results, DL);
7021 }
7022
7023 if (VT == MVT::i128) {
7024 RTLIB::Libcall LC =
7025 IsSigned ? RTLIB::getFPTOSINT(InVT, VT) : RTLIB::getFPTOUINT(InVT, VT);
7026 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
7027 }
7028
7029 return Op; // Legal
7030}
7031
7032SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op,
7033 SelectionDAG &DAG) const {
7034 bool IsSigned = (Op->getOpcode() == ISD::SINT_TO_FP ||
7035 Op->getOpcode() == ISD::STRICT_SINT_TO_FP);
7036 bool IsStrict = Op->isStrictFPOpcode();
7037 SDLoc DL(Op);
7038 MVT VT = Op.getSimpleValueType();
7039 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
7040 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
7041 EVT InVT = InOp.getValueType();
7042
7043 // Conversion to f16 is done via f32.
7044 if (VT == MVT::f16) {
7045 SmallVector<SDValue, 1> Results;
7046 LowerOperationWrapper(Op.getNode(), Results, DAG);
7047 return DAG.getMergeValues(Results, DL);
7048 }
7049
7050 // Unsigned to fp is not directly supported on z10.
7051 if (!Subtarget.hasFPExtension() && !IsSigned)
7052 return SDValue(); // Expand i64.
7053
7054 if (InVT == MVT::i128) {
7055 RTLIB::Libcall LC =
7056 IsSigned ? RTLIB::getSINTTOFP(InVT, VT) : RTLIB::getUINTTOFP(InVT, VT);
7057 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
7058 }
7059
7060 return Op; // Legal
7061}
7062
7063// Lower an f16 LOAD in case of no vector support.
7064SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
7065 SelectionDAG &DAG) const {
7066 EVT RegVT = Op.getValueType();
7067 assert(RegVT == MVT::f16 && "Expected to lower an f16 load.");
7068 (void)RegVT;
7069
7070 // Load as integer.
7071 SDLoc DL(Op);
7072 SDValue NewLd;
7073 if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) {
7074 assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load");
7075 NewLd = DAG.getAtomicLoad(ISD::EXTLOAD, DL, MVT::i16, MVT::i64,
7076 AtomicLd->getChain(), AtomicLd->getBasePtr(),
7077 AtomicLd->getMemOperand());
7078 } else {
7079 LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
7080 assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load");
7081 NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i64, Ld->getChain(),
7082 Ld->getBasePtr(), Ld->getPointerInfo(), MVT::i16,
7083 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
7084 }
7085 SDValue F16Val = convertToF16(NewLd, DAG);
7086 return DAG.getMergeValues({F16Val, NewLd.getValue(1)}, DL);
7087}
7088
7089// Lower an f16 STORE in case of no vector support.
7090SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op,
7091 SelectionDAG &DAG) const {
7092 SDLoc DL(Op);
7093 SDValue Shft = convertFromF16(Op->getOperand(1), DL, DAG);
7094
7095 if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Op.getNode()))
7096 return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MVT::i16, AtomicSt->getChain(),
7097 Shft, AtomicSt->getBasePtr(),
7098 AtomicSt->getMemOperand());
7099
7100 StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
7101 return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(), MVT::i16,
7102 St->getMemOperand());
7103}
7104
7105SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
7106 SelectionDAG &DAG) const {
7107 SDLoc DL(Op);
7108 MVT ResultVT = Op.getSimpleValueType();
7109 SDValue Arg = Op.getOperand(0);
7110 unsigned Check = Op.getConstantOperandVal(1);
7111
7112 unsigned TDCMask = 0;
7113 if (Check & fcSNan)
7114 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
7115 if (Check & fcQNan)
7116 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
7117 if (Check & fcPosInf)
7118 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
7119 if (Check & fcNegInf)
7120 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
7121 if (Check & fcPosNormal)
7122 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
7123 if (Check & fcNegNormal)
7124 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
7125 if (Check & fcPosSubnormal)
7126 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
7127 if (Check & fcNegSubnormal)
7128 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
7129 if (Check & fcPosZero)
7130 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
7131 if (Check & fcNegZero)
7132 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
7133 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
7134
7135 if (Arg.getSimpleValueType() == MVT::f16)
7136 Arg = DAG.getFPExtendOrRound(Arg, SDLoc(Arg), MVT::f32);
7137 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
7138 return getCCResult(DAG, Intr);
7139}
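// Illustrative note: a query such as is_fpclass(x, fcNan) sets both the
// SNaN and QNaN bits in TDCMask, so the TEST DATA CLASS operation reports
// CC 1 exactly when x is any kind of NaN (and CC 0 otherwise); getCCResult
// then converts that CC value into the node's boolean result.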
7140
7141SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
7142 SelectionDAG &DAG) const {
7143 SDLoc DL(Op);
7144 SDValue Chain = Op.getOperand(0);
7145
7146 // STCKF only supports a memory operand, so we have to use a temporary.
7147 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
7148 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
7149 MachinePointerInfo MPI =
7150 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
7151
7152 // Use STCKF to store the TOD clock into the temporary.
7153 SDValue StoreOps[] = {Chain, StackPtr};
7154 Chain = DAG.getMemIntrinsicNode(
7155 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
7156 MPI, MaybeAlign(), MachineMemOperand::MOStore);
7157
7158 // And read it back from there.
7159 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
7160}
7161
7162SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
7163 SelectionDAG &DAG) const {
7164 switch (Op.getOpcode()) {
7165 case ISD::FRAMEADDR:
7166 return lowerFRAMEADDR(Op, DAG);
7167 case ISD::RETURNADDR:
7168 return lowerRETURNADDR(Op, DAG);
7169 case ISD::BR_CC:
7170 return lowerBR_CC(Op, DAG);
7171 case ISD::SELECT_CC:
7172 return lowerSELECT_CC(Op, DAG);
7173 case ISD::SETCC:
7174 return lowerSETCC(Op, DAG);
7175 case ISD::STRICT_FSETCC:
7176 return lowerSTRICT_FSETCC(Op, DAG, false);
7177 case ISD::STRICT_FSETCCS:
7178 return lowerSTRICT_FSETCC(Op, DAG, true);
7179 case ISD::GlobalAddress:
7180 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
7181 case ISD::GlobalTLSAddress:
7182 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
7183 case ISD::BlockAddress:
7184 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
7185 case ISD::JumpTable:
7186 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
7187 case ISD::ConstantPool:
7188 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
7189 case ISD::BITCAST:
7190 return lowerBITCAST(Op, DAG);
7191 case ISD::VASTART:
7192 return lowerVASTART(Op, DAG);
7193 case ISD::VACOPY:
7194 return lowerVACOPY(Op, DAG);
7195 case ISD::DYNAMIC_STACKALLOC:
7196 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7197 case ISD::GET_DYNAMIC_AREA_OFFSET:
7198 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
7199 case ISD::MULHS:
7200 return lowerMULH(Op, DAG, SystemZISD::SMUL_LOHI);
7201 case ISD::MULHU:
7202 return lowerMULH(Op, DAG, SystemZISD::UMUL_LOHI);
7203 case ISD::SMUL_LOHI:
7204 return lowerSMUL_LOHI(Op, DAG);
7205 case ISD::UMUL_LOHI:
7206 return lowerUMUL_LOHI(Op, DAG);
7207 case ISD::SDIVREM:
7208 return lowerSDIVREM(Op, DAG);
7209 case ISD::UDIVREM:
7210 return lowerUDIVREM(Op, DAG);
7211 case ISD::SADDO:
7212 case ISD::SSUBO:
7213 case ISD::UADDO:
7214 case ISD::USUBO:
7215 return lowerXALUO(Op, DAG);
7216 case ISD::UADDO_CARRY:
7217 case ISD::USUBO_CARRY:
7218 return lowerUADDSUBO_CARRY(Op, DAG);
7219 case ISD::OR:
7220 return lowerOR(Op, DAG);
7221 case ISD::CTPOP:
7222 return lowerCTPOP(Op, DAG);
7223 case ISD::VECREDUCE_ADD:
7224 return lowerVECREDUCE_ADD(Op, DAG);
7225 case ISD::ATOMIC_FENCE:
7226 return lowerATOMIC_FENCE(Op, DAG);
7227 case ISD::ATOMIC_SWAP:
7228 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
7229 case ISD::ATOMIC_STORE:
7230 return lowerATOMIC_STORE(Op, DAG);
7231 case ISD::ATOMIC_LOAD:
7232 return lowerATOMIC_LOAD(Op, DAG);
7233 case ISD::ATOMIC_LOAD_ADD:
7234 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
7235 case ISD::ATOMIC_LOAD_SUB:
7236 return lowerATOMIC_LOAD_SUB(Op, DAG);
7237 case ISD::ATOMIC_LOAD_AND:
7238 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
7239 case ISD::ATOMIC_LOAD_OR:
7240 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
7241 case ISD::ATOMIC_LOAD_XOR:
7242 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
7243 case ISD::ATOMIC_LOAD_NAND:
7244 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
7245 case ISD::ATOMIC_LOAD_MIN:
7246 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
7247 case ISD::ATOMIC_LOAD_MAX:
7248 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
7249 case ISD::ATOMIC_LOAD_UMIN:
7250 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
7251 case ISD::ATOMIC_LOAD_UMAX:
7252 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
7253 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
7254 return lowerATOMIC_CMP_SWAP(Op, DAG);
7255 case ISD::STACKSAVE:
7256 return lowerSTACKSAVE(Op, DAG);
7257 case ISD::STACKRESTORE:
7258 return lowerSTACKRESTORE(Op, DAG);
7259 case ISD::PREFETCH:
7260 return lowerPREFETCH(Op, DAG);
7261 case ISD::INTRINSIC_W_CHAIN:
7262 return lowerINTRINSIC_W_CHAIN(Op, DAG);
7263 case ISD::INTRINSIC_WO_CHAIN:
7264 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
7265 case ISD::BUILD_VECTOR:
7266 return lowerBUILD_VECTOR(Op, DAG);
7267 case ISD::VECTOR_SHUFFLE:
7268 return lowerVECTOR_SHUFFLE(Op, DAG);
7269 case ISD::SCALAR_TO_VECTOR:
7270 return lowerSCALAR_TO_VECTOR(Op, DAG);
7271 case ISD::INSERT_VECTOR_ELT:
7272 return lowerINSERT_VECTOR_ELT(Op, DAG);
7273 case ISD::EXTRACT_VECTOR_ELT:
7274 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7275 case ISD::SIGN_EXTEND_VECTOR_INREG:
7276 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
7277 case ISD::ZERO_EXTEND_VECTOR_INREG:
7278 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
7279 case ISD::SHL:
7280 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
7281 case ISD::SRL:
7282 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
7283 case ISD::SRA:
7284 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
7285 case ISD::ADDRSPACECAST:
7286 return lowerAddrSpaceCast(Op, DAG);
7287 case ISD::ROTL:
7288 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
7289 case ISD::FSHL:
7290 return lowerFSHL(Op, DAG);
7291 case ISD::FSHR:
7292 return lowerFSHR(Op, DAG);
7293 case ISD::FP_EXTEND:
7294 case ISD::STRICT_FP_EXTEND:
7295 return lowerFP_EXTEND(Op, DAG);
7296 case ISD::FP_TO_UINT:
7297 case ISD::FP_TO_SINT:
7298 case ISD::STRICT_FP_TO_UINT:
7299 case ISD::STRICT_FP_TO_SINT:
7300 return lower_FP_TO_INT(Op, DAG);
7301 case ISD::UINT_TO_FP:
7302 case ISD::SINT_TO_FP:
7303 case ISD::STRICT_UINT_TO_FP:
7304 case ISD::STRICT_SINT_TO_FP:
7305 return lower_INT_TO_FP(Op, DAG);
7306 case ISD::LOAD:
7307 return lowerLoadF16(Op, DAG);
7308 case ISD::STORE:
7309 return lowerStoreF16(Op, DAG);
7310 case ISD::IS_FPCLASS:
7311 return lowerIS_FPCLASS(Op, DAG);
7312 case ISD::GET_ROUNDING:
7313 return lowerGET_ROUNDING(Op, DAG);
7314 case ISD::READCYCLECOUNTER:
7315 return lowerREADCYCLECOUNTER(Op, DAG);
7316 case ISD::EH_SJLJ_SETJMP:
7317 case ISD::EH_SJLJ_LONGJMP:
7318 // These operations are legal on our platform, but we cannot actually
7319 // set the operation action to Legal as common code would treat this
7320 // as equivalent to Expand. Instead, we keep the operation action to
7321 // Custom and just leave them unchanged here.
7322 return Op;
7323
7324 default:
7325 llvm_unreachable("Unexpected node to lower");
7326 }
7327}
7328
7329static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
7330 const SDLoc &SL) {
7331 // If i128 is legal, just use a normal bitcast.
7332 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7333 return DAG.getBitcast(MVT::f128, Src);
7334
7335 // Otherwise, f128 must live in FP128, so do a partwise move.
7336 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
7337 &SystemZ::FP128BitRegClass);
7338
7339 SDValue Hi, Lo;
7340 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
7341
7342 Hi = DAG.getBitcast(MVT::f64, Hi);
7343 Lo = DAG.getBitcast(MVT::f64, Lo);
7344
7345 SDNode *Pair = DAG.getMachineNode(
7346 SystemZ::REG_SEQUENCE, SL, MVT::f128,
7347 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
7348 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
7349 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
7350 return SDValue(Pair, 0);
7351}
7352
7353static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
7354 const SDLoc &SL) {
7355 // If i128 is legal, just use a normal bitcast.
7356 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7357 return DAG.getBitcast(MVT::i128, Src);
7358
7359 // Otherwise, f128 must live in FP128, so do a partwise move.
7360 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
7361 &SystemZ::FP128BitRegClass);
7362
7363 SDValue LoFP =
7364 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
7365 SDValue HiFP =
7366 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
7367 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
7368 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
7369
7370 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
7371}
7372
7373// Lower operations with invalid operand or result types.
7374void
7375SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
7376 SmallVectorImpl<SDValue> &Results,
7377 SelectionDAG &DAG) const {
7378 switch (N->getOpcode()) {
7379 case ISD::ATOMIC_LOAD: {
7380 SDLoc DL(N);
7381 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
7382 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
7383 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7384 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
7385 DL, Tys, Ops, MVT::i128, MMO);
7386
7387 SDValue Lowered = lowerGR128ToI128(DAG, Res);
7388 if (N->getValueType(0) == MVT::f128)
7389 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
7390 Results.push_back(Lowered);
7391 Results.push_back(Res.getValue(1));
7392 break;
7393 }
7394 case ISD::ATOMIC_STORE: {
7395 SDLoc DL(N);
7396 SDVTList Tys = DAG.getVTList(MVT::Other);
7397 SDValue Val = N->getOperand(1);
7398 if (Val.getValueType() == MVT::f128)
7399 Val = expandBitCastF128ToI128(DAG, Val, DL);
7400 Val = lowerI128ToGR128(DAG, Val);
7401
7402 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
7403 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7404 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
7405 DL, Tys, Ops, MVT::i128, MMO);
7406 // We have to enforce sequential consistency by performing a
7407 // serialization operation after the store.
7408 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
7409 AtomicOrdering::SequentiallyConsistent)
7410 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
7411 MVT::Other, Res), 0);
7412 Results.push_back(Res);
7413 break;
7414 }
7415 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
7416 SDLoc DL(N);
7417 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
7418 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
7419 lowerI128ToGR128(DAG, N->getOperand(2)),
7420 lowerI128ToGR128(DAG, N->getOperand(3)) };
7421 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7422 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
7423 DL, Tys, Ops, MVT::i128, MMO);
7424 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
7425 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
7426 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
7427 Results.push_back(lowerGR128ToI128(DAG, Res));
7428 Results.push_back(Success);
7429 Results.push_back(Res.getValue(2));
7430 break;
7431 }
7432 case ISD::BITCAST: {
7433 if (useSoftFloat())
7434 return;
7435 SDLoc DL(N);
7436 SDValue Src = N->getOperand(0);
7437 EVT SrcVT = Src.getValueType();
7438 EVT ResVT = N->getValueType(0);
7439 if (ResVT == MVT::i128 && SrcVT == MVT::f128)
7440 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
7441 else if (SrcVT == MVT::i16 && ResVT == MVT::f16) {
7442 if (Subtarget.hasVector()) {
7443 SDValue In32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
7444 Results.push_back(SDValue(
7445 DAG.getMachineNode(SystemZ::LEFR_16, DL, MVT::f16, In32), 0));
7446 } else {
7447 SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Src);
7448 Results.push_back(convertToF16(In64, DAG));
7449 }
7450 } else if (SrcVT == MVT::f16 && ResVT == MVT::i16) {
7451 SDValue ExtractedVal =
7452 Subtarget.hasVector()
7453 ? SDValue(DAG.getMachineNode(SystemZ::LFER_16, DL, MVT::i32, Src),
7454 0)
7455 : convertFromF16(Src, DL, DAG);
7456 Results.push_back(DAG.getZExtOrTrunc(ExtractedVal, DL, ResVT));
7457 }
7458 break;
7459 }
7460 case ISD::UINT_TO_FP:
7461 case ISD::SINT_TO_FP:
7462 case ISD::STRICT_UINT_TO_FP:
7463 case ISD::STRICT_SINT_TO_FP: {
7464 if (useSoftFloat())
7465 return;
7466 bool IsStrict = N->isStrictFPOpcode();
7467 SDLoc DL(N);
7468 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7469 EVT ResVT = N->getValueType(0);
7470 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7471 if (ResVT == MVT::f16) {
7472 if (!IsStrict) {
7473 SDValue OpF32 = DAG.getNode(N->getOpcode(), DL, MVT::f32, InOp);
7474 Results.push_back(DAG.getFPExtendOrRound(OpF32, DL, MVT::f16));
7475 } else {
7476 SDValue OpF32 =
7477 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
7478 {Chain, InOp});
7479 SDValue F16Res;
7480 std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound(
7481 OpF32, OpF32.getValue(1), DL, MVT::f16);
7482 Results.push_back(F16Res);
7483 Results.push_back(Chain);
7484 }
7485 }
7486 break;
7487 }
7488 case ISD::FP_TO_UINT:
7489 case ISD::FP_TO_SINT:
7490 case ISD::STRICT_FP_TO_UINT:
7491 case ISD::STRICT_FP_TO_SINT: {
7492 if (useSoftFloat())
7493 return;
7494 bool IsStrict = N->isStrictFPOpcode();
7495 SDLoc DL(N);
7496 EVT ResVT = N->getValueType(0);
7497 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7498 EVT InVT = InOp->getValueType(0);
7499 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7500 if (InVT == MVT::f16) {
7501 if (!IsStrict) {
7502 SDValue InF32 = DAG.getFPExtendOrRound(InOp, DL, MVT::f32);
7503 Results.push_back(DAG.getNode(N->getOpcode(), DL, ResVT, InF32));
7504 } else {
7505 SDValue InF32;
7506 std::tie(InF32, Chain) =
7507 DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32);
7508 SDValue OpF32 =
7509 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other),
7510 {Chain, InF32});
7511 Results.push_back(OpF32);
7512 Results.push_back(OpF32.getValue(1));
7513 }
7514 }
7515 break;
7516 }
7517 default:
7518 llvm_unreachable("Unexpected node to lower");
7519 }
7520}
7521
7522void
7523SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
7524 SmallVectorImpl<SDValue> &Results,
7525 SelectionDAG &DAG) const {
7526 return LowerOperationWrapper(N, Results, DAG);
7527}
7528
7529// Return true if VT is a vector whose elements are a whole number of bytes
7530// in width. Also check for presence of vector support.
7531bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
7532 if (!Subtarget.hasVector())
7533 return false;
7534
7535 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
7536}
7537
7538// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
7539// producing a result of type ResVT. Op is a possibly bitcast version
7540// of the input vector and Index is the index (based on type VecVT) that
7541// should be extracted. Return the new extraction if a simplification
7542// was possible or if Force is true.
7543SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
7544 EVT VecVT, SDValue Op,
7545 unsigned Index,
7546 DAGCombinerInfo &DCI,
7547 bool Force) const {
7548 SelectionDAG &DAG = DCI.DAG;
7549
7550 // The number of bytes being extracted.
7551 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7552
7553 for (;;) {
7554 unsigned Opcode = Op.getOpcode();
7555 if (Opcode == ISD::BITCAST)
7556 // Look through bitcasts.
7557 Op = Op.getOperand(0);
7558 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
7559 canTreatAsByteVector(Op.getValueType())) {
7560 // Get a VPERM-like permute mask and see whether the bytes covered
7561 // by the extracted element are a contiguous sequence from one
7562 // source operand.
7563 SmallVector<int, SystemZ::VectorBytes> Bytes;
7564 if (!getVPermMask(Op, Bytes))
7565 break;
7566 int First;
7567 if (!getShuffleInput(Bytes, Index * BytesPerElement,
7568 BytesPerElement, First))
7569 break;
7570 if (First < 0)
7571 return DAG.getUNDEF(ResVT);
7572 // Make sure the contiguous sequence starts at a multiple of the
7573 // original element size.
7574 unsigned Byte = unsigned(First) % Bytes.size();
7575 if (Byte % BytesPerElement != 0)
7576 break;
7577 // We can get the extracted value directly from an input.
7578 Index = Byte / BytesPerElement;
7579 Op = Op.getOperand(unsigned(First) / Bytes.size());
7580 Force = true;
7581 } else if (Opcode == ISD::BUILD_VECTOR &&
7582 canTreatAsByteVector(Op.getValueType())) {
7583 // We can only optimize this case if the BUILD_VECTOR elements are
7584 // at least as wide as the extracted value.
7585 EVT OpVT = Op.getValueType();
7586 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7587 if (OpBytesPerElement < BytesPerElement)
7588 break;
7589 // Make sure that the least-significant bit of the extracted value
7590 // is the least significant bit of an input.
7591 unsigned End = (Index + 1) * BytesPerElement;
7592 if (End % OpBytesPerElement != 0)
7593 break;
7594 // We're extracting the low part of one operand of the BUILD_VECTOR.
7595 Op = Op.getOperand(End / OpBytesPerElement - 1);
7596 if (!Op.getValueType().isInteger()) {
7597 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
7598 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
7599 DCI.AddToWorklist(Op.getNode());
7600 }
7601 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
7602 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7603 if (VT != ResVT) {
7604 DCI.AddToWorklist(Op.getNode());
7605 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
7606 }
7607 return Op;
7608 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7609 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
7610 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
7611 canTreatAsByteVector(Op.getValueType()) &&
7612 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
7613 // Make sure that only the unextended bits are significant.
7614 EVT ExtVT = Op.getValueType();
7615 EVT OpVT = Op.getOperand(0).getValueType();
7616 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
7617 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7618 unsigned Byte = Index * BytesPerElement;
7619 unsigned SubByte = Byte % ExtBytesPerElement;
7620 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
7621 if (SubByte < MinSubByte ||
7622 SubByte + BytesPerElement > ExtBytesPerElement)
7623 break;
7624 // Get the byte offset of the unextended element
7625 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
7626 // ...then add the byte offset relative to that element.
7627 Byte += SubByte - MinSubByte;
7628 if (Byte % BytesPerElement != 0)
7629 break;
7630 Op = Op.getOperand(0);
7631 Index = Byte / BytesPerElement;
7632 Force = true;
7633 } else
7634 break;
7635 }
7636 if (Force) {
7637 if (Op.getValueType() != VecVT) {
7638 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
7639 DCI.AddToWorklist(Op.getNode());
7640 }
7641 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
7642 DAG.getConstant(Index, DL, MVT::i32));
7643 }
7644 return SDValue();
7645}
7646
7647// Optimize vector operations in scalar value Op on the basis that Op
7648// is truncated to TruncVT.
7649SDValue SystemZTargetLowering::combineTruncateExtract(
7650 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
7651 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
7652 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
7653 // of type TruncVT.
7654 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7655 TruncVT.getSizeInBits() % 8 == 0) {
7656 SDValue Vec = Op.getOperand(0);
7657 EVT VecVT = Vec.getValueType();
7658 if (canTreatAsByteVector(VecVT)) {
7659 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7660 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7661 unsigned TruncBytes = TruncVT.getStoreSize();
7662 if (BytesPerElement % TruncBytes == 0) {
7663 // Calculate the value of Y' in the above description. We are
7664 // splitting the original elements into Scale equal-sized pieces
7665 // and for truncation purposes want the last (least-significant)
7666 // of these pieces for IndexN. This is easiest to do by calculating
7667 // the start index of the following element and then subtracting 1.
7668 unsigned Scale = BytesPerElement / TruncBytes;
7669 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
7670
7671 // Defer the creation of the bitcast from X to combineExtract,
7672 // which might be able to optimize the extraction.
7673 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
7674 MVT::getIntegerVT(TruncBytes * 8),
7675 VecVT.getStoreSize() / TruncBytes);
7676 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
7677 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
7678 }
7679 }
7680 }
7681 }
7682 return SDValue();
7683}
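// Worked example (illustrative): truncating the extraction of element 1 of
// a v4i32 vector to i8 gives Scale = 4 and NewIndex = (1 + 1) * 4 - 1 = 7,
// i.e. byte element 7 of the bitcast v16i8 vector, which is the
// least-significant byte of the original 32-bit element in the vector's
// big-endian lane order.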
7684
7685SDValue SystemZTargetLowering::combineZERO_EXTEND(
7686 SDNode *N, DAGCombinerInfo &DCI) const {
7687 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
7688 SelectionDAG &DAG = DCI.DAG;
7689 SDValue N0 = N->getOperand(0);
7690 EVT VT = N->getValueType(0);
7691 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
7692 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
7693 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7694 if (TrueOp && FalseOp) {
7695 SDLoc DL(N0);
7696 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
7697 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
7698 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
7699 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
7700 // If N0 has multiple uses, change other uses as well.
7701 if (!N0.hasOneUse()) {
7702 SDValue TruncSelect =
7703 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
7704 DCI.CombineTo(N0.getNode(), TruncSelect);
7705 }
7706 return NewSelect;
7707 }
7708 }
7709 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
7710 // of the result is smaller than the size of X and all the truncated bits
7711 // of X are already zero.
7712 if (N0.getOpcode() == ISD::XOR &&
7713 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
7714 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7715 N0.getOperand(1).getOpcode() == ISD::Constant) {
7716 SDValue X = N0.getOperand(0).getOperand(0);
7717 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
7718 KnownBits Known = DAG.computeKnownBits(X);
7719 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
7720 N0.getValueSizeInBits(),
7721 VT.getSizeInBits());
7722 if (TruncatedBits.isSubsetOf(Known.Zero)) {
7723 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7724 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
7725 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
7726 X, DAG.getConstant(Mask, SDLoc(N0), VT));
7727 }
7728 }
7729 }
7730 // Recognize patterns for VECTOR SUBTRACT COMPUTE BORROW INDICATION
7731 // and VECTOR ADD COMPUTE CARRY for i128:
7732 // (zext (setcc_uge X Y)) --> (VSCBI X Y)
7733 // (zext (setcc_ule Y X)) --> (VSCBI X Y)
7734 // (zext (setcc_ult (add X Y) X/Y) -> (VACC X Y)
7735 // (zext (setcc_ugt X/Y (add X Y)) -> (VACC X Y)
7736 // For vector types, these patterns are recognized in the .td file.
7737 if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT == MVT::i128 &&
7738 N0.getOperand(0).getValueType() == VT) {
7739 SDValue Op0 = N0.getOperand(0);
7740 SDValue Op1 = N0.getOperand(1);
7741 const ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7742 switch (CC) {
7743 case ISD::SETULE:
7744 std::swap(Op0, Op1);
7745 [[fallthrough]];
7746 case ISD::SETUGE:
7747 return DAG.getNode(SystemZISD::VSCBI, SDLoc(N0), VT, Op0, Op1);
7748 case ISD::SETUGT:
7749 std::swap(Op0, Op1);
7750 [[fallthrough]];
7751 case ISD::SETULT:
7752 if (Op0->hasOneUse() && Op0->getOpcode() == ISD::ADD &&
7753 (Op0->getOperand(0) == Op1 || Op0->getOperand(1) == Op1))
7754 return DAG.getNode(SystemZISD::VACC, SDLoc(N0), VT, Op0->getOperand(0),
7755 Op0->getOperand(1));
7756 break;
7757 default:
7758 break;
7759 }
7760 }
7761
7762 return SDValue();
7763}
7764
7765SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
7766 SDNode *N, DAGCombinerInfo &DCI) const {
7767 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
7768 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
7769 // into (select_cc LHS, RHS, -1, 0, COND)
7770 SelectionDAG &DAG = DCI.DAG;
7771 SDValue N0 = N->getOperand(0);
7772 EVT VT = N->getValueType(0);
7773 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7774 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
7775 N0 = N0.getOperand(0);
7776 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
7777 SDLoc DL(N0);
7778 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
7779 DAG.getAllOnesConstant(DL, VT),
7780 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
7781 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
7782 }
7783 return SDValue();
7784}
7785
7786SDValue SystemZTargetLowering::combineSIGN_EXTEND(
7787 SDNode *N, DAGCombinerInfo &DCI) const {
7788 // Convert (sext (ashr (shl X, C1), C2)) to
7789 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
7790 // cheap as narrower ones.
7791 SelectionDAG &DAG = DCI.DAG;
7792 SDValue N0 = N->getOperand(0);
7793 EVT VT = N->getValueType(0);
7794 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
7795 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7796 SDValue Inner = N0.getOperand(0);
7797 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
7798 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
7799 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7800 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
7801 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
7802 EVT ShiftVT = N0.getOperand(1).getValueType();
7803 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
7804 Inner.getOperand(0));
7805 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
7806 DAG.getConstant(NewShlAmt, SDLoc(Inner),
7807 ShiftVT));
7808 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
7809 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
7810 }
7811 }
7812 }
7813
7814 return SDValue();
7815}
7816
7817SDValue SystemZTargetLowering::combineMERGE(
7818 SDNode *N, DAGCombinerInfo &DCI) const {
7819 SelectionDAG &DAG = DCI.DAG;
7820 unsigned Opcode = N->getOpcode();
7821 SDValue Op0 = N->getOperand(0);
7822 SDValue Op1 = N->getOperand(1);
7823 if (Op0.getOpcode() == ISD::BITCAST)
7824 Op0 = Op0.getOperand(0);
7825 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
7826 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
7827 // for v4f32.
7828 if (Op1 == N->getOperand(0))
7829 return Op1;
7830 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
7831 EVT VT = Op1.getValueType();
7832 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
7833 if (ElemBytes <= 4) {
7834 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
7835 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
7836 EVT InVT = VT.changeVectorElementTypeToInteger();
7837 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
7838 SystemZ::VectorBytes / ElemBytes / 2);
7839 if (VT != InVT) {
7840 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
7841 DCI.AddToWorklist(Op1.getNode());
7842 }
7843 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
7844 DCI.AddToWorklist(Op.getNode());
7845 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
7846 }
7847 }
7848 return SDValue();
7849}
7850
7851static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7852 SDNode *&HiPart) {
7853 LoPart = HiPart = nullptr;
7854
7855 // Scan through all users.
7856 for (SDUse &Use : LD->uses()) {
7857 // Skip the uses of the chain.
7858 if (Use.getResNo() != 0)
7859 continue;
7860
7861 // Verify every user is a TRUNCATE to i64 of the low or high half.
7862 SDNode *User = Use.getUser();
7863 bool IsLoPart = true;
7864 if (User->getOpcode() == ISD::SRL &&
7865 User->getOperand(1).getOpcode() == ISD::Constant &&
7866 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
7867 User = *User->user_begin();
7868 IsLoPart = false;
7869 }
7870 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
7871 return false;
7872
7873 if (IsLoPart) {
7874 if (LoPart)
7875 return false;
7876 LoPart = User;
7877 } else {
7878 if (HiPart)
7879 return false;
7880 HiPart = User;
7881 }
7882 }
7883 return true;
7884}
7885
7886static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7887 SDNode *&HiPart) {
7888 LoPart = HiPart = nullptr;
7889
7890 // Scan through all users.
7891 for (SDUse &Use : LD->uses()) {
7892 // Skip the uses of the chain.
7893 if (Use.getResNo() != 0)
7894 continue;
7895
7896 // Verify every user is an EXTRACT_SUBREG of the low or high half.
7897 SDNode *User = Use.getUser();
7898 if (!User->hasOneUse() || !User->isMachineOpcode() ||
7899 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
7900 return false;
7901
7902 switch (User->getConstantOperandVal(1)) {
7903 case SystemZ::subreg_l64:
7904 if (LoPart)
7905 return false;
7906 LoPart = User;
7907 break;
7908 case SystemZ::subreg_h64:
7909 if (HiPart)
7910 return false;
7911 HiPart = User;
7912 break;
7913 default:
7914 return false;
7915 }
7916 }
7917 return true;
7918}
7919
7920SDValue SystemZTargetLowering::combineLOAD(
7921 SDNode *N, DAGCombinerInfo &DCI) const {
7922 SelectionDAG &DAG = DCI.DAG;
7923 EVT LdVT = N->getValueType(0);
7924 if (auto *LN = dyn_cast<LoadSDNode>(N)) {
7925 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
7926 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7927 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
7928 if (PtrVT != LoadNodeVT) {
7929 SDLoc DL(LN);
7930 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
7931 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0);
7932 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0),
7933 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(),
7934 LN->getMemOperand());
7935 }
7936 }
7937 }
7938 SDLoc DL(N);
7939
7940 // Replace a 128-bit load that is used solely to move its value into GPRs
7941 // by separate loads of both halves.
7942 LoadSDNode *LD = cast<LoadSDNode>(N);
7943 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
7944 SDNode *LoPart, *HiPart;
7945 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
7946 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
7947 // Rewrite each extraction as an independent load.
7948 SmallVector<SDValue, 2> ArgChains;
7949 if (HiPart) {
7950 SDValue EltLoad = DAG.getLoad(
7951 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
7952 LD->getPointerInfo(), LD->getBaseAlign(),
7953 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7954
7955 DCI.CombineTo(HiPart, EltLoad, true);
7956 ArgChains.push_back(EltLoad.getValue(1));
7957 }
7958 if (LoPart) {
7959 SDValue EltLoad = DAG.getLoad(
7960 LoPart->getValueType(0), DL, LD->getChain(),
7961 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
7962 LD->getPointerInfo().getWithOffset(8), LD->getBaseAlign(),
7963 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7964
7965 DCI.CombineTo(LoPart, EltLoad, true);
7966 ArgChains.push_back(EltLoad.getValue(1));
7967 }
7968
7969 // Collect all chains via TokenFactor.
7970 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
7971 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
7972 DCI.AddToWorklist(Chain.getNode());
7973 return SDValue(N, 0);
7974 }
7975 }
7976
7977 if (LdVT.isVector() || LdVT.isInteger())
7978 return SDValue();
7979 // Transform a scalar load that is REPLICATEd as well as having other
7980 // use(s) to the form where the other use(s) use the first element of the
7981 // REPLICATE instead of the load. Otherwise instruction selection will not
7982 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
7983 // point loads.
7984
7985 SDValue Replicate;
7986 SmallVector<SDNode*, 8> OtherUses;
7987 for (SDUse &Use : N->uses()) {
7988 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) {
7989 if (Replicate)
7990 return SDValue(); // Should never happen
7991 Replicate = SDValue(Use.getUser(), 0);
7992 } else if (Use.getResNo() == 0)
7993 OtherUses.push_back(Use.getUser());
7994 }
7995 if (!Replicate || OtherUses.empty())
7996 return SDValue();
7997
7998 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
7999 Replicate, DAG.getConstant(0, DL, MVT::i32));
8000 // Update uses of the loaded Value while preserving old chains.
8001 for (SDNode *U : OtherUses) {
8002 SmallVector<SDValue, 8> Ops;
8003 for (SDValue Op : U->ops())
8004 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
8005 DAG.UpdateNodeOperands(U, Ops);
8006 }
8007 return SDValue(N, 0);
8008}
8009
8010bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
8011 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
8012 return true;
8013 if (Subtarget.hasVectorEnhancements2())
8014 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
8015 return true;
8016 return false;
8017}
8018
8019static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
8020 if (!VT.isVector() || !VT.isSimple() ||
8021 VT.getSizeInBits() != 128 ||
8022 VT.getScalarSizeInBits() % 8 != 0)
8023 return false;
8024
8025 unsigned NumElts = VT.getVectorNumElements();
8026 for (unsigned i = 0; i < NumElts; ++i) {
8027 if (M[i] < 0) continue; // ignore UNDEF indices
8028 if ((unsigned) M[i] != NumElts - 1 - i)
8029 return false;
8030 }
8031
8032 return true;
8033}
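// Illustrative example: for a v4i32 shuffle the mask { 3, 2, 1, 0 } (or the
// same mask with some entries undef) satisfies the check above, since each
// defined index i maps to element NumElts - 1 - i.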
8034
8035static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
8036 for (auto *U : StoredVal->users()) {
8037 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
8038 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
8039 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
8040 continue;
8041 } else if (isa<BuildVectorSDNode>(U)) {
8042 SDValue BuildVector = SDValue(U, 0);
8043 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
8044 isOnlyUsedByStores(BuildVector, DAG))
8045 continue;
8046 }
8047 return false;
8048 }
8049 return true;
8050}
8051
8052static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
8053 SDValue &HiPart) {
8054 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
8055 return false;
8056
8057 SDValue Op0 = Val.getOperand(0);
8058 SDValue Op1 = Val.getOperand(1);
8059
8060 if (Op0.getOpcode() == ISD::SHL)
8061 std::swap(Op0, Op1);
8062 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
8063 Op1.getOperand(1).getOpcode() != ISD::Constant ||
8064 Op1.getConstantOperandVal(1) != 64)
8065 return false;
8066 Op1 = Op1.getOperand(0);
8067
8068 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
8069 Op0.getOperand(0).getValueType() != MVT::i64)
8070 return false;
8071 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
8072 Op1.getOperand(0).getValueType() != MVT::i64)
8073 return false;
8074
8075 LoPart = Op0.getOperand(0);
8076 HiPart = Op1.getOperand(0);
8077 return true;
8078}
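// Illustrative example: an i128 value assembled from two GPR halves
// typically has the form
//   (or (zero_extend i64 Lo), (shl (any_extend i64 Hi), 64))
// in which case LoPart/HiPart are set to the two i64 halves so that
// combineSTORE below can emit two 8-byte stores instead of one
// 128-bit store.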
8079
8080static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
8081 SDValue &HiPart) {
8082 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
8083 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
8084 return false;
8085
8086 if (Val->getNumOperands() != 5 ||
8087 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
8088 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
8089 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
8090 return false;
8091
8092 LoPart = Val->getOperand(1);
8093 HiPart = Val->getOperand(3);
8094 return true;
8095}
8096
8097SDValue SystemZTargetLowering::combineSTORE(
8098 SDNode *N, DAGCombinerInfo &DCI) const {
8099 SelectionDAG &DAG = DCI.DAG;
8100 auto *SN = cast<StoreSDNode>(N);
8101 auto &Op1 = N->getOperand(1);
8102 EVT MemVT = SN->getMemoryVT();
8103
8104 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
8105 MVT PtrVT = getPointerTy(DAG.getDataLayout());
8106 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
8107 if (PtrVT != StoreNodeVT) {
8108 SDLoc DL(SN);
8109 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, SN->getBasePtr(),
8110 SYSTEMZAS::PTR32, 0);
8111 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast,
8112 SN->getPointerInfo(), SN->getBaseAlign(),
8113 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8114 }
8115 }
8116
8117 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
8118 // for the extraction to be done on a vMiN value, so that we can use VSTE.
8119 // If X has wider elements then convert it to:
8120 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
8121 if (MemVT.isInteger() && SN->isTruncatingStore()) {
8122 if (SDValue Value =
8123 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
8124 DCI.AddToWorklist(Value.getNode());
8125
8126 // Rewrite the store with the new form of stored value.
8127 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
8128 SN->getBasePtr(), SN->getMemoryVT(),
8129 SN->getMemOperand());
8130 }
8131 }
8132 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
8133 if (!SN->isTruncatingStore() &&
8134 Op1.getOpcode() == ISD::BSWAP &&
8135 Op1.getNode()->hasOneUse() &&
8136 canLoadStoreByteSwapped(Op1.getValueType())) {
8137
8138 SDValue BSwapOp = Op1.getOperand(0);
8139
8140 if (BSwapOp.getValueType() == MVT::i16)
8141 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
8142
8143 SDValue Ops[] = {
8144 N->getOperand(0), BSwapOp, N->getOperand(2)
8145 };
8146
8147 return
8148 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
8149 Ops, MemVT, SN->getMemOperand());
8150 }
8151 // Combine STORE (element-swap) into VSTER
8152 if (!SN->isTruncatingStore() &&
8153 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
8154 Op1.getNode()->hasOneUse() &&
8155 Subtarget.hasVectorEnhancements2()) {
8156 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
8157 ArrayRef<int> ShuffleMask = SVN->getMask();
8158 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
8159 SDValue Ops[] = {
8160 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
8161 };
8162
8163 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
8164 DAG.getVTList(MVT::Other),
8165 Ops, MemVT, SN->getMemOperand());
8166 }
8167 }
8168
8169 // Combine STORE (READCYCLECOUNTER) into STCKF.
8170 if (!SN->isTruncatingStore() &&
8171 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
8172 Op1.hasOneUse() &&
8173 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
8174 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
8175 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
8176 DAG.getVTList(MVT::Other),
8177 Ops, MemVT, SN->getMemOperand());
8178 }
8179
8180 // Transform a store of a 128-bit value moved from parts into two stores.
8181 if (SN->isSimple() && ISD::isNormalStore(SN)) {
8182 SDValue LoPart, HiPart;
8183 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
8184 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
8185 SDLoc DL(SN);
8186 SDValue Chain0 = DAG.getStore(
8187 SN->getChain(), DL, HiPart, SN->getBasePtr(), SN->getPointerInfo(),
8188 SN->getBaseAlign(), SN->getMemOperand()->getFlags(), SN->getAAInfo());
8189 SDValue Chain1 = DAG.getStore(
8190 SN->getChain(), DL, LoPart,
8191 DAG.getObjectPtrOffset(DL, SN->getBasePtr(), TypeSize::getFixed(8)),
8192 SN->getPointerInfo().getWithOffset(8), SN->getBaseAlign(),
8193 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8194
8195 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
8196 }
8197 }
8198
8199 // Replicate a reg or immediate with VREP instead of scalar multiply or
8200 // immediate load. It seems best to do this during the first DAGCombine as
8201 // it is straightforward to handle the zero-extend node in the initial
8202 // DAG, and also not to worry about keeping the new MemVT legal (e.g. when
8203 // extracting an i16 element from a v16i8 vector).
8204 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
8205 isOnlyUsedByStores(Op1, DAG)) {
8206 SDValue Word = SDValue();
8207 EVT WordVT;
8208
8209 // Find a replicated immediate and return it if found in Word and its
8210 // type in WordVT.
8211 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
8212 // Some constants are better handled with a scalar store.
8213 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
8214 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
8215 return;
8216
8217 APInt Val = C->getAPIntValue();
8218 // Truncate Val in case of a truncating store.
8219 if (!llvm::isUIntN(TotBytes * 8, Val.getZExtValue())) {
8220 assert(SN->isTruncatingStore() &&
8221 "Non-truncating store and immediate value does not fit?");
8222 Val = Val.trunc(TotBytes * 8);
8223 }
8224
8225 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
8226 if (VCI.isVectorConstantLegal(Subtarget) &&
8227 VCI.Opcode == SystemZISD::REPLICATE) {
8228 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
8229 WordVT = VCI.VecVT.getScalarType();
8230 }
8231 };
8232
8233 // Find a replicated register and return it if found in Word and its type
8234 // in WordVT.
8235 auto FindReplicatedReg = [&](SDValue MulOp) {
8236 EVT MulVT = MulOp.getValueType();
8237 if (MulOp->getOpcode() == ISD::MUL &&
8238 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
8239 // Find a zero extended value and its type.
8240 SDValue LHS = MulOp->getOperand(0);
8241 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
8242 WordVT = LHS->getOperand(0).getValueType();
8243 else if (LHS->getOpcode() == ISD::AssertZext)
8244 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
8245 else
8246 return;
8247 // Find a replicating constant, e.g. 0x00010001.
8248 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
8249 SystemZVectorConstantInfo VCI(
8250 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
8251 if (VCI.isVectorConstantLegal(Subtarget) &&
8252 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
8253 WordVT == VCI.VecVT.getScalarType())
8254 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
8255 }
8256 }
8257 };
8258
8259 if (isa<BuildVectorSDNode>(Op1) &&
8260 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
8261 SDValue SplatVal = Op1->getOperand(0);
8262 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
8263 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
8264 else
8265 FindReplicatedReg(SplatVal);
8266 } else {
8267 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
8268 FindReplicatedImm(C, MemVT.getStoreSize());
8269 else
8270 FindReplicatedReg(Op1);
8271 }
8272
8273 if (Word != SDValue()) {
8274 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
8275 "Bad type handling");
8276 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
8277 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
8278 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
8279 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
8280 SN->getBasePtr(), SN->getMemOperand());
8281 }
8282 }
8283
8284 return SDValue();
8285}
8286
8287SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
8288 SDNode *N, DAGCombinerInfo &DCI) const {
8289 SelectionDAG &DAG = DCI.DAG;
8290 // Combine element-swap (LOAD) into VLER
8291 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8292 N->getOperand(0).hasOneUse() &&
8293 Subtarget.hasVectorEnhancements2()) {
8294 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
8295 ArrayRef<int> ShuffleMask = SVN->getMask();
8296 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
8297 SDValue Load = N->getOperand(0);
8298 LoadSDNode *LD = cast<LoadSDNode>(Load);
8299
8300 // Create the element-swapping load.
8301 SDValue Ops[] = {
8302 LD->getChain(), // Chain
8303 LD->getBasePtr() // Ptr
8304 };
8305 SDValue ESLoad =
8306 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
8307 DAG.getVTList(LD->getValueType(0), MVT::Other),
8308 Ops, LD->getMemoryVT(), LD->getMemOperand());
8309
8310 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
8311 // by the load dead.
8312 DCI.CombineTo(N, ESLoad);
8313
8314 // Next, combine the load away; we give it a bogus result value but a real
8315 // chain result. The result value is dead because the shuffle is dead.
8316 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
8317
8318 // Return N so it doesn't get rechecked!
8319 return SDValue(N, 0);
8320 }
8321 }
8322
8323 return SDValue();
8324}
8325
8326SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
8327 SDNode *N, DAGCombinerInfo &DCI) const {
8328 SelectionDAG &DAG = DCI.DAG;
8329
8330 if (!Subtarget.hasVector())
8331 return SDValue();
8332
8333 // Look through bitcasts that retain the number of vector elements.
8334 SDValue Op = N->getOperand(0);
8335 if (Op.getOpcode() == ISD::BITCAST &&
8336 Op.getValueType().isVector() &&
8337 Op.getOperand(0).getValueType().isVector() &&
8338 Op.getValueType().getVectorNumElements() ==
8339 Op.getOperand(0).getValueType().getVectorNumElements())
8340 Op = Op.getOperand(0);
8341
8342 // Pull BSWAP out of a vector extraction.
8343 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
8344 EVT VecVT = Op.getValueType();
8345 EVT EltVT = VecVT.getVectorElementType();
8346 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
8347 Op.getOperand(0), N->getOperand(1));
8348 DCI.AddToWorklist(Op.getNode());
8349 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
8350 if (EltVT != N->getValueType(0)) {
8351 DCI.AddToWorklist(Op.getNode());
8352 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
8353 }
8354 return Op;
8355 }
8356
8357 // Try to simplify a vector extraction.
8358 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
8359 SDValue Op0 = N->getOperand(0);
8360 EVT VecVT = Op0.getValueType();
8361 if (canTreatAsByteVector(VecVT))
8362 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
8363 IndexN->getZExtValue(), DCI, false);
8364 }
8365 return SDValue();
8366}
8367
8368SDValue SystemZTargetLowering::combineJOIN_DWORDS(
8369 SDNode *N, DAGCombinerInfo &DCI) const {
8370 SelectionDAG &DAG = DCI.DAG;
8371 // (join_dwords X, X) == (replicate X)
8372 if (N->getOperand(0) == N->getOperand(1))
8373 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
8374 N->getOperand(0));
8375 return SDValue();
8376}
8377
8378 static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
8379 SDValue Chain1 = N1->getOperand(0);
8380 SDValue Chain2 = N2->getOperand(0);
8381
8382 // Trivial case: both nodes take the same chain.
8383 if (Chain1 == Chain2)
8384 return Chain1;
8385
8386 // FIXME - we could handle more complex cases via TokenFactor,
8387 // assuming we can verify that this would not create a cycle.
8388 return SDValue();
8389}
8390
8391SDValue SystemZTargetLowering::combineFP_ROUND(
8392 SDNode *N, DAGCombinerInfo &DCI) const {
8393
8394 if (!Subtarget.hasVector())
8395 return SDValue();
8396
8397 // (fpround (extract_vector_elt X 0))
8398 // (fpround (extract_vector_elt X 1)) ->
8399 // (extract_vector_elt (VROUND X) 0)
8400 // (extract_vector_elt (VROUND X) 2)
8401 //
8402 // This is a special case since the target doesn't really support v2f32s.
8403 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8404 SelectionDAG &DAG = DCI.DAG;
8405 SDValue Op0 = N->getOperand(OpNo);
8406 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
8407 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8408 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
8409 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8410 Op0.getConstantOperandVal(1) == 0) {
8411 SDValue Vec = Op0.getOperand(0);
8412 for (auto *U : Vec->users()) {
8413 if (U != Op0.getNode() && U->hasOneUse() &&
8414 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8415 U->getOperand(0) == Vec &&
8416 U->getOperand(1).getOpcode() == ISD::Constant &&
8417 U->getConstantOperandVal(1) == 1) {
8418 SDValue OtherRound = SDValue(*U->user_begin(), 0);
8419 if (OtherRound.getOpcode() == N->getOpcode() &&
8420 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
8421 OtherRound.getValueType() == MVT::f32) {
8422 SDValue VRound, Chain;
8423 if (N->isStrictFPOpcode()) {
8424 Chain = MergeInputChains(N, OtherRound.getNode());
8425 if (!Chain)
8426 continue;
8427 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
8428 {MVT::v4f32, MVT::Other}, {Chain, Vec});
8429 Chain = VRound.getValue(1);
8430 } else
8431 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
8432 MVT::v4f32, Vec);
8433 DCI.AddToWorklist(VRound.getNode());
8434 SDValue Extract1 =
8435 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
8436 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
8437 DCI.AddToWorklist(Extract1.getNode());
8438 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
8439 if (Chain)
8440 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
8441 SDValue Extract0 =
8442 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
8443 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8444 if (Chain)
8445 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8446 N->getVTList(), Extract0, Chain);
8447 return Extract0;
8448 }
8449 }
8450 }
8451 }
8452 return SDValue();
8453}
8454
8455SDValue SystemZTargetLowering::combineFP_EXTEND(
8456 SDNode *N, DAGCombinerInfo &DCI) const {
8457
8458 if (!Subtarget.hasVector())
8459 return SDValue();
8460
8461 // (fpextend (extract_vector_elt X 0))
8462 // (fpextend (extract_vector_elt X 2)) ->
8463 // (extract_vector_elt (VEXTEND X) 0)
8464 // (extract_vector_elt (VEXTEND X) 1)
8465 //
8466 // This is a special case since the target doesn't really support v2f32s.
8467 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8468 SelectionDAG &DAG = DCI.DAG;
8469 SDValue Op0 = N->getOperand(OpNo);
8470 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
8471 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8472 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
8473 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8474 Op0.getConstantOperandVal(1) == 0) {
8475 SDValue Vec = Op0.getOperand(0);
8476 for (auto *U : Vec->users()) {
8477 if (U != Op0.getNode() && U->hasOneUse() &&
8478 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8479 U->getOperand(0) == Vec &&
8480 U->getOperand(1).getOpcode() == ISD::Constant &&
8481 U->getConstantOperandVal(1) == 2) {
8482 SDValue OtherExtend = SDValue(*U->user_begin(), 0);
8483 if (OtherExtend.getOpcode() == N->getOpcode() &&
8484 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
8485 OtherExtend.getValueType() == MVT::f64) {
8486 SDValue VExtend, Chain;
8487 if (N->isStrictFPOpcode()) {
8488 Chain = MergeInputChains(N, OtherExtend.getNode());
8489 if (!Chain)
8490 continue;
8491 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
8492 {MVT::v2f64, MVT::Other}, {Chain, Vec});
8493 Chain = VExtend.getValue(1);
8494 } else
8495 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
8496 MVT::v2f64, Vec);
8497 DCI.AddToWorklist(VExtend.getNode());
8498 SDValue Extract1 =
8499 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
8500 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
8501 DCI.AddToWorklist(Extract1.getNode());
8502 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
8503 if (Chain)
8504 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
8505 SDValue Extract0 =
8506 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
8507 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8508 if (Chain)
8509 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8510 N->getVTList(), Extract0, Chain);
8511 return Extract0;
8512 }
8513 }
8514 }
8515 }
8516 return SDValue();
8517}
8518
8519SDValue SystemZTargetLowering::combineINT_TO_FP(
8520 SDNode *N, DAGCombinerInfo &DCI) const {
8521 if (DCI.Level != BeforeLegalizeTypes)
8522 return SDValue();
8523 SelectionDAG &DAG = DCI.DAG;
8524 LLVMContext &Ctx = *DAG.getContext();
8525 unsigned Opcode = N->getOpcode();
8526 EVT OutVT = N->getValueType(0);
8527 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
8528 SDValue Op = N->getOperand(0);
8529 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
8530 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
8531
8532 // Insert an extension before type-legalization to avoid scalarization, e.g.:
8533 // v2f64 = uint_to_fp v2i16
8534 // =>
8535 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
8536 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
8537 OutScalarBits <= 64) {
8538 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
8539 EVT ExtVT = EVT::getVectorVT(
8540 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
8541 unsigned ExtOpcode =
8542 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
8543 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
8544 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
8545 }
8546 return SDValue();
8547}
8548
8549SDValue SystemZTargetLowering::combineFCOPYSIGN(
8550 SDNode *N, DAGCombinerInfo &DCI) const {
8551 SelectionDAG &DAG = DCI.DAG;
8552 EVT VT = N->getValueType(0);
8553 SDValue ValOp = N->getOperand(0);
8554 SDValue SignOp = N->getOperand(1);
8555
8556 // Remove the rounding which is not needed.
8557 if (SignOp.getOpcode() == ISD::FP_ROUND) {
8558 SDValue WideOp = SignOp.getOperand(0);
8559 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, ValOp, WideOp);
8560 }
8561
8562 return SDValue();
8563}
8564
8565SDValue SystemZTargetLowering::combineBSWAP(
8566 SDNode *N, DAGCombinerInfo &DCI) const {
8567 SelectionDAG &DAG = DCI.DAG;
8568 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
8569 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8570 N->getOperand(0).hasOneUse() &&
8571 canLoadStoreByteSwapped(N->getValueType(0))) {
8572 SDValue Load = N->getOperand(0);
8573 LoadSDNode *LD = cast<LoadSDNode>(Load);
8574
8575 // Create the byte-swapping load.
8576 SDValue Ops[] = {
8577 LD->getChain(), // Chain
8578 LD->getBasePtr() // Ptr
8579 };
8580 EVT LoadVT = N->getValueType(0);
8581 if (LoadVT == MVT::i16)
8582 LoadVT = MVT::i32;
8583 SDValue BSLoad =
8584 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
8585 DAG.getVTList(LoadVT, MVT::Other),
8586 Ops, LD->getMemoryVT(), LD->getMemOperand());
8587
8588 // If this is an i16 load, insert the truncate.
8589 SDValue ResVal = BSLoad;
8590 if (N->getValueType(0) == MVT::i16)
8591 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
8592
8593 // First, combine the bswap away. This makes the value produced by the
8594 // load dead.
8595 DCI.CombineTo(N, ResVal);
8596
8597 // Next, combine the load away; we give it a bogus result value but a real
8598 // chain result. The result value is dead because the bswap is dead.
8599 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
8600
8601 // Return N so it doesn't get rechecked!
8602 return SDValue(N, 0);
8603 }
8604
8605 // Look through bitcasts that retain the number of vector elements.
8606 SDValue Op = N->getOperand(0);
8607 if (Op.getOpcode() == ISD::BITCAST &&
8608 Op.getValueType().isVector() &&
8609 Op.getOperand(0).getValueType().isVector() &&
8610 Op.getValueType().getVectorNumElements() ==
8611 Op.getOperand(0).getValueType().getVectorNumElements())
8612 Op = Op.getOperand(0);
8613
8614 // Push BSWAP into a vector insertion if at least one side then simplifies.
8615 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
8616 SDValue Vec = Op.getOperand(0);
8617 SDValue Elt = Op.getOperand(1);
8618 SDValue Idx = Op.getOperand(2);
8619
8620 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
8621 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
8622 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
8623 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
8624 (canLoadStoreByteSwapped(N->getValueType(0)) &&
8625 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
8626 EVT VecVT = N->getValueType(0);
8627 EVT EltVT = N->getValueType(0).getVectorElementType();
8628 if (VecVT != Vec.getValueType()) {
8629 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
8630 DCI.AddToWorklist(Vec.getNode());
8631 }
8632 if (EltVT != Elt.getValueType()) {
8633 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
8634 DCI.AddToWorklist(Elt.getNode());
8635 }
8636 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
8637 DCI.AddToWorklist(Vec.getNode());
8638 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
8639 DCI.AddToWorklist(Elt.getNode());
8640 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
8641 Vec, Elt, Idx);
8642 }
8643 }
8644
8645 // Push BSWAP into a vector shuffle if at least one side then simplifies.
8646 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
8647 if (SV && Op.hasOneUse()) {
8648 SDValue Op0 = Op.getOperand(0);
8649 SDValue Op1 = Op.getOperand(1);
8650
8651 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
8652 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
8653 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
8654 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
8655 EVT VecVT = N->getValueType(0);
8656 if (VecVT != Op0.getValueType()) {
8657 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
8658 DCI.AddToWorklist(Op0.getNode());
8659 }
8660 if (VecVT != Op1.getValueType()) {
8661 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
8662 DCI.AddToWorklist(Op1.getNode());
8663 }
8664 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
8665 DCI.AddToWorklist(Op0.getNode());
8666 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
8667 DCI.AddToWorklist(Op1.getNode());
8668 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
8669 }
8670 }
8671
8672 return SDValue();
8673}
8674
8675SDValue SystemZTargetLowering::combineSETCC(
8676 SDNode *N, DAGCombinerInfo &DCI) const {
8677 SelectionDAG &DAG = DCI.DAG;
8678 const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
8679 const SDValue LHS = N->getOperand(0);
8680 const SDValue RHS = N->getOperand(1);
8681 bool CmpNull = isNullConstant(RHS);
8682 bool CmpAllOnes = isAllOnesConstant(RHS);
8683 EVT VT = N->getValueType(0);
8684 SDLoc DL(N);
8685
8686 // Match icmp_eq/ne(bitcast(icmp(X,Y)),0/-1) reduction patterns, and
8687 // change the outer compare to a i128 compare. This will normally
8688 // allow the reduction to be recognized in adjustICmp128, and even if
8689 // not, the i128 compare will still generate better code.
8690 if ((CC == ISD::SETNE || CC == ISD::SETEQ) && (CmpNull || CmpAllOnes)) {
8691 SDValue Src = peekThroughBitcasts(LHS);
8692 if (Src.getOpcode() == ISD::SETCC &&
8693 Src.getValueType().isFixedLengthVector() &&
8694 Src.getValueType().getScalarType() == MVT::i1) {
8695 EVT CmpVT = Src.getOperand(0).getValueType();
8696 if (CmpVT.getSizeInBits() == 128) {
8697 EVT IntVT = CmpVT.changeVectorElementTypeToInteger();
8698 SDValue LHS =
8699 DAG.getBitcast(MVT::i128, DAG.getSExtOrTrunc(Src, DL, IntVT));
8700 SDValue RHS = CmpNull ? DAG.getConstant(0, DL, MVT::i128)
8701 : DAG.getAllOnesConstant(DL, MVT::i128);
8702 return DAG.getNode(ISD::SETCC, DL, VT, LHS, RHS, N->getOperand(2),
8703 N->getFlags());
8704 }
8705 }
8706 }
8707
8708 return SDValue();
8709}
8710
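// Look through Val (IPM, SELECT_CCMASK and simple arithmetic/logical nodes)
// for the instruction that produced the condition code it depends on.
// Returns that CC value together with the mask of CC values it can produce,
// or (SDValue(), SystemZ::CCMASK_NONE) if no such use is found.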
8711static std::pair<SDValue, int> findCCUse(const SDValue &Val) {
8712 switch (Val.getOpcode()) {
8713 default:
8714 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8715 case SystemZISD::IPM:
8716 if (Val.getOperand(0).getOpcode() == SystemZISD::CLC ||
8717 Val.getOperand(0).getOpcode() == SystemZISD::STRCMP)
8718 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP);
8719 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY);
8720 case SystemZISD::SELECT_CCMASK: {
8721 SDValue Op4CCReg = Val.getOperand(4);
8722 if (Op4CCReg.getOpcode() == SystemZISD::ICMP ||
8723 Op4CCReg.getOpcode() == SystemZISD::TM) {
8724 auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0));
8725 if (OpCC != SDValue())
8726 return std::make_pair(OpCC, OpCCValid);
8727 }
8728 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8729 if (!CCValid)
8730 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8731 int CCValidVal = CCValid->getZExtValue();
8732 return std::make_pair(Op4CCReg, CCValidVal);
8733 }
8734 case ISD::ADD:
8735 case ISD::AND:
8736 case ISD::OR:
8737 case ISD::XOR:
8738 case ISD::SHL:
8739 case ISD::SRA:
8740 case ISD::SRL:
8741 auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0));
8742 if (Op0CC != SDValue())
8743 return std::make_pair(Op0CC, Op0CCValid);
8744 return findCCUse(Val.getOperand(1));
8745 }
8746}
8747
8748static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8749 SelectionDAG &DAG);
8750
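// Compute the value Val would take for each of the four possible condition
// codes produced by CC.  Returns a vector indexed by CC value, or an empty
// vector if Val cannot be expressed in terms of CC and constants alone.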
8751 static SmallVector<SDValue, 4> simplifyAssumingCCVal(SDValue Val, SDValue CC,
8752 SelectionDAG &DAG) {
8753 SDLoc DL(Val);
8754 auto Opcode = Val.getOpcode();
8755 switch (Opcode) {
8756 default:
8757 return {};
8758 case ISD::Constant:
8759 return {Val, Val, Val, Val};
8760 case SystemZISD::IPM: {
8761 SDValue IPMOp0 = Val.getOperand(0);
8762 if (IPMOp0 != CC)
8763 return {};
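// IPM inserts the condition code into bits 28-29 of its result, so under
// CC == i the value is simply (i << SystemZ::IPM_CC).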
8764 SmallVector<SDValue, 4> ShiftedCCVals;
8765 for (auto CC : {0, 1, 2, 3})
8766 ShiftedCCVals.emplace_back(
8767 DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32));
8768 return ShiftedCCVals;
8769 }
8770 case SystemZISD::SELECT_CCMASK: {
8771 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
8772 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8773 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
8774 if (!CCValid || !CCMask)
8775 return {};
8776
8777 int CCValidVal = CCValid->getZExtValue();
8778 int CCMaskVal = CCMask->getZExtValue();
8779 // Prune the search tree early by moving the CC test and combineCCMask
8780 // ahead of the recursive call to simplifyAssumingCCVal.
8781 SDValue Op4CCReg = Val.getOperand(4);
8782 if (Op4CCReg != CC)
8783 combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG);
8784 if (Op4CCReg != CC)
8785 return {};
8786 const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG);
8787 const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG);
8788 if (TrueSDVals.empty() || FalseSDVals.empty())
8789 return {};
8790 SmallVector<SDValue, 4> MergedSDVals;
8791 for (auto &CCVal : {0, 1, 2, 3})
8792 MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0)
8793 ? TrueSDVals[CCVal]
8794 : FalseSDVals[CCVal]);
8795 return MergedSDVals;
8796 }
8797 case ISD::ADD:
8798 case ISD::AND:
8799 case ISD::OR:
8800 case ISD::XOR:
8801 case ISD::SRA:
8802 // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA
8803 // would clobber CC).
8804 if (!Val.hasOneUse())
8805 return {};
8806 [[fallthrough]];
8807 case ISD::SHL:
8808 case ISD::SRL:
8809 SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1);
8810 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG);
8811 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG);
8812 if (Op0SDVals.empty() || Op1SDVals.empty())
8813 return {};
8814 SmallVector<SDValue, 4> BinaryOpSDVals;
8815 for (auto CCVal : {0, 1, 2, 3})
8816 BinaryOpSDVals.emplace_back(DAG.getNode(
8817 Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal]));
8818 return BinaryOpSDVals;
8819 }
8820}
8821
8822static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8823 SelectionDAG &DAG) {
8824 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
8825 // set by the CCReg instruction using the CCValid / CCMask masks.
8826 // If the CCReg instruction is itself an ICMP / TM testing the condition
8827 // code set by some other instruction, see whether we can directly
8828 // use that condition code.
8829 auto *CCNode = CCReg.getNode();
8830 if (!CCNode)
8831 return false;
8832
8833 if (CCNode->getOpcode() == SystemZISD::TM) {
8834 if (CCValid != SystemZ::CCMASK_TM)
8835 return false;
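// Emulate TEST UNDER MASK on two constant operands: CC 0 if all selected
// bits are zero, CC 3 if they are all one, and CC 1 or CC 2 for a mixed
// result depending on the leftmost selected bit.  Returns -1 if either
// operand is not a constant.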
8836 auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) {
8837 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8838 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8839 if (!Op0Node || !Op1Node)
8840 return -1;
8841 auto Op0APVal = Op0Node->getAPIntValue();
8842 auto Op1APVal = Op1Node->getAPIntValue();
8843 auto Result = Op0APVal & Op1APVal;
8844 bool AllOnes = Result == Op1APVal;
8845 bool AllZeros = Result == 0;
8846 bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits()] != 0;
8847 return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1;
8848 };
8849 SDValue Op0 = CCNode->getOperand(0);
8850 SDValue Op1 = CCNode->getOperand(1);
8851 auto [Op0CC, Op0CCValid] = findCCUse(Op0);
8852 if (Op0CC == SDValue())
8853 return false;
8854 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG);
8855 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG);
8856 if (Op0SDVals.empty() || Op1SDVals.empty())
8857 return false;
8858 int NewCCMask = 0;
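// CC value i corresponds to mask bit (1 << (3 - i)).  For each CC the
// underlying operation can produce, work out which CC the emulated test
// would see and carry over the corresponding bit of the original mask.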
8859 for (auto CC : {0, 1, 2, 3}) {
8860 auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]);
8861 if (CCVal < 0)
8862 return false;
8863 NewCCMask <<= 1;
8864 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8865 }
8866 NewCCMask &= Op0CCValid;
8867 CCReg = Op0CC;
8868 CCMask = NewCCMask;
8869 CCValid = Op0CCValid;
8870 return true;
8871 }
8872 if (CCNode->getOpcode() != SystemZISD::ICMP ||
8873 CCValid != SystemZ::CCMASK_ICMP)
8874 return false;
8875
8876 SDValue CmpOp0 = CCNode->getOperand(0);
8877 SDValue CmpOp1 = CCNode->getOperand(1);
8878 SDValue CmpOp2 = CCNode->getOperand(2);
8879 auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0);
8880 if (Op0CC != SDValue()) {
8881 const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG);
8882 const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG);
8883 if (Op0SDVals.empty() || Op1SDVals.empty())
8884 return false;
8885
8886 auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2);
8887 auto CmpTypeVal = CmpType->getZExtValue();
8888 const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
8889 const SDValue &Op1Val) {
8890 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8891 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8892 if (!Op0Node || !Op1Node)
8893 return -1;
8894 auto Op0APVal = Op0Node->getAPIntValue();
8895 auto Op1APVal = Op1Node->getAPIntValue();
8896 if (CmpTypeVal == SystemZICMP::SignedOnly)
8897 return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2;
8898 return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2;
8899 };
8900 int NewCCMask = 0;
8901 for (auto CC : {0, 1, 2, 3}) {
8902 auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
8903 if (CCVal < 0)
8904 return false;
8905 NewCCMask <<= 1;
8906 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8907 }
8908 NewCCMask &= Op0CCValid;
8909 CCMask = NewCCMask;
8910 CCReg = Op0CC;
8911 CCValid = Op0CCValid;
8912 return true;
8913 }
8914
8915 return false;
8916}
8917
8918 // Cost of merging conditions versus splitting into multiple branches.
8919 TargetLoweringBase::CondMergingParams
8920 SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
8921 const Value *Lhs,
8922 const Value *Rhs) const {
8923 const auto isFlagOutOpCC = [](const Value *V) {
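// Returns true if V is a compare (possibly of an 'and') of a value produced
// by inline asm that uses the "{@cc}" flag-output constraint.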
8924 using namespace llvm::PatternMatch;
8925 const Value *RHSVal;
8926 const APInt *RHSC;
8927 if (const auto *I = dyn_cast<Instruction>(V)) {
8928 // PatternMatch.h provides concise tree-based pattern match of llvm IR.
8929 if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) ||
8930 match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) {
8931 if (const auto *CB = dyn_cast<CallBase>(RHSVal)) {
8932 if (CB->isInlineAsm()) {
8933 const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
8934 return IA && IA->getConstraintString().contains("{@cc}");
8935 }
8936 }
8937 }
8938 }
8939 return false;
8940 };
8941 // Pattern (ICmp %asm) or (ICmp (And %asm)).
8942 // The longest dependency chain (ICmp, And) has cost 2, so CostThreshold or
8943 // BaseCost can be set >= 2. If the cost of the instruction is <= CostThreshold,
8944 // the conditionals will be merged; otherwise they will be split.
8945 if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
8946 return {3, 0, -1};
8947 // Default.
8948 return {-1, -1, -1};
8949}
8950
8951SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
8952 DAGCombinerInfo &DCI) const {
8953 SelectionDAG &DAG = DCI.DAG;
8954
8955 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
8956 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8957 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8958 if (!CCValid || !CCMask)
8959 return SDValue();
8960
8961 int CCValidVal = CCValid->getZExtValue();
8962 int CCMaskVal = CCMask->getZExtValue();
8963 SDValue Chain = N->getOperand(0);
8964 SDValue CCReg = N->getOperand(4);
8965 // If combineCCMask was able to merge or simplify ccvalid or ccmask, re-emit
8966 // the modified BR_CCMASK with the new values.
8967 // In order to avoid conditional branches with full or empty cc masks, do not
8968 // do this if ccmask is 0 or equal to ccvalid.
8969 if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG) && CCMaskVal != 0 &&
8970 CCMaskVal != CCValidVal)
8971 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
8972 Chain,
8973 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8974 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
8975 N->getOperand(3), CCReg);
8976 return SDValue();
8977}
8978
8979SDValue SystemZTargetLowering::combineSELECT_CCMASK(
8980 SDNode *N, DAGCombinerInfo &DCI) const {
8981 SelectionDAG &DAG = DCI.DAG;
8982
8983 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
8984 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
8985 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
8986 if (!CCValid || !CCMask)
8987 return SDValue();
8988
8989 int CCValidVal = CCValid->getZExtValue();
8990 int CCMaskVal = CCMask->getZExtValue();
8991 SDValue CCReg = N->getOperand(4);
8992
8993 bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
8994
8995 // Populate an SDVals vector with one value per condition code for the given
8996 // Val, which can itself be another nested select_ccmask on the same CC.
8997 const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
8998 if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
8999 SmallVector<SDValue, 4> Res;
9000 if (Val.getOperand(4) != CCReg)
9001 return SmallVector<SDValue, 4>{};
9002 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
9003 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
9004 if (!CCMask)
9005 return SmallVector<SDValue, 4>{};
9006
9007 int CCMaskVal = CCMask->getZExtValue();
9008 for (auto &CC : {0, 1, 2, 3})
9009 Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
9010 : FalseVal);
9011 return Res;
9012 }
9013 return SmallVector<SDValue, 4>{Val, Val, Val, Val};
9014 };
9015 // Attempt to optimize TrueVal/FalseVal of the outermost select_ccmask, using
9016 // either the CCReg found by combineCCMask or the original CCReg.
9017 SDValue TrueVal = N->getOperand(0);
9018 SDValue FalseVal = N->getOperand(1);
9019 auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
9020 auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
9021 // TrueSDVals/FalseSDVals might be empty in case of non-constant
9022 // TrueVal/FalseVal for select_ccmask, which cannot be optimized further.
9023 if (TrueSDVals.empty())
9024 TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
9025 if (FalseSDVals.empty())
9026 FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
9027 if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
9028 SmallSet<SDValue, 4> MergedSDValsSet;
9029 // Ignoring CC values outside CCValid.
9030 for (auto CC : {0, 1, 2, 3}) {
9031 if ((CCValidVal & ((1 << (3 - CC)))) != 0)
9032 MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
9033 ? TrueSDVals[CC]
9034 : FalseSDVals[CC]);
9035 }
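// If every reachable CC selects the same value, the select is redundant.
// With exactly two distinct values, re-derive a fresh CC mask for them so
// the select can be re-emitted (or folded further below).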
9036 if (MergedSDValsSet.size() == 1)
9037 return *MergedSDValsSet.begin();
9038 if (MergedSDValsSet.size() == 2) {
9039 auto BeginIt = MergedSDValsSet.begin();
9040 SDValue NewTrueVal = *BeginIt, NewFalseVal = *std::next(BeginIt);
9041 if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
9042 std::swap(NewTrueVal, NewFalseVal);
9043 int NewCCMask = 0;
9044 for (auto CC : {0, 1, 2, 3}) {
9045 NewCCMask <<= 1;
9046 NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
9047 ? (TrueSDVals[CC] == NewTrueVal)
9048 : (FalseSDVals[CC] == NewTrueVal);
9049 }
9050 CCMaskVal = NewCCMask;
9051 CCMaskVal &= CCValidVal;
9052 TrueVal = NewTrueVal;
9053 FalseVal = NewFalseVal;
9054 IsCombinedCCReg = true;
9055 }
9056 }
9057 // If the condition is trivially false or trivially true after
9058 // combineCCMask, just collapse this SELECT_CCMASK to the indicated value
9059 // (possibly modified by constructCCSDValsFromSELECT).
9060 if (CCMaskVal == 0)
9061 return FalseVal;
9062 if (CCMaskVal == CCValidVal)
9063 return TrueVal;
9064
9065 if (IsCombinedCCReg)
9066 return DAG.getNode(
9067 SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
9068 FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
9069 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
9070
9071 return SDValue();
9072}
9073
9074SDValue SystemZTargetLowering::combineGET_CCMASK(
9075 SDNode *N, DAGCombinerInfo &DCI) const {
9076
9077 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
9078 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
9079 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
9080 if (!CCValid || !CCMask)
9081 return SDValue();
9082 int CCValidVal = CCValid->getZExtValue();
9083 int CCMaskVal = CCMask->getZExtValue();
9084
9085 SDValue Select = N->getOperand(0);
9086 if (Select->getOpcode() == ISD::TRUNCATE)
9087 Select = Select->getOperand(0);
9088 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
9089 return SDValue();
9090
9091 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
9092 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
9093 if (!SelectCCValid || !SelectCCMask)
9094 return SDValue();
9095 int SelectCCValidVal = SelectCCValid->getZExtValue();
9096 int SelectCCMaskVal = SelectCCMask->getZExtValue();
9097
9098 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
9099 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
9100 if (!TrueVal || !FalseVal)
9101 return SDValue();
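// A (1, 0) select reproduces the CC mask directly; a (0, 1) select
// reproduces its complement within CCValid.  Anything else cannot be
// matched against GET_CCMASK.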
9102 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
9103 ;
9104 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
9105 SelectCCMaskVal ^= SelectCCValidVal;
9106 else
9107 return SDValue();
9108
9109 if (SelectCCValidVal & ~CCValidVal)
9110 return SDValue();
9111 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
9112 return SDValue();
9113
9114 return Select->getOperand(4);
9115}
9116
9117SDValue SystemZTargetLowering::combineIntDIVREM(
9118 SDNode *N, DAGCombinerInfo &DCI) const {
9119 SelectionDAG &DAG = DCI.DAG;
9120 EVT VT = N->getValueType(0);
9121 // In the case where the divisor is a vector of constants, a cheaper
9122 // sequence of instructions can replace the divide. BuildSDIV is called to
9123 // do this during DAG combining, but it only succeeds when it can build a
9124 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
9125 // since it is not Legal but Custom it can only happen before legalization.
9126 // Therefore we must scalarize this early before Combine 1. For widened
9127 // vectors, this is already the result of type legalization.
9128 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
9129 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
9130 return DAG.UnrollVectorOp(N);
9131 return SDValue();
9132}
9133
9134
9135// Transform a right shift of a multiply-and-add into a multiply-and-add-high.
9136// This is closely modeled after the common-code combineShiftToMULH.
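// The handled pattern is
//   (sra/srl (add (mul (ext X), (ext Y)), (ext Z)), <width of the narrow type>)
// where the extensions are all sign or all zero extends (constants that fit
// the narrow type are also accepted); it becomes VMAH (signed) or VMALH
// (unsigned) on the narrow element type.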
9137SDValue SystemZTargetLowering::combineShiftToMulAddHigh(
9138 SDNode *N, DAGCombinerInfo &DCI) const {
9139 SelectionDAG &DAG = DCI.DAG;
9140 SDLoc DL(N);
9141
9142 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
9143 "SRL or SRA node is required here!");
9144
9145 if (!Subtarget.hasVector())
9146 return SDValue();
9147
9148 // Check the shift amount. Proceed with the transformation if the shift
9149 // amount is constant.
9150 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
9151 if (!ShiftAmtSrc)
9152 return SDValue();
9153
9154 // The operation feeding into the shift must be an add.
9155 SDValue ShiftOperand = N->getOperand(0);
9156 if (ShiftOperand.getOpcode() != ISD::ADD)
9157 return SDValue();
9158
9159 // One operand of the add must be a multiply.
9160 SDValue MulOp = ShiftOperand.getOperand(0);
9161 SDValue AddOp = ShiftOperand.getOperand(1);
9162 if (MulOp.getOpcode() != ISD::MUL) {
9163 if (AddOp.getOpcode() != ISD::MUL)
9164 return SDValue();
9165 std::swap(MulOp, AddOp);
9166 }
9167
9168 // All operands must be equivalent extend nodes.
9169 SDValue LeftOp = MulOp.getOperand(0);
9170 SDValue RightOp = MulOp.getOperand(1);
9171
9172 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9173 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9174
9175 if (!IsSignExt && !IsZeroExt)
9176 return SDValue();
9177
9178 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
9179 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
9180
9181 SDValue MulhRightOp;
9182 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
9183 unsigned ActiveBits = IsSignExt
9184 ? Constant->getAPIntValue().getSignificantBits()
9185 : Constant->getAPIntValue().getActiveBits();
9186 if (ActiveBits > NarrowVTSize)
9187 return SDValue();
9188 MulhRightOp = DAG.getConstant(
9189 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9190 NarrowVT);
9191 } else {
9192 if (LeftOp.getOpcode() != RightOp.getOpcode())
9193 return SDValue();
9194 // Check that the two extend nodes are the same type.
9195 if (NarrowVT != RightOp.getOperand(0).getValueType())
9196 return SDValue();
9197 MulhRightOp = RightOp.getOperand(0);
9198 }
9199
9200 SDValue MulhAddOp;
9201 if (ConstantSDNode *Constant = isConstOrConstSplat(AddOp)) {
9202 unsigned ActiveBits = IsSignExt
9203 ? Constant->getAPIntValue().getSignificantBits()
9204 : Constant->getAPIntValue().getActiveBits();
9205 if (ActiveBits > NarrowVTSize)
9206 return SDValue();
9207 MulhAddOp = DAG.getConstant(
9208 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9209 NarrowVT);
9210 } else {
9211 if (LeftOp.getOpcode() != AddOp.getOpcode())
9212 return SDValue();
9213 // Check that the two extend nodes are the same type.
9214 if (NarrowVT != AddOp.getOperand(0).getValueType())
9215 return SDValue();
9216 MulhAddOp = AddOp.getOperand(0);
9217 }
9218
9219 EVT WideVT = LeftOp.getValueType();
9220 // Proceed with the transformation if the wide types match.
9221 assert((WideVT == RightOp.getValueType()) &&
9222 "Cannot have a multiply node with two different operand types.");
9223 assert((WideVT == AddOp.getValueType()) &&
9224 "Cannot have an add node with two different operand types.");
9225
9226 // Proceed with the transformation if the wide type is twice as large
9227 // as the narrow type.
9228 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9229 return SDValue();
9230
9231 // Check the shift amount with the narrow type size.
9232 // Proceed with the transformation if the shift amount is the width
9233 // of the narrow type.
9234 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9235 if (ShiftAmt != NarrowVTSize)
9236 return SDValue();
9237
9238 // Proceed if we support the multiply-and-add-high operation.
9239 if (!(NarrowVT == MVT::v16i8 || NarrowVT == MVT::v8i16 ||
9240 NarrowVT == MVT::v4i32 ||
9241 (Subtarget.hasVectorEnhancements3() &&
9242 (NarrowVT == MVT::v2i64 || NarrowVT == MVT::i128))))
9243 return SDValue();
9244
9245 // Emit the VMAH (signed) or VMALH (unsigned) operation.
9246 SDValue Result = DAG.getNode(IsSignExt ? SystemZISD::VMAH : SystemZISD::VMALH,
9247 DL, NarrowVT, LeftOp.getOperand(0),
9248 MulhRightOp, MulhAddOp);
9249 bool IsSigned = N->getOpcode() == ISD::SRA;
9250 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
9251}
9252
9253// Op is an operand of a multiplication. Check whether this can be folded
9254// into an even/odd widening operation; if so, return the opcode to be used
9255// and update Op to the appropriate sub-operand. Note that the caller must
9256// verify that *both* operands of the multiplication support the operation.
9257 static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG,
9258 const SystemZSubtarget &Subtarget,
9259 SDValue &Op) {
9260 EVT VT = Op.getValueType();
9261
9262 // Check for (sign/zero_extend_vector_inreg (vector_shuffle)) corresponding
9263 // to selecting the even or odd vector elements.
9264 if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
9265 (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9266 Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)) {
9267 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
9268 unsigned NumElts = VT.getVectorNumElements();
9269 Op = Op.getOperand(0);
9270 if (Op.getValueType().getVectorNumElements() == 2 * NumElts &&
9271 Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
9272 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
9273 ArrayRef<int> ShuffleMask = SVN->getMask();
9274 bool CanUseEven = true, CanUseOdd = true;
9275 for (unsigned Elt = 0; Elt < NumElts; Elt++) {
9276 if (ShuffleMask[Elt] == -1)
9277 continue;
9278 if (unsigned(ShuffleMask[Elt]) != 2 * Elt)
9279 CanUseEven = false;
9280 if (unsigned(ShuffleMask[Elt]) != 2 * Elt + 1)
9281 CanUseOdd = false;
9282 }
9283 Op = Op.getOperand(0);
9284 if (CanUseEven)
9285 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9286 if (CanUseOdd)
9287 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9288 }
9289 }
9290
9291 // For z17, we can also support the v2i64->i128 case, which looks like
9292 // (sign/zero_extend (extract_vector_elt X 0/1))
9293 if (VT == MVT::i128 && Subtarget.hasVectorEnhancements3() &&
9294 (Op.getOpcode() == ISD::SIGN_EXTEND ||
9295 Op.getOpcode() == ISD::ZERO_EXTEND)) {
9296 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND;
9297 Op = Op.getOperand(0);
9298 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9299 Op.getOperand(0).getValueType() == MVT::v2i64 &&
9300 Op.getOperand(1).getOpcode() == ISD::Constant) {
9301 unsigned Elem = Op.getConstantOperandVal(1);
9302 Op = Op.getOperand(0);
9303 if (Elem == 0)
9304 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9305 if (Elem == 1)
9306 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9307 }
9308 }
9309
9310 return 0;
9311}
9312
9313SDValue SystemZTargetLowering::combineMUL(
9314 SDNode *N, DAGCombinerInfo &DCI) const {
9315 SelectionDAG &DAG = DCI.DAG;
9316
9317 // Detect even/odd widening multiplication.
9318 SDValue Op0 = N->getOperand(0);
9319 SDValue Op1 = N->getOperand(1);
9320 unsigned OpcodeCand0 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op0);
9321 unsigned OpcodeCand1 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op1);
9322 if (OpcodeCand0 && OpcodeCand0 == OpcodeCand1)
9323 return DAG.getNode(OpcodeCand0, SDLoc(N), N->getValueType(0), Op0, Op1);
9324
9325 return SDValue();
9326}
9327
9328SDValue SystemZTargetLowering::combineINTRINSIC(
9329 SDNode *N, DAGCombinerInfo &DCI) const {
9330 SelectionDAG &DAG = DCI.DAG;
9331
9332 unsigned Id = N->getConstantOperandVal(1);
9333 switch (Id) {
9334 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
9335 // or larger is simply a vector load.
9336 case Intrinsic::s390_vll:
9337 case Intrinsic::s390_vlrl:
9338 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
9339 if (C->getZExtValue() >= 15)
9340 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
9341 N->getOperand(3), MachinePointerInfo());
9342 break;
9343 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
9344 case Intrinsic::s390_vstl:
9345 case Intrinsic::s390_vstrl:
9346 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
9347 if (C->getZExtValue() >= 15)
9348 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
9349 N->getOperand(4), MachinePointerInfo());
9350 break;
9351 }
9352
9353 return SDValue();
9354}
9355
9356SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
9357 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
9358 return N->getOperand(0);
9359 return N;
9360}
9361
9362 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
9363 DAGCombinerInfo &DCI) const {
9364 switch(N->getOpcode()) {
9365 default: break;
9366 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
9367 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
9368 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
9369 case SystemZISD::MERGE_HIGH:
9370 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
9371 case ISD::LOAD: return combineLOAD(N, DCI);
9372 case ISD::STORE: return combineSTORE(N, DCI);
9373 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
9374 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
9375 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
9376 case ISD::STRICT_FP_ROUND:
9377 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
9378 case ISD::STRICT_FP_EXTEND:
9379 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
9380 case ISD::SINT_TO_FP:
9381 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
9382 case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
9383 case ISD::BSWAP: return combineBSWAP(N, DCI);
9384 case ISD::SETCC: return combineSETCC(N, DCI);
9385 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
9386 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
9387 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
9388 case ISD::SRL:
9389 case ISD::SRA: return combineShiftToMulAddHigh(N, DCI);
9390 case ISD::MUL: return combineMUL(N, DCI);
9391 case ISD::SDIV:
9392 case ISD::UDIV:
9393 case ISD::SREM:
9394 case ISD::UREM: return combineIntDIVREM(N, DCI);
9395 case ISD::INTRINSIC_W_CHAIN:
9396 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
9397 }
9398
9399 return SDValue();
9400}
9401
9402// Return the demanded elements for the OpNo source operand of Op. DemandedElts
9403// are for Op.
9404static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
9405 unsigned OpNo) {
9406 EVT VT = Op.getValueType();
9407 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
9408 APInt SrcDemE;
9409 unsigned Opcode = Op.getOpcode();
9410 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9411 unsigned Id = Op.getConstantOperandVal(0);
9412 switch (Id) {
9413 case Intrinsic::s390_vpksh: // PACKS
9414 case Intrinsic::s390_vpksf:
9415 case Intrinsic::s390_vpksg:
9416 case Intrinsic::s390_vpkshs: // PACKS_CC
9417 case Intrinsic::s390_vpksfs:
9418 case Intrinsic::s390_vpksgs:
9419 case Intrinsic::s390_vpklsh: // PACKLS
9420 case Intrinsic::s390_vpklsf:
9421 case Intrinsic::s390_vpklsg:
9422 case Intrinsic::s390_vpklshs: // PACKLS_CC
9423 case Intrinsic::s390_vpklsfs:
9424 case Intrinsic::s390_vpklsgs:
9425 // VECTOR PACK truncates the elements of two source vectors into one.
9426 SrcDemE = DemandedElts;
9427 if (OpNo == 2)
9428 SrcDemE.lshrInPlace(NumElts / 2);
9429 SrcDemE = SrcDemE.trunc(NumElts / 2);
9430 break;
9431 // VECTOR UNPACK extends half the elements of the source vector.
9432 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9433 case Intrinsic::s390_vuphh:
9434 case Intrinsic::s390_vuphf:
9435 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9436 case Intrinsic::s390_vuplhh:
9437 case Intrinsic::s390_vuplhf:
9438 SrcDemE = APInt(NumElts * 2, 0);
9439 SrcDemE.insertBits(DemandedElts, 0);
9440 break;
9441 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9442 case Intrinsic::s390_vuplhw:
9443 case Intrinsic::s390_vuplf:
9444 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9445 case Intrinsic::s390_vupllh:
9446 case Intrinsic::s390_vupllf:
9447 SrcDemE = APInt(NumElts * 2, 0);
9448 SrcDemE.insertBits(DemandedElts, NumElts);
9449 break;
9450 case Intrinsic::s390_vpdi: {
9451 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
9452 SrcDemE = APInt(NumElts, 0);
9453 if (!DemandedElts[OpNo - 1])
9454 break;
9455 unsigned Mask = Op.getConstantOperandVal(3);
9456 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
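// Mask bit 4 selects the doubleword taken from the first operand, mask
// bit 1 the doubleword taken from the second operand.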
9457 // Demand input element 0 or 1, given by the mask bit value.
9458 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
9459 break;
9460 }
9461 case Intrinsic::s390_vsldb: {
9462 // VECTOR SHIFT LEFT DOUBLE BY BYTE
9463 assert(VT == MVT::v16i8 && "Unexpected type.");
9464 unsigned FirstIdx = Op.getConstantOperandVal(3);
9465 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
9466 unsigned NumSrc0Els = 16 - FirstIdx;
9467 SrcDemE = APInt(NumElts, 0);
9468 if (OpNo == 1) {
9469 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
9470 SrcDemE.insertBits(DemEls, FirstIdx);
9471 } else {
9472 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
9473 SrcDemE.insertBits(DemEls, 0);
9474 }
9475 break;
9476 }
9477 case Intrinsic::s390_vperm:
9478 SrcDemE = APInt::getAllOnes(NumElts);
9479 break;
9480 default:
9481 llvm_unreachable("Unhandled intrinsic.");
9482 break;
9483 }
9484 } else {
9485 switch (Opcode) {
9486 case SystemZISD::JOIN_DWORDS:
9487 // Scalar operand.
9488 SrcDemE = APInt(1, 1);
9489 break;
9490 case SystemZISD::SELECT_CCMASK:
9491 SrcDemE = DemandedElts;
9492 break;
9493 default:
9494 llvm_unreachable("Unhandled opcode.");
9495 break;
9496 }
9497 }
9498 return SrcDemE;
9499}
9500
9501static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
9502 const APInt &DemandedElts,
9503 const SelectionDAG &DAG, unsigned Depth,
9504 unsigned OpNo) {
9505 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9506 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9507 KnownBits LHSKnown =
9508 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9509 KnownBits RHSKnown =
9510 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9511 Known = LHSKnown.intersectWith(RHSKnown);
9512}
9513
9514void
9515 SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
9516 KnownBits &Known,
9517 const APInt &DemandedElts,
9518 const SelectionDAG &DAG,
9519 unsigned Depth) const {
9520 Known.resetAll();
9521
9522 // Intrinsic CC result is returned in the two low bits.
9523 unsigned Tmp0, Tmp1; // not used
9524 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, Tmp0, Tmp1)) {
9525 Known.Zero.setBitsFrom(2);
9526 return;
9527 }
9528 EVT VT = Op.getValueType();
9529 if (Op.getResNo() != 0 || VT == MVT::Untyped)
9530 return;
9531 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
9532 "KnownBits does not match VT in bitwidth");
9533 assert ((!VT.isVector() ||
9534 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
9535 "DemandedElts does not match VT number of elements");
9536 unsigned BitWidth = Known.getBitWidth();
9537 unsigned Opcode = Op.getOpcode();
9538 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9539 bool IsLogical = false;
9540 unsigned Id = Op.getConstantOperandVal(0);
9541 switch (Id) {
9542 case Intrinsic::s390_vpksh: // PACKS
9543 case Intrinsic::s390_vpksf:
9544 case Intrinsic::s390_vpksg:
9545 case Intrinsic::s390_vpkshs: // PACKS_CC
9546 case Intrinsic::s390_vpksfs:
9547 case Intrinsic::s390_vpksgs:
9548 case Intrinsic::s390_vpklsh: // PACKLS
9549 case Intrinsic::s390_vpklsf:
9550 case Intrinsic::s390_vpklsg:
9551 case Intrinsic::s390_vpklshs: // PACKLS_CC
9552 case Intrinsic::s390_vpklsfs:
9553 case Intrinsic::s390_vpklsgs:
9554 case Intrinsic::s390_vpdi:
9555 case Intrinsic::s390_vsldb:
9556 case Intrinsic::s390_vperm:
9557 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
9558 break;
9559 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9560 case Intrinsic::s390_vuplhh:
9561 case Intrinsic::s390_vuplhf:
9562 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9563 case Intrinsic::s390_vupllh:
9564 case Intrinsic::s390_vupllf:
9565 IsLogical = true;
9566 [[fallthrough]];
9567 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9568 case Intrinsic::s390_vuphh:
9569 case Intrinsic::s390_vuphf:
9570 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9571 case Intrinsic::s390_vuplhw:
9572 case Intrinsic::s390_vuplf: {
9573 SDValue SrcOp = Op.getOperand(1);
9574 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
9575 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
9576 if (IsLogical) {
9577 Known = Known.zext(BitWidth);
9578 } else
9579 Known = Known.sext(BitWidth);
9580 break;
9581 }
9582 default:
9583 break;
9584 }
9585 } else {
9586 switch (Opcode) {
9587 case SystemZISD::JOIN_DWORDS:
9588 case SystemZISD::SELECT_CCMASK:
9589 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
9590 break;
9591 case SystemZISD::REPLICATE: {
9592 SDValue SrcOp = Op.getOperand(0);
9593 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
9594 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
9595 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
9596 break;
9597 }
9598 default:
9599 break;
9600 }
9601 }
9602
9603 // Known has the width of the source operand(s). Adjust if needed to match
9604 // the passed bitwidth.
9605 if (Known.getBitWidth() != BitWidth)
9606 Known = Known.anyextOrTrunc(BitWidth);
9607}
9608
9609static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
9610 const SelectionDAG &DAG, unsigned Depth,
9611 unsigned OpNo) {
9612 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9613 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9614 if (LHS == 1) return 1; // Early out.
9615 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9616 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9617 if (RHS == 1) return 1; // Early out.
9618 unsigned Common = std::min(LHS, RHS);
9619 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
9620 EVT VT = Op.getValueType();
9621 unsigned VTBits = VT.getScalarSizeInBits();
9622 if (SrcBitWidth > VTBits) { // PACK
9623 unsigned SrcExtraBits = SrcBitWidth - VTBits;
9624 if (Common > SrcExtraBits)
9625 return (Common - SrcExtraBits);
9626 return 1;
9627 }
9628 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
9629 return Common;
9630}
9631
9632unsigned
9633 SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
9634 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
9635 unsigned Depth) const {
9636 if (Op.getResNo() != 0)
9637 return 1;
9638 unsigned Opcode = Op.getOpcode();
9639 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9640 unsigned Id = Op.getConstantOperandVal(0);
9641 switch (Id) {
9642 case Intrinsic::s390_vpksh: // PACKS
9643 case Intrinsic::s390_vpksf:
9644 case Intrinsic::s390_vpksg:
9645 case Intrinsic::s390_vpkshs: // PACKS_CC
9646 case Intrinsic::s390_vpksfs:
9647 case Intrinsic::s390_vpksgs:
9648 case Intrinsic::s390_vpklsh: // PACKLS
9649 case Intrinsic::s390_vpklsf:
9650 case Intrinsic::s390_vpklsg:
9651 case Intrinsic::s390_vpklshs: // PACKLS_CC
9652 case Intrinsic::s390_vpklsfs:
9653 case Intrinsic::s390_vpklsgs:
9654 case Intrinsic::s390_vpdi:
9655 case Intrinsic::s390_vsldb:
9656 case Intrinsic::s390_vperm:
9657 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
9658 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9659 case Intrinsic::s390_vuphh:
9660 case Intrinsic::s390_vuphf:
9661 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9662 case Intrinsic::s390_vuplhw:
9663 case Intrinsic::s390_vuplf: {
9664 SDValue PackedOp = Op.getOperand(1);
9665 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
9666 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
9667 EVT VT = Op.getValueType();
9668 unsigned VTBits = VT.getScalarSizeInBits();
9669 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
9670 return Tmp;
9671 }
9672 default:
9673 break;
9674 }
9675 } else {
9676 switch (Opcode) {
9677 case SystemZISD::SELECT_CCMASK:
9678 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
9679 default:
9680 break;
9681 }
9682 }
9683
9684 return 1;
9685}
9686
9687 bool SystemZTargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
9688 SDValue Op,
9689 const APInt &DemandedElts, const SelectionDAG &DAG,
9690 bool PoisonOnly, unsigned Depth) const {
9691 switch (Op->getOpcode()) {
9692 case SystemZISD::PCREL_WRAPPER:
9693 case SystemZISD::PCREL_OFFSET:
9694 return true;
9695 }
9696 return false;
9697}
9698
9699unsigned
9700 SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
9701 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9702 unsigned StackAlign = TFI->getStackAlignment();
9703 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
9704 "Unexpected stack alignment");
9705 // The default stack probe size is 4096 if the function has no
9706 // stack-probe-size attribute.
9707 unsigned StackProbeSize =
9708 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
9709 // Round down to the stack alignment.
9710 StackProbeSize &= ~(StackAlign - 1);
9711 return StackProbeSize ? StackProbeSize : StackAlign;
9712}
9713
9714//===----------------------------------------------------------------------===//
9715// Custom insertion
9716//===----------------------------------------------------------------------===//
9717
9718// Force base value Base into a register before MI. Return the register.
9719 static Register forceReg(MachineInstr &MI, MachineOperand &Base,
9720 const SystemZInstrInfo *TII) {
9721 MachineBasicBlock *MBB = MI.getParent();
9722 MachineFunction &MF = *MBB->getParent();
9723 MachineRegisterInfo &MRI = MF.getRegInfo();
9724
9725 if (Base.isReg()) {
9726 // Copy Base into a new virtual register to help register coalescing in
9727 // cases with multiple uses.
9728 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9729 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
9730 .add(Base);
9731 return Reg;
9732 }
9733
9734 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9735 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
9736 .add(Base)
9737 .addImm(0)
9738 .addReg(0);
9739 return Reg;
9740}
9741
9742// The CC operand of MI might be missing a kill marker because there
9743// were multiple uses of CC, and ISel didn't know which to mark.
9744// Figure out whether MI should have had a kill marker.
9745 static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
9746 // Scan forward through BB for a use/def of CC.
9747 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
9748 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
9749 const MachineInstr &MI = *miI;
9750 if (MI.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
9751 return false;
9752 if (MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
9753 break; // Should have kill-flag - update below.
9754 }
9755
9756 // If we hit the end of the block, check whether CC is live into a
9757 // successor.
9758 if (miI == MBB->end()) {
9759 for (const MachineBasicBlock *Succ : MBB->successors())
9760 if (Succ->isLiveIn(SystemZ::CC))
9761 return false;
9762 }
9763
9764 return true;
9765}
9766
9767// Return true if it is OK for this Select pseudo-opcode to be cascaded
9768// together with other Select pseudo-opcodes into a single basic-block with
9769// a conditional jump around it.
9770 static bool isSelectPseudo(MachineInstr &MI) {
9771 switch (MI.getOpcode()) {
9772 case SystemZ::Select32:
9773 case SystemZ::Select64:
9774 case SystemZ::Select128:
9775 case SystemZ::SelectF32:
9776 case SystemZ::SelectF64:
9777 case SystemZ::SelectF128:
9778 case SystemZ::SelectVR32:
9779 case SystemZ::SelectVR64:
9780 case SystemZ::SelectVR128:
9781 return true;
9782
9783 default:
9784 return false;
9785 }
9786}
9787
9788// Helper function, which inserts PHI functions into SinkMBB:
9789// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
9790// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
9791 static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
9792 MachineBasicBlock *TrueMBB,
9793 MachineBasicBlock *FalseMBB,
9794 MachineBasicBlock *SinkMBB) {
9795 MachineFunction *MF = TrueMBB->getParent();
9796 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
9797
9798 MachineInstr *FirstMI = Selects.front();
9799 unsigned CCValid = FirstMI->getOperand(3).getImm();
9800 unsigned CCMask = FirstMI->getOperand(4).getImm();
9801
9802 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
9803
9804 // As we are creating the PHIs, we have to be careful if there is more than
9805 // one. Later Selects may reference the results of earlier Selects, but later
9806 // PHIs have to reference the individual true/false inputs from earlier PHIs.
9807 // That also means that PHI construction must work forward from earlier to
9808 // later, and that the code must maintain a mapping from each earlier PHI's
9809 // destination register to the registers that went into that PHI.
9810 DenseMap<Register, std::pair<Register, Register>> RegRewriteTable;
9811
9812 for (auto *MI : Selects) {
9813 Register DestReg = MI->getOperand(0).getReg();
9814 Register TrueReg = MI->getOperand(1).getReg();
9815 Register FalseReg = MI->getOperand(2).getReg();
9816
9817 // If this Select we are generating is the opposite condition from
9818 // the jump we generated, then we have to swap the operands for the
9819 // PHI that is going to be generated.
9820 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
9821 std::swap(TrueReg, FalseReg);
9822
9823 if (auto It = RegRewriteTable.find(TrueReg); It != RegRewriteTable.end())
9824 TrueReg = It->second.first;
9825
9826 if (auto It = RegRewriteTable.find(FalseReg); It != RegRewriteTable.end())
9827 FalseReg = It->second.second;
9828
9829 DebugLoc DL = MI->getDebugLoc();
9830 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
9831 .addReg(TrueReg).addMBB(TrueMBB)
9832 .addReg(FalseReg).addMBB(FalseMBB);
9833
9834 // Add this PHI to the rewrite table.
9835 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
9836 }
9837
9838 MF->getProperties().resetNoPHIs();
9839}
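// Rough sketch of the remapping for two cascaded selects with the same
// condition (register names are illustrative only):
//   %a = Select32 %t0, %f0, CCValid, CCMask
//   %b = Select32 %a,  %f1, CCValid, CCMask
// %a is only defined in SinkMBB, so the PHI for %b takes the TrueMBB input of
// %a from the rewrite table instead:
//   %a = PHI [ %t0, TrueMBB ], [ %f0, FalseMBB ]
//   %b = PHI [ %t0, TrueMBB ], [ %f1, FalseMBB ]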
9840
9841MachineBasicBlock *
9842SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
9843 MachineBasicBlock *BB) const {
9844 MachineFunction &MF = *BB->getParent();
9845 MachineFrameInfo &MFI = MF.getFrameInfo();
9846 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
9847 assert(TFL->hasReservedCallFrame(MF) &&
9848 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
9849 (void)TFL;
9850 // Get the MaxCallFrameSize value and erase MI since it serves no further
9851 // purpose as the call frame is statically reserved in the prolog. Set
9852 // AdjustsStack as MI is *not* mapped as a frame instruction.
9853 uint32_t NumBytes = MI.getOperand(0).getImm();
9854 if (NumBytes > MFI.getMaxCallFrameSize())
9855 MFI.setMaxCallFrameSize(NumBytes);
9856 MFI.setAdjustsStack(true);
9857
9858 MI.eraseFromParent();
9859 return BB;
9860}
9861
9862// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
9863MachineBasicBlock *
9864SystemZTargetLowering::emitSelect(MachineInstr &MI,
9865 MachineBasicBlock *MBB) const {
9866 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
9867 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9868
9869 unsigned CCValid = MI.getOperand(3).getImm();
9870 unsigned CCMask = MI.getOperand(4).getImm();
9871
9872 // If we have a sequence of Select* pseudo instructions using the
9873 // same condition code value, we want to expand all of them into
9874 // a single pair of basic blocks using the same condition.
9875 SmallVector<MachineInstr*, 8> Selects;
9876 SmallVector<MachineInstr*, 8> DbgValues;
9877 Selects.push_back(&MI);
9878 unsigned Count = 0;
9879 for (MachineInstr &NextMI : llvm::make_range(
9880 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
9881 if (isSelectPseudo(NextMI)) {
9882 assert(NextMI.getOperand(3).getImm() == CCValid &&
9883 "Bad CCValid operands since CC was not redefined.");
9884 if (NextMI.getOperand(4).getImm() == CCMask ||
9885 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
9886 Selects.push_back(&NextMI);
9887 continue;
9888 }
9889 break;
9890 }
9891 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9892 NextMI.usesCustomInsertionHook())
9893 break;
9894 bool User = false;
9895 for (auto *SelMI : Selects)
9896 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
9897 User = true;
9898 break;
9899 }
9900 if (NextMI.isDebugInstr()) {
9901 if (User) {
9902 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
9903 DbgValues.push_back(&NextMI);
9904 }
9905 } else if (User || ++Count > 20)
9906 break;
9907 }
9908
9909 MachineInstr *LastMI = Selects.back();
9910 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9911 checkCCKill(*LastMI, MBB));
9912 MachineBasicBlock *StartMBB = MBB;
9913 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
9914 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9915
9916 // Unless CC was killed in the last Select instruction, mark it as
9917 // live-in to both FalseMBB and JoinMBB.
9918 if (!CCKilled) {
9919 FalseMBB->addLiveIn(SystemZ::CC);
9920 JoinMBB->addLiveIn(SystemZ::CC);
9921 }
9922
9923 // StartMBB:
9924 // BRC CCMask, JoinMBB
9925 // # fallthrough to FalseMBB
9926 MBB = StartMBB;
9927 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
9928 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9929 MBB->addSuccessor(JoinMBB);
9930 MBB->addSuccessor(FalseMBB);
9931
9932 // FalseMBB:
9933 // # fallthrough to JoinMBB
9934 MBB = FalseMBB;
9935 MBB->addSuccessor(JoinMBB);
9936
9937 // JoinMBB:
9938 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
9939 // ...
9940 MBB = JoinMBB;
9941 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
9942 for (auto *SelMI : Selects)
9943 SelMI->eraseFromParent();
9944
9945 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
9946 for (auto *DbgMI : DbgValues)
9947 MBB->splice(InsertPos, StartMBB, DbgMI);
9948
9949 return JoinMBB;
9950}
9951
9952// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
9953// StoreOpcode is the store to use and Invert says whether the store should
9954// happen when the condition is false rather than true. If a STORE ON
9955// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
9956MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
9957 MachineBasicBlock *MBB,
9958 unsigned StoreOpcode,
9959 unsigned STOCOpcode,
9960 bool Invert) const {
9961 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9962
9963 Register SrcReg = MI.getOperand(0).getReg();
9964 MachineOperand Base = MI.getOperand(1);
9965 int64_t Disp = MI.getOperand(2).getImm();
9966 Register IndexReg = MI.getOperand(3).getReg();
9967 unsigned CCValid = MI.getOperand(4).getImm();
9968 unsigned CCMask = MI.getOperand(5).getImm();
9969 DebugLoc DL = MI.getDebugLoc();
9970
9971 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
9972
9973 // ISel pattern matching also adds a load memory operand of the same
9974 // address, so take special care to find the storing memory operand.
9975 MachineMemOperand *MMO = nullptr;
9976 for (auto *I : MI.memoperands())
9977 if (I->isStore()) {
9978 MMO = I;
9979 break;
9980 }
9981
9982 // Use STOCOpcode if possible. We could use different store patterns in
9983 // order to avoid matching the index register, but the performance trade-offs
9984 // might be more complicated in that case.
9985 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
9986 if (Invert)
9987 CCMask ^= CCValid;
9988
9989 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
9990 .addReg(SrcReg)
9991 .add(Base)
9992 .addImm(Disp)
9993 .addImm(CCValid)
9994 .addImm(CCMask)
9995 .addMemOperand(MMO);
9996
9997 MI.eraseFromParent();
9998 return MBB;
9999 }
10000
10001 // Get the condition needed to branch around the store.
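  // XOR-ing CCMask with CCValid flips exactly the bits within the valid set,
  // giving the complementary condition (e.g. an "equal" mask for an integer
  // compare becomes the "not equal" mask).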
10002 if (!Invert)
10003 CCMask ^= CCValid;
10004
10005 MachineBasicBlock *StartMBB = MBB;
10006 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
10007 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
10008
10009 // Unless CC was killed in the CondStore instruction, mark it as
10010 // live-in to both FalseMBB and JoinMBB.
10011 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
10012 !checkCCKill(MI, JoinMBB)) {
10013 FalseMBB->addLiveIn(SystemZ::CC);
10014 JoinMBB->addLiveIn(SystemZ::CC);
10015 }
10016
10017 // StartMBB:
10018 // BRC CCMask, JoinMBB
10019 // # fallthrough to FalseMBB
10020 MBB = StartMBB;
10021 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10022 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
10023 MBB->addSuccessor(JoinMBB);
10024 MBB->addSuccessor(FalseMBB);
10025
10026 // FalseMBB:
10027 // store %SrcReg, %Disp(%Index,%Base)
10028 // # fallthrough to JoinMBB
10029 MBB = FalseMBB;
10030 BuildMI(MBB, DL, TII->get(StoreOpcode))
10031 .addReg(SrcReg)
10032 .add(Base)
10033 .addImm(Disp)
10034 .addReg(IndexReg)
10035 .addMemOperand(MMO);
10036 MBB->addSuccessor(JoinMBB);
10037
10038 MI.eraseFromParent();
10039 return JoinMBB;
10040}
10041
10042// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
10043MachineBasicBlock *
10044SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
10045 MachineBasicBlock *MBB,
10046 bool Unsigned) const {
10047 MachineFunction &MF = *MBB->getParent();
10048 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10049 MachineRegisterInfo &MRI = MF.getRegInfo();
10050
10051 // Synthetic instruction to compare 128-bit values.
10052 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
10053 Register Op0 = MI.getOperand(0).getReg();
10054 Register Op1 = MI.getOperand(1).getReg();
10055
10056 MachineBasicBlock *StartMBB = MBB;
10057 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
10058 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
10059
10060 // StartMBB:
10061 //
10062 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
10063 // Swap the inputs to get:
10064 // CC 1 if high(Op0) > high(Op1)
10065 // CC 2 if high(Op0) < high(Op1)
10066 // CC 0 if high(Op0) == high(Op1)
10067 //
10068 // If CC != 0, we're done, so jump over the next instruction.
10069 //
10070 // VEC[L]G Op1, Op0
10071 // JNE JoinMBB
10072 // # fallthrough to HiEqMBB
10073 MBB = StartMBB;
10074 int HiOpcode = Unsigned ? SystemZ::VECLG : SystemZ::VECG;
10075 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
10076 .addReg(Op1).addReg(Op0);
10077 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
10078 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
10079 MBB->addSuccessor(JoinMBB);
10080 MBB->addSuccessor(HiEqMBB);
10081
10082 // HiEqMBB:
10083 //
10084 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
10085 // Since we already know the high parts are equal, the CC
10086 // result will only depend on the low parts:
10087 // CC 1 if low(Op0) > low(Op1)
10088 // CC 3 if low(Op0) <= low(Op1)
10089 //
10090 // VCHLGS Tmp, Op0, Op1
10091 // # fallthrough to JoinMBB
10092 MBB = HiEqMBB;
10093 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
10094 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
10095 .addReg(Op0).addReg(Op1);
10096 MBB->addSuccessor(JoinMBB);
10097
10098 // Mark CC as live-in to JoinMBB.
10099 JoinMBB->addLiveIn(SystemZ::CC);
10100
10101 MI.eraseFromParent();
10102 return JoinMBB;
10103}
10104
10105// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
10106// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
10107// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
10108// whether the field should be inverted after performing BinOpcode (e.g. for
10109// NAND).
10110MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
10111 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
10112 bool Invert) const {
10113 MachineFunction &MF = *MBB->getParent();
10114 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10115 MachineRegisterInfo &MRI = MF.getRegInfo();
10116
10117 // Extract the operands. Base can be a register or a frame index.
10118 // Src2 can be a register or immediate.
10119 Register Dest = MI.getOperand(0).getReg();
10120 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10121 int64_t Disp = MI.getOperand(2).getImm();
10122 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
10123 Register BitShift = MI.getOperand(4).getReg();
10124 Register NegBitShift = MI.getOperand(5).getReg();
10125 unsigned BitSize = MI.getOperand(6).getImm();
10126 DebugLoc DL = MI.getDebugLoc();
10127
10128 // Get the right opcodes for the displacement.
10129 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10130 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10131 assert(LOpcode && CSOpcode && "Displacement out of range");
10132
10133 // Create virtual registers for temporary results.
10134 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10135 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10136 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10137 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10138 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10139
10140 // Insert a basic block for the main loop.
10141 MachineBasicBlock *StartMBB = MBB;
10142 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10143 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10144
10145 // StartMBB:
10146 // ...
10147 // %OrigVal = L Disp(%Base)
10148 // # fall through to LoopMBB
10149 MBB = StartMBB;
10150 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10151 MBB->addSuccessor(LoopMBB);
10152
10153 // LoopMBB:
10154 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
10155 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10156 // %RotatedNewVal = OP %RotatedOldVal, %Src2
10157 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10158 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10159 // JNE LoopMBB
10160 // # fall through to DoneMBB
10161 MBB = LoopMBB;
10162 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10163 .addReg(OrigVal).addMBB(StartMBB)
10164 .addReg(Dest).addMBB(LoopMBB);
10165 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10166 .addReg(OldVal).addReg(BitShift).addImm(0);
10167 if (Invert) {
10168 // Perform the operation normally and then invert every bit of the field.
10169 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10170 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
10171 // XILF with the upper BitSize bits set.
10172 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
10173 .addReg(Tmp).addImm(-1U << (32 - BitSize));
10174 } else if (BinOpcode)
10175 // A simple binary operation.
10176 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
10177 .addReg(RotatedOldVal)
10178 .add(Src2);
10179 else
10180 // Use RISBG to rotate Src2 into position and use it to replace the
10181 // field in RotatedOldVal.
10182 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
10183 .addReg(RotatedOldVal).addReg(Src2.getReg())
10184 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
10185 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10186 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10187 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10188 .addReg(OldVal)
10189 .addReg(NewVal)
10190 .add(Base)
10191 .addImm(Disp);
10192 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10193 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
10194 MBB->addSuccessor(LoopMBB);
10195 MBB->addSuccessor(DoneMBB);
10196
10197 MI.eraseFromParent();
10198 return DoneMBB;
10199}
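// For example, for an 8-bit inverted operation (NAND) the field occupies the
// top 8 bits of the rotated word, so the XILF immediate -1U << (32 - 8) ==
// 0xff000000 flips exactly those bits after the binary operation.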
10200
10201// Implement EmitInstrWithCustomInserter for subword pseudo
10202// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
10203// instruction that should be used to compare the current field with the
10204// minimum or maximum value. KeepOldMask is the BRC condition-code mask
10205// for when the current field should be kept.
10206MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
10207 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
10208 unsigned KeepOldMask) const {
10209 MachineFunction &MF = *MBB->getParent();
10210 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10211 MachineRegisterInfo &MRI = MF.getRegInfo();
10212
10213 // Extract the operands. Base can be a register or a frame index.
10214 Register Dest = MI.getOperand(0).getReg();
10215 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10216 int64_t Disp = MI.getOperand(2).getImm();
10217 Register Src2 = MI.getOperand(3).getReg();
10218 Register BitShift = MI.getOperand(4).getReg();
10219 Register NegBitShift = MI.getOperand(5).getReg();
10220 unsigned BitSize = MI.getOperand(6).getImm();
10221 DebugLoc DL = MI.getDebugLoc();
10222
10223 // Get the right opcodes for the displacement.
10224 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10225 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10226 assert(LOpcode && CSOpcode && "Displacement out of range");
10227
10228 // Create virtual registers for temporary results.
10229 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10230 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10231 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10232 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10233 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10234 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10235
10236 // Insert 3 basic blocks for the loop.
10237 MachineBasicBlock *StartMBB = MBB;
10238 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10239 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10240 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
10241 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
10242
10243 // StartMBB:
10244 // ...
10245 // %OrigVal = L Disp(%Base)
10246 // # fall through to LoopMBB
10247 MBB = StartMBB;
10248 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10249 MBB->addSuccessor(LoopMBB);
10250
10251 // LoopMBB:
10252 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
10253 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10254 // CompareOpcode %RotatedOldVal, %Src2
10255 // BRC KeepOldMask, UpdateMBB
10256 MBB = LoopMBB;
10257 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10258 .addReg(OrigVal).addMBB(StartMBB)
10259 .addReg(Dest).addMBB(UpdateMBB);
10260 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10261 .addReg(OldVal).addReg(BitShift).addImm(0);
10262 BuildMI(MBB, DL, TII->get(CompareOpcode))
10263 .addReg(RotatedOldVal).addReg(Src2);
10264 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10265 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
10266 MBB->addSuccessor(UpdateMBB);
10267 MBB->addSuccessor(UseAltMBB);
10268
10269 // UseAltMBB:
10270 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
10271 // # fall through to UpdateMBB
10272 MBB = UseAltMBB;
10273 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
10274 .addReg(RotatedOldVal).addReg(Src2)
10275 .addImm(32).addImm(31 + BitSize).addImm(0);
10276 MBB->addSuccessor(UpdateMBB);
10277
10278 // UpdateMBB:
10279 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
10280 // [ %RotatedAltVal, UseAltMBB ]
10281 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10282 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10283 // JNE LoopMBB
10284 // # fall through to DoneMBB
10285 MBB = UpdateMBB;
10286 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
10287 .addReg(RotatedOldVal).addMBB(LoopMBB)
10288 .addReg(RotatedAltVal).addMBB(UseAltMBB);
10289 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10290 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10291 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10292 .addReg(OldVal)
10293 .addReg(NewVal)
10294 .add(Base)
10295 .addImm(Disp);
10296 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10297 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
10298 MBB->addSuccessor(LoopMBB);
10299 MBB->addSuccessor(DoneMBB);
10300
10301 MI.eraseFromParent();
10302 return DoneMBB;
10303}
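// For example, ATOMIC_LOADW_MIN uses CR as CompareOpcode and CCMASK_CMP_LE as
// KeepOldMask: the current field is kept whenever it is already <= Src2, and
// otherwise the RISBG in UseAltMBB substitutes Src2 before the CS retry.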
10304
10305// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
10306// instruction MI.
10307MachineBasicBlock *
10308SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
10309 MachineBasicBlock *MBB) const {
10310 MachineFunction &MF = *MBB->getParent();
10311 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10312 MachineRegisterInfo &MRI = MF.getRegInfo();
10313
10314 // Extract the operands. Base can be a register or a frame index.
10315 Register Dest = MI.getOperand(0).getReg();
10316 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10317 int64_t Disp = MI.getOperand(2).getImm();
10318 Register CmpVal = MI.getOperand(3).getReg();
10319 Register OrigSwapVal = MI.getOperand(4).getReg();
10320 Register BitShift = MI.getOperand(5).getReg();
10321 Register NegBitShift = MI.getOperand(6).getReg();
10322 int64_t BitSize = MI.getOperand(7).getImm();
10323 DebugLoc DL = MI.getDebugLoc();
10324
10325 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
10326
10327 // Get the right opcodes for the displacement and zero-extension.
10328 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10329 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10330 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
10331 assert(LOpcode && CSOpcode && "Displacement out of range");
10332
10333 // Create virtual registers for temporary results.
10334 Register OrigOldVal = MRI.createVirtualRegister(RC);
10335 Register OldVal = MRI.createVirtualRegister(RC);
10336 Register SwapVal = MRI.createVirtualRegister(RC);
10337 Register StoreVal = MRI.createVirtualRegister(RC);
10338 Register OldValRot = MRI.createVirtualRegister(RC);
10339 Register RetryOldVal = MRI.createVirtualRegister(RC);
10340 Register RetrySwapVal = MRI.createVirtualRegister(RC);
10341
10342 // Insert 2 basic blocks for the loop.
10343 MachineBasicBlock *StartMBB = MBB;
10344 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10345 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10346 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
10347
10348 // StartMBB:
10349 // ...
10350 // %OrigOldVal = L Disp(%Base)
10351 // # fall through to LoopMBB
10352 MBB = StartMBB;
10353 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
10354 .add(Base)
10355 .addImm(Disp)
10356 .addReg(0);
10357 MBB->addSuccessor(LoopMBB);
10358
10359 // LoopMBB:
10360 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
10361 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
10362 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
10363 // ^^ The low BitSize bits contain the field
10364 // of interest.
10365 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
10366 // ^^ Replace the upper 32-BitSize bits of the
10367 // swap value with those that we loaded and rotated.
10368 // %Dest = LL[CH] %OldValRot
10369 // CR %Dest, %CmpVal
10370 // JNE DoneMBB
10371 // # Fall through to SetMBB
10372 MBB = LoopMBB;
10373 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10374 .addReg(OrigOldVal).addMBB(StartMBB)
10375 .addReg(RetryOldVal).addMBB(SetMBB);
10376 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
10377 .addReg(OrigSwapVal).addMBB(StartMBB)
10378 .addReg(RetrySwapVal).addMBB(SetMBB);
10379 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
10380 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
10381 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
10382 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
10383 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
10384 .addReg(OldValRot);
10385 BuildMI(MBB, DL, TII->get(SystemZ::CR))
10386 .addReg(Dest).addReg(CmpVal);
10387 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10388 .addImm(SystemZ::CCMASK_ICMP)
10389 .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
10390 MBB->addSuccessor(DoneMBB);
10391 MBB->addSuccessor(SetMBB);
10392
10393 // SetMBB:
10394 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
10395 // ^^ Rotate the new field to its proper position.
10396 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
10397 // JNE LoopMBB
10398 // # fall through to ExitMBB
10399 MBB = SetMBB;
10400 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
10401 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
10402 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
10403 .addReg(OldVal)
10404 .addReg(StoreVal)
10405 .add(Base)
10406 .addImm(Disp);
10407 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10408 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
10409 MBB->addSuccessor(LoopMBB);
10410 MBB->addSuccessor(DoneMBB);
10411
10412 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
10413 // to the block after the loop. At this point, CC may have been defined
10414 // either by the CR in LoopMBB or by the CS in SetMBB.
10415 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
10416 DoneMBB->addLiveIn(SystemZ::CC);
10417
10418 MI.eraseFromParent();
10419 return DoneMBB;
10420}
10421
10422// Emit a move from two GR64s to a GR128.
10423MachineBasicBlock *
10424SystemZTargetLowering::emitPair128(MachineInstr &MI,
10425 MachineBasicBlock *MBB) const {
10426 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10427 const DebugLoc &DL = MI.getDebugLoc();
10428
10429 Register Dest = MI.getOperand(0).getReg();
10430 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
10431 .add(MI.getOperand(1))
10432 .addImm(SystemZ::subreg_h64)
10433 .add(MI.getOperand(2))
10434 .addImm(SystemZ::subreg_l64);
10435 MI.eraseFromParent();
10436 return MBB;
10437}
10438
10439// Emit an extension from a GR64 to a GR128. ClearEven is true
10440// if the high register of the GR128 value must be cleared or false if
10441// it's "don't care".
10442MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
10443 MachineBasicBlock *MBB,
10444 bool ClearEven) const {
10445 MachineFunction &MF = *MBB->getParent();
10446 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10447 MachineRegisterInfo &MRI = MF.getRegInfo();
10448 DebugLoc DL = MI.getDebugLoc();
10449
10450 Register Dest = MI.getOperand(0).getReg();
10451 Register Src = MI.getOperand(1).getReg();
10452 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10453
10454 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
10455 if (ClearEven) {
10456 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10457 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10458
10459 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
10460 .addImm(0);
10461 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
10462 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
10463 In128 = NewIn128;
10464 }
10465 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
10466 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
10467
10468 MI.eraseFromParent();
10469 return MBB;
10470}
10471
10472MachineBasicBlock *
10473SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
10474 MachineBasicBlock *MBB,
10475 unsigned Opcode, bool IsMemset) const {
10476 MachineFunction &MF = *MBB->getParent();
10477 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10478 MachineRegisterInfo &MRI = MF.getRegInfo();
10479 DebugLoc DL = MI.getDebugLoc();
10480
10481 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
10482 uint64_t DestDisp = MI.getOperand(1).getImm();
10483 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
10484 uint64_t SrcDisp;
10485
10486 // Fold the displacement Disp if it is out of range.
10487 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
10488 if (!isUInt<12>(Disp)) {
10489 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10490 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
10491 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
10492 .add(Base).addImm(Disp).addReg(0);
10493 Base = MachineOperand::CreateReg(Reg, false);
10494 Disp = 0;
10495 }
10496 };
10497
10498 if (!IsMemset) {
10499 SrcBase = earlyUseOperand(MI.getOperand(2));
10500 SrcDisp = MI.getOperand(3).getImm();
10501 } else {
10502 SrcBase = DestBase;
10503 SrcDisp = DestDisp++;
10504 foldDisplIfNeeded(DestBase, DestDisp);
10505 }
10506
10507 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
10508 bool IsImmForm = LengthMO.isImm();
10509 bool IsRegForm = !IsImmForm;
10510
10511 // Build and insert one Opcode of Length, with special treatment for memset.
10512 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
10513 MachineBasicBlock::iterator InsPos,
10514 MachineOperand DBase, uint64_t DDisp,
10515 MachineOperand SBase, uint64_t SDisp,
10516 unsigned Length) -> void {
10517 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
10518 if (IsMemset) {
10519 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
10520 if (ByteMO.isImm())
10521 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
10522 .add(SBase).addImm(SDisp).add(ByteMO);
10523 else
10524 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
10525 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
10526 if (--Length == 0)
10527 return;
10528 }
10529 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
10530 .add(DBase).addImm(DDisp).addImm(Length)
10531 .add(SBase).addImm(SDisp)
10532 .setMemRefs(MI.memoperands());
10533 };
10534
10535 bool NeedsLoop = false;
10536 uint64_t ImmLength = 0;
10537 Register LenAdjReg = SystemZ::NoRegister;
10538 if (IsImmForm) {
10539 ImmLength = LengthMO.getImm();
10540 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
10541 if (ImmLength == 0) {
10542 MI.eraseFromParent();
10543 return MBB;
10544 }
10545 if (Opcode == SystemZ::CLC) {
10546 if (ImmLength > 3 * 256)
10547 // A two-CLC sequence is a clear win over a loop, not least because
10548 // it needs only one branch. A three-CLC sequence needs the same
10549 // number of branches as a loop (i.e. 2), but is shorter. That
10550 // brings us to lengths greater than 768 bytes. It seems relatively
10551 // likely that a difference will be found within the first 768 bytes,
10552 // so we just optimize for the smallest number of branch
10553 // instructions, in order to avoid polluting the prediction buffer
10554 // too much.
10555 NeedsLoop = true;
10556 } else if (ImmLength > 6 * 256)
10557 // The heuristic we use is to prefer loops for anything that would
10558 // require 7 or more MVCs. With these kinds of sizes there isn't much
10559 // to choose between straight-line code and looping code, since the
10560 // time will be dominated by the MVCs themselves.
10561 NeedsLoop = true;
10562 } else {
10563 NeedsLoop = true;
10564 LenAdjReg = LengthMO.getReg();
10565 }
10566
10567 // When generating more than one CLC, all but the last will need to
10568 // branch to the end when a difference is found.
10569 MachineBasicBlock *EndMBB =
10570 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
10571 ? SystemZ::splitBlockAfter(MI, MBB)
10572 : nullptr);
10573
10574 if (NeedsLoop) {
10575 Register StartCountReg =
10576 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10577 if (IsImmForm) {
10578 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
10579 ImmLength &= 255;
10580 } else {
10581 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
10582 .addReg(LenAdjReg)
10583 .addReg(0)
10584 .addImm(8);
10585 }
10586
10587 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
10588 auto loadZeroAddress = [&]() -> MachineOperand {
10589 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10590 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
10591 return MachineOperand::CreateReg(Reg, false);
10592 };
10593 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
10594 DestBase = loadZeroAddress();
10595 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
10596 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
10597
10598 MachineBasicBlock *StartMBB = nullptr;
10599 MachineBasicBlock *LoopMBB = nullptr;
10600 MachineBasicBlock *NextMBB = nullptr;
10601 MachineBasicBlock *DoneMBB = nullptr;
10602 MachineBasicBlock *AllDoneMBB = nullptr;
10603
10604 Register StartSrcReg = forceReg(MI, SrcBase, TII);
10605 Register StartDestReg =
10606 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
10607
10608 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
10609 Register ThisSrcReg = MRI.createVirtualRegister(RC);
10610 Register ThisDestReg =
10611 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
10612 Register NextSrcReg = MRI.createVirtualRegister(RC);
10613 Register NextDestReg =
10614 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
10615 RC = &SystemZ::GR64BitRegClass;
10616 Register ThisCountReg = MRI.createVirtualRegister(RC);
10617 Register NextCountReg = MRI.createVirtualRegister(RC);
10618
10619 if (IsRegForm) {
10620 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10621 StartMBB = SystemZ::emitBlockAfter(MBB);
10622 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10623 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10624 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
10625
10626 // MBB:
10627 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
10628 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10629 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
10630 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10631 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10632 .addMBB(AllDoneMBB);
10633 MBB->addSuccessor(AllDoneMBB);
10634 if (!IsMemset)
10635 MBB->addSuccessor(StartMBB);
10636 else {
10637 // MemsetOneCheckMBB:
10638 // # Jump to MemsetOneMBB for a memset of length 1, or
10639 // # fall thru to StartMBB.
10640 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
10641 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
10642 MBB->addSuccessor(MemsetOneCheckMBB);
10643 MBB = MemsetOneCheckMBB;
10644 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10645 .addReg(LenAdjReg).addImm(-1);
10646 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10647 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10648 .addMBB(MemsetOneMBB);
10649 MBB->addSuccessor(MemsetOneMBB, {10, 100});
10650 MBB->addSuccessor(StartMBB, {90, 100});
10651
10652 // MemsetOneMBB:
10653 // # Jump back to AllDoneMBB after a single MVI or STC.
10654 MBB = MemsetOneMBB;
10655 insertMemMemOp(MBB, MBB->end(),
10656 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
10657 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
10658 1);
10659 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
10660 MBB->addSuccessor(AllDoneMBB);
10661 }
10662
10663 // StartMBB:
10664 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
10665 MBB = StartMBB;
10666 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10667 .addReg(StartCountReg).addImm(0);
10668 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10669 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10670 .addMBB(DoneMBB);
10671 MBB->addSuccessor(DoneMBB);
10672 MBB->addSuccessor(LoopMBB);
10673 }
10674 else {
10675 StartMBB = MBB;
10676 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10677 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10678 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10679
10680 // StartMBB:
10681 // # fall through to LoopMBB
10682 MBB->addSuccessor(LoopMBB);
10683
10684 DestBase = MachineOperand::CreateReg(NextDestReg, false);
10685 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
10686 if (EndMBB && !ImmLength)
10687 // If the loop handled the whole CLC range, DoneMBB will be empty with
10688 // CC live-through into EndMBB, so add it as live-in.
10689 DoneMBB->addLiveIn(SystemZ::CC);
10690 }
10691
10692 // LoopMBB:
10693 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
10694 // [ %NextDestReg, NextMBB ]
10695 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
10696 // [ %NextSrcReg, NextMBB ]
10697 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
10698 // [ %NextCountReg, NextMBB ]
10699 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
10700 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
10701 // ( JLH EndMBB )
10702 //
10703 // The prefetch is used only for MVC. The JLH is used only for CLC.
10704 MBB = LoopMBB;
10705 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
10706 .addReg(StartDestReg).addMBB(StartMBB)
10707 .addReg(NextDestReg).addMBB(NextMBB);
10708 if (!HaveSingleBase)
10709 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
10710 .addReg(StartSrcReg).addMBB(StartMBB)
10711 .addReg(NextSrcReg).addMBB(NextMBB);
10712 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
10713 .addReg(StartCountReg).addMBB(StartMBB)
10714 .addReg(NextCountReg).addMBB(NextMBB);
10715 if (Opcode == SystemZ::MVC)
10716 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
10717 .addImm(SystemZ::PFD_WRITE)
10718 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
10719 insertMemMemOp(MBB, MBB->end(),
10720 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
10721 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
10722 if (EndMBB) {
10723 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10724 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10725 .addMBB(EndMBB);
10726 MBB->addSuccessor(EndMBB);
10727 MBB->addSuccessor(NextMBB);
10728 }
10729
10730 // NextMBB:
10731 // %NextDestReg = LA 256(%ThisDestReg)
10732 // %NextSrcReg = LA 256(%ThisSrcReg)
10733 // %NextCountReg = AGHI %ThisCountReg, -1
10734 // CGHI %NextCountReg, 0
10735 // JLH LoopMBB
10736 // # fall through to DoneMBB
10737 //
10738 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
10739 MBB = NextMBB;
10740 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
10741 .addReg(ThisDestReg).addImm(256).addReg(0);
10742 if (!HaveSingleBase)
10743 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
10744 .addReg(ThisSrcReg).addImm(256).addReg(0);
10745 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
10746 .addReg(ThisCountReg).addImm(-1);
10747 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10748 .addReg(NextCountReg).addImm(0);
10749 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10750 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10751 .addMBB(LoopMBB);
10752 MBB->addSuccessor(LoopMBB);
10753 MBB->addSuccessor(DoneMBB);
10754
10755 MBB = DoneMBB;
10756 if (IsRegForm) {
10757 // DoneMBB:
10758 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
10759 // # Use EXecute Relative Long for the remainder of the bytes. The target
10760 // instruction of the EXRL will have a length field of 1 since 0 is an
10761 // illegal value. The number of bytes processed becomes (%LenAdjReg &
10762 // 0xff) + 1.
10763 // # Fall through to AllDoneMBB.
10764 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10765 Register RemDestReg = HaveSingleBase ? RemSrcReg
10766 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10767 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
10768 .addReg(StartDestReg).addMBB(StartMBB)
10769 .addReg(NextDestReg).addMBB(NextMBB);
10770 if (!HaveSingleBase)
10771 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
10772 .addReg(StartSrcReg).addMBB(StartMBB)
10773 .addReg(NextSrcReg).addMBB(NextMBB);
10774 if (IsMemset)
10775 insertMemMemOp(MBB, MBB->end(),
10776 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
10777 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
10778 MachineInstrBuilder EXRL_MIB =
10779 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
10780 .addImm(Opcode)
10781 .addReg(LenAdjReg)
10782 .addReg(RemDestReg).addImm(DestDisp)
10783 .addReg(RemSrcReg).addImm(SrcDisp);
10784 MBB->addSuccessor(AllDoneMBB);
10785 MBB = AllDoneMBB;
10786 if (Opcode != SystemZ::MVC) {
10787 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
10788 if (EndMBB)
10789 MBB->addLiveIn(SystemZ::CC);
10790 }
10791 }
10792 MF.getProperties().resetNoPHIs();
10793 }
10794
10795 // Handle any remaining bytes with straight-line code.
10796 while (ImmLength > 0) {
10797 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
10798 // The previous iteration might have created out-of-range displacements.
10799 // Apply them using LA/LAY if so.
10800 foldDisplIfNeeded(DestBase, DestDisp);
10801 foldDisplIfNeeded(SrcBase, SrcDisp);
10802 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
10803 DestDisp += ThisLength;
10804 SrcDisp += ThisLength;
10805 ImmLength -= ThisLength;
10806 // If there's another CLC to go, branch to the end if a difference
10807 // was found.
10808 if (EndMBB && ImmLength > 0) {
10809 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
10810 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10811 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10812 .addMBB(EndMBB);
10813 MBB->addSuccessor(EndMBB);
10814 MBB->addSuccessor(NextMBB);
10815 MBB = NextMBB;
10816 }
10817 }
10818 if (EndMBB) {
10819 MBB->addSuccessor(EndMBB);
10820 MBB = EndMBB;
10821 MBB->addLiveIn(SystemZ::CC);
10822 }
10823
10824 MI.eraseFromParent();
10825 return MBB;
10826}
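// For example, a 700-byte MVCImm (<= 6 * 256) is expanded straight-line into
// MVCs of 256, 256 and 188 bytes, whereas a 700-byte CLCImm additionally gets
// a branch to EndMBB after every CLC except the last, so that a mismatch
// stops the comparison early.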
10827
10828// Decompose string pseudo-instruction MI into a loop that continually performs
10829// Opcode until CC != 3.
10830MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
10831 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10832 MachineFunction &MF = *MBB->getParent();
10833 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10834 MachineRegisterInfo &MRI = MF.getRegInfo();
10835 DebugLoc DL = MI.getDebugLoc();
10836
10837 uint64_t End1Reg = MI.getOperand(0).getReg();
10838 uint64_t Start1Reg = MI.getOperand(1).getReg();
10839 uint64_t Start2Reg = MI.getOperand(2).getReg();
10840 uint64_t CharReg = MI.getOperand(3).getReg();
10841
10842 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
10843 uint64_t This1Reg = MRI.createVirtualRegister(RC);
10844 uint64_t This2Reg = MRI.createVirtualRegister(RC);
10845 uint64_t End2Reg = MRI.createVirtualRegister(RC);
10846
10847 MachineBasicBlock *StartMBB = MBB;
10848 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10849 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10850
10851 // StartMBB:
10852 // # fall through to LoopMBB
10853 MBB->addSuccessor(LoopMBB);
10854
10855 // LoopMBB:
10856 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
10857 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
10858 // R0L = %CharReg
10859 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
10860 // JO LoopMBB
10861 // # fall through to DoneMBB
10862 //
10863 // The load of R0L can be hoisted by post-RA LICM.
10864 MBB = LoopMBB;
10865
10866 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
10867 .addReg(Start1Reg).addMBB(StartMBB)
10868 .addReg(End1Reg).addMBB(LoopMBB);
10869 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
10870 .addReg(Start2Reg).addMBB(StartMBB)
10871 .addReg(End2Reg).addMBB(LoopMBB);
10872 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
10873 BuildMI(MBB, DL, TII->get(Opcode))
10874 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
10875 .addReg(This1Reg).addReg(This2Reg);
10876 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10877 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
10878 MBB->addSuccessor(LoopMBB);
10879 MBB->addSuccessor(DoneMBB);
10880
10881 DoneMBB->addLiveIn(SystemZ::CC);
10882
10883 MI.eraseFromParent();
10884 return DoneMBB;
10885}
10886
10887// Update TBEGIN instruction with final opcode and register clobbers.
10888MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
10889 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
10890 bool NoFloat) const {
10891 MachineFunction &MF = *MBB->getParent();
10892 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
10893 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10894
10895 // Update opcode.
10896 MI.setDesc(TII->get(Opcode));
10897
10898 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
10899 // Make sure to add the corresponding GRSM bits if they are missing.
10900 uint64_t Control = MI.getOperand(2).getImm();
10901 static const unsigned GPRControlBit[16] = {
10902 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
10903 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
10904 };
10905 Control |= GPRControlBit[15];
10906 if (TFI->hasFP(MF))
10907 Control |= GPRControlBit[11];
10908 MI.getOperand(2).setImm(Control);
10909
10910 // Add GPR clobbers.
10911 for (int I = 0; I < 16; I++) {
10912 if ((Control & GPRControlBit[I]) == 0) {
10913 unsigned Reg = SystemZMC::GR64Regs[I];
10914 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10915 }
10916 }
10917
10918 // Add FPR/VR clobbers.
10919 if (!NoFloat && (Control & 4) != 0) {
10920 if (Subtarget.hasVector()) {
10921 for (unsigned Reg : SystemZMC::VR128Regs) {
10922 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10923 }
10924 } else {
10925 for (unsigned Reg : SystemZMC::FP64Regs) {
10926 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10927 }
10928 }
10929 }
10930
10931 return MBB;
10932}
10933
10934MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
10935 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10936 MachineFunction &MF = *MBB->getParent();
10937 MachineRegisterInfo *MRI = &MF.getRegInfo();
10938 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10939 DebugLoc DL = MI.getDebugLoc();
10940
10941 Register SrcReg = MI.getOperand(0).getReg();
10942
10943 // Create new virtual register of the same class as source.
10944 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
10945 Register DstReg = MRI->createVirtualRegister(RC);
10946
10947 // Replace pseudo with a normal load-and-test that models the def as
10948 // well.
10949 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
10950 .addReg(SrcReg)
10951 .setMIFlags(MI.getFlags());
10952 MI.eraseFromParent();
10953
10954 return MBB;
10955}
10956
10957MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
10958 MachineInstr &MI, MachineBasicBlock *MBB) const {
10959 MachineFunction &MF = *MBB->getParent();
10960 MachineRegisterInfo *MRI = &MF.getRegInfo();
10961 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10962 DebugLoc DL = MI.getDebugLoc();
10963 const unsigned ProbeSize = getStackProbeSize(MF);
10964 Register DstReg = MI.getOperand(0).getReg();
10965 Register SizeReg = MI.getOperand(2).getReg();
10966
10967 MachineBasicBlock *StartMBB = MBB;
10968 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
10969 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
10970 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
10971 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
10972 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
10973
10974 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
10975 MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
10976
10977 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10978 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10979
10980 // LoopTestMBB
10981 // BRC TailTestMBB
10982 // # fallthrough to LoopBodyMBB
10983 StartMBB->addSuccessor(LoopTestMBB);
10984 MBB = LoopTestMBB;
10985 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
10986 .addReg(SizeReg)
10987 .addMBB(StartMBB)
10988 .addReg(IncReg)
10989 .addMBB(LoopBodyMBB);
10990 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
10991 .addReg(PHIReg)
10992 .addImm(ProbeSize);
10993 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10994 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
10995 .addMBB(TailTestMBB);
10996 MBB->addSuccessor(LoopBodyMBB);
10997 MBB->addSuccessor(TailTestMBB);
10998
10999 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
11000 // J LoopTestMBB
11001 MBB = LoopBodyMBB;
11002 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
11003 .addReg(PHIReg)
11004 .addImm(ProbeSize);
11005 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
11006 .addReg(SystemZ::R15D)
11007 .addImm(ProbeSize);
11008 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
11009 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
11010 .setMemRefs(VolLdMMO);
11011 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
11012 MBB->addSuccessor(LoopTestMBB);
11013
11014 // TailTestMBB
11015 // BRC DoneMBB
11016 // # fallthrough to TailMBB
11017 MBB = TailTestMBB;
11018 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
11019 .addReg(PHIReg)
11020 .addImm(0);
11021 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
11022 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
11023 .addMBB(DoneMBB);
11024 MBB->addSuccessor(TailMBB);
11025 MBB->addSuccessor(DoneMBB);
11026
11027 // TailMBB
11028 // # fallthrough to DoneMBB
11029 MBB = TailMBB;
11030 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
11031 .addReg(SystemZ::R15D)
11032 .addReg(PHIReg);
11033 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
11034 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
11035 .setMemRefs(VolLdMMO);
11036 MBB->addSuccessor(DoneMBB);
11037
11038 // DoneMBB
11039 MBB = DoneMBB;
11040 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
11041 .addReg(SystemZ::R15D);
11042
11043 MI.eraseFromParent();
11044 return DoneMBB;
11045}
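// For example, a probed allocation of 10000 bytes with the default 4096-byte
// probe size runs the loop body twice (8192 bytes, one probing compare each)
// and leaves the remaining 1808 bytes to the tail probe.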
11046
11047SDValue SystemZTargetLowering::
11048getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
11049 MachineFunction &MF = DAG.getMachineFunction();
11050 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
11051 SDLoc DL(SP);
11052 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
11053 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
11054}
11055
11056MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
11057 MachineInstr &MI, MachineBasicBlock *MBB) const {
11058 switch (MI.getOpcode()) {
11059 case SystemZ::ADJCALLSTACKDOWN:
11060 case SystemZ::ADJCALLSTACKUP:
11061 return emitAdjCallStack(MI, MBB);
11062
11063 case SystemZ::Select32:
11064 case SystemZ::Select64:
11065 case SystemZ::Select128:
11066 case SystemZ::SelectF32:
11067 case SystemZ::SelectF64:
11068 case SystemZ::SelectF128:
11069 case SystemZ::SelectVR32:
11070 case SystemZ::SelectVR64:
11071 case SystemZ::SelectVR128:
11072 return emitSelect(MI, MBB);
11073
11074 case SystemZ::CondStore8Mux:
11075 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
11076 case SystemZ::CondStore8MuxInv:
11077 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
11078 case SystemZ::CondStore16Mux:
11079 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
11080 case SystemZ::CondStore16MuxInv:
11081 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
11082 case SystemZ::CondStore32Mux:
11083 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
11084 case SystemZ::CondStore32MuxInv:
11085 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
11086 case SystemZ::CondStore8:
11087 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
11088 case SystemZ::CondStore8Inv:
11089 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
11090 case SystemZ::CondStore16:
11091 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
11092 case SystemZ::CondStore16Inv:
11093 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
11094 case SystemZ::CondStore32:
11095 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
11096 case SystemZ::CondStore32Inv:
11097 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
11098 case SystemZ::CondStore64:
11099 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
11100 case SystemZ::CondStore64Inv:
11101 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
11102 case SystemZ::CondStoreF32:
11103 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
11104 case SystemZ::CondStoreF32Inv:
11105 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
11106 case SystemZ::CondStoreF64:
11107 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
11108 case SystemZ::CondStoreF64Inv:
11109 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
11110
11111 case SystemZ::SCmp128Hi:
11112 return emitICmp128Hi(MI, MBB, false);
11113 case SystemZ::UCmp128Hi:
11114 return emitICmp128Hi(MI, MBB, true);
11115
11116 case SystemZ::PAIR128:
11117 return emitPair128(MI, MBB);
11118 case SystemZ::AEXT128:
11119 return emitExt128(MI, MBB, false);
11120 case SystemZ::ZEXT128:
11121 return emitExt128(MI, MBB, true);
11122
11123 case SystemZ::ATOMIC_SWAPW:
11124 return emitAtomicLoadBinary(MI, MBB, 0);
11125
11126 case SystemZ::ATOMIC_LOADW_AR:
11127 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
11128 case SystemZ::ATOMIC_LOADW_AFI:
11129 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
11130
11131 case SystemZ::ATOMIC_LOADW_SR:
11132 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
11133
11134 case SystemZ::ATOMIC_LOADW_NR:
11135 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
11136 case SystemZ::ATOMIC_LOADW_NILH:
11137 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
11138
11139 case SystemZ::ATOMIC_LOADW_OR:
11140 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
11141 case SystemZ::ATOMIC_LOADW_OILH:
11142 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
11143
11144 case SystemZ::ATOMIC_LOADW_XR:
11145 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
11146 case SystemZ::ATOMIC_LOADW_XILF:
11147 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
11148
11149 case SystemZ::ATOMIC_LOADW_NRi:
11150 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
11151 case SystemZ::ATOMIC_LOADW_NILHi:
11152 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
11153
11154 case SystemZ::ATOMIC_LOADW_MIN:
11155 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
11156 case SystemZ::ATOMIC_LOADW_MAX:
11157 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
11158 case SystemZ::ATOMIC_LOADW_UMIN:
11159 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
11160 case SystemZ::ATOMIC_LOADW_UMAX:
11161 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
11162
11163 case SystemZ::ATOMIC_CMP_SWAPW:
11164 return emitAtomicCmpSwapW(MI, MBB);
11165 case SystemZ::MVCImm:
11166 case SystemZ::MVCReg:
11167 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
11168 case SystemZ::NCImm:
11169 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
11170 case SystemZ::OCImm:
11171 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
11172 case SystemZ::XCImm:
11173 case SystemZ::XCReg:
11174 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
11175 case SystemZ::CLCImm:
11176 case SystemZ::CLCReg:
11177 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
11178 case SystemZ::MemsetImmImm:
11179 case SystemZ::MemsetImmReg:
11180 case SystemZ::MemsetRegImm:
11181 case SystemZ::MemsetRegReg:
11182 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
11183 case SystemZ::CLSTLoop:
11184 return emitStringWrapper(MI, MBB, SystemZ::CLST);
11185 case SystemZ::MVSTLoop:
11186 return emitStringWrapper(MI, MBB, SystemZ::MVST);
11187 case SystemZ::SRSTLoop:
11188 return emitStringWrapper(MI, MBB, SystemZ::SRST);
11189 case SystemZ::TBEGIN:
11190 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
11191 case SystemZ::TBEGIN_nofloat:
11192 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
11193 case SystemZ::TBEGINC:
11194 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
11195 case SystemZ::LTEBRCompare_Pseudo:
11196 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
11197 case SystemZ::LTDBRCompare_Pseudo:
11198 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
11199 case SystemZ::LTXBRCompare_Pseudo:
11200 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
11201
11202 case SystemZ::PROBED_ALLOCA:
11203 return emitProbedAlloca(MI, MBB);
11204 case SystemZ::EH_SjLj_SetJmp:
11205 return emitEHSjLjSetJmp(MI, MBB);
11206 case SystemZ::EH_SjLj_LongJmp:
11207 return emitEHSjLjLongJmp(MI, MBB);
11208
11209 case TargetOpcode::STACKMAP:
11210 case TargetOpcode::PATCHPOINT:
11211 return emitPatchPoint(MI, MBB);
11212
11213 default:
11214 llvm_unreachable("Unexpected instr type to insert");
11215 }
11216}
11217
11218// This is only used by the isel schedulers, and is needed only to prevent
11219// compiler from crashing when list-ilp is used.
11220const TargetRegisterClass *
11221SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
11222 if (VT == MVT::Untyped)
11223 return &SystemZ::ADDR128BitRegClass;
11224 return TargetLowering::getRepRegClassFor(VT);
11225}
11226
11227SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
11228 SelectionDAG &DAG) const {
11229 SDLoc dl(Op);
11230 /*
11231 The rounding method is in FPC Byte 3 bits 6-7, and has the following
11232 settings:
11233 00 Round to nearest
11234 01 Round to 0
11235 10 Round to +inf
11236 11 Round to -inf
11237
11238 FLT_ROUNDS, on the other hand, expects the following:
11239 -1 Undefined
11240 0 Round to 0
11241 1 Round to nearest
11242 2 Round to +inf
11243 3 Round to -inf
11244 */
11245
11246 // Save FPC to register.
11247 SDValue Chain = Op.getOperand(0);
11248 SDValue EFPC(
11249 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
11250 Chain = EFPC.getValue(1);
11251
11252 // Transform as necessary
11253 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
11254 DAG.getConstant(3, dl, MVT::i32));
11255 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
11256 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
11257 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
11258 DAG.getConstant(1, dl, MVT::i32)));
11259
11260 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
11261 DAG.getConstant(1, dl, MVT::i32));
11262 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
11263
11264 return DAG.getMergeValues({RetVal, Chain}, dl);
11265}
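The expression above, RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1, maps each FPC rounding value onto the FLT_ROUNDS encoding from the comment: 0 to 1, 1 to 0, 2 to 2, and 3 to 3. A minimal standalone sketch, independent of the backend, that walks all four values through the same expression:

// Standalone sketch (not part of this file): checks that the
// (x ^ (x >> 1)) ^ 1 expression used in lowerGET_ROUNDING maps the FPC
// rounding field onto the FLT_ROUNDS encoding described in the comment.
#include <cassert>

int main() {
  // Index: FPC value (0 nearest, 1 toward zero, 2 +inf, 3 -inf).
  // Value: expected FLT_ROUNDS (1 nearest, 0 toward zero, 2 +inf, 3 -inf).
  const unsigned Expected[4] = {1, 0, 2, 3};
  for (unsigned FPC = 0; FPC < 4; ++FPC)
    assert(((FPC ^ (FPC >> 1)) ^ 1) == Expected[FPC]);
  return 0;
}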
11266
11267SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
11268 SelectionDAG &DAG) const {
11269 EVT VT = Op.getValueType();
11270 Op = Op.getOperand(0);
11271 EVT OpVT = Op.getValueType();
11272
11273 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
11274
11275 SDLoc DL(Op);
11276
11277 // load a 0 vector for the third operand of VSUM.
11278 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
11279
11280 // execute VSUM.
11281 switch (OpVT.getScalarSizeInBits()) {
11282 case 8:
11283 case 16:
11284 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
11285 [[fallthrough]];
11286 case 32:
11287 case 64:
11288 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
11289 DAG.getBitcast(Op.getValueType(), Zero));
11290 break;
11291 case 128:
11292 break; // VSUM over v1i128 should not happen and would be a noop
11293 default:
11294 llvm_unreachable("Unexpected scalar size.");
11295 }
11296 // Cast to original vector type, retrieve last element.
11297 return DAG.getNode(
11298 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
11299 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
11300}
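For i8 and i16 elements the reduction above runs in two VSUM steps, first into v4i32 lanes and then into a single i128, while i32 and i64 inputs go straight to i128; the total ends up in the low bits of the 128-bit value, which, given SystemZ's big-endian lane order, is the highest-numbered element of the original vector type, hence the extract of the last element. A scalar sketch of the same two-stage strategy for a v8i16 input (example values are made up, and the code is independent of the backend):

// Scalar sketch (not part of this file) of the two-stage reduction used
// above for a v8i16 input: partial sums into wider lanes, then one wide
// total, truncated back to the element width at the end.
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  std::vector<uint16_t> In = {1, 2, 3, 4, 5, 60000, 7, 8};

  // Stage 1: sum adjacent elements into wider lanes (mirrors VSUM to v4i32).
  uint32_t Partial[4] = {};
  for (size_t I = 0; I < In.size(); ++I)
    Partial[I / 2] += In[I];

  // Stage 2: fold the partial sums into one wide accumulator
  // (mirrors VSUM to i128).
  uint64_t Wide = 0;
  for (uint32_t P : Partial)
    Wide += P;

  // The VECREDUCE_ADD result is the total truncated to the element type,
  // i.e. the low 16 bits of the wide sum.
  uint16_t Reduced = static_cast<uint16_t>(Wide);

  uint16_t Naive = 0;
  for (uint16_t V : In)
    Naive = static_cast<uint16_t>(Naive + V); // i16 wrap-around semantics
  assert(Reduced == Naive);
  return 0;
}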
11301
11302static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS) {
11303 FunctionType *FT = F->getFunctionType();
11304 const AttributeList &Attrs = F->getAttributes();
11305 if (Attrs.hasRetAttrs())
11306 OS << Attrs.getAsString(AttributeList::ReturnIndex) << " ";
11307 OS << *F->getReturnType() << " @" << F->getName() << "(";
11308 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
11309 if (I)
11310 OS << ", ";
11311 OS << *FT->getParamType(I);
11312 AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
11313 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt})
11314 if (ArgAttrs.hasAttribute(A))
11315 OS << " " << Attribute::getNameFromAttrKind(A);
11316 }
11317 OS << ")\n";
11318}
11319
11320bool SystemZTargetLowering::isInternal(const Function *Fn) const {
11321 std::map<const Function *, bool>::iterator Itr = IsInternalCache.find(Fn);
11322 if (Itr == IsInternalCache.end())
11323 Itr = IsInternalCache
11324 .insert(std::pair<const Function *, bool>(
11325 Fn, (Fn->hasLocalLinkage() && !Fn->hasAddressTaken())))
11326 .first;
11327 return Itr->second;
11328}
11329
11330void SystemZTargetLowering::
11331verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
11332 const Function *F, SDValue Callee) const {
11333 // Temporarily only do the check when explicitly requested, until it can be
11334 // enabled by default.
11336 return;
11337
11338 bool IsInternal = false;
11339 const Function *CalleeFn = nullptr;
11340 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
11341 if ((CalleeFn = dyn_cast<Function>(G->getGlobal())))
11342 IsInternal = isInternal(CalleeFn);
11343 if (!IsInternal && !verifyNarrowIntegerArgs(Outs)) {
11344 errs() << "ERROR: Missing extension attribute of passed "
11345 << "value in call to function:\n" << "Callee: ";
11346 if (CalleeFn != nullptr)
11347 printFunctionArgExts(CalleeFn, errs());
11348 else
11349 errs() << "-\n";
11350 errs() << "Caller: ";
11351 printFunctionArgExts(F, errs());
11352 llvm_unreachable("");
11353 }
11354}
11355
11356void SystemZTargetLowering::
11357verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
11358 const Function *F) const {
11359 // Temporarily only do the check when explicitly requested, until it can be
11360 // enabled by default.
11362 return;
11363
11364 if (!isInternal(F) && !verifyNarrowIntegerArgs(Outs)) {
11365 errs() << "ERROR: Missing extension attribute of returned "
11366 << "value from function:\n";
11367 printFunctionArgExts(F, errs());
11368 llvm_unreachable("");
11369 }
11370}
11371
11372// Verify that narrow integer arguments are extended as required by the ABI.
11373// Return false if an error is found.
11374bool SystemZTargetLowering::verifyNarrowIntegerArgs(
11375 const SmallVectorImpl<ISD::OutputArg> &Outs) const {
11376 if (!Subtarget.isTargetELF())
11377 return true;
11378
11379 if (EnableIntArgExtCheck.getNumOccurrences()) {
11380 if (!EnableIntArgExtCheck)
11381 return true;
11382 } else if (!getTargetMachine().Options.VerifyArgABICompliance)
11383 return true;
11384
11385 for (unsigned i = 0; i < Outs.size(); ++i) {
11386 MVT VT = Outs[i].VT;
11387 ISD::ArgFlagsTy Flags = Outs[i].Flags;
11388 if (VT.isInteger()) {
11389 assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
11390 "Unexpected integer argument VT.");
11391 if (VT == MVT::i32 &&
11392 !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
11393 return false;
11394 }
11395 }
11396
11397 return true;
11398}
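Restating the rule the loop enforces: by the time outgoing values reach this point, narrower integers have been promoted to i32, and on SystemZ/ELF every such i32 argument or return value must carry a signext, zeroext, or explicit noext flag, while 64-bit and wider integers need no marker. A rough sketch of the equivalent i32-only test phrased against an IR function; the helper name is hypothetical and not part of this file:

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Hypothetical helper: true if parameter ArgNo of F satisfies the SystemZ
// ELF narrow-argument rule that verifyNarrowIntegerArgs checks above.
static bool hasRequiredExtensionAttr(const llvm::Function &F, unsigned ArgNo) {
  const llvm::Argument &Arg = *F.getArg(ArgNo);
  if (!Arg.getType()->isIntegerTy(32))
    return true; // Only narrow (here, i32) integer values are affected.
  return Arg.hasSExtAttr() || Arg.hasZExtAttr() ||
         F.getAttributes().hasParamAttr(ArgNo, llvm::Attribute::NoExt);
}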
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
#define Check(C,...)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static bool isSelectPseudo(MachineInstr &MI)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t High
uint64_t IntrinsicInst * II
#define P(N)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file defines the SmallSet class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL, Comparison C, SDValue TrueOp, SDValue FalseOp)
static SmallVector< SDValue, 4 > simplifyAssumingCCVal(SDValue &Val, SDValue &CC, SelectionDAG &DAG)
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static cl::opt< bool > EnableIntArgExtCheck("argext-abi-check", cl::init(false), cl::desc("Verify that narrow int args are properly extended per the " "SystemZ ABI."))
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static SDValue mergeHighParts(SelectionDAG &DAG, const SDLoc &DL, unsigned MergedBits, EVT VT, SDValue Op0, SDValue Op1)
static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static SDValue convertToF16(SDValue Op, SelectionDAG &DAG)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, SelectionDAG &DAG)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static std::pair< SDValue, int > findCCUse(const SDValue &Val)
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool analyzeArgSplit(const SmallVectorImpl< ArgTy > &Args, SmallVector< CCValAssign, 16 > &ArgLocs, unsigned I, MVT &PartVT, unsigned &NumParts)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG, const SystemZSubtarget &Subtarget, SDValue &Op)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static SDValue buildFPVecFromScalars4(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SmallVectorImpl< SDValue > &Elems, unsigned Pos)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1549
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1394
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1521
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1339
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1497
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition APInt.h:323
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:402
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind)
LLVM Basic Block Representation.
Definition BasicBlock.h:62
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
LLVM_ABI bool isConstant() const
CCState - This class holds information needed while lowering arguments and return values.
LLVM_ABI void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
MachineConstantPoolValue * getMachineCPVal() const
const Constant * getConstVal() const
uint64_t getZExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
A debug info location.
Definition DebugLoc.h:123
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
bool hasAddressTaken(const User **=nullptr, bool IgnoreCallbackUses=false, bool IgnoreAssumeLikeCalls=true, bool IngoreLLVMUsed=false, bool IgnoreARCAttachedCall=false, bool IgnoreCastedDirectCall=false) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition Function.cpp:953
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:764
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:776
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:729
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:651
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
bool hasInternalLinkage() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setMaxCallFrameSize(uint64_t S)
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
const_iterator begin() const
Definition SmallSet.h:215
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
size_type size() const
Definition SmallSet.h:170
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:472
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:696
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
iterator end() const
Definition StringRef.h:114
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
A SystemZ-specific class detailing special use registers particular for calling conventions.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, const SDLoc &DL, const AsmOperandInfo &Constraint, SelectionDAG &DAG) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CondMergingParams getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const override
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
SDValue useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, MVT VT, SDValue Arg, SDLoc DL, SDValue Chain, bool IsStrict) const
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, EVT *LargestVT=nullptr) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:402
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
A raw_ostream that writes to a file descriptor.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:788
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:168
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
@ STRICT_FMINIMUM
Definition ISDOpcodes.h:471
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:787
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ BR_JT
BR_JT - Jumptable branch.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:247
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ GET_ROUNDING
Returns current rounding mode: -1 = Undefined; 0 = Round to 0; 1 = Round to nearest, ties to even; 2 = Round to ...
Definition ISDOpcodes.h:970
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ STRICT_FMAXIMUM
Definition ISDOpcodes.h:470
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum/maximum on two values, following IEEE-754 definition...
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:328
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:464
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ TRAP
TRAP - Trapping instruction.
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition ISDOpcodes.h:997
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:458
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:162
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:213
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
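A brief sketch of how these helpers are typically used when normalizing a comparison; the condition code is illustrative.
  ISD::CondCode CC = ISD::SETLT;
  // Predicate for !(X < Y), i.e. X >= Y.
  ISD::CondCode InvCC = ISD::getSetCCInverse(CC, MVT::i64);
  // Predicate P such that (Y P X) == (X < Y), i.e. SETGT.
  ISD::CondCode SwapCC = ISD::getSetCCSwappedOperands(CC);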
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
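A hedged sketch of the matcher idiom these entries refer to; V is a placeholder llvm::Value*, and X/MaskVal receive the matched pieces.
  using namespace llvm::PatternMatch;
  Value *X = nullptr;
  const APInt *MaskVal = nullptr;
  // Recognize (X & C) where C is a constant integer or a splatted constant vector.
  bool IsMaskedValue = match(V, m_And(m_Value(X), m_APInt(MaskVal)));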
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
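As a sketch, lowering code usually looks up the libcall for a source/result type pair and treats UNKNOWN_LIBCALL as an error; the types below are assumptions for illustration.
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(MVT::f128, MVT::i64);
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion");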
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned VR16Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned FP16Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition SystemZ.h:41
static bool isImmHH(uint64_t Val)
Definition SystemZ.h:177
const unsigned CCMASK_TEND
Definition SystemZ.h:98
const unsigned CCMASK_CS_EQ
Definition SystemZ.h:68
const unsigned CCMASK_TBEGIN
Definition SystemZ.h:93
const unsigned CCMASK_0
Definition SystemZ.h:28
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition SystemZ.h:83
const unsigned CCMASK_LOGICAL_CARRY
Definition SystemZ.h:61
const unsigned TDCMASK_NORMAL_MINUS
Definition SystemZ.h:123
const unsigned CCMASK_TDC
Definition SystemZ.h:110
const unsigned CCMASK_FCMP
Definition SystemZ.h:49
const unsigned CCMASK_TM_SOME_0
Definition SystemZ.h:82
static bool isImmHL(uint64_t Val)
Definition SystemZ.h:172
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition SystemZ.h:125
const unsigned PFD_READ
Definition SystemZ.h:116
const unsigned CCMASK_1
Definition SystemZ.h:29
const unsigned TDCMASK_NORMAL_PLUS
Definition SystemZ.h:122
const unsigned PFD_WRITE
Definition SystemZ.h:117
const unsigned CCMASK_CMP_GT
Definition SystemZ.h:38
const unsigned TDCMASK_QNAN_MINUS
Definition SystemZ.h:129
const unsigned CCMASK_CS
Definition SystemZ.h:70
const unsigned CCMASK_ANY
Definition SystemZ.h:32
const unsigned CCMASK_ARITH
Definition SystemZ.h:56
const unsigned CCMASK_TM_MIXED_MSB_0
Definition SystemZ.h:79
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition SystemZ.h:124
static bool isImmLL(uint64_t Val)
Definition SystemZ.h:162
const unsigned VectorBits
Definition SystemZ.h:155
static bool isImmLH(uint64_t Val)
Definition SystemZ.h:167
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition SystemZ.h:126
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition SystemZ.h:78
const unsigned IPM_CC
Definition SystemZ.h:113
const unsigned CCMASK_CMP_LE
Definition SystemZ.h:40
const unsigned CCMASK_CMP_O
Definition SystemZ.h:45
const unsigned CCMASK_CMP_EQ
Definition SystemZ.h:36
const unsigned VectorBytes
Definition SystemZ.h:159
const unsigned TDCMASK_INFINITY_MINUS
Definition SystemZ.h:127
const unsigned CCMASK_ICMP
Definition SystemZ.h:48
const unsigned CCMASK_VCMP_ALL
Definition SystemZ.h:102
const unsigned CCMASK_VCMP_NONE
Definition SystemZ.h:104
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition SystemZ.h:105
const unsigned CCMASK_TM_MIXED_MSB_1
Definition SystemZ.h:80
const unsigned CCMASK_TM_MSB_0
Definition SystemZ.h:84
const unsigned CCMASK_ARITH_OVERFLOW
Definition SystemZ.h:55
const unsigned CCMASK_CS_NE
Definition SystemZ.h:69
const unsigned TDCMASK_SNAN_PLUS
Definition SystemZ.h:130
const unsigned CCMASK_TM
Definition SystemZ.h:86
const unsigned CCMASK_3
Definition SystemZ.h:31
const unsigned CCMASK_NONE
Definition SystemZ.h:27
const unsigned CCMASK_CMP_LT
Definition SystemZ.h:37
const unsigned CCMASK_CMP_NE
Definition SystemZ.h:39
const unsigned TDCMASK_ZERO_PLUS
Definition SystemZ.h:120
const unsigned TDCMASK_QNAN_PLUS
Definition SystemZ.h:128
const unsigned TDCMASK_ZERO_MINUS
Definition SystemZ.h:121
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition SystemZ.h:81
const unsigned CCMASK_LOGICAL_BORROW
Definition SystemZ.h:63
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition SystemZ.h:44
const unsigned CCMASK_LOGICAL
Definition SystemZ.h:65
const unsigned CCMASK_TM_MSB_1
Definition SystemZ.h:85
const unsigned TDCMASK_SNAN_MINUS
Definition SystemZ.h:131
initializer< Ty > init(const Ty &Val)
support::ulittle32_t Word
Definition IRSymtab.h:53
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
@ Length
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
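Illustrative sketch of the builder pattern, assuming the surrounding context of a custom-inserter routine; MBB, MI, DL, TII, DestReg and SrcReg are placeholders, and SystemZ::LGR stands in for whatever opcode is being emitted.
  // Insert a 64-bit register copy before MI.
  BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGR), DestReg)
      .addReg(SrcReg);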
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
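For illustration, SystemZ-style addressing code uses this kind of check for signed 20-bit long displacements; the displacement value below is a placeholder.
  // Placeholder displacement; real code would take it from the addressing mode.
  int64_t Displacement = -4096;
  bool FitsLongDisplacement = isInt<20>(Displacement);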
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Define
Register definition.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
@ Done
Definition Threading.h:60
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr T maskLeadingOnes(unsigned N)
Create a bitmask with the N left-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:88
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert ‘Bytes’ to a hex string and output to ‘OS’.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition bit.h:202
int countl_zero(T Val)
Count the number of 0's from the most significant bit to the least significant, stopping at the first 1.
Definition bit.h:236
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition MathExtras.h:394
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
#define EQ(a, b)
Definition regexec.c:65
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
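A short sketch of the EVT factory helpers listed above; the element type, width and count are assumptions, and DAG stands in for an in-scope SelectionDAG.
  LLVMContext &Ctx = *DAG.getContext();
  EVT I128  = EVT::getIntegerVT(Ctx, 128);          // integer of arbitrary width
  EVT V4I32 = EVT::getVectorVT(Ctx, MVT::i32, 4);   // fixed-width v4i32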
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:189
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:175
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:324
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:183
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:148
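A minimal sketch of how a computeKnownBitsForTargetNode-style hook combines these; DAG, Op, Depth and Known are the hook's parameters, and the operand indices are illustrative.
  KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
  KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
  // Keep only the bit facts that hold for both operands.
  Known = LHSKnown.intersectWith(RHSKnown);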
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
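For illustration, a constant-pool load typically carries one of these records so later passes know which memory is read; CPAddr, DL and MF are placeholders.
  SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), CPAddr,
                             MachinePointerInfo::getConstantPool(MF));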
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
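A hedged sketch of the usual pattern: populate a CallLoweringInfo with the chained setters above and hand it to LowerCallTo (listed earlier); the callee, return type and extension choice are assumptions.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setCallee(CallingConv::C, RetTy, Callee, std::move(Args))
      .setSExtResult();
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  // CallResult.first is the return value, CallResult.second the updated chain.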
This structure is used to pass arguments to makeLibCall function.