1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
23#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/IntrinsicsS390.h"
27#include <cctype>
28#include <optional>
29
30using namespace llvm;
31
32#define DEBUG_TYPE "systemz-lower"
33
34namespace {
35// Represents information about a comparison.
36struct Comparison {
37 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
38 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
39 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
40
41 // The operands to the comparison.
42 SDValue Op0, Op1;
43
44 // Chain if this is a strict floating-point comparison.
45 SDValue Chain;
46
47 // The opcode that should be used to compare Op0 and Op1.
48 unsigned Opcode;
49
50 // A SystemZICMP value. Only used for integer comparisons.
51 unsigned ICmpType;
52
53 // The mask of CC values that Opcode can produce.
54 unsigned CCValid;
55
56 // The mask of CC values for which the original condition is true.
57 unsigned CCMask;
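  // For illustration: an integer equality test would typically be described
  // with Opcode == SystemZISD::ICMP, ICmpType == SystemZICMP::Any,
  // CCValid == SystemZ::CCMASK_ICMP and CCMask == SystemZ::CCMASK_CMP_EQ,
  // i.e. the compare can set CC 0-2 and the condition holds only for CC 0.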
58};
59} // end anonymous namespace
60
61// Classify VT as either 32 or 64 bit.
62static bool is32Bit(EVT VT) {
63 switch (VT.getSimpleVT().SimpleTy) {
64 case MVT::i32:
65 return true;
66 case MVT::i64:
67 return false;
68 default:
69 llvm_unreachable("Unsupported type");
70 }
71}
72
73// Return a version of MachineOperand that can be safely used before the
74// final use.
76 if (Op.isReg())
77 Op.setIsKill(false);
78 return Op;
79}
80
82 const SystemZSubtarget &STI)
83 : TargetLowering(TM), Subtarget(STI) {
84 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
85
86 auto *Regs = STI.getSpecialRegisters();
87
88 // Set up the register classes.
89 if (Subtarget.hasHighWord())
90 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
91 else
92 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
93 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
94 if (!useSoftFloat()) {
95 if (Subtarget.hasVector()) {
96 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
97 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
98 } else {
99 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
100 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
101 }
102 if (Subtarget.hasVectorEnhancements1())
103 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
104 else
105 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
106
107 if (Subtarget.hasVector()) {
108 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
109 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
110 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
111 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
113 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
114 }
115
116 if (Subtarget.hasVector())
117 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
118 }
119
120 // Compute derived properties from the register classes
122
123 // Set up special registers.
124 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
125
126 // TODO: It may be better to default to latency-oriented scheduling; however,
127 // LLVM's current latency-oriented scheduler can't handle physreg definitions
128 // such as SystemZ has with CC, so set this to the register-pressure
129 // scheduler, which can handle them.
131
134
136
137 // Instructions are strings of 2-byte aligned 2-byte values.
139 // For performance reasons we prefer 16-byte alignment.
141
142 // Handle operations that are handled in a similar way for all types.
143 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
144 I <= MVT::LAST_FP_VALUETYPE;
145 ++I) {
147 if (isTypeLegal(VT)) {
148 // Lower SET_CC into an IPM-based sequence.
152
153 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
155
156 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
159 }
160 }
161
162 // Expand jump table branches as address arithmetic followed by an
163 // indirect jump.
165
166 // Expand BRCOND into a BR_CC (see above).
168
169 // Handle integer types except i128.
170 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
171 I <= MVT::LAST_INTEGER_VALUETYPE;
172 ++I) {
174 if (isTypeLegal(VT) && VT != MVT::i128) {
176
177 // Expand individual DIV and REMs into DIVREMs.
184
185 // Support addition/subtraction with overflow.
188
189 // Support addition/subtraction with carry.
192
193 // Support carry in as value rather than glue.
196
197 // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
198 // stores, putting a serialization instruction after the stores.
201
202 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
203 // available, or if the operand is constant.
205
206 // Use POPCNT on z196 and above.
207 if (Subtarget.hasPopulationCount())
209 else
211
212 // No special instructions for these.
215
216 // Use *MUL_LOHI where possible instead of MULH*.
221
222 // Only z196 and above have native support for conversions to unsigned.
223 // On z10, promoting to i64 doesn't generate an inexact condition for
224 // values that are outside the i32 range but in the i64 range, so use
225 // the default expansion.
226 if (!Subtarget.hasFPExtension())
228
229 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
230 // default to Expand, so need to be modified to Legal where appropriate.
232 if (Subtarget.hasFPExtension())
234
235 // And similarly for STRICT_[SU]INT_TO_FP.
237 if (Subtarget.hasFPExtension())
239 }
240 }
241
242 // Handle i128 if legal.
243 if (isTypeLegal(MVT::i128)) {
244 // No special instructions for these.
260
261 // Support addition/subtraction with carry.
266
267 // Use VPOPCT and add up partial results.
269
270 // We have to use libcalls for these.
279 }
280
281 // Type legalization will convert 8- and 16-bit atomic operations into
282 // forms that operate on i32s (but still keeping the original memory VT).
283 // Lower them into full i32 operations.
295
296 // Whether or not i128 is a legal type, we need to custom lower
297 // the atomic operations in order to exploit SystemZ instructions.
300
301 // We can use the CC result of compare-and-swap to implement
302 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
306
308
309 // Traps are legal, as we will convert them to "j .+2".
310 setOperationAction(ISD::TRAP, MVT::Other, Legal);
311
312 // z10 has instructions for signed but not unsigned FP conversion.
313 // Handle unsigned 32-bit types as signed 64-bit types.
314 if (!Subtarget.hasFPExtension()) {
319 }
320
321 // We have native support for a 64-bit CTLZ, via FLOGR.
325
326 // On z15 we have native support for a 64-bit CTPOP.
327 if (Subtarget.hasMiscellaneousExtensions3()) {
330 }
331
332 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
334
335 // Expand 128-bit shifts without using a libcall.
339 setLibcallName(RTLIB::SRL_I128, nullptr);
340 setLibcallName(RTLIB::SHL_I128, nullptr);
341 setLibcallName(RTLIB::SRA_I128, nullptr);
342
343 // Also expand 256-bit shifts if i128 is a legal type.
344 if (isTypeLegal(MVT::i128)) {
348 }
349
350 // Handle bitcast from fp128 to i128.
351 if (!isTypeLegal(MVT::i128))
353
354 // We have native instructions for i8, i16 and i32 extensions, but not i1.
356 for (MVT VT : MVT::integer_valuetypes()) {
360 }
361
362 // Handle the various types of symbolic address.
368
369 // We need to handle dynamic allocations specially because of the
370 // 160-byte area at the bottom of the stack.
373
376
377 // Handle prefetches with PFD or PFDRL.
379
381 // Assume by default that all vector operations need to be expanded.
382 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
383 if (getOperationAction(Opcode, VT) == Legal)
384 setOperationAction(Opcode, VT, Expand);
385
386 // Likewise all truncating stores and extending loads.
387 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
388 setTruncStoreAction(VT, InnerVT, Expand);
391 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
392 }
393
394 if (isTypeLegal(VT)) {
395 // These operations are legal for anything that can be stored in a
396 // vector register, even if there is no native support for the format
397 // as such. In particular, we can do these for v4f32 even though there
398 // are no specific instructions for that format.
404
405 // Likewise, except that we need to replace the nodes with something
406 // more specific.
409 }
410 }
411
412 // Handle integer vector types.
414 if (isTypeLegal(VT)) {
415 // These operations have direct equivalents.
420 if (VT != MVT::v2i64)
426 if (Subtarget.hasVectorEnhancements1())
428 else
432
433 // Convert a GPR scalar to a vector by inserting it into element 0.
435
436 // Use a series of unpacks for extensions.
439
440 // Detect shifts/rotates by a scalar amount and convert them into
441 // V*_BY_SCALAR.
446
447 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
448 // and inverting the result as necessary.
450 }
451 }
452
453 if (Subtarget.hasVector()) {
454 // There should be no need to check for float types other than v2f64
455 // since <2 x f32> isn't a legal type.
464
473 }
474
475 if (Subtarget.hasVectorEnhancements2()) {
484
493 }
494
495 // Handle floating-point types.
496 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
497 I <= MVT::LAST_FP_VALUETYPE;
498 ++I) {
500 if (isTypeLegal(VT)) {
501 // We can use FI for FRINT.
503
504 // We can use the extended form of FI for other rounding operations.
505 if (Subtarget.hasFPExtension()) {
511 }
512
513 // No special instructions for these.
519
520 // Special treatment.
522
523 // Handle constrained floating-point operations.
533 if (Subtarget.hasFPExtension()) {
539 }
540 }
541 }
542
543 // Handle floating-point vector types.
544 if (Subtarget.hasVector()) {
545 // Scalar-to-vector conversion is just a subreg.
548
549 // Some insertions and extractions can be done directly but others
550 // need to go via integers.
555
556 // These operations have direct equivalents.
557 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
558 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
559 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
560 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
561 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
562 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
563 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
564 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
565 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
568 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
571
572 // Handle constrained floating-point operations.
585
590 if (Subtarget.hasVectorEnhancements1()) {
593 }
594 }
595
596 // The vector enhancements facility 1 has instructions for these.
597 if (Subtarget.hasVectorEnhancements1()) {
598 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
599 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
600 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
601 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
602 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
603 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
604 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
605 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
606 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
609 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
612
617
622
627
632
637
638 // Handle constrained floating-point operations.
651 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
652 MVT::v4f32, MVT::v2f64 }) {
657 }
658 }
659
660 // We only have fused f128 multiply-addition on vector registers.
661 if (!Subtarget.hasVectorEnhancements1()) {
664 }
665
666 // We don't have a copysign instruction on vector registers.
667 if (Subtarget.hasVectorEnhancements1())
669
670 // Needed so that we don't try to implement f128 constant loads using
671 // a load-and-extend of an f80 constant (in cases where the constant
672 // would fit in an f80).
673 for (MVT VT : MVT::fp_valuetypes())
674 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
675
676 // We don't have extending load instructions on vector registers.
677 if (Subtarget.hasVectorEnhancements1()) {
678 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
679 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
680 }
681
682 // Floating-point truncation and stores need to be done separately.
683 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
684 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
685 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
686
687 // We have 64-bit FPR<->GPR moves, but need special handling for
688 // 32-bit forms.
689 if (!Subtarget.hasVector()) {
692 }
693
694 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
695 // structure, but VAEND is a no-op.
699
701
702 // Codes for which we want to perform some z-specific combinations.
706 ISD::LOAD,
717 ISD::SDIV,
718 ISD::UDIV,
719 ISD::SREM,
720 ISD::UREM,
723
724 // Handle intrinsics.
727
728 // We want to use MVC in preference to even a single load/store pair.
729 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
731
732 // The main memset sequence is a byte store followed by an MVC.
733 // Two STC or MV..I stores win over that, but the kind of fused stores
734 // generated by target-independent code don't when the byte value is
735 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
736 // than "STC;MVC". Handle the choice in target-specific code instead.
737 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
739
740 // Default to having -disable-strictnode-mutation on
741 IsStrictFPEnabled = true;
742
743 if (Subtarget.isTargetzOS()) {
744 struct RTLibCallMapping {
745 RTLIB::Libcall Code;
746 const char *Name;
747 };
748 static RTLibCallMapping RTLibCallCommon[] = {
749#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
750#include "ZOSLibcallNames.def"
751 };
752 for (auto &E : RTLibCallCommon)
753 setLibcallName(E.Code, E.Name);
754 }
755}
756
758 return Subtarget.hasSoftFloat();
759}
760
762 LLVMContext &, EVT VT) const {
763 if (!VT.isVector())
764 return MVT::i32;
766}
767
769 const MachineFunction &MF, EVT VT) const {
770 VT = VT.getScalarType();
771
772 if (!VT.isSimple())
773 return false;
774
775 switch (VT.getSimpleVT().SimpleTy) {
776 case MVT::f32:
777 case MVT::f64:
778 return true;
779 case MVT::f128:
780 return Subtarget.hasVectorEnhancements1();
781 default:
782 break;
783 }
784
785 return false;
786}
787
788// Return true if the constant can be generated with a vector instruction,
789// such as VGM, VGMB or VREPI.
791 const SystemZSubtarget &Subtarget) {
792 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
793 if (!Subtarget.hasVector() ||
794 (isFP128 && !Subtarget.hasVectorEnhancements1()))
795 return false;
796
797 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
798 // preferred way of creating all-zero and all-one vectors so give it
799 // priority over other methods below.
800 unsigned Mask = 0;
801 unsigned I = 0;
802 for (; I < SystemZ::VectorBytes; ++I) {
803 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
804 if (Byte == 0xff)
805 Mask |= 1ULL << I;
806 else if (Byte != 0)
807 break;
808 }
809 if (I == SystemZ::VectorBytes) {
811 OpVals.push_back(Mask);
813 return true;
814 }
815
816 if (SplatBitSize > 64)
817 return false;
818
819 auto tryValue = [&](uint64_t Value) -> bool {
820 // Try VECTOR REPLICATE IMMEDIATE
821 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
822 if (isInt<16>(SignedValue)) {
823 OpVals.push_back(((unsigned) SignedValue));
826 SystemZ::VectorBits / SplatBitSize);
827 return true;
828 }
829 // Try VECTOR GENERATE MASK
830 unsigned Start, End;
831 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
832 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
833 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
834 // a SplatBitSize-wide value, so that 0 denotes 1 << (SplatBitSize-1).
835 OpVals.push_back(Start - (64 - SplatBitSize));
836 OpVals.push_back(End - (64 - SplatBitSize));
839 SystemZ::VectorBits / SplatBitSize);
840 return true;
841 }
842 return false;
843 };
844
845 // First try assuming that any undefined bits above the highest set bit
846 // and below the lowest set bit are 1s. This increases the likelihood of
847 // being able to use a sign-extended element value in VECTOR REPLICATE
848 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
849 uint64_t SplatBitsZ = SplatBits.getZExtValue();
850 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
851 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
852 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
853 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
854 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
855 if (tryValue(SplatBitsZ | Upper | Lower))
856 return true;
857
858 // Now try assuming that any undefined bits between the first and
859 // last defined set bits are set. This increases the chances of
860 // using a non-wraparound mask.
861 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
862 return tryValue(SplatBitsZ | Middle);
863}
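// For illustration, given the checks above: an all-ones v16i8 constant is
// matched by the VGBM path with Mask == 0xffff; a splat whose element value
// fits in a signed 16-bit immediate (e.g. a v8i16 splat of -4) can use
// VECTOR REPLICATE IMMEDIATE; and a splat whose set bits form a single
// contiguous (possibly wrapping) run can use VECTOR GENERATE MASK.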
864
866 if (IntImm.isSingleWord()) {
867 IntBits = APInt(128, IntImm.getZExtValue());
868 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
869 } else
870 IntBits = IntImm;
871 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
872
873 // Find the smallest splat.
874 SplatBits = IntImm;
875 unsigned Width = SplatBits.getBitWidth();
876 while (Width > 8) {
877 unsigned HalfSize = Width / 2;
878 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
879 APInt LowValue = SplatBits.trunc(HalfSize);
880
881 // If the two halves do not match, stop here.
882 if (HighValue != LowValue || 8 > HalfSize)
883 break;
884
885 SplatBits = HighValue;
886 Width = HalfSize;
887 }
888 SplatUndef = 0;
889 SplatBitSize = Width;
890}
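// For example, the 128-bit pattern 0x0001_0001_..._0001 halves repeatedly
// until SplatBits == 0x0001 and SplatBitSize == 16; the next halving stops
// because the 8-bit halves 0x00 and 0x01 differ.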
891
893 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
894 bool HasAnyUndefs;
895
896 // Get IntBits by finding the 128 bit splat.
897 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
898 true);
899
900 // Get SplatBits by finding the 8 bit or greater splat.
901 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
902 true);
903}
904
906 bool ForCodeSize) const {
907 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
908 if (Imm.isZero() || Imm.isNegZero())
909 return true;
910
912}
913
914/// Returns true if stack probing through inline assembly is requested.
916 // If the function specifically requests inline stack probes, emit them.
917 if (MF.getFunction().hasFnAttribute("probe-stack"))
918 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
919 "inline-asm";
920 return false;
921}
922
925 // Don't expand subword operations as they require special treatment.
926 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
928
929 // Don't expand if there is a target instruction available.
930 if (Subtarget.hasInterlockedAccess1() &&
931 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
938
940}
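// Roughly: 8- and 16-bit atomicrmw operations keep their custom subword
// lowering, and with the interlocked-access facility 1 the common 32- and
// 64-bit forms can be handled directly by instructions such as LAA, LAAG,
// LAN, LAO and LAX; anything else falls back to a compare-and-swap expansion.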
941
943 // We can use CGFI or CLGFI.
944 return isInt<32>(Imm) || isUInt<32>(Imm);
945}
946
948 // We can use ALGFI or SLGFI.
949 return isUInt<32>(Imm) || isUInt<32>(-Imm);
950}
951
953 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
954 // Unaligned accesses should never be slower than the expanded version.
955 // We check specifically for aligned accesses in the few cases where
956 // they are required.
957 if (Fast)
958 *Fast = 1;
959 return true;
960}
961
962// Information about the addressing mode for a memory access.
964 // True if a long displacement is supported.
966
967 // True if use of index register is supported.
969
970 AddressingMode(bool LongDispl, bool IdxReg) :
971 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
972};
973
974 // Return the desired addressing mode for a Load whose only use (in the
975 // same block) is a Store.
977 Type *Ty) {
978 // With vector support a Load->Store combination may be combined to either
979 // an MVC or vector operations and it seems to work best to allow the
980 // vector addressing mode.
981 if (HasVector)
982 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
983
984 // Otherwise only the MVC case is special.
985 bool MVC = Ty->isIntegerTy(8);
986 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
987}
988
989// Return the addressing mode which seems most desirable given an LLVM
990// Instruction pointer.
991static AddressingMode
993 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
994 switch (II->getIntrinsicID()) {
995 default: break;
996 case Intrinsic::memset:
997 case Intrinsic::memmove:
998 case Intrinsic::memcpy:
999 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1000 }
1001 }
1002
1003 if (isa<LoadInst>(I) && I->hasOneUse()) {
1004 auto *SingleUser = cast<Instruction>(*I->user_begin());
1005 if (SingleUser->getParent() == I->getParent()) {
1006 if (isa<ICmpInst>(SingleUser)) {
1007 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1008 if (C->getBitWidth() <= 64 &&
1009 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1010 // Comparison of memory with 16 bit signed / unsigned immediate
1011 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1012 } else if (isa<StoreInst>(SingleUser))
1013 // Load->Store
1014 return getLoadStoreAddrMode(HasVector, I->getType());
1015 }
1016 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1017 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1018 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1019 // Load->Store
1020 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1021 }
1022
1023 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1024
1025 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1026 // dependencies (LDE only supports small offsets).
1027 // * Utilize the vector registers to hold floating point
1028 // values (vector load / store instructions only support small
1029 // offsets).
1030
1031 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1032 I->getOperand(0)->getType());
1033 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1034 bool IsVectorAccess = MemAccessTy->isVectorTy();
1035
1036 // A store of an extracted vector element will be combined into a VSTE type
1037 // instruction.
1038 if (!IsVectorAccess && isa<StoreInst>(I)) {
1039 Value *DataOp = I->getOperand(0);
1040 if (isa<ExtractElementInst>(DataOp))
1041 IsVectorAccess = true;
1042 }
1043
1044 // A load which gets inserted into a vector element will be combined into a
1045 // VLE type instruction.
1046 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1047 User *LoadUser = *I->user_begin();
1048 if (isa<InsertElementInst>(LoadUser))
1049 IsVectorAccess = true;
1050 }
1051
1052 if (IsFPAccess || IsVectorAccess)
1053 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1054 }
1055
1056 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1057}
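// For illustration: a load whose only use is a comparison against a 16-bit
// immediate is expected to become a compare-storage-with-immediate
// instruction such as CHSI or CLGHSI, which takes only a base register and a
// 12-bit unsigned displacement, hence the (false, false) addressing mode
// above.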
1058
1060 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1061 // Punt on globals for now, although they can be used in limited
1062 // RELATIVE LONG cases.
1063 if (AM.BaseGV)
1064 return false;
1065
1066 // Require a 20-bit signed offset.
1067 if (!isInt<20>(AM.BaseOffs))
1068 return false;
1069
1070 bool RequireD12 =
1071 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1072 AddressingMode SupportedAM(!RequireD12, true);
1073 if (I != nullptr)
1074 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1075
1076 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1077 return false;
1078
1079 if (!SupportedAM.IndexReg)
1080 // No indexing allowed.
1081 return AM.Scale == 0;
1082 else
1083 // Indexing is OK but no scale factor can be applied.
1084 return AM.Scale == 0 || AM.Scale == 1;
1085}
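// Note that with vector support, vector-typed and i128 memory accesses are
// restricted to the short form (base plus 12-bit unsigned displacement), as
// the vector load/store instructions have no long-displacement variants;
// that is what the RequireD12 flag above encodes.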
1086
1088 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
1089 unsigned SrcAS, const AttributeList &FuncAttributes) const {
1090 const int MVCFastLen = 16;
1091
1092 if (Limit != ~unsigned(0)) {
1093 // Don't expand Op into scalar loads/stores in these cases:
1094 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1095 return false; // Small memcpy: Use MVC
1096 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1097 return false; // Small memset (first byte with STC/MVI): Use MVC
1098 if (Op.isZeroMemset())
1099 return false; // Memset zero: Use XC
1100 }
1101
1102 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1103 SrcAS, FuncAttributes);
1104}
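// In other words, when expansion into discrete loads and stores is on the
// table, we decline it for small memcpys (a single MVC is preferred), for
// small non-zero memsets (an STC/MVI of the first byte followed by an
// overlapping MVC), and for zero memsets (an XC of the destination with
// itself).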
1105
1107 const AttributeList &FuncAttributes) const {
1108 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1109}
1110
1111bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1112 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1113 return false;
1114 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1115 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1116 return FromBits > ToBits;
1117}
1118
1120 if (!FromVT.isInteger() || !ToVT.isInteger())
1121 return false;
1122 unsigned FromBits = FromVT.getFixedSizeInBits();
1123 unsigned ToBits = ToVT.getFixedSizeInBits();
1124 return FromBits > ToBits;
1125}
1126
1127//===----------------------------------------------------------------------===//
1128// Inline asm support
1129//===----------------------------------------------------------------------===//
1130
1133 if (Constraint.size() == 1) {
1134 switch (Constraint[0]) {
1135 case 'a': // Address register
1136 case 'd': // Data register (equivalent to 'r')
1137 case 'f': // Floating-point register
1138 case 'h': // High-part register
1139 case 'r': // General-purpose register
1140 case 'v': // Vector register
1141 return C_RegisterClass;
1142
1143 case 'Q': // Memory with base and unsigned 12-bit displacement
1144 case 'R': // Likewise, plus an index
1145 case 'S': // Memory with base and signed 20-bit displacement
1146 case 'T': // Likewise, plus an index
1147 case 'm': // Equivalent to 'T'.
1148 return C_Memory;
1149
1150 case 'I': // Unsigned 8-bit constant
1151 case 'J': // Unsigned 12-bit constant
1152 case 'K': // Signed 16-bit constant
1153 case 'L': // Signed 20-bit displacement (on all targets we support)
1154 case 'M': // 0x7fffffff
1155 return C_Immediate;
1156
1157 default:
1158 break;
1159 }
1160 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1161 switch (Constraint[1]) {
1162 case 'Q': // Address with base and unsigned 12-bit displacement
1163 case 'R': // Likewise, plus an index
1164 case 'S': // Address with base and signed 20-bit displacement
1165 case 'T': // Likewise, plus an index
1166 return C_Address;
1167
1168 default:
1169 break;
1170 }
1171 }
1172 return TargetLowering::getConstraintType(Constraint);
1173}
1174
1177 const char *constraint) const {
1179 Value *CallOperandVal = info.CallOperandVal;
1180 // If we don't have a value, we can't do a match,
1181 // but allow it at the lowest weight.
1182 if (!CallOperandVal)
1183 return CW_Default;
1184 Type *type = CallOperandVal->getType();
1185 // Look at the constraint type.
1186 switch (*constraint) {
1187 default:
1189 break;
1190
1191 case 'a': // Address register
1192 case 'd': // Data register (equivalent to 'r')
1193 case 'h': // High-part register
1194 case 'r': // General-purpose register
1195 weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1196 break;
1197
1198 case 'f': // Floating-point register
1199 if (!useSoftFloat())
1200 weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1201 break;
1202
1203 case 'v': // Vector register
1204 if (Subtarget.hasVector())
1205 weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1206 : CW_Default;
1207 break;
1208
1209 case 'I': // Unsigned 8-bit constant
1210 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1211 if (isUInt<8>(C->getZExtValue()))
1212 weight = CW_Constant;
1213 break;
1214
1215 case 'J': // Unsigned 12-bit constant
1216 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1217 if (isUInt<12>(C->getZExtValue()))
1218 weight = CW_Constant;
1219 break;
1220
1221 case 'K': // Signed 16-bit constant
1222 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1223 if (isInt<16>(C->getSExtValue()))
1224 weight = CW_Constant;
1225 break;
1226
1227 case 'L': // Signed 20-bit displacement (on all targets we support)
1228 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1229 if (isInt<20>(C->getSExtValue()))
1230 weight = CW_Constant;
1231 break;
1232
1233 case 'M': // 0x7fffffff
1234 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1235 if (C->getZExtValue() == 0x7fffffff)
1236 weight = CW_Constant;
1237 break;
1238 }
1239 return weight;
1240}
1241
1242 // Parse a "{tNNN}" register constraint for which the register type "t"
1243 // has already been verified. RC is the register class associated with "t",
1244 // and Map maps 0-based register numbers to LLVM register numbers.
1245static std::pair<unsigned, const TargetRegisterClass *>
1247 const unsigned *Map, unsigned Size) {
1248 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1249 if (isdigit(Constraint[2])) {
1250 unsigned Index;
1251 bool Failed =
1252 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1253 if (!Failed && Index < Size && Map[Index])
1254 return std::make_pair(Map[Index], RC);
1255 }
1256 return std::make_pair(0U, nullptr);
1257}
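// For example, the constraint "{r7}" with a 64-bit operand parses Index == 7
// and, assuming the GR64 number-to-register table is passed as Map, resolves
// to SystemZ::R7D paired with the GR64 register class.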
1258
1259std::pair<unsigned, const TargetRegisterClass *>
1261 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1262 if (Constraint.size() == 1) {
1263 // GCC Constraint Letters
1264 switch (Constraint[0]) {
1265 default: break;
1266 case 'd': // Data register (equivalent to 'r')
1267 case 'r': // General-purpose register
1268 if (VT.getSizeInBits() == 64)
1269 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1270 else if (VT.getSizeInBits() == 128)
1271 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1272 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1273
1274 case 'a': // Address register
1275 if (VT == MVT::i64)
1276 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1277 else if (VT == MVT::i128)
1278 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1279 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1280
1281 case 'h': // High-part register (an LLVM extension)
1282 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1283
1284 case 'f': // Floating-point register
1285 if (!useSoftFloat()) {
1286 if (VT.getSizeInBits() == 64)
1287 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1288 else if (VT.getSizeInBits() == 128)
1289 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1290 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1291 }
1292 break;
1293
1294 case 'v': // Vector register
1295 if (Subtarget.hasVector()) {
1296 if (VT.getSizeInBits() == 32)
1297 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1298 if (VT.getSizeInBits() == 64)
1299 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1300 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1301 }
1302 break;
1303 }
1304 }
1305 if (Constraint.starts_with("{")) {
1306
1307 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1308 // to check the size on.
1309 auto getVTSizeInBits = [&VT]() {
1310 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1311 };
1312
1313 // We need to override the default register parsing for GPRs and FPRs
1314 // because the interpretation depends on VT. The internal names of
1315 // the registers are also different from the external names
1316 // (F0D and F0S instead of F0, etc.).
1317 if (Constraint[1] == 'r') {
1318 if (getVTSizeInBits() == 32)
1319 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1321 if (getVTSizeInBits() == 128)
1322 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1324 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1326 }
1327 if (Constraint[1] == 'f') {
1328 if (useSoftFloat())
1329 return std::make_pair(
1330 0u, static_cast<const TargetRegisterClass *>(nullptr));
1331 if (getVTSizeInBits() == 32)
1332 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1334 if (getVTSizeInBits() == 128)
1335 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1337 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1339 }
1340 if (Constraint[1] == 'v') {
1341 if (!Subtarget.hasVector())
1342 return std::make_pair(
1343 0u, static_cast<const TargetRegisterClass *>(nullptr));
1344 if (getVTSizeInBits() == 32)
1345 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1347 if (getVTSizeInBits() == 64)
1348 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1350 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1352 }
1353 }
1354 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1355}
1356
1357// FIXME? Maybe this could be a TableGen attribute on some registers and
1358// this table could be generated automatically from RegInfo.
1361 const MachineFunction &MF) const {
1362 Register Reg =
1364 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0)
1365 .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0)
1366 .Default(0);
1367
1368 if (Reg)
1369 return Reg;
1370 report_fatal_error("Invalid register name global variable");
1371}
1372
1374 const Constant *PersonalityFn) const {
1375 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1376}
1377
1379 const Constant *PersonalityFn) const {
1380 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1381}
1382
1384 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1385 SelectionDAG &DAG) const {
1386 // Only support length 1 constraints for now.
1387 if (Constraint.size() == 1) {
1388 switch (Constraint[0]) {
1389 case 'I': // Unsigned 8-bit constant
1390 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1391 if (isUInt<8>(C->getZExtValue()))
1392 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1393 Op.getValueType()));
1394 return;
1395
1396 case 'J': // Unsigned 12-bit constant
1397 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1398 if (isUInt<12>(C->getZExtValue()))
1399 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1400 Op.getValueType()));
1401 return;
1402
1403 case 'K': // Signed 16-bit constant
1404 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1405 if (isInt<16>(C->getSExtValue()))
1406 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1407 Op.getValueType()));
1408 return;
1409
1410 case 'L': // Signed 20-bit displacement (on all targets we support)
1411 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1412 if (isInt<20>(C->getSExtValue()))
1413 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1414 Op.getValueType()));
1415 return;
1416
1417 case 'M': // 0x7fffffff
1418 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1419 if (C->getZExtValue() == 0x7fffffff)
1420 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1421 Op.getValueType()));
1422 return;
1423 }
1424 }
1425 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1426}
1427
1428//===----------------------------------------------------------------------===//
1429// Calling conventions
1430//===----------------------------------------------------------------------===//
1431
1432#include "SystemZGenCallingConv.inc"
1433
1435 CallingConv::ID) const {
1436 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1437 SystemZ::R14D, 0 };
1438 return ScratchRegs;
1439}
1440
1442 Type *ToType) const {
1443 return isTruncateFree(FromType, ToType);
1444}
1445
1447 return CI->isTailCall();
1448}
1449
1450// Value is a value that has been passed to us in the location described by VA
1451// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1452// any loads onto Chain.
1454 CCValAssign &VA, SDValue Chain,
1455 SDValue Value) {
1456 // If the argument has been promoted from a smaller type, insert an
1457 // assertion to capture this.
1458 if (VA.getLocInfo() == CCValAssign::SExt)
1460 DAG.getValueType(VA.getValVT()));
1461 else if (VA.getLocInfo() == CCValAssign::ZExt)
1463 DAG.getValueType(VA.getValVT()));
1464
1465 if (VA.isExtInLoc())
1466 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1467 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1468 // If this is a short vector argument loaded from the stack,
1469 // extend from i64 to full vector size and then bitcast.
1470 assert(VA.getLocVT() == MVT::i64);
1471 assert(VA.getValVT().isVector());
1472 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1473 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1474 } else
1475 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1476 return Value;
1477}
1478
1479// Value is a value of type VA.getValVT() that we need to copy into
1480// the location described by VA. Return a copy of Value converted to
1481// VA.getValVT(). The caller is responsible for handling indirect values.
1483 CCValAssign &VA, SDValue Value) {
1484 switch (VA.getLocInfo()) {
1485 case CCValAssign::SExt:
1486 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1487 case CCValAssign::ZExt:
1488 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1489 case CCValAssign::AExt:
1490 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1491 case CCValAssign::BCvt: {
1492 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1493 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1494 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1495 // For an f32 vararg we need to first promote it to an f64 and then
1496 // bitcast it to an i64.
1497 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1498 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1499 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1500 ? MVT::v2i64
1501 : VA.getLocVT();
1502 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1503 // For ELF, this is a short vector argument to be stored to the stack,
1504 // bitcast to v2i64 and then extract first element.
1505 if (BitCastToType == MVT::v2i64)
1506 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1507 DAG.getConstant(0, DL, MVT::i32));
1508 return Value;
1509 }
1510 case CCValAssign::Full:
1511 return Value;
1512 default:
1513 llvm_unreachable("Unhandled getLocInfo()");
1514 }
1515}
1516
1518 SDLoc DL(In);
1519 SDValue Lo, Hi;
1520 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1521 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1522 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1523 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1524 DAG.getConstant(64, DL, MVT::i32)));
1525 } else {
1526 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1527 }
1528
1529 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1530 MVT::Untyped, Hi, Lo);
1531 return SDValue(Pair, 0);
1532}
1533
1535 SDLoc DL(In);
1536 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1537 DL, MVT::i64, In);
1538 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1539 DL, MVT::i64, In);
1540
1541 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1542 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1543 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1544 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1545 DAG.getConstant(64, DL, MVT::i32));
1546 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1547 } else {
1548 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1549 }
1550}
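// These two helpers convert between the i128 view of a value and the untyped
// GR128 view used for inline-asm operands: a GR128 value is an even/odd pair
// of 64-bit GPRs, built with the PAIR128 pseudo and split apart again through
// the subreg_h64 (high) and subreg_l64 (low) subregisters.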
1551
1553 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1554 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1555 EVT ValueVT = Val.getValueType();
1556 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1557 // Inline assembly operand.
1558 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1559 return true;
1560 }
1561
1562 return false;
1563}
1564
1566 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1567 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1568 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1569 // Inline assembly operand.
1570 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1571 return DAG.getBitcast(ValueVT, Res);
1572 }
1573
1574 return SDValue();
1575}
1576
1578 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1579 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1580 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1582 MachineFrameInfo &MFI = MF.getFrameInfo();
1584 SystemZMachineFunctionInfo *FuncInfo =
1586 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1587 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1588
1589 // Assign locations to all of the incoming arguments.
1591 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1592 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1593 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1594
1595 unsigned NumFixedGPRs = 0;
1596 unsigned NumFixedFPRs = 0;
1597 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1598 SDValue ArgValue;
1599 CCValAssign &VA = ArgLocs[I];
1600 EVT LocVT = VA.getLocVT();
1601 if (VA.isRegLoc()) {
1602 // Arguments passed in registers
1603 const TargetRegisterClass *RC;
1604 switch (LocVT.getSimpleVT().SimpleTy) {
1605 default:
1606 // Integers smaller than i64 should be promoted to i64.
1607 llvm_unreachable("Unexpected argument type");
1608 case MVT::i32:
1609 NumFixedGPRs += 1;
1610 RC = &SystemZ::GR32BitRegClass;
1611 break;
1612 case MVT::i64:
1613 NumFixedGPRs += 1;
1614 RC = &SystemZ::GR64BitRegClass;
1615 break;
1616 case MVT::f32:
1617 NumFixedFPRs += 1;
1618 RC = &SystemZ::FP32BitRegClass;
1619 break;
1620 case MVT::f64:
1621 NumFixedFPRs += 1;
1622 RC = &SystemZ::FP64BitRegClass;
1623 break;
1624 case MVT::f128:
1625 NumFixedFPRs += 2;
1626 RC = &SystemZ::FP128BitRegClass;
1627 break;
1628 case MVT::v16i8:
1629 case MVT::v8i16:
1630 case MVT::v4i32:
1631 case MVT::v2i64:
1632 case MVT::v4f32:
1633 case MVT::v2f64:
1634 RC = &SystemZ::VR128BitRegClass;
1635 break;
1636 }
1637
1638 Register VReg = MRI.createVirtualRegister(RC);
1639 MRI.addLiveIn(VA.getLocReg(), VReg);
1640 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1641 } else {
1642 assert(VA.isMemLoc() && "Argument not register or memory");
1643
1644 // Create the frame index object for this incoming parameter.
1645 // FIXME: Pre-include call frame size in the offset, should not
1646 // need to manually add it here.
1647 int64_t ArgSPOffset = VA.getLocMemOffset();
1648 if (Subtarget.isTargetXPLINK64()) {
1649 auto &XPRegs =
1651 ArgSPOffset += XPRegs.getCallFrameSize();
1652 }
1653 int FI =
1654 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1655
1656 // Create the SelectionDAG nodes corresponding to a load
1657 // from this parameter. Unpromoted ints and floats are
1658 // passed as right-justified 8-byte values.
1659 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1660 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1661 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1662 DAG.getIntPtrConstant(4, DL));
1663 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1665 }
1666
1667 // Convert the value of the argument register into the value that's
1668 // being passed.
1669 if (VA.getLocInfo() == CCValAssign::Indirect) {
1670 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1672 // If the original argument was split (e.g. i128), we need
1673 // to load all parts of it here (using the same address).
1674 unsigned ArgIndex = Ins[I].OrigArgIndex;
1675 assert (Ins[I].PartOffset == 0);
1676 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1677 CCValAssign &PartVA = ArgLocs[I + 1];
1678 unsigned PartOffset = Ins[I + 1].PartOffset;
1679 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1680 DAG.getIntPtrConstant(PartOffset, DL));
1681 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1683 ++I;
1684 }
1685 } else
1686 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1687 }
1688
1689 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
1690 // Save the number of non-varargs registers for later use by va_start, etc.
1691 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1692 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1693
1694 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1695 Subtarget.getSpecialRegisters());
1696
1697 // Likewise the address (in the form of a frame index) of where the
1698 // first stack vararg would be. The 1-byte size here is arbitrary.
1699 // FIXME: Pre-include call frame size in the offset, should not
1700 // need to manually add it here.
1701 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
1702 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
1703 FuncInfo->setVarArgsFrameIndex(FI);
1704 }
1705
1706 if (IsVarArg && Subtarget.isTargetELF()) {
1707 // Save the number of non-varargs registers for later use by va_start, etc.
1708 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1709 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1710
1711 // Likewise the address (in the form of a frame index) of where the
1712 // first stack vararg would be. The 1-byte size here is arbitrary.
1713 int64_t VarArgsOffset = CCInfo.getStackSize();
1714 FuncInfo->setVarArgsFrameIndex(
1715 MFI.CreateFixedObject(1, VarArgsOffset, true));
1716
1717 // ...and a similar frame index for the caller-allocated save area
1718 // that will be used to store the incoming registers.
1719 int64_t RegSaveOffset =
1720 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1721 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1722 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1723
1724 // Store the FPR varargs in the reserved frame slots. (We store the
1725 // GPRs as part of the prologue.)
1726 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1728 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1729 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1730 int FI =
1732 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1734 &SystemZ::FP64BitRegClass);
1735 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1736 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1738 }
1739 // Join the stores, which are independent of one another.
1740 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1741 ArrayRef(&MemOps[NumFixedFPRs],
1742 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
1743 }
1744 }
1745
1746 if (Subtarget.isTargetXPLINK64()) {
1747 // Create a virtual register for handling the incoming "ADA" special register (R5).
1748 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
1749 Register ADAvReg = MRI.createVirtualRegister(RC);
1750 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1751 Subtarget.getSpecialRegisters());
1752 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
1753 FuncInfo->setADAVirtualRegister(ADAvReg);
1754 }
1755 return Chain;
1756}
1757
1758static bool canUseSiblingCall(const CCState &ArgCCInfo,
1761 // Punt if there are any indirect or stack arguments, or if the call
1762 // needs the callee-saved argument register R6, or if the call uses
1763 // the callee-saved register arguments SwiftSelf and SwiftError.
1764 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1765 CCValAssign &VA = ArgLocs[I];
1767 return false;
1768 if (!VA.isRegLoc())
1769 return false;
1770 Register Reg = VA.getLocReg();
1771 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1772 return false;
1773 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1774 return false;
1775 }
1776 return true;
1777}
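// In short, a sibling call is only used when every argument is passed in a
// register other than R6 and no argument is indirect, Swift-specific or
// passed on the stack; otherwise the normal call sequence is used.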
1778
1780 unsigned Offset, bool LoadAdr = false) {
1783 unsigned ADAvReg = MFI->getADAVirtualRegister();
1785
1786 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
1787 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
1788
1789 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
1790 if (!LoadAdr)
1791 Result = DAG.getLoad(
1792 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
1794
1795 return Result;
1796}
1797
1798// ADA access using a GlobalValue.
1799// Note: for functions, the address of the descriptor is returned.
1801 EVT PtrVT) {
1802 unsigned ADAtype;
1803 bool LoadAddr = false;
1804 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
1805 bool IsFunction =
1806 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
1807 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
1808
1809 if (IsFunction) {
1810 if (IsInternal) {
1812 LoadAddr = true;
1813 } else
1815 } else {
1817 }
1818 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
1819
1820 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
1821}
1822
1823static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
1824 SDLoc &DL, SDValue &Chain) {
1825 unsigned ADADelta = 0; // ADA offset in desc.
1826 unsigned EPADelta = 8; // EPA offset in desc.
1829
1830 // XPLink calling convention.
1831 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1832 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
1833 G->getGlobal()->hasPrivateLinkage());
1834 if (IsInternal) {
1837 unsigned ADAvReg = MFI->getADAVirtualRegister();
1838 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
1839 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1840 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1841 return true;
1842 } else {
1844 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1845 ADA = getADAEntry(DAG, GA, DL, ADADelta);
1846 Callee = getADAEntry(DAG, GA, DL, EPADelta);
1847 }
1848 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1850 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1851 ADA = getADAEntry(DAG, ES, DL, ADADelta);
1852 Callee = getADAEntry(DAG, ES, DL, EPADelta);
1853 } else {
1854 // Function pointer case
1855 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1856 DAG.getConstant(ADADelta, DL, PtrVT));
1857 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
1859 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1860 DAG.getConstant(EPADelta, DL, PtrVT));
1861 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
1863 }
1864 return false;
1865}
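// The deltas above reflect the XPLINK function descriptor layout assumed
// here: the associated data area (ADA) pointer at offset 0 and the entry
// point (EPA) at offset 8. External and indirect calls load both fields from
// the descriptor, while calls to internal functions can reuse the caller's
// own ADA directly.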
1866
1867SDValue
1869 SmallVectorImpl<SDValue> &InVals) const {
1870 SelectionDAG &DAG = CLI.DAG;
1871 SDLoc &DL = CLI.DL;
1873 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1875 SDValue Chain = CLI.Chain;
1876 SDValue Callee = CLI.Callee;
1877 bool &IsTailCall = CLI.IsTailCall;
1878 CallingConv::ID CallConv = CLI.CallConv;
1879 bool IsVarArg = CLI.IsVarArg;
1881 EVT PtrVT = getPointerTy(MF.getDataLayout());
1882 LLVMContext &Ctx = *DAG.getContext();
1884
1885 // FIXME: z/OS support to be added later.
1886 if (Subtarget.isTargetXPLINK64())
1887 IsTailCall = false;
1888
1889 // Analyze the operands of the call, assigning locations to each operand.
1891 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1892 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1893
1894 // We don't support GuaranteedTailCallOpt, only automatically-detected
1895 // sibling calls.
1896 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1897 IsTailCall = false;
1898
1899 // Get a count of how many bytes are to be pushed on the stack.
1900 unsigned NumBytes = ArgCCInfo.getStackSize();
1901
1902 // Mark the start of the call.
1903 if (!IsTailCall)
1904 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1905
1906 // Copy argument values to their designated locations.
1908 SmallVector<SDValue, 8> MemOpChains;
1909 SDValue StackPtr;
1910 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1911 CCValAssign &VA = ArgLocs[I];
1912 SDValue ArgValue = OutVals[I];
1913
1914 if (VA.getLocInfo() == CCValAssign::Indirect) {
1915 // Store the argument in a stack slot and pass its address.
1916 unsigned ArgIndex = Outs[I].OrigArgIndex;
1917 EVT SlotVT;
1918 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1919 // Allocate the full stack space for a promoted (and split) argument.
1920 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1921 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1922 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1923 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1924 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1925 } else {
1926 SlotVT = Outs[I].VT;
1927 }
1928 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1929 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1930 MemOpChains.push_back(
1931 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1933 // If the original argument was split (e.g. i128), we need
1934 // to store all parts of it here (and pass just one address).
1935 assert (Outs[I].PartOffset == 0);
1936 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1937 SDValue PartValue = OutVals[I + 1];
1938 unsigned PartOffset = Outs[I + 1].PartOffset;
1939 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1940 DAG.getIntPtrConstant(PartOffset, DL));
1941 MemOpChains.push_back(
1942 DAG.getStore(Chain, DL, PartValue, Address,
1944 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1945 SlotVT.getStoreSize()) && "Not enough space for argument part!");
1946 ++I;
1947 }
1948 ArgValue = SpillSlot;
1949 } else
1950 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1951
1952 if (VA.isRegLoc()) {
1953 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcast to an
1954 // MVT::i128 type. We decompose the 128-bit type into a pair of its high
1955 // and low values.
1956 if (VA.getLocVT() == MVT::i128)
1957 ArgValue = lowerI128ToGR128(DAG, ArgValue);
1958 // Queue up the argument copies and emit them at the end.
1959 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1960 } else {
1961 assert(VA.isMemLoc() && "Argument not register or memory");
1962
1963 // Work out the address of the stack slot. Unpromoted ints and
1964 // floats are passed as right-justified 8-byte values.
1965 if (!StackPtr.getNode())
1966 StackPtr = DAG.getCopyFromReg(Chain, DL,
1967 Regs->getStackPointerRegister(), PtrVT);
1968 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1969 VA.getLocMemOffset();
1970 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1971 Offset += 4;
1972 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1974
1975 // Emit the store.
1976 MemOpChains.push_back(
1977 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1978
1979 // Although long doubles and vectors are passed through the stack when
1980 // they are varargs (non-fixed arguments), if a long double or vector
1981 // occupies the third and fourth slots of the argument list, GPR3 should
1982 // still shadow the third slot of the argument list.
1983 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
1984 SDValue ShadowArgValue =
1985 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
1986 DAG.getIntPtrConstant(1, DL));
1987 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
1988 }
1989 }
1990 }
1991
1992 // Join the stores, which are independent of one another.
1993 if (!MemOpChains.empty())
1994 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1995
1996 // Accept direct calls by converting symbolic call addresses to the
1997 // associated Target* opcodes. Force %r1 to be used for indirect
1998 // tail calls.
1999 SDValue Glue;
2000
2001 if (Subtarget.isTargetXPLINK64()) {
2002 SDValue ADA;
2003 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2004 if (!IsBRASL) {
2005 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2006 ->getAddressOfCalleeRegister();
2007 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2008 Glue = Chain.getValue(1);
2009 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2010 }
2011 RegsToPass.push_back(std::make_pair(
2012 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2013 } else {
2014 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2015 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2016 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2017 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2018 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2019 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2020 } else if (IsTailCall) {
2021 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2022 Glue = Chain.getValue(1);
2023 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2024 }
2025 }
2026
2027 // Build a sequence of copy-to-reg nodes, chained and glued together.
2028 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
2029 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
2030 RegsToPass[I].second, Glue);
2031 Glue = Chain.getValue(1);
2032 }
2033
2034 // The first call operand is the chain and the second is the target address.
2035 SmallVector<SDValue, 8> Ops;
2036 Ops.push_back(Chain);
2037 Ops.push_back(Callee);
2038
2039 // Add argument registers to the end of the list so that they are
2040 // known live into the call.
2041 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
2042 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
2043 RegsToPass[I].second.getValueType()));
2044
2045 // Add a register mask operand representing the call-preserved registers.
2046 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2047 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2048 assert(Mask && "Missing call preserved mask for calling convention");
2049 Ops.push_back(DAG.getRegisterMask(Mask));
2050
2051 // Glue the call to the argument copies, if any.
2052 if (Glue.getNode())
2053 Ops.push_back(Glue);
2054
2055 // Emit the call.
2056 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2057 if (IsTailCall) {
2058 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2059 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2060 return Ret;
2061 }
2062 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2063 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2064 Glue = Chain.getValue(1);
2065
2066 // Mark the end of the call, which is glued to the call itself.
2067 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2068 Glue = Chain.getValue(1);
2069
2070 // Assign locations to each value returned by this call.
2071 SmallVector<CCValAssign, 16> RetLocs;
2072 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2073 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2074
2075 // Copy all of the result registers out of their specified physreg.
2076 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2077 CCValAssign &VA = RetLocs[I];
2078
2079 // Copy the value out, gluing the copy to the end of the call sequence.
2080 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2081 VA.getLocVT(), Glue);
2082 Chain = RetValue.getValue(1);
2083 Glue = RetValue.getValue(2);
2084
2085 // Convert the value of the return register into the value that's
2086 // being returned.
2087 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2088 }
2089
2090 return Chain;
2091}
2092
2093// Generate a call taking the given operands as arguments and returning a
2094// result of type RetVT.
2095std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
2096 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2097 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2098 bool DoesNotReturn, bool IsReturnValueUsed) const {
2099 TargetLowering::ArgListTy Args;
2100 Args.reserve(Ops.size());
2101
2102 TargetLowering::ArgListEntry Entry;
2103 for (SDValue Op : Ops) {
2104 Entry.Node = Op;
2105 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
2106 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2107 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2108 Args.push_back(Entry);
2109 }
2110
2111 SDValue Callee =
2112 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2113
2114 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2115 CallLoweringInfo CLI(DAG);
2116 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
2117 CLI.setDebugLoc(DL)
2118 .setChain(Chain)
2119 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2120 .setNoReturn(DoesNotReturn)
2121 .setDiscardResult(!IsReturnValueUsed)
2122 .setSExtResult(SignExtend)
2123 .setZExtResult(!SignExtend);
2124 return LowerCallTo(CLI);
2125}
2126
2127bool
2128SystemZTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2129 MachineFunction &MF, bool isVarArg,
2130 const SmallVectorImpl<ISD::OutputArg> &Outs,
2131 LLVMContext &Context) const {
2132 // Special case that we cannot easily detect in RetCC_SystemZ since
2133 // i128 may not be a legal type.
2134 for (auto &Out : Outs)
2135 if (Out.ArgVT == MVT::i128)
2136 return false;
2137
2138 SmallVector<CCValAssign, 16> RetLocs;
2139 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
2140 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2141}
2142
2143SDValue
2144SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2145 bool IsVarArg,
2146 const SmallVectorImpl<ISD::OutputArg> &Outs,
2147 const SmallVectorImpl<SDValue> &OutVals,
2148 const SDLoc &DL, SelectionDAG &DAG) const {
2149 MachineFunction &MF = DAG.getMachineFunction();
2150
2151 // Assign locations to each returned value.
2152 SmallVector<CCValAssign, 16> RetLocs;
2153 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2154 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2155
2156 // Quick exit for void returns
2157 if (RetLocs.empty())
2158 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2159
2160 if (CallConv == CallingConv::GHC)
2161 report_fatal_error("GHC functions return void only");
2162
2163 // Copy the result values into the output registers.
2164 SDValue Glue;
2165 SmallVector<SDValue, 4> RetOps;
2166 RetOps.push_back(Chain);
2167 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2168 CCValAssign &VA = RetLocs[I];
2169 SDValue RetValue = OutVals[I];
2170
2171 // Make the return register live on exit.
2172 assert(VA.isRegLoc() && "Can only return in registers!");
2173
2174 // Promote the value as required.
2175 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2176
2177 // Chain and glue the copies together.
2178 Register Reg = VA.getLocReg();
2179 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2180 Glue = Chain.getValue(1);
2181 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2182 }
2183
2184 // Update chain and glue.
2185 RetOps[0] = Chain;
2186 if (Glue.getNode())
2187 RetOps.push_back(Glue);
2188
2189 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2190}
2191
2192// Return true if Op is an intrinsic node with chain that returns the CC value
2193// as its only (other) argument. Provide the associated SystemZISD opcode and
2194// the mask of valid CC values if so.
2195static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2196 unsigned &CCValid) {
2197 unsigned Id = Op.getConstantOperandVal(1);
2198 switch (Id) {
2199 case Intrinsic::s390_tbegin:
2200 Opcode = SystemZISD::TBEGIN;
2201 CCValid = SystemZ::CCMASK_TBEGIN;
2202 return true;
2203
2204 case Intrinsic::s390_tbegin_nofloat:
2205 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2206 CCValid = SystemZ::CCMASK_TBEGIN;
2207 return true;
2208
2209 case Intrinsic::s390_tend:
2210 Opcode = SystemZISD::TEND;
2211 CCValid = SystemZ::CCMASK_TEND;
2212 return true;
2213
2214 default:
2215 return false;
2216 }
2217}
2218
2219// Return true if Op is an intrinsic node without chain that returns the
2220// CC value as its final argument. Provide the associated SystemZISD
2221// opcode and the mask of valid CC values if so.
2222static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2223 unsigned Id = Op.getConstantOperandVal(0);
2224 switch (Id) {
2225 case Intrinsic::s390_vpkshs:
2226 case Intrinsic::s390_vpksfs:
2227 case Intrinsic::s390_vpksgs:
2228 Opcode = SystemZISD::PACKS_CC;
2229 CCValid = SystemZ::CCMASK_VCMP;
2230 return true;
2231
2232 case Intrinsic::s390_vpklshs:
2233 case Intrinsic::s390_vpklsfs:
2234 case Intrinsic::s390_vpklsgs:
2235 Opcode = SystemZISD::PACKLS_CC;
2236 CCValid = SystemZ::CCMASK_VCMP;
2237 return true;
2238
2239 case Intrinsic::s390_vceqbs:
2240 case Intrinsic::s390_vceqhs:
2241 case Intrinsic::s390_vceqfs:
2242 case Intrinsic::s390_vceqgs:
2243 Opcode = SystemZISD::VICMPES;
2244 CCValid = SystemZ::CCMASK_VCMP;
2245 return true;
2246
2247 case Intrinsic::s390_vchbs:
2248 case Intrinsic::s390_vchhs:
2249 case Intrinsic::s390_vchfs:
2250 case Intrinsic::s390_vchgs:
2251 Opcode = SystemZISD::VICMPHS;
2252 CCValid = SystemZ::CCMASK_VCMP;
2253 return true;
2254
2255 case Intrinsic::s390_vchlbs:
2256 case Intrinsic::s390_vchlhs:
2257 case Intrinsic::s390_vchlfs:
2258 case Intrinsic::s390_vchlgs:
2259 Opcode = SystemZISD::VICMPHLS;
2260 CCValid = SystemZ::CCMASK_VCMP;
2261 return true;
2262
2263 case Intrinsic::s390_vtm:
2264 Opcode = SystemZISD::VTM;
2265 CCValid = SystemZ::CCMASK_VCMP;
2266 return true;
2267
2268 case Intrinsic::s390_vfaebs:
2269 case Intrinsic::s390_vfaehs:
2270 case Intrinsic::s390_vfaefs:
2271 Opcode = SystemZISD::VFAE_CC;
2272 CCValid = SystemZ::CCMASK_ANY;
2273 return true;
2274
2275 case Intrinsic::s390_vfaezbs:
2276 case Intrinsic::s390_vfaezhs:
2277 case Intrinsic::s390_vfaezfs:
2278 Opcode = SystemZISD::VFAEZ_CC;
2279 CCValid = SystemZ::CCMASK_ANY;
2280 return true;
2281
2282 case Intrinsic::s390_vfeebs:
2283 case Intrinsic::s390_vfeehs:
2284 case Intrinsic::s390_vfeefs:
2285 Opcode = SystemZISD::VFEE_CC;
2286 CCValid = SystemZ::CCMASK_ANY;
2287 return true;
2288
2289 case Intrinsic::s390_vfeezbs:
2290 case Intrinsic::s390_vfeezhs:
2291 case Intrinsic::s390_vfeezfs:
2292 Opcode = SystemZISD::VFEEZ_CC;
2293 CCValid = SystemZ::CCMASK_ANY;
2294 return true;
2295
2296 case Intrinsic::s390_vfenebs:
2297 case Intrinsic::s390_vfenehs:
2298 case Intrinsic::s390_vfenefs:
2299 Opcode = SystemZISD::VFENE_CC;
2300 CCValid = SystemZ::CCMASK_ANY;
2301 return true;
2302
2303 case Intrinsic::s390_vfenezbs:
2304 case Intrinsic::s390_vfenezhs:
2305 case Intrinsic::s390_vfenezfs:
2306 Opcode = SystemZISD::VFENEZ_CC;
2307 CCValid = SystemZ::CCMASK_ANY;
2308 return true;
2309
2310 case Intrinsic::s390_vistrbs:
2311 case Intrinsic::s390_vistrhs:
2312 case Intrinsic::s390_vistrfs:
2313 Opcode = SystemZISD::VISTR_CC;
2314 CCValid = SystemZ::CCMASK_ANY;
2315 return true;
2316
2317 case Intrinsic::s390_vstrcbs:
2318 case Intrinsic::s390_vstrchs:
2319 case Intrinsic::s390_vstrcfs:
2320 Opcode = SystemZISD::VSTRC_CC;
2321 CCValid = SystemZ::CCMASK_ANY;
2322 return true;
2323
2324 case Intrinsic::s390_vstrczbs:
2325 case Intrinsic::s390_vstrczhs:
2326 case Intrinsic::s390_vstrczfs:
2327 Opcode = SystemZISD::VSTRCZ_CC;
2328 CCValid = SystemZ::CCMASK_ANY;
2329 return true;
2330
2331 case Intrinsic::s390_vstrsb:
2332 case Intrinsic::s390_vstrsh:
2333 case Intrinsic::s390_vstrsf:
2334 Opcode = SystemZISD::VSTRS_CC;
2335 CCValid = SystemZ::CCMASK_ANY;
2336 return true;
2337
2338 case Intrinsic::s390_vstrszb:
2339 case Intrinsic::s390_vstrszh:
2340 case Intrinsic::s390_vstrszf:
2341 Opcode = SystemZISD::VSTRSZ_CC;
2342 CCValid = SystemZ::CCMASK_ANY;
2343 return true;
2344
2345 case Intrinsic::s390_vfcedbs:
2346 case Intrinsic::s390_vfcesbs:
2347 Opcode = SystemZISD::VFCMPES;
2348 CCValid = SystemZ::CCMASK_VCMP;
2349 return true;
2350
2351 case Intrinsic::s390_vfchdbs:
2352 case Intrinsic::s390_vfchsbs:
2353 Opcode = SystemZISD::VFCMPHS;
2354 CCValid = SystemZ::CCMASK_VCMP;
2355 return true;
2356
2357 case Intrinsic::s390_vfchedbs:
2358 case Intrinsic::s390_vfchesbs:
2359 Opcode = SystemZISD::VFCMPHES;
2360 CCValid = SystemZ::CCMASK_VCMP;
2361 return true;
2362
2363 case Intrinsic::s390_vftcidb:
2364 case Intrinsic::s390_vftcisb:
2365 Opcode = SystemZISD::VFTCI;
2366 CCValid = SystemZ::CCMASK_VCMP;
2367 return true;
2368
2369 case Intrinsic::s390_tdc:
2370 Opcode = SystemZISD::TDC;
2371 CCValid = SystemZ::CCMASK_TDC;
2372 return true;
2373
2374 default:
2375 return false;
2376 }
2377}
2378
2379// Emit an intrinsic with chain and an explicit CC register result.
2380static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2381 unsigned Opcode) {
2382 // Copy all operands except the intrinsic ID.
2383 unsigned NumOps = Op.getNumOperands();
2384 SmallVector<SDValue, 6> Ops;
2385 Ops.reserve(NumOps - 1);
2386 Ops.push_back(Op.getOperand(0));
2387 for (unsigned I = 2; I < NumOps; ++I)
2388 Ops.push_back(Op.getOperand(I));
2389
2390 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2391 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2392 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2393 SDValue OldChain = SDValue(Op.getNode(), 1);
2394 SDValue NewChain = SDValue(Intr.getNode(), 1);
2395 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2396 return Intr.getNode();
2397}
2398
2399// Emit an intrinsic with an explicit CC register result.
2400static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2401 unsigned Opcode) {
2402 // Copy all operands except the intrinsic ID.
2403 unsigned NumOps = Op.getNumOperands();
2404 SmallVector<SDValue, 6> Ops;
2405 Ops.reserve(NumOps - 1);
2406 for (unsigned I = 1; I < NumOps; ++I)
2407 Ops.push_back(Op.getOperand(I));
2408
2409 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2410 return Intr.getNode();
2411}
2412
2413// CC is a comparison that will be implemented using an integer or
2414// floating-point comparison. Return the condition code mask for
2415// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2416// unsigned comparisons and clear for signed ones. In the floating-point
2417// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2418static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2419#define CONV(X) \
2420 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2421 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2422 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2423
2424 switch (CC) {
2425 default:
2426 llvm_unreachable("Invalid integer condition!");
2427
2428 CONV(EQ);
2429 CONV(NE);
2430 CONV(GT);
2431 CONV(GE);
2432 CONV(LT);
2433 CONV(LE);
2434
2435 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2436 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2437 }
2438#undef CONV
2439}
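// Worked example (editorial, illustrative only): CONV(LT) above expands to
// three cases, so
//   ISD::SETLT  -> SystemZ::CCMASK_CMP_LT
//   ISD::SETOLT -> SystemZ::CCMASK_CMP_LT
//   ISD::SETULT -> SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_LT
// i.e. the unsigned/unordered form also branches on the "unordered" CC value,
// which is what the comment above means by CCMASK_CMP_UO being set for
// unsigned integer comparisons.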
2440
2441// If C can be converted to a comparison against zero, adjust the operands
2442// as necessary.
2443static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2444 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2445 return;
2446
2447 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2448 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2449 return;
2450
2451 int64_t Value = ConstOp1->getSExtValue();
2452 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2453 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2454 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2455 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2456 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2457 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2458 }
2459}
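// Worked example (editorial, illustrative only): for "x > -1" the constant is
// -1 and the mask is CCMASK_CMP_GT, so the XOR with CCMASK_CMP_EQ turns the
// mask into CCMASK_CMP_GE and Op1 into 0; the comparison becomes "x >= 0",
// which a compare (or LOAD AND TEST) against zero handles directly.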
2460
2461// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2462// adjust the operands as necessary.
2463static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2464 Comparison &C) {
2465 // For us to make any changes, it must be a comparison between a single-use
2466 // load and a constant.
2467 if (!C.Op0.hasOneUse() ||
2468 C.Op0.getOpcode() != ISD::LOAD ||
2469 C.Op1.getOpcode() != ISD::Constant)
2470 return;
2471
2472 // We must have an 8- or 16-bit load.
2473 auto *Load = cast<LoadSDNode>(C.Op0);
2474 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2475 if ((NumBits != 8 && NumBits != 16) ||
2476 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2477 return;
2478
2479 // The load must be an extending one and the constant must be within the
2480 // range of the unextended value.
2481 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2482 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2483 return;
2484 uint64_t Value = ConstOp1->getZExtValue();
2485 uint64_t Mask = (1 << NumBits) - 1;
2486 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2487 // Make sure that ConstOp1 is in range of C.Op0.
2488 int64_t SignedValue = ConstOp1->getSExtValue();
2489 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2490 return;
2491 if (C.ICmpType != SystemZICMP::SignedOnly) {
2492 // Unsigned comparison between two sign-extended values is equivalent
2493 // to unsigned comparison between two zero-extended values.
2494 Value &= Mask;
2495 } else if (NumBits == 8) {
2496 // Try to treat the comparison as unsigned, so that we can use CLI.
2497 // Adjust CCMask and Value as necessary.
2498 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2499 // Test whether the high bit of the byte is set.
2500 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2501 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2502 // Test whether the high bit of the byte is clear.
2503 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2504 else
2505 // No instruction exists for this combination.
2506 return;
2507 C.ICmpType = SystemZICMP::UnsignedOnly;
2508 }
2509 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2510 if (Value > Mask)
2511 return;
2512 // If the constant is in range, we can use any comparison.
2513 C.ICmpType = SystemZICMP::Any;
2514 } else
2515 return;
2516
2517 // Make sure that the first operand is an i32 of the right extension type.
2518 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2519 ISD::SEXTLOAD :
2520 ISD::ZEXTLOAD);
2521 if (C.Op0.getValueType() != MVT::i32 ||
2522 Load->getExtensionType() != ExtType) {
2523 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2524 Load->getBasePtr(), Load->getPointerInfo(),
2525 Load->getMemoryVT(), Load->getAlign(),
2526 Load->getMemOperand()->getFlags());
2527 // Update the chain uses.
2528 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2529 }
2530
2531 // Make sure that the second operand is an i32 with the right value.
2532 if (C.Op1.getValueType() != MVT::i32 ||
2533 Value != ConstOp1->getZExtValue())
2534 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2535}
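// Worked example (editorial, illustrative only): a sign-extending i8 load
// compared with CCMASK_CMP_LT against 0 cannot use CLI directly, since CLI is
// an unsigned compare; the code above rewrites it as an unsigned
// CCMASK_CMP_GT comparison against 127, i.e. "is the sign bit of the byte
// set", which CLI can test.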
2536
2537// Return true if Op is either an unextended load, or a load suitable
2538// for integer register-memory comparisons of type ICmpType.
2539static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2540 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2541 if (Load) {
2542 // There are no instructions to compare a register with a memory byte.
2543 if (Load->getMemoryVT() == MVT::i8)
2544 return false;
2545 // Otherwise decide on extension type.
2546 switch (Load->getExtensionType()) {
2547 case ISD::NON_EXTLOAD:
2548 return true;
2549 case ISD::SEXTLOAD:
2550 return ICmpType != SystemZICMP::UnsignedOnly;
2551 case ISD::ZEXTLOAD:
2552 return ICmpType != SystemZICMP::SignedOnly;
2553 default:
2554 break;
2555 }
2556 }
2557 return false;
2558}
2559
2560// Return true if it is better to swap the operands of C.
2561static bool shouldSwapCmpOperands(const Comparison &C) {
2562 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2563 if (C.Op0.getValueType() == MVT::i128)
2564 return false;
2565 if (C.Op0.getValueType() == MVT::f128)
2566 return false;
2567
2568 // Always keep a floating-point constant second, since comparisons with
2569 // zero can use LOAD TEST and comparisons with other constants make a
2570 // natural memory operand.
2571 if (isa<ConstantFPSDNode>(C.Op1))
2572 return false;
2573
2574 // Never swap comparisons with zero since there are many ways to optimize
2575 // those later.
2576 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2577 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2578 return false;
2579
2580 // Also keep natural memory operands second if the loaded value is
2581 // only used here. Several comparisons have memory forms.
2582 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2583 return false;
2584
2585 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2586 // In that case we generally prefer the memory to be second.
2587 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2588 // The only exceptions are when the second operand is a constant and
2589 // we can use things like CHHSI.
2590 if (!ConstOp1)
2591 return true;
2592 // The unsigned memory-immediate instructions can handle 16-bit
2593 // unsigned integers.
2594 if (C.ICmpType != SystemZICMP::SignedOnly &&
2595 isUInt<16>(ConstOp1->getZExtValue()))
2596 return false;
2597 // The signed memory-immediate instructions can handle 16-bit
2598 // signed integers.
2599 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2600 isInt<16>(ConstOp1->getSExtValue()))
2601 return false;
2602 return true;
2603 }
2604
2605 // Try to promote the use of CGFR and CLGFR.
2606 unsigned Opcode0 = C.Op0.getOpcode();
2607 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2608 return true;
2609 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2610 return true;
2611 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
2612 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2613 C.Op0.getConstantOperandVal(1) == 0xffffffff)
2614 return true;
2615
2616 return false;
2617}
2618
2619// Check whether C tests for equality between X and Y and whether X - Y
2620// or Y - X is also computed. In that case it's better to compare the
2621// result of the subtraction against zero.
2622static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2623 Comparison &C) {
2624 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2625 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2626 for (SDNode *N : C.Op0->uses()) {
2627 if (N->getOpcode() == ISD::SUB &&
2628 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2629 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2630 // Disable the nsw and nuw flags: the backend needs to handle
2631 // overflow as well during comparison elimination.
2632 SDNodeFlags Flags = N->getFlags();
2633 Flags.setNoSignedWrap(false);
2634 Flags.setNoUnsignedWrap(false);
2635 N->setFlags(Flags);
2636 C.Op0 = SDValue(N, 0);
2637 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2638 return;
2639 }
2640 }
2641 }
2642}
2643
2644// Check whether C compares a floating-point value with zero and if that
2645// floating-point value is also negated. In this case we can use the
2646// negation to set CC, so avoiding separate LOAD AND TEST and
2647// LOAD (NEGATIVE/COMPLEMENT) instructions.
2648static void adjustForFNeg(Comparison &C) {
2649 // This optimization is invalid for strict comparisons, since FNEG
2650 // does not raise any exceptions.
2651 if (C.Chain)
2652 return;
2653 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2654 if (C1 && C1->isZero()) {
2655 for (SDNode *N : C.Op0->uses()) {
2656 if (N->getOpcode() == ISD::FNEG) {
2657 C.Op0 = SDValue(N, 0);
2658 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2659 return;
2660 }
2661 }
2662 }
2663}
2664
2665// Check whether C compares (shl X, 32) with 0 and whether X is
2666// also sign-extended. In that case it is better to test the result
2667// of the sign extension using LTGFR.
2668//
2669// This case is important because InstCombine transforms a comparison
2670// with (sext (trunc X)) into a comparison with (shl X, 32).
2671static void adjustForLTGFR(Comparison &C) {
2672 // Check for a comparison between (shl X, 32) and 0.
2673 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
2674 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
2675 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2676 if (C1 && C1->getZExtValue() == 32) {
2677 SDValue ShlOp0 = C.Op0.getOperand(0);
2678 // See whether X has any SIGN_EXTEND_INREG uses.
2679 for (SDNode *N : ShlOp0->uses()) {
2680 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2681 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2682 C.Op0 = SDValue(N, 0);
2683 return;
2684 }
2685 }
2686 }
2687 }
2688}
2689
2690// If C compares the truncation of an extending load, try to compare
2691// the untruncated value instead. This exposes more opportunities to
2692// reuse CC.
2693static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2694 Comparison &C) {
2695 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2696 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2697 C.Op1.getOpcode() == ISD::Constant &&
2698 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
2699 C.Op1->getAsZExtVal() == 0) {
2700 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2701 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
2702 C.Op0.getValueSizeInBits().getFixedValue()) {
2703 unsigned Type = L->getExtensionType();
2704 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2705 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2706 C.Op0 = C.Op0.getOperand(0);
2707 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2708 }
2709 }
2710 }
2711}
2712
2713// Return true if shift operation N has an in-range constant shift value.
2714// Store it in ShiftVal if so.
2715static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2716 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2717 if (!Shift)
2718 return false;
2719
2720 uint64_t Amount = Shift->getZExtValue();
2721 if (Amount >= N.getValueSizeInBits())
2722 return false;
2723
2724 ShiftVal = Amount;
2725 return true;
2726}
2727
2728// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2729// instruction and whether the CC value is descriptive enough to handle
2730// a comparison of type Opcode between the AND result and CmpVal.
2731// CCMask says which comparison result is being tested and BitSize is
2732// the number of bits in the operands. If TEST UNDER MASK can be used,
2733// return the corresponding CC mask, otherwise return 0.
2734static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2735 uint64_t Mask, uint64_t CmpVal,
2736 unsigned ICmpType) {
2737 assert(Mask != 0 && "ANDs with zero should have been removed by now");
2738
2739 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2740 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2741 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2742 return 0;
2743
2744 // Work out the masks for the lowest and highest bits.
2745 uint64_t High = uint64_t(1) << (63 - llvm::countl_zero(Mask));
2746 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
2747
2748 // Signed ordered comparisons are effectively unsigned if the sign
2749 // bit is dropped.
2750 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2751
2752 // Check for equality comparisons with 0, or the equivalent.
2753 if (CmpVal == 0) {
2754 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2755 return SystemZ::CCMASK_TM_ALL_0;
2756 if (CCMask == SystemZ::CCMASK_CMP_NE)
2757 return SystemZ::CCMASK_TM_SOME_1;
2758 }
2759 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2760 if (CCMask == SystemZ::CCMASK_CMP_LT)
2761 return SystemZ::CCMASK_TM_ALL_0;
2762 if (CCMask == SystemZ::CCMASK_CMP_GE)
2763 return SystemZ::CCMASK_TM_SOME_1;
2764 }
2765 if (EffectivelyUnsigned && CmpVal < Low) {
2766 if (CCMask == SystemZ::CCMASK_CMP_LE)
2767 return SystemZ::CCMASK_TM_ALL_0;
2768 if (CCMask == SystemZ::CCMASK_CMP_GT)
2769 return SystemZ::CCMASK_TM_SOME_1;
2770 }
2771
2772 // Check for equality comparisons with the mask, or the equivalent.
2773 if (CmpVal == Mask) {
2774 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2775 return SystemZ::CCMASK_TM_ALL_1;
2776 if (CCMask == SystemZ::CCMASK_CMP_NE)
2777 return SystemZ::CCMASK_TM_SOME_0;
2778 }
2779 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2780 if (CCMask == SystemZ::CCMASK_CMP_GT)
2781 return SystemZ::CCMASK_TM_ALL_1;
2782 if (CCMask == SystemZ::CCMASK_CMP_LE)
2783 return SystemZ::CCMASK_TM_SOME_0;
2784 }
2785 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2786 if (CCMask == SystemZ::CCMASK_CMP_GE)
2787 return SystemZ::CCMASK_TM_ALL_1;
2788 if (CCMask == SystemZ::CCMASK_CMP_LT)
2789 return SystemZ::CCMASK_TM_SOME_0;
2790 }
2791
2792 // Check for ordered comparisons with the top bit.
2793 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2794 if (CCMask == SystemZ::CCMASK_CMP_LE)
2795 return SystemZ::CCMASK_TM_MSB_0;
2796 if (CCMask == SystemZ::CCMASK_CMP_GT)
2797 return SystemZ::CCMASK_TM_MSB_1;
2798 }
2799 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2800 if (CCMask == SystemZ::CCMASK_CMP_LT)
2801 return SystemZ::CCMASK_TM_MSB_0;
2802 if (CCMask == SystemZ::CCMASK_CMP_GE)
2803 return SystemZ::CCMASK_TM_MSB_1;
2804 }
2805
2806 // If there are just two bits, we can do equality checks for Low and High
2807 // as well.
2808 if (Mask == Low + High) {
2809 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2810 return SystemZ::CCMASK_TM_MIXED_MSB_0;
2811 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2812 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2813 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2814 return SystemZ::CCMASK_TM_MIXED_MSB_1;
2815 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2816 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2817 }
2818
2819 // Looks like we've exhausted our options.
2820 return 0;
2821}
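// Worked example (editorial, illustrative only): Mask = 0x8000 with
// CmpVal = 0 and CCMASK_CMP_EQ asks "is bit 15 clear"; the mask fits one of
// the TEST UNDER MASK immediates, so the function returns CCMASK_TM_ALL_0,
// while CCMASK_CMP_NE ("is bit 15 set") returns CCMASK_TM_SOME_1.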
2822
2823// See whether C can be implemented as a TEST UNDER MASK instruction.
2824// Update the arguments with the TM version if so.
2825static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2826 Comparison &C) {
2827 // Use VECTOR TEST UNDER MASK for i128 operations.
2828 if (C.Op0.getValueType() == MVT::i128) {
2829 // We can use VTM for EQ/NE comparisons of x & y against 0.
2830 if (C.Op0.getOpcode() == ISD::AND &&
2831 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2832 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
2833 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
2834 if (Mask && Mask->getAPIntValue() == 0) {
2835 C.Opcode = SystemZISD::VTM;
2836 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
2837 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
2838 C.CCValid = SystemZ::CCMASK_VCMP;
2839 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2840 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2841 else
2842 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2843 }
2844 }
2845 return;
2846 }
2847
2848 // Check that we have a comparison with a constant.
2849 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2850 if (!ConstOp1)
2851 return;
2852 uint64_t CmpVal = ConstOp1->getZExtValue();
2853
2854 // Check whether the nonconstant input is an AND with a constant mask.
2855 Comparison NewC(C);
2856 uint64_t MaskVal;
2857 ConstantSDNode *Mask = nullptr;
2858 if (C.Op0.getOpcode() == ISD::AND) {
2859 NewC.Op0 = C.Op0.getOperand(0);
2860 NewC.Op1 = C.Op0.getOperand(1);
2861 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2862 if (!Mask)
2863 return;
2864 MaskVal = Mask->getZExtValue();
2865 } else {
2866 // There is no instruction to compare with a 64-bit immediate
2867 // so use TMHH instead if possible. We need an unsigned ordered
2868 // comparison with an i64 immediate.
2869 if (NewC.Op0.getValueType() != MVT::i64 ||
2870 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2871 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2872 NewC.ICmpType == SystemZICMP::SignedOnly)
2873 return;
2874 // Convert LE and GT comparisons into LT and GE.
2875 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2876 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2877 if (CmpVal == uint64_t(-1))
2878 return;
2879 CmpVal += 1;
2880 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2881 }
2882 // If the low N bits of Op1 are zero then the low N bits of Op0 can
2883 // be masked off without changing the result.
2884 MaskVal = -(CmpVal & -CmpVal);
2885 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2886 }
2887 if (!MaskVal)
2888 return;
2889
2890 // Check whether the combination of mask, comparison value and comparison
2891 // type are suitable.
2892 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2893 unsigned NewCCMask, ShiftVal;
2894 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2895 NewC.Op0.getOpcode() == ISD::SHL &&
2896 isSimpleShift(NewC.Op0, ShiftVal) &&
2897 (MaskVal >> ShiftVal != 0) &&
2898 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2899 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2900 MaskVal >> ShiftVal,
2901 CmpVal >> ShiftVal,
2902 SystemZICMP::Any))) {
2903 NewC.Op0 = NewC.Op0.getOperand(0);
2904 MaskVal >>= ShiftVal;
2905 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2906 NewC.Op0.getOpcode() == ISD::SRL &&
2907 isSimpleShift(NewC.Op0, ShiftVal) &&
2908 (MaskVal << ShiftVal != 0) &&
2909 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2910 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2911 MaskVal << ShiftVal,
2912 CmpVal << ShiftVal,
2913 SystemZICMP::Any))) {
2914 NewC.Op0 = NewC.Op0.getOperand(0);
2915 MaskVal <<= ShiftVal;
2916 } else {
2917 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2918 NewC.ICmpType);
2919 if (!NewCCMask)
2920 return;
2921 }
2922
2923 // Go ahead and make the change.
2924 C.Opcode = SystemZISD::TM;
2925 C.Op0 = NewC.Op0;
2926 if (Mask && Mask->getZExtValue() == MaskVal)
2927 C.Op1 = SDValue(Mask, 0);
2928 else
2929 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2930 C.CCValid = SystemZ::CCMASK_TM;
2931 C.CCMask = NewCCMask;
2932}
2933
2934// Implement i128 comparison in vector registers.
2935static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
2936 Comparison &C) {
2937 if (C.Opcode != SystemZISD::ICMP)
2938 return;
2939 if (C.Op0.getValueType() != MVT::i128)
2940 return;
2941
2942 // (In-)Equality comparisons can be implemented via VCEQGS.
2943 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2944 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2945 C.Opcode = SystemZISD::VICMPES;
2946 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
2947 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
2948 C.CCValid = SystemZ::CCMASK_VCMP;
2949 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2950 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2951 else
2952 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2953 return;
2954 }
2955
2956 // Normalize other comparisons to GT.
2957 bool Swap = false, Invert = false;
2958 switch (C.CCMask) {
2959 case SystemZ::CCMASK_CMP_GT: break;
2960 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
2961 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
2962 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
2963 default: llvm_unreachable("Invalid integer condition!");
2964 }
2965 if (Swap)
2966 std::swap(C.Op0, C.Op1);
2967
2968 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2969 C.Opcode = SystemZISD::UCMP128HI;
2970 else
2971 C.Opcode = SystemZISD::SCMP128HI;
2972 C.CCValid = SystemZ::CCMASK_ANY;
2973 C.CCMask = SystemZ::CCMASK_1;
2974
2975 if (Invert)
2976 C.CCMask ^= C.CCValid;
2977}
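// Worked example (editorial, illustrative only): an unsigned i128 "x < y"
// arrives here as CCMASK_CMP_LT, is normalized by swapping the operands to
// "y > x", and is emitted as UCMP128HI with CCMASK_1; "x <= y" is handled as
// "x > y" with the resulting mask inverted (CCMask ^= CCValid).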
2978
2979// See whether the comparison argument contains a redundant AND
2980// and remove it if so. This sometimes happens due to the generic
2981// BRCOND expansion.
2982static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2983 Comparison &C) {
2984 if (C.Op0.getOpcode() != ISD::AND)
2985 return;
2986 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2987 if (!Mask || Mask->getValueSizeInBits(0) > 64)
2988 return;
2989 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2990 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2991 return;
2992
2993 C.Op0 = C.Op0.getOperand(0);
2994}
2995
2996// Return a Comparison that tests the condition-code result of intrinsic
2997// node Call against constant integer CC using comparison code Cond.
2998// Opcode is the opcode of the SystemZISD operation for the intrinsic
2999// and CCValid is the set of possible condition-code results.
3000static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3001 SDValue Call, unsigned CCValid, uint64_t CC,
3002 ISD::CondCode Cond) {
3003 Comparison C(Call, SDValue(), SDValue());
3004 C.Opcode = Opcode;
3005 C.CCValid = CCValid;
3006 if (Cond == ISD::SETEQ)
3007 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3008 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3009 else if (Cond == ISD::SETNE)
3010 // ...and the inverse of that.
3011 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3012 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3013 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3014 // always true for CC>3.
3015 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3016 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3017 // ...and the inverse of that.
3018 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3019 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3020 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3021 // always true for CC>3.
3022 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3023 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3024 // ...and the inverse of that.
3025 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3026 else
3027 llvm_unreachable("Unexpected integer comparison type");
3028 C.CCMask &= CCValid;
3029 return C;
3030}
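// Worked example (editorial, illustrative only): for an intrinsic whose CC
// result is tested with SETEQ against CC == 2, the mask is 1 << (3 - 2) = 2,
// i.e. SystemZ::CCMASK_2, so the branch fires only when the instruction set
// condition code 2; SETNE against the same value yields the complement,
// restricted to CCValid.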
3031
3032 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3033static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3034 ISD::CondCode Cond, const SDLoc &DL,
3035 SDValue Chain = SDValue(),
3036 bool IsSignaling = false) {
3037 if (CmpOp1.getOpcode() == ISD::Constant) {
3038 assert(!Chain);
3039 unsigned Opcode, CCValid;
3040 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3041 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3042 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3043 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3044 CmpOp1->getAsZExtVal(), Cond);
3045 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3046 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3047 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3048 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3049 CmpOp1->getAsZExtVal(), Cond);
3050 }
3051 Comparison C(CmpOp0, CmpOp1, Chain);
3052 C.CCMask = CCMaskForCondCode(Cond);
3053 if (C.Op0.getValueType().isFloatingPoint()) {
3054 C.CCValid = SystemZ::CCMASK_FCMP;
3055 if (!C.Chain)
3056 C.Opcode = SystemZISD::FCMP;
3057 else if (!IsSignaling)
3058 C.Opcode = SystemZISD::STRICT_FCMP;
3059 else
3060 C.Opcode = SystemZISD::STRICT_FCMPS;
3061 adjustForFNeg(C);
3062 } else {
3063 assert(!C.Chain);
3064 C.CCValid = SystemZ::CCMASK_ICMP;
3065 C.Opcode = SystemZISD::ICMP;
3066 // Choose the type of comparison. Equality and inequality tests can
3067 // use either signed or unsigned comparisons. The choice also doesn't
3068 // matter if both sign bits are known to be clear. In those cases we
3069 // want to give the main isel code the freedom to choose whichever
3070 // form fits best.
3071 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3072 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3073 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3074 C.ICmpType = SystemZICMP::Any;
3075 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3076 C.ICmpType = SystemZICMP::UnsignedOnly;
3077 else
3078 C.ICmpType = SystemZICMP::SignedOnly;
3079 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3080 adjustForRedundantAnd(DAG, DL, C);
3081 adjustZeroCmp(DAG, DL, C);
3082 adjustSubwordCmp(DAG, DL, C);
3083 adjustForSubtraction(DAG, DL, C);
3084 adjustForLTGFR(C);
3085 adjustICmpTruncate(DAG, DL, C);
3086 }
3087
3088 if (shouldSwapCmpOperands(C)) {
3089 std::swap(C.Op0, C.Op1);
3090 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3091 }
3092
3093 adjustForTestUnderMask(DAG, DL, C);
3094 adjustICmp128(DAG, DL, C);
3095 return C;
3096}
3097
3098// Emit the comparison instruction described by C.
3099static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3100 if (!C.Op1.getNode()) {
3101 SDNode *Node;
3102 switch (C.Op0.getOpcode()) {
3103 case ISD::INTRINSIC_W_CHAIN:
3104 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3105 return SDValue(Node, 0);
3106 case ISD::INTRINSIC_WO_CHAIN:
3107 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3108 return SDValue(Node, Node->getNumValues() - 1);
3109 default:
3110 llvm_unreachable("Invalid comparison operands");
3111 }
3112 }
3113 if (C.Opcode == SystemZISD::ICMP)
3114 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3115 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3116 if (C.Opcode == SystemZISD::TM) {
3117 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3118 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
3119 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3120 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3121 }
3122 if (C.Opcode == SystemZISD::VICMPES) {
3123 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
3124 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3125 return SDValue(Val.getNode(), 1);
3126 }
3127 if (C.Chain) {
3128 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3129 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3130 }
3131 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3132}
3133
3134// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3135// 64 bits. Extend is the extension type to use. Store the high part
3136// in Hi and the low part in Lo.
3137static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3138 SDValue Op0, SDValue Op1, SDValue &Hi,
3139 SDValue &Lo) {
3140 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3141 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3142 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3143 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3144 DAG.getConstant(32, DL, MVT::i64));
3145 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3146 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3147}
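// Worked example (editorial, illustrative only): with Extend = ISD::ZERO_EXTEND
// and both operands 0xFFFFFFFF, the single i64 multiply yields
// 0xFFFFFFFE00000001, so Hi becomes 0xFFFFFFFE and Lo becomes 0x00000001,
// exactly the UMUL_LOHI result pair.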
3148
3149// Lower a binary operation that produces two VT results, one in each
3150// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3151// and Opcode performs the GR128 operation. Store the even register result
3152// in Even and the odd register result in Odd.
3153static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3154 unsigned Opcode, SDValue Op0, SDValue Op1,
3155 SDValue &Even, SDValue &Odd) {
3156 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3157 bool Is32Bit = is32Bit(VT);
3158 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3159 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3160}
3161
3162// Return an i32 value that is 1 if the CC value produced by CCReg is
3163// in the mask CCMask and 0 otherwise. CC is known to have a value
3164// in CCValid, so other values can be ignored.
3165static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3166 unsigned CCValid, unsigned CCMask) {
3167 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3168 DAG.getConstant(0, DL, MVT::i32),
3169 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3170 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3171 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3172}
3173
3174 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3175// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3176// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3177// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3178// floating-point comparisons.
3179enum class CmpMode { Int, FP, StrictFP, SignalingFP };
3180static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3181 switch (CC) {
3182 case ISD::SETOEQ:
3183 case ISD::SETEQ:
3184 switch (Mode) {
3185 case CmpMode::Int: return SystemZISD::VICMPE;
3186 case CmpMode::FP: return SystemZISD::VFCMPE;
3187 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3188 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3189 }
3190 llvm_unreachable("Bad mode");
3191
3192 case ISD::SETOGE:
3193 case ISD::SETGE:
3194 switch (Mode) {
3195 case CmpMode::Int: return 0;
3196 case CmpMode::FP: return SystemZISD::VFCMPHE;
3197 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3198 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3199 }
3200 llvm_unreachable("Bad mode");
3201
3202 case ISD::SETOGT:
3203 case ISD::SETGT:
3204 switch (Mode) {
3205 case CmpMode::Int: return SystemZISD::VICMPH;
3206 case CmpMode::FP: return SystemZISD::VFCMPH;
3207 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3208 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3209 }
3210 llvm_unreachable("Bad mode");
3211
3212 case ISD::SETUGT:
3213 switch (Mode) {
3214 case CmpMode::Int: return SystemZISD::VICMPHL;
3215 case CmpMode::FP: return 0;
3216 case CmpMode::StrictFP: return 0;
3217 case CmpMode::SignalingFP: return 0;
3218 }
3219 llvm_unreachable("Bad mode");
3220
3221 default:
3222 return 0;
3223 }
3224}
3225
3226// Return the SystemZISD vector comparison operation for CC or its inverse,
3227// or 0 if neither can be done directly. Indicate in Invert whether the
3228// result is for the inverse of CC. Mode is as above.
3229static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3230 bool &Invert) {
3231 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3232 Invert = false;
3233 return Opcode;
3234 }
3235
3236 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3237 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3238 Invert = true;
3239 return Opcode;
3240 }
3241
3242 return 0;
3243}
3244
3245// Return a v2f64 that contains the extended form of elements Start and Start+1
3246// of v4f32 value Op. If Chain is nonnull, return the strict form.
3247static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3248 SDValue Op, SDValue Chain) {
3249 int Mask[] = { Start, -1, Start + 1, -1 };
3250 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3251 if (Chain) {
3252 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3253 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3254 }
3255 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3256}
3257
3258// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3259// producing a result of type VT. If Chain is nonnull, return the strict form.
3260SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3261 const SDLoc &DL, EVT VT,
3262 SDValue CmpOp0,
3263 SDValue CmpOp1,
3264 SDValue Chain) const {
3265 // There is no hardware support for v4f32 (unless we have the vector
3266 // enhancements facility 1), so extend the vector into two v2f64s
3267 // and compare those.
3268 if (CmpOp0.getValueType() == MVT::v4f32 &&
3269 !Subtarget.hasVectorEnhancements1()) {
3270 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3271 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3272 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3273 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3274 if (Chain) {
3275 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3276 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3277 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3278 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3279 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3280 H1.getValue(1), L1.getValue(1),
3281 HRes.getValue(1), LRes.getValue(1) };
3282 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3283 SDValue Ops[2] = { Res, NewChain };
3284 return DAG.getMergeValues(Ops, DL);
3285 }
3286 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3287 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3288 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3289 }
3290 if (Chain) {
3291 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3292 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3293 }
3294 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3295}
3296
3297// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3298// an integer mask of type VT. If Chain is nonnull, we have a strict
3299// floating-point comparison. If in addition IsSignaling is true, we have
3300// a strict signaling floating-point comparison.
3301SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3302 const SDLoc &DL, EVT VT,
3303 ISD::CondCode CC,
3304 SDValue CmpOp0,
3305 SDValue CmpOp1,
3306 SDValue Chain,
3307 bool IsSignaling) const {
3308 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3309 assert (!Chain || IsFP);
3310 assert (!IsSignaling || Chain);
3311 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3312 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3313 bool Invert = false;
3314 SDValue Cmp;
3315 switch (CC) {
3316 // Handle tests for order using (or (ogt y x) (oge x y)).
3317 case ISD::SETUO:
3318 Invert = true;
3319 [[fallthrough]];
3320 case ISD::SETO: {
3321 assert(IsFP && "Unexpected integer comparison");
3322 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3323 DL, VT, CmpOp1, CmpOp0, Chain);
3324 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3325 DL, VT, CmpOp0, CmpOp1, Chain);
3326 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3327 if (Chain)
3328 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3329 LT.getValue(1), GE.getValue(1));
3330 break;
3331 }
3332
3333 // Handle <> tests using (or (ogt y x) (ogt x y)).
3334 case ISD::SETUEQ:
3335 Invert = true;
3336 [[fallthrough]];
3337 case ISD::SETONE: {
3338 assert(IsFP && "Unexpected integer comparison");
3339 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3340 DL, VT, CmpOp1, CmpOp0, Chain);
3341 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3342 DL, VT, CmpOp0, CmpOp1, Chain);
3343 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3344 if (Chain)
3345 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3346 LT.getValue(1), GT.getValue(1));
3347 break;
3348 }
3349
3350 // Otherwise a single comparison is enough. It doesn't really
3351 // matter whether we try the inversion or the swap first, since
3352 // there are no cases where both work.
3353 default:
3354 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3355 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3356 else {
3357 CC = ISD::getSetCCSwappedOperands(CC);
3358 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3359 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3360 else
3361 llvm_unreachable("Unhandled comparison");
3362 }
3363 if (Chain)
3364 Chain = Cmp.getValue(1);
3365 break;
3366 }
3367 if (Invert) {
3368 SDValue Mask =
3369 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3370 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3371 }
3372 if (Chain && Chain.getNode() != Cmp.getNode()) {
3373 SDValue Ops[2] = { Cmp, Chain };
3374 Cmp = DAG.getMergeValues(Ops, DL);
3375 }
3376 return Cmp;
3377}
3378
3379SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3380 SelectionDAG &DAG) const {
3381 SDValue CmpOp0 = Op.getOperand(0);
3382 SDValue CmpOp1 = Op.getOperand(1);
3383 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3384 SDLoc DL(Op);
3385 EVT VT = Op.getValueType();
3386 if (VT.isVector())
3387 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3388
3389 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3390 SDValue CCReg = emitCmp(DAG, DL, C);
3391 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3392}
3393
3394SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3395 SelectionDAG &DAG,
3396 bool IsSignaling) const {
3397 SDValue Chain = Op.getOperand(0);
3398 SDValue CmpOp0 = Op.getOperand(1);
3399 SDValue CmpOp1 = Op.getOperand(2);
3400 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3401 SDLoc DL(Op);
3402 EVT VT = Op.getNode()->getValueType(0);
3403 if (VT.isVector()) {
3404 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3405 Chain, IsSignaling);
3406 return Res.getValue(Op.getResNo());
3407 }
3408
3409 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3410 SDValue CCReg = emitCmp(DAG, DL, C);
3411 CCReg->setFlags(Op->getFlags());
3412 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3413 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3414 return DAG.getMergeValues(Ops, DL);
3415}
3416
3417SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3418 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3419 SDValue CmpOp0 = Op.getOperand(2);
3420 SDValue CmpOp1 = Op.getOperand(3);
3421 SDValue Dest = Op.getOperand(4);
3422 SDLoc DL(Op);
3423
3424 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3425 SDValue CCReg = emitCmp(DAG, DL, C);
3426 return DAG.getNode(
3427 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3428 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3429 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3430}
3431
3432// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3433// allowing Pos and Neg to be wider than CmpOp.
3434static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3435 return (Neg.getOpcode() == ISD::SUB &&
3436 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3437 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3438 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3439 Pos.getOperand(0) == CmpOp)));
3440}
3441
3442// Return the absolute or negative absolute of Op; IsNegative decides which.
3443static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3444 bool IsNegative) {
3445 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3446 if (IsNegative)
3447 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3448 DAG.getConstant(0, DL, Op.getValueType()), Op);
3449 return Op;
3450}
3451
3452SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3453 SelectionDAG &DAG) const {
3454 SDValue CmpOp0 = Op.getOperand(0);
3455 SDValue CmpOp1 = Op.getOperand(1);
3456 SDValue TrueOp = Op.getOperand(2);
3457 SDValue FalseOp = Op.getOperand(3);
3458 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3459 SDLoc DL(Op);
3460
3461 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3462
3463 // Check for absolute and negative-absolute selections, including those
3464 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3465 // This check supplements the one in DAGCombiner.
3466 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3467 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3468 C.Op1.getOpcode() == ISD::Constant &&
3469 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3470 C.Op1->getAsZExtVal() == 0) {
3471 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3472 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3473 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3474 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3475 }
3476
3477 SDValue CCReg = emitCmp(DAG, DL, C);
3478 SDValue Ops[] = {TrueOp, FalseOp,
3479 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3480 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3481
3482 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3483}
3484
3485SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3486 SelectionDAG &DAG) const {
3487 SDLoc DL(Node);
3488 const GlobalValue *GV = Node->getGlobal();
3489 int64_t Offset = Node->getOffset();
3490 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3491 SDValue Result;
3492
3493 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3494 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3495 if (isInt<32>(Offset)) {
3496 // Assign anchors at 1<<12 byte boundaries.
3497 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3498 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3499 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3500
3501 // The offset can be folded into the address if it is aligned to a
3502 // halfword.
3503 Offset -= Anchor;
3504 if (Offset != 0 && (Offset & 1) == 0) {
3505 SDValue Full =
3506 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3507 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3508 Offset = 0;
3509 }
3510 } else {
3511 // Conservatively load a constant offset greater than 32 bits into a
3512 // register below.
3513 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3514 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3515 }
3516 } else if (Subtarget.isTargetELF()) {
3517 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3518 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3519 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3520 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3521 } else if (Subtarget.isTargetzOS()) {
3522 Result = getADAEntry(DAG, GV, DL, PtrVT);
3523 } else
3524 llvm_unreachable("Unexpected Subtarget");
3525
3526 // If there was a non-zero offset that we didn't fold, create an explicit
3527 // addition for it.
3528 if (Offset != 0)
3529 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3530 DAG.getConstant(Offset, DL, PtrVT));
3531
3532 return Result;
3533}
3534
3535SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3536 SelectionDAG &DAG,
3537 unsigned Opcode,
3538 SDValue GOTOffset) const {
3539 SDLoc DL(Node);
3540 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3541 SDValue Chain = DAG.getEntryNode();
3542 SDValue Glue;
3543
3544 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3545 CallingConv::GHC)
3546 report_fatal_error("In GHC calling convention TLS is not supported");
3547
3548 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3549 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3550 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3551 Glue = Chain.getValue(1);
3552 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3553 Glue = Chain.getValue(1);
3554
3555 // The first call operand is the chain and the second is the TLS symbol.
3556 SmallVector<SDValue, 8> Ops;
3557 Ops.push_back(Chain);
3558 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3559 Node->getValueType(0),
3560 0, 0));
3561
3562 // Add argument registers to the end of the list so that they are
3563 // known live into the call.
3564 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3565 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3566
3567 // Add a register mask operand representing the call-preserved registers.
3568 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3569 const uint32_t *Mask =
3570 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3571 assert(Mask && "Missing call preserved mask for calling convention");
3572 Ops.push_back(DAG.getRegisterMask(Mask));
3573
3574 // Glue the call to the argument copies.
3575 Ops.push_back(Glue);
3576
3577 // Emit the call.
3578 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3579 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3580 Glue = Chain.getValue(1);
3581
3582 // Copy the return value from %r2.
3583 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3584}
3585
3586SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3587 SelectionDAG &DAG) const {
3588 SDValue Chain = DAG.getEntryNode();
3589 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3590
3591 // The high part of the thread pointer is in access register 0.
3592 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3593 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3594
3595 // The low part of the thread pointer is in access register 1.
3596 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3597 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3598
3599 // Merge them into a single 64-bit address.
3600 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3601 DAG.getConstant(32, DL, PtrVT));
3602 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3603}
3604
3605SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3606 SelectionDAG &DAG) const {
3607 if (DAG.getTarget().useEmulatedTLS())
3608 return LowerToTLSEmulatedModel(Node, DAG);
3609 SDLoc DL(Node);
3610 const GlobalValue *GV = Node->getGlobal();
3611 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3612 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3613
3614 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3615 CallingConv::GHC)
3616 report_fatal_error("In GHC calling convention TLS is not supported");
3617
3618 SDValue TP = lowerThreadPointer(DL, DAG);
3619
3620 // Get the offset of GA from the thread pointer, based on the TLS model.
3621 SDValue Offset;
3622 switch (model) {
3623 case TLSModel::GeneralDynamic: {
3624 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3625 SystemZConstantPoolValue *CPV =
3626 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3627
3628 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3629 Offset = DAG.getLoad(
3630 PtrVT, DL, DAG.getEntryNode(), Offset,
3631 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3632
3633 // Call __tls_get_offset to retrieve the offset.
3634 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3635 break;
3636 }
3637
3638 case TLSModel::LocalDynamic: {
3639 // Load the GOT offset of the module ID.
3640 SystemZConstantPoolValue *CPV =
3641 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3642
3643 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3644 Offset = DAG.getLoad(
3645 PtrVT, DL, DAG.getEntryNode(), Offset,
3646 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3647
3648 // Call __tls_get_offset to retrieve the module base offset.
3649 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3650
3651 // Note: The SystemZLDCleanupPass will remove redundant computations
3652 // of the module base offset. Count total number of local-dynamic
3653 // accesses to trigger execution of that pass.
3654 SystemZMachineFunctionInfo* MFI =
3655 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3656 MFI->incNumLocalDynamicTLSAccesses();
3657
3658 // Add the per-symbol offset.
3659 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3660
3661 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3662 DTPOffset = DAG.getLoad(
3663 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3664 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3665
3666 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3667 break;
3668 }
3669
3670 case TLSModel::InitialExec: {
3671 // Load the offset from the GOT.
3672 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3673 SystemZII::MO_INDNTPOFF);
3674 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3675 Offset =
3676 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3677 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3678 break;
3679 }
3680
3681 case TLSModel::LocalExec: {
3682 // Force the offset into the constant pool and load it from there.
3683 SystemZConstantPoolValue *CPV =
3684 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3685
3686 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3687 Offset = DAG.getLoad(
3688 PtrVT, DL, DAG.getEntryNode(), Offset,
3689 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3690 break;
3691 }
3692 }
3693
3694 // Add the base and offset together.
3695 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3696}
3697
3698SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3699 SelectionDAG &DAG) const {
3700 SDLoc DL(Node);
3701 const BlockAddress *BA = Node->getBlockAddress();
3702 int64_t Offset = Node->getOffset();
3703 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3704
3705 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3706 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3707 return Result;
3708}
3709
3710SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3711 SelectionDAG &DAG) const {
3712 SDLoc DL(JT);
3713 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3714 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3715
3716 // Use LARL to load the address of the table.
3717 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3718}
3719
3720SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3721 SelectionDAG &DAG) const {
3722 SDLoc DL(CP);
3723 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3724
3725 SDValue Result;
3726 if (CP->isMachineConstantPoolEntry())
3727 Result =
3728 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3729 else
3730 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3731 CP->getOffset());
3732
3733 // Use LARL to load the address of the constant pool entry.
3734 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3735}
3736
3737SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3738 SelectionDAG &DAG) const {
3739 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3740 MachineFunction &MF = DAG.getMachineFunction();
3741 MachineFrameInfo &MFI = MF.getFrameInfo();
3742 MFI.setFrameAddressIsTaken(true);
3743
3744 SDLoc DL(Op);
3745 unsigned Depth = Op.getConstantOperandVal(0);
3746 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3747
3748 // By definition, the frame address is the address of the back chain. (In
3749 // the case of packed stack without backchain, return the address where the
3750 // backchain would have been stored. This will either be an unused space or
3751 // contain a saved register).
3752 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3753 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3754
3755 if (Depth > 0) {
3756 // FIXME The frontend should detect this case.
3757 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3758 report_fatal_error("Unsupported stack frame traversal count");
3759
3760 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
3761 while (Depth--) {
3762 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
3763 MachinePointerInfo());
3764 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
3765 }
3766 }
3767
3768 return BackChain;
3769}
3770
3771SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3772 SelectionDAG &DAG) const {
3773 MachineFunction &MF = DAG.getMachineFunction();
3774 MachineFrameInfo &MFI = MF.getFrameInfo();
3775 MFI.setReturnAddressIsTaken(true);
3776
3777 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3778 return SDValue();
3779
3780 SDLoc DL(Op);
3781 unsigned Depth = Op.getConstantOperandVal(0);
3782 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3783
3784 if (Depth > 0) {
3785 // FIXME The frontend should detect this case.
3786 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3787 report_fatal_error("Unsupported stack frame traversal count");
3788
3789 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3790 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3791 int Offset = (TFL->usePackedStack(MF) ? -2 : 14) *
3792 getTargetMachine().getPointerSize(0);
3793 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
3794 DAG.getConstant(Offset, DL, PtrVT));
3795 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
3796 MachinePointerInfo());
3797 }
3798
3799 // Return R14D, which has the return address. Mark it an implicit live-in.
3800 Register LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3801 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3802}
3803
3804SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3805 SelectionDAG &DAG) const {
3806 SDLoc DL(Op);
3807 SDValue In = Op.getOperand(0);
3808 EVT InVT = In.getValueType();
3809 EVT ResVT = Op.getValueType();
3810
3811 // Convert loads directly. This is normally done by DAGCombiner,
3812 // but we need this case for bitcasts that are created during lowering
3813 // and which are then lowered themselves.
3814 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3815 if (ISD::isNormalLoad(LoadN)) {
3816 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3817 LoadN->getBasePtr(), LoadN->getMemOperand());
3818 // Update the chain uses.
3819 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3820 return NewLoad;
3821 }
3822
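// Note: f32 values are kept in the high 32 bits of a 64-bit FP register,
// which is why both conversions below go through the high word of an
// i64 <-> f64 pair.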
3823 if (InVT == MVT::i32 && ResVT == MVT::f32) {
3824 SDValue In64;
3825 if (Subtarget.hasHighWord()) {
3826 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3827 MVT::i64);
3828 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3829 MVT::i64, SDValue(U64, 0), In);
3830 } else {
3831 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3832 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3833 DAG.getConstant(32, DL, MVT::i64));
3834 }
3835 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3836 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3837 DL, MVT::f32, Out64);
3838 }
3839 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3840 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3841 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3842 MVT::f64, SDValue(U64, 0), In);
3843 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3844 if (Subtarget.hasHighWord())
3845 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3846 MVT::i32, Out64);
3847 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3848 DAG.getConstant(32, DL, MVT::i64));
3849 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3850 }
3851 llvm_unreachable("Unexpected bitcast combination");
3852}
3853
3854SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3855 SelectionDAG &DAG) const {
3856
3857 if (Subtarget.isTargetXPLINK64())
3858 return lowerVASTART_XPLINK(Op, DAG);
3859 else
3860 return lowerVASTART_ELF(Op, DAG);
3861}
3862
3863SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3864 SelectionDAG &DAG) const {
3865 MachineFunction &MF = DAG.getMachineFunction();
3866 SystemZMachineFunctionInfo *FuncInfo =
3867 MF.getInfo<SystemZMachineFunctionInfo>();
3868
3869 SDLoc DL(Op);
3870
3871 // vastart just stores the address of the VarArgsFrameIndex slot into the
3872 // memory location argument.
3873 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3874 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3875 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3876 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3877 MachinePointerInfo(SV));
3878}
3879
3880SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3881 SelectionDAG &DAG) const {
3882 MachineFunction &MF = DAG.getMachineFunction();
3883 SystemZMachineFunctionInfo *FuncInfo =
3884 MF.getInfo<SystemZMachineFunctionInfo>();
3885 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3886
3887 SDValue Chain = Op.getOperand(0);
3888 SDValue Addr = Op.getOperand(1);
3889 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3890 SDLoc DL(Op);
3891
3892 // The initial values of each field.
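// These correspond to the __gpr, __fpr, __overflow_arg_area and
// __reg_save_area members of the s390x ELF ABI va_list.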
3893 const unsigned NumFields = 4;
3894 SDValue Fields[NumFields] = {
3895 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3896 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3897 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3898 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3899 };
3900
3901 // Store each field into its respective slot.
3902 SDValue MemOps[NumFields];
3903 unsigned Offset = 0;
3904 for (unsigned I = 0; I < NumFields; ++I) {
3905 SDValue FieldAddr = Addr;
3906 if (Offset != 0)
3907 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3908 DAG.getIntPtrConstant(Offset, DL));
3909 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3910 MachinePointerInfo(SV, Offset));
3911 Offset += 8;
3912 }
3913 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3914}
3915
3916SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3917 SelectionDAG &DAG) const {
3918 SDValue Chain = Op.getOperand(0);
3919 SDValue DstPtr = Op.getOperand(1);
3920 SDValue SrcPtr = Op.getOperand(2);
3921 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3922 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3923 SDLoc DL(Op);
3924
3925 uint32_t Sz =
3926 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
3927 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3928 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3929 /*isTailCall*/ false, MachinePointerInfo(DstSV),
3930 MachinePointerInfo(SrcSV));
3931}
3932
3933SDValue
3934SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3935 SelectionDAG &DAG) const {
3936 if (Subtarget.isTargetXPLINK64())
3937 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3938 else
3939 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3940}
3941
3942SDValue
3943SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3944 SelectionDAG &DAG) const {
3945 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3946 MachineFunction &MF = DAG.getMachineFunction();
3947 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3948 SDValue Chain = Op.getOperand(0);
3949 SDValue Size = Op.getOperand(1);
3950 SDValue Align = Op.getOperand(2);
3951 SDLoc DL(Op);
3952
3953 // If user has set the no alignment function attribute, ignore
3954 // alloca alignments.
3955 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
3956
3957 uint64_t StackAlign = TFI->getStackAlignment();
3958 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3959 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3960
3961 SDValue NeededSpace = Size;
3962
3963 // Add extra space for alignment if needed.
3964 EVT PtrVT = getPointerTy(MF.getDataLayout());
3965 if (ExtraAlignSpace)
3966 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3967 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3968
3969 bool IsSigned = false;
3970 bool DoesNotReturn = false;
3971 bool IsReturnValueUsed = false;
3972 EVT VT = Op.getValueType();
3973 SDValue AllocaCall =
3974 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
3975 CallingConv::C, IsSigned, DL, DoesNotReturn,
3976 IsReturnValueUsed)
3977 .first;
3978
3979 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
3980 // to end of call in order to ensure it isn't broken up from the call
3981 // sequence.
3982 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
3983 Register SPReg = Regs.getStackPointerRegister();
3984 Chain = AllocaCall.getValue(1);
3985 SDValue Glue = AllocaCall.getValue(2);
3986 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
3987 Chain = NewSPRegNode.getValue(1);
3988
3989 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
3990 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
3991 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
3992
3993 // Dynamically realign if needed.
3994 if (ExtraAlignSpace) {
3995 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3996 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3997 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
3998 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
3999 }
4000
4001 SDValue Ops[2] = {Result, Chain};
4002 return DAG.getMergeValues(Ops, DL);
4003}
4004
4005SDValue
4006SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4007 SelectionDAG &DAG) const {
4008 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4009 MachineFunction &MF = DAG.getMachineFunction();
4010 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4011 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4012
4013 SDValue Chain = Op.getOperand(0);
4014 SDValue Size = Op.getOperand(1);
4015 SDValue Align = Op.getOperand(2);
4016 SDLoc DL(Op);
4017
4018 // If user has set the no alignment function attribute, ignore
4019 // alloca alignments.
4020 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4021
4022 uint64_t StackAlign = TFI->getStackAlignment();
4023 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4024 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4025
4026 Register SPReg = getStackPointerRegisterToSaveRestore();
4027 SDValue NeededSpace = Size;
4028
4029 // Get a reference to the stack pointer.
4030 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4031
4032 // If we need a backchain, save it now.
4033 SDValue Backchain;
4034 if (StoreBackchain)
4035 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4036 MachinePointerInfo());
4037
4038 // Add extra space for alignment if needed.
4039 if (ExtraAlignSpace)
4040 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4041 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4042
4043 // Get the new stack pointer value.
4044 SDValue NewSP;
4045 if (hasInlineStackProbe(MF)) {
4046 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4047 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4048 Chain = NewSP.getValue(1);
4049 }
4050 else {
4051 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4052 // Copy the new stack pointer back.
4053 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4054 }
4055
4056 // The allocated data lives above the 160 bytes allocated for the standard
4057 // frame, plus any outgoing stack arguments. We don't know how much that
4058 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4059 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4060 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4061
4062 // Dynamically realign if needed.
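// (For example, an alloca with 64-byte alignment on the usual 8-byte
// aligned stack reserves ExtraAlignSpace = 56 bytes above and rounds the
// result up to the next multiple of 64.)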
4063 if (RequiredAlign > StackAlign) {
4064 Result =
4065 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4066 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4067 Result =
4068 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4069 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4070 }
4071
4072 if (StoreBackchain)
4073 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4074 MachinePointerInfo());
4075
4076 SDValue Ops[2] = { Result, Chain };
4077 return DAG.getMergeValues(Ops, DL);
4078}
4079
4080SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4081 SDValue Op, SelectionDAG &DAG) const {
4082 SDLoc DL(Op);
4083
4084 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4085}
4086
4087SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4088 SelectionDAG &DAG) const {
4089 EVT VT = Op.getValueType();
4090 SDLoc DL(Op);
4091 SDValue Ops[2];
4092 if (is32Bit(VT))
4093 // Just do a normal 64-bit multiplication and extract the results.
4094 // We define this so that it can be used for constant division.
4095 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4096 Op.getOperand(1), Ops[1], Ops[0]);
4097 else if (Subtarget.hasMiscellaneousExtensions2())
4098 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4099 // the high result in the even register. ISD::SMUL_LOHI is defined to
4100 // return the low half first, so the results are in reverse order.
4101 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4102 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4103 else {
4104 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4105 //
4106 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4107 //
4108 // but using the fact that the upper halves are either all zeros
4109 // or all ones:
4110 //
4111 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4112 //
4113 // and grouping the right terms together since they are quicker than the
4114 // multiplication:
4115 //
4116 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
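// This works because lh and rh are 0 or -1 (the sign of ll and rl), so a
// product such as lh * rl is either 0 or -rl, i.e. equal to -(lh & rl).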
4117 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4118 SDValue LL = Op.getOperand(0);
4119 SDValue RL = Op.getOperand(1);
4120 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4121 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4122 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4123 // the high result in the even register. ISD::SMUL_LOHI is defined to
4124 // return the low half first, so the results are in reverse order.
4125 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4126 LL, RL, Ops[1], Ops[0]);
4127 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4128 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4129 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4130 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4131 }
4132 return DAG.getMergeValues(Ops, DL);
4133}
4134
4135SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4136 SelectionDAG &DAG) const {
4137 EVT VT = Op.getValueType();
4138 SDLoc DL(Op);
4139 SDValue Ops[2];
4140 if (is32Bit(VT))
4141 // Just do a normal 64-bit multiplication and extract the results.
4142 // We define this so that it can be used for constant division.
4143 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4144 Op.getOperand(1), Ops[1], Ops[0]);
4145 else
4146 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4147 // the high result in the even register. ISD::UMUL_LOHI is defined to
4148 // return the low half first, so the results are in reverse order.
4149 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4150 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4151 return DAG.getMergeValues(Ops, DL);
4152}
4153
4154SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4155 SelectionDAG &DAG) const {
4156 SDValue Op0 = Op.getOperand(0);
4157 SDValue Op1 = Op.getOperand(1);
4158 EVT VT = Op.getValueType();
4159 SDLoc DL(Op);
4160
4161 // We use DSGF for 32-bit division. This means the first operand must
4162 // always be 64-bit, and the second operand should be 32-bit whenever
4163 // that is possible, to improve performance.
4164 if (is32Bit(VT))
4165 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4166 else if (DAG.ComputeNumSignBits(Op1) > 32)
4167 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4168
4169 // DSG(F) returns the remainder in the even register and the
4170 // quotient in the odd register.
4171 SDValue Ops[2];
4172 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4173 return DAG.getMergeValues(Ops, DL);
4174}
4175
4176SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4177 SelectionDAG &DAG) const {
4178 EVT VT = Op.getValueType();
4179 SDLoc DL(Op);
4180
4181 // DL(G) returns the remainder in the even register and the
4182 // quotient in the odd register.
4183 SDValue Ops[2];
4184 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4185 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4186 return DAG.getMergeValues(Ops, DL);
4187}
4188
4189SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4190 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4191
4192 // Get the known-zero masks for each operand.
4193 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4194 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4195 DAG.computeKnownBits(Ops[1])};
4196
4197 // See if the upper 32 bits of one operand and the lower 32 bits of the
4198 // other are known zero. They are the low and high operands respectively.
4199 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4200 Known[1].Zero.getZExtValue() };
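// (A set bit in Known.Zero means that bit of the operand is known to be
// zero, so an all-ones half below marks a known-zero half of the operand.)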
4201 unsigned High, Low;
4202 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4203 High = 1, Low = 0;
4204 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4205 High = 0, Low = 1;
4206 else
4207 return Op;
4208
4209 SDValue LowOp = Ops[Low];
4210 SDValue HighOp = Ops[High];
4211
4212 // If the high part is a constant, we're better off using IILH.
4213 if (HighOp.getOpcode() == ISD::Constant)
4214 return Op;
4215
4216 // If the low part is a constant that is outside the range of LHI,
4217 // then we're better off using IILF.
4218 if (LowOp.getOpcode() == ISD::Constant) {
4219 int64_t Value = int32_t(LowOp->getAsZExtVal());
4220 if (!isInt<16>(Value))
4221 return Op;
4222 }
4223
4224 // Check whether the high part is an AND that doesn't change the
4225 // high 32 bits and just masks out low bits. We can skip it if so.
4226 if (HighOp.getOpcode() == ISD::AND &&
4227 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4228 SDValue HighOp0 = HighOp.getOperand(0);
4229 uint64_t Mask = HighOp.getConstantOperandVal(1);
4230 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4231 HighOp = HighOp0;
4232 }
4233
4234 // Take advantage of the fact that all GR32 operations only change the
4235 // low 32 bits by truncating Low to an i32 and inserting it directly
4236 // using a subreg. The interesting cases are those where the truncation
4237 // can be folded.
4238 SDLoc DL(Op);
4239 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4240 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4241 MVT::i64, HighOp, Low32);
4242}
4243
4244// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4245SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4246 SelectionDAG &DAG) const {
4247 SDNode *N = Op.getNode();
4248 SDValue LHS = N->getOperand(0);
4249 SDValue RHS = N->getOperand(1);
4250 SDLoc DL(N);
4251
4252 if (N->getValueType(0) == MVT::i128) {
4253 unsigned BaseOp = 0;
4254 unsigned FlagOp = 0;
4255 switch (Op.getOpcode()) {
4256 default: llvm_unreachable("Unknown instruction!");
4257 case ISD::UADDO:
4258 BaseOp = ISD::ADD;
4259 FlagOp = SystemZISD::VACC;
4260 break;
4261 case ISD::USUBO:
4262 BaseOp = ISD::SUB;
4263 FlagOp = SystemZISD::VSCBI;
4264 break;
4265 }
4266 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4267 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4268 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4269 DAG.getValueType(MVT::i1));
4270 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4271 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4272 }
4273
4274 unsigned BaseOp = 0;
4275 unsigned CCValid = 0;
4276 unsigned CCMask = 0;
4277
4278 switch (Op.getOpcode()) {
4279 default: llvm_unreachable("Unknown instruction!");
4280 case ISD::SADDO:
4281 BaseOp = SystemZISD::SADDO;
4282 CCValid = SystemZ::CCMASK_ARITH;
4283 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4284 break;
4285 case ISD::SSUBO:
4286 BaseOp = SystemZISD::SSUBO;
4287 CCValid = SystemZ::CCMASK_ARITH;
4288 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4289 break;
4290 case ISD::UADDO:
4291 BaseOp = SystemZISD::UADDO;
4292 CCValid = SystemZ::CCMASK_LOGICAL;
4293 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4294 break;
4295 case ISD::USUBO:
4296 BaseOp = SystemZISD::USUBO;
4297 CCValid = SystemZ::CCMASK_LOGICAL;
4298 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4299 break;
4300 }
4301
4302 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4303 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4304
4305 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4306 if (N->getValueType(1) == MVT::i1)
4307 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4308
4309 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4310}
4311
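// The carry of a UADDO_CARRY chain must ultimately come from a UADDO (and
// the borrow of a USUBO_CARRY chain from a USUBO); only then is it produced
// as a CC value that the ADDCARRY/SUBCARRY lowering below can consume.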
4312static bool isAddCarryChain(SDValue Carry) {
4313 while (Carry.getOpcode() == ISD::UADDO_CARRY)
4314 Carry = Carry.getOperand(2);
4315 return Carry.getOpcode() == ISD::UADDO;
4316}
4317
4318static bool isSubBorrowChain(SDValue Carry) {
4319 while (Carry.getOpcode() == ISD::USUBO_CARRY)
4320 Carry = Carry.getOperand(2);
4321 return Carry.getOpcode() == ISD::USUBO;
4322}
4323
4324// Lower UADDO_CARRY/USUBO_CARRY nodes.
4325SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4326 SelectionDAG &DAG) const {
4327
4328 SDNode *N = Op.getNode();
4329 MVT VT = N->getSimpleValueType(0);
4330
4331 // Let legalize expand this if it isn't a legal type yet.
4332 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4333 return SDValue();
4334
4335 SDValue LHS = N->getOperand(0);
4336 SDValue RHS = N->getOperand(1);
4337 SDValue Carry = Op.getOperand(2);
4338 SDLoc DL(N);
4339
4340 if (VT == MVT::i128) {
4341 unsigned BaseOp = 0;
4342 unsigned FlagOp = 0;
4343 switch (Op.getOpcode()) {
4344 default: llvm_unreachable("Unknown instruction!");
4345 case ISD::UADDO_CARRY:
4346 BaseOp = SystemZISD::VAC;
4347 FlagOp = SystemZISD::VACCC;
4348 break;
4349 case ISD::USUBO_CARRY:
4350 BaseOp = SystemZISD::VSBI;
4351 FlagOp = SystemZISD::VSBCBI;
4352 break;
4353 }
4354 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4355 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4356 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4357 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4358 DAG.getValueType(MVT::i1));
4359 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4360 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4361 }
4362
4363 unsigned BaseOp = 0;
4364 unsigned CCValid = 0;
4365 unsigned CCMask = 0;
4366
4367 switch (Op.getOpcode()) {
4368 default: llvm_unreachable("Unknown instruction!");
4369 case ISD::UADDO_CARRY:
4370 if (!isAddCarryChain(Carry))
4371 return SDValue();
4372
4373 BaseOp = SystemZISD::ADDCARRY;
4374 CCValid = SystemZ::CCMASK_LOGICAL;
4375 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4376 break;
4377 case ISD::USUBO_CARRY:
4378 if (!isSubBorrowChain(Carry))
4379 return SDValue();
4380
4381 BaseOp = SystemZISD::SUBCARRY;
4382 CCValid = SystemZ::CCMASK_LOGICAL;
4383 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4384 break;
4385 }
4386
4387 // Set the condition code from the carry flag.
4388 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4389 DAG.getConstant(CCValid, DL, MVT::i32),
4390 DAG.getConstant(CCMask, DL, MVT::i32));
4391
4392 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4393 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4394
4395 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4396 if (N->getValueType(1) == MVT::i1)
4397 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4398
4399 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4400}
4401
4402SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4403 SelectionDAG &DAG) const {
4404 EVT VT = Op.getValueType();
4405 SDLoc DL(Op);
4406 Op = Op.getOperand(0);
4407
4408 if (VT.getScalarSizeInBits() == 128) {
4409 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4410 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4411 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4412 DAG.getConstant(0, DL, MVT::i64));
4413 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4414 return Op;
4415 }
4416
4417 // Handle vector types via VPOPCT.
4418 if (VT.isVector()) {
4419 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4420 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4421 switch (VT.getScalarSizeInBits()) {
4422 case 8:
4423 break;
4424 case 16: {
4425 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4426 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4427 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4428 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4429 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4430 break;
4431 }
4432 case 32: {
4433 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4434 DAG.getConstant(0, DL, MVT::i32));
4435 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4436 break;
4437 }
4438 case 64: {
4439 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4440 DAG.getConstant(0, DL, MVT::i32));
4441 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4442 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4443 break;
4444 }
4445 default:
4446 llvm_unreachable("Unexpected type");
4447 }
4448 return Op;
4449 }
4450
4451 // Get the known-zero mask for the operand.
4452 KnownBits Known = DAG.computeKnownBits(Op);
4453 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4454 if (NumSignificantBits == 0)
4455 return DAG.getConstant(0, DL, VT);
4456
4457 // Skip known-zero high parts of the operand.
4458 int64_t OrigBitSize = VT.getSizeInBits();
4459 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
4460 BitSize = std::min(BitSize, OrigBitSize);
4461
4462 // The POPCNT instruction counts the number of bits in each byte.
4463 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4464 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4465 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4466
4467 // Add up per-byte counts in a binary tree. All bits of Op at
4468 // position larger than BitSize remain zero throughout.
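// (For a 32-bit value, for example, the loop adds Op << 16 and then Op << 8,
// leaving the total count in the most significant byte.)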
4469 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4470 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4471 if (BitSize != OrigBitSize)
4472 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4473 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4474 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4475 }
4476
4477 // Extract overall result from high byte.
4478 if (BitSize > 8)
4479 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4480 DAG.getConstant(BitSize - 8, DL, VT));
4481
4482 return Op;
4483}
4484
4485SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4486 SelectionDAG &DAG) const {
4487 SDLoc DL(Op);
4488 AtomicOrdering FenceOrdering =
4489 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4490 SyncScope::ID FenceSSID =
4491 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4492
4493 // The only fence that needs an instruction is a sequentially-consistent
4494 // cross-thread fence.
4495 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4496 FenceSSID == SyncScope::System) {
4497 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4498 Op.getOperand(0)),
4499 0);
4500 }
4501
4502 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4503 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4504}
4505
4506// Op is an atomic load. Lower it into a normal volatile load.
4507SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
4508 SelectionDAG &DAG) const {
4509 auto *Node = cast<AtomicSDNode>(Op.getNode());
4510 if (Node->getMemoryVT() == MVT::i128) {
4511 // Use same code to handle both legal and non-legal i128 types.
4512 SmallVector<SDValue, 2> Results;
4513 LowerOperationWrapper(Node, Results, DAG);
4514 return DAG.getMergeValues(Results, SDLoc(Op));
4515 }
4516 return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
4517 Node->getChain(), Node->getBasePtr(),
4518 Node->getMemoryVT(), Node->getMemOperand());
4519}
4520
4521// Op is an atomic store. Lower it into a normal volatile store.
4522SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
4523 SelectionDAG &DAG) const {
4524 auto *Node = cast<AtomicSDNode>(Op.getNode());
4525 if (Node->getMemoryVT() == MVT::i128) {
4526 // Use same code to handle both legal and non-legal i128 types.
4527 SmallVector<SDValue, 1> Results;
4528 LowerOperationWrapper(Node, Results, DAG);
4529 return DAG.getMergeValues(Results, SDLoc(Op));
4530 }
4531 SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
4532 Node->getBasePtr(), Node->getMemoryVT(),
4533 Node->getMemOperand());
4534 // We have to enforce sequential consistency by performing a
4535 // serialization operation after the store.
4536 if (Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
4537 Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
4538 MVT::Other, Chain), 0);
4539 return Chain;
4540}
4541
4542// Prepare for a Compare And Swap for a subword operation. This needs to be
4543// done in memory with 4 bytes at natural alignment.
4544 static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
4545 SDValue &AlignedAddr, SDValue &BitShift,
4546 SDValue &NegBitShift) {
4547 EVT PtrVT = Addr.getValueType();
4548 EVT WideVT = MVT::i32;
4549
4550 // Get the address of the containing word.
4551 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4552 DAG.getConstant(-4, DL, PtrVT));
4553
4554 // Get the number of bits that the word must be rotated left in order
4555 // to bring the field to the top bits of a GR32.
4556 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4557 DAG.getConstant(3, DL, PtrVT));
4558 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4559
4560 // Get the complementing shift amount, for rotating a field in the top
4561 // bits back to its proper position.
4562 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4563 DAG.getConstant(0, DL, WideVT), BitShift);
4564
4565}
4566
4567// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
4568// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
4569SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4570 SelectionDAG &DAG,
4571 unsigned Opcode) const {
4572 auto *Node = cast<AtomicSDNode>(Op.getNode());
4573
4574 // 32-bit operations need no special handling.
4575 EVT NarrowVT = Node->getMemoryVT();
4576 EVT WideVT = MVT::i32;
4577 if (NarrowVT == WideVT)
4578 return Op;
4579
4580 int64_t BitSize = NarrowVT.getSizeInBits();
4581 SDValue ChainIn = Node->getChain();
4582 SDValue Addr = Node->getBasePtr();
4583 SDValue Src2 = Node->getVal();
4584 MachineMemOperand *MMO = Node->getMemOperand();
4585 SDLoc DL(Node);
4586
4587 // Convert atomic subtracts of constants into additions.
4588 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4589 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4590 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4591 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4592 }
4593
4594 SDValue AlignedAddr, BitShift, NegBitShift;
4595 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4596
4597 // Extend the source operand to 32 bits and prepare it for the inner loop.
4598 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4599 // operations require the source to be shifted in advance. (This shift
4600 // can be folded if the source is constant.) For AND and NAND, the lower
4601 // bits must be set, while for other opcodes they should be left clear.
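// (For example, an 8-bit AND operand is shifted left by 24 and OR-ed with
// 0x00ffffff so that the other bytes of the containing word are preserved.)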
4602 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4603 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4604 DAG.getConstant(32 - BitSize, DL, WideVT));
4605 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4606 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4607 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4608 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
4609
4610 // Construct the ATOMIC_LOADW_* node.
4611 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4612 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4613 DAG.getConstant(BitSize, DL, WideVT) };
4614 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4615 NarrowVT, MMO);
4616
4617 // Rotate the result of the final CS so that the field is in the lower
4618 // bits of a GR32, then truncate it.
4619 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4620 DAG.getConstant(BitSize, DL, WideVT));
4621 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4622
4623 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4624 return DAG.getMergeValues(RetOps, DL);
4625}
4626
4627// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
4628// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
4629SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4630 SelectionDAG &DAG) const {
4631 auto *Node = cast<AtomicSDNode>(Op.getNode());
4632 EVT MemVT = Node->getMemoryVT();
4633 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4634 // A full-width operation: negate and use LAA(G).
4635 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4636 assert(Subtarget.hasInterlockedAccess1() &&
4637 "Should have been expanded by AtomicExpand pass.");
4638 SDValue Src2 = Node->getVal();
4639 SDLoc DL(Src2);
4640 SDValue NegSrc2 =
4641 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
4642 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4643 Node->getChain(), Node->getBasePtr(), NegSrc2,
4644 Node->getMemOperand());
4645 }
4646
4647 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4648}
4649
4650// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4651SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4652 SelectionDAG &DAG) const {
4653 auto *Node = cast<AtomicSDNode>(Op.getNode());
4654 SDValue ChainIn = Node->getOperand(0);
4655 SDValue Addr = Node->getOperand(1);
4656 SDValue CmpVal = Node->getOperand(2);
4657 SDValue SwapVal = Node->getOperand(3);
4658 MachineMemOperand *MMO = Node->getMemOperand();
4659 SDLoc DL(Node);
4660
4661 if (Node->getMemoryVT() == MVT::i128) {
4662 // Use same code to handle both legal and non-legal i128 types.
4663 SmallVector<SDValue, 3> Results;
4664 LowerOperationWrapper(Node, Results, DAG);
4665 return DAG.getMergeValues(Results, DL);
4666 }
4667
4668 // We have native support for 32-bit and 64-bit compare and swap, but we
4669 // still need to expand extracting the "success" result from the CC.
4670 EVT NarrowVT = Node->getMemoryVT();
4671 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4672 if (NarrowVT == WideVT) {
4673 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4674 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4675 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4676 DL, Tys, Ops, NarrowVT, MMO);
4677 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4678 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4679
4680 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4681 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4682 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4683 return SDValue();
4684 }
4685
4686 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4687 // via a fullword ATOMIC_CMP_SWAPW operation.
4688 int64_t BitSize = NarrowVT.getSizeInBits();
4689
4690 SDValue AlignedAddr, BitShift, NegBitShift;
4691 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4692
4693 // Construct the ATOMIC_CMP_SWAPW node.
4694 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4695 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4696 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4697 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4698 VTList, Ops, NarrowVT, MMO);
4699 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4700 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4701
4702 // emitAtomicCmpSwapW() will zero extend the result (original value).
4703 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4704 DAG.getValueType(NarrowVT));
4705 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4706 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4707 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4708 return SDValue();
4709}
4710
4711 MachineMemOperand::Flags
4712 SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4713 // Because of how we convert atomic_load and atomic_store to normal loads and
4714 // stores in the DAG, we need to ensure that the MMOs are marked volatile
4715 // since DAGCombine hasn't been updated to account for atomic, but non
4716 // volatile loads. (See D57601)
4717 if (auto *SI = dyn_cast<StoreInst>(&I))
4718 if (SI->isAtomic())
4719 return MachineMemOperand::MOVolatile;
4720 if (auto *LI = dyn_cast<LoadInst>(&I))
4721 if (LI->isAtomic())
4722 return MachineMemOperand::MOVolatile;
4723 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
4724 if (AI->isAtomic())
4725 return MachineMemOperand::MOVolatile;
4726 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
4727 if (AI->isAtomic())
4728 return MachineMemOperand::MOVolatile;
4729 return MachineMemOperand::MONone;
4730}
4731
4732SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4733 SelectionDAG &DAG) const {
4734 MachineFunction &MF = DAG.getMachineFunction();
4735 auto *Regs = Subtarget.getSpecialRegisters();
4736 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4737 report_fatal_error("Variable-sized stack allocations are not supported "
4738 "in GHC calling convention");
4739 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4740 Regs->getStackPointerRegister(), Op.getValueType());
4741}
4742
4743SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4744 SelectionDAG &DAG) const {
4745 MachineFunction &MF = DAG.getMachineFunction();
4746 auto *Regs = Subtarget.getSpecialRegisters();
4747 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4748
4749 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4750 report_fatal_error("Variable-sized stack allocations are not supported "
4751 "in GHC calling convention");
4752
4753 SDValue Chain = Op.getOperand(0);
4754 SDValue NewSP = Op.getOperand(1);
4755 SDValue Backchain;
4756 SDLoc DL(Op);
4757
4758 if (StoreBackchain) {
4759 SDValue OldSP = DAG.getCopyFromReg(
4760 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
4761 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4762 MachinePointerInfo());
4763 }
4764
4765 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
4766
4767 if (StoreBackchain)
4768 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4769 MachinePointerInfo());
4770
4771 return Chain;
4772}
4773
4774SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4775 SelectionDAG &DAG) const {
4776 bool IsData = Op.getConstantOperandVal(4);
4777 if (!IsData)
4778 // Just preserve the chain.
4779 return Op.getOperand(0);
4780
4781 SDLoc DL(Op);
4782 bool IsWrite = Op.getConstantOperandVal(2);
4783 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4784 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4785 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4786 Op.getOperand(1)};
4787 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
4788 Node->getVTList(), Ops,
4789 Node->getMemoryVT(), Node->getMemOperand());
4790}
4791
4792// Convert condition code in CCReg to an i32 value.
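// IPM places the CC in bits 29:28 of its result, so shifting right by
// SystemZ::IPM_CC (28) leaves the condition code as an integer 0-3.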
4793 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
4794 SDLoc DL(CCReg);
4795 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
4796 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4797 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
4798}
4799
4800SDValue
4801SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4802 SelectionDAG &DAG) const {
4803 unsigned Opcode, CCValid;
4804 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4805 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
4806 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4807 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4808 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4809 return SDValue();
4810 }
4811
4812 return SDValue();
4813}
4814
4815SDValue
4816SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4817 SelectionDAG &DAG) const {
4818 unsigned Opcode, CCValid;
4819 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4820 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4821 if (Op->getNumValues() == 1)
4822 return getCCResult(DAG, SDValue(Node, 0));
4823 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
4824 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4825 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4826 }
4827
4828 unsigned Id = Op.getConstantOperandVal(0);
4829 switch (Id) {
4830 case Intrinsic::thread_pointer:
4831 return lowerThreadPointer(SDLoc(Op), DAG);
4832
4833 case Intrinsic::s390_vpdi:
4834 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4835 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4836
4837 case Intrinsic::s390_vperm:
4838 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4839 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4840
4841 case Intrinsic::s390_vuphb:
4842 case Intrinsic::s390_vuphh:
4843 case Intrinsic::s390_vuphf:
4844 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4845 Op.getOperand(1));
4846
4847 case Intrinsic::s390_vuplhb:
4848 case Intrinsic::s390_vuplhh:
4849 case Intrinsic::s390_vuplhf:
4850 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4851 Op.getOperand(1));
4852
4853 case Intrinsic::s390_vuplb:
4854 case Intrinsic::s390_vuplhw:
4855 case Intrinsic::s390_vuplf:
4856 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4857 Op.getOperand(1));
4858
4859 case Intrinsic::s390_vupllb:
4860 case Intrinsic::s390_vupllh:
4861 case Intrinsic::s390_vupllf:
4862 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4863 Op.getOperand(1));
4864
4865 case Intrinsic::s390_vsumb:
4866 case Intrinsic::s390_vsumh:
4867 case Intrinsic::s390_vsumgh:
4868 case Intrinsic::s390_vsumgf:
4869 case Intrinsic::s390_vsumqf:
4870 case Intrinsic::s390_vsumqg:
4871 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4872 Op.getOperand(1), Op.getOperand(2));
4873
4874 case Intrinsic::s390_vaq:
4875 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
4876 Op.getOperand(1), Op.getOperand(2));
4877 case Intrinsic::s390_vaccb:
4878 case Intrinsic::s390_vacch:
4879 case Intrinsic::s390_vaccf:
4880 case Intrinsic::s390_vaccg:
4881 case Intrinsic::s390_vaccq:
4882 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
4883 Op.getOperand(1), Op.getOperand(2));
4884 case Intrinsic::s390_vacq:
4885 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
4886 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4887 case Intrinsic::s390_vacccq:
4888 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
4889 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4890
4891 case Intrinsic::s390_vsq:
4892 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
4893 Op.getOperand(1), Op.getOperand(2));
4894 case Intrinsic::s390_vscbib:
4895 case Intrinsic::s390_vscbih:
4896 case Intrinsic::s390_vscbif:
4897 case Intrinsic::s390_vscbig:
4898 case Intrinsic::s390_vscbiq:
4899 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
4900 Op.getOperand(1), Op.getOperand(2));
4901 case Intrinsic::s390_vsbiq:
4902 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
4903 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4904 case Intrinsic::s390_vsbcbiq:
4905 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
4906 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4907 }
4908
4909 return SDValue();
4910}
4911
4912namespace {
4913// Says that SystemZISD operation Opcode can be used to perform the equivalent
4914// of a VPERM with permute vector Bytes. If Opcode takes three operands,
4915// Operand is the constant third operand, otherwise it is the number of
4916// bytes in each element of the result.
4917struct Permute {
4918 unsigned Opcode;
4919 unsigned Operand;
4920 unsigned char Bytes[SystemZ::VectorBytes];
4921};
4922}
4923
4924static const Permute PermuteForms[] = {
4925 // VMRHG
4926 { SystemZISD::MERGE_HIGH, 8,
4927 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4928 // VMRHF
4929 { SystemZISD::MERGE_HIGH, 4,
4930 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4931 // VMRHH
4932 { SystemZISD::MERGE_HIGH, 2,
4933 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4934 // VMRHB
4935 { SystemZISD::MERGE_HIGH, 1,
4936 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4937 // VMRLG
4938 { SystemZISD::MERGE_LOW, 8,
4939 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4940 // VMRLF
4941 { SystemZISD::MERGE_LOW, 4,
4942 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4943 // VMRLH
4944 { SystemZISD::MERGE_LOW, 2,
4945 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4946 // VMRLB
4947 { SystemZISD::MERGE_LOW, 1,
4948 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4949 // VPKG
4950 { SystemZISD::PACK, 4,
4951 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4952 // VPKF
4953 { SystemZISD::PACK, 2,
4954 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4955 // VPKH
4956 { SystemZISD::PACK, 1,
4957 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4958 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4959 { SystemZISD::PERMUTE_DWORDS, 4,
4960 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4961 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4962 { SystemZISD::PERMUTE_DWORDS, 1,
4963 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4964};
4965
4966// Called after matching a vector shuffle against a particular pattern.
4967// Both the original shuffle and the pattern have two vector operands.
4968// OpNos[0] is the operand of the original shuffle that should be used for
4969// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4970// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4971// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4972// for operands 0 and 1 of the pattern.
4973static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4974 if (OpNos[0] < 0) {
4975 if (OpNos[1] < 0)
4976 return false;
4977 OpNo0 = OpNo1 = OpNos[1];
4978 } else if (OpNos[1] < 0) {
4979 OpNo0 = OpNo1 = OpNos[0];
4980 } else {
4981 OpNo0 = OpNos[0];
4982 OpNo1 = OpNos[1];
4983 }
4984 return true;
4985}
4986
4987// Bytes is a VPERM-like permute vector, except that -1 is used for
4988// undefined bytes. Return true if the VPERM can be implemented using P.
4989// When returning true set OpNo0 to the VPERM operand that should be
4990// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
4991//
4992// For example, if swapping the VPERM operands allows P to match, OpNo0
4993// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
4994// operand, but rewriting it to use two duplicated operands allows it to
4995// match P, then OpNo0 and OpNo1 will be the same.
4996static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
4997 unsigned &OpNo0, unsigned &OpNo1) {
4998 int OpNos[] = { -1, -1 };
4999 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5000 int Elt = Bytes[I];
5001 if (Elt >= 0) {
5002 // Make sure that the two permute vectors use the same suboperand
5003 // byte number. Only the operand numbers (the high bits) are
5004 // allowed to differ.
5005 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5006 return false;
5007 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5008 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5009 // Make sure that the operand mappings are consistent with previous
5010 // elements.
5011 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5012 return false;
5013 OpNos[ModelOpNo] = RealOpNo;
5014 }
5015 }
5016 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5017}
5018
5019// As above, but search for a matching permute.
5020static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5021 unsigned &OpNo0, unsigned &OpNo1) {
5022 for (auto &P : PermuteForms)
5023 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5024 return &P;
5025 return nullptr;
5026}
5027
5028// Bytes is a VPERM-like permute vector, except that -1 is used for
5029// undefined bytes. This permute is an operand of an outer permute.
5030// See whether redistributing the -1 bytes gives a shuffle that can be
5031// implemented using P. If so, set Transform to a VPERM-like permute vector
5032// that, when applied to the result of P, gives the original permute in Bytes.
5033 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5034 const Permute &P,
5035 SmallVectorImpl<int> &Transform) {
5036 unsigned To = 0;
5037 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5038 int Elt = Bytes[From];
5039 if (Elt < 0)
5040 // Byte number From of the result is undefined.
5041 Transform[From] = -1;
5042 else {
5043 while (P.Bytes[To] != Elt) {
5044 To += 1;
5045 if (To == SystemZ::VectorBytes)
5046 return false;
5047 }
5048 Transform[From] = To;
5049 }
5050 }
5051 return true;
5052}
5053
5054// As above, but search for a matching permute.
5055static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5056 SmallVectorImpl<int> &Transform) {
5057 for (auto &P : PermuteForms)
5058 if (matchDoublePermute(Bytes, P, Transform))
5059 return &P;
5060 return nullptr;
5061}
5062
5063// Convert the mask of the given shuffle op into a byte-level mask,
5064// as if it had type vNi8.
5065static bool getVPermMask(SDValue ShuffleOp,
5066 SmallVectorImpl<int> &Bytes) {
5067 EVT VT = ShuffleOp.getValueType();
5068 unsigned NumElements = VT.getVectorNumElements();
5069 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5070
5071 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5072 Bytes.resize(NumElements * BytesPerElement, -1);
5073 for (unsigned I = 0; I < NumElements; ++I) {
5074 int Index = VSN->getMaskElt(I);
5075 if (Index >= 0)
5076 for (unsigned J = 0; J < BytesPerElement; ++J)
5077 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5078 }
5079 return true;
5080 }
5081 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5082 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5083 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5084 Bytes.resize(NumElements * BytesPerElement, -1);
5085 for (unsigned I = 0; I < NumElements; ++I)
5086 for (unsigned J = 0; J < BytesPerElement; ++J)
5087 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5088 return true;
5089 }
5090 return false;
5091}
5092
5093// Bytes is a VPERM-like permute vector, except that -1 is used for
5094// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5095// the result come from a contiguous sequence of bytes from one input.
5096// Set Base to the selector for the first byte if so.
5097static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5098 unsigned BytesPerElement, int &Base) {
5099 Base = -1;
5100 for (unsigned I = 0; I < BytesPerElement; ++I) {
5101 if (Bytes[Start + I] >= 0) {
5102 unsigned Elem = Bytes[Start + I];
5103 if (Base < 0) {
5104 Base = Elem - I;
5105 // Make sure the bytes would come from one input operand.
5106 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5107 return false;
5108 } else if (unsigned(Base) != Elem - I)
5109 return false;
5110 }
5111 }
5112 return true;
5113}
5114
5115// Bytes is a VPERM-like permute vector, except that -1 is used for
5116// undefined bytes. Return true if it can be performed using VSLDB.
5117// When returning true, set StartIndex to the shift amount and OpNo0
5118// and OpNo1 to the VPERM operands that should be used as the first
5119// and second shift operand respectively.
5120 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5121 unsigned &StartIndex, unsigned &OpNo0,
5122 unsigned &OpNo1) {
5123 int OpNos[] = { -1, -1 };
5124 int Shift = -1;
5125 for (unsigned I = 0; I < 16; ++I) {
5126 int Index = Bytes[I];
5127 if (Index >= 0) {
5128 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5129 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5130 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5131 if (Shift < 0)
5132 Shift = ExpectedShift;
5133 else if (Shift != ExpectedShift)
5134 return false;
5135 // Make sure that the operand mappings are consistent with previous
5136 // elements.
5137 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5138 return false;
5139 OpNos[ModelOpNo] = RealOpNo;
5140 }
5141 }
5142 StartIndex = Shift;
5143 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5144}
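// Illustrative standalone sketch (not part of SystemZISelLowering.cpp): the
// invariant behind isShlDoublePermute.  A byte mask is a VSLDB of amount S
// exactly when every defined byte I selects source byte I + S, reading the
// two operands as one 32-byte sequence; the hypothetical mask below shifts
// by 5.  The real routine additionally checks that each model operand maps
// to a single source operand.
#include <cstdio>

int main() {
  const unsigned VectorBytes = 16;
  const int Bytes[VectorBytes] = {5, 6, -1, 8, 9, 10, 11, 12,
                                  13, 14, 15, 16, -1, 18, 19, 20};
  int Shift = -1;
  bool Match = true;
  for (unsigned I = 0; I < VectorBytes && Match; ++I) {
    if (Bytes[I] < 0)
      continue;                              // undefined byte, no constraint
    int ExpectedShift = (Bytes[I] - int(I)) % int(VectorBytes);
    if (ExpectedShift < 0)
      ExpectedShift += VectorBytes;
    if (Shift < 0)
      Shift = ExpectedShift;                 // first defined byte fixes S
    else if (Shift != ExpectedShift)
      Match = false;                         // inconsistent shift amount
  }
  if (Match)
    std::printf("shift-left-double with amount %d\n", Shift);
  else
    std::printf("not a shift-left-double\n");
  return 0;
}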
5145
5146// Create a node that performs P on operands Op0 and Op1, casting the
5147// operands to the appropriate type. The type of the result is determined by P.
5148static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5149                              const Permute &P, SDValue Op0, SDValue Op1) {
5150 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5151 // elements of a PACK are twice as wide as the outputs.
5152 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5153 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5154 P.Operand);
5155 // Cast both operands to the appropriate type.
5156 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5157 SystemZ::VectorBytes / InBytes);
5158 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5159 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5160 SDValue Op;
5161 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5162 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5163 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5164 } else if (P.Opcode == SystemZISD::PACK) {
5165 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5166 SystemZ::VectorBytes / P.Operand);
5167 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5168 } else {
5169 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5170 }
5171 return Op;
5172}
5173
5174static bool isZeroVector(SDValue N) {
5175 if (N->getOpcode() == ISD::BITCAST)
5176 N = N->getOperand(0);
5177 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5178 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5179 return Op->getZExtValue() == 0;
5180 return ISD::isBuildVectorAllZeros(N.getNode());
5181}
5182
5183// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5184static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5185 for (unsigned I = 0; I < Num ; I++)
5186 if (isZeroVector(Ops[I]))
5187 return I;
5188 return UINT32_MAX;
5189}
5190
5191// Bytes is a VPERM-like permute vector, except that -1 is used for
5192// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5193// VSLDB or VPERM.
5194static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5195                                     SDValue *Ops,
5196 const SmallVectorImpl<int> &Bytes) {
5197 for (unsigned I = 0; I < 2; ++I)
5198 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5199
5200 // First see whether VSLDB can be used.
5201 unsigned StartIndex, OpNo0, OpNo1;
5202 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5203 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5204 Ops[OpNo1],
5205 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5206
5207 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5208 // eliminate a zero vector by reusing any zero index in the permute vector.
5209 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5210 if (ZeroVecIdx != UINT32_MAX) {
5211 bool MaskFirst = true;
5212 int ZeroIdx = -1;
5213 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5214 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5215 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5216 if (OpNo == ZeroVecIdx && I == 0) {
5217 // If the first byte is zero, use mask as first operand.
5218 ZeroIdx = 0;
5219 break;
5220 }
5221 if (OpNo != ZeroVecIdx && Byte == 0) {
5222 // If mask contains a zero, use it by placing that vector first.
5223 ZeroIdx = I + SystemZ::VectorBytes;
5224 MaskFirst = false;
5225 break;
5226 }
5227 }
5228 if (ZeroIdx != -1) {
5229 SDValue IndexNodes[SystemZ::VectorBytes];
5230 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5231 if (Bytes[I] >= 0) {
5232 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5233 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5234 if (OpNo == ZeroVecIdx)
5235 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5236 else {
5237 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5238 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5239 }
5240 } else
5241 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5242 }
5243 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5244 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5245 if (MaskFirst)
5246 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5247 Mask);
5248 else
5249 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5250 Mask);
5251 }
5252 }
5253
5254 SDValue IndexNodes[SystemZ::VectorBytes];
5255 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5256 if (Bytes[I] >= 0)
5257 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5258 else
5259 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5260 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5261 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5262 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5263}
5264
5265namespace {
5266// Describes a general N-operand vector shuffle.
5267struct GeneralShuffle {
5268 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
5269 void addUndef();
5270 bool add(SDValue, unsigned);
5271 SDValue getNode(SelectionDAG &, const SDLoc &);
5272 void tryPrepareForUnpack();
5273 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5274 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5275
5276 // The operands of the shuffle.
5277  SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5278
5279 // Index I is -1 if byte I of the result is undefined. Otherwise the
5280 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5281 // Bytes[I] / SystemZ::VectorBytes.
5282  SmallVector<int, SystemZ::VectorBytes> Bytes;
5283
5284 // The type of the shuffle result.
5285 EVT VT;
5286
5287 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5288 unsigned UnpackFromEltSize;
5289};
5290}
5291
5292// Add an extra undefined element to the shuffle.
5293void GeneralShuffle::addUndef() {
5294 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5295 for (unsigned I = 0; I < BytesPerElement; ++I)
5296 Bytes.push_back(-1);
5297}
5298
5299// Add an extra element to the shuffle, taking it from element Elem of Op.
5300// A null Op indicates a vector input whose value will be calculated later;
5301// there is at most one such input per shuffle and it always has the same
5302// type as the result. Aborts and returns false if the source vector elements
5303// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5304// LLVM they become implicitly extended, but this is rare and not optimized.
5305bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5306 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5307
5308 // The source vector can have wider elements than the result,
5309 // either through an explicit TRUNCATE or because of type legalization.
5310 // We want the least significant part.
5311 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5312 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5313
5314 // Return false if the source elements are smaller than their destination
5315 // elements.
5316 if (FromBytesPerElement < BytesPerElement)
5317 return false;
5318
5319 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5320 (FromBytesPerElement - BytesPerElement));
5321
5322 // Look through things like shuffles and bitcasts.
5323 while (Op.getNode()) {
5324 if (Op.getOpcode() == ISD::BITCAST)
5325 Op = Op.getOperand(0);
5326 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5327 // See whether the bytes we need come from a contiguous part of one
5328 // operand.
5329      SmallVector<int, SystemZ::VectorBytes> OpBytes;
5330      if (!getVPermMask(Op, OpBytes))
5331 break;
5332 int NewByte;
5333 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5334 break;
5335 if (NewByte < 0) {
5336 addUndef();
5337 return true;
5338 }
5339 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5340 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5341 } else if (Op.isUndef()) {
5342 addUndef();
5343 return true;
5344 } else
5345 break;
5346 }
5347
5348 // Make sure that the source of the extraction is in Ops.
5349 unsigned OpNo = 0;
5350 for (; OpNo < Ops.size(); ++OpNo)
5351 if (Ops[OpNo] == Op)
5352 break;
5353 if (OpNo == Ops.size())
5354 Ops.push_back(Op);
5355
5356 // Add the element to Bytes.
5357 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5358 for (unsigned I = 0; I < BytesPerElement; ++I)
5359 Bytes.push_back(Base + I);
5360
5361 return true;
5362}
5363
5364// Return SDNodes for the completed shuffle.
5365SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
5366 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
5367
5368 if (Ops.size() == 0)
5369 return DAG.getUNDEF(VT);
5370
5371 // Use a single unpack if possible as the last operation.
5372 tryPrepareForUnpack();
5373
5374 // Make sure that there are at least two shuffle operands.
5375 if (Ops.size() == 1)
5376 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
5377
5378 // Create a tree of shuffles, deferring root node until after the loop.
5379 // Try to redistribute the undefined elements of non-root nodes so that
5380 // the non-root shuffles match something like a pack or merge, then adjust
5381 // the parent node's permute vector to compensate for the new order.
5382 // Among other things, this copes with vectors like <2 x i16> that were
5383 // padded with undefined elements during type legalization.
5384 //
5385 // In the best case this redistribution will lead to the whole tree
5386 // using packs and merges. It should rarely be a loss in other cases.
5387 unsigned Stride = 1;
5388 for (; Stride * 2 < Ops.size(); Stride *= 2) {
5389 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
5390 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5391
5392 // Create a mask for just these two operands.
5393      SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
5394      for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5395 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
5396 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
5397 if (OpNo == I)
5398 NewBytes[J] = Byte;
5399 else if (OpNo == I + Stride)
5400 NewBytes[J] = SystemZ::VectorBytes + Byte;
5401 else
5402 NewBytes[J] = -1;
5403 }
5404 // See if it would be better to reorganize NewMask to avoid using VPERM.
5405      SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
5406      if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
5407 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
5408 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
5409 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5410 if (NewBytes[J] >= 0) {
5411 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
5412 "Invalid double permute");
5413 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
5414 } else
5415 assert(NewBytesMap[J] < 0 && "Invalid double permute");
5416 }
5417 } else {
5418 // Just use NewBytes on the operands.
5419 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
5420 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
5421 if (NewBytes[J] >= 0)
5422 Bytes[J] = I * SystemZ::VectorBytes + J;
5423 }
5424 }
5425 }
5426
5427 // Now we just have 2 inputs. Put the second operand in Ops[1].
5428 if (Stride > 1) {
5429 Ops[1] = Ops[Stride];
5430 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5431 if (Bytes[I] >= int(SystemZ::VectorBytes))
5432 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5433 }
5434
5435 // Look for an instruction that can do the permute without resorting
5436 // to VPERM.
5437 unsigned OpNo0, OpNo1;
5438 SDValue Op;
5439 if (unpackWasPrepared() && Ops[1].isUndef())
5440 Op = Ops[0];
5441 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5442 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5443 else
5444 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5445
5446 Op = insertUnpackIfPrepared(DAG, DL, Op);
5447
5448 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5449}
5450
5451#ifndef NDEBUG
5452static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5453 dbgs() << Msg.c_str() << " { ";
5454 for (unsigned i = 0; i < Bytes.size(); i++)
5455 dbgs() << Bytes[i] << " ";
5456 dbgs() << "}\n";
5457}
5458#endif
5459
5460// If the Bytes vector matches an unpack operation, prepare to do the unpack
5461// after all else by removing the zero vector and the effect of the unpack on
5462// Bytes.
5463void GeneralShuffle::tryPrepareForUnpack() {
5464 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5465 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5466 return;
5467
5468 // Only do this if removing the zero vector reduces the depth, otherwise
5469 // the critical path will increase with the final unpack.
5470 if (Ops.size() > 2 &&
5471 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5472 return;
5473
5474 // Find an unpack that would allow removing the zero vector from Ops.
5475 UnpackFromEltSize = 1;
5476 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5477 bool MatchUnpack = true;
5478    SmallVector<int, SystemZ::VectorBytes / 2> SrcBytes;
5479    for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5480 unsigned ToEltSize = UnpackFromEltSize * 2;
5481 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5482 if (!IsZextByte)
5483 SrcBytes.push_back(Bytes[Elt]);
5484 if (Bytes[Elt] != -1) {
5485 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5486 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5487 MatchUnpack = false;
5488 break;
5489 }
5490 }
5491 }
5492 if (MatchUnpack) {
5493 if (Ops.size() == 2) {
5494 // Don't use unpack if a single source operand needs rearrangement.
5495 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5496 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5497 UnpackFromEltSize = UINT_MAX;
5498 return;
5499 }
5500 }
5501 break;
5502 }
5503 }
5504 if (UnpackFromEltSize > 4)
5505 return;
5506
5507 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5508 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5509 << ".\n";
5510 dumpBytes(Bytes, "Original Bytes vector:"););
5511
5512 // Apply the unpack in reverse to the Bytes array.
5513 unsigned B = 0;
5514 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5515 Elt += UnpackFromEltSize;
5516 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5517 Bytes[B] = Bytes[Elt];
5518 }
5519 while (B < SystemZ::VectorBytes)
5520 Bytes[B++] = -1;
5521
5522 // Remove the zero vector from Ops
5523 Ops.erase(&Ops[ZeroVecOpNo]);
5524 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5525 if (Bytes[I] >= 0) {
5526 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5527 if (OpNo > ZeroVecOpNo)
5528 Bytes[I] -= SystemZ::VectorBytes;
5529 }
5530
5531 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5532 dbgs() << "\n";);
5533}
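// Illustrative standalone sketch (not part of SystemZISelLowering.cpp):
// "applying the unpack in reverse" for a byte-sized unpack
// (UnpackFromEltSize == 1).  An unpack high widens the first 8 source bytes
// to 16 result bytes by inserting a zero before each one, so undoing it keeps
// only every second byte of the wanted pattern and leaves the tail undefined.
// The wanted mask below is hypothetical; the zero vector is operand 1
// (selectors 16..31).
#include <cstdio>

int main() {
  const unsigned VectorBytes = 16;
  const unsigned UnpackFromEltSize = 1;
  int Bytes[VectorBytes];
  for (unsigned I = 0; I < VectorBytes; ++I)
    Bytes[I] = (I % 2 == 0) ? int(16 + I) : int(I / 2);  // zero, b0, zero, b1, ...
  // Same compaction loop as tryPrepareForUnpack.
  unsigned B = 0;
  for (unsigned Elt = 0; Elt < VectorBytes;) {
    Elt += UnpackFromEltSize;
    for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
      Bytes[B] = Bytes[Elt];
  }
  while (B < VectorBytes)
    Bytes[B++] = -1;
  for (int X : Bytes)
    std::printf("%d ", X);
  // Prints: 0 1 2 3 4 5 6 7 -1 -1 -1 -1 -1 -1 -1 -1
  std::printf("\n");
  return 0;
}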
5534
5535SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5536 const SDLoc &DL,
5537 SDValue Op) {
5538 if (!unpackWasPrepared())
5539 return Op;
5540 unsigned InBits = UnpackFromEltSize * 8;
5541 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5542 SystemZ::VectorBits / InBits);
5543 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5544 unsigned OutBits = InBits * 2;
5545 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5546 SystemZ::VectorBits / OutBits);
5547 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5548}
5549
5550// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5551static bool isScalarToVector(SDValue Op) {
5552  for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5553 if (!Op.getOperand(I).isUndef())
5554 return false;
5555 return true;
5556}
5557
5558// Return a vector of type VT that contains Value in the first element.
5559// The other elements don't matter.
5560static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5561                                   SDValue Value) {
5562 // If we have a constant, replicate it to all elements and let the
5563 // BUILD_VECTOR lowering take care of it.
5564 if (Value.getOpcode() == ISD::Constant ||
5565 Value.getOpcode() == ISD::ConstantFP) {
5566    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
5567    return DAG.getBuildVector(VT, DL, Ops);
5568 }
5569 if (Value.isUndef())
5570 return DAG.getUNDEF(VT);
5571 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5572}
5573
5574// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5575// element 1. Used for cases in which replication is cheap.
5576static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5577                                 SDValue Op0, SDValue Op1) {
5578 if (Op0.isUndef()) {
5579 if (Op1.isUndef())
5580 return DAG.getUNDEF(VT);
5581 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5582 }
5583 if (Op1.isUndef())
5584 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5585 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5586 buildScalarToVector(DAG, DL, VT, Op0),
5587 buildScalarToVector(DAG, DL, VT, Op1));
5588}
5589
5590// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5591// vector for them.
5592static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5593                          SDValue Op1) {
5594 if (Op0.isUndef() && Op1.isUndef())
5595 return DAG.getUNDEF(MVT::v2i64);
5596 // If one of the two inputs is undefined then replicate the other one,
5597 // in order to avoid using another register unnecessarily.
5598 if (Op0.isUndef())
5599 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5600 else if (Op1.isUndef())
5601 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5602 else {
5603 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5604 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5605 }
5606 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5607}
5608
5609// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5610// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5611// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5612// would benefit from this representation and return it if so.
5613static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
5614                                     BuildVectorSDNode *BVN) {
5615 EVT VT = BVN->getValueType(0);
5616 unsigned NumElements = VT.getVectorNumElements();
5617
5618 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5619 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5620 // need a BUILD_VECTOR, add an additional placeholder operand for that
5621 // BUILD_VECTOR and store its operands in ResidueOps.
5622 GeneralShuffle GS(VT);
5623  SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5624  bool FoundOne = false;
5625 for (unsigned I = 0; I < NumElements; ++I) {
5626 SDValue Op = BVN->getOperand(I);
5627 if (Op.getOpcode() == ISD::TRUNCATE)
5628 Op = Op.getOperand(0);
5629 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5630 Op.getOperand(1).getOpcode() == ISD::Constant) {
5631 unsigned Elem = Op.getConstantOperandVal(1);
5632 if (!GS.add(Op.getOperand(0), Elem))
5633 return SDValue();
5634 FoundOne = true;
5635 } else if (Op.isUndef()) {
5636 GS.addUndef();
5637 } else {
5638 if (!GS.add(SDValue(), ResidueOps.size()))
5639 return SDValue();
5640 ResidueOps.push_back(BVN->getOperand(I));
5641 }
5642 }
5643
5644 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5645 if (!FoundOne)
5646 return SDValue();
5647
5648 // Create the BUILD_VECTOR for the remaining elements, if any.
5649 if (!ResidueOps.empty()) {
5650 while (ResidueOps.size() < NumElements)
5651 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5652 for (auto &Op : GS.Ops) {
5653 if (!Op.getNode()) {
5654 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5655 break;
5656 }
5657 }
5658 }
5659 return GS.getNode(DAG, SDLoc(BVN));
5660}
5661
5662bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5663 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5664 return true;
5665 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5666 return true;
5667 return false;
5668}
5669
5670// Combine GPR scalar values Elems into a vector of type VT.
5671SDValue
5672SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5673 SmallVectorImpl<SDValue> &Elems) const {
5674 // See whether there is a single replicated value.
5675  SDValue Single;
5676  unsigned int NumElements = Elems.size();
5677 unsigned int Count = 0;
5678 for (auto Elem : Elems) {
5679 if (!Elem.isUndef()) {
5680 if (!Single.getNode())
5681 Single = Elem;
5682 else if (Elem != Single) {
5683 Single = SDValue();
5684 break;
5685 }
5686 Count += 1;
5687 }
5688 }
5689 // There are three cases here:
5690 //
5691 // - if the only defined element is a loaded one, the best sequence
5692 // is a replicating load.
5693 //
5694 // - otherwise, if the only defined element is an i64 value, we will
5695 // end up with the same VLVGP sequence regardless of whether we short-cut
5696 // for replication or fall through to the later code.
5697 //
5698 // - otherwise, if the only defined element is an i32 or smaller value,
5699 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5700 // This is only a win if the single defined element is used more than once.
5701 // In other cases we're better off using a single VLVGx.
5702 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5703 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5704
5705 // If all elements are loads, use VLREP/VLEs (below).
5706 bool AllLoads = true;
5707 for (auto Elem : Elems)
5708 if (!isVectorElementLoad(Elem)) {
5709 AllLoads = false;
5710 break;
5711 }
5712
5713 // The best way of building a v2i64 from two i64s is to use VLVGP.
5714 if (VT == MVT::v2i64 && !AllLoads)
5715 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5716
5717 // Use a 64-bit merge high to combine two doubles.
5718 if (VT == MVT::v2f64 && !AllLoads)
5719 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5720
5721 // Build v4f32 values directly from the FPRs:
5722 //
5723 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
5724 // V V VMRHF
5725 // <ABxx> <CDxx>
5726 // V VMRHG
5727 // <ABCD>
5728 if (VT == MVT::v4f32 && !AllLoads) {
5729 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5730 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
5731 // Avoid unnecessary undefs by reusing the other operand.
5732 if (Op01.isUndef())
5733 Op01 = Op23;
5734 else if (Op23.isUndef())
5735 Op23 = Op01;
5736 // Merging identical replications is a no-op.
5737 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
5738 return Op01;
5739 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
5740 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
5741    SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
5742                             DL, MVT::v2i64, Op01, Op23);
5743 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5744 }
5745
5746 // Collect the constant terms.
5747  SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
5748  SmallVector<bool, 16> Done(NumElements, false);
5749
5750 unsigned NumConstants = 0;
5751 for (unsigned I = 0; I < NumElements; ++I) {
5752 SDValue Elem = Elems[I];
5753 if (Elem.getOpcode() == ISD::Constant ||
5754 Elem.getOpcode() == ISD::ConstantFP) {
5755 NumConstants += 1;
5756 Constants[I] = Elem;
5757 Done[I] = true;
5758 }
5759 }
5760 // If there was at least one constant, fill in the other elements of
5761 // Constants with undefs to get a full vector constant and use that
5762 // as the starting point.
5763  SDValue Result;
5764  SDValue ReplicatedVal;
5765 if (NumConstants > 0) {
5766 for (unsigned I = 0; I < NumElements; ++I)
5767 if (!Constants[I].getNode())
5768 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
5769 Result = DAG.getBuildVector(VT, DL, Constants);
5770 } else {
5771 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
5772 // avoid a false dependency on any previous contents of the vector
5773 // register.
5774
5775 // Use a VLREP if at least one element is a load. Make sure to replicate
5776 // the load with the most elements having its value.
5777 std::map<const SDNode*, unsigned> UseCounts;
5778 SDNode *LoadMaxUses = nullptr;
5779 for (unsigned I = 0; I < NumElements; ++I)
5780 if (isVectorElementLoad(Elems[I])) {
5781 SDNode *Ld = Elems[I].getNode();
5782 UseCounts[Ld]++;
5783 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
5784 LoadMaxUses = Ld;
5785 }
5786 if (LoadMaxUses != nullptr) {
5787 ReplicatedVal = SDValue(LoadMaxUses, 0);
5788 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
5789 } else {
5790 // Try to use VLVGP.
5791 unsigned I1 = NumElements / 2 - 1;
5792 unsigned I2 = NumElements - 1;
5793 bool Def1 = !Elems[I1].isUndef();
5794 bool Def2 = !Elems[I2].isUndef();
5795 if (Def1 || Def2) {
5796 SDValue Elem1 = Elems[Def1 ? I1 : I2];
5797 SDValue Elem2 = Elems[Def2 ? I2 : I1];
5798 Result = DAG.getNode(ISD::BITCAST, DL, VT,
5799 joinDwords(DAG, DL, Elem1, Elem2));
5800 Done[I1] = true;
5801 Done[I2] = true;
5802 } else
5803 Result = DAG.getUNDEF(VT);
5804 }
5805 }
5806
5807 // Use VLVGx to insert the other elements.
5808 for (unsigned I = 0; I < NumElements; ++I)
5809 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
5810 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
5811 DAG.getConstant(I, DL, MVT::i32));
5812 return Result;
5813}
5814
5815SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
5816 SelectionDAG &DAG) const {
5817 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
5818 SDLoc DL(Op);
5819 EVT VT = Op.getValueType();
5820
5821 if (BVN->isConstant()) {
5822 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
5823 return Op;
5824
5825 // Fall back to loading it from memory.
5826 return SDValue();
5827 }
5828
5829 // See if we should use shuffles to construct the vector from other vectors.
5830 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
5831 return Res;
5832
5833 // Detect SCALAR_TO_VECTOR conversions.
5834  if (isScalarToVector(Op))
5835    return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
5836
5837 // Otherwise use buildVector to build the vector up from GPRs.
5838 unsigned NumElements = Op.getNumOperands();
5839  SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
5840  for (unsigned I = 0; I < NumElements; ++I)
5841 Ops[I] = Op.getOperand(I);
5842 return buildVector(DAG, DL, VT, Ops);
5843}
5844
5845SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
5846 SelectionDAG &DAG) const {
5847 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
5848 SDLoc DL(Op);
5849 EVT VT = Op.getValueType();
5850 unsigned NumElements = VT.getVectorNumElements();
5851
5852 if (VSN->isSplat()) {
5853 SDValue Op0 = Op.getOperand(0);
5854 unsigned Index = VSN->getSplatIndex();
5855    assert(Index < VT.getVectorNumElements() &&
5856           "Splat index should be defined and in first operand");
5857 // See whether the value we're splatting is directly available as a scalar.
5858 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5859        Op0.getOpcode() == ISD::BUILD_VECTOR)
5860      return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
5861 // Otherwise keep it as a vector-to-vector operation.
5862 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
5863 DAG.getTargetConstant(Index, DL, MVT::i32));
5864 }
5865
5866 GeneralShuffle GS(VT);
5867 for (unsigned I = 0; I < NumElements; ++I) {
5868 int Elt = VSN->getMaskElt(I);
5869 if (Elt < 0)
5870 GS.addUndef();
5871 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
5872 unsigned(Elt) % NumElements))
5873 return SDValue();
5874 }
5875 return GS.getNode(DAG, SDLoc(VSN));
5876}
5877
5878SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
5879 SelectionDAG &DAG) const {
5880 SDLoc DL(Op);
5881 // Just insert the scalar into element 0 of an undefined vector.
5882 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
5883 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
5884 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
5885}
5886
5887SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
5888 SelectionDAG &DAG) const {
5889 // Handle insertions of floating-point values.
5890 SDLoc DL(Op);
5891 SDValue Op0 = Op.getOperand(0);
5892 SDValue Op1 = Op.getOperand(1);
5893 SDValue Op2 = Op.getOperand(2);
5894 EVT VT = Op.getValueType();
5895
5896 // Insertions into constant indices of a v2f64 can be done using VPDI.
5897 // However, if the inserted value is a bitcast or a constant then it's
5898 // better to use GPRs, as below.
5899 if (VT == MVT::v2f64 &&
5900 Op1.getOpcode() != ISD::BITCAST &&
5901 Op1.getOpcode() != ISD::ConstantFP &&
5902 Op2.getOpcode() == ISD::Constant) {
5903 uint64_t Index = Op2->getAsZExtVal();
5904 unsigned Mask = VT.getVectorNumElements() - 1;
5905 if (Index <= Mask)
5906 return Op;
5907 }
5908
5909 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
5910  MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
5911  MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
5912 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
5913 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
5914 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
5915 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5916}
5917
5918SDValue
5919SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
5920 SelectionDAG &DAG) const {
5921 // Handle extractions of floating-point values.
5922 SDLoc DL(Op);
5923 SDValue Op0 = Op.getOperand(0);
5924 SDValue Op1 = Op.getOperand(1);
5925 EVT VT = Op.getValueType();
5926 EVT VecVT = Op0.getValueType();
5927
5928 // Extractions of constant indices can be done directly.
5929 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
5930 uint64_t Index = CIndexN->getZExtValue();
5931 unsigned Mask = VecVT.getVectorNumElements() - 1;
5932 if (Index <= Mask)
5933 return Op;
5934 }
5935
5936 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
5937 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
5938 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
5939 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
5940 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
5941 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5942}
5943
5944SDValue SystemZTargetLowering::
5945lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5946 SDValue PackedOp = Op.getOperand(0);
5947 EVT OutVT = Op.getValueType();
5948 EVT InVT = PackedOp.getValueType();
5949 unsigned ToBits = OutVT.getScalarSizeInBits();
5950 unsigned FromBits = InVT.getScalarSizeInBits();
5951 do {
5952 FromBits *= 2;
5953 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5954 SystemZ::VectorBits / FromBits);
5955 PackedOp =
5956 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
5957 } while (FromBits != ToBits);
5958 return PackedOp;
5959}
5960
5961// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
5962SDValue SystemZTargetLowering::
5963lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5964 SDValue PackedOp = Op.getOperand(0);
5965 SDLoc DL(Op);
5966 EVT OutVT = Op.getValueType();
5967 EVT InVT = PackedOp.getValueType();
5968 unsigned InNumElts = InVT.getVectorNumElements();
5969 unsigned OutNumElts = OutVT.getVectorNumElements();
5970 unsigned NumInPerOut = InNumElts / OutNumElts;
5971
5972 SDValue ZeroVec =
5973 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5974
5975 SmallVector<int, 16> Mask(InNumElts);
5976 unsigned ZeroVecElt = InNumElts;
5977 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
5978 unsigned MaskElt = PackedElt * NumInPerOut;
5979 unsigned End = MaskElt + NumInPerOut - 1;
5980 for (; MaskElt < End; MaskElt++)
5981 Mask[MaskElt] = ZeroVecElt++;
5982 Mask[MaskElt] = PackedElt;
5983 }
5984 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
5985 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
5986}
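// Illustrative standalone sketch (not part of SystemZISelLowering.cpp): the
// shuffle mask built by lowerZERO_EXTEND_VECTOR_INREG, here for a hypothetical
// v8i16 -> v4i32 zero extension.  Each output element becomes
// NumInPerOut - 1 lanes taken from the zero vector (indices >= InNumElts)
// followed by the packed element itself, matching big-endian lane order.
#include <cstdio>
#include <vector>

int main() {
  const unsigned InNumElts = 8;        // v8i16 input (and zero vector)
  const unsigned OutNumElts = 4;       // v4i32 result, still viewed as 8 lanes
  const unsigned NumInPerOut = InNumElts / OutNumElts;   // 2
  std::vector<int> Mask(InNumElts);
  unsigned ZeroVecElt = InNumElts;
  for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
    unsigned MaskElt = PackedElt * NumInPerOut;
    unsigned End = MaskElt + NumInPerOut - 1;
    for (; MaskElt < End; MaskElt++)
      Mask[MaskElt] = ZeroVecElt++;
    Mask[MaskElt] = PackedElt;
  }
  for (int M : Mask)
    std::printf("%d ", M);
  // Prints: 8 0 9 1 10 2 11 3  (zero, elt0, zero, elt1, ...)
  std::printf("\n");
  return 0;
}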
5987
5988SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
5989 unsigned ByScalar) const {
5990 // Look for cases where a vector shift can use the *_BY_SCALAR form.
5991 SDValue Op0 = Op.getOperand(0);
5992 SDValue Op1 = Op.getOperand(1);
5993 SDLoc DL(Op);
5994 EVT VT = Op.getValueType();
5995 unsigned ElemBitSize = VT.getScalarSizeInBits();
5996
5997 // See whether the shift vector is a splat represented as BUILD_VECTOR.
5998 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
5999 APInt SplatBits, SplatUndef;
6000 unsigned SplatBitSize;
6001 bool HasAnyUndefs;
6002 // Check for constant splats. Use ElemBitSize as the minimum element
6003 // width and reject splats that need wider elements.
6004 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6005 ElemBitSize, true) &&
6006 SplatBitSize == ElemBitSize) {
6007 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6008 DL, MVT::i32);
6009 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6010 }
6011 // Check for variable splats.
6012 BitVector UndefElements;
6013 SDValue Splat = BVN->getSplatValue(&UndefElements);
6014 if (Splat) {
6015 // Since i32 is the smallest legal type, we either need a no-op
6016 // or a truncation.
6017 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6018 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6019 }
6020 }
6021
6022 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6023 // and the shift amount is directly available in a GPR.
6024 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6025 if (VSN->isSplat()) {
6026 SDValue VSNOp0 = VSN->getOperand(0);
6027 unsigned Index = VSN->getSplatIndex();
6028      assert(Index < VT.getVectorNumElements() &&
6029             "Splat index should be defined and in first operand");
6030 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6031 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6032 // Since i32 is the smallest legal type, we either need a no-op
6033 // or a truncation.
6034 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6035 VSNOp0.getOperand(Index));
6036 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6037 }
6038 }
6039 }
6040
6041 // Otherwise just treat the current form as legal.
6042 return Op;
6043}
6044
6045SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6046 SelectionDAG &DAG) const {
6047 SDLoc DL(Op);
6048 MVT ResultVT = Op.getSimpleValueType();
6049 SDValue Arg = Op.getOperand(0);
6050 unsigned Check = Op.getConstantOperandVal(1);
6051
6052 unsigned TDCMask = 0;
6053  if (Check & fcSNan)
6054    TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
6055  if (Check & fcQNan)
6056    TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
6057  if (Check & fcPosInf)
6058    TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
6059  if (Check & fcNegInf)
6060    TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
6061  if (Check & fcPosNormal)
6062    TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
6063  if (Check & fcNegNormal)
6064    TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
6065  if (Check & fcPosSubnormal)
6066    TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
6067  if (Check & fcNegSubnormal)
6068    TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
6069  if (Check & fcPosZero)
6070 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6071 if (Check & fcNegZero)
6072 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
6073 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
6074
6075 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
6076 return getCCResult(DAG, Intr);
6077}
6078
6079SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
6080                                              SelectionDAG &DAG) const {
6081 switch (Op.getOpcode()) {
6082 case ISD::FRAMEADDR:
6083 return lowerFRAMEADDR(Op, DAG);
6084 case ISD::RETURNADDR:
6085 return lowerRETURNADDR(Op, DAG);
6086 case ISD::BR_CC:
6087 return lowerBR_CC(Op, DAG);
6088 case ISD::SELECT_CC:
6089 return lowerSELECT_CC(Op, DAG);
6090 case ISD::SETCC:
6091 return lowerSETCC(Op, DAG);
6092 case ISD::STRICT_FSETCC:
6093 return lowerSTRICT_FSETCC(Op, DAG, false);
6094  case ISD::STRICT_FSETCCS:
6095    return lowerSTRICT_FSETCC(Op, DAG, true);
6096 case ISD::GlobalAddress:
6097 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
6098  case ISD::GlobalTLSAddress:
6099    return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
6100 case ISD::BlockAddress:
6101 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
6102 case ISD::JumpTable:
6103 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
6104 case ISD::ConstantPool:
6105 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
6106 case ISD::BITCAST:
6107 return lowerBITCAST(Op, DAG);
6108 case ISD::VASTART:
6109 return lowerVASTART(Op, DAG);
6110 case ISD::VACOPY:
6111 return lowerVACOPY(Op, DAG);
6112  case ISD::DYNAMIC_STACKALLOC:
6113    return lowerDYNAMIC_STACKALLOC(Op, DAG);
6114  case ISD::GET_DYNAMIC_AREA_OFFSET:
6115    return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
6116 case ISD::SMUL_LOHI:
6117 return lowerSMUL_LOHI(Op, DAG);
6118 case ISD::UMUL_LOHI:
6119 return lowerUMUL_LOHI(Op, DAG);
6120 case ISD::SDIVREM:
6121 return lowerSDIVREM(Op, DAG);
6122 case ISD::UDIVREM:
6123 return lowerUDIVREM(Op, DAG);
6124 case ISD::SADDO:
6125 case ISD::SSUBO:
6126 case ISD::UADDO:
6127 case ISD::USUBO:
6128 return lowerXALUO(Op, DAG);
6129 case ISD::UADDO_CARRY:
6130 case ISD::USUBO_CARRY:
6131 return lowerUADDSUBO_CARRY(Op, DAG);
6132 case ISD::OR:
6133 return lowerOR(Op, DAG);
6134 case ISD::CTPOP:
6135 return lowerCTPOP(Op, DAG);
6136 case ISD::ATOMIC_FENCE:
6137 return lowerATOMIC_FENCE(Op, DAG);
6138 case ISD::ATOMIC_SWAP:
6139 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
6140 case ISD::ATOMIC_STORE:
6141 return lowerATOMIC_STORE(Op, DAG);
6142 case ISD::ATOMIC_LOAD:
6143 return lowerATOMIC_LOAD(Op, DAG);
6144  case ISD::ATOMIC_LOAD_ADD:
6145    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
6146  case ISD::ATOMIC_LOAD_SUB:
6147    return lowerATOMIC_LOAD_SUB(Op, DAG);
6148  case ISD::ATOMIC_LOAD_AND:
6149    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
6150  case ISD::ATOMIC_LOAD_OR:
6151    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
6152  case ISD::ATOMIC_LOAD_XOR:
6153    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
6154  case ISD::ATOMIC_LOAD_NAND:
6155    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
6156  case ISD::ATOMIC_LOAD_MIN:
6157    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
6158  case ISD::ATOMIC_LOAD_MAX:
6159    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
6160  case ISD::ATOMIC_LOAD_UMIN:
6161    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
6162  case ISD::ATOMIC_LOAD_UMAX:
6163    return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
6164  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
6165    return lowerATOMIC_CMP_SWAP(Op, DAG);
6166 case ISD::STACKSAVE:
6167 return lowerSTACKSAVE(Op, DAG);
6168 case ISD::STACKRESTORE:
6169 return lowerSTACKRESTORE(Op, DAG);
6170 case ISD::PREFETCH:
6171 return lowerPREFETCH(Op, DAG);
6172  case ISD::INTRINSIC_W_CHAIN:
6173    return lowerINTRINSIC_W_CHAIN(Op, DAG);
6174  case ISD::INTRINSIC_WO_CHAIN:
6175    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
6176 case ISD::BUILD_VECTOR:
6177 return lowerBUILD_VECTOR(Op, DAG);
6178  case ISD::VECTOR_SHUFFLE:
6179    return lowerVECTOR_SHUFFLE(Op, DAG);
6180  case ISD::SCALAR_TO_VECTOR:
6181    return lowerSCALAR_TO_VECTOR(Op, DAG);
6182  case ISD::INSERT_VECTOR_ELT:
6183    return lowerINSERT_VECTOR_ELT(Op, DAG);
6184  case ISD::EXTRACT_VECTOR_ELT:
6185    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6186  case ISD::SIGN_EXTEND_VECTOR_INREG:
6187    return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
6188  case ISD::ZERO_EXTEND_VECTOR_INREG:
6189    return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
6190 case ISD::SHL:
6191 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
6192 case ISD::SRL:
6193 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
6194 case ISD::SRA:
6195 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
6196 case ISD::ROTL:
6197 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
6198 case ISD::IS_FPCLASS:
6199 return lowerIS_FPCLASS(Op, DAG);
6200 case ISD::GET_ROUNDING:
6201 return lowerGET_ROUNDING(Op, DAG);
6202 default:
6203 llvm_unreachable("Unexpected node to lower");
6204 }
6205}
6206
6207// Lower operations with invalid operand or result types (currently used
6208// only for 128-bit integer types).
6209void
6210SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
6211                                             SmallVectorImpl<SDValue> &Results,
6212                                             SelectionDAG &DAG) const {
6213 switch (N->getOpcode()) {
6214 case ISD::ATOMIC_LOAD: {
6215 SDLoc DL(N);
6216 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
6217 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
6218 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6219    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
6220                                          DL, Tys, Ops, MVT::i128, MMO);
6221 Results.push_back(lowerGR128ToI128(DAG, Res));
6222 Results.push_back(Res.getValue(1));
6223 break;
6224 }
6225 case ISD::ATOMIC_STORE: {
6226 SDLoc DL(N);
6227 SDVTList Tys = DAG.getVTList(MVT::Other);
6228 SDValue Ops[] = {N->getOperand(0), lowerI128ToGR128(DAG, N->getOperand(1)),
6229 N->getOperand(2)};
6230 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6231    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
6232                                          DL, Tys, Ops, MVT::i128, MMO);
6233 // We have to enforce sequential consistency by performing a
6234 // serialization operation after the store.
6235 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
6236        AtomicOrdering::SequentiallyConsistent)
6237      Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
6238 MVT::Other, Res), 0);
6239 Results.push_back(Res);
6240 break;
6241 }
6242  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
6243    SDLoc DL(N);
6244 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
6245 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
6246 lowerI128ToGR128(DAG, N->getOperand(2)),
6247 lowerI128ToGR128(DAG, N->getOperand(3)) };
6248 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6249    SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
6250                                          DL, Tys, Ops, MVT::i128, MMO);
6251 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
6252                                SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
6253    Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
6254 Results.push_back(lowerGR128ToI128(DAG, Res));
6255 Results.push_back(Success);
6256 Results.push_back(Res.getValue(2));
6257 break;
6258 }
6259 case ISD::BITCAST: {
6260 SDValue Src = N->getOperand(0);
6261 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
6262 !useSoftFloat()) {
6263 SDLoc DL(N);
6264 SDValue Lo, Hi;
6265 if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
6266 SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
6267 Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
6268 DAG.getConstant(1, DL, MVT::i32));
6269 Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
6270 DAG.getConstant(0, DL, MVT::i32));
6271 } else {
6272 assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
6273 "Unrecognized register class for f128.");
6274 SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
6275 DL, MVT::f64, Src);
6276 SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
6277 DL, MVT::f64, Src);
6278 Lo = DAG.getNode(ISD::BITCAST, DL, MVT::i64, LoFP);
6279 Hi = DAG.getNode(ISD::BITCAST, DL, MVT::i64, HiFP);
6280 }
6281 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
6282 }
6283 break;
6284 }
6285 default:
6286 llvm_unreachable("Unexpected node to lower");
6287 }
6288}
6289
6290void
6291SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
6292                                          SmallVectorImpl<SDValue> &Results,
6293                                          SelectionDAG &DAG) const {
6294 return LowerOperationWrapper(N, Results, DAG);
6295}
6296
6297const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
6298#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
6299 switch ((SystemZISD::NodeType)Opcode) {
6300 case SystemZISD::FIRST_NUMBER: break;
6301 OPCODE(RET_GLUE);
6302 OPCODE(CALL);
6303 OPCODE(SIBCALL);
6304 OPCODE(TLS_GDCALL);
6305 OPCODE(TLS_LDCALL);
6306 OPCODE(PCREL_WRAPPER);
6307 OPCODE(PCREL_OFFSET);
6308 OPCODE(ICMP);
6309 OPCODE(FCMP);
6310 OPCODE(STRICT_FCMP);
6311 OPCODE(STRICT_FCMPS);
6312 OPCODE(TM);
6313 OPCODE(BR_CCMASK);
6314 OPCODE(SELECT_CCMASK);
6315 OPCODE(ADJDYNALLOC);
6316 OPCODE(PROBED_ALLOCA);
6317 OPCODE(POPCNT);
6318 OPCODE(SMUL_LOHI);
6319 OPCODE(UMUL_LOHI);
6320 OPCODE(SDIVREM);
6321 OPCODE(UDIVREM);
6322 OPCODE(SADDO);
6323 OPCODE(SSUBO);
6324 OPCODE(UADDO);
6325 OPCODE(USUBO);
6326 OPCODE(ADDCARRY);
6327 OPCODE(SUBCARRY);
6328 OPCODE(GET_CCMASK);
6329 OPCODE(MVC);
6330 OPCODE(NC);
6331 OPCODE(OC);
6332 OPCODE(XC);
6333 OPCODE(CLC);
6334 OPCODE(MEMSET_MVC);
6335 OPCODE(STPCPY);
6336 OPCODE(STRCMP);
6337 OPCODE(SEARCH_STRING);
6338 OPCODE(IPM);
6339 OPCODE(TBEGIN);
6340 OPCODE(TBEGIN_NOFLOAT);
6341 OPCODE(TEND);
6342 OPCODE(BYTE_MASK);
6343 OPCODE(ROTATE_MASK);
6344 OPCODE(REPLICATE);
6345 OPCODE(JOIN_DWORDS);
6346 OPCODE(SPLAT);
6347 OPCODE(MERGE_HIGH);
6348 OPCODE(MERGE_LOW);
6349 OPCODE(SHL_DOUBLE);
6350 OPCODE(PERMUTE_DWORDS);
6351 OPCODE(PERMUTE);
6352 OPCODE(PACK);
6353 OPCODE(PACKS_CC);
6354 OPCODE(PACKLS_CC);
6355 OPCODE(UNPACK_HIGH);
6356 OPCODE(UNPACKL_HIGH);
6357 OPCODE(UNPACK_LOW);
6358 OPCODE(UNPACKL_LOW);
6359 OPCODE(VSHL_BY_SCALAR);
6360 OPCODE(VSRL_BY_SCALAR);
6361 OPCODE(VSRA_BY_SCALAR);
6362 OPCODE(VROTL_BY_SCALAR);
6363 OPCODE(VSUM);
6364 OPCODE(VACC);
6365 OPCODE(VSCBI);
6366 OPCODE(VAC);
6367 OPCODE(VSBI);
6368 OPCODE(VACCC);
6369 OPCODE(VSBCBI);
6370 OPCODE(VICMPE);
6371 OPCODE(VICMPH);
6372 OPCODE(VICMPHL);
6373 OPCODE(VICMPES);
6374 OPCODE(VICMPHS);
6375 OPCODE(VICMPHLS);
6376 OPCODE(VFCMPE);
6377 OPCODE(STRICT_VFCMPE);
6378 OPCODE(STRICT_VFCMPES);
6379 OPCODE(VFCMPH);
6380 OPCODE(STRICT_VFCMPH);
6381 OPCODE(STRICT_VFCMPHS);
6382 OPCODE(VFCMPHE);
6383 OPCODE(STRICT_VFCMPHE);
6384 OPCODE(STRICT_VFCMPHES);
6385 OPCODE(VFCMPES);
6386 OPCODE(VFCMPHS);
6387 OPCODE(VFCMPHES);
6388 OPCODE(VFTCI);
6389 OPCODE(VEXTEND);
6390 OPCODE(STRICT_VEXTEND);
6391 OPCODE(VROUND);
6392 OPCODE(STRICT_VROUND);
6393 OPCODE(VTM);
6394 OPCODE(SCMP128HI);
6395 OPCODE(UCMP128HI);
6396 OPCODE(VFAE_CC);
6397 OPCODE(VFAEZ_CC);
6398 OPCODE(VFEE_CC);
6399 OPCODE(VFEEZ_CC);
6400 OPCODE(VFENE_CC);
6401 OPCODE(VFENEZ_CC);
6402 OPCODE(VISTR_CC);
6403 OPCODE(VSTRC_CC);
6404 OPCODE(VSTRCZ_CC);
6405 OPCODE(VSTRS_CC);
6406 OPCODE(VSTRSZ_CC);
6407 OPCODE(TDC);
6408 OPCODE(ATOMIC_SWAPW);
6409 OPCODE(ATOMIC_LOADW_ADD);
6410 OPCODE(ATOMIC_LOADW_SUB);
6411 OPCODE(ATOMIC_LOADW_AND);
6412 OPCODE(ATOMIC_LOADW_OR);
6413 OPCODE(ATOMIC_LOADW_XOR);
6414 OPCODE(ATOMIC_LOADW_NAND);
6415 OPCODE(ATOMIC_LOADW_MIN);
6416 OPCODE(ATOMIC_LOADW_MAX);
6417 OPCODE(ATOMIC_LOADW_UMIN);
6418 OPCODE(ATOMIC_LOADW_UMAX);
6419 OPCODE(ATOMIC_CMP_SWAPW);
6420 OPCODE(ATOMIC_CMP_SWAP);
6421 OPCODE(ATOMIC_LOAD_128);
6422 OPCODE(ATOMIC_STORE_128);
6423 OPCODE(ATOMIC_CMP_SWAP_128);
6424 OPCODE(LRV);
6425 OPCODE(STRV);
6426 OPCODE(VLER);
6427 OPCODE(VSTER);
6428    OPCODE(PREFETCH);
6429    OPCODE(ADA_ENTRY);
6430 }
6431 return nullptr;
6432#undef OPCODE
6433}
6434
6435// Return true if VT is a vector whose elements are a whole number of bytes
6436// in width. Also check for presence of vector support.
6437bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
6438 if (!Subtarget.hasVector())
6439 return false;
6440
6441 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
6442}
6443
6444// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
6445// producing a result of type ResVT. Op is a possibly bitcast version
6446// of the input vector and Index is the index (based on type VecVT) that
6447// should be extracted. Return the new extraction if a simplification
6448// was possible or if Force is true.
6449SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
6450 EVT VecVT, SDValue Op,
6451 unsigned Index,
6452 DAGCombinerInfo &DCI,
6453 bool Force) const {
6454 SelectionDAG &DAG = DCI.DAG;
6455
6456 // The number of bytes being extracted.
6457 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6458
6459 for (;;) {
6460 unsigned Opcode = Op.getOpcode();
6461 if (Opcode == ISD::BITCAST)
6462 // Look through bitcasts.
6463 Op = Op.getOperand(0);
6464 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
6465 canTreatAsByteVector(Op.getValueType())) {
6466 // Get a VPERM-like permute mask and see whether the bytes covered
6467 // by the extracted element are a contiguous sequence from one
6468 // source operand.
6469      SmallVector<int, SystemZ::VectorBytes> Bytes;
6470      if (!getVPermMask(Op, Bytes))
6471 break;
6472 int First;
6473 if (!getShuffleInput(Bytes, Index * BytesPerElement,
6474 BytesPerElement, First))
6475 break;
6476 if (First < 0)
6477 return DAG.getUNDEF(ResVT);
6478 // Make sure the contiguous sequence starts at a multiple of the
6479 // original element size.
6480 unsigned Byte = unsigned(First) % Bytes.size();
6481 if (Byte % BytesPerElement != 0)
6482 break;
6483 // We can get the extracted value directly from an input.
6484 Index = Byte / BytesPerElement;
6485 Op = Op.getOperand(unsigned(First) / Bytes.size());
6486 Force = true;
6487 } else if (Opcode == ISD::BUILD_VECTOR &&
6488 canTreatAsByteVector(Op.getValueType())) {
6489 // We can only optimize this case if the BUILD_VECTOR elements are
6490 // at least as wide as the extracted value.
6491 EVT OpVT = Op.getValueType();
6492 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6493 if (OpBytesPerElement < BytesPerElement)
6494 break;
6495 // Make sure that the least-significant bit of the extracted value
6496 // is the least significant bit of an input.
6497 unsigned End = (Index + 1) * BytesPerElement;
6498 if (End % OpBytesPerElement != 0)
6499 break;
6500 // We're extracting the low part of one operand of the BUILD_VECTOR.
6501 Op = Op.getOperand(End / OpBytesPerElement - 1);
6502 if (!Op.getValueType().isInteger()) {
6503 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
6504 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
6505 DCI.AddToWorklist(Op.getNode());
6506 }
6507 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
6508 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
6509 if (VT != ResVT) {
6510 DCI.AddToWorklist(Op.getNode());
6511 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
6512 }
6513 return Op;
6514 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6515                Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
6516                Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6517 canTreatAsByteVector(Op.getValueType()) &&
6518 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
6519 // Make sure that only the unextended bits are significant.
6520 EVT ExtVT = Op.getValueType();
6521 EVT OpVT = Op.getOperand(0).getValueType();
6522 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
6523 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6524 unsigned Byte = Index * BytesPerElement;
6525 unsigned SubByte = Byte % ExtBytesPerElement;
6526 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
6527 if (SubByte < MinSubByte ||
6528 SubByte + BytesPerElement > ExtBytesPerElement)
6529 break;
6530 // Get the byte offset of the unextended element
6531 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
6532 // ...then add the byte offset relative to that element.
6533 Byte += SubByte - MinSubByte;
6534 if (Byte % BytesPerElement != 0)
6535 break;
6536 Op = Op.getOperand(0);
6537 Index = Byte / BytesPerElement;
6538 Force = true;
6539 } else
6540 break;
6541 }
6542 if (Force) {
6543 if (Op.getValueType() != VecVT) {
6544 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
6545 DCI.AddToWorklist(Op.getNode());
6546 }
6547 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
6548 DAG.getConstant(Index, DL, MVT::i32));
6549 }
6550 return SDValue();
6551}
6552
6553// Optimize vector operations in scalar value Op on the basis that Op
6554// is truncated to TruncVT.
6555SDValue SystemZTargetLowering::combineTruncateExtract(
6556 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
6557 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
6558 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
6559 // of type TruncVT.
6560 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6561 TruncVT.getSizeInBits() % 8 == 0) {
6562 SDValue Vec = Op.getOperand(0);
6563 EVT VecVT = Vec.getValueType();
6564 if (canTreatAsByteVector(VecVT)) {
6565 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6566 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6567 unsigned TruncBytes = TruncVT.getStoreSize();
6568 if (BytesPerElement % TruncBytes == 0) {
6569 // Calculate the value of Y' in the above description. We are
6570 // splitting the original elements into Scale equal-sized pieces
6571 // and for truncation purposes want the last (least-significant)
6572 // of these pieces for IndexN. This is easiest to do by calculating
6573 // the start index of the following element and then subtracting 1.
6574 unsigned Scale = BytesPerElement / TruncBytes;
6575 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
6576
6577 // Defer the creation of the bitcast from X to combineExtract,
6578 // which might be able to optimize the extraction.
6579 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
6580 VecVT.getStoreSize() / TruncBytes);
6581 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
6582 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
6583 }
6584 }
6585 }
6586 }
6587 return SDValue();
6588}
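// Illustrative standalone sketch (not part of SystemZISelLowering.cpp): the
// index arithmetic used by combineTruncateExtract.  Truncating element Y of a
// v2i64 to i16 is the same as extracting element Y' of the register viewed as
// v8i16, where Y' is the last (least-significant, big-endian) of the Scale
// pieces that element Y splits into.  The concrete numbers are hypothetical.
#include <cassert>

int main() {
  const unsigned BytesPerElement = 8;                  // v2i64 elements
  const unsigned TruncBytes = 2;                       // truncating to i16
  const unsigned Scale = BytesPerElement / TruncBytes; // 4 pieces per i64
  const unsigned IndexN = 1;                           // extract element 1
  const unsigned NewIndex = (IndexN + 1) * Scale - 1;
  assert(NewIndex == 7);  // i16 element 7 holds the low 16 bits of element 1
  return 0;
}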
6589
6590SDValue SystemZTargetLowering::combineZERO_EXTEND(
6591 SDNode *N, DAGCombinerInfo &DCI) const {
6592 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
6593 SelectionDAG &DAG = DCI.DAG;
6594 SDValue N0 = N->getOperand(0);
6595 EVT VT = N->getValueType(0);
6596  if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
6597    auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
6598 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6599 if (TrueOp && FalseOp) {
6600 SDLoc DL(N0);
6601 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
6602 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
6603 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
6604 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
6605 // If N0 has multiple uses, change other uses as well.
6606 if (!N0.hasOneUse()) {
6607 SDValue TruncSelect =
6608 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
6609 DCI.CombineTo(N0.getNode(), TruncSelect);
6610 }
6611 return NewSelect;
6612 }
6613 }
6614 return SDValue();
6615}
6616
6617SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
6618 SDNode *N, DAGCombinerInfo &DCI) const {
6619 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
6620 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
6621 // into (select_cc LHS, RHS, -1, 0, COND)
6622 SelectionDAG &DAG = DCI.DAG;
6623 SDValue N0 = N->getOperand(0);
6624 EVT VT = N->getValueType(0);
6625 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6626 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
6627 N0 = N0.getOperand(0);
6628 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
6629 SDLoc DL(N0);
6630 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
6631 DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
6632 N0.getOperand(2) };
6633 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
6634 }
6635 return SDValue();
6636}
6637
6638SDValue SystemZTargetLowering::combineSIGN_EXTEND(
6639 SDNode *N, DAGCombinerInfo &DCI) const {
6640 // Convert (sext (ashr (shl X, C1), C2)) to
6641 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
6642 // cheap as narrower ones.
6643 SelectionDAG &DAG = DCI.DAG;
6644 SDValue N0 = N->getOperand(0);
6645 EVT VT = N->getValueType(0);
6646 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
6647 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6648 SDValue Inner = N0.getOperand(0);
6649 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
6650 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
6651 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
6652 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
6653 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
6654 EVT ShiftVT = N0.getOperand(1).getValueType();
6655 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
6656 Inner.getOperand(0));
6657 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
6658 DAG.getConstant(NewShlAmt, SDLoc(Inner),
6659 ShiftVT));
6660 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
6661 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
6662 }
6663 }
6664 }
6665 return SDValue();
6666}
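// Illustrative standalone sketch (not part of SystemZISelLowering.cpp): the
// shift-amount adjustment behind combineSIGN_EXTEND.  Widening
// (sext i32->i64 (ashr (shl X, C1), C2)) keeps the same value if both shifts
// are performed on i64 with their amounts grown by the number of extra bits,
// because the isolated field then sits in the same position relative to the
// top of the wider register.  The constants below are hypothetical.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned Extra = 64 - 32;              // wider type minus narrow type
  const unsigned ShlAmt = 24, SraAmt = 28;
  const int32_t X = 0x5A;                      // payload in the low bits
  // Narrow form: shl/ashr on i32, then sign-extend the result.
  const int64_t Narrow = int64_t(int32_t(uint32_t(X) << ShlAmt) >> SraAmt);
  // Wide form: extend first, then shift by the adjusted amounts.
  const int64_t Wide = (int64_t(X) << (ShlAmt + Extra)) >> (SraAmt + Extra);
  assert(Narrow == Wide);
  return 0;
}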
6667
6668SDValue SystemZTargetLowering::combineMERGE(
6669 SDNode *N, DAGCombinerInfo &DCI) const {
6670 SelectionDAG &DAG = DCI.DAG;
6671 unsigned Opcode = N->getOpcode();
6672 SDValue Op0 = N->getOperand(0);
6673 SDValue Op1 = N->getOperand(1);
6674 if (Op0.getOpcode() == ISD::BITCAST)
6675 Op0 = Op0.getOperand(0);
6676  if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
6677    // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
6678 // for v4f32.
6679 if (Op1 == N->getOperand(0))
6680 return Op1;
6681 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
6682 EVT VT = Op1.getValueType();
6683 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
6684 if (ElemBytes <= 4) {
6685 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
6686                SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
6687      EVT InVT = VT.changeVectorElementTypeToInteger();
6688      EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
6689 SystemZ::VectorBytes / ElemBytes / 2);
6690 if (VT != InVT) {
6691 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
6692 DCI.AddToWorklist(Op1.getNode());
6693 }
6694 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
6695 DCI.AddToWorklist(Op.getNode());
6696 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
6697 }
6698 }
6699 return SDValue();
6700}
6701
6702SDValue SystemZTargetLowering::combineLOAD(
6703 SDNode *N, DAGCombinerInfo &DCI) const {
6704 SelectionDAG &DAG = DCI.DAG;
6705 EVT LdVT = N->getValueType(0);
6706 SDLoc DL(N);
6707
6708 // Replace an i128 load that is used solely to move its value into GPRs
6709 // by separate loads of both halves.
6710 if (LdVT == MVT::i128) {
6711 LoadSDNode *LD = cast<LoadSDNode>(N);
6712 if (!LD->isSimple() || !ISD::isNormalLoad(LD))
6713 return SDValue();
6714
6715 // Scan through all users.
6716    SmallVector<std::pair<SDNode*, int>, 2> Users;
6717    int UsedElements = 0;
6718 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6719 UI != UIEnd; ++UI) {
6720 // Skip the uses of the chain.
6721 if (UI.getUse().getResNo() != 0)
6722 continue;
6723
6724 // Verify every user is a TRUNCATE to i64 of the low or high half ...
6725 SDNode *User = *UI;
6726 int Index = 1;
6727 if (User->getOpcode() == ISD::SRL &&
6728 User->getOperand(1).getOpcode() == ISD::Constant &&
6729 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
6730 User = *User->use_begin();
6731 Index = 0;
6732 }
6733 if (User->getOpcode() != ISD::TRUNCATE ||
6734 User->getValueType(0) != MVT::i64)
6735 return SDValue();
6736
6737 // ... and no half is extracted twice.
6738 if (UsedElements & (1 << Index))
6739 return SDValue();
6740
6741 UsedElements |= 1 << Index;
6742 Users.push_back(std::make_pair(User, Index));
6743 }
6744
6745 // Rewrite each extraction as an independent load.
6746 SmallVector<SDValue, 2> ArgChains;
6747 for (auto UserAndIndex : Users) {
6748 SDNode *User = UserAndIndex.first;
6749 unsigned Offset = User->getValueType(0).getStoreSize() * UserAndIndex.second;
6750 SDValue Ptr =
6751 DAG.getMemBasePlusOffset(LD->getBasePtr(), TypeSize::getFixed(Offset), DL);
6752 SDValue EltLoad =
6753 DAG.getLoad(User->getValueType(0), DL, LD->getChain(), Ptr,
6754 LD->getPointerInfo().getWithOffset(Offset),
6755 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
6756 LD->getAAInfo());
6757
6758 DCI.CombineTo(User, EltLoad, true);
6759 ArgChains.push_back(EltLoad.getValue(1));
6760 }
6761
6762 // Collect all chains via TokenFactor.
6763 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
6764 ArgChains);
6765 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
6766 DCI.AddToWorklist(Chain.getNode());
6767 return SDValue(N, 0);
6768 }
6769
6770 if (LdVT.isVector() || LdVT.isInteger())
6771 return SDValue();
6772 // Transform a scalar load that is REPLICATEd as well as having other
6773 // use(s) to the form where the other use(s) use the first element of the
6774 // REPLICATE instead of the load. Otherwise instruction selection will not
6775 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
6776 // point loads.
6777
6778 SDValue Replicate;
6779 SmallVector<SDNode*, 8> OtherUses;
6780 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6781 UI != UE; ++UI) {
6782 if (UI->getOpcode() == SystemZISD::REPLICATE) {
6783 if (Replicate)
6784 return SDValue(); // Should never happen
6785 Replicate = SDValue(*UI, 0);
6786 }
6787 else if (UI.getUse().getResNo() == 0)
6788 OtherUses.push_back(*UI);
6789 }
6790 if (!Replicate || OtherUses.empty())
6791 return SDValue();
6792
6793 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
6794 Replicate, DAG.getConstant(0, DL, MVT::i32));
6795 // Update uses of the loaded Value while preserving old chains.
6796 for (SDNode *U : OtherUses) {
6797 SmallVector<SDValue, 8> Ops;
6798 for (SDValue Op : U->ops())
6799 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
6800 DAG.UpdateNodeOperands(U, Ops);
6801 }
6802 return SDValue(N, 0);
6803}
6804
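// Return true if a byte-swapping load or store of type VT can be done with a
// single byte-reversing memory instruction (LRVH/LRV/LRVG and
// STRVH/STRV/STRVG for scalars; VLBR/VSTBR for the vector and i128 cases,
// which need the vector-enhancements facility 2).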
6805bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
6806 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
6807 return true;
6808 if (Subtarget.hasVectorEnhancements2())
6809 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
6810 return true;
6811 return false;
6812}
6813
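// Return true if shuffle mask M reverses the element order of the 128-bit
// vector type VT; UNDEF positions in the mask are ignored.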
6814 static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
6815 if (!VT.isVector() || !VT.isSimple() ||
6816 VT.getSizeInBits() != 128 ||
6817 VT.getScalarSizeInBits() % 8 != 0)
6818 return false;
6819
6820 unsigned NumElts = VT.getVectorNumElements();
6821 for (unsigned i = 0; i < NumElts; ++i) {
6822 if (M[i] < 0) continue; // ignore UNDEF indices
6823 if ((unsigned) M[i] != NumElts - 1 - i)
6824 return false;
6825 }
6826
6827 return true;
6828}
6829
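// Return true if every use of StoredVal is a store whose element memory type
// can take the value directly (a power-of-2 size of at most 16 bytes), or a
// splat BUILD_VECTOR that is itself only used by such stores.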
6830static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
6831 for (auto *U : StoredVal->uses()) {
6832 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
6833 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
6834 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
6835 continue;
6836 } else if (isa<BuildVectorSDNode>(U)) {
6837 SDValue BuildVector = SDValue(U, 0);
6838 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
6839 isOnlyUsedByStores(BuildVector, DAG))
6840 continue;
6841 }
6842 return false;
6843 }
6844 return true;
6845}
6846
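// Return true if Val is an i128 value built from two i64 GPR halves as
//   (or (zext Lo), (shl (anyext Hi), 64))
// and set LoPart/HiPart to the corresponding i64 operands.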
6847static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) {
6848 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
6849 return false;
6850
6851 SDValue Op0 = Val.getOperand(0);
6852 SDValue Op1 = Val.getOperand(1);
6853
6854 if (Op0.getOpcode() == ISD::SHL)
6855 std::swap(Op0, Op1);
6856 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
6857 Op1.getOperand(1).getOpcode() != ISD::Constant ||
6858 Op1.getConstantOperandVal(1) != 64)
6859 return false;
6860 Op1 = Op1.getOperand(0);
6861
6862 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
6863 Op0.getOperand(0).getValueType() != MVT::i64)
6864 return false;
6865 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
6866 Op1.getOperand(0).getValueType() != MVT::i64)
6867 return false;
6868
6869 LoPart = Op0.getOperand(0);
6870 HiPart = Op1.getOperand(0);
6871 return true;
6872}
6873
6874SDValue SystemZTargetLowering::combineSTORE(
6875 SDNode *N, DAGCombinerInfo &DCI) const {
6876 SelectionDAG &DAG = DCI.DAG;
6877 auto *SN = cast<StoreSDNode>(N);
6878 auto &Op1 = N->getOperand(1);
6879 EVT MemVT = SN->getMemoryVT();
6880 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
6881 // for the extraction to be done on a vMiN value, so that we can use VSTE.
6882 // If X has wider elements then convert it to:
6883 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
6884 if (MemVT.isInteger() && SN->isTruncatingStore()) {
6885 if (SDValue Value =
6886 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
6887 DCI.AddToWorklist(Value.getNode());
6888
6889 // Rewrite the store with the new form of stored value.
6890 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
6891 SN->getBasePtr(), SN->getMemoryVT(),
6892 SN->getMemOperand());
6893 }
6894 }
6895 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
6896 if (!SN->isTruncatingStore() &&
6897 Op1.getOpcode() == ISD::BSWAP &&
6898 Op1.getNode()->hasOneUse() &&
6899 canLoadStoreByteSwapped(Op1.getValueType())) {
6900
6901 SDValue BSwapOp = Op1.getOperand(0);
6902
6903 if (BSwapOp.getValueType() == MVT::i16)
6904 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
6905
6906 SDValue Ops[] = {
6907 N->getOperand(0), BSwapOp, N->getOperand(2)
6908 };
6909
6910 return
6911 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
6912 Ops, MemVT, SN->getMemOperand());
6913 }
6914 // Combine STORE (element-swap) into VSTER
6915 if (!SN->isTruncatingStore() &&
6916 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
6917 Op1.getNode()->hasOneUse() &&
6918 Subtarget.hasVectorEnhancements2()) {
6919 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
6920 ArrayRef<int> ShuffleMask = SVN->getMask();
6921 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
6922 SDValue Ops[] = {
6923 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
6924 };
6925
6926 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
6927 DAG.getVTList(MVT::Other),
6928 Ops, MemVT, SN->getMemOperand());
6929 }
6930 }
6931
6932 // Transform a store of an i128 moved from GPRs into two separate stores.
6933 if (MemVT == MVT::i128 && SN->isSimple() && ISD::isNormalStore(SN)) {
6934 SDValue LoPart, HiPart;
6935 if (isMovedFromParts(Op1, LoPart, HiPart)) {
6936 SDLoc DL(SN);
6937 SDValue Chain0 =
6938 DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
6939 SN->getPointerInfo(), SN->getOriginalAlign(),
6940 SN->getMemOperand()->getFlags(), SN->getAAInfo());
6941 SDValue Chain1 =
6942 DAG.getStore(SN->getChain(), DL, LoPart,
6943 DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
6944 TypeSize::getFixed(8)),
6945 SN->getPointerInfo().getWithOffset(8),
6946 SN->getOriginalAlign(),
6947 SN->getMemOperand()->getFlags(), SN->getAAInfo());
6948
6949 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
6950 }
6951 }
6952
6953 // Replicate a reg or immediate with VREP instead of scalar multiply or
6954 // immediate load. It seems best to do this during the first DAGCombine as
6955 // it is straightforward to handle the zero-extend node in the initial
6956 // DAG, and also not worry about keeping the new MemVT legal (e.g. when
6957 // extracting an i16 element from a v16i8 vector).
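// For instance, an i64 store of the constant 0x0001000100010001 should be
// rewritten here as a normal store of a v4i16 splat of 1, which can then be
// selected as a vector replicate of the immediate rather than a scalar
// immediate load.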
6958 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
6959 isOnlyUsedByStores(Op1, DAG)) {
6960 SDValue Word = SDValue();
6961 EVT WordVT;
6962
6963 // Find a replicated immediate and return it if found in Word and its
6964 // type in WordVT.
6965 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
6966 // Some constants are better handled with a scalar store.
6967 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
6968 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
6969 return;
6970 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue()));
6971 if (VCI.isVectorConstantLegal(Subtarget) &&
6972 VCI.Opcode == SystemZISD::REPLICATE) {
6973 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
6974 WordVT = VCI.VecVT.getScalarType();
6975 }
6976 };
6977
6978 // Find a replicated register and return it if found in Word and its type
6979 // in WordVT.
6980 auto FindReplicatedReg = [&](SDValue MulOp) {
6981 EVT MulVT = MulOp.getValueType();
6982 if (MulOp->getOpcode() == ISD::MUL &&
6983 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
6984 // Find a zero extended value and its type.
6985 SDValue LHS = MulOp->getOperand(0);
6986 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
6987 WordVT = LHS->getOperand(0).getValueType();
6988 else if (LHS->getOpcode() == ISD::AssertZext)
6989 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
6990 else
6991 return;
6992 // Find a replicating constant, e.g. 0x00010001.
6993 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
6994 SystemZVectorConstantInfo VCI(
6995 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
6996 if (VCI.isVectorConstantLegal(Subtarget) &&
6997 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
6998 WordVT == VCI.VecVT.getScalarType())
6999 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
7000 }
7001 }
7002 };
7003
7004 if (isa<BuildVectorSDNode>(Op1) &&
7005 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
7006 SDValue SplatVal = Op1->getOperand(0);
7007 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
7008 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
7009 else
7010 FindReplicatedReg(SplatVal);
7011 } else {
7012 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
7013 FindReplicatedImm(C, MemVT.getStoreSize());
7014 else
7015 FindReplicatedReg(Op1);
7016 }
7017
7018 if (Word != SDValue()) {
7019 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
7020 "Bad type handling");
7021 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
7022 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
7023 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
7024 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
7025 SN->getBasePtr(), SN->getMemOperand());
7026 }
7027 }
7028
7029 return SDValue();
7030}
7031
7032SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
7033 SDNode *N, DAGCombinerInfo &DCI) const {
7034 SelectionDAG &DAG = DCI.DAG;
7035 // Combine element-swap (LOAD) into VLER
7036 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7037 N->getOperand(0).hasOneUse() &&
7038 Subtarget.hasVectorEnhancements2()) {
7039 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
7040 ArrayRef<int> ShuffleMask = SVN->getMask();
7041 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
7042 SDValue Load = N->getOperand(0);
7043 LoadSDNode *LD = cast<LoadSDNode>(Load);
7044
7045 // Create the element-swapping load.
7046 SDValue Ops[] = {
7047 LD->getChain(), // Chain
7048 LD->getBasePtr() // Ptr
7049 };
7050 SDValue ESLoad =
7051 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
7052 DAG.getVTList(LD->getValueType(0), MVT::Other),
7053 Ops, LD->getMemoryVT(), LD->getMemOperand());
7054
7055 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
7056 // by the load dead.
7057 DCI.CombineTo(N, ESLoad);
7058
7059 // Next, combine the load away, we give it a bogus result value but a real
7060 // chain result. The result value is dead because the shuffle is dead.
7061 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
7062
7063 // Return N so it doesn't get rechecked!
7064 return SDValue(N, 0);
7065 }
7066 }
7067
7068 return SDValue();
7069}
7070
7071SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
7072 SDNode *N, DAGCombinerInfo &DCI) const {
7073 SelectionDAG &DAG = DCI.DAG;
7074
7075 if (!Subtarget.hasVector())
7076 return SDValue();
7077
7078 // Look through bitcasts that retain the number of vector elements.
7079 SDValue Op = N->getOperand(0);
7080 if (Op.getOpcode() == ISD::BITCAST &&
7081 Op.getValueType().isVector() &&
7082 Op.getOperand(0).getValueType().isVector() &&
7083 Op.getValueType().getVectorNumElements() ==
7084 Op.getOperand(0).getValueType().getVectorNumElements())
7085 Op = Op.getOperand(0);
7086
7087 // Pull BSWAP out of a vector extraction.
7088 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
7089 EVT VecVT = Op.getValueType();
7090 EVT EltVT = VecVT.getVectorElementType();
7091 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
7092 Op.getOperand(0), N->getOperand(1));
7093 DCI.AddToWorklist(Op.getNode());
7094 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
7095 if (EltVT != N->getValueType(0)) {
7096 DCI.AddToWorklist(Op.getNode());
7097 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
7098 }
7099 return Op;
7100 }
7101
7102 // Try to simplify a vector extraction.
7103 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
7104 SDValue Op0 = N->getOperand(0);
7105 EVT VecVT = Op0.getValueType();
7106 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
7107 IndexN->getZExtValue(), DCI, false);
7108 }
7109 return SDValue();
7110}
7111
7112SDValue SystemZTargetLowering::combineJOIN_DWORDS(
7113 SDNode *N, DAGCombinerInfo &DCI) const {
7114 SelectionDAG &DAG = DCI.DAG;
7115 // (join_dwords X, X) == (replicate X)
7116 if (N->getOperand(0) == N->getOperand(1))
7117 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
7118 N->getOperand(0));
7119 return SDValue();
7120}
7121
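// Return the input chain shared by the two strict FP nodes N1 and N2, or a
// null SDValue if their chains differ (in which case the nodes cannot be
// merged).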
7122 static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
7123 SDValue Chain1 = N1->getOperand(0);
7124 SDValue Chain2 = N2->getOperand(0);
7125
7126 // Trivial case: both nodes take the same chain.
7127 if (Chain1 == Chain2)
7128 return Chain1;
7129
7130 // FIXME - we could handle more complex cases via TokenFactor,
7131 // assuming we can verify that this would not create a cycle.
7132 return SDValue();
7133}
7134
7135SDValue SystemZTargetLowering::combineFP_ROUND(
7136 SDNode *N, DAGCombinerInfo &DCI) const {
7137
7138 if (!Subtarget.hasVector())
7139 return SDValue();
7140
7141 // (fpround (extract_vector_elt X 0))
7142 // (fpround (extract_vector_elt X 1)) ->
7143 // (extract_vector_elt (VROUND X) 0)
7144 // (extract_vector_elt (VROUND X) 2)
7145 //
7146 // This is a special case since the target doesn't really support v2f32s.
7147 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7148 SelectionDAG &DAG = DCI.DAG;
7149 SDValue Op0 = N->getOperand(OpNo);
7150 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
7151 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7152 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
7153 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7154 Op0.getConstantOperandVal(1) == 0) {
7155 SDValue Vec = Op0.getOperand(0);
7156 for (auto *U : Vec->uses()) {
7157 if (U != Op0.getNode() && U->hasOneUse() &&
7158 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7159 U->getOperand(0) == Vec &&
7160 U->getOperand(1).getOpcode() == ISD::Constant &&
7161 U->getConstantOperandVal(1) == 1) {
7162 SDValue OtherRound = SDValue(*U->use_begin(), 0);
7163 if (OtherRound.getOpcode() == N->getOpcode() &&
7164 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
7165 OtherRound.getValueType() == MVT::f32) {
7166 SDValue VRound, Chain;
7167 if (N->isStrictFPOpcode()) {
7168 Chain = MergeInputChains(N, OtherRound.getNode());
7169 if (!Chain)
7170 continue;
7171 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
7172 {MVT::v4f32, MVT::Other}, {Chain, Vec});
7173 Chain = VRound.getValue(1);
7174 } else
7175 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
7176 MVT::v4f32, Vec);
7177 DCI.AddToWorklist(VRound.getNode());
7178 SDValue Extract1 =
7179 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
7180 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
7181 DCI.AddToWorklist(Extract1.getNode());
7182 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
7183 if (Chain)
7184 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
7185 SDValue Extract0 =
7186 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
7187 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7188 if (Chain)
7189 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7190 N->getVTList(), Extract0, Chain);
7191 return Extract0;
7192 }
7193 }
7194 }
7195 }
7196 return SDValue();
7197}
7198
7199SDValue SystemZTargetLowering::combineFP_EXTEND(
7200 SDNode *N, DAGCombinerInfo &DCI) const {
7201
7202 if (!Subtarget.hasVector())
7203 return SDValue();
7204
7205 // (fpextend (extract_vector_elt X 0))
7206 // (fpextend (extract_vector_elt X 2)) ->
7207 // (extract_vector_elt (VEXTEND X) 0)
7208 // (extract_vector_elt (VEXTEND X) 1)
7209 //
7210 // This is a special case since the target doesn't really support v2f32s.
7211 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7212 SelectionDAG &DAG = DCI.DAG;
7213 SDValue Op0 = N->getOperand(OpNo);
7214 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
7215 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7216 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
7217 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7218 Op0.getConstantOperandVal(1) == 0) {
7219 SDValue Vec = Op0.getOperand(0);
7220 for (auto *U : Vec->uses()) {
7221 if (U != Op0.getNode() && U->hasOneUse() &&
7222 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7223 U->getOperand(0) == Vec &&
7224 U->getOperand(1).getOpcode() == ISD::Constant &&
7225 U->getConstantOperandVal(1) == 2) {
7226 SDValue OtherExtend = SDValue(*U->use_begin(), 0);
7227 if (OtherExtend.getOpcode() == N->getOpcode() &&
7228 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
7229 OtherExtend.getValueType() == MVT::f64) {
7230 SDValue VExtend, Chain;
7231 if (N->isStrictFPOpcode()) {
7232 Chain = MergeInputChains(N, OtherExtend.getNode());
7233 if (!Chain)
7234 continue;
7235 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
7236 {MVT::v2f64, MVT::Other}, {Chain, Vec});
7237 Chain = VExtend.getValue(1);
7238 } else
7239 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
7240 MVT::v2f64, Vec);
7241 DCI.AddToWorklist(VExtend.getNode());
7242 SDValue Extract1 =
7243 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
7244 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
7245 DCI.AddToWorklist(Extract1.getNode());
7246 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
7247 if (Chain)
7248 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
7249 SDValue Extract0 =
7250 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
7251 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7252 if (Chain)
7253 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7254 N->getVTList(), Extract0, Chain);
7255 return Extract0;
7256 }
7257 }
7258 }
7259 }
7260 return SDValue();
7261}
7262
7263SDValue SystemZTargetLowering::combineINT_TO_FP(
7264 SDNode *N, DAGCombinerInfo &DCI) const {
7265 if (DCI.Level != BeforeLegalizeTypes)
7266 return SDValue();
7267 SelectionDAG &DAG = DCI.DAG;
7268 LLVMContext &Ctx = *DAG.getContext();
7269 unsigned Opcode = N->getOpcode();
7270 EVT OutVT = N->getValueType(0);
7271 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
7272 SDValue Op = N->getOperand(0);
7273 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
7274 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
7275
7276 // Insert an extension before type-legalization to avoid scalarization, e.g.:
7277 // v2f64 = uint_to_fp v2i16
7278 // =>
7279 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
7280 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
7281 OutScalarBits <= 64) {
7282 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
7283 EVT ExtVT = EVT::getVectorVT(
7284 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
7285 unsigned ExtOpcode =
7286 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
7287 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
7288 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
7289 }
7290 return SDValue();
7291}
7292
7293SDValue SystemZTargetLowering::combineBSWAP(
7294 SDNode *N, DAGCombinerInfo &DCI) const {
7295 SelectionDAG &DAG = DCI.DAG;
7296 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
7297 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7298 N->getOperand(0).hasOneUse() &&
7299 canLoadStoreByteSwapped(N->getValueType(0))) {
7300 SDValue Load = N->getOperand(0);
7301 LoadSDNode *LD = cast<LoadSDNode>(Load);
7302
7303 // Create the byte-swapping load.
7304 SDValue Ops[] = {
7305 LD->getChain(), // Chain
7306 LD->getBasePtr() // Ptr
7307 };
7308 EVT LoadVT = N->getValueType(0);
7309 if (LoadVT == MVT::i16)
7310 LoadVT = MVT::i32;
7311 SDValue BSLoad =
7312 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
7313 DAG.getVTList(LoadVT, MVT::Other),
7314 Ops, LD->getMemoryVT(), LD->getMemOperand());
7315
7316 // If this is an i16 load, insert the truncate.
7317 SDValue ResVal = BSLoad;
7318 if (N->getValueType(0) == MVT::i16)
7319 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
7320
7321 // First, combine the bswap away. This makes the value produced by the
7322 // load dead.
7323 DCI.CombineTo(N, ResVal);
7324
7325 // Next, combine the load away, we give it a bogus result value but a real
7326 // chain result. The result value is dead because the bswap is dead.
7327 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
7328
7329 // Return N so it doesn't get rechecked!
7330 return SDValue(N, 0);
7331 }
7332
7333 // Look through bitcasts that retain the number of vector elements.
7334 SDValue Op = N->getOperand(0);
7335 if (Op.getOpcode() == ISD::BITCAST &&
7336 Op.getValueType().isVector() &&
7337 Op.getOperand(0).getValueType().isVector() &&
7338 Op.getValueType().getVectorNumElements() ==
7339 Op.getOperand(0).getValueType().getVectorNumElements())
7340 Op = Op.getOperand(0);
7341
7342 // Push BSWAP into a vector insertion if at least one side then simplifies.
7343 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
7344 SDValue Vec = Op.getOperand(0);
7345 SDValue Elt = Op.getOperand(1);
7346 SDValue Idx = Op.getOperand(2);
7347
7348 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
7349 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
7350 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
7351 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
7352 (canLoadStoreByteSwapped(N->getValueType(0)) &&
7353 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
7354 EVT VecVT = N->getValueType(0);
7355 EVT EltVT = N->getValueType(0).getVectorElementType();
7356 if (VecVT != Vec.getValueType()) {
7357 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
7358 DCI.AddToWorklist(Vec.getNode());
7359 }
7360 if (EltVT != Elt.getValueType()) {
7361 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
7362 DCI.AddToWorklist(Elt.getNode());
7363 }
7364 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
7365 DCI.AddToWorklist(Vec.getNode());
7366 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
7367 DCI.AddToWorklist(Elt.getNode());
7368 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
7369 Vec, Elt, Idx);
7370 }
7371 }
7372
7373 // Push BSWAP into a vector shuffle if at least one side then simplifies.
7374 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
7375 if (SV && Op.hasOneUse()) {
7376 SDValue Op0 = Op.getOperand(0);
7377 SDValue Op1 = Op.getOperand(1);
7378
7379 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
7380 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
7381 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
7382 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
7383 EVT VecVT = N->getValueType(0);
7384 if (VecVT != Op0.getValueType()) {
7385 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
7386 DCI.AddToWorklist(Op0.getNode());
7387 }
7388 if (VecVT != Op1.getValueType()) {
7389 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
7390 DCI.AddToWorklist(Op1.getNode());
7391 }
7392 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
7393 DCI.AddToWorklist(Op0.getNode());
7394 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
7395 DCI.AddToWorklist(Op1.getNode());
7396 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
7397 }
7398 }
7399
7400 return SDValue();
7401}
7402
7403static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
7404 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
7405 // set by the CCReg instruction using the CCValid / CCMask masks.
7406 // If the CCReg instruction is itself an ICMP testing the condition
7407 // code set by some other instruction, see whether we can directly
7408 // use that condition code.
7409
7410 // Verify that we have an ICMP against some constant.
7411 if (CCValid != SystemZ::CCMASK_ICMP)
7412 return false;
7413 auto *ICmp = CCReg.getNode();
7414 if (ICmp->getOpcode() != SystemZISD::ICMP)
7415 return false;
7416 auto *CompareLHS = ICmp->getOperand(0).getNode();
7417 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
7418 if (!CompareRHS)
7419 return false;
7420
7421 // Optimize the case where CompareLHS is a SELECT_CCMASK.
7422 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
7423 // Verify that we have an appropriate mask for an EQ or NE comparison.
7424 bool Invert = false;
7425 if (CCMask == SystemZ::CCMASK_CMP_NE)
7426 Invert = !Invert;
7427 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
7428 return false;
7429
7430 // Verify that the ICMP compares against one of the select values.
7431 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
7432 if (!TrueVal)
7433 return false;
7434 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7435 if (!FalseVal)
7436 return false;
7437 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
7438 Invert = !Invert;
7439 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
7440 return false;
7441
7442 // Compute the effective CC mask for the new branch or select.
7443 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
7444 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
7445 if (!NewCCValid || !NewCCMask)
7446 return false;
7447 CCValid = NewCCValid->getZExtValue();
7448 CCMask = NewCCMask->getZExtValue();
7449 if (Invert)
7450 CCMask ^= CCValid;
7451
7452 // Return the updated CCReg link.
7453 CCReg = CompareLHS->getOperand(4);
7454 return true;
7455 }
7456
7457 // Optimize the case where CompareRHS is (SRA (SHL (IPM))).
7458 if (CompareLHS->getOpcode() == ISD::SRA) {
7459 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7460 if (!SRACount || SRACount->getZExtValue() != 30)
7461 return false;
7462 auto *SHL = CompareLHS->getOperand(0).getNode();
7463 if (SHL->getOpcode() != ISD::SHL)
7464 return false;
7465 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
7466 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
7467 return false;
7468 auto *IPM = SHL->getOperand(0).getNode();
7469 if (IPM->getOpcode() != SystemZISD::IPM)
7470 return false;
7471
7472 // Avoid introducing CC spills (because SRA would clobber CC).
7473 if (!CompareLHS->hasOneUse())
7474 return false;
7475 // Verify that the ICMP compares against zero.
7476 if (CompareRHS->getZExtValue() != 0)
7477 return false;
7478
7479 // Compute the effective CC mask for the new branch or select.
7480 CCMask = SystemZ::reverseCCMask(CCMask);
7481
7482 // Return the updated CCReg link.
7483 CCReg = IPM->getOperand(0);
7484 return true;
7485 }
7486
7487 return false;
7488}
7489
7490SDValue SystemZTargetLowering::combineBR_CCMASK(
7491 SDNode *N, DAGCombinerInfo &DCI) const {
7492 SelectionDAG &DAG = DCI.DAG;
7493
7494 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
7495 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7496 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7497 if (!CCValid || !CCMask)
7498 return SDValue();
7499
7500 int CCValidVal = CCValid->getZExtValue();
7501 int CCMaskVal = CCMask->getZExtValue();
7502 SDValue Chain = N->getOperand(0);
7503 SDValue CCReg = N->getOperand(4);
7504
7505 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7506 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
7507 Chain,
7508 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7509 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7510 N->getOperand(3), CCReg);
7511 return SDValue();
7512}
7513
7514SDValue SystemZTargetLowering::combineSELECT_CCMASK(
7515 SDNode *N, DAGCombinerInfo &DCI) const {
7516 SelectionDAG &DAG = DCI.DAG;
7517
7518 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
7519 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
7520 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
7521 if (!CCValid || !CCMask)
7522 return SDValue();
7523
7524 int CCValidVal = CCValid->getZExtValue();
7525 int CCMaskVal = CCMask->getZExtValue();
7526 SDValue CCReg = N->getOperand(4);
7527
7528 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7529 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
7530 N->getOperand(0), N->getOperand(1),
7531 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7532 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7533 CCReg);
7534 return SDValue();
7535}
7536
7537
7538SDValue SystemZTargetLowering::combineGET_CCMASK(
7539 SDNode *N, DAGCombinerInfo &DCI) const {
7540
7541 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
7542 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7543 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7544 if (!CCValid || !CCMask)
7545 return SDValue();
7546 int CCValidVal = CCValid->getZExtValue();
7547 int CCMaskVal = CCMask->getZExtValue();
7548
7549 SDValue Select = N->getOperand(0);
7550 if (Select->getOpcode() == ISD::TRUNCATE)
7551 Select = Select->getOperand(0);
7552 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
7553 return SDValue();
7554
7555 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
7556 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
7557 if (!SelectCCValid || !SelectCCMask)
7558 return SDValue();
7559 int SelectCCValidVal = SelectCCValid->getZExtValue();
7560 int SelectCCMaskVal = SelectCCMask->getZExtValue();
7561
7562 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
7563 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
7564 if (!TrueVal || !FalseVal)
7565 return SDValue();
7566 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
7567 ;
7568 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
7569 SelectCCMaskVal ^= SelectCCValidVal;
7570 else
7571 return SDValue();
7572
7573 if (SelectCCValidVal & ~CCValidVal)
7574 return SDValue();
7575 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
7576 return SDValue();
7577
7578 return Select->getOperand(4);
7579}
7580
7581SDValue SystemZTargetLowering::combineIntDIVREM(
7582 SDNode *N, DAGCombinerInfo &DCI) const {
7583 SelectionDAG &DAG = DCI.DAG;
7584 EVT VT = N->getValueType(0);
7585 // In the case where the divisor is a vector of constants a cheaper
7586 // sequence of instructions can replace the divide. BuildSDIV is called to
7587 // do this during DAG combining, but it only succeeds when it can build a
7588 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
7589 // since it is not Legal but Custom it can only happen before
7590 // legalization. Therefore we must scalarize this early before Combine
7591 // 1. For widened vectors, this is already the result of type legalization.
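// For example, a v2i64 sdiv by a constant build_vector is unrolled here into
// two scalar i64 divisions, which later combines can optimize individually.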
7592 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
7593 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
7594 return DAG.UnrollVectorOp(N);
7595 return SDValue();
7596}
7597
7598SDValue SystemZTargetLowering::combineINTRINSIC(
7599 SDNode *N, DAGCombinerInfo &DCI) const {
7600 SelectionDAG &DAG = DCI.DAG;
7601
7602 unsigned Id = N->getConstantOperandVal(1);
7603 switch (Id) {
7604 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
7605 // or larger is simply a vector load.
7606 case Intrinsic::s390_vll:
7607 case Intrinsic::s390_vlrl:
7608 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
7609 if (C->getZExtValue() >= 15)
7610 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
7611 N->getOperand(3), MachinePointerInfo());
7612 break;
7613 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
7614 case Intrinsic::s390_vstl:
7615 case Intrinsic::s390_vstrl:
7616 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
7617 if (C->getZExtValue() >= 15)
7618 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
7619 N->getOperand(4), MachinePointerInfo());
7620 break;
7621 }
7622
7623 return SDValue();
7624}
7625
7626SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
7627 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
7628 return N->getOperand(0);
7629 return N;
7630}
7631
7632 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
7633 DAGCombinerInfo &DCI) const {
7634 switch(N->getOpcode()) {
7635 default: break;
7636 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
7637 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
7638 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
7639 case SystemZISD::MERGE_HIGH:
7640 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
7641 case ISD::LOAD: return combineLOAD(N, DCI);
7642 case ISD::STORE: return combineSTORE(N, DCI);
7643 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
7644 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
7645 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
7646 case ISD::STRICT_FP_ROUND:
7647 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
7648 case ISD::STRICT_FP_EXTEND:
7649 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
7650 case ISD::SINT_TO_FP:
7651 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
7652 case ISD::BSWAP: return combineBSWAP(N, DCI);
7653 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
7654 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
7655 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
7656 case ISD::SDIV:
7657 case ISD::UDIV:
7658 case ISD::SREM:
7659 case ISD::UREM: return combineIntDIVREM(N, DCI);
7660 case ISD::INTRINSIC_W_CHAIN:
7661 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
7662 }
7663
7664 return SDValue();
7665}
7666
7667// Return the demanded elements for the OpNo source operand of Op. DemandedElts
7668// are for Op.
7669static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
7670 unsigned OpNo) {
7671 EVT VT = Op.getValueType();
7672 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
7673 APInt SrcDemE;
7674 unsigned Opcode = Op.getOpcode();
7675 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7676 unsigned Id = Op.getConstantOperandVal(0);
7677 switch (Id) {
7678 case Intrinsic::s390_vpksh: // PACKS
7679 case Intrinsic::s390_vpksf:
7680 case Intrinsic::s390_vpksg:
7681 case Intrinsic::s390_vpkshs: // PACKS_CC
7682 case Intrinsic::s390_vpksfs:
7683 case Intrinsic::s390_vpksgs:
7684 case Intrinsic::s390_vpklsh: // PACKLS
7685 case Intrinsic::s390_vpklsf:
7686 case Intrinsic::s390_vpklsg:
7687 case Intrinsic::s390_vpklshs: // PACKLS_CC
7688 case Intrinsic::s390_vpklsfs:
7689 case Intrinsic::s390_vpklsgs:
7690 // VECTOR PACK truncates the elements of two source vectors into one.
7691 SrcDemE = DemandedElts;
7692 if (OpNo == 2)
7693 SrcDemE.lshrInPlace(NumElts / 2);
7694 SrcDemE = SrcDemE.trunc(NumElts / 2);
7695 break;
7696 // VECTOR UNPACK extends half the elements of the source vector.
7697 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7698 case Intrinsic::s390_vuphh:
7699 case Intrinsic::s390_vuphf:
7700 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7701 case Intrinsic::s390_vuplhh:
7702 case Intrinsic::s390_vuplhf:
7703 SrcDemE = APInt(NumElts * 2, 0);
7704 SrcDemE.insertBits(DemandedElts, 0);
7705 break;
7706 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7707 case Intrinsic::s390_vuplhw:
7708 case Intrinsic::s390_vuplf:
7709 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7710 case Intrinsic::s390_vupllh:
7711 case Intrinsic::s390_vupllf:
7712 SrcDemE = APInt(NumElts * 2, 0);
7713 SrcDemE.insertBits(DemandedElts, NumElts);
7714 break;
7715 case Intrinsic::s390_vpdi: {
7716 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
7717 SrcDemE = APInt(NumElts, 0);
7718 if (!DemandedElts[OpNo - 1])
7719 break;
7720 unsigned Mask = Op.getConstantOperandVal(3);
7721 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
7722 // Demand input element 0 or 1, given by the mask bit value.
7723 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
7724 break;
7725 }
7726 case Intrinsic::s390_vsldb: {
7727 // VECTOR SHIFT LEFT DOUBLE BY BYTE
7728 assert(VT == MVT::v16i8 && "Unexpected type.");
7729 unsigned FirstIdx = Op.getConstantOperandVal(3);
7730 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
7731 unsigned NumSrc0Els = 16 - FirstIdx;
7732 SrcDemE = APInt(NumElts, 0);
7733 if (OpNo == 1) {
7734 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
7735 SrcDemE.insertBits(DemEls, FirstIdx);
7736 } else {
7737 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
7738 SrcDemE.insertBits(DemEls, 0);
7739 }
7740 break;
7741 }
7742 case Intrinsic::s390_vperm:
7743 SrcDemE = APInt(NumElts, 1);
7744 break;
7745 default:
7746 llvm_unreachable("Unhandled intrinsic.");
7747 break;
7748 }
7749 } else {
7750 switch (Opcode) {
7751 case SystemZISD::JOIN_DWORDS:
7752 // Scalar operand.
7753 SrcDemE = APInt(1, 1);
7754 break;
7755 case SystemZISD::SELECT_CCMASK:
7756 SrcDemE = DemandedElts;
7757 break;
7758 default:
7759 llvm_unreachable("Unhandled opcode.");
7760 break;
7761 }
7762 }
7763 return SrcDemE;
7764}
7765
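// Intersect the known bits of the two source operands of Op, starting at
// operand OpNo, that contribute to the demanded result elements.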
7766static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
7767 const APInt &DemandedElts,
7768 const SelectionDAG &DAG, unsigned Depth,
7769 unsigned OpNo) {
7770 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7771 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7772 KnownBits LHSKnown =
7773 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7774 KnownBits RHSKnown =
7775 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7776 Known = LHSKnown.intersectWith(RHSKnown);
7777}
7778
7779void
7780 SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
7781 KnownBits &Known,
7782 const APInt &DemandedElts,
7783 const SelectionDAG &DAG,
7784 unsigned Depth) const {
7785 Known.resetAll();
7786
7787 // Intrinsic CC result is returned in the two low bits.
7788 unsigned tmp0, tmp1; // not used
7789 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
7790 Known.Zero.setBitsFrom(2);
7791 return;
7792 }
7793 EVT VT = Op.getValueType();
7794 if (Op.getResNo() != 0 || VT == MVT::Untyped)
7795 return;
7796 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
7797 "KnownBits does not match VT in bitwidth");
7798 assert ((!VT.isVector() ||
7799 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
7800 "DemandedElts does not match VT number of elements");
7801 unsigned BitWidth = Known.getBitWidth();
7802 unsigned Opcode = Op.getOpcode();
7803 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7804 bool IsLogical = false;
7805 unsigned Id = Op.getConstantOperandVal(0);
7806 switch (Id) {
7807 case Intrinsic::s390_vpksh: // PACKS
7808 case Intrinsic::s390_vpksf:
7809 case Intrinsic::s390_vpksg:
7810 case Intrinsic::s390_vpkshs: // PACKS_CC
7811 case Intrinsic::s390_vpksfs:
7812 case Intrinsic::s390_vpksgs:
7813 case Intrinsic::s390_vpklsh: // PACKLS
7814 case Intrinsic::s390_vpklsf:
7815 case Intrinsic::s390_vpklsg:
7816 case Intrinsic::s390_vpklshs: // PACKLS_CC
7817 case Intrinsic::s390_vpklsfs:
7818 case Intrinsic::s390_vpklsgs:
7819 case Intrinsic::s390_vpdi:
7820 case Intrinsic::s390_vsldb:
7821 case Intrinsic::s390_vperm:
7822 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
7823 break;
7824 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7825 case Intrinsic::s390_vuplhh:
7826 case Intrinsic::s390_vuplhf:
7827 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7828 case Intrinsic::s390_vupllh:
7829 case Intrinsic::s390_vupllf:
7830 IsLogical = true;
7831 [[fallthrough]];
7832 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7833 case Intrinsic::s390_vuphh:
7834 case Intrinsic::s390_vuphf:
7835 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7836 case Intrinsic::s390_vuplhw:
7837 case Intrinsic::s390_vuplf: {
7838 SDValue SrcOp = Op.getOperand(1);
7839 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
7840 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
7841 if (IsLogical) {
7842 Known = Known.zext(BitWidth);
7843 } else
7844 Known = Known.sext(BitWidth);
7845 break;
7846 }
7847 default:
7848 break;
7849 }
7850 } else {
7851 switch (Opcode) {
7852 case SystemZISD::JOIN_DWORDS:
7853 case SystemZISD::SELECT_CCMASK:
7854 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
7855 break;
7856 case SystemZISD::REPLICATE: {
7857 SDValue SrcOp = Op.getOperand(0);
7858 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
7859 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
7860 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
7861 break;
7862 }
7863 default:
7864 break;
7865 }
7866 }
7867
7868 // Known has the width of the source operand(s). Adjust if needed to match
7869 // the passed bitwidth.
7870 if (Known.getBitWidth() != BitWidth)
7871 Known = Known.anyextOrTrunc(BitWidth);
7872}
7873
7874static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
7875 const SelectionDAG &DAG, unsigned Depth,
7876 unsigned OpNo) {
7877 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7878 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7879 if (LHS == 1) return 1; // Early out.
7880 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7881 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7882 if (RHS == 1) return 1; // Early out.
7883 unsigned Common = std::min(LHS, RHS);
7884 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
7885 EVT VT = Op.getValueType();
7886 unsigned VTBits = VT.getScalarSizeInBits();
7887 if (SrcBitWidth > VTBits) { // PACK
7888 unsigned SrcExtraBits = SrcBitWidth - VTBits;
7889 if (Common > SrcExtraBits)
7890 return (Common - SrcExtraBits);
7891 return 1;
7892 }
7893 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
7894 return Common;
7895}
7896
7897unsigned
7898 SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
7899 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
7900 unsigned Depth) const {
7901 if (Op.getResNo() != 0)
7902 return 1;
7903 unsigned Opcode = Op.getOpcode();
7904 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7905 unsigned Id = Op.getConstantOperandVal(0);
7906 switch (Id) {
7907 case Intrinsic::s390_vpksh: // PACKS
7908 case Intrinsic::s390_vpksf:
7909 case Intrinsic::s390_vpksg:
7910 case Intrinsic::s390_vpkshs: // PACKS_CC
7911 case Intrinsic::s390_vpksfs:
7912 case Intrinsic::s390_vpksgs:
7913 case Intrinsic::s390_vpklsh: // PACKLS
7914 case Intrinsic::s390_vpklsf:
7915 case Intrinsic::s390_vpklsg:
7916 case Intrinsic::s390_vpklshs: // PACKLS_CC
7917 case Intrinsic::s390_vpklsfs:
7918 case Intrinsic::s390_vpklsgs:
7919 case Intrinsic::s390_vpdi:
7920 case Intrinsic::s390_vsldb:
7921 case Intrinsic::s390_vperm:
7922 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
7923 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7924 case Intrinsic::s390_vuphh:
7925 case Intrinsic::s390_vuphf:
7926 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7927 case Intrinsic::s390_vuplhw:
7928 case Intrinsic::s390_vuplf: {
7929 SDValue PackedOp = Op.getOperand(1);
7930 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
7931 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
7932 EVT VT = Op.getValueType();
7933 unsigned VTBits = VT.getScalarSizeInBits();
7934 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
7935 return Tmp;
7936 }
7937 default:
7938 break;
7939 }
7940 } else {
7941 switch (Opcode) {
7942 case SystemZISD::SELECT_CCMASK:
7943 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
7944 default:
7945 break;
7946 }
7947 }
7948
7949 return 1;
7950}
7951
7952 bool SystemZTargetLowering::
7953 isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op,
7954 const APInt &DemandedElts, const SelectionDAG &DAG,
7955 bool PoisonOnly, unsigned Depth) const {
7956 switch (Op->getOpcode()) {
7957 case SystemZISD::PCREL_WRAPPER:
7958 case SystemZISD::PCREL_OFFSET:
7959 return true;
7960 }
7961 return false;
7962}
7963
7964unsigned
7965 SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
7966 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
7967 unsigned StackAlign = TFI->getStackAlignment();
7968 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
7969 "Unexpected stack alignment");
7970 // The default stack probe size is 4096 if the function has no
7971 // stack-probe-size attribute.
7972 unsigned StackProbeSize =
7973 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
7974 // Round down to the stack alignment.
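// (E.g. with the usual 8-byte stack alignment, a "stack-probe-size" of 100
// is rounded down to 96.)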
7975 StackProbeSize &= ~(StackAlign - 1);
7976 return StackProbeSize ? StackProbeSize : StackAlign;
7977}
7978
7979//===----------------------------------------------------------------------===//
7980// Custom insertion
7981//===----------------------------------------------------------------------===//
7982
7983// Force base value Base into a register before MI. Return the register.
7984 static Register forceReg(MachineInstr &MI, MachineOperand &Base,
7985 const SystemZInstrInfo *TII) {
7986 MachineBasicBlock *MBB = MI.getParent();
7987 MachineFunction &MF = *MBB->getParent();
7988 MachineRegisterInfo &MRI = MF.getRegInfo();
7989 
7990 if (Base.isReg()) {
7991 // Copy Base into a new virtual register to help register coalescing in
7992 // cases with multiple uses.
7993 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
7994 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
7995 .add(Base);
7996 return Reg;
7997 }
7998
7999 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8000 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
8001 .add(Base)
8002 .addImm(0)
8003 .addReg(0);
8004 return Reg;
8005}
8006
8007// The CC operand of MI might be missing a kill marker because there
8008// were multiple uses of CC, and ISel didn't know which to mark.
8009 // Figure out whether MI should have had a kill marker.
8010 static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
8011 // Scan forward through BB for a use/def of CC.
8012 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
8013 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
8014 const MachineInstr& mi = *miI;
8015 if (mi.readsRegister(SystemZ::CC))
8016 return false;
8017 if (mi.definesRegister(SystemZ::CC))
8018 break; // Should have kill-flag - update below.
8019 }
8020
8021 // If we hit the end of the block, check whether CC is live into a
8022 // successor.
8023 if (miI == MBB->end()) {
8024 for (const MachineBasicBlock *Succ : MBB->successors())
8025 if (Succ->isLiveIn(SystemZ::CC))
8026 return false;
8027 }
8028
8029 return true;
8030}
8031
8032// Return true if it is OK for this Select pseudo-opcode to be cascaded
8033// together with other Select pseudo-opcodes into a single basic-block with
8034// a conditional jump around it.
8035 static bool isSelectPseudo(MachineInstr &MI) {
8036 switch (MI.getOpcode()) {
8037 case SystemZ::Select32:
8038 case SystemZ::Select64:
8039 case SystemZ::Select128:
8040 case SystemZ::SelectF32:
8041 case SystemZ::SelectF64:
8042 case SystemZ::SelectF128:
8043 case SystemZ::SelectVR32:
8044 case SystemZ::SelectVR64:
8045 case SystemZ::SelectVR128:
8046 return true;
8047
8048 default:
8049 return false;
8050 }
8051}
8052
8053// Helper function, which inserts PHI functions into SinkMBB:
8054// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
8055// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
8056 static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
8057 MachineBasicBlock *TrueMBB,
8058 MachineBasicBlock *FalseMBB,
8059 MachineBasicBlock *SinkMBB) {
8060 MachineFunction *MF = TrueMBB->getParent();
8061 const SystemZInstrInfo *TII = MF->getSubtarget<SystemZSubtarget>().getInstrInfo();
8062 
8063 MachineInstr *FirstMI = Selects.front();
8064 unsigned CCValid = FirstMI->getOperand(3).getImm();
8065 unsigned CCMask = FirstMI->getOperand(4).getImm();
8066
8067 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
8068
8069 // As we are creating the PHIs, we have to be careful if there is more than
8070 // one. Later Selects may reference the results of earlier Selects, but later
8071 // PHIs have to reference the individual true/false inputs from earlier PHIs.
8072 // That also means that PHI construction must work forward from earlier to
8073 // later, and that the code must maintain a mapping from each earlier PHI's
8074 // destination register to the registers that went into that PHI.
8075 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
8076 
8077 for (auto *MI : Selects) {
8078 Register DestReg = MI->getOperand(0).getReg();
8079 Register TrueReg = MI->getOperand(1).getReg();
8080 Register FalseReg = MI->getOperand(2).getReg();
8081
8082 // If this Select we are generating is the opposite condition from
8083 // the jump we generated, then we have to swap the operands for the
8084 // PHI that is going to be generated.
8085 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
8086 std::swap(TrueReg, FalseReg);
8087
8088 if (RegRewriteTable.contains(TrueReg))
8089 TrueReg = RegRewriteTable[TrueReg].first;
8090
8091 if (RegRewriteTable.contains(FalseReg))
8092 FalseReg = RegRewriteTable[FalseReg].second;
8093
8094 DebugLoc DL = MI->getDebugLoc();
8095 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
8096 .addReg(TrueReg).addMBB(TrueMBB)
8097 .addReg(FalseReg).addMBB(FalseMBB);
8098
8099 // Add this PHI to the rewrite table.
8100 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
8101 }
8102
8103 MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
8104 }
8105
8106// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
8107 MachineBasicBlock *
8108 SystemZTargetLowering::emitSelect(MachineInstr &MI,
8109 MachineBasicBlock *MBB) const {
8110 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
8111 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8112
8113 unsigned CCValid = MI.getOperand(3).getImm();
8114 unsigned CCMask = MI.getOperand(4).getImm();
8115
8116 // If we have a sequence of Select* pseudo instructions using the
8117 // same condition code value, we want to expand all of them into
8118 // a single pair of basic blocks using the same condition.
8119 SmallVector<MachineInstr*, 8> Selects;
8120 SmallVector<MachineInstr*, 8> DbgValues;
8121 Selects.push_back(&MI);
8122 unsigned Count = 0;
8123 for (MachineInstr &NextMI : llvm::make_range(
8124 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
8125 if (isSelectPseudo(NextMI)) {
8126 assert(NextMI.getOperand(3).getImm() == CCValid &&
8127 "Bad CCValid operands since CC was not redefined.");
8128 if (NextMI.getOperand(4).getImm() == CCMask ||
8129 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
8130 Selects.push_back(&NextMI);
8131 continue;
8132 }
8133 break;
8134 }
8135 if (NextMI.definesRegister(SystemZ::CC) || NextMI.usesCustomInsertionHook())
8136 break;
8137 bool User = false;
8138 for (auto *SelMI : Selects)
8139 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
8140 User = true;
8141 break;
8142 }
8143 if (NextMI.isDebugInstr()) {
8144 if (User) {
8145 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
8146 DbgValues.push_back(&NextMI);
8147 }
8148 } else if (User || ++Count > 20)
8149 break;
8150 }
8151
8152 MachineInstr *LastMI = Selects.back();
8153 bool CCKilled =
8154 (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
8155 MachineBasicBlock *StartMBB = MBB;
8156 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
8157 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8158
8159 // Unless CC was killed in the last Select instruction, mark it as
8160 // live-in to both FalseMBB and JoinMBB.
8161 if (!CCKilled) {
8162 FalseMBB->addLiveIn(SystemZ::CC);
8163 JoinMBB->addLiveIn(SystemZ::CC);
8164 }
8165
8166 // StartMBB:
8167 // BRC CCMask, JoinMBB
8168 // # fallthrough to FalseMBB
8169 MBB = StartMBB;
8170 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8171 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8172 MBB->addSuccessor(JoinMBB);
8173 MBB->addSuccessor(FalseMBB);
8174
8175 // FalseMBB:
8176 // # fallthrough to JoinMBB
8177 MBB = FalseMBB;
8178 MBB->addSuccessor(JoinMBB);
8179
8180 // JoinMBB:
8181 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
8182 // ...
8183 MBB = JoinMBB;
8184 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
8185 for (auto *SelMI : Selects)
8186 SelMI->eraseFromParent();
8187
8188 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
8189 for (auto *DbgMI : DbgValues)
8190 MBB->splice(InsertPos, StartMBB, DbgMI);
8191
8192 return JoinMBB;
8193}
8194
8195// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
8196// StoreOpcode is the store to use and Invert says whether the store should
8197// happen when the condition is false rather than true. If a STORE ON
8198// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
8199MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
8200 MachineBasicBlock *MBB,
8201 unsigned StoreOpcode,
8202 unsigned STOCOpcode,
8203 bool Invert) const {
8204 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8205
8206 Register SrcReg = MI.getOperand(0).getReg();
8207 MachineOperand Base = MI.getOperand(1);
8208 int64_t Disp = MI.getOperand(2).getImm();
8209 Register IndexReg = MI.getOperand(3).getReg();
8210 unsigned CCValid = MI.getOperand(4).getImm();
8211 unsigned CCMask = MI.getOperand(5).getImm();
8212 DebugLoc DL = MI.getDebugLoc();
8213
8214 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
8215
8216 // ISel pattern matching also adds a load memory operand of the same
8217 // address, so take special care to find the storing memory operand.
8218 MachineMemOperand *MMO = nullptr;
8219 for (auto *I : MI.memoperands())
8220 if (I->isStore()) {
8221 MMO = I;
8222 break;
8223 }
8224
8225 // Use STOCOpcode if possible. We could use different store patterns in
8226 // order to avoid matching the index register, but the performance trade-offs
8227 // might be more complicated in that case.
8228 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
8229 if (Invert)
8230 CCMask ^= CCValid;
8231
8232 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
8233 .addReg(SrcReg)
8234 .add(Base)
8235 .addImm(Disp)
8236 .addImm(CCValid)
8237 .addImm(CCMask)
8238 .addMemOperand(MMO);
8239
8240 MI.eraseFromParent();
8241 return MBB;
8242 }
8243
8244 // Get the condition needed to branch around the store.
8245 if (!Invert)
8246 CCMask ^= CCValid;
8247
8248 MachineBasicBlock *StartMBB = MBB;
8249 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
8250 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8251
8252 // Unless CC was killed in the CondStore instruction, mark it as
8253 // live-in to both FalseMBB and JoinMBB.
8254 if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) {
8255 FalseMBB->addLiveIn(SystemZ::CC);
8256 JoinMBB->addLiveIn(SystemZ::CC);
8257 }
8258
8259 // StartMBB:
8260 // BRC CCMask, JoinMBB
8261 // # fallthrough to FalseMBB
8262 MBB = StartMBB;
8263 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8264 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8265 MBB->addSuccessor(JoinMBB);
8266 MBB->addSuccessor(FalseMBB);
8267
8268 // FalseMBB:
8269 // store %SrcReg, %Disp(%Index,%Base)
8270 // # fallthrough to JoinMBB
8271 MBB = FalseMBB;
8272 BuildMI(MBB, DL, TII->get(StoreOpcode))
8273 .addReg(SrcReg)
8274 .add(Base)
8275 .addImm(Disp)
8276 .addReg(IndexReg)
8277 .addMemOperand(MMO);
8278 MBB->addSuccessor(JoinMBB);
8279
8280 MI.eraseFromParent();
8281 return JoinMBB;
8282}
8283
8284// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
8285 MachineBasicBlock *
8286 SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
8287 MachineBasicBlock *MBB,
8288 bool Unsigned) const {
8289 MachineFunction &MF = *MBB->getParent();
8290 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8291 MachineRegisterInfo &MRI = MF.getRegInfo();
8292 
8293 // Synthetic instruction to compare 128-bit values.
8294 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
8295 Register Op0 = MI.getOperand(0).getReg();
8296 Register Op1 = MI.getOperand(1).getReg();
8297
8298 MachineBasicBlock *StartMBB = MBB;
8299 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
8300 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
8301
8302 // StartMBB:
8303 //
8304 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
8305 // Swap the inputs to get:
8306 // CC 1 if high(Op0) > high(Op1)
8307 // CC 2 if high(Op0) < high(Op1)
8308 // CC 0 if high(Op0) == high(Op1)
8309 //
8310 // If CC != 0, we're done, so jump over the next instruction.
8311 //
8312 // VEC[L]G Op1, Op0
8313 // JNE JoinMBB
8314 // # fallthrough to HiEqMBB
8315 MBB = StartMBB;
8316 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
8317 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
8318 .addReg(Op1).addReg(Op0);
8319 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8320 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
8321 MBB->addSuccessor(JoinMBB);
8322 MBB->addSuccessor(HiEqMBB);
8323
8324 // HiEqMBB:
8325 //
8326 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
8327 // Since we already know the high parts are equal, the CC
8328 // result will only depend on the low parts:
8329 // CC 1 if low(Op0) > low(Op1)
8330 // CC 3 if low(Op0) <= low(Op1)
8331 //
8332 // VCHLGS Tmp, Op0, Op1
8333 // # fallthrough to JoinMBB
8334 MBB = HiEqMBB;
8335 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
8336 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
8337 .addReg(Op0).addReg(Op1);
8338 MBB->addSuccessor(JoinMBB);
8339
8340 // Mark CC as live-in to JoinMBB.
8341 JoinMBB->addLiveIn(SystemZ::CC);
8342
8343 MI.eraseFromParent();
8344 return JoinMBB;
8345}
8346
8347// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
8348// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
8349// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
8350// whether the field should be inverted after performing BinOpcode (e.g. for
8351// NAND).
8352MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
8353 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
8354 bool Invert) const {
8355 MachineFunction &MF = *MBB->getParent();
8356 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8357 MachineRegisterInfo &MRI = MF.getRegInfo();
8358
8359 // Extract the operands. Base can be a register or a frame index.
8360 // Src2 can be a register or immediate.
8361 Register Dest = MI.getOperand(0).getReg();
8362 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8363 int64_t Disp = MI.getOperand(2).getImm();
8364 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
8365 Register BitShift = MI.getOperand(4).getReg();
8366 Register NegBitShift = MI.getOperand(5).getReg();
8367 unsigned BitSize = MI.getOperand(6).getImm();
8368 DebugLoc DL = MI.getDebugLoc();
8369
8370 // Get the right opcodes for the displacement.
8371 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8372 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8373 assert(LOpcode && CSOpcode && "Displacement out of range");
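// (getOpcodeForOffset selects between the short 12-bit unsigned displacement
// forms (L/CS) and the long 20-bit signed forms (LY/CSY), returning 0 if the
// displacement fits neither, hence the assertion above.)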
8374
8375 // Create virtual registers for temporary results.
8376 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8377 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8378 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8379 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8380 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8381
8382 // Insert a basic block for the main loop.
8383 MachineBasicBlock *StartMBB = MBB;
8384 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8385 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8386
8387 // StartMBB:
8388 // ...
8389 // %OrigVal = L Disp(%Base)
8390 // # fall through to LoopMBB
8391 MBB = StartMBB;
8392 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8393 MBB->addSuccessor(LoopMBB);
8394
8395 // LoopMBB:
8396 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
8397 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8398 // %RotatedNewVal = OP %RotatedOldVal, %Src2
8399 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8400 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8401 // JNE LoopMBB
8402 // # fall through to DoneMBB
8403 MBB = LoopMBB;
8404 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8405 .addReg(OrigVal).addMBB(StartMBB)
8406 .addReg(Dest).addMBB(LoopMBB);
8407 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8408 .addReg(OldVal).addReg(BitShift).addImm(0);
8409 if (Invert) {
8410 // Perform the operation normally and then invert every bit of the field.
8411 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8412 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
8413 // XILF with the upper BitSize bits set.
8414 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
8415 .addReg(Tmp).addImm(-1U << (32 - BitSize));
8416 } else if (BinOpcode)
8417 // A simple binary operation.
8418 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
8419 .addReg(RotatedOldVal)
8420 .add(Src2);
8421 else
8422 // Use RISBG to rotate Src2 into position and use it to replace the
8423 // field in RotatedOldVal.
8424 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
8425 .addReg(RotatedOldVal).addReg(Src2.getReg())
8426 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
8427 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8428 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8429 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8430 .addReg(OldVal)
8431 .addReg(NewVal)
8432 .add(Base)
8433 .addImm(Disp);
8434 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8436 MBB->addSuccessor(LoopMBB);
8437 MBB->addSuccessor(DoneMBB);
8438
8439 MI.eraseFromParent();
8440 return DoneMBB;
8441}
8442
8443// Implement EmitInstrWithCustomInserter for subword pseudo
8444// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
8445// instruction that should be used to compare the current field with the
8446// minimum or maximum value. KeepOldMask is the BRC condition-code mask
8447// for when the current field should be kept.
8448MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
8449 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
8450 unsigned KeepOldMask) const {
8451 MachineFunction &MF = *MBB->getParent();
8452 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8453 MachineRegisterInfo &MRI = MF.getRegInfo();
8454
8455 // Extract the operands. Base can be a register or a frame index.
8456 Register Dest = MI.getOperand(0).getReg();
8457 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8458 int64_t Disp = MI.getOperand(2).getImm();
8459 Register Src2 = MI.getOperand(3).getReg();
8460 Register BitShift = MI.getOperand(4).getReg();
8461 Register NegBitShift = MI.getOperand(5).getReg();
8462 unsigned BitSize = MI.getOperand(6).getImm();
8463 DebugLoc DL = MI.getDebugLoc();
8464
8465 // Get the right opcodes for the displacement.
8466 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8467 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8468 assert(LOpcode && CSOpcode && "Displacement out of range");
8469
8470 // Create virtual registers for temporary results.
8471 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8472 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8473 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8474 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8475 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8476 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8477
8478 // Insert 3 basic blocks for the loop.
8479 MachineBasicBlock *StartMBB = MBB;
8480 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8481 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8482 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
8483 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
8484
8485 // StartMBB:
8486 // ...
8487 // %OrigVal = L Disp(%Base)
8488 // # fall through to LoopMBB
8489 MBB = StartMBB;
8490 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8491 MBB->addSuccessor(LoopMBB);
8492
8493 // LoopMBB:
8494 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
8495 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8496 // CompareOpcode %RotatedOldVal, %Src2
8497 // BRC KeepOldMask, UpdateMBB
8498 MBB = LoopMBB;
8499 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8500 .addReg(OrigVal).addMBB(StartMBB)
8501 .addReg(Dest).addMBB(UpdateMBB);
8502 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8503 .addReg(OldVal).addReg(BitShift).addImm(0);
8504 BuildMI(MBB, DL, TII->get(CompareOpcode))
8505 .addReg(RotatedOldVal).addReg(Src2);
8506 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8507 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
8508 MBB->addSuccessor(UpdateMBB);
8509 MBB->addSuccessor(UseAltMBB);
8510
8511 // UseAltMBB:
8512 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
8513 // # fall through to UpdateMBB
8514 MBB = UseAltMBB;
8515 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
8516 .addReg(RotatedOldVal).addReg(Src2)
8517 .addImm(32).addImm(31 + BitSize).addImm(0);
8518 MBB->addSuccessor(UpdateMBB);
8519
8520 // UpdateMBB:
8521 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
8522 // [ %RotatedAltVal, UseAltMBB ]
8523 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8524 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8525 // JNE LoopMBB
8526 // # fall through to DoneMBB
8527 MBB = UpdateMBB;
8528 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
8529 .addReg(RotatedOldVal).addMBB(LoopMBB)
8530 .addReg(RotatedAltVal).addMBB(UseAltMBB);
8531 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8532 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8533 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8534 .addReg(OldVal)
8535 .addReg(NewVal)
8536 .add(Base)
8537 .addImm(Disp);
8538 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8540 MBB->addSuccessor(LoopMBB);
8541 MBB->addSuccessor(DoneMBB);
8542
8543 MI.eraseFromParent();
8544 return DoneMBB;
8545}
8546
8547// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
8548// instruction MI.
8549 MachineBasicBlock *
8550 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
8551 MachineBasicBlock *MBB) const {
8552 MachineFunction &MF = *MBB->getParent();
8553 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8554 MachineRegisterInfo &MRI = MF.getRegInfo();
8555
8556 // Extract the operands. Base can be a register or a frame index.
8557 Register Dest = MI.getOperand(0).getReg();
8558 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8559 int64_t Disp = MI.getOperand(2).getImm();
8560 Register CmpVal = MI.getOperand(3).getReg();
8561 Register OrigSwapVal = MI.getOperand(4).getReg();
8562 Register BitShift = MI.getOperand(5).getReg();
8563 Register NegBitShift = MI.getOperand(6).getReg();
8564 int64_t BitSize = MI.getOperand(7).getImm();
8565 DebugLoc DL = MI.getDebugLoc();
8566
8567 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
8568
8569 // Get the right opcodes for the displacement and zero-extension.
8570 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8571 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8572 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
8573 assert(LOpcode && CSOpcode && "Displacement out of range");
8574
8575 // Create virtual registers for temporary results.
8576 Register OrigOldVal = MRI.createVirtualRegister(RC);
8577 Register OldVal = MRI.createVirtualRegister(RC);
8578 Register SwapVal = MRI.createVirtualRegister(RC);
8579 Register StoreVal = MRI.createVirtualRegister(RC);
8580 Register OldValRot = MRI.createVirtualRegister(RC);
8581 Register RetryOldVal = MRI.createVirtualRegister(RC);
8582 Register RetrySwapVal = MRI.createVirtualRegister(RC);
8583
8584 // Insert 2 basic blocks for the loop.
8585 MachineBasicBlock *StartMBB = MBB;
8586 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8587 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8588 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
8589
8590 // StartMBB:
8591 // ...
8592 // %OrigOldVal = L Disp(%Base)
8593 // # fall through to LoopMBB
8594 MBB = StartMBB;
8595 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
8596 .add(Base)
8597 .addImm(Disp)
8598 .addReg(0);
8599 MBB->addSuccessor(LoopMBB);
8600
8601 // LoopMBB:
8602 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
8603 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
8604 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
8605 // ^^ The low BitSize bits contain the field
8606 // of interest.
8607 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
8608 // ^^ Replace the upper 32-BitSize bits of the
8609 // swap value with those that we loaded and rotated.
8610 // %Dest = LL[CH] %OldValRot
8611 // CR %Dest, %CmpVal
8612 // JNE DoneMBB
8613 // # Fall through to SetMBB
8614 MBB = LoopMBB;
8615 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8616 .addReg(OrigOldVal).addMBB(StartMBB)
8617 .addReg(RetryOldVal).addMBB(SetMBB);
8618 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
8619 .addReg(OrigSwapVal).addMBB(StartMBB)
8620 .addReg(RetrySwapVal).addMBB(SetMBB);
8621 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
8622 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
8623 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
8624 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
8625 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
8626 .addReg(OldValRot);
8627 BuildMI(MBB, DL, TII->get(SystemZ::CR))
8628 .addReg(Dest).addReg(CmpVal);
8629 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8632 MBB->addSuccessor(DoneMBB);
8633 MBB->addSuccessor(SetMBB);
8634
8635 // SetMBB:
8636 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
8637 // ^^ Rotate the new field to its proper position.
8638 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
8639 // JNE LoopMBB
8640 // # fall through to ExitMBB
8641 MBB = SetMBB;
8642 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
8643 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
8644 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
8645 .addReg(OldVal)
8646 .addReg(StoreVal)
8647 .add(Base)
8648 .addImm(Disp);
8649 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8651 MBB->addSuccessor(LoopMBB);
8652 MBB->addSuccessor(DoneMBB);
8653
8654 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
8655 // to the block after the loop. At this point, CC may have been defined
8656 // either by the CR in LoopMBB or by the CS in SetMBB.
8657 if (!MI.registerDefIsDead(SystemZ::CC))
8658 DoneMBB->addLiveIn(SystemZ::CC);
8659
8660 MI.eraseFromParent();
8661 return DoneMBB;
8662}
8663
8664// Emit a move from two GR64s to a GR128.
8665 MachineBasicBlock *
8666 SystemZTargetLowering::emitPair128(MachineInstr &MI,
8667 MachineBasicBlock *MBB) const {
8668 MachineFunction &MF = *MBB->getParent();
8669 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8670 MachineRegisterInfo &MRI = MF.getRegInfo();
8671 DebugLoc DL = MI.getDebugLoc();
8672
8673 Register Dest = MI.getOperand(0).getReg();
8674 Register Hi = MI.getOperand(1).getReg();
8675 Register Lo = MI.getOperand(2).getReg();
8676 Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8677 Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8678
8679 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
8680 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
8681 .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
8682 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8683 .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);
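// (The IMPLICIT_DEF provides an undefined 128-bit value into which the two
// INSERT_SUBREGs place Hi and Lo; after register allocation this typically
// lowers to plain 64-bit register moves.)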
8684
8685 MI.eraseFromParent();
8686 return MBB;
8687}
8688
8689// Emit an extension from a GR64 to a GR128. ClearEven is true
8690// if the high register of the GR128 value must be cleared or false if
8691// it's "don't care".
8692MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
8693                                                      MachineBasicBlock *MBB,
8694                                                      bool ClearEven) const {
8695 MachineFunction &MF = *MBB->getParent();
8696 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8697 MachineRegisterInfo &MRI = MF.getRegInfo();
8698 DebugLoc DL = MI.getDebugLoc();
8699
8700 Register Dest = MI.getOperand(0).getReg();
8701 Register Src = MI.getOperand(1).getReg();
8702 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8703
8704 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
8705 if (ClearEven) {
8706 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8707 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8708
8709 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
8710 .addImm(0);
8711 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
8712 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
8713 In128 = NewIn128;
8714 }
8715 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8716 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
8717
8718 MI.eraseFromParent();
8719 return MBB;
8720}
8721
8722 MachineBasicBlock *
8723 SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
8724                                          MachineBasicBlock *MBB,
8725                                          unsigned Opcode, bool IsMemset) const {
8726 MachineFunction &MF = *MBB->getParent();
8727 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8728 MachineRegisterInfo &MRI = MF.getRegInfo();
8729 DebugLoc DL = MI.getDebugLoc();
8730
8731 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
8732 uint64_t DestDisp = MI.getOperand(1).getImm();
8733 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
8734 uint64_t SrcDisp;
8735
8736 // Fold the displacement Disp if it is out of range.
8737 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
8738 if (!isUInt<12>(Disp)) {
8739 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8740 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
8741 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
8742 .add(Base).addImm(Disp).addReg(0);
8743 Base = MachineOperand::CreateReg(Reg, false);
8744 Disp = 0;
8745 }
8746 };
8747
8748 if (!IsMemset) {
8749 SrcBase = earlyUseOperand(MI.getOperand(2));
8750 SrcDisp = MI.getOperand(3).getImm();
8751 } else {
8752 SrcBase = DestBase;
8753 SrcDisp = DestDisp++;
8754 foldDisplIfNeeded(DestBase, DestDisp);
8755 }
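// (For memset the source deliberately overlaps the destination shifted by one
// byte: insertMemMemOp first stores the fill byte at SrcDisp with MVI/STC, and
// the following MVC, which operates strictly byte by byte, copies SrcDisp into
// SrcDisp+1 and so on, propagating that byte through the whole range.)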
8756
8757 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
8758 bool IsImmForm = LengthMO.isImm();
8759 bool IsRegForm = !IsImmForm;
8760
8761 // Build and insert one Opcode of Length, with special treatment for memset.
8762 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
8763                           MachineBasicBlock::iterator InsPos,
8764                           MachineOperand DBase, uint64_t DDisp,
8765                           MachineOperand SBase, uint64_t SDisp,
8766                           unsigned Length) -> void {
8767 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
8768 if (IsMemset) {
8769 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
8770 if (ByteMO.isImm())
8771 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
8772 .add(SBase).addImm(SDisp).add(ByteMO);
8773 else
8774 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
8775 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
8776 if (--Length == 0)
8777 return;
8778 }
8779 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
8780 .add(DBase).addImm(DDisp).addImm(Length)
8781 .add(SBase).addImm(SDisp)
8782 .setMemRefs(MI.memoperands());
8783 };
8784
8785 bool NeedsLoop = false;
8786 uint64_t ImmLength = 0;
8787 Register LenAdjReg = SystemZ::NoRegister;
8788 if (IsImmForm) {
8789 ImmLength = LengthMO.getImm();
8790 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
8791 if (ImmLength == 0) {
8792 MI.eraseFromParent();
8793 return MBB;
8794 }
8795 if (Opcode == SystemZ::CLC) {
8796 if (ImmLength > 3 * 256)
8797 // A two-CLC sequence is a clear win over a loop, not least because
8798 // it needs only one branch. A three-CLC sequence needs the same
8799 // number of branches as a loop (i.e. 2), but is shorter. That
8800 // brings us to lengths greater than 768 bytes. It seems relatively
8801 // likely that a difference will be found within the first 768 bytes,
8802 // so we just optimize for the smallest number of branch
8803 // instructions, in order to avoid polluting the prediction buffer
8804 // too much.
8805 NeedsLoop = true;
8806 } else if (ImmLength > 6 * 256)
8807 // The heuristic we use is to prefer loops for anything that would
8808 // require 7 or more MVCs. With these kinds of sizes there isn't much
8809 // to choose between straight-line code and looping code, since the
8810 // time will be dominated by the MVCs themselves.
8811 NeedsLoop = true;
8812 } else {
8813 NeedsLoop = true;
8814 LenAdjReg = LengthMO.getReg();
8815 }
8816
8817 // When generating more than one CLC, all but the last will need to
8818 // branch to the end when a difference is found.
8819 MachineBasicBlock *EndMBB =
8820 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
8821 ? SystemZ::splitBlockAfter(MI, MBB)
8822 : nullptr);
8823
8824 if (NeedsLoop) {
8825 Register StartCountReg =
8826 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8827 if (IsImmForm) {
8828 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
8829 ImmLength &= 255;
8830 } else {
8831 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
8832 .addReg(LenAdjReg)
8833 .addReg(0)
8834 .addImm(8);
8835 }
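// (LenAdjReg holds the requested length minus 1, or minus 2 for memset, so the
// shift by 8 gives the number of full 256-byte ops the loop must emit; the
// remaining 1..256 bytes are handled by the EXRL built in DoneMBB below.)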
8836
8837 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
8838 auto loadZeroAddress = [&]() -> MachineOperand {
8839 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8840 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
8841 return MachineOperand::CreateReg(Reg, false);
8842 };
8843 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
8844 DestBase = loadZeroAddress();
8845 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
8846 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
8847
8848 MachineBasicBlock *StartMBB = nullptr;
8849 MachineBasicBlock *LoopMBB = nullptr;
8850 MachineBasicBlock *NextMBB = nullptr;
8851 MachineBasicBlock *DoneMBB = nullptr;
8852 MachineBasicBlock *AllDoneMBB = nullptr;
8853
8854 Register StartSrcReg = forceReg(MI, SrcBase, TII);
8855 Register StartDestReg =
8856 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
8857
8858 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
8859 Register ThisSrcReg = MRI.createVirtualRegister(RC);
8860 Register ThisDestReg =
8861 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
8862 Register NextSrcReg = MRI.createVirtualRegister(RC);
8863 Register NextDestReg =
8864 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
8865 RC = &SystemZ::GR64BitRegClass;
8866 Register ThisCountReg = MRI.createVirtualRegister(RC);
8867 Register NextCountReg = MRI.createVirtualRegister(RC);
8868
8869 if (IsRegForm) {
8870 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8871 StartMBB = SystemZ::emitBlockAfter(MBB);
8872 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8873 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
8874 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
8875
8876 // MBB:
8877 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
8878 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8879 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
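// (The length operand was biased down by 1, or by 2 for memset, when the
// pseudo was built, so an original length of zero appears here as -1 / -2;
// that is the case the CGHI detects so the whole operation can be skipped.)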
8880 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8882 .addMBB(AllDoneMBB);
8883 MBB->addSuccessor(AllDoneMBB);
8884 if (!IsMemset)
8885 MBB->addSuccessor(StartMBB);
8886 else {
8887 // MemsetOneCheckMBB:
8888 // # Jump to MemsetOneMBB for a memset of length 1, or
8889 // # fall thru to StartMBB.
8890 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
8891 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
8892 MBB->addSuccessor(MemsetOneCheckMBB);
8893 MBB = MemsetOneCheckMBB;
8894 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8895 .addReg(LenAdjReg).addImm(-1);
8896 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8898 .addMBB(MemsetOneMBB);
8899 MBB->addSuccessor(MemsetOneMBB, {10, 100});
8900 MBB->addSuccessor(StartMBB, {90, 100});
8901
8902 // MemsetOneMBB:
8903 // # Jump back to AllDoneMBB after a single MVI or STC.
8904 MBB = MemsetOneMBB;
8905 insertMemMemOp(MBB, MBB->end(),
8906 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
8907 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
8908 1);
8909 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
8910 MBB->addSuccessor(AllDoneMBB);
8911 }
8912
8913 // StartMBB:
8914 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
8915 MBB = StartMBB;
8916 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8917 .addReg(StartCountReg).addImm(0);
8918 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8920 .addMBB(DoneMBB);
8921 MBB->addSuccessor(DoneMBB);
8922 MBB->addSuccessor(LoopMBB);
8923 }
8924 else {
8925 StartMBB = MBB;
8926 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8927 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8928 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
8929
8930 // StartMBB:
8931 // # fall through to LoopMBB
8932 MBB->addSuccessor(LoopMBB);
8933
8934 DestBase = MachineOperand::CreateReg(NextDestReg, false);
8935 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
8936 if (EndMBB && !ImmLength)
8937 // If the loop handled the whole CLC range, DoneMBB will be empty with
8938 // CC live-through into EndMBB, so add it as live-in.
8939 DoneMBB->addLiveIn(SystemZ::CC);
8940 }
8941
8942 // LoopMBB:
8943 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
8944 // [ %NextDestReg, NextMBB ]
8945 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
8946 // [ %NextSrcReg, NextMBB ]
8947 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
8948 // [ %NextCountReg, NextMBB ]
8949 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
8950 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
8951 // ( JLH EndMBB )
8952 //
8953 // The prefetch is used only for MVC. The JLH is used only for CLC.
8954 MBB = LoopMBB;
8955 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
8956 .addReg(StartDestReg).addMBB(StartMBB)
8957 .addReg(NextDestReg).addMBB(NextMBB);
8958 if (!HaveSingleBase)
8959 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
8960 .addReg(StartSrcReg).addMBB(StartMBB)
8961 .addReg(NextSrcReg).addMBB(NextMBB);
8962 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
8963 .addReg(StartCountReg).addMBB(StartMBB)
8964 .addReg(NextCountReg).addMBB(NextMBB);
8965 if (Opcode == SystemZ::MVC)
8966 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
8968 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
8969 insertMemMemOp(MBB, MBB->end(),
8970 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
8971 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
8972 if (EndMBB) {
8973 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8975 .addMBB(EndMBB);
8976 MBB->addSuccessor(EndMBB);
8977 MBB->addSuccessor(NextMBB);
8978 }
8979
8980 // NextMBB:
8981 // %NextDestReg = LA 256(%ThisDestReg)
8982 // %NextSrcReg = LA 256(%ThisSrcReg)
8983 // %NextCountReg = AGHI %ThisCountReg, -1
8984 // CGHI %NextCountReg, 0
8985 // JLH LoopMBB
8986 // # fall through to DoneMBB
8987 //
8988 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
8989 MBB = NextMBB;
8990 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
8991 .addReg(ThisDestReg).addImm(256).addReg(0);
8992 if (!HaveSingleBase)
8993 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
8994 .addReg(ThisSrcReg).addImm(256).addReg(0);
8995 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
8996 .addReg(ThisCountReg).addImm(-1);
8997 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8998 .addReg(NextCountReg).addImm(0);
8999 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9001 .addMBB(LoopMBB);
9002 MBB->addSuccessor(LoopMBB);
9003 MBB->addSuccessor(DoneMBB);
9004
9005 MBB = DoneMBB;
9006 if (IsRegForm) {
9007 // DoneMBB:
9008 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
9009 // # Use EXecute Relative Long for the remainder of the bytes. The target
9010 // instruction of the EXRL will have a length field of 1 since 0 is an
9011 // illegal value. The number of bytes processed becomes (%LenAdjReg &
9012 // 0xff) + 1.
9013 // # Fall through to AllDoneMBB.
9014 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9015 Register RemDestReg = HaveSingleBase ? RemSrcReg
9016 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9017 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
9018 .addReg(StartDestReg).addMBB(StartMBB)
9019 .addReg(NextDestReg).addMBB(NextMBB);
9020 if (!HaveSingleBase)
9021 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
9022 .addReg(StartSrcReg).addMBB(StartMBB)
9023 .addReg(NextSrcReg).addMBB(NextMBB);
9024 if (IsMemset)
9025 insertMemMemOp(MBB, MBB->end(),
9026 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
9027 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
9028 MachineInstrBuilder EXRL_MIB =
9029 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
9030 .addImm(Opcode)
9031 .addReg(LenAdjReg)
9032 .addReg(RemDestReg).addImm(DestDisp)
9033 .addReg(RemSrcReg).addImm(SrcDisp);
9034 MBB->addSuccessor(AllDoneMBB);
9035 MBB = AllDoneMBB;
9036 if (Opcode != SystemZ::MVC) {
9037 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
9038 if (EndMBB)
9039 MBB->addLiveIn(SystemZ::CC);
9040 }
9041 }
9042 MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
9043 }
9044
9045 // Handle any remaining bytes with straight-line code.
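// (Each straight-line MVC/CLC/etc. covers at most 256 bytes, since the
// SS-format length field is 8 bits and encodes length-1.)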
9046 while (ImmLength > 0) {
9047 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
9048 // The previous iteration might have created out-of-range displacements.
9049 // Apply them using LA/LAY if so.
9050 foldDisplIfNeeded(DestBase, DestDisp);
9051 foldDisplIfNeeded(SrcBase, SrcDisp);
9052 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
9053 DestDisp += ThisLength;
9054 SrcDisp += ThisLength;
9055 ImmLength -= ThisLength;
9056 // If there's another CLC to go, branch to the end if a difference
9057 // was found.
9058 if (EndMBB && ImmLength > 0) {
9059 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
9060 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9062 .addMBB(EndMBB);
9063 MBB->addSuccessor(EndMBB);
9064 MBB->addSuccessor(NextMBB);
9065 MBB = NextMBB;
9066 }
9067 }
9068 if (EndMBB) {
9069 MBB->addSuccessor(EndMBB);
9070 MBB = EndMBB;
9071 MBB->addLiveIn(SystemZ::CC);
9072 }
9073
9074 MI.eraseFromParent();
9075 return MBB;
9076}
9077
9078// Decompose string pseudo-instruction MI into a loop that continually performs
9079// Opcode until CC != 3.
9080MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
9081 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9082 MachineFunction &MF = *MBB->getParent();
9083 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9084 MachineRegisterInfo &MRI = MF.getRegInfo();
9085 DebugLoc DL = MI.getDebugLoc();
9086
9087 uint64_t End1Reg = MI.getOperand(0).getReg();
9088 uint64_t Start1Reg = MI.getOperand(1).getReg();
9089 uint64_t Start2Reg = MI.getOperand(2).getReg();
9090 uint64_t CharReg = MI.getOperand(3).getReg();
9091
9092 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
9093 uint64_t This1Reg = MRI.createVirtualRegister(RC);
9094 uint64_t This2Reg = MRI.createVirtualRegister(RC);
9095 uint64_t End2Reg = MRI.createVirtualRegister(RC);
9096
9097 MachineBasicBlock *StartMBB = MBB;
9098 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9099 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9100
9101 // StartMBB:
9102 // # fall through to LoopMBB
9103 MBB->addSuccessor(LoopMBB);
9104
9105 // LoopMBB:
9106 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
9107 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
9108 // R0L = %CharReg
9109 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
9110 // JO LoopMBB
9111 // # fall through to DoneMBB
9112 //
9113 // The load of R0L can be hoisted by post-RA LICM.
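// (CLST, MVST and SRST are interruptible: they may stop after processing a
// CPU-determined number of bytes, leaving the updated addresses in their
// operand registers and setting CC 3; branching back on CC 3 simply resumes
// from where the previous execution stopped.)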
9114 MBB = LoopMBB;
9115
9116 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
9117 .addReg(Start1Reg).addMBB(StartMBB)
9118 .addReg(End1Reg).addMBB(LoopMBB);
9119 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
9120 .addReg(Start2Reg).addMBB(StartMBB)
9121 .addReg(End2Reg).addMBB(LoopMBB);
9122 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
9123 BuildMI(MBB, DL, TII->get(Opcode))
9124 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
9125 .addReg(This1Reg).addReg(This2Reg);
9126 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9128 MBB->addSuccessor(LoopMBB);
9129 MBB->addSuccessor(DoneMBB);
9130
9131 DoneMBB->addLiveIn(SystemZ::CC);
9132
9133 MI.eraseFromParent();
9134 return DoneMBB;
9135}
9136
9137// Update TBEGIN instruction with final opcode and register clobbers.
9138MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
9139 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
9140 bool NoFloat) const {
9141 MachineFunction &MF = *MBB->getParent();
9142 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9143 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9144
9145 // Update opcode.
9146 MI.setDesc(TII->get(Opcode));
9147
9148 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
9149 // Make sure to add the corresponding GRSM bits if they are missing.
9150 uint64_t Control = MI.getOperand(2).getImm();
9151 static const unsigned GPRControlBit[16] = {
9152 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
9153 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
9154 };
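// (The general register save mask in the TBEGIN text operates on even/odd
// register pairs, one bit per pair, which is why consecutive entries in the
// table above repeat the same bit value.)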
9155 Control |= GPRControlBit[15];
9156 if (TFI->hasFP(MF))
9157 Control |= GPRControlBit[11];
9158 MI.getOperand(2).setImm(Control);
9159
9160 // Add GPR clobbers.
9161 for (int I = 0; I < 16; I++) {
9162 if ((Control & GPRControlBit[I]) == 0) {
9163 unsigned Reg = SystemZMC::GR64Regs[I];
9164 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9165 }
9166 }
9167
9168 // Add FPR/VR clobbers.
9169 if (!NoFloat && (Control & 4) != 0) {
9170 if (Subtarget.hasVector()) {
9171 for (unsigned Reg : SystemZMC::VR128Regs) {
9172 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9173 }
9174 } else {
9175 for (unsigned Reg : SystemZMC::FP64Regs) {
9176 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9177 }
9178 }
9179 }
9180
9181 return MBB;
9182}
9183
9184MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
9185 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9186 MachineFunction &MF = *MBB->getParent();
9187 MachineRegisterInfo *MRI = &MF.getRegInfo();
9188 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9189 DebugLoc DL = MI.getDebugLoc();
9190
9191 Register SrcReg = MI.getOperand(0).getReg();
9192
9193 // Create new virtual register of the same class as source.
9194 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
9195 Register DstReg = MRI->createVirtualRegister(RC);
9196
9197 // Replace pseudo with a normal load-and-test that models the def as
9198 // well.
9199 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
9200 .addReg(SrcReg)
9201 .setMIFlags(MI.getFlags());
9202 MI.eraseFromParent();
9203
9204 return MBB;
9205}
9206
9207MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
9208 MachineInstr &MI, MachineBasicBlock *MBB) const {
9209 MachineFunction &MF = *MBB->getParent();
9210 MachineRegisterInfo *MRI = &MF.getRegInfo();
9211 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9212 DebugLoc DL = MI.getDebugLoc();
9213 const unsigned ProbeSize = getStackProbeSize(MF);
9214 Register DstReg = MI.getOperand(0).getReg();
9215 Register SizeReg = MI.getOperand(2).getReg();
9216
9217 MachineBasicBlock *StartMBB = MBB;
9218 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
9219 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
9220 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
9221 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
9222 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
9223
9224 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
9225 MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
9226
9227 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9228 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9229
9230 // LoopTestMBB
9231 // BRC TailTestMBB
9232 // # fallthrough to LoopBodyMBB
9233 StartMBB->addSuccessor(LoopTestMBB);
9234 MBB = LoopTestMBB;
9235 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
9236 .addReg(SizeReg)
9237 .addMBB(StartMBB)
9238 .addReg(IncReg)
9239 .addMBB(LoopBodyMBB);
9240 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
9241 .addReg(PHIReg)
9242 .addImm(ProbeSize);
9243 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9245 .addMBB(TailTestMBB);
9246 MBB->addSuccessor(LoopBodyMBB);
9247 MBB->addSuccessor(TailTestMBB);
9248
9249 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
9250 // J LoopTestMBB
9251 MBB = LoopBodyMBB;
9252 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
9253 .addReg(PHIReg)
9254 .addImm(ProbeSize);
9255 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
9256 .addReg(SystemZ::R15D)
9257 .addImm(ProbeSize);
9258 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9259 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
9260 .setMemRefs(VolLdMMO);
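// (The CG above only serves to touch the newly allocated stack memory; its
// comparison result is never consumed, the access itself is the probe.)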
9261 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
9262 MBB->addSuccessor(LoopTestMBB);
9263
9264 // TailTestMBB
9265 // BRC DoneMBB
9266 // # fallthrough to TailMBB
9267 MBB = TailTestMBB;
9268 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9269 .addReg(PHIReg)
9270 .addImm(0);
9271 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9273 .addMBB(DoneMBB);
9274 MBB->addSuccessor(TailMBB);
9275 MBB->addSuccessor(DoneMBB);
9276
9277 // TailMBB
9278 // # fallthrough to DoneMBB
9279 MBB = TailMBB;
9280 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
9281 .addReg(SystemZ::R15D)
9282 .addReg(PHIReg);
9283 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9284 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
9285 .setMemRefs(VolLdMMO);
9286 MBB->addSuccessor(DoneMBB);
9287
9288 // DoneMBB
9289 MBB = DoneMBB;
9290 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
9291 .addReg(SystemZ::R15D);
9292
9293 MI.eraseFromParent();
9294 return DoneMBB;
9295}
9296
9297SDValue SystemZTargetLowering::
9298getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
9299 MachineFunction &MF = DAG.getMachineFunction();
9300 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
9301 SDLoc DL(SP);
9302 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
9303 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
9304}
9305
9306 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
9307     MachineInstr &MI, MachineBasicBlock *MBB) const {
9308 switch (MI.getOpcode()) {
9309 case SystemZ::Select32:
9310 case SystemZ::Select64:
9311 case SystemZ::Select128:
9312 case SystemZ::SelectF32:
9313 case SystemZ::SelectF64:
9314 case SystemZ::SelectF128:
9315 case SystemZ::SelectVR32:
9316 case SystemZ::SelectVR64:
9317 case SystemZ::SelectVR128:
9318 return emitSelect(MI, MBB);
9319
9320 case SystemZ::CondStore8Mux:
9321 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
9322 case SystemZ::CondStore8MuxInv:
9323 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
9324 case SystemZ::CondStore16Mux:
9325 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
9326 case SystemZ::CondStore16MuxInv:
9327 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
9328 case SystemZ::CondStore32Mux:
9329 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
9330 case SystemZ::CondStore32MuxInv:
9331 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
9332 case SystemZ::CondStore8:
9333 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
9334 case SystemZ::CondStore8Inv:
9335 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
9336 case SystemZ::CondStore16:
9337 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
9338 case SystemZ::CondStore16Inv:
9339 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
9340 case SystemZ::CondStore32:
9341 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
9342 case SystemZ::CondStore32Inv:
9343 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
9344 case SystemZ::CondStore64:
9345 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
9346 case SystemZ::CondStore64Inv:
9347 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
9348 case SystemZ::CondStoreF32:
9349 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
9350 case SystemZ::CondStoreF32Inv:
9351 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
9352 case SystemZ::CondStoreF64:
9353 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
9354 case SystemZ::CondStoreF64Inv:
9355 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
9356
9357 case SystemZ::SCmp128Hi:
9358 return emitICmp128Hi(MI, MBB, false);
9359 case SystemZ::UCmp128Hi:
9360 return emitICmp128Hi(MI, MBB, true);
9361
9362 case SystemZ::PAIR128:
9363 return emitPair128(MI, MBB);
9364 case SystemZ::AEXT128:
9365 return emitExt128(MI, MBB, false);
9366 case SystemZ::ZEXT128:
9367 return emitExt128(MI, MBB, true);
9368
9369 case SystemZ::ATOMIC_SWAPW:
9370 return emitAtomicLoadBinary(MI, MBB, 0);
9371
9372 case SystemZ::ATOMIC_LOADW_AR:
9373 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
9374 case SystemZ::ATOMIC_LOADW_AFI:
9375 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
9376
9377 case SystemZ::ATOMIC_LOADW_SR:
9378 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
9379
9380 case SystemZ::ATOMIC_LOADW_NR:
9381 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
9382 case SystemZ::ATOMIC_LOADW_NILH:
9383 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
9384
9385 case SystemZ::ATOMIC_LOADW_OR:
9386 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
9387 case SystemZ::ATOMIC_LOADW_OILH:
9388 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
9389
9390 case SystemZ::ATOMIC_LOADW_XR:
9391 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
9392 case SystemZ::ATOMIC_LOADW_XILF:
9393 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
9394
9395 case SystemZ::ATOMIC_LOADW_NRi:
9396 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
9397 case SystemZ::ATOMIC_LOADW_NILHi:
9398 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
9399
9400 case SystemZ::ATOMIC_LOADW_MIN:
9401 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
9402 case SystemZ::ATOMIC_LOADW_MAX:
9403 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
9404 case SystemZ::ATOMIC_LOADW_UMIN:
9405 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
9406 case SystemZ::ATOMIC_LOADW_UMAX:
9407 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
9408
9409 case SystemZ::ATOMIC_CMP_SWAPW:
9410 return emitAtomicCmpSwapW(MI, MBB);
9411 case SystemZ::MVCImm:
9412 case SystemZ::MVCReg:
9413 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
9414 case SystemZ::NCImm:
9415 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
9416 case SystemZ::OCImm:
9417 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
9418 case SystemZ::XCImm:
9419 case SystemZ::XCReg:
9420 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
9421 case SystemZ::CLCImm:
9422 case SystemZ::CLCReg:
9423 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
9424 case SystemZ::MemsetImmImm:
9425 case SystemZ::MemsetImmReg:
9426 case SystemZ::MemsetRegImm:
9427 case SystemZ::MemsetRegReg:
9428 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
9429 case SystemZ::CLSTLoop:
9430 return emitStringWrapper(MI, MBB, SystemZ::CLST);
9431 case SystemZ::MVSTLoop:
9432 return emitStringWrapper(MI, MBB, SystemZ::MVST);
9433 case SystemZ::SRSTLoop:
9434 return emitStringWrapper(MI, MBB, SystemZ::SRST);
9435 case SystemZ::TBEGIN:
9436 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
9437 case SystemZ::TBEGIN_nofloat:
9438 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
9439 case SystemZ::TBEGINC:
9440 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
9441 case SystemZ::LTEBRCompare_Pseudo:
9442 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
9443 case SystemZ::LTDBRCompare_Pseudo:
9444 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
9445 case SystemZ::LTXBRCompare_Pseudo:
9446 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
9447
9448 case SystemZ::PROBED_ALLOCA:
9449 return emitProbedAlloca(MI, MBB);
9450
9451 case TargetOpcode::STACKMAP:
9452 case TargetOpcode::PATCHPOINT:
9453 return emitPatchPoint(MI, MBB);
9454
9455 default:
9456 llvm_unreachable("Unexpected instr type to insert");
9457 }
9458}
9459
9460// This is only used by the isel schedulers, and is needed only to prevent
9461// compiler from crashing when list-ilp is used.
9462const TargetRegisterClass *
9463SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
9464 if (VT == MVT::Untyped)
9465 return &SystemZ::ADDR128BitRegClass;
9466 return TargetLowering::getRepRegClassFor(VT);
9467 }
9468
9469SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
9470 SelectionDAG &DAG) const {
9471 SDLoc dl(Op);
9472 /*
9473 The rounding method is in FPC Byte 3 bits 6-7, and has the following
9474 settings:
9475 00 Round to nearest
9476 01 Round to 0
9477 10 Round to +inf
9478 11 Round to -inf
9479
9480 FLT_ROUNDS, on the other hand, expects the following:
9481 -1 Undefined
9482 0 Round to 0
9483 1 Round to nearest
9484 2 Round to +inf
9485 3 Round to -inf
9486 */
9487
9488 // Save FPC to register.
9489 SDValue Chain = Op.getOperand(0);
9490 SDValue EFPC(
9491 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
9492 Chain = EFPC.getValue(1);
9493
9494 // Transform as necessary
9495 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
9496 DAG.getConstant(3, dl, MVT::i32));
9497 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
9498 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
9499 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
9500 DAG.getConstant(1, dl, MVT::i32)));
9501
9502 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
9503 DAG.getConstant(1, dl, MVT::i32));
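// Worked check of the mapping, with x = FPC rounding bits:
// x=0 (nearest): (0^0)^1 = 1; x=1 (toward 0): (1^0)^1 = 0;
// x=2 (+inf): (2^1)^1 = 2; x=3 (-inf): (3^1)^1 = 3,
// matching the FLT_ROUNDS values listed above.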
9504 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
9505
9506 return DAG.getMergeValues({RetVal, Chain}, dl);
9507}
unsigned const MachineRegisterInfo * MRI
#define Success
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
amdgpu AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
Function Alias Analysis Results
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
#define Check(C,...)
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
IRTranslator LLVM IR MI
iv Induction Variable Users
Definition: IVUsers.cpp:48
#define RegName(no)
lazy value info
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
uint64_t High
LLVMContext & Context
#define P(N)
const char LLVMTargetMachineRef TM
static bool isSelectPseudo(MachineInstr &MI)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
#define OPCODE(NAME)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition: APInt.h:300
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
@ Add
*p = old + v
Definition: Instructions.h:764
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ Xor
*p = old ^ v
Definition: Instructions.h:774
BinOp getOperation() const
Definition: Instructions.h:845
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
The address of a basic block.
Definition: Constants.h:888
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:695
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:707
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:262
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:669
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:556
bool hasPrivateLinkage() const
Definition: GlobalValue.h:526
bool hasInternalLinkage() const
Definition: GlobalValue.h:525
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
MachineFunctionProperties & reset(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
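These builder methods chain off BuildMI. An illustrative sketch only; the opcode SomeTarget::SOME_OP, the registers and the insertion point are placeholders, not SystemZ specifics:
BuildMI(*MBB, MI, DL, TII->get(SomeTarget::SOME_OP), DestReg)
    .addReg(SrcReg)          // virtual or physical source register
    .addImm(Imm)             // immediate operand
    .addMBB(FallThroughMBB); // basic-block operand, e.g. a branch target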
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr fully defines the specified register.
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:554
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
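A minimal sketch of composing SelectionDAG nodes inside a Lower* hook; Op, DL, VT and DAG are assumed to be in scope, and the ADD shown is purely illustrative:
SDValue Zero = DAG.getConstant(0, DL, VT);
SDValue Sum  = DAG.getNode(ISD::ADD, DL, VT, Op.getOperand(0), Zero);
// Package the value together with a second result (e.g. a chain), assuming
// the original node produced one.
SDValue Both = DAG.getMergeValues({Sum, Op.getValue(1)}, DL);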
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void reserve(size_type N)
Definition: SmallVector.h:676
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:466
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:680
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
iterator end() const
Definition: StringRef.h:113
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
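A hedged sketch of StringSwitch in the style of a register-name lookup such as getRegisterByName; the single mapping shown is illustrative rather than the full table:
Register Reg = StringSwitch<Register>(StringRef(RegName))
                   .Case("r15", SystemZ::R15D)
                   .Default(Register());
// An invalid (zero) Register signals that the name was not recognised.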
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
A SystemZ-specific class detailing special use registers particular for calling conventions.
A SystemZ-specific constant pool value.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const
const TargetFrameLowering * getFrameLowering() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
const SystemZRegisterInfo * getRegisterInfo() const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
bool useSoftFloat() const override
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
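The methods above are typically called from a TargetLowering constructor. A hedged sketch of that kind of configuration; the specific choices below are illustrative, not a statement of what SystemZ actually selects:
setBooleanContents(ZeroOrOneBooleanContent);
setSchedulingPreference(Sched::RegPressure);
setMaxAtomicSizeInBitsSupported(128);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i32, Legal);
setTruncStoreAction(MVT::i64, MVT::i32, Legal);
setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND});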
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1126
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1122
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:723
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1269
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1155
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1271
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1241
@ STRICT_FCEIL
Definition: ISDOpcodes.h:426
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1272
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1031
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
Definition: ISDOpcodes.h:1254
@ STRICT_FMINIMUM
Definition: ISDOpcodes.h:436
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:688
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1228
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1233
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:820
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1267
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1268
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1400
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:450
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1221
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:988
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1077
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1270
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1056
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:507
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1237
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:627
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1151
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:430
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1265
@ STRICT_FMAXIMUM
Definition: ISDOpcodes.h:435
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:424
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:425
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1273
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:971
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1041
@ ConstantPool
Definition: ISDOpcodes.h:82
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:809
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ STRICT_FROUND
Definition: ISDOpcodes.h:428
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:303
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:449
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:427
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1263
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:443
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:465
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:442
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:984
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1264
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:470
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1208
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1262
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:831
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:423
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1070
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:763
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:493
@ AssertZext
Definition: ISDOpcodes.h:62
@ STRICT_FRINT
Definition: ISDOpcodes.h:422
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1320
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1523
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1503
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
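An illustrative DAG-combine guard built from the helpers above: only fold when the operand is a plain (non-extending, unindexed) load with a single use. N and the fold itself are assumptions of the sketch:
SDValue Src = N->getOperand(0);
if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) {
  // Safe to fold the load into the combined node.
}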
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition: SystemZ.h:40
static bool isImmHH(uint64_t Val)
Definition: SystemZ.h:176
const unsigned CCMASK_TEND
Definition: SystemZ.h:97
const unsigned CCMASK_CS_EQ
Definition: SystemZ.h:67
const unsigned CCMASK_TBEGIN
Definition: SystemZ.h:92
const unsigned CCMASK_0
Definition: SystemZ.h:27
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition: SystemZ.h:82
const unsigned CCMASK_LOGICAL_CARRY
Definition: SystemZ.h:60
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:122
const unsigned CCMASK_TDC
Definition: SystemZ.h:109
const unsigned CCMASK_FCMP
Definition: SystemZ.h:48
const unsigned CCMASK_TM_SOME_0
Definition: SystemZ.h:81
static bool isImmHL(uint64_t Val)
Definition: SystemZ.h:171
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:124
const unsigned PFD_READ
Definition: SystemZ.h:115
const unsigned CCMASK_1
Definition: SystemZ.h:28
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:121
const unsigned PFD_WRITE
Definition: SystemZ.h:116
const unsigned CCMASK_CMP_GT
Definition: SystemZ.h:37
const unsigned TDCMASK_QNAN_MINUS
Definition: SystemZ.h:128
const unsigned CCMASK_CS
Definition: SystemZ.h:69
const unsigned CCMASK_ANY
Definition: SystemZ.h:31
const unsigned CCMASK_ARITH
Definition: SystemZ.h:55
const unsigned CCMASK_TM_MIXED_MSB_0
Definition: SystemZ.h:78
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:123
static bool isImmLL(uint64_t Val)
Definition: SystemZ.h:161
const unsigned VectorBits
Definition: SystemZ.h:154
static bool isImmLH(uint64_t Val)
Definition: SystemZ.h:166
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:125
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition: SystemZ.h:77
const unsigned IPM_CC
Definition: SystemZ.h:112
const unsigned CCMASK_CMP_LE
Definition: SystemZ.h:39
const unsigned CCMASK_CMP_O
Definition: SystemZ.h:44
const unsigned CCMASK_CMP_EQ
Definition: SystemZ.h:35
const unsigned VectorBytes
Definition: SystemZ.h:158
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:126
const unsigned CCMASK_ICMP
Definition: SystemZ.h:47
const unsigned CCMASK_VCMP_ALL
Definition: SystemZ.h:101
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition: SystemZ.h:104
const unsigned CCMASK_TM_MIXED_MSB_1
Definition: SystemZ.h:79
const unsigned CCMASK_TM_MSB_0
Definition: SystemZ.h:83
const unsigned CCMASK_ARITH_OVERFLOW
Definition: SystemZ.h:54
const unsigned CCMASK_CS_NE
Definition: SystemZ.h:68
const unsigned TDCMASK_SNAN_PLUS
Definition: SystemZ.h:129
const unsigned CCMASK_TM
Definition: SystemZ.h:85
const unsigned CCMASK_3
Definition: SystemZ.h:30
const unsigned CCMASK_CMP_LT
Definition: SystemZ.h:36
const unsigned CCMASK_CMP_NE
Definition: SystemZ.h:38
const unsigned TDCMASK_ZERO_PLUS
Definition: SystemZ.h:119
const unsigned TDCMASK_QNAN_PLUS
Definition: SystemZ.h:127
const unsigned TDCMASK_ZERO_MINUS
Definition: SystemZ.h:120
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition: SystemZ.h:80
const unsigned CCMASK_LOGICAL_BORROW
Definition: SystemZ.h:62
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition: SystemZ.h:43
const unsigned CCMASK_LOGICAL
Definition: SystemZ.h:64
const unsigned CCMASK_TM_MSB_1
Definition: SystemZ.h:84
const unsigned TDCMASK_SNAN_MINUS
Definition: SystemZ.h:130
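A sketch of pairing a CC-valid mask with the mask of CC values on which a branch is taken, in the style used throughout this file; the "less than or equal" encoding shown is an assumption for illustration:
unsigned CCValid = SystemZ::CCMASK_ICMP;
unsigned CCMask  = SystemZ::CCMASK_CMP_LT | SystemZ::CCMASK_CMP_EQ;
assert((CCMask & ~CCValid) == 0 && "CC mask must stay within the valid set");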
@ GeneralDynamic
Definition: CodeGen.h:46
@ GS
Definition: X86.h:205
Reg
All possible values of the reg field in the ModR/M byte.
support::ulittle32_t Word
Definition: IRSymtab.h:52
NodeAddr< CodeNode * > Code
Definition: RDFGraph.h:388
constexpr const char32_t SBase
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:326
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Done
Definition: Threading.h:61
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert 'Bytes' to a hex string and output to 'OS'.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:452
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define EQ(a, b)
Definition: regexec.c:112
#define NC
Definition: regutils.h:42
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:182
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:141
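An illustrative KnownBits query: prove that the high 32 bits of a 64-bit value are zero before narrowing an operation; Op and DAG are assumed to be in scope:
KnownBits Known = DAG.computeKnownBits(Op);
if (Known.countMinLeadingZeros() >= 32) {
  // The upper half is provably zero, so a 32-bit operation suffices.
}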
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
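A hedged sketch of the usual pairing of CreateFixedObject with MachinePointerInfo::getFixedStack when loading an incoming stack argument; MF, DAG, DL, Chain, PtrVT and Offset are assumed to be in scope:
int FI = MF.getFrameInfo().CreateFixedObject(8, Offset, /*IsImmutable=*/true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Arg = DAG.getLoad(MVT::i64, DL, Chain, FIN,
                          MachinePointerInfo::getFixedStack(MF, FI));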
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
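A hedged sketch of driving LowerCallTo through CallLoweringInfo from inside the lowering class, roughly the shape of a helper such as makeExternalCall; CalleeName, RetTy, Args, Chain, DL, PtrVT and IsSigned are assumed to be prepared by the caller:
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL)
    .setChain(Chain)
    .setCallee(CallingConv::C, RetTy,
               DAG.getExternalSymbol(CalleeName, PtrVT), std::move(Args))
    .setSExtResult(IsSigned)
    .setZExtResult(!IsSigned);
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
// CallResult.first is the returned value, CallResult.second the out-chain.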