1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
24#include "llvm/IR/Intrinsics.h"
25#include "llvm/IR/IntrinsicsS390.h"
29#include <cctype>
30#include <optional>
31
32using namespace llvm;
33
34#define DEBUG_TYPE "systemz-lower"
35
36namespace {
37// Represents information about a comparison.
38struct Comparison {
39 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
40 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
41 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
42
43 // The operands to the comparison.
44 SDValue Op0, Op1;
45
46 // Chain if this is a strict floating-point comparison.
47 SDValue Chain;
48
49 // The opcode that should be used to compare Op0 and Op1.
50 unsigned Opcode;
51
52 // A SystemZICMP value. Only used for integer comparisons.
53 unsigned ICmpType;
54
55 // The mask of CC values that Opcode can produce.
56 unsigned CCValid;
57
58 // The mask of CC values for which the original condition is true.
59 unsigned CCMask;
60};
61} // end anonymous namespace
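// For example, an integer equality test yields Opcode == SystemZISD::ICMP
// with CCValid == SystemZ::CCMASK_ICMP (CC 0..2 are possible) and
// CCMask == SystemZ::CCMASK_CMP_EQ (the condition is true only for CC 0).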
62
63// Classify VT as either 32 or 64 bit.
64static bool is32Bit(EVT VT) {
65 switch (VT.getSimpleVT().SimpleTy) {
66 case MVT::i32:
67 return true;
68 case MVT::i64:
69 return false;
70 default:
71 llvm_unreachable("Unsupported type");
72 }
73}
74
75// Return a version of MachineOperand that can be safely used before the
76// final use.
77static MachineOperand earlyUseOperand(MachineOperand Op) {
78 if (Op.isReg())
79 Op.setIsKill(false);
80 return Op;
81}
82
83SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
84 const SystemZSubtarget &STI)
85 : TargetLowering(TM), Subtarget(STI) {
86 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
87
88 auto *Regs = STI.getSpecialRegisters();
89
90 // Set up the register classes.
91 if (Subtarget.hasHighWord())
92 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
93 else
94 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
95 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
96 if (!useSoftFloat()) {
97 if (Subtarget.hasVector()) {
98 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
99 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
100 } else {
101 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
102 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
103 }
104 if (Subtarget.hasVectorEnhancements1())
105 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
106 else
107 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
108
109 if (Subtarget.hasVector()) {
110 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
111 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
113 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
114 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
115 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
116 }
117
118 if (Subtarget.hasVector())
119 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
120 }
121
122 // Compute derived properties from the register classes
123 computeRegisterProperties(Subtarget.getRegisterInfo());
124
125 // Set up special registers.
126 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
127
128 // TODO: It may be better to default to latency-oriented scheduling, however
129 // LLVM's current latency-oriented scheduler can't handle physreg definitions
130 // such as SystemZ has with CC, so set this to the register-pressure
131 // scheduler, because it can.
132 setSchedulingPreference(Sched::RegPressure);
133
134 setBooleanContents(ZeroOrOneBooleanContent);
135 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
136
137 setMaxAtomicSizeInBitsSupported(128);
138
139 // Instructions are strings of 2-byte aligned 2-byte values.
140 setMinFunctionAlignment(Align(2));
141 // For performance reasons we prefer 16-byte alignment.
142 setPrefFunctionAlignment(Align(16));
143
144 // Handle operations that are handled in a similar way for all types.
145 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
146 I <= MVT::LAST_FP_VALUETYPE;
147 ++I) {
148 MVT VT = MVT::SimpleValueType(I);
149 if (isTypeLegal(VT)) {
150 // Lower SET_CC into an IPM-based sequence.
154
155 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
157
158 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
161 }
162 }
163
164 // Expand jump table branches as address arithmetic followed by an
165 // indirect jump.
166 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
167
168 // Expand BRCOND into a BR_CC (see above).
169 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
170
171 // Handle integer types except i128.
172 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
173 I <= MVT::LAST_INTEGER_VALUETYPE;
174 ++I) {
175 MVT VT = MVT::SimpleValueType(I);
176 if (isTypeLegal(VT) && VT != MVT::i128) {
178
179 // Expand individual DIV and REMs into DIVREMs.
186
187 // Support addition/subtraction with overflow.
190
191 // Support addition/subtraction with carry.
194
195 // Support carry in as value rather than glue.
198
199 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
200 // available, or if the operand is constant.
202
203 // Use POPCNT on z196 and above.
204 if (Subtarget.hasPopulationCount())
205 setOperationAction(ISD::CTPOP, VT, Custom);
206 else
207 setOperationAction(ISD::CTPOP, VT, Expand);
208
209 // No special instructions for these.
212
213 // Use *MUL_LOHI where possible instead of MULH*.
218
219 // Only z196 and above have native support for conversions to unsigned.
220 // On z10, promoting to i64 doesn't generate an inexact condition for
221 // values that are outside the i32 range but in the i64 range, so use
222 // the default expansion.
223 if (!Subtarget.hasFPExtension())
224 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
225
226 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
227 // default to Expand, so need to be modified to Legal where appropriate.
228 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
229 if (Subtarget.hasFPExtension())
230 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
231
232 // And similarly for STRICT_[SU]INT_TO_FP.
233 setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
234 if (Subtarget.hasFPExtension())
235 setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
236 }
237 }
238
239 // Handle i128 if legal.
240 if (isTypeLegal(MVT::i128)) {
241 // No special instructions for these.
257
258 // Support addition/subtraction with carry.
263
264 // Use VPOPCT and add up partial results.
265 setOperationAction(ISD::CTPOP, MVT::i128, Custom);
266
267 // We have to use libcalls for these.
276 }
277
278 // Type legalization will convert 8- and 16-bit atomic operations into
279 // forms that operate on i32s (but still keeping the original memory VT).
280 // Lower them into full i32 operations.
292
293 // Whether or not i128 is a legal type, we need to custom lower
294 // the atomic operations in order to exploit SystemZ instructions.
299
300 // Mark sign/zero extending atomic loads as legal, which will make
301 // DAGCombiner fold extensions into atomic loads if possible.
302 setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64,
303 {MVT::i8, MVT::i16, MVT::i32}, Legal);
304 setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32,
305 {MVT::i8, MVT::i16}, Legal);
306 setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i16,
307 MVT::i8, Legal);
308
309 // We can use the CC result of compare-and-swap to implement
310 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
314
316
317 // Traps are legal, as we will convert them to "j .+2".
318 setOperationAction(ISD::TRAP, MVT::Other, Legal);
319
320 // z10 has instructions for signed but not unsigned FP conversion.
321 // Handle unsigned 32-bit types as signed 64-bit types.
322 if (!Subtarget.hasFPExtension()) {
327 }
328
329 // We have native support for a 64-bit CTLZ, via FLOGR.
333
334 // On z15 we have native support for a 64-bit CTPOP.
335 if (Subtarget.hasMiscellaneousExtensions3()) {
338 }
339
340 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
341 setOperationAction(ISD::OR, MVT::i64, Custom);
342
343 // Expand 128 bit shifts without using a libcall.
347 setLibcallName(RTLIB::SRL_I128, nullptr);
348 setLibcallName(RTLIB::SHL_I128, nullptr);
349 setLibcallName(RTLIB::SRA_I128, nullptr);
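// With no libcall names registered, legalization cannot fall back on
// __lshrti3/__ashlti3/__ashrti3 and instead expands these shifts inline.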
350
351 // Also expand 256 bit shifts if i128 is a legal type.
352 if (isTypeLegal(MVT::i128)) {
356 }
357
358 // Handle bitcast from fp128 to i128.
359 if (!isTypeLegal(MVT::i128))
360 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
361
362 // We have native instructions for i8, i16 and i32 extensions, but not i1.
364 for (MVT VT : MVT::integer_valuetypes()) {
365 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
366 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
367 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
368 }
369
370 // Handle the various types of symbolic address.
371 setOperationAction(ISD::ConstantPool, PtrVT, Custom);
372 setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
373 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
374 setOperationAction(ISD::BlockAddress, PtrVT, Custom);
375 setOperationAction(ISD::JumpTable, PtrVT, Custom);
376
377 // We need to handle dynamic allocations specially because of the
378 // 160-byte area at the bottom of the stack.
379 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
380 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
381
384
385 // Handle prefetches with PFD or PFDRL.
386 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
387
388 // Handle readcyclecounter with STCKF.
389 setOperationAction(ISD::READCYCLECOUNTER, MVT::Other, Custom);
390
391 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
392 // Assume by default that all vector operations need to be expanded.
393 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
394 if (getOperationAction(Opcode, VT) == Legal)
395 setOperationAction(Opcode, VT, Expand);
396
397 // Likewise all truncating stores and extending loads.
398 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
399 setTruncStoreAction(VT, InnerVT, Expand);
400 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
401 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
402 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
403 }
404
405 if (isTypeLegal(VT)) {
406 // These operations are legal for anything that can be stored in a
407 // vector register, even if there is no native support for the format
408 // as such. In particular, we can do these for v4f32 even though there
409 // are no specific instructions for that format.
415
416 // Likewise, except that we need to replace the nodes with something
417 // more specific.
420 }
421 }
422
423 // Handle integer vector types.
424 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
425 if (isTypeLegal(VT)) {
426 // These operations have direct equivalents.
431 if (VT != MVT::v2i64)
437 if (Subtarget.hasVectorEnhancements1())
439 else
443
444 // Convert a GPR scalar to a vector by inserting it into element 0.
445 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
446
447 // Use a series of unpacks for extensions.
448 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
449 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
450
451 // Detect shifts/rotates by a scalar amount and convert them into
452 // V*_BY_SCALAR.
457
458 // Add ISD::VECREDUCE_ADD as custom in order to implement
459 // it with VZERO+VSUM
460 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
461
462 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
463 // and inverting the result as necessary.
464 setOperationAction(ISD::SETCC, VT, Custom);
465 }
466 }
467
468 if (Subtarget.hasVector()) {
469 // There should be no need to check for float types other than v2f64
470 // since <2 x f32> isn't a legal type.
479
488 }
489
490 if (Subtarget.hasVectorEnhancements2()) {
499
508 }
509
510 // Handle floating-point types.
511 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
512 I <= MVT::LAST_FP_VALUETYPE;
513 ++I) {
514 MVT VT = MVT::SimpleValueType(I);
515 if (isTypeLegal(VT)) {
516 // We can use FI for FRINT.
518
519 // We can use the extended form of FI for other rounding operations.
520 if (Subtarget.hasFPExtension()) {
526 }
527
528 // No special instructions for these.
534
535 // Special treatment.
537
538 // Handle constrained floating-point operations.
548 if (Subtarget.hasFPExtension()) {
554 }
555 }
556 }
557
558 // Handle floating-point vector types.
559 if (Subtarget.hasVector()) {
560 // Scalar-to-vector conversion is just a subreg.
561 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
562 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
563
564 // Some insertions and extractions can be done directly but others
565 // need to go via integers.
570
571 // These operations have direct equivalents.
572 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
573 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
574 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
575 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
576 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
577 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
578 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
579 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
580 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
583 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
586
587 // Handle constrained floating-point operations.
600
605 if (Subtarget.hasVectorEnhancements1()) {
608 }
609 }
610
611 // The vector enhancements facility 1 has instructions for these.
612 if (Subtarget.hasVectorEnhancements1()) {
613 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
614 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
615 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
616 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
617 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
618 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
619 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
620 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
621 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
624 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
627
632
637
642
647
652
653 // Handle constrained floating-point operations.
666 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
667 MVT::v4f32, MVT::v2f64 }) {
672 }
673 }
674
675 // We only have fused f128 multiply-addition on vector registers.
676 if (!Subtarget.hasVectorEnhancements1()) {
679 }
680
681 // We don't have a copysign instruction on vector registers.
682 if (Subtarget.hasVectorEnhancements1())
684
685 // Needed so that we don't try to implement f128 constant loads using
686 // a load-and-extend of a f80 constant (in cases where the constant
687 // would fit in an f80).
688 for (MVT VT : MVT::fp_valuetypes())
689 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
690
691 // We don't have extending load instruction on vector registers.
692 if (Subtarget.hasVectorEnhancements1()) {
693 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
694 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
695 }
696
697 // Floating-point truncation and stores need to be done separately.
698 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
699 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
700 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
701
702 // We have 64-bit FPR<->GPR moves, but need special handling for
703 // 32-bit forms.
704 if (!Subtarget.hasVector()) {
707 }
708
709 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
710 // structure, but VAEND is a no-op.
711 setOperationAction(ISD::VASTART, MVT::Other, Custom);
712 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
713 setOperationAction(ISD::VAEND, MVT::Other, Expand);
714
716
717 // Codes for which we want to perform some z-specific combinations.
721 ISD::LOAD,
732 ISD::SDIV,
733 ISD::UDIV,
734 ISD::SREM,
735 ISD::UREM,
738
739 // Handle intrinsics.
740 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
741 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
742
743 // We want to use MVC in preference to even a single load/store pair.
744 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
745 MaxStoresPerMemcpyOptSize = 0;
746
747 // The main memset sequence is a byte store followed by an MVC.
748 // Two STC or MV..I stores win over that, but the kind of fused stores
749 // generated by target-independent code don't when the byte value is
750 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
751 // than "STC;MVC". Handle the choice in target-specific code instead.
752 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
753 MaxStoresPerMemsetOptSize = 0;
754
755 // Default to having -disable-strictnode-mutation on
756 IsStrictFPEnabled = true;
757
758 if (Subtarget.isTargetzOS()) {
759 struct RTLibCallMapping {
760 RTLIB::Libcall Code;
761 const char *Name;
762 };
763 static RTLibCallMapping RTLibCallCommon[] = {
764#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
765#include "ZOSLibcallNames.def"
766 };
767 for (auto &E : RTLibCallCommon)
768 setLibcallName(E.Code, E.Name);
769 }
770}
771
772bool SystemZTargetLowering::useSoftFloat() const {
773 return Subtarget.hasSoftFloat();
774}
775
776EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
777 LLVMContext &, EVT VT) const {
778 if (!VT.isVector())
779 return MVT::i32;
780 return VT.changeVectorElementTypeToInteger();
781}
782
783bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
784 const MachineFunction &MF, EVT VT) const {
785 VT = VT.getScalarType();
786
787 if (!VT.isSimple())
788 return false;
789
790 switch (VT.getSimpleVT().SimpleTy) {
791 case MVT::f32:
792 case MVT::f64:
793 return true;
794 case MVT::f128:
795 return Subtarget.hasVectorEnhancements1();
796 default:
797 break;
798 }
799
800 return false;
801}
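// Scalar f32/f64 FMA maps directly onto MAEBR/MADBR, while f128 gains a
// fused multiply-add only once the vector enhancements facility 1 provides
// 128-bit floating point in vector registers, as checked above.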
802
803// Return true if the constant can be generated with a vector instruction,
804// such as VGM, VGMB or VREPI.
805bool SystemZVectorConstantInfo::isVectorConstantLegal(
806 const SystemZSubtarget &Subtarget) {
807 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
808 if (!Subtarget.hasVector() ||
809 (isFP128 && !Subtarget.hasVectorEnhancements1()))
810 return false;
811
812 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
813 // preferred way of creating all-zero and all-one vectors so give it
814 // priority over other methods below.
815 unsigned Mask = 0;
816 unsigned I = 0;
817 for (; I < SystemZ::VectorBytes; ++I) {
818 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
819 if (Byte == 0xff)
820 Mask |= 1ULL << I;
821 else if (Byte != 0)
822 break;
823 }
824 if (I == SystemZ::VectorBytes) {
825 Opcode = SystemZISD::BYTE_MASK;
826 OpVals.push_back(Mask);
827 VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
828 return true;
829 }
830
831 if (SplatBitSize > 64)
832 return false;
833
834 auto tryValue = [&](uint64_t Value) -> bool {
835 // Try VECTOR REPLICATE IMMEDIATE
836 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
837 if (isInt<16>(SignedValue)) {
838 OpVals.push_back(((unsigned) SignedValue));
839 Opcode = SystemZISD::REPLICATE;
840 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
841 SystemZ::VectorBits / SplatBitSize);
842 return true;
843 }
844 // Try VECTOR GENERATE MASK
845 unsigned Start, End;
846 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
847 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
848 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
849 // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
850 OpVals.push_back(Start - (64 - SplatBitSize));
851 OpVals.push_back(End - (64 - SplatBitSize));
852 Opcode = SystemZISD::ROTATE_MASK;
853 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
854 SystemZ::VectorBits / SplatBitSize);
855 return true;
856 }
857 return false;
858 };
859
860 // First try assuming that any undefined bits above the highest set bit
861 // and below the lowest set bit are 1s. This increases the likelihood of
862 // being able to use a sign-extended element value in VECTOR REPLICATE
863 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
864 uint64_t SplatBitsZ = SplatBits.getZExtValue();
865 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
866 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
867 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
868 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
869 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
870 if (tryValue(SplatBitsZ | Upper | Lower))
871 return true;
872
873 // Now try assuming that any undefined bits between the first and
874 // last defined set bits are set. This increases the chances of
875 // using a non-wraparound mask.
876 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
877 return tryValue(SplatBitsZ | Middle);
878}
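// Worked example: an all-ones v16i8 takes the VECTOR GENERATE BYTE MASK path
// with Mask == 0xffff; a v4i32 splat of 1 fits the 16-bit signed immediate of
// VECTOR REPLICATE IMMEDIATE; and a v4i32 splat of 0x00ffff00 is a contiguous
// bit run accepted by isRxSBGMask, so it becomes a VECTOR GENERATE MASK.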
879
880SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) {
881 if (IntImm.isSingleWord()) {
882 IntBits = APInt(128, IntImm.getZExtValue());
883 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
884 } else
885 IntBits = IntImm;
886 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
887
888 // Find the smallest splat.
889 SplatBits = IntImm;
890 unsigned Width = SplatBits.getBitWidth();
891 while (Width > 8) {
892 unsigned HalfSize = Width / 2;
893 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
894 APInt LowValue = SplatBits.trunc(HalfSize);
895
896 // If the two halves do not match, stop here.
897 if (HighValue != LowValue || 8 > HalfSize)
898 break;
899
900 SplatBits = HighValue;
901 Width = HalfSize;
902 }
903 SplatUndef = 0;
904 SplatBitSize = Width;
905}
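// For example, the 16-bit pattern 0x0001 repeated eight times across 128 bits
// reduces to SplatBits == 0x0001 with SplatBitSize == 16.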
906
908 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
909 bool HasAnyUndefs;
910
911 // Get IntBits by finding the 128 bit splat.
912 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
913 true);
914
915 // Get SplatBits by finding the 8 bit or greater splat.
916 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
917 true);
918}
919
920bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
921 bool ForCodeSize) const {
922 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
923 if (Imm.isZero() || Imm.isNegZero())
924 return true;
925
926 return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
927}
928
929/// Returns true if stack probing through inline assembly is requested.
930bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
931 // If the function specifically requests inline stack probes, emit them.
932 if (MF.getFunction().hasFnAttribute("probe-stack"))
933 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
934 "inline-asm";
935 return false;
936}
937
941}
942
946}
947
948TargetLowering::AtomicExpansionKind
949SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
950 // Don't expand subword operations as they require special treatment.
951 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
952 return AtomicExpansionKind::None;
953
954 // Don't expand if there is a target instruction available.
955 if (Subtarget.hasInterlockedAccess1() &&
956 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
957 (RMW->getOperation() == AtomicRMWInst::BinOp::Add ||
958 RMW->getOperation() == AtomicRMWInst::BinOp::Sub ||
959 RMW->getOperation() == AtomicRMWInst::BinOp::And ||
960 RMW->getOperation() == AtomicRMWInst::BinOp::Or ||
961 RMW->getOperation() == AtomicRMWInst::BinOp::Xor))
962 return AtomicExpansionKind::None;
963
964 return AtomicExpansionKind::CmpXChg;
965}
966
967bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
968 // We can use CGFI or CLGFI.
969 return isInt<32>(Imm) || isUInt<32>(Imm);
970}
971
972bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
973 // We can use ALGFI or SLGFI.
974 return isUInt<32>(Imm) || isUInt<32>(-Imm);
975}
976
977bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
978 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
979 // Unaligned accesses should never be slower than the expanded version.
980 // We check specifically for aligned accesses in the few cases where
981 // they are required.
982 if (Fast)
983 *Fast = 1;
984 return true;
985}
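// z/Architecture loads and stores tolerate byte alignment, so reporting the
// access as fast here never forces the generic unaligned expansion.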
986
987// Information about the addressing mode for a memory access.
988struct AddressingMode {
989 // True if a long displacement is supported.
990 bool LongDisplacement;
991
992 // True if use of index register is supported.
993 bool IndexReg;
994
995 AddressingMode(bool LongDispl, bool IdxReg) :
996 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
997};
998
999// Return the desired addressing mode for a Load whose only use (in the
1000// same block) is a Store.
1001static AddressingMode getLoadStoreAddrMode(bool HasVector,
1002 Type *Ty) {
1003 // With vector support a Load->Store combination may be combined to either
1004 // an MVC or vector operations and it seems to work best to allow the
1005 // vector addressing mode.
1006 if (HasVector)
1007 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1008
1009 // Otherwise only the MVC case is special.
1010 bool MVC = Ty->isIntegerTy(8);
1011 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1012}
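// MVC is an SS-format instruction: base register plus 12-bit unsigned
// displacement, with no index register, which is why both flags are false
// in the MVC case above.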
1013
1014// Return the addressing mode which seems most desirable given an LLVM
1015// Instruction pointer.
1016static AddressingMode
1017supportedAddressingMode(Instruction *I, bool HasVector) {
1018 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1019 switch (II->getIntrinsicID()) {
1020 default: break;
1021 case Intrinsic::memset:
1022 case Intrinsic::memmove:
1023 case Intrinsic::memcpy:
1024 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1025 }
1026 }
1027
1028 if (isa<LoadInst>(I) && I->hasOneUse()) {
1029 auto *SingleUser = cast<Instruction>(*I->user_begin());
1030 if (SingleUser->getParent() == I->getParent()) {
1031 if (isa<ICmpInst>(SingleUser)) {
1032 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1033 if (C->getBitWidth() <= 64 &&
1034 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1035 // Comparison of memory with 16 bit signed / unsigned immediate
1036 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1037 } else if (isa<StoreInst>(SingleUser))
1038 // Load->Store
1039 return getLoadStoreAddrMode(HasVector, I->getType());
1040 }
1041 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1042 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1043 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1044 // Load->Store
1045 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1046 }
1047
1048 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1049
1050 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1051 // dependencies (LDE only supports small offsets).
1052 // * Utilize the vector registers to hold floating point
1053 // values (vector load / store instructions only support small
1054 // offsets).
1055
1056 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1057 I->getOperand(0)->getType());
1058 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1059 bool IsVectorAccess = MemAccessTy->isVectorTy();
1060
1061 // A store of an extracted vector element will be combined into a VSTE type
1062 // instruction.
1063 if (!IsVectorAccess && isa<StoreInst>(I)) {
1064 Value *DataOp = I->getOperand(0);
1065 if (isa<ExtractElementInst>(DataOp))
1066 IsVectorAccess = true;
1067 }
1068
1069 // A load which gets inserted into a vector element will be combined into a
1070 // VLE type instruction.
1071 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1072 User *LoadUser = *I->user_begin();
1073 if (isa<InsertElementInst>(LoadUser))
1074 IsVectorAccess = true;
1075 }
1076
1077 if (IsFPAccess || IsVectorAccess)
1078 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1079 }
1080
1081 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1082}
1083
1084bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1085 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1086 // Punt on globals for now, although they can be used in limited
1087 // RELATIVE LONG cases.
1088 if (AM.BaseGV)
1089 return false;
1090
1091 // Require a 20-bit signed offset.
1092 if (!isInt<20>(AM.BaseOffs))
1093 return false;
1094
1095 bool RequireD12 =
1096 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1097 AddressingMode SupportedAM(!RequireD12, true);
1098 if (I != nullptr)
1099 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1100
1101 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1102 return false;
1103
1104 if (!SupportedAM.IndexReg)
1105 // No indexing allowed.
1106 return AM.Scale == 0;
1107 else
1108 // Indexing is OK but no scale factor can be applied.
1109 return AM.Scale == 0 || AM.Scale == 1;
1110}
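// For example, an i64 load can use base + index + a 20-bit signed
// displacement (LG), whereas vector and i128 accesses are limited to the
// 12-bit unsigned displacement of the D12 forms such as VL and VST.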
1111
1112bool SystemZTargetLowering::findOptimalMemOpLowering(
1113 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
1114 unsigned SrcAS, const AttributeList &FuncAttributes) const {
1115 const int MVCFastLen = 16;
1116
1117 if (Limit != ~unsigned(0)) {
1118 // Don't expand Op into scalar loads/stores in these cases:
1119 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1120 return false; // Small memcpy: Use MVC
1121 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1122 return false; // Small memset (first byte with STC/MVI): Use MVC
1123 if (Op.isZeroMemset())
1124 return false; // Memset zero: Use XC
1125 }
1126
1127 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1128 SrcAS, FuncAttributes);
1129}
1130
1131EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op,
1132 const AttributeList &FuncAttributes) const {
1133 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1134}
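// Returning v2i64 makes memcpy-style expansions move 16 bytes at a time with
// vector loads and stores; MVT::Other defers to the generic type choice.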
1135
1136bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1137 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1138 return false;
1139 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1140 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1141 return FromBits > ToBits;
1142}
1143
1144bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
1145 if (!FromVT.isInteger() || !ToVT.isInteger())
1146 return false;
1147 unsigned FromBits = FromVT.getFixedSizeInBits();
1148 unsigned ToBits = ToVT.getFixedSizeInBits();
1149 return FromBits > ToBits;
1150}
1151
1152//===----------------------------------------------------------------------===//
1153// Inline asm support
1154//===----------------------------------------------------------------------===//
1155
1156TargetLowering::ConstraintType
1157SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
1158 if (Constraint.size() == 1) {
1159 switch (Constraint[0]) {
1160 case 'a': // Address register
1161 case 'd': // Data register (equivalent to 'r')
1162 case 'f': // Floating-point register
1163 case 'h': // High-part register
1164 case 'r': // General-purpose register
1165 case 'v': // Vector register
1166 return C_RegisterClass;
1167
1168 case 'Q': // Memory with base and unsigned 12-bit displacement
1169 case 'R': // Likewise, plus an index
1170 case 'S': // Memory with base and signed 20-bit displacement
1171 case 'T': // Likewise, plus an index
1172 case 'm': // Equivalent to 'T'.
1173 return C_Memory;
1174
1175 case 'I': // Unsigned 8-bit constant
1176 case 'J': // Unsigned 12-bit constant
1177 case 'K': // Signed 16-bit constant
1178 case 'L': // Signed 20-bit displacement (on all targets we support)
1179 case 'M': // 0x7fffffff
1180 return C_Immediate;
1181
1182 default:
1183 break;
1184 }
1185 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1186 switch (Constraint[1]) {
1187 case 'Q': // Address with base and unsigned 12-bit displacement
1188 case 'R': // Likewise, plus an index
1189 case 'S': // Address with base and signed 20-bit displacement
1190 case 'T': // Likewise, plus an index
1191 return C_Address;
1192
1193 default:
1194 break;
1195 }
1196 }
1197 return TargetLowering::getConstraintType(Constraint);
1198}
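// Illustrative use from C source (hypothetical example, not part of this
// file):
//   long f(long x, long y) {
//     asm("agr %0,%1" : "+d"(x) : "d"(y));  // 'd' selects 64-bit GPRs
//     return x;
//   }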
1199
1200TargetLowering::ConstraintWeight
1201SystemZTargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &info,
1202 const char *constraint) const {
1203 ConstraintWeight weight = CW_Invalid;
1204 Value *CallOperandVal = info.CallOperandVal;
1205 // If we don't have a value, we can't do a match,
1206 // but allow it at the lowest weight.
1207 if (!CallOperandVal)
1208 return CW_Default;
1209 Type *type = CallOperandVal->getType();
1210 // Look at the constraint type.
1211 switch (*constraint) {
1212 default:
1213 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
1214 break;
1215
1216 case 'a': // Address register
1217 case 'd': // Data register (equivalent to 'r')
1218 case 'h': // High-part register
1219 case 'r': // General-purpose register
1220 weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1221 break;
1222
1223 case 'f': // Floating-point register
1224 if (!useSoftFloat())
1225 weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1226 break;
1227
1228 case 'v': // Vector register
1229 if (Subtarget.hasVector())
1230 weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1231 : CW_Default;
1232 break;
1233
1234 case 'I': // Unsigned 8-bit constant
1235 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1236 if (isUInt<8>(C->getZExtValue()))
1237 weight = CW_Constant;
1238 break;
1239
1240 case 'J': // Unsigned 12-bit constant
1241 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1242 if (isUInt<12>(C->getZExtValue()))
1243 weight = CW_Constant;
1244 break;
1245
1246 case 'K': // Signed 16-bit constant
1247 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1248 if (isInt<16>(C->getSExtValue()))
1249 weight = CW_Constant;
1250 break;
1251
1252 case 'L': // Signed 20-bit displacement (on all targets we support)
1253 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1254 if (isInt<20>(C->getSExtValue()))
1255 weight = CW_Constant;
1256 break;
1257
1258 case 'M': // 0x7fffffff
1259 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1260 if (C->getZExtValue() == 0x7fffffff)
1261 weight = CW_Constant;
1262 break;
1263 }
1264 return weight;
1265}
1266
1267// Parse a "{tNNN}" register constraint for which the register type "t"
1268// has already been verified. MC is the class associated with "t" and
1269// Map maps 0-based register numbers to LLVM register numbers.
1270static std::pair<unsigned, const TargetRegisterClass *>
1271parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
1272 const unsigned *Map, unsigned Size) {
1273 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1274 if (isdigit(Constraint[2])) {
1275 unsigned Index;
1276 bool Failed =
1277 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1278 if (!Failed && Index < Size && Map[Index])
1279 return std::make_pair(Map[Index], RC);
1280 }
1281 return std::make_pair(0U, nullptr);
1282}
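// For example, with VT == MVT::i64 the constraint "{r5}" parses Index == 5
// and maps it through SystemZMC::GR64Regs to SystemZ::R5D.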
1283
1284std::pair<unsigned, const TargetRegisterClass *>
1285SystemZTargetLowering::getRegForInlineAsmConstraint(
1286 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1287 if (Constraint.size() == 1) {
1288 // GCC Constraint Letters
1289 switch (Constraint[0]) {
1290 default: break;
1291 case 'd': // Data register (equivalent to 'r')
1292 case 'r': // General-purpose register
1293 if (VT.getSizeInBits() == 64)
1294 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1295 else if (VT.getSizeInBits() == 128)
1296 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1297 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1298
1299 case 'a': // Address register
1300 if (VT == MVT::i64)
1301 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1302 else if (VT == MVT::i128)
1303 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1304 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1305
1306 case 'h': // High-part register (an LLVM extension)
1307 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1308
1309 case 'f': // Floating-point register
1310 if (!useSoftFloat()) {
1311 if (VT.getSizeInBits() == 64)
1312 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1313 else if (VT.getSizeInBits() == 128)
1314 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1315 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1316 }
1317 break;
1318
1319 case 'v': // Vector register
1320 if (Subtarget.hasVector()) {
1321 if (VT.getSizeInBits() == 32)
1322 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1323 if (VT.getSizeInBits() == 64)
1324 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1325 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1326 }
1327 break;
1328 }
1329 }
1330 if (Constraint.starts_with("{")) {
1331
1332 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1333 // to check the size on.
1334 auto getVTSizeInBits = [&VT]() {
1335 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1336 };
1337
1338 // We need to override the default register parsing for GPRs and FPRs
1339 // because the interpretation depends on VT. The internal names of
1340 // the registers are also different from the external names
1341 // (F0D and F0S instead of F0, etc.).
1342 if (Constraint[1] == 'r') {
1343 if (getVTSizeInBits() == 32)
1344 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1345 SystemZMC::GR32Regs, 16);
1346 if (getVTSizeInBits() == 128)
1347 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1348 SystemZMC::GR128Regs, 16);
1349 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1350 SystemZMC::GR64Regs, 16);
1351 }
1352 if (Constraint[1] == 'f') {
1353 if (useSoftFloat())
1354 return std::make_pair(
1355 0u, static_cast<const TargetRegisterClass *>(nullptr));
1356 if (getVTSizeInBits() == 32)
1357 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1358 SystemZMC::FP32Regs, 16);
1359 if (getVTSizeInBits() == 128)
1360 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1361 SystemZMC::FP128Regs, 16);
1362 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1363 SystemZMC::FP64Regs, 16);
1364 }
1365 if (Constraint[1] == 'v') {
1366 if (!Subtarget.hasVector())
1367 return std::make_pair(
1368 0u, static_cast<const TargetRegisterClass *>(nullptr));
1369 if (getVTSizeInBits() == 32)
1370 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1371 SystemZMC::VR32Regs, 32);
1372 if (getVTSizeInBits() == 64)
1373 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1374 SystemZMC::VR64Regs, 32);
1375 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1376 SystemZMC::VR128Regs, 32);
1377 }
1378 }
1379 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1380}
1381
1382// FIXME? Maybe this could be a TableGen attribute on some registers and
1383// this table could be generated automatically from RegInfo.
1384Register
1385SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
1386 const MachineFunction &MF) const {
1387 Register Reg =
1388 StringSwitch<Register>(RegName)
1389 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0)
1390 .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0)
1391 .Default(0);
1392
1393 if (Reg)
1394 return Reg;
1395 report_fatal_error("Invalid register name global variable");
1396}
1397
1398Register SystemZTargetLowering::getExceptionPointerRegister(
1399 const Constant *PersonalityFn) const {
1400 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1401}
1402
1403Register SystemZTargetLowering::getExceptionSelectorRegister(
1404 const Constant *PersonalityFn) const {
1405 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1406}
1407
1408void SystemZTargetLowering::LowerAsmOperandForConstraint(
1409 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1410 SelectionDAG &DAG) const {
1411 // Only support length 1 constraints for now.
1412 if (Constraint.size() == 1) {
1413 switch (Constraint[0]) {
1414 case 'I': // Unsigned 8-bit constant
1415 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1416 if (isUInt<8>(C->getZExtValue()))
1417 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1418 Op.getValueType()));
1419 return;
1420
1421 case 'J': // Unsigned 12-bit constant
1422 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1423 if (isUInt<12>(C->getZExtValue()))
1424 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1425 Op.getValueType()));
1426 return;
1427
1428 case 'K': // Signed 16-bit constant
1429 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1430 if (isInt<16>(C->getSExtValue()))
1431 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1432 Op.getValueType()));
1433 return;
1434
1435 case 'L': // Signed 20-bit displacement (on all targets we support)
1436 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1437 if (isInt<20>(C->getSExtValue()))
1438 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1439 Op.getValueType()));
1440 return;
1441
1442 case 'M': // 0x7fffffff
1443 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1444 if (C->getZExtValue() == 0x7fffffff)
1445 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1446 Op.getValueType()));
1447 return;
1448 }
1449 }
1450 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1451}
1452
1453//===----------------------------------------------------------------------===//
1454// Calling conventions
1455//===----------------------------------------------------------------------===//
1456
1457#include "SystemZGenCallingConv.inc"
1458
1459const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
1460 CallingConv::ID) const {
1461 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1462 SystemZ::R14D, 0 };
1463 return ScratchRegs;
1464}
1465
1466bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
1467 Type *ToType) const {
1468 return isTruncateFree(FromType, ToType);
1469}
1470
1471bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1472 return CI->isTailCall();
1473}
1474
1475// Value is a value that has been passed to us in the location described by VA
1476// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1477// any loads onto Chain.
1478static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
1479 CCValAssign &VA, SDValue Chain,
1480 SDValue Value) {
1481 // If the argument has been promoted from a smaller type, insert an
1482 // assertion to capture this.
1483 if (VA.getLocInfo() == CCValAssign::SExt)
1484 Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1485 DAG.getValueType(VA.getValVT()));
1486 else if (VA.getLocInfo() == CCValAssign::ZExt)
1487 Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1488 DAG.getValueType(VA.getValVT()));
1489
1490 if (VA.isExtInLoc())
1491 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1492 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1493 // If this is a short vector argument loaded from the stack,
1494 // extend from i64 to full vector size and then bitcast.
1495 assert(VA.getLocVT() == MVT::i64);
1496 assert(VA.getValVT().isVector());
1497 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1498 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1499 } else
1500 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1501 return Value;
1502}
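// Example: an i8 argument promoted and passed sign-extended arrives here with
// a wider LocVT; it receives an AssertSext recording the original value type
// and is then truncated back down to ValVT.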
1503
1504// Value is a value of type VA.getValVT() that we need to copy into
1505// the location described by VA. Return a copy of Value converted to
1506// VA.getLocVT(). The caller is responsible for handling indirect values.
1507static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1508 CCValAssign &VA, SDValue Value) {
1509 switch (VA.getLocInfo()) {
1510 case CCValAssign::SExt:
1511 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1512 case CCValAssign::ZExt:
1513 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1514 case CCValAssign::AExt:
1515 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1516 case CCValAssign::BCvt: {
1517 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1518 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1519 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1520 // For an f32 vararg we need to first promote it to an f64 and then
1521 // bitcast it to an i64.
1522 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1523 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1524 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1525 ? MVT::v2i64
1526 : VA.getLocVT();
1527 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1528 // For ELF, this is a short vector argument to be stored to the stack,
1529 // bitcast to v2i64 and then extract first element.
1530 if (BitCastToType == MVT::v2i64)
1531 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1532 DAG.getConstant(0, DL, MVT::i32));
1533 return Value;
1534 }
1535 case CCValAssign::Full:
1536 return Value;
1537 default:
1538 llvm_unreachable("Unhandled getLocInfo()");
1539 }
1540}
1541
1542static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
1543 SDLoc DL(In);
1544 SDValue Lo, Hi;
1545 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1546 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1547 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1548 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1549 DAG.getConstant(64, DL, MVT::i32)));
1550 } else {
1551 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1552 }
1553
1554 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1555 MVT::Untyped, Hi, Lo);
1556 return SDValue(Pair, 0);
1557}
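// The untyped GR128 value occupies an even/odd pair of 64-bit GPRs; the
// PAIR128 pseudo builds that pair from the two i64 halves.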
1558
1559static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
1560 SDLoc DL(In);
1561 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1562 DL, MVT::i64, In);
1563 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1564 DL, MVT::i64, In);
1565
1566 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1567 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1568 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1569 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1570 DAG.getConstant(64, DL, MVT::i32));
1571 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1572 } else {
1573 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1574 }
1575}
1576
1577bool SystemZTargetLowering::splitValueIntoRegisterParts(
1578 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1579 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1580 EVT ValueVT = Val.getValueType();
1581 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1582 // Inline assembly operand.
1583 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1584 return true;
1585 }
1586
1587 return false;
1588}
1589
1590SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
1591 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1592 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1593 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1594 // Inline assembly operand.
1595 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1596 return DAG.getBitcast(ValueVT, Res);
1597 }
1598
1599 return SDValue();
1600}
1601
1602SDValue SystemZTargetLowering::LowerFormalArguments(
1603 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1604 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1605 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1606 MachineFunction &MF = DAG.getMachineFunction();
1607 MachineFrameInfo &MFI = MF.getFrameInfo();
1608 MachineRegisterInfo &MRI = MF.getRegInfo();
1609 SystemZMachineFunctionInfo *FuncInfo =
1610 MF.getInfo<SystemZMachineFunctionInfo>();
1611 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1612 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1613
1614 // Assign locations to all of the incoming arguments.
1615 SmallVector<CCValAssign, 16> ArgLocs;
1616 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1617 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1618 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1619
1620 unsigned NumFixedGPRs = 0;
1621 unsigned NumFixedFPRs = 0;
1622 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1623 SDValue ArgValue;
1624 CCValAssign &VA = ArgLocs[I];
1625 EVT LocVT = VA.getLocVT();
1626 if (VA.isRegLoc()) {
1627 // Arguments passed in registers
1628 const TargetRegisterClass *RC;
1629 switch (LocVT.getSimpleVT().SimpleTy) {
1630 default:
1631 // Integers smaller than i64 should be promoted to i64.
1632 llvm_unreachable("Unexpected argument type");
1633 case MVT::i32:
1634 NumFixedGPRs += 1;
1635 RC = &SystemZ::GR32BitRegClass;
1636 break;
1637 case MVT::i64:
1638 NumFixedGPRs += 1;
1639 RC = &SystemZ::GR64BitRegClass;
1640 break;
1641 case MVT::f32:
1642 NumFixedFPRs += 1;
1643 RC = &SystemZ::FP32BitRegClass;
1644 break;
1645 case MVT::f64:
1646 NumFixedFPRs += 1;
1647 RC = &SystemZ::FP64BitRegClass;
1648 break;
1649 case MVT::f128:
1650 NumFixedFPRs += 2;
1651 RC = &SystemZ::FP128BitRegClass;
1652 break;
1653 case MVT::v16i8:
1654 case MVT::v8i16:
1655 case MVT::v4i32:
1656 case MVT::v2i64:
1657 case MVT::v4f32:
1658 case MVT::v2f64:
1659 RC = &SystemZ::VR128BitRegClass;
1660 break;
1661 }
1662
1663 Register VReg = MRI.createVirtualRegister(RC);
1664 MRI.addLiveIn(VA.getLocReg(), VReg);
1665 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1666 } else {
1667 assert(VA.isMemLoc() && "Argument not register or memory");
1668
1669 // Create the frame index object for this incoming parameter.
1670 // FIXME: Pre-include call frame size in the offset, should not
1671 // need to manually add it here.
1672 int64_t ArgSPOffset = VA.getLocMemOffset();
1673 if (Subtarget.isTargetXPLINK64()) {
1674 auto &XPRegs =
1676 ArgSPOffset += XPRegs.getCallFrameSize();
1677 }
1678 int FI =
1679 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1680
1681 // Create the SelectionDAG nodes corresponding to a load
1682 // from this parameter. Unpromoted ints and floats are
1683 // passed as right-justified 8-byte values.
1684 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1685 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1686 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1687 DAG.getIntPtrConstant(4, DL));
1688 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1689 MachinePointerInfo::getFixedStack(MF, FI));
1690 }
1691
1692 // Convert the value of the argument register into the value that's
1693 // being passed.
1694 if (VA.getLocInfo() == CCValAssign::Indirect) {
1695 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1696 MachinePointerInfo()));
1697 // If the original argument was split (e.g. i128), we need
1698 // to load all parts of it here (using the same address).
1699 unsigned ArgIndex = Ins[I].OrigArgIndex;
1700 assert (Ins[I].PartOffset == 0);
1701 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1702 CCValAssign &PartVA = ArgLocs[I + 1];
1703 unsigned PartOffset = Ins[I + 1].PartOffset;
1704 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1705 DAG.getIntPtrConstant(PartOffset, DL));
1706 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1707 MachinePointerInfo()));
1708 ++I;
1709 }
1710 } else
1711 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1712 }
1713
1714 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
1715 // Save the number of non-varargs registers for later use by va_start, etc.
1716 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1717 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1718
1719 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1720 Subtarget.getSpecialRegisters());
1721
1722 // Likewise the address (in the form of a frame index) of where the
1723 // first stack vararg would be. The 1-byte size here is arbitrary.
1724 // FIXME: Pre-include call frame size in the offset, should not
1725 // need to manually add it here.
1726 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
1727 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
1728 FuncInfo->setVarArgsFrameIndex(FI);
1729 }
1730
1731 if (IsVarArg && Subtarget.isTargetELF()) {
1732 // Save the number of non-varargs registers for later use by va_start, etc.
1733 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1734 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1735
1736 // Likewise the address (in the form of a frame index) of where the
1737 // first stack vararg would be. The 1-byte size here is arbitrary.
1738 int64_t VarArgsOffset = CCInfo.getStackSize();
1739 FuncInfo->setVarArgsFrameIndex(
1740 MFI.CreateFixedObject(1, VarArgsOffset, true));
1741
1742 // ...and a similar frame index for the caller-allocated save area
1743 // that will be used to store the incoming registers.
1744 int64_t RegSaveOffset =
1745 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1746 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1747 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1748
1749 // Store the FPR varargs in the reserved frame slots. (We store the
1750 // GPRs as part of the prologue.)
1751 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1752 SDValue MemOps[SystemZ::ELFNumArgFPRs];
1753 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1754 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1755 int FI =
1756 MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
1757 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1758 Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
1759 &SystemZ::FP64BitRegClass);
1760 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1761 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1762 MachinePointerInfo::getFixedStack(MF, FI));
1763 }
1764 // Join the stores, which are independent of one another.
1765 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1766 ArrayRef(&MemOps[NumFixedFPRs],
1767 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
1768 }
1769 }
1770
1771 if (Subtarget.isTargetXPLINK64()) {
1772 // Create virual register for handling incoming "ADA" special register (R5)
1773 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
1774 Register ADAvReg = MRI.createVirtualRegister(RC);
1775 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1776 Subtarget.getSpecialRegisters());
1777 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
1778 FuncInfo->setADAVirtualRegister(ADAvReg);
1779 }
1780 return Chain;
1781}
1782
1783static bool canUseSiblingCall(const CCState &ArgCCInfo,
1784 SmallVectorImpl<CCValAssign> &ArgLocs,
1785 SmallVectorImpl<ISD::OutputArg> &Outs) {
1786 // Punt if there are any indirect or stack arguments, or if the call
1787 // needs the callee-saved argument register R6, or if the call uses
1788 // the callee-saved register arguments SwiftSelf and SwiftError.
1789 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1790 CCValAssign &VA = ArgLocs[I];
1791 if (VA.getLocInfo() == CCValAssign::Indirect)
1792 return false;
1793 if (!VA.isRegLoc())
1794 return false;
1795 Register Reg = VA.getLocReg();
1796 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1797 return false;
1798 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1799 return false;
1800 }
1801 return true;
1802}
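// R6 must be avoided because the ELF ABI both passes the fifth integer
// argument in it and requires the callee to preserve it, which a sibling
// call could not guarantee.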
1803
1804static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL,
1805 unsigned Offset, bool LoadAdr = false) {
1806 MachineFunction &MF = DAG.getMachineFunction();
1807 SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
1808 unsigned ADAvReg = MFI->getADAVirtualRegister();
1809 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1810
1811 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
1812 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
1813
1814 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
1815 if (!LoadAdr)
1816 Result = DAG.getLoad(
1817 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
1818 MachineMemOperand::MODereferenceable);
1819
1820 return Result;
1821}
1822
1823// ADA access using Global value
1824// Note: for functions, address of descriptor is returned
1825static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL,
1826 EVT PtrVT) {
1827 unsigned ADAtype;
1828 bool LoadAddr = false;
1829 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
1830 bool IsFunction =
1831 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
1832 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
1833
1834 if (IsFunction) {
1835 if (IsInternal) {
1836 ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC;
1837 LoadAddr = true;
1838 } else
1839 ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC;
1840 } else {
1841 ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR;
1842 }
1843 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
1844
1845 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
1846}
1847
1848static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
1849 SDLoc &DL, SDValue &Chain) {
1850 unsigned ADADelta = 0; // ADA offset in desc.
1851 unsigned EPADelta = 8; // EPA offset in desc.
1852 MachineFunction &MF = DAG.getMachineFunction();
1853 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1854
1855 // XPLink calling convention.
1856 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1857 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
1858 G->getGlobal()->hasPrivateLinkage());
1859 if (IsInternal) {
1860 SystemZMachineFunctionInfo *MFI =
1861 MF.getInfo<SystemZMachineFunctionInfo>();
1862 unsigned ADAvReg = MFI->getADAVirtualRegister();
1863 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
1864 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1865 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1866 return true;
1867 } else {
1868 SDValue GA = DAG.getTargetGlobalAddress(
1869 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1870 ADA = getADAEntry(DAG, GA, DL, ADADelta);
1871 Callee = getADAEntry(DAG, GA, DL, EPADelta);
1872 }
1873 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1874 SDValue ES = DAG.getTargetExternalSymbol(
1875 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1876 ADA = getADAEntry(DAG, ES, DL, ADADelta);
1877 Callee = getADAEntry(DAG, ES, DL, EPADelta);
1878 } else {
1879 // Function pointer case
1880 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1881 DAG.getConstant(ADADelta, DL, PtrVT));
1882 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
1883 MachinePointerInfo());
1884 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1885 DAG.getConstant(EPADelta, DL, PtrVT));
1886 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
1887 MachinePointerInfo());
1888 }
1889 return false;
1890}
1891
1892SDValue
1893SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
1894 SmallVectorImpl<SDValue> &InVals) const {
1895 SelectionDAG &DAG = CLI.DAG;
1896 SDLoc &DL = CLI.DL;
1897 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1898 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1899 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1900 SDValue Chain = CLI.Chain;
1901 SDValue Callee = CLI.Callee;
1902 bool &IsTailCall = CLI.IsTailCall;
1903 CallingConv::ID CallConv = CLI.CallConv;
1904 bool IsVarArg = CLI.IsVarArg;
1905 MachineFunction &MF = DAG.getMachineFunction();
1906 EVT PtrVT = getPointerTy(MF.getDataLayout());
1907 LLVMContext &Ctx = *DAG.getContext();
1908 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
1909
1910 // FIXME: z/OS support to be added later.
1911 if (Subtarget.isTargetXPLINK64())
1912 IsTailCall = false;
1913
1914 // Analyze the operands of the call, assigning locations to each operand.
1915 SmallVector<CCValAssign, 16> ArgLocs;
1916 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1917 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1918
1919 // We don't support GuaranteedTailCallOpt, only automatically-detected
1920 // sibling calls.
1921 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1922 IsTailCall = false;
1923
1924 // Get a count of how many bytes are to be pushed on the stack.
1925 unsigned NumBytes = ArgCCInfo.getStackSize();
1926
1927 // Mark the start of the call.
1928 if (!IsTailCall)
1929 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1930
1931 // Copy argument values to their designated locations.
1932 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
1933 SmallVector<SDValue, 8> MemOpChains;
1934 SDValue StackPtr;
1935 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1936 CCValAssign &VA = ArgLocs[I];
1937 SDValue ArgValue = OutVals[I];
1938
1939 if (VA.getLocInfo() == CCValAssign::Indirect) {
1940 // Store the argument in a stack slot and pass its address.
1941 unsigned ArgIndex = Outs[I].OrigArgIndex;
1942 EVT SlotVT;
1943 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1944 // Allocate the full stack space for a promoted (and split) argument.
1945 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1946 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1947 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1948 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1949 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1950 } else {
1951 SlotVT = Outs[I].VT;
1952 }
1953 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1954 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1955 MemOpChains.push_back(
1956 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1957 MachinePointerInfo::getFixedStack(MF, FI)));
1958 // If the original argument was split (e.g. i128), we need
1959 // to store all parts of it here (and pass just one address).
1960 assert (Outs[I].PartOffset == 0);
1961 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1962 SDValue PartValue = OutVals[I + 1];
1963 unsigned PartOffset = Outs[I + 1].PartOffset;
1964 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1965 DAG.getIntPtrConstant(PartOffset, DL));
1966 MemOpChains.push_back(
1967 DAG.getStore(Chain, DL, PartValue, Address,
1968 MachinePointerInfo::getFixedStack(MF, FI, PartOffset)));
1969 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1970 SlotVT.getStoreSize()) && "Not enough space for argument part!");
1971 ++I;
1972 }
1973 ArgValue = SpillSlot;
1974 } else
1975 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1976
1977 if (VA.isRegLoc()) {
1978 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
1979 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
1980 // and low values.
1981 if (VA.getLocVT() == MVT::i128)
1982 ArgValue = lowerI128ToGR128(DAG, ArgValue);
1983 // Queue up the argument copies and emit them at the end.
1984 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1985 } else {
1986 assert(VA.isMemLoc() && "Argument not register or memory");
1987
1988 // Work out the address of the stack slot. Unpromoted ints and
1989 // floats are passed as right-justified 8-byte values.
1990 if (!StackPtr.getNode())
1991 StackPtr = DAG.getCopyFromReg(Chain, DL,
1992 Regs->getStackPointerRegister(), PtrVT);
1993 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1994 VA.getLocMemOffset();
1995 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1996 Offset += 4;
1997 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1998 DAG.getIntPtrConstant(Offset, DL));
1999
2000 // Emit the store.
2001 MemOpChains.push_back(
2002 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2003
2004 // Although long doubles or vectors are passed through the stack when
2005 // they are vararg (non-fixed arguments), if a long double or vector
2006 // occupies the third and fourth slot of the argument list GPR3 should
2007 // still shadow the third slot of the argument list.
2008 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2009 SDValue ShadowArgValue =
2010 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2011 DAG.getIntPtrConstant(1, DL));
2012 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2013 }
2014 }
2015 }
2016
2017 // Join the stores, which are independent of one another.
2018 if (!MemOpChains.empty())
2019 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2020
2021 // Accept direct calls by converting symbolic call addresses to the
2022 // associated Target* opcodes. Force %r1 to be used for indirect
2023 // tail calls.
2024 SDValue Glue;
2025
2026 if (Subtarget.isTargetXPLINK64()) {
2027 SDValue ADA;
2028 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2029 if (!IsBRASL) {
2030 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2031 ->getAddressOfCalleeRegister();
2032 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2033 Glue = Chain.getValue(1);
2034 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2035 }
2036 RegsToPass.push_back(std::make_pair(
2037 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2038 } else {
2039 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2040 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2041 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2042 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2043 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2044 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2045 } else if (IsTailCall) {
2046 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2047 Glue = Chain.getValue(1);
2048 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2049 }
2050 }
2051
2052 // Build a sequence of copy-to-reg nodes, chained and glued together.
2053 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
2054 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
2055 RegsToPass[I].second, Glue);
2056 Glue = Chain.getValue(1);
2057 }
2058
2059 // The first call operand is the chain and the second is the target address.
2060 SmallVector<SDValue, 8> Ops;
2061 Ops.push_back(Chain);
2062 Ops.push_back(Callee);
2063
2064 // Add argument registers to the end of the list so that they are
2065 // known live into the call.
2066 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
2067 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
2068 RegsToPass[I].second.getValueType()));
2069
2070 // Add a register mask operand representing the call-preserved registers.
2071 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2072 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2073 assert(Mask && "Missing call preserved mask for calling convention");
2074 Ops.push_back(DAG.getRegisterMask(Mask));
2075
2076 // Glue the call to the argument copies, if any.
2077 if (Glue.getNode())
2078 Ops.push_back(Glue);
2079
2080 // Emit the call.
2081 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2082 if (IsTailCall) {
2083 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2084 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2085 return Ret;
2086 }
2087 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2088 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2089 Glue = Chain.getValue(1);
2090
2091 // Mark the end of the call, which is glued to the call itself.
2092 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2093 Glue = Chain.getValue(1);
2094
2095 // Assign locations to each value returned by this call.
2096 SmallVector<CCValAssign, 16> RetLocs;
2097 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2098 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2099
2100 // Copy all of the result registers out of their specified physreg.
2101 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2102 CCValAssign &VA = RetLocs[I];
2103
2104 // Copy the value out, gluing the copy to the end of the call sequence.
2105 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2106 VA.getLocVT(), Glue);
2107 Chain = RetValue.getValue(1);
2108 Glue = RetValue.getValue(2);
2109
2110 // Convert the value of the return register into the value that's
2111 // being returned.
2112 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2113 }
2114
2115 return Chain;
2116}
2117
2118// Generate a call taking the given operands as arguments and returning a
2119// result of type RetVT.
2120 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
2121 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2122 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2123 bool DoesNotReturn, bool IsReturnValueUsed) const {
2124 TargetLowering::ArgListTy Args;
2125 Args.reserve(Ops.size());
2126
2127 TargetLowering::ArgListEntry Entry;
2128 for (SDValue Op : Ops) {
2129 Entry.Node = Op;
2130 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
2131 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2132 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2133 Args.push_back(Entry);
2134 }
2135
2136 SDValue Callee =
2137 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2138
2139 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2140 TargetLowering::CallLoweringInfo CLI(DAG);
2141 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
2142 CLI.setDebugLoc(DL)
2143 .setChain(Chain)
2144 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2145 .setNoReturn(DoesNotReturn)
2146 .setDiscardResult(!IsReturnValueUsed)
2147 .setSExtResult(SignExtend)
2148 .setZExtResult(!SignExtend);
2149 return LowerCallTo(CLI);
2150}
2151
2152 bool SystemZTargetLowering::
2153 CanLowerReturn(CallingConv::ID CallConv,
2154 MachineFunction &MF, bool isVarArg,
2155 const SmallVectorImpl<ISD::OutputArg> &Outs,
2156 LLVMContext &Context) const {
2157 // Special case that we cannot easily detect in RetCC_SystemZ since
2158 // i128 may not be a legal type.
2159 for (auto &Out : Outs)
2160 if (Out.ArgVT == MVT::i128)
2161 return false;
2162
2163 SmallVector<CCValAssign, 16> RetLocs;
2164 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
2165 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2166}
2167
2168SDValue
2169 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2170 bool IsVarArg,
2171 const SmallVectorImpl<ISD::OutputArg> &Outs,
2172 const SmallVectorImpl<SDValue> &OutVals,
2173 const SDLoc &DL, SelectionDAG &DAG) const {
2174 MachineFunction &MF = DAG.getMachineFunction();
2175
2176 // Assign locations to each returned value.
2177 SmallVector<CCValAssign, 16> RetLocs;
2178 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2179 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2180
2181 // Quick exit for void returns
2182 if (RetLocs.empty())
2183 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2184
2185 if (CallConv == CallingConv::GHC)
2186 report_fatal_error("GHC functions return void only");
2187
2188 // Copy the result values into the output registers.
2189 SDValue Glue;
2190 SmallVector<SDValue, 4> RetOps;
2191 RetOps.push_back(Chain);
2192 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2193 CCValAssign &VA = RetLocs[I];
2194 SDValue RetValue = OutVals[I];
2195
2196 // Make the return register live on exit.
2197 assert(VA.isRegLoc() && "Can only return in registers!");
2198
2199 // Promote the value as required.
2200 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2201
2202 // Chain and glue the copies together.
2203 Register Reg = VA.getLocReg();
2204 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2205 Glue = Chain.getValue(1);
2206 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2207 }
2208
2209 // Update chain and glue.
2210 RetOps[0] = Chain;
2211 if (Glue.getNode())
2212 RetOps.push_back(Glue);
2213
2214 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2215}
2216
2217// Return true if Op is an intrinsic node with chain that returns the CC value
2218// as its only (other) argument. Provide the associated SystemZISD opcode and
2219// the mask of valid CC values if so.
2220static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2221 unsigned &CCValid) {
2222 unsigned Id = Op.getConstantOperandVal(1);
2223 switch (Id) {
2224 case Intrinsic::s390_tbegin:
2225 Opcode = SystemZISD::TBEGIN;
2226 CCValid = SystemZ::CCMASK_TBEGIN;
2227 return true;
2228
2229 case Intrinsic::s390_tbegin_nofloat:
2230 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2231 CCValid = SystemZ::CCMASK_TBEGIN;
2232 return true;
2233
2234 case Intrinsic::s390_tend:
2235 Opcode = SystemZISD::TEND;
2236 CCValid = SystemZ::CCMASK_TEND;
2237 return true;
2238
2239 default:
2240 return false;
2241 }
2242}
2243
2244// Return true if Op is an intrinsic node without chain that returns the
2245// CC value as its final argument. Provide the associated SystemZISD
2246// opcode and the mask of valid CC values if so.
2247static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2248 unsigned Id = Op.getConstantOperandVal(0);
2249 switch (Id) {
2250 case Intrinsic::s390_vpkshs:
2251 case Intrinsic::s390_vpksfs:
2252 case Intrinsic::s390_vpksgs:
2253 Opcode = SystemZISD::PACKS_CC;
2254 CCValid = SystemZ::CCMASK_VCMP;
2255 return true;
2256
2257 case Intrinsic::s390_vpklshs:
2258 case Intrinsic::s390_vpklsfs:
2259 case Intrinsic::s390_vpklsgs:
2260 Opcode = SystemZISD::PACKLS_CC;
2261 CCValid = SystemZ::CCMASK_VCMP;
2262 return true;
2263
2264 case Intrinsic::s390_vceqbs:
2265 case Intrinsic::s390_vceqhs:
2266 case Intrinsic::s390_vceqfs:
2267 case Intrinsic::s390_vceqgs:
2268 Opcode = SystemZISD::VICMPES;
2269 CCValid = SystemZ::CCMASK_VCMP;
2270 return true;
2271
2272 case Intrinsic::s390_vchbs:
2273 case Intrinsic::s390_vchhs:
2274 case Intrinsic::s390_vchfs:
2275 case Intrinsic::s390_vchgs:
2276 Opcode = SystemZISD::VICMPHS;
2277 CCValid = SystemZ::CCMASK_VCMP;
2278 return true;
2279
2280 case Intrinsic::s390_vchlbs:
2281 case Intrinsic::s390_vchlhs:
2282 case Intrinsic::s390_vchlfs:
2283 case Intrinsic::s390_vchlgs:
2284 Opcode = SystemZISD::VICMPHLS;
2285 CCValid = SystemZ::CCMASK_VCMP;
2286 return true;
2287
2288 case Intrinsic::s390_vtm:
2289 Opcode = SystemZISD::VTM;
2290 CCValid = SystemZ::CCMASK_VCMP;
2291 return true;
2292
2293 case Intrinsic::s390_vfaebs:
2294 case Intrinsic::s390_vfaehs:
2295 case Intrinsic::s390_vfaefs:
2296 Opcode = SystemZISD::VFAE_CC;
2297 CCValid = SystemZ::CCMASK_ANY;
2298 return true;
2299
2300 case Intrinsic::s390_vfaezbs:
2301 case Intrinsic::s390_vfaezhs:
2302 case Intrinsic::s390_vfaezfs:
2303 Opcode = SystemZISD::VFAEZ_CC;
2304 CCValid = SystemZ::CCMASK_ANY;
2305 return true;
2306
2307 case Intrinsic::s390_vfeebs:
2308 case Intrinsic::s390_vfeehs:
2309 case Intrinsic::s390_vfeefs:
2310 Opcode = SystemZISD::VFEE_CC;
2311 CCValid = SystemZ::CCMASK_ANY;
2312 return true;
2313
2314 case Intrinsic::s390_vfeezbs:
2315 case Intrinsic::s390_vfeezhs:
2316 case Intrinsic::s390_vfeezfs:
2317 Opcode = SystemZISD::VFEEZ_CC;
2318 CCValid = SystemZ::CCMASK_ANY;
2319 return true;
2320
2321 case Intrinsic::s390_vfenebs:
2322 case Intrinsic::s390_vfenehs:
2323 case Intrinsic::s390_vfenefs:
2324 Opcode = SystemZISD::VFENE_CC;
2325 CCValid = SystemZ::CCMASK_ANY;
2326 return true;
2327
2328 case Intrinsic::s390_vfenezbs:
2329 case Intrinsic::s390_vfenezhs:
2330 case Intrinsic::s390_vfenezfs:
2331 Opcode = SystemZISD::VFENEZ_CC;
2332 CCValid = SystemZ::CCMASK_ANY;
2333 return true;
2334
2335 case Intrinsic::s390_vistrbs:
2336 case Intrinsic::s390_vistrhs:
2337 case Intrinsic::s390_vistrfs:
2338 Opcode = SystemZISD::VISTR_CC;
2339 CCValid = SystemZ::CCMASK_ANY;
2340 return true;
2341
2342 case Intrinsic::s390_vstrcbs:
2343 case Intrinsic::s390_vstrchs:
2344 case Intrinsic::s390_vstrcfs:
2345 Opcode = SystemZISD::VSTRC_CC;
2346 CCValid = SystemZ::CCMASK_ANY;
2347 return true;
2348
2349 case Intrinsic::s390_vstrczbs:
2350 case Intrinsic::s390_vstrczhs:
2351 case Intrinsic::s390_vstrczfs:
2352 Opcode = SystemZISD::VSTRCZ_CC;
2353 CCValid = SystemZ::CCMASK_ANY;
2354 return true;
2355
2356 case Intrinsic::s390_vstrsb:
2357 case Intrinsic::s390_vstrsh:
2358 case Intrinsic::s390_vstrsf:
2359 Opcode = SystemZISD::VSTRS_CC;
2360 CCValid = SystemZ::CCMASK_ANY;
2361 return true;
2362
2363 case Intrinsic::s390_vstrszb:
2364 case Intrinsic::s390_vstrszh:
2365 case Intrinsic::s390_vstrszf:
2366 Opcode = SystemZISD::VSTRSZ_CC;
2367 CCValid = SystemZ::CCMASK_ANY;
2368 return true;
2369
2370 case Intrinsic::s390_vfcedbs:
2371 case Intrinsic::s390_vfcesbs:
2372 Opcode = SystemZISD::VFCMPES;
2373 CCValid = SystemZ::CCMASK_VCMP;
2374 return true;
2375
2376 case Intrinsic::s390_vfchdbs:
2377 case Intrinsic::s390_vfchsbs:
2378 Opcode = SystemZISD::VFCMPHS;
2379 CCValid = SystemZ::CCMASK_VCMP;
2380 return true;
2381
2382 case Intrinsic::s390_vfchedbs:
2383 case Intrinsic::s390_vfchesbs:
2384 Opcode = SystemZISD::VFCMPHES;
2385 CCValid = SystemZ::CCMASK_VCMP;
2386 return true;
2387
2388 case Intrinsic::s390_vftcidb:
2389 case Intrinsic::s390_vftcisb:
2390 Opcode = SystemZISD::VFTCI;
2391 CCValid = SystemZ::CCMASK_VCMP;
2392 return true;
2393
2394 case Intrinsic::s390_tdc:
2395 Opcode = SystemZISD::TDC;
2396 CCValid = SystemZ::CCMASK_TDC;
2397 return true;
2398
2399 default:
2400 return false;
2401 }
2402}
2403
2404// Emit an intrinsic with chain and an explicit CC register result.
2405 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2406 unsigned Opcode) {
2407 // Copy all operands except the intrinsic ID.
2408 unsigned NumOps = Op.getNumOperands();
2409 SmallVector<SDValue, 6> Ops;
2410 Ops.reserve(NumOps - 1);
2411 Ops.push_back(Op.getOperand(0));
2412 for (unsigned I = 2; I < NumOps; ++I)
2413 Ops.push_back(Op.getOperand(I));
2414
2415 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2416 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2417 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2418 SDValue OldChain = SDValue(Op.getNode(), 1);
2419 SDValue NewChain = SDValue(Intr.getNode(), 1);
2420 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2421 return Intr.getNode();
2422}
2423
2424// Emit an intrinsic with an explicit CC register result.
2425 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2426 unsigned Opcode) {
2427 // Copy all operands except the intrinsic ID.
2428 unsigned NumOps = Op.getNumOperands();
2429 SmallVector<SDValue, 6> Ops;
2430 Ops.reserve(NumOps - 1);
2431 for (unsigned I = 1; I < NumOps; ++I)
2432 Ops.push_back(Op.getOperand(I));
2433
2434 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2435 return Intr.getNode();
2436}
2437
2438// CC is a comparison that will be implemented using an integer or
2439// floating-point comparison. Return the condition code mask for
2440// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2441// unsigned comparisons and clear for signed ones. In the floating-point
2442// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2443 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2444 #define CONV(X) \
2445 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2446 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2447 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2448
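// For instance, CONV(EQ) expands to cases mapping SETEQ and SETOEQ to
// CCMASK_CMP_EQ, and SETUEQ to CCMASK_CMP_UO | CCMASK_CMP_EQ.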
2449 switch (CC) {
2450 default:
2451 llvm_unreachable("Invalid integer condition!");
2452
2453 CONV(EQ);
2454 CONV(NE);
2455 CONV(GT);
2456 CONV(GE);
2457 CONV(LT);
2458 CONV(LE);
2459
2460 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2461 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2462 }
2463#undef CONV
2464}
2465
2466// If C can be converted to a comparison against zero, adjust the operands
2467// as necessary.
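// For example, a signed "x > -1" becomes "x >= 0" and "x < 1" becomes
// "x <= 0", so that the compare-with-zero forms can be used.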
2468static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2469 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2470 return;
2471
2472 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2473 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2474 return;
2475
2476 int64_t Value = ConstOp1->getSExtValue();
2477 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2478 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2479 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2480 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2481 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2482 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2483 }
2484}
2485
2486// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2487// adjust the operands as necessary.
2488static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2489 Comparison &C) {
2490 // For us to make any changes, it must be a comparison between a single-use
2491 // load and a constant.
2492 if (!C.Op0.hasOneUse() ||
2493 C.Op0.getOpcode() != ISD::LOAD ||
2494 C.Op1.getOpcode() != ISD::Constant)
2495 return;
2496
2497 // We must have an 8- or 16-bit load.
2498 auto *Load = cast<LoadSDNode>(C.Op0);
2499 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2500 if ((NumBits != 8 && NumBits != 16) ||
2501 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2502 return;
2503
2504 // The load must be an extending one and the constant must be within the
2505 // range of the unextended value.
2506 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2507 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2508 return;
2509 uint64_t Value = ConstOp1->getZExtValue();
2510 uint64_t Mask = (1 << NumBits) - 1;
2511 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2512 // Make sure that ConstOp1 is in range of C.Op0.
2513 int64_t SignedValue = ConstOp1->getSExtValue();
2514 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2515 return;
2516 if (C.ICmpType != SystemZICMP::SignedOnly) {
2517 // Unsigned comparison between two sign-extended values is equivalent
2518 // to unsigned comparison between two zero-extended values.
2519 Value &= Mask;
2520 } else if (NumBits == 8) {
2521 // Try to treat the comparison as unsigned, so that we can use CLI.
2522 // Adjust CCMask and Value as necessary.
2523 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2524 // Test whether the high bit of the byte is set.
2525 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2526 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2527 // Test whether the high bit of the byte is clear.
2528 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2529 else
2530 // No instruction exists for this combination.
2531 return;
2532 C.ICmpType = SystemZICMP::UnsignedOnly;
2533 }
2534 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2535 if (Value > Mask)
2536 return;
2537 // If the constant is in range, we can use any comparison.
2538 C.ICmpType = SystemZICMP::Any;
2539 } else
2540 return;
2541
2542 // Make sure that the first operand is an i32 of the right extension type.
2543 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2544 ISD::SEXTLOAD :
2545 ISD::ZEXTLOAD);
2546 if (C.Op0.getValueType() != MVT::i32 ||
2547 Load->getExtensionType() != ExtType) {
2548 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2549 Load->getBasePtr(), Load->getPointerInfo(),
2550 Load->getMemoryVT(), Load->getAlign(),
2551 Load->getMemOperand()->getFlags());
2552 // Update the chain uses.
2553 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2554 }
2555
2556 // Make sure that the second operand is an i32 with the right value.
2557 if (C.Op1.getValueType() != MVT::i32 ||
2558 Value != ConstOp1->getZExtValue())
2559 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2560}
2561
2562// Return true if Op is either an unextended load, or a load suitable
2563// for integer register-memory comparisons of type ICmpType.
2564static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2565 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2566 if (Load) {
2567 // There are no instructions to compare a register with a memory byte.
2568 if (Load->getMemoryVT() == MVT::i8)
2569 return false;
2570 // Otherwise decide on extension type.
2571 switch (Load->getExtensionType()) {
2572 case ISD::NON_EXTLOAD:
2573 return true;
2574 case ISD::SEXTLOAD:
2575 return ICmpType != SystemZICMP::UnsignedOnly;
2576 case ISD::ZEXTLOAD:
2577 return ICmpType != SystemZICMP::SignedOnly;
2578 default:
2579 break;
2580 }
2581 }
2582 return false;
2583}
2584
2585// Return true if it is better to swap the operands of C.
2586static bool shouldSwapCmpOperands(const Comparison &C) {
2587 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2588 if (C.Op0.getValueType() == MVT::i128)
2589 return false;
2590 if (C.Op0.getValueType() == MVT::f128)
2591 return false;
2592
2593 // Always keep a floating-point constant second, since comparisons with
2594 // zero can use LOAD AND TEST and comparisons with other constants make a
2595 // natural memory operand.
2596 if (isa<ConstantFPSDNode>(C.Op1))
2597 return false;
2598
2599 // Never swap comparisons with zero since there are many ways to optimize
2600 // those later.
2601 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2602 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2603 return false;
2604
2605 // Also keep natural memory operands second if the loaded value is
2606 // only used here. Several comparisons have memory forms.
2607 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2608 return false;
2609
2610 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2611 // In that case we generally prefer the memory to be second.
2612 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2613 // The only exceptions are when the second operand is a constant and
2614 // we can use things like CHHSI.
2615 if (!ConstOp1)
2616 return true;
2617 // The unsigned memory-immediate instructions can handle 16-bit
2618 // unsigned integers.
2619 if (C.ICmpType != SystemZICMP::SignedOnly &&
2620 isUInt<16>(ConstOp1->getZExtValue()))
2621 return false;
2622 // The signed memory-immediate instructions can handle 16-bit
2623 // signed integers.
2624 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2625 isInt<16>(ConstOp1->getSExtValue()))
2626 return false;
2627 return true;
2628 }
2629
2630 // Try to promote the use of CGFR and CLGFR.
2631 unsigned Opcode0 = C.Op0.getOpcode();
2632 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2633 return true;
2634 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2635 return true;
2636 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
2637 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2638 C.Op0.getConstantOperandVal(1) == 0xffffffff)
2639 return true;
2640
2641 return false;
2642}
2643
2644// Check whether C tests for equality between X and Y and whether X - Y
2645// or Y - X is also computed. In that case it's better to compare the
2646// result of the subtraction against zero.
2647 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2648 Comparison &C) {
2649 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2650 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2651 for (SDNode *N : C.Op0->uses()) {
2652 if (N->getOpcode() == ISD::SUB &&
2653 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2654 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2655 // Disable the nsw and nuw flags: the backend needs to handle
2656 // overflow as well during comparison elimination.
2657 SDNodeFlags Flags = N->getFlags();
2658 Flags.setNoSignedWrap(false);
2659 Flags.setNoUnsignedWrap(false);
2660 N->setFlags(Flags);
2661 C.Op0 = SDValue(N, 0);
2662 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2663 return;
2664 }
2665 }
2666 }
2667}
2668
2669// Check whether C compares a floating-point value with zero and if that
2670// floating-point value is also negated. In this case we can use the
2671// negation to set CC, so avoiding separate LOAD AND TEST and
2672// LOAD (NEGATIVE/COMPLEMENT) instructions.
2673static void adjustForFNeg(Comparison &C) {
2674 // This optimization is invalid for strict comparisons, since FNEG
2675 // does not raise any exceptions.
2676 if (C.Chain)
2677 return;
2678 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2679 if (C1 && C1->isZero()) {
2680 for (SDNode *N : C.Op0->uses()) {
2681 if (N->getOpcode() == ISD::FNEG) {
2682 C.Op0 = SDValue(N, 0);
2683 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2684 return;
2685 }
2686 }
2687 }
2688}
2689
2690// Check whether C compares (shl X, 32) with 0 and whether X is
2691// also sign-extended. In that case it is better to test the result
2692// of the sign extension using LTGFR.
2693//
2694// This case is important because InstCombine transforms a comparison
2695// with (sext (trunc X)) into a comparison with (shl X, 32).
2696static void adjustForLTGFR(Comparison &C) {
2697 // Check for a comparison between (shl X, 32) and 0.
2698 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
2699 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
2700 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2701 if (C1 && C1->getZExtValue() == 32) {
2702 SDValue ShlOp0 = C.Op0.getOperand(0);
2703 // See whether X has any SIGN_EXTEND_INREG uses.
2704 for (SDNode *N : ShlOp0->uses()) {
2705 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2706 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2707 C.Op0 = SDValue(N, 0);
2708 return;
2709 }
2710 }
2711 }
2712 }
2713}
2714
2715// If C compares the truncation of an extending load, try to compare
2716// the untruncated value instead. This exposes more opportunities to
2717// reuse CC.
2718static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2719 Comparison &C) {
2720 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2721 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2722 C.Op1.getOpcode() == ISD::Constant &&
2723 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
2724 C.Op1->getAsZExtVal() == 0) {
2725 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2726 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
2727 C.Op0.getValueSizeInBits().getFixedValue()) {
2728 unsigned Type = L->getExtensionType();
2729 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2730 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2731 C.Op0 = C.Op0.getOperand(0);
2732 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2733 }
2734 }
2735 }
2736}
2737
2738// Return true if shift operation N has an in-range constant shift value.
2739// Store it in ShiftVal if so.
2740static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2741 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2742 if (!Shift)
2743 return false;
2744
2745 uint64_t Amount = Shift->getZExtValue();
2746 if (Amount >= N.getValueSizeInBits())
2747 return false;
2748
2749 ShiftVal = Amount;
2750 return true;
2751}
2752
2753// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2754// instruction and whether the CC value is descriptive enough to handle
2755// a comparison of type Opcode between the AND result and CmpVal.
2756// CCMask says which comparison result is being tested and BitSize is
2757// the number of bits in the operands. If TEST UNDER MASK can be used,
2758// return the corresponding CC mask, otherwise return 0.
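// For example, an equality test of (x & 1) against 0 can use TMLL with
// mask 1, branching on CCMASK_TM_ALL_0 (all tested bits zero).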
2759static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2760 uint64_t Mask, uint64_t CmpVal,
2761 unsigned ICmpType) {
2762 assert(Mask != 0 && "ANDs with zero should have been removed by now");
2763
2764 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2765 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2766 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2767 return 0;
2768
2769 // Work out the masks for the lowest and highest bits.
2770 uint64_t High = uint64_t(1) << llvm::Log2_64(Mask);
2771 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
2772
2773 // Signed ordered comparisons are effectively unsigned if the sign
2774 // bit is dropped.
2775 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2776
2777 // Check for equality comparisons with 0, or the equivalent.
2778 if (CmpVal == 0) {
2779 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2780 return SystemZ::CCMASK_TM_ALL_0;
2781 if (CCMask == SystemZ::CCMASK_CMP_NE)
2782 return SystemZ::CCMASK_TM_SOME_1;
2783 }
2784 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2785 if (CCMask == SystemZ::CCMASK_CMP_LT)
2786 return SystemZ::CCMASK_TM_ALL_0;
2787 if (CCMask == SystemZ::CCMASK_CMP_GE)
2788 return SystemZ::CCMASK_TM_SOME_1;
2789 }
2790 if (EffectivelyUnsigned && CmpVal < Low) {
2791 if (CCMask == SystemZ::CCMASK_CMP_LE)
2792 return SystemZ::CCMASK_TM_ALL_0;
2793 if (CCMask == SystemZ::CCMASK_CMP_GT)
2794 return SystemZ::CCMASK_TM_SOME_1;
2795 }
2796
2797 // Check for equality comparisons with the mask, or the equivalent.
2798 if (CmpVal == Mask) {
2799 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2800 return SystemZ::CCMASK_TM_ALL_1;
2801 if (CCMask == SystemZ::CCMASK_CMP_NE)
2802 return SystemZ::CCMASK_TM_SOME_0;
2803 }
2804 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2805 if (CCMask == SystemZ::CCMASK_CMP_GT)
2806 return SystemZ::CCMASK_TM_ALL_1;
2807 if (CCMask == SystemZ::CCMASK_CMP_LE)
2808 return SystemZ::CCMASK_TM_SOME_0;
2809 }
2810 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2811 if (CCMask == SystemZ::CCMASK_CMP_GE)
2812 return SystemZ::CCMASK_TM_ALL_1;
2813 if (CCMask == SystemZ::CCMASK_CMP_LT)
2814 return SystemZ::CCMASK_TM_SOME_0;
2815 }
2816
2817 // Check for ordered comparisons with the top bit.
2818 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2819 if (CCMask == SystemZ::CCMASK_CMP_LE)
2820 return SystemZ::CCMASK_TM_MSB_0;
2821 if (CCMask == SystemZ::CCMASK_CMP_GT)
2822 return SystemZ::CCMASK_TM_MSB_1;
2823 }
2824 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2825 if (CCMask == SystemZ::CCMASK_CMP_LT)
2826 return SystemZ::CCMASK_TM_MSB_0;
2827 if (CCMask == SystemZ::CCMASK_CMP_GE)
2828 return SystemZ::CCMASK_TM_MSB_1;
2829 }
2830
2831 // If there are just two bits, we can do equality checks for Low and High
2832 // as well.
2833 if (Mask == Low + High) {
2834 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2835 return SystemZ::CCMASK_TM_MIXED_MSB_0;
2836 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2837 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2838 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2839 return SystemZ::CCMASK_TM_MIXED_MSB_1;
2840 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2841 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2842 }
2843
2844 // Looks like we've exhausted our options.
2845 return 0;
2846}
2847
2848// See whether C can be implemented as a TEST UNDER MASK instruction.
2849// Update the arguments with the TM version if so.
2850 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2851 Comparison &C) {
2852 // Use VECTOR TEST UNDER MASK for i128 operations.
2853 if (C.Op0.getValueType() == MVT::i128) {
2854 // We can use VTM for EQ/NE comparisons of x & y against 0.
2855 if (C.Op0.getOpcode() == ISD::AND &&
2856 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2857 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
2858 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
2859 if (Mask && Mask->getAPIntValue() == 0) {
2860 C.Opcode = SystemZISD::VTM;
2861 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
2862 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
2863 C.CCValid = SystemZ::CCMASK_VCMP;
2864 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2865 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2866 else
2867 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2868 }
2869 }
2870 return;
2871 }
2872
2873 // Check that we have a comparison with a constant.
2874 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2875 if (!ConstOp1)
2876 return;
2877 uint64_t CmpVal = ConstOp1->getZExtValue();
2878
2879 // Check whether the nonconstant input is an AND with a constant mask.
2880 Comparison NewC(C);
2881 uint64_t MaskVal;
2882 ConstantSDNode *Mask = nullptr;
2883 if (C.Op0.getOpcode() == ISD::AND) {
2884 NewC.Op0 = C.Op0.getOperand(0);
2885 NewC.Op1 = C.Op0.getOperand(1);
2886 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2887 if (!Mask)
2888 return;
2889 MaskVal = Mask->getZExtValue();
2890 } else {
2891 // There is no instruction to compare with a 64-bit immediate
2892 // so use TMHH instead if possible. We need an unsigned ordered
2893 // comparison with an i64 immediate.
2894 if (NewC.Op0.getValueType() != MVT::i64 ||
2895 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2896 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2897 NewC.ICmpType == SystemZICMP::SignedOnly)
2898 return;
2899 // Convert LE and GT comparisons into LT and GE.
2900 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2901 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2902 if (CmpVal == uint64_t(-1))
2903 return;
2904 CmpVal += 1;
2905 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2906 }
2907 // If the low N bits of Op1 are zero then the low N bits of Op0 can
2908 // be masked off without changing the result.
2909 MaskVal = -(CmpVal & -CmpVal);
2910 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2911 }
2912 if (!MaskVal)
2913 return;
2914
2915 // Check whether the combination of mask, comparison value and comparison
2916 // type are suitable.
2917 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2918 unsigned NewCCMask, ShiftVal;
2919 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2920 NewC.Op0.getOpcode() == ISD::SHL &&
2921 isSimpleShift(NewC.Op0, ShiftVal) &&
2922 (MaskVal >> ShiftVal != 0) &&
2923 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2924 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2925 MaskVal >> ShiftVal,
2926 CmpVal >> ShiftVal,
2927 SystemZICMP::Any))) {
2928 NewC.Op0 = NewC.Op0.getOperand(0);
2929 MaskVal >>= ShiftVal;
2930 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2931 NewC.Op0.getOpcode() == ISD::SRL &&
2932 isSimpleShift(NewC.Op0, ShiftVal) &&
2933 (MaskVal << ShiftVal != 0) &&
2934 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2935 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2936 MaskVal << ShiftVal,
2937 CmpVal << ShiftVal,
2938 SystemZICMP::UnsignedOnly))) {
2939 NewC.Op0 = NewC.Op0.getOperand(0);
2940 MaskVal <<= ShiftVal;
2941 } else {
2942 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2943 NewC.ICmpType);
2944 if (!NewCCMask)
2945 return;
2946 }
2947
2948 // Go ahead and make the change.
2949 C.Opcode = SystemZISD::TM;
2950 C.Op0 = NewC.Op0;
2951 if (Mask && Mask->getZExtValue() == MaskVal)
2952 C.Op1 = SDValue(Mask, 0);
2953 else
2954 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2955 C.CCValid = SystemZ::CCMASK_TM;
2956 C.CCMask = NewCCMask;
2957}
2958
2959// Implement i128 comparison in vector registers.
2960static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
2961 Comparison &C) {
2962 if (C.Opcode != SystemZISD::ICMP)
2963 return;
2964 if (C.Op0.getValueType() != MVT::i128)
2965 return;
2966
2967 // (In-)Equality comparisons can be implemented via VCEQGS.
2968 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2969 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2970 C.Opcode = SystemZISD::VICMPES;
2971 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
2972 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
2973 C.CCValid = SystemZ::CCMASK_VCMP;
2974 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2975 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2976 else
2977 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2978 return;
2979 }
2980
2981 // Normalize other comparisons to GT.
2982 bool Swap = false, Invert = false;
2983 switch (C.CCMask) {
2984 case SystemZ::CCMASK_CMP_GT: break;
2985 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
2986 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
2987 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
2988 default: llvm_unreachable("Invalid integer condition!");
2989 }
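// For example, "a < b" is handled by swapping the operands and testing
// "b > a", while "a <= b" is handled by inverting "a > b".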
2990 if (Swap)
2991 std::swap(C.Op0, C.Op1);
2992
2993 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2994 C.Opcode = SystemZISD::UCMP128HI;
2995 else
2996 C.Opcode = SystemZISD::SCMP128HI;
2997 C.CCValid = SystemZ::CCMASK_ANY;
2998 C.CCMask = SystemZ::CCMASK_1;
2999
3000 if (Invert)
3001 C.CCMask ^= C.CCValid;
3002}
3003
3004// See whether the comparison argument contains a redundant AND
3005// and remove it if so. This sometimes happens due to the generic
3006// BRCOND expansion.
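// If every bit that can be nonzero in the other operand is already covered
// by the constant mask, the AND cannot change the comparison result.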
3007 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3008 Comparison &C) {
3009 if (C.Op0.getOpcode() != ISD::AND)
3010 return;
3011 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3012 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3013 return;
3014 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3015 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3016 return;
3017
3018 C.Op0 = C.Op0.getOperand(0);
3019}
3020
3021// Return a Comparison that tests the condition-code result of intrinsic
3022// node Call against constant integer CC using comparison code Cond.
3023// Opcode is the opcode of the SystemZISD operation for the intrinsic
3024// and CCValid is the set of possible condition-code results.
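// The 4-bit CC mask uses bit 3 for CC value 0 down to bit 0 for CC value 3;
// for example, Cond == SETEQ with CC == 1 yields CCMask == 1 << 2.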
3025static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3026 SDValue Call, unsigned CCValid, uint64_t CC,
3027 ISD::CondCode Cond) {
3028 Comparison C(Call, SDValue(), SDValue());
3029 C.Opcode = Opcode;
3030 C.CCValid = CCValid;
3031 if (Cond == ISD::SETEQ)
3032 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3033 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3034 else if (Cond == ISD::SETNE)
3035 // ...and the inverse of that.
3036 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3037 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3038 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3039 // always true for CC>3.
3040 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3041 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3042 // ...and the inverse of that.
3043 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3044 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3045 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3046 // always true for CC>3.
3047 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3048 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3049 // ...and the inverse of that.
3050 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3051 else
3052 llvm_unreachable("Unexpected integer comparison type");
3053 C.CCMask &= CCValid;
3054 return C;
3055}
3056
3057 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3058static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3059 ISD::CondCode Cond, const SDLoc &DL,
3060 SDValue Chain = SDValue(),
3061 bool IsSignaling = false) {
3062 if (CmpOp1.getOpcode() == ISD::Constant) {
3063 assert(!Chain);
3064 unsigned Opcode, CCValid;
3065 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3066 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3067 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3068 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3069 CmpOp1->getAsZExtVal(), Cond);
3070 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3071 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3072 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3073 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3074 CmpOp1->getAsZExtVal(), Cond);
3075 }
3076 Comparison C(CmpOp0, CmpOp1, Chain);
3077 C.CCMask = CCMaskForCondCode(Cond);
3078 if (C.Op0.getValueType().isFloatingPoint()) {
3079 C.CCValid = SystemZ::CCMASK_FCMP;
3080 if (!C.Chain)
3081 C.Opcode = SystemZISD::FCMP;
3082 else if (!IsSignaling)
3083 C.Opcode = SystemZISD::STRICT_FCMP;
3084 else
3085 C.Opcode = SystemZISD::STRICT_FCMPS;
3086 adjustForFNeg(C);
3087 } else {
3088 assert(!C.Chain);
3089 C.CCValid = SystemZ::CCMASK_ICMP;
3090 C.Opcode = SystemZISD::ICMP;
3091 // Choose the type of comparison. Equality and inequality tests can
3092 // use either signed or unsigned comparisons. The choice also doesn't
3093 // matter if both sign bits are known to be clear. In those cases we
3094 // want to give the main isel code the freedom to choose whichever
3095 // form fits best.
3096 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3097 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3098 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3099 C.ICmpType = SystemZICMP::Any;
3100 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3101 C.ICmpType = SystemZICMP::UnsignedOnly;
3102 else
3103 C.ICmpType = SystemZICMP::SignedOnly;
3104 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3105 adjustForRedundantAnd(DAG, DL, C);
3106 adjustZeroCmp(DAG, DL, C);
3107 adjustSubwordCmp(DAG, DL, C);
3108 adjustForSubtraction(DAG, DL, C);
3109 adjustForLTGFR(C);
3110 adjustICmpTruncate(DAG, DL, C);
3111 }
3112
3113 if (shouldSwapCmpOperands(C)) {
3114 std::swap(C.Op0, C.Op1);
3115 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3116 }
3117
3118 adjustForTestUnderMask(DAG, DL, C);
3119 adjustICmp128(DAG, DL, C);
3120 return C;
3121}
3122
3123// Emit the comparison instruction described by C.
3124static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3125 if (!C.Op1.getNode()) {
3126 SDNode *Node;
3127 switch (C.Op0.getOpcode()) {
3128 case ISD::INTRINSIC_W_CHAIN:
3129 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3130 return SDValue(Node, 0);
3131 case ISD::INTRINSIC_WO_CHAIN:
3132 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3133 return SDValue(Node, Node->getNumValues() - 1);
3134 default:
3135 llvm_unreachable("Invalid comparison operands");
3136 }
3137 }
3138 if (C.Opcode == SystemZISD::ICMP)
3139 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3140 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3141 if (C.Opcode == SystemZISD::TM) {
3142 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3143 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
3144 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3145 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3146 }
3147 if (C.Opcode == SystemZISD::VICMPES) {
3148 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
3149 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3150 return SDValue(Val.getNode(), 1);
3151 }
3152 if (C.Chain) {
3153 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3154 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3155 }
3156 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3157}
3158
3159// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3160// 64 bits. Extend is the extension type to use. Store the high part
3161// in Hi and the low part in Lo.
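// For example, a 32-bit smul_lohi becomes one 64-bit multiply of the
// sign-extended operands, with Hi = trunc(Mul >> 32) and Lo = trunc(Mul).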
3162static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3163 SDValue Op0, SDValue Op1, SDValue &Hi,
3164 SDValue &Lo) {
3165 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3166 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3167 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3168 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3169 DAG.getConstant(32, DL, MVT::i64));
3170 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3171 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3172}
3173
3174// Lower a binary operation that produces two VT results, one in each
3175// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3176// and Opcode performs the GR128 operation. Store the even register result
3177// in Even and the odd register result in Odd.
3178static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3179 unsigned Opcode, SDValue Op0, SDValue Op1,
3180 SDValue &Even, SDValue &Odd) {
3181 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3182 bool Is32Bit = is32Bit(VT);
3183 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3184 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3185}
3186
3187// Return an i32 value that is 1 if the CC value produced by CCReg is
3188// in the mask CCMask and 0 otherwise. CC is known to have a value
3189// in CCValid, so other values can be ignored.
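// For example, with CCValid == CCMASK_ICMP and CCMask == CCMASK_CMP_EQ,
// the result is 1 exactly when the comparison set CC 0.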
3190static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3191 unsigned CCValid, unsigned CCMask) {
3192 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3193 DAG.getConstant(0, DL, MVT::i32),
3194 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3195 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3196 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3197}
3198
3199 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3200// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3201// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3202// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3203// floating-point comparisons.
3204 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
3205 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3206 switch (CC) {
3207 case ISD::SETOEQ:
3208 case ISD::SETEQ:
3209 switch (Mode) {
3210 case CmpMode::Int: return SystemZISD::VICMPE;
3211 case CmpMode::FP: return SystemZISD::VFCMPE;
3212 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3213 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3214 }
3215 llvm_unreachable("Bad mode");
3216
3217 case ISD::SETOGE:
3218 case ISD::SETGE:
3219 switch (Mode) {
3220 case CmpMode::Int: return 0;
3221 case CmpMode::FP: return SystemZISD::VFCMPHE;
3222 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3223 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3224 }
3225 llvm_unreachable("Bad mode");
3226
3227 case ISD::SETOGT:
3228 case ISD::SETGT:
3229 switch (Mode) {
3230 case CmpMode::Int: return SystemZISD::VICMPH;
3231 case CmpMode::FP: return SystemZISD::VFCMPH;
3232 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3233 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3234 }
3235 llvm_unreachable("Bad mode");
3236
3237 case ISD::SETUGT:
3238 switch (Mode) {
3239 case CmpMode::Int: return SystemZISD::VICMPHL;
3240 case CmpMode::FP: return 0;
3241 case CmpMode::StrictFP: return 0;
3242 case CmpMode::SignalingFP: return 0;
3243 }
3244 llvm_unreachable("Bad mode");
3245
3246 default:
3247 return 0;
3248 }
3249}
3250
3251// Return the SystemZISD vector comparison operation for CC or its inverse,
3252// or 0 if neither can be done directly. Indicate in Invert whether the
3253// result is for the inverse of CC. Mode is as above.
3254 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3255 bool &Invert) {
3256 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3257 Invert = false;
3258 return Opcode;
3259 }
3260
3261 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3262 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3263 Invert = true;
3264 return Opcode;
3265 }
3266
3267 return 0;
3268}
3269
3270// Return a v2f64 that contains the extended form of elements Start and Start+1
3271// of v4f32 value Op. If Chain is nonnull, return the strict form.
3272static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3273 SDValue Op, SDValue Chain) {
3274 int Mask[] = { Start, -1, Start + 1, -1 };
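// Place the two requested elements at the even indices, since those are
// the elements that VEXTEND (VLDEB) widens to f64.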
3275 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3276 if (Chain) {
3277 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3278 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3279 }
3280 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3281}
3282
3283// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3284// producing a result of type VT. If Chain is nonnull, return the strict form.
3285SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3286 const SDLoc &DL, EVT VT,
3287 SDValue CmpOp0,
3288 SDValue CmpOp1,
3289 SDValue Chain) const {
3290 // There is no hardware support for v4f32 (unless we have the vector
3291 // enhancements facility 1), so extend the vector into two v2f64s
3292 // and compare those.
3293 if (CmpOp0.getValueType() == MVT::v4f32 &&
3294 !Subtarget.hasVectorEnhancements1()) {
3295 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3296 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3297 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3298 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3299 if (Chain) {
3300 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3301 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3302 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3303 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3304 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3305 H1.getValue(1), L1.getValue(1),
3306 HRes.getValue(1), LRes.getValue(1) };
3307 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3308 SDValue Ops[2] = { Res, NewChain };
3309 return DAG.getMergeValues(Ops, DL);
3310 }
3311 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3312 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3313 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3314 }
3315 if (Chain) {
3316 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3317 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3318 }
3319 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3320}
3321
3322// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3323// an integer mask of type VT. If Chain is nonnull, we have a strict
3324// floating-point comparison. If in addition IsSignaling is true, we have
3325// a strict signaling floating-point comparison.
3326SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3327 const SDLoc &DL, EVT VT,
3328 ISD::CondCode CC,
3329 SDValue CmpOp0,
3330 SDValue CmpOp1,
3331 SDValue Chain,
3332 bool IsSignaling) const {
3333 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3334 assert (!Chain || IsFP);
3335 assert (!IsSignaling || Chain);
3336 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3337 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3338 bool Invert = false;
3339 SDValue Cmp;
3340 switch (CC) {
3341 // Handle tests for order using (or (ogt y x) (oge x y)).
3342 case ISD::SETUO:
3343 Invert = true;
3344 [[fallthrough]];
3345 case ISD::SETO: {
3346 assert(IsFP && "Unexpected integer comparison");
3347 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3348 DL, VT, CmpOp1, CmpOp0, Chain);
3349 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3350 DL, VT, CmpOp0, CmpOp1, Chain);
3351 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3352 if (Chain)
3353 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3354 LT.getValue(1), GE.getValue(1));
3355 break;
3356 }
3357
3358 // Handle <> tests using (or (ogt y x) (ogt x y)).
3359 case ISD::SETUEQ:
3360 Invert = true;
3361 [[fallthrough]];
3362 case ISD::SETONE: {
3363 assert(IsFP && "Unexpected integer comparison");
3364 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3365 DL, VT, CmpOp1, CmpOp0, Chain);
3366 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3367 DL, VT, CmpOp0, CmpOp1, Chain);
3368 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3369 if (Chain)
3370 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3371 LT.getValue(1), GT.getValue(1));
3372 break;
3373 }
3374
3375 // Otherwise a single comparison is enough. It doesn't really
3376 // matter whether we try the inversion or the swap first, since
3377 // there are no cases where both work.
3378 default:
3379 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3380 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3381 else {
3382 CC = ISD::getSetCCSwappedOperands(CC);
3383 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3384 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3385 else
3386 llvm_unreachable("Unhandled comparison");
3387 }
3388 if (Chain)
3389 Chain = Cmp.getValue(1);
3390 break;
3391 }
3392 if (Invert) {
3393 SDValue Mask =
3394 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3395 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3396 }
3397 if (Chain && Chain.getNode() != Cmp.getNode()) {
3398 SDValue Ops[2] = { Cmp, Chain };
3399 Cmp = DAG.getMergeValues(Ops, DL);
3400 }
3401 return Cmp;
3402}
3403
3404SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3405 SelectionDAG &DAG) const {
3406 SDValue CmpOp0 = Op.getOperand(0);
3407 SDValue CmpOp1 = Op.getOperand(1);
3408 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3409 SDLoc DL(Op);
3410 EVT VT = Op.getValueType();
3411 if (VT.isVector())
3412 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3413
3414 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3415 SDValue CCReg = emitCmp(DAG, DL, C);
3416 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3417}
3418
3419SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3420 SelectionDAG &DAG,
3421 bool IsSignaling) const {
3422 SDValue Chain = Op.getOperand(0);
3423 SDValue CmpOp0 = Op.getOperand(1);
3424 SDValue CmpOp1 = Op.getOperand(2);
3425 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3426 SDLoc DL(Op);
3427 EVT VT = Op.getNode()->getValueType(0);
3428 if (VT.isVector()) {
3429 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3430 Chain, IsSignaling);
3431 return Res.getValue(Op.getResNo());
3432 }
3433
3434 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3435 SDValue CCReg = emitCmp(DAG, DL, C);
3436 CCReg->setFlags(Op->getFlags());
3437 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3438 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3439 return DAG.getMergeValues(Ops, DL);
3440}
3441
3442SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3443 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3444 SDValue CmpOp0 = Op.getOperand(2);
3445 SDValue CmpOp1 = Op.getOperand(3);
3446 SDValue Dest = Op.getOperand(4);
3447 SDLoc DL(Op);
3448
3449 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3450 SDValue CCReg = emitCmp(DAG, DL, C);
3451 return DAG.getNode(
3452 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3453 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3454 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3455}
3456
3457// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3458// allowing Pos and Neg to be wider than CmpOp.
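// That is, match the value pair of selects like (x < 0 ? 0 - x : x),
// where the comparison may be performed on x before it is sign-extended.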
3459static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3460 return (Neg.getOpcode() == ISD::SUB &&
3461 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3462 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3463 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3464 Pos.getOperand(0) == CmpOp)));
3465}
3466
3467// Return the absolute or negative absolute of Op; IsNegative decides which.
3468 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3469 bool IsNegative) {
3470 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3471 if (IsNegative)
3472 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3473 DAG.getConstant(0, DL, Op.getValueType()), Op);
3474 return Op;
3475}
3476
3477SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3478 SelectionDAG &DAG) const {
3479 SDValue CmpOp0 = Op.getOperand(0);
3480 SDValue CmpOp1 = Op.getOperand(1);
3481 SDValue TrueOp = Op.getOperand(2);
3482 SDValue FalseOp = Op.getOperand(3);
3483 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3484 SDLoc DL(Op);
3485
3486 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3487
3488 // Check for absolute and negative-absolute selections, including those
3489 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3490 // This check supplements the one in DAGCombiner.
3491 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3492 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3493 C.Op1.getOpcode() == ISD::Constant &&
3494 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3495 C.Op1->getAsZExtVal() == 0) {
3496 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3497 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3498 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3499 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3500 }
3501
3502 SDValue CCReg = emitCmp(DAG, DL, C);
3503 SDValue Ops[] = {TrueOp, FalseOp,
3504 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3505 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3506
3507 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3508}
3509
3510SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3511 SelectionDAG &DAG) const {
3512 SDLoc DL(Node);
3513 const GlobalValue *GV = Node->getGlobal();
3514 int64_t Offset = Node->getOffset();
3515 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3516 CodeModel::Model CM = DAG.getTarget().getCodeModel();
3517
3518 SDValue Result;
3519 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3520 if (isInt<32>(Offset)) {
3521 // Assign anchors at 1<<12 byte boundaries.
3522 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3523 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3524 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3525
3526 // The offset can be folded into the address if it is aligned to a
3527 // halfword.
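// For example, GV+0x1002 uses the anchor GV+0x1000 and folds the remaining
// offset of 2 via PCREL_OFFSET.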
3528 Offset -= Anchor;
3529 if (Offset != 0 && (Offset & 1) == 0) {
3530 SDValue Full =
3531 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3532 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3533 Offset = 0;
3534 }
3535 } else {
3536 // Conservatively load a constant offset greater than 32 bits into a
3537 // register below.
3538 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3539 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3540 }
3541 } else if (Subtarget.isTargetELF()) {
3542 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3543 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3544 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3545 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3546 } else if (Subtarget.isTargetzOS()) {
3547 Result = getADAEntry(DAG, GV, DL, PtrVT);
3548 } else
3549 llvm_unreachable("Unexpected Subtarget");
3550
3551 // If there was a non-zero offset that we didn't fold, create an explicit
3552 // addition for it.
3553 if (Offset != 0)
3554 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3555 DAG.getConstant(Offset, DL, PtrVT));
3556
3557 return Result;
3558}
3559
3560SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3561 SelectionDAG &DAG,
3562 unsigned Opcode,
3563 SDValue GOTOffset) const {
3564 SDLoc DL(Node);
3565 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3566 SDValue Chain = DAG.getEntryNode();
3567 SDValue Glue;
3568
3569 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3570 CallingConv::GHC)
3571 report_fatal_error("In GHC calling convention TLS is not supported");
3572
3573 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3574 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3575 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3576 Glue = Chain.getValue(1);
3577 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3578 Glue = Chain.getValue(1);
3579
3580 // The first call operand is the chain and the second is the TLS symbol.
3581 SmallVector<SDValue, 8> Ops;
3582 Ops.push_back(Chain);
3583 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3584 Node->getValueType(0),
3585 0, 0));
3586
3587 // Add argument registers to the end of the list so that they are
3588 // known live into the call.
3589 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3590 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3591
3592 // Add a register mask operand representing the call-preserved registers.
3593 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3594 const uint32_t *Mask =
3595 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3596 assert(Mask && "Missing call preserved mask for calling convention");
3597 Ops.push_back(DAG.getRegisterMask(Mask));
3598
3599 // Glue the call to the argument copies.
3600 Ops.push_back(Glue);
3601
3602 // Emit the call.
3603 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3604 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3605 Glue = Chain.getValue(1);
3606
3607 // Copy the return value from %r2.
3608 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3609}
3610
3611SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3612 SelectionDAG &DAG) const {
3613 SDValue Chain = DAG.getEntryNode();
3614 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3615
3616 // The high part of the thread pointer is in access register 0.
3617 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3618 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3619
3620 // The low part of the thread pointer is in access register 1.
3621 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3622 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3623
3624 // Merge them into a single 64-bit address.
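// That is, TP = (zext(a0) << 32) | zext(a1): the 64-bit thread pointer
// is kept in the access-register pair a0 (high half) / a1 (low half).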
3625 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3626 DAG.getConstant(32, DL, PtrVT));
3627 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3628}
3629
3630SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3631 SelectionDAG &DAG) const {
3632 if (DAG.getTarget().useEmulatedTLS())
3633 return LowerToTLSEmulatedModel(Node, DAG);
3634 SDLoc DL(Node);
3635 const GlobalValue *GV = Node->getGlobal();
3636 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3637 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3638
3639 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3640 CallingConv::GHC)
3641 report_fatal_error("In GHC calling convention TLS is not supported");
3642
3643 SDValue TP = lowerThreadPointer(DL, DAG);
3644
3645 // Get the offset of GA from the thread pointer, based on the TLS model.
3646 SDValue Offset;
3647 switch (model) {
3648 case TLSModel::GeneralDynamic: {
3649 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3650 SystemZConstantPoolValue *CPV =
3651 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3652
3653 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3654 Offset = DAG.getLoad(
3655 PtrVT, DL, DAG.getEntryNode(), Offset,
3656 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3657
3658 // Call __tls_get_offset to retrieve the offset.
3659 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3660 break;
3661 }
3662
3663 case TLSModel::LocalDynamic: {
3664 // Load the GOT offset of the module ID.
3665 SystemZConstantPoolValue *CPV =
3666 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3667
3668 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3669 Offset = DAG.getLoad(
3670 PtrVT, DL, DAG.getEntryNode(), Offset,
3671 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3672
3673 // Call __tls_get_offset to retrieve the module base offset.
3674 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3675
3676 // Note: The SystemZLDCleanupPass will remove redundant computations
3677 // of the module base offset. Count total number of local-dynamic
3678 // accesses to trigger execution of that pass.
3679 SystemZMachineFunctionInfo* MFI =
3680 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3681 MFI->incNumLocalDynamicTLSAccesses();
3682
3683 // Add the per-symbol offset.
3684 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3685
3686 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3687 DTPOffset = DAG.getLoad(
3688 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3689 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3690
3691 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3692 break;
3693 }
3694
3695 case TLSModel::InitialExec: {
3696 // Load the offset from the GOT.
3697 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3698 SystemZII::MO_INDNTPOFF);
3699 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3700 Offset =
3701 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3702 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3703 break;
3704 }
3705
3706 case TLSModel::LocalExec: {
3707 // Force the offset into the constant pool and load it from there.
3708 SystemZConstantPoolValue *CPV =
3709 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3710
3711 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3712 Offset = DAG.getLoad(
3713 PtrVT, DL, DAG.getEntryNode(), Offset,
3714 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3715 break;
3716 }
3717 }
3718
3719 // Add the base and offset together.
3720 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3721}
3722
3723SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3724 SelectionDAG &DAG) const {
3725 SDLoc DL(Node);
3726 const BlockAddress *BA = Node->getBlockAddress();
3727 int64_t Offset = Node->getOffset();
3728 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3729
3730 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3731 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3732 return Result;
3733}
3734
3735SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3736 SelectionDAG &DAG) const {
3737 SDLoc DL(JT);
3738 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3739 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3740
3741 // Use LARL to load the address of the table.
3742 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3743}
3744
3745SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3746 SelectionDAG &DAG) const {
3747 SDLoc DL(CP);
3748 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3749
3750 SDValue Result;
3751 if (CP->isMachineConstantPoolEntry())
3752 Result =
3753 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3754 else
3755 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3756 CP->getOffset());
3757
3758 // Use LARL to load the address of the constant pool entry.
3759 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3760}
3761
3762SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3763 SelectionDAG &DAG) const {
3764 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
3765 MachineFunction &MF = DAG.getMachineFunction();
3766 MachineFrameInfo &MFI = MF.getFrameInfo();
3767 MFI.setFrameAddressIsTaken(true);
3768
3769 SDLoc DL(Op);
3770 unsigned Depth = Op.getConstantOperandVal(0);
3771 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3772
3773 // By definition, the frame address is the address of the back chain. (In
3774 // the case of a packed stack without a backchain, return the address where
3775 // the backchain would have been stored; that slot is either unused space
3776 // or contains a saved register.)
3777 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3778 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3779
3780 if (Depth > 0) {
3781 // FIXME The frontend should detect this case.
3782 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3783 report_fatal_error("Unsupported stack frame traversal count");
3784
3785 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
3786 while (Depth--) {
3787 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
3788 MachinePointerInfo());
3789 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
3790 }
3791 }
3792
3793 return BackChain;
3794}
3795
3796SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3797 SelectionDAG &DAG) const {
3798 MachineFunction &MF = DAG.getMachineFunction();
3799 MachineFrameInfo &MFI = MF.getFrameInfo();
3800 MFI.setReturnAddressIsTaken(true);
3801
3802 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3803 return SDValue();
3804
3805 SDLoc DL(Op);
3806 unsigned Depth = Op.getConstantOperandVal(0);
3807 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3808
3809 if (Depth > 0) {
3810 // FIXME The frontend should detect this case.
3811 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3812 report_fatal_error("Unsupported stack frame traversal count");
3813
3814 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3815 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
3816 int Offset = TFL->getReturnAddressOffset(MF);
3817 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
3818 DAG.getConstant(Offset, DL, PtrVT));
3819 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
3820 MachinePointerInfo());
3821 }
3822
3823 // Return R14D (ELF) / R7D (XPLINK), which holds the return address. Mark
3824 // it an implicit live-in.
3825 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
3826 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
3827 &SystemZ::GR64BitRegClass);
3828 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3829}
3830
3831SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3832 SelectionDAG &DAG) const {
3833 SDLoc DL(Op);
3834 SDValue In = Op.getOperand(0);
3835 EVT InVT = In.getValueType();
3836 EVT ResVT = Op.getValueType();
3837
3838 // Convert loads directly. This is normally done by DAGCombiner,
3839 // but we need this case for bitcasts that are created during lowering
3840 // and which are then lowered themselves.
3841 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3842 if (ISD::isNormalLoad(LoadN)) {
3843 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3844 LoadN->getBasePtr(), LoadN->getMemOperand());
3845 // Update the chain uses.
3846 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3847 return NewLoad;
3848 }
3849
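// An f32 value lives in the high 32 bits of a 64-bit FP register, so
// both directions below go via an i64/f64 pair and the high 32-bit
// subregister (subreg_h32).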
3850 if (InVT == MVT::i32 && ResVT == MVT::f32) {
3851 SDValue In64;
3852 if (Subtarget.hasHighWord()) {
3853 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3854 MVT::i64);
3855 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3856 MVT::i64, SDValue(U64, 0), In);
3857 } else {
3858 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3859 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3860 DAG.getConstant(32, DL, MVT::i64));
3861 }
3862 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3863 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3864 DL, MVT::f32, Out64);
3865 }
3866 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3867 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3868 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3869 MVT::f64, SDValue(U64, 0), In);
3870 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3871 if (Subtarget.hasHighWord())
3872 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3873 MVT::i32, Out64);
3874 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3875 DAG.getConstant(32, DL, MVT::i64));
3876 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3877 }
3878 llvm_unreachable("Unexpected bitcast combination");
3879}
3880
3881SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3882 SelectionDAG &DAG) const {
3883
3884 if (Subtarget.isTargetXPLINK64())
3885 return lowerVASTART_XPLINK(Op, DAG);
3886 else
3887 return lowerVASTART_ELF(Op, DAG);
3888}
3889
3890SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3891 SelectionDAG &DAG) const {
3892 MachineFunction &MF = DAG.getMachineFunction();
3893 SystemZMachineFunctionInfo *FuncInfo =
3894 MF.getInfo<SystemZMachineFunctionInfo>();
3895
3896 SDLoc DL(Op);
3897
3898 // vastart just stores the address of the VarArgsFrameIndex slot into the
3899 // memory location argument.
3900 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3901 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3902 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3903 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3904 MachinePointerInfo(SV));
3905}
3906
3907SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3908 SelectionDAG &DAG) const {
3909 MachineFunction &MF = DAG.getMachineFunction();
3910 SystemZMachineFunctionInfo *FuncInfo =
3911 MF.getInfo<SystemZMachineFunctionInfo>();
3912 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3913
3914 SDValue Chain = Op.getOperand(0);
3915 SDValue Addr = Op.getOperand(1);
3916 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3917 SDLoc DL(Op);
3918
3919 // The initial values of each field.
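// These correspond, in order, to the four members of the ELF s390x
// va_list (__gpr, __fpr, __overflow_arg_area, __reg_save_area), stored
// at offsets 0, 8, 16 and 24 by the loop below.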
3920 const unsigned NumFields = 4;
3921 SDValue Fields[NumFields] = {
3922 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3923 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3924 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3925 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3926 };
3927
3928 // Store each field into its respective slot.
3929 SDValue MemOps[NumFields];
3930 unsigned Offset = 0;
3931 for (unsigned I = 0; I < NumFields; ++I) {
3932 SDValue FieldAddr = Addr;
3933 if (Offset != 0)
3934 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3935 DAG.getIntPtrConstant(Offset, DL));
3936 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3937 MachinePointerInfo(SV, Offset));
3938 Offset += 8;
3939 }
3940 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3941}
3942
3943SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3944 SelectionDAG &DAG) const {
3945 SDValue Chain = Op.getOperand(0);
3946 SDValue DstPtr = Op.getOperand(1);
3947 SDValue SrcPtr = Op.getOperand(2);
3948 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3949 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3950 SDLoc DL(Op);
3951
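// On ELF, the whole 32-byte va_list structure built in lowerVASTART_ELF
// is copied; on XPLINK64, va_list is a single pointer, so only
// pointer-size bytes need to be copied.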
3952 uint32_t Sz =
3953 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
3954 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3955 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3956 /*isTailCall*/ false, MachinePointerInfo(DstSV),
3957 MachinePointerInfo(SrcSV));
3958}
3959
3960SDValue
3961SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3962 SelectionDAG &DAG) const {
3963 if (Subtarget.isTargetXPLINK64())
3964 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3965 else
3966 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3967}
3968
3969SDValue
3970SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3971 SelectionDAG &DAG) const {
3972 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3973 MachineFunction &MF = DAG.getMachineFunction();
3974 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3975 SDValue Chain = Op.getOperand(0);
3976 SDValue Size = Op.getOperand(1);
3977 SDValue Align = Op.getOperand(2);
3978 SDLoc DL(Op);
3979
3980 // If the user has set the "no-realign-stack" function attribute, ignore
3981 // alloca alignments.
3982 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
3983
3984 uint64_t StackAlign = TFI->getStackAlignment();
3985 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3986 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3987
3988 SDValue NeededSpace = Size;
3989
3990 // Add extra space for alignment if needed.
3991 EVT PtrVT = getPointerTy(MF.getDataLayout());
3992 if (ExtraAlignSpace)
3993 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3994 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3995
3996 bool IsSigned = false;
3997 bool DoesNotReturn = false;
3998 bool IsReturnValueUsed = false;
3999 EVT VT = Op.getValueType();
4000 SDValue AllocaCall =
4001 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4002 CallingConv::C, IsSigned, DL, DoesNotReturn,
4003 IsReturnValueUsed)
4004 .first;
4005
4006 // Perform a CopyFromReg from %GPR4 (the stack pointer register), chained
4007 // and glued to the end of the call so that it cannot be scheduled apart
4008 // from the call sequence.
4009 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4010 Register SPReg = Regs.getStackPointerRegister();
4011 Chain = AllocaCall.getValue(1);
4012 SDValue Glue = AllocaCall.getValue(2);
4013 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4014 Chain = NewSPRegNode.getValue(1);
4015
4016 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4017 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4018 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4019
4020 // Dynamically realign if needed.
4021 if (ExtraAlignSpace) {
4022 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4023 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4024 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4025 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4026 }
4027
4028 SDValue Ops[2] = {Result, Chain};
4029 return DAG.getMergeValues(Ops, DL);
4030}
4031
4032SDValue
4033SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4034 SelectionDAG &DAG) const {
4035 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4036 MachineFunction &MF = DAG.getMachineFunction();
4037 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4038 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4039
4040 SDValue Chain = Op.getOperand(0);
4041 SDValue Size = Op.getOperand(1);
4042 SDValue Align = Op.getOperand(2);
4043 SDLoc DL(Op);
4044
4045 // If the user has set the "no-realign-stack" function attribute, ignore
4046 // alloca alignments.
4047 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4048
4049 uint64_t StackAlign = TFI->getStackAlignment();
4050 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4051 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4052
4053 Register SPReg = getStackPointerRegisterToSaveRestore();
4054 SDValue NeededSpace = Size;
4055
4056 // Get a reference to the stack pointer.
4057 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4058
4059 // If we need a backchain, save it now.
4060 SDValue Backchain;
4061 if (StoreBackchain)
4062 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4063 MachinePointerInfo());
4064
4065 // Add extra space for alignment if needed.
4066 if (ExtraAlignSpace)
4067 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4068 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4069
4070 // Get the new stack pointer value.
4071 SDValue NewSP;
4072 if (hasInlineStackProbe(MF)) {
4073 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4074 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4075 Chain = NewSP.getValue(1);
4076 }
4077 else {
4078 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4079 // Copy the new stack pointer back.
4080 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4081 }
4082
4083 // The allocated data lives above the 160 bytes allocated for the standard
4084 // frame, plus any outgoing stack arguments. We don't know how much that
4085 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4086 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4087 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4088
4089 // Dynamically realign if needed.
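// Because ExtraAlignSpace extra bytes were allocated above, Result can be
// rounded up to the next RequiredAlign boundary without leaving the
// allocation; e.g. requesting 128-byte alignment on an 8-byte aligned
// stack adds 120 and then masks with ~127.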
4090 if (RequiredAlign > StackAlign) {
4091 Result =
4092 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4093 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4094 Result =
4095 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4096 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4097 }
4098
4099 if (StoreBackchain)
4100 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4101 MachinePointerInfo());
4102
4103 SDValue Ops[2] = { Result, Chain };
4104 return DAG.getMergeValues(Ops, DL);
4105}
4106
4107SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4108 SDValue Op, SelectionDAG &DAG) const {
4109 SDLoc DL(Op);
4110
4111 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4112}
4113
4114SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4115 SelectionDAG &DAG) const {
4116 EVT VT = Op.getValueType();
4117 SDLoc DL(Op);
4118 SDValue Ops[2];
4119 if (is32Bit(VT))
4120 // Just do a normal 64-bit multiplication and extract the results.
4121 // We define this so that it can be used for constant division.
4122 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4123 Op.getOperand(1), Ops[1], Ops[0]);
4124 else if (Subtarget.hasMiscellaneousExtensions2())
4125 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4126 // the high result in the even register. ISD::SMUL_LOHI is defined to
4127 // return the low half first, so the results are in reverse order.
4128 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4129 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4130 else {
4131 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4132 //
4133 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4134 //
4135 // but using the fact that the upper halves are either all zeros
4136 // or all ones:
4137 //
4138 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4139 //
4140 // and grouping the subtracted terms together, since they can be computed
4141 // more quickly than the multiplication:
4142 //
4143 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
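// (The rewrite works because each sign-extended high half is either 0 or
// all ones, so for instance lh * rl is either 0 or -rl, i.e. -(lh & rl).)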
4144 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4145 SDValue LL = Op.getOperand(0);
4146 SDValue RL = Op.getOperand(1);
4147 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4148 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4149 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4150 // the high result in the even register. ISD::SMUL_LOHI is defined to
4151 // return the low half first, so the results are in reverse order.
4152 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4153 LL, RL, Ops[1], Ops[0]);
4154 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4155 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4156 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4157 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4158 }
4159 return DAG.getMergeValues(Ops, DL);
4160}
4161
4162SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4163 SelectionDAG &DAG) const {
4164 EVT VT = Op.getValueType();
4165 SDLoc DL(Op);
4166 SDValue Ops[2];
4167 if (is32Bit(VT))
4168 // Just do a normal 64-bit multiplication and extract the results.
4169 // We define this so that it can be used for constant division.
4170 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4171 Op.getOperand(1), Ops[1], Ops[0]);
4172 else
4173 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4174 // the high result in the even register. ISD::UMUL_LOHI is defined to
4175 // return the low half first, so the results are in reverse order.
4176 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4177 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4178 return DAG.getMergeValues(Ops, DL);
4179}
4180
4181SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4182 SelectionDAG &DAG) const {
4183 SDValue Op0 = Op.getOperand(0);
4184 SDValue Op1 = Op.getOperand(1);
4185 EVT VT = Op.getValueType();
4186 SDLoc DL(Op);
4187
4188 // We use DSGF for 32-bit division. This means the first operand must
4189 // always be 64-bit, and the second operand should be 32-bit whenever
4190 // that is possible, to improve performance.
4191 if (is32Bit(VT))
4192 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4193 else if (DAG.ComputeNumSignBits(Op1) > 32)
4194 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4195
4196 // DSG(F) returns the remainder in the even register and the
4197 // quotient in the odd register.
4198 SDValue Ops[2];
4199 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4200 return DAG.getMergeValues(Ops, DL);
4201}
4202
4203SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4204 SelectionDAG &DAG) const {
4205 EVT VT = Op.getValueType();
4206 SDLoc DL(Op);
4207
4208 // DL(G) returns the remainder in the even register and the
4209 // quotient in the odd register.
4210 SDValue Ops[2];
4211 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4212 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4213 return DAG.getMergeValues(Ops, DL);
4214}
4215
4216SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4217 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4218
4219 // Get the known-zero masks for each operand.
4220 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4221 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4222 DAG.computeKnownBits(Ops[1])};
4223
4224 // See if the upper 32 bits of one operand and the lower 32 bits of the
4225 // other are known zero. They are the low and high operands respectively.
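// For example, in (x & 0xffffffff00000000) | (y & 0xffff), x would be
// the high operand and y the low operand.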
4226 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4227 Known[1].Zero.getZExtValue() };
4228 unsigned High, Low;
4229 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4230 High = 1, Low = 0;
4231 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4232 High = 0, Low = 1;
4233 else
4234 return Op;
4235
4236 SDValue LowOp = Ops[Low];
4237 SDValue HighOp = Ops[High];
4238
4239 // If the high part is a constant, we're better off using IILH.
4240 if (HighOp.getOpcode() == ISD::Constant)
4241 return Op;
4242
4243 // If the low part is a constant that is outside the range of LHI,
4244 // then we're better off using IILF.
4245 if (LowOp.getOpcode() == ISD::Constant) {
4246 int64_t Value = int32_t(LowOp->getAsZExtVal());
4247 if (!isInt<16>(Value))
4248 return Op;
4249 }
4250
4251 // Check whether the high part is an AND that doesn't change the
4252 // high 32 bits and just masks out low bits. We can skip it if so.
4253 if (HighOp.getOpcode() == ISD::AND &&
4254 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4255 SDValue HighOp0 = HighOp.getOperand(0);
4256 uint64_t Mask = HighOp.getConstantOperandVal(1);
4257 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4258 HighOp = HighOp0;
4259 }
4260
4261 // Take advantage of the fact that all GR32 operations only change the
4262 // low 32 bits by truncating Low to an i32 and inserting it directly
4263 // using a subreg. The interesting cases are those where the truncation
4264 // can be folded.
4265 SDLoc DL(Op);
4266 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4267 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4268 MVT::i64, HighOp, Low32);
4269}
4270
4271// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4272SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4273 SelectionDAG &DAG) const {
4274 SDNode *N = Op.getNode();
4275 SDValue LHS = N->getOperand(0);
4276 SDValue RHS = N->getOperand(1);
4277 SDLoc DL(N);
4278
4279 if (N->getValueType(0) == MVT::i128) {
4280 unsigned BaseOp = 0;
4281 unsigned FlagOp = 0;
4282 bool IsBorrow = false;
4283 switch (Op.getOpcode()) {
4284 default: llvm_unreachable("Unknown instruction!");
4285 case ISD::UADDO:
4286 BaseOp = ISD::ADD;
4287 FlagOp = SystemZISD::VACC;
4288 break;
4289 case ISD::USUBO:
4290 BaseOp = ISD::SUB;
4291 FlagOp = SystemZISD::VSCBI;
4292 IsBorrow = true;
4293 break;
4294 }
4295 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4296 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4297 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4298 DAG.getValueType(MVT::i1));
4299 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4300 if (IsBorrow)
4301 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4302 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4303 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4304 }
4305
4306 unsigned BaseOp = 0;
4307 unsigned CCValid = 0;
4308 unsigned CCMask = 0;
4309
4310 switch (Op.getOpcode()) {
4311 default: llvm_unreachable("Unknown instruction!");
4312 case ISD::SADDO:
4313 BaseOp = SystemZISD::SADDO;
4314 CCValid = SystemZ::CCMASK_ARITH;
4315 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4316 break;
4317 case ISD::SSUBO:
4318 BaseOp = SystemZISD::SSUBO;
4319 CCValid = SystemZ::CCMASK_ARITH;
4320 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4321 break;
4322 case ISD::UADDO:
4323 BaseOp = SystemZISD::UADDO;
4324 CCValid = SystemZ::CCMASK_LOGICAL;
4325 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4326 break;
4327 case ISD::USUBO:
4328 BaseOp = SystemZISD::USUBO;
4329 CCValid = SystemZ::CCMASK_LOGICAL;
4330 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4331 break;
4332 }
4333
4334 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4335 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4336
4337 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4338 if (N->getValueType(1) == MVT::i1)
4339 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4340
4341 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4342}
4343
4344static bool isAddCarryChain(SDValue Carry) {
4345 while (Carry.getOpcode() == ISD::UADDO_CARRY)
4346 Carry = Carry.getOperand(2);
4347 return Carry.getOpcode() == ISD::UADDO;
4348}
4349
4350static bool isSubBorrowChain(SDValue Carry) {
4351 while (Carry.getOpcode() == ISD::USUBO_CARRY)
4352 Carry = Carry.getOperand(2);
4353 return Carry.getOpcode() == ISD::USUBO;
4354}
4355
4356// Lower UADDO_CARRY/USUBO_CARRY nodes.
4357SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4358 SelectionDAG &DAG) const {
4359
4360 SDNode *N = Op.getNode();
4361 MVT VT = N->getSimpleValueType(0);
4362
4363 // Let legalize expand this if it isn't a legal type yet.
4364 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4365 return SDValue();
4366
4367 SDValue LHS = N->getOperand(0);
4368 SDValue RHS = N->getOperand(1);
4369 SDValue Carry = Op.getOperand(2);
4370 SDLoc DL(N);
4371
4372 if (VT == MVT::i128) {
4373 unsigned BaseOp = 0;
4374 unsigned FlagOp = 0;
4375 bool IsBorrow = false;
4376 switch (Op.getOpcode()) {
4377 default: llvm_unreachable("Unknown instruction!");
4378 case ISD::UADDO_CARRY:
4379 BaseOp = SystemZISD::VAC;
4380 FlagOp = SystemZISD::VACCC;
4381 break;
4382 case ISD::USUBO_CARRY:
4383 BaseOp = SystemZISD::VSBI;
4384 FlagOp = SystemZISD::VSBCBI;
4385 IsBorrow = true;
4386 break;
4387 }
4388 if (IsBorrow)
4389 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4390 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4391 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4392 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4393 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4394 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4395 DAG.getValueType(MVT::i1));
4396 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4397 if (IsBorrow)
4398 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4399 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4400 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4401 }
4402
4403 unsigned BaseOp = 0;
4404 unsigned CCValid = 0;
4405 unsigned CCMask = 0;
4406
4407 switch (Op.getOpcode()) {
4408 default: llvm_unreachable("Unknown instruction!");
4409 case ISD::UADDO_CARRY:
4410 if (!isAddCarryChain(Carry))
4411 return SDValue();
4412
4413 BaseOp = SystemZISD::ADDCARRY;
4414 CCValid = SystemZ::CCMASK_LOGICAL;
4415 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4416 break;
4417 case ISD::USUBO_CARRY:
4418 if (!isSubBorrowChain(Carry))
4419 return SDValue();
4420
4421 BaseOp = SystemZISD::SUBCARRY;
4422 CCValid = SystemZ::CCMASK_LOGICAL;
4423 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4424 break;
4425 }
4426
4427 // Set the condition code from the carry flag.
4428 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4429 DAG.getConstant(CCValid, DL, MVT::i32),
4430 DAG.getConstant(CCMask, DL, MVT::i32));
4431
4432 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4433 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4434
4435 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4436 if (N->getValueType(1) == MVT::i1)
4437 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4438
4439 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4440}
4441
4442SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4443 SelectionDAG &DAG) const {
4444 EVT VT = Op.getValueType();
4445 SDLoc DL(Op);
4446 Op = Op.getOperand(0);
4447
4448 if (VT.getScalarSizeInBits() == 128) {
4449 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4450 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4451 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4452 DAG.getConstant(0, DL, MVT::i64));
4453 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4454 return Op;
4455 }
4456
4457 // Handle vector types via VPOPCT.
4458 if (VT.isVector()) {
4459 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4460 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4461 switch (VT.getScalarSizeInBits()) {
4462 case 8:
4463 break;
4464 case 16: {
4465 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4466 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4467 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4468 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4469 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4470 break;
4471 }
4472 case 32: {
4473 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4474 DAG.getConstant(0, DL, MVT::i32));
4475 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4476 break;
4477 }
4478 case 64: {
4479 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4480 DAG.getConstant(0, DL, MVT::i32));
4481 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4482 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4483 break;
4484 }
4485 default:
4486 llvm_unreachable("Unexpected type");
4487 }
4488 return Op;
4489 }
4490
4491 // Get the known-zero mask for the operand.
4492 KnownBits Known = DAG.computeKnownBits(Op);
4493 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4494 if (NumSignificantBits == 0)
4495 return DAG.getConstant(0, DL, VT);
4496
4497 // Skip known-zero high parts of the operand.
4498 int64_t OrigBitSize = VT.getSizeInBits();
4499 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
4500 BitSize = std::min(BitSize, OrigBitSize);
4501
4502 // The POPCNT instruction counts the number of bits in each byte.
4503 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4504 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4505 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4506
4507 // Add up per-byte counts in a binary tree. All bits of Op at
4508 // position larger than BitSize remain zero throughout.
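// For an i32 input, for example, the loop runs with I = 16 and I = 8:
// after the first ADD the top two bytes hold c3+c1 and c2+c0, and after
// the second the top byte holds c3+c2+c1+c0, which the SRL below moves
// into the low byte (shift by BitSize - 8 = 24).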
4509 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4510 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4511 if (BitSize != OrigBitSize)
4512 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4513 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4514 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4515 }
4516
4517 // Extract overall result from high byte.
4518 if (BitSize > 8)
4519 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4520 DAG.getConstant(BitSize - 8, DL, VT));
4521
4522 return Op;
4523}
4524
4525SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4526 SelectionDAG &DAG) const {
4527 SDLoc DL(Op);
4528 AtomicOrdering FenceOrdering =
4529 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4530 SyncScope::ID FenceSSID =
4531 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4532
4533 // The only fence that needs an instruction is a sequentially-consistent
4534 // cross-thread fence.
4535 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4536 FenceSSID == SyncScope::System) {
4537 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4538 Op.getOperand(0)),
4539 0);
4540 }
4541
4542 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4543 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4544}
4545
4546SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
4547 SelectionDAG &DAG) const {
4548 auto *Node = cast<AtomicSDNode>(Op.getNode());
4549 assert(
4550 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
4551 "Only custom lowering i128 or f128.");
4552 // Use same code to handle both legal and non-legal i128 types.
4553 SmallVector<SDValue, 2> Results;
4554 LowerOperationWrapper(Node, Results, DAG);
4555 return DAG.getMergeValues(Results, SDLoc(Op));
4556}
4557
4558 // Prepare for a Compare And Swap for a subword operation. The operation is
4559 // performed in memory on the naturally aligned 4-byte word containing it.
4560 static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
4561 SDValue &AlignedAddr, SDValue &BitShift,
4562 SDValue &NegBitShift) {
4563 EVT PtrVT = Addr.getValueType();
4564 EVT WideVT = MVT::i32;
4565
4566 // Get the address of the containing word.
4567 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4568 DAG.getConstant(-4, DL, PtrVT));
4569
4570 // Get the number of bits that the word must be rotated left in order
4571 // to bring the field to the top bits of a GR32.
4572 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4573 DAG.getConstant(3, DL, PtrVT));
4574 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4575
4576 // Get the complementing shift amount, for rotating a field in the top
4577 // bits back to its proper position.
4578 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4579 DAG.getConstant(0, DL, WideVT), BitShift);
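// For example, for a halfword at address 0x1002: AlignedAddr = 0x1000,
// BitShift = (0x1002 << 3) mod 32 = 16, and NegBitShift = -16, which as
// a rotate amount is equivalent to 16 and moves the field back down.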
4580
4581}
4582
4583 // Op is an 8-, 16- or 32-bit ATOMIC_LOAD_* operation. Lower the 8- and
4584 // 16-bit forms into the fullword ATOMIC_LOADW_* operation given by Opcode.
4585SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4586 SelectionDAG &DAG,
4587 unsigned Opcode) const {
4588 auto *Node = cast<AtomicSDNode>(Op.getNode());
4589
4590 // 32-bit operations need no special handling.
4591 EVT NarrowVT = Node->getMemoryVT();
4592 EVT WideVT = MVT::i32;
4593 if (NarrowVT == WideVT)
4594 return Op;
4595
4596 int64_t BitSize = NarrowVT.getSizeInBits();
4597 SDValue ChainIn = Node->getChain();
4598 SDValue Addr = Node->getBasePtr();
4599 SDValue Src2 = Node->getVal();
4600 MachineMemOperand *MMO = Node->getMemOperand();
4601 SDLoc DL(Node);
4602
4603 // Convert atomic subtracts of constants into additions.
4604 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4605 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4606 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4607 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4608 }
4609
4610 SDValue AlignedAddr, BitShift, NegBitShift;
4611 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4612
4613 // Extend the source operand to 32 bits and prepare it for the inner loop.
4614 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4615 // operations require the source to be shifted in advance. (This shift
4616 // can be folded if the source is constant.) For AND and NAND, the lower
4617 // bits must be set, while for other opcodes they should be left clear.
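// For example, an 8-bit AND operand V becomes (V << 24) | 0x00ffffff, so
// that the three bytes of the word outside the field survive the AND.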
4618 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4619 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4620 DAG.getConstant(32 - BitSize, DL, WideVT));
4621 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4622 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
4623 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4624 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
4625
4626 // Construct the ATOMIC_LOADW_* node.
4627 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4628 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4629 DAG.getConstant(BitSize, DL, WideVT) };
4630 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4631 NarrowVT, MMO);
4632
4633 // Rotate the result of the final CS so that the field is in the lower
4634 // bits of a GR32, then truncate it.
4635 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4636 DAG.getConstant(BitSize, DL, WideVT));
4637 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4638
4639 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4640 return DAG.getMergeValues(RetOps, DL);
4641}
4642
4643// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
4644// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
4645SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4646 SelectionDAG &DAG) const {
4647 auto *Node = cast<AtomicSDNode>(Op.getNode());
4648 EVT MemVT = Node->getMemoryVT();
4649 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4650 // A full-width operation: negate and use LAA(G).
4651 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4652 assert(Subtarget.hasInterlockedAccess1() &&
4653 "Should have been expanded by AtomicExpand pass.");
4654 SDValue Src2 = Node->getVal();
4655 SDLoc DL(Src2);
4656 SDValue NegSrc2 =
4657 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
4658 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4659 Node->getChain(), Node->getBasePtr(), NegSrc2,
4660 Node->getMemOperand());
4661 }
4662
4663 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4664}
4665
4666// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4667SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4668 SelectionDAG &DAG) const {
4669 auto *Node = cast<AtomicSDNode>(Op.getNode());
4670 SDValue ChainIn = Node->getOperand(0);
4671 SDValue Addr = Node->getOperand(1);
4672 SDValue CmpVal = Node->getOperand(2);
4673 SDValue SwapVal = Node->getOperand(3);
4674 MachineMemOperand *MMO = Node->getMemOperand();
4675 SDLoc DL(Node);
4676
4677 if (Node->getMemoryVT() == MVT::i128) {
4678 // Use same code to handle both legal and non-legal i128 types.
4679 SmallVector<SDValue, 3> Results;
4680 LowerOperationWrapper(Node, Results, DAG);
4681 return DAG.getMergeValues(Results, DL);
4682 }
4683
4684 // We have native support for 32-bit and 64-bit compare and swap, but we
4685 // still need to expand extracting the "success" result from the CC.
4686 EVT NarrowVT = Node->getMemoryVT();
4687 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4688 if (NarrowVT == WideVT) {
4689 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4690 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4691 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
4692 DL, Tys, Ops, NarrowVT, MMO);
4693 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4694 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
4695
4696 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4697 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4698 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4699 return SDValue();
4700 }
4701
4702 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4703 // via a fullword ATOMIC_CMP_SWAPW operation.
4704 int64_t BitSize = NarrowVT.getSizeInBits();
4705
4706 SDValue AlignedAddr, BitShift, NegBitShift;
4707 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4708
4709 // Construct the ATOMIC_CMP_SWAPW node.
4710 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4711 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4712 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4713 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
4714 VTList, Ops, NarrowVT, MMO);
4715 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4716 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
4717
4718 // emitAtomicCmpSwapW() will zero extend the result (original value).
4719 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4720 DAG.getValueType(NarrowVT));
4721 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4722 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4723 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4724 return SDValue();
4725}
4726
4727 MachineMemOperand::Flags
4728SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4729 // Because of how we convert atomic_load and atomic_store to normal loads and
4730 // stores in the DAG, we need to ensure that the MMOs are marked volatile
4731 // since DAGCombine hasn't been updated to account for atomic, but
4732 // non-volatile, loads. (See D57601)
4733 if (auto *SI = dyn_cast<StoreInst>(&I))
4734 if (SI->isAtomic())
4735 return MachineMemOperand::MOVolatile;
4736 if (auto *LI = dyn_cast<LoadInst>(&I))
4737 if (LI->isAtomic())
4738 return MachineMemOperand::MOVolatile;
4739 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
4740 if (AI->isAtomic())
4741 return MachineMemOperand::MOVolatile;
4742 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
4743 if (AI->isAtomic())
4744 return MachineMemOperand::MOVolatile;
4745 return MachineMemOperand::MONone;
4746}
4747
4748SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4749 SelectionDAG &DAG) const {
4750 MachineFunction &MF = DAG.getMachineFunction();
4751 auto *Regs = Subtarget.getSpecialRegisters();
4752 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4753 report_fatal_error("Variable-sized stack allocations are not supported "
4754 "in GHC calling convention");
4755 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4756 Regs->getStackPointerRegister(), Op.getValueType());
4757}
4758
4759SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4760 SelectionDAG &DAG) const {
4761 MachineFunction &MF = DAG.getMachineFunction();
4762 auto *Regs = Subtarget.getSpecialRegisters();
4763 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4764
4765 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
4766 report_fatal_error("Variable-sized stack allocations are not supported "
4767 "in GHC calling convention");
4768
4769 SDValue Chain = Op.getOperand(0);
4770 SDValue NewSP = Op.getOperand(1);
4771 SDValue Backchain;
4772 SDLoc DL(Op);
4773
4774 if (StoreBackchain) {
4775 SDValue OldSP = DAG.getCopyFromReg(
4776 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
4777 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4778 MachinePointerInfo());
4779 }
4780
4781 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
4782
4783 if (StoreBackchain)
4784 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4785 MachinePointerInfo());
4786
4787 return Chain;
4788}
4789
4790SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4791 SelectionDAG &DAG) const {
4792 bool IsData = Op.getConstantOperandVal(4);
4793 if (!IsData)
4794 // Just preserve the chain.
4795 return Op.getOperand(0);
4796
4797 SDLoc DL(Op);
4798 bool IsWrite = Op.getConstantOperandVal(2);
4799 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4800 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4801 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4802 Op.getOperand(1)};
4803 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
4804 Node->getVTList(), Ops,
4805 Node->getMemoryVT(), Node->getMemOperand());
4806}
4807
4808// Convert condition code in CCReg to an i32 value.
4809 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
4810 SDLoc DL(CCReg);
4811 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
4812 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4813 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
4814}
4815
4816SDValue
4817SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4818 SelectionDAG &DAG) const {
4819 unsigned Opcode, CCValid;
4820 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4821 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
4822 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4823 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4824 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4825 return SDValue();
4826 }
4827
4828 return SDValue();
4829}
4830
4831SDValue
4832SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4833 SelectionDAG &DAG) const {
4834 unsigned Opcode, CCValid;
4835 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4836 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4837 if (Op->getNumValues() == 1)
4838 return getCCResult(DAG, SDValue(Node, 0));
4839 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
4840 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4841 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4842 }
4843
4844 unsigned Id = Op.getConstantOperandVal(0);
4845 switch (Id) {
4846 case Intrinsic::thread_pointer:
4847 return lowerThreadPointer(SDLoc(Op), DAG);
4848
4849 case Intrinsic::s390_vpdi:
4850 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4851 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4852
4853 case Intrinsic::s390_vperm:
4854 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4855 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4856
4857 case Intrinsic::s390_vuphb:
4858 case Intrinsic::s390_vuphh:
4859 case Intrinsic::s390_vuphf:
4860 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4861 Op.getOperand(1));
4862
4863 case Intrinsic::s390_vuplhb:
4864 case Intrinsic::s390_vuplhh:
4865 case Intrinsic::s390_vuplhf:
4866 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4867 Op.getOperand(1));
4868
4869 case Intrinsic::s390_vuplb:
4870 case Intrinsic::s390_vuplhw:
4871 case Intrinsic::s390_vuplf:
4872 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4873 Op.getOperand(1));
4874
4875 case Intrinsic::s390_vupllb:
4876 case Intrinsic::s390_vupllh:
4877 case Intrinsic::s390_vupllf:
4878 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4879 Op.getOperand(1));
4880
4881 case Intrinsic::s390_vsumb:
4882 case Intrinsic::s390_vsumh:
4883 case Intrinsic::s390_vsumgh:
4884 case Intrinsic::s390_vsumgf:
4885 case Intrinsic::s390_vsumqf:
4886 case Intrinsic::s390_vsumqg:
4887 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4888 Op.getOperand(1), Op.getOperand(2));
4889
4890 case Intrinsic::s390_vaq:
4891 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
4892 Op.getOperand(1), Op.getOperand(2));
4893 case Intrinsic::s390_vaccb:
4894 case Intrinsic::s390_vacch:
4895 case Intrinsic::s390_vaccf:
4896 case Intrinsic::s390_vaccg:
4897 case Intrinsic::s390_vaccq:
4898 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
4899 Op.getOperand(1), Op.getOperand(2));
4900 case Intrinsic::s390_vacq:
4901 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
4902 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4903 case Intrinsic::s390_vacccq:
4904 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
4905 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4906
4907 case Intrinsic::s390_vsq:
4908 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
4909 Op.getOperand(1), Op.getOperand(2));
4910 case Intrinsic::s390_vscbib:
4911 case Intrinsic::s390_vscbih:
4912 case Intrinsic::s390_vscbif:
4913 case Intrinsic::s390_vscbig:
4914 case Intrinsic::s390_vscbiq:
4915 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
4916 Op.getOperand(1), Op.getOperand(2));
4917 case Intrinsic::s390_vsbiq:
4918 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
4919 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4920 case Intrinsic::s390_vsbcbiq:
4921 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
4922 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4923 }
4924
4925 return SDValue();
4926}
4927
4928namespace {
4929// Says that SystemZISD operation Opcode can be used to perform the equivalent
4930// of a VPERM with permute vector Bytes. If Opcode takes three operands,
4931// Operand is the constant third operand, otherwise it is the number of
4932// bytes in each element of the result.
4933struct Permute {
4934 unsigned Opcode;
4935 unsigned Operand;
4936 unsigned char Bytes[SystemZ::VectorBytes];
4937};
4938}
4939
4940static const Permute PermuteForms[] = {
4941 // VMRHG
4942 { SystemZISD::MERGE_HIGH, 8,
4943 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4944 // VMRHF
4945 { SystemZISD::MERGE_HIGH, 4,
4946 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4947 // VMRHH
4948 { SystemZISD::MERGE_HIGH, 2,
4949 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4950 // VMRHB
4951 { SystemZISD::MERGE_HIGH, 1,
4952 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4953 // VMRLG
4954 { SystemZISD::MERGE_LOW, 8,
4955 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4956 // VMRLF
4957 { SystemZISD::MERGE_LOW, 4,
4958 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4959 // VMRLH
4960 { SystemZISD::MERGE_LOW, 2,
4961 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4962 // VMRLB
4963 { SystemZISD::MERGE_LOW, 1,
4964 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4965 // VPKG
4966 { SystemZISD::PACK, 4,
4967 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4968 // VPKF
4969 { SystemZISD::PACK, 2,
4970 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4971 // VPKH
4972 { SystemZISD::PACK, 1,
4973 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4974 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4975 { SystemZISD::PERMUTE_DWORDS, 4,
4976 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4977 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4978 { SystemZISD::PERMUTE_DWORDS, 1,
4979 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4980};
4981
4982// Called after matching a vector shuffle against a particular pattern.
4983// Both the original shuffle and the pattern have two vector operands.
4984// OpNos[0] is the operand of the original shuffle that should be used for
4985// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4986// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4987// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4988// for operands 0 and 1 of the pattern.
4989static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4990 if (OpNos[0] < 0) {
4991 if (OpNos[1] < 0)
4992 return false;
4993 OpNo0 = OpNo1 = OpNos[1];
4994 } else if (OpNos[1] < 0) {
4995 OpNo0 = OpNo1 = OpNos[0];
4996 } else {
4997 OpNo0 = OpNos[0];
4998 OpNo1 = OpNos[1];
4999 }
5000 return true;
5001}
5002
5003// Bytes is a VPERM-like permute vector, except that -1 is used for
5004// undefined bytes. Return true if the VPERM can be implemented using P.
5005// When returning true set OpNo0 to the VPERM operand that should be
5006// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5007//
5008// For example, if swapping the VPERM operands allows P to match, OpNo0
5009// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5010// operand, but rewriting it to use two duplicated operands allows it to
5011// match P, then OpNo0 and OpNo1 will be the same.
5012static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5013 unsigned &OpNo0, unsigned &OpNo1) {
5014 int OpNos[] = { -1, -1 };
5015 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5016 int Elt = Bytes[I];
5017 if (Elt >= 0) {
5018 // Make sure that the two permute vectors use the same suboperand
5019 // byte number. Only the operand numbers (the high bits) are
5020 // allowed to differ.
5021 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5022 return false;
5023 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5024 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5025 // Make sure that the operand mappings are consistent with previous
5026 // elements.
5027 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5028 return false;
5029 OpNos[ModelOpNo] = RealOpNo;
5030 }
5031 }
5032 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5033}
5034
5035// As above, but search for a matching permute.
5036static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5037 unsigned &OpNo0, unsigned &OpNo1) {
5038 for (auto &P : PermuteForms)
5039 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5040 return &P;
5041 return nullptr;
5042}
5043
5044// Bytes is a VPERM-like permute vector, except that -1 is used for
5045// undefined bytes. This permute is an operand of an outer permute.
5046// See whether redistributing the -1 bytes gives a shuffle that can be
5047// implemented using P. If so, set Transform to a VPERM-like permute vector
5048// that, when applied to the result of P, gives the original permute in Bytes.
5049 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5050 const Permute &P,
5051 SmallVectorImpl<int> &Transform) {
5052 unsigned To = 0;
5053 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5054 int Elt = Bytes[From];
5055 if (Elt < 0)
5056 // Byte number From of the result is undefined.
5057 Transform[From] = -1;
5058 else {
5059 while (P.Bytes[To] != Elt) {
5060 To += 1;
5061 if (To == SystemZ::VectorBytes)
5062 return false;
5063 }
5064 Transform[From] = To;
5065 }
5066 }
5067 return true;
5068}
5069
5070// As above, but search for a matching permute.
5071static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5072 SmallVectorImpl<int> &Transform) {
5073 for (auto &P : PermuteForms)
5074 if (matchDoublePermute(Bytes, P, Transform))
5075 return &P;
5076 return nullptr;
5077}
5078
5079// Convert the mask of the given shuffle op into a byte-level mask,
5080// as if it had type vNi8.
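// For example, a v4i32 shuffle mask <4, 2, -1, 0> expands to the byte
// mask <16,17,18,19, 8,9,10,11, -1,-1,-1,-1, 0,1,2,3>.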
5081static bool getVPermMask(SDValue ShuffleOp,
5082 SmallVectorImpl<int> &Bytes) {
5083 EVT VT = ShuffleOp.getValueType();
5084 unsigned NumElements = VT.getVectorNumElements();
5085 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5086
5087 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5088 Bytes.resize(NumElements * BytesPerElement, -1);
5089 for (unsigned I = 0; I < NumElements; ++I) {
5090 int Index = VSN->getMaskElt(I);
5091 if (Index >= 0)
5092 for (unsigned J = 0; J < BytesPerElement; ++J)
5093 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5094 }
5095 return true;
5096 }
5097 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5098 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5099 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5100 Bytes.resize(NumElements * BytesPerElement, -1);
5101 for (unsigned I = 0; I < NumElements; ++I)
5102 for (unsigned J = 0; J < BytesPerElement; ++J)
5103 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5104 return true;
5105 }
5106 return false;
5107}
5108
5109// Bytes is a VPERM-like permute vector, except that -1 is used for
5110// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5111// the result come from a contiguous sequence of bytes from one input.
5112// Set Base to the selector for the first byte if so.
5113static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5114 unsigned BytesPerElement, int &Base) {
5115 Base = -1;
5116 for (unsigned I = 0; I < BytesPerElement; ++I) {
5117 if (Bytes[Start + I] >= 0) {
5118 unsigned Elem = Bytes[Start + I];
5119 if (Base < 0) {
5120 Base = Elem - I;
5121 // Make sure the bytes would come from one input operand.
5122 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5123 return false;
5124 } else if (unsigned(Base) != Elem - I)
5125 return false;
5126 }
5127 }
5128 return true;
5129}
5130
5131// Bytes is a VPERM-like permute vector, except that -1 is used for
5132// undefined bytes. Return true if it can be performed using VSLDB.
5133// When returning true, set StartIndex to the shift amount and OpNo0
5134// and OpNo1 to the VPERM operands that should be used as the first
5135// and second shift operand respectively.
5136 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5137 unsigned &StartIndex, unsigned &OpNo0,
5138 unsigned &OpNo1) {
5139 int OpNos[] = { -1, -1 };
5140 int Shift = -1;
5141 for (unsigned I = 0; I < 16; ++I) {
5142 int Index = Bytes[I];
5143 if (Index >= 0) {
5144 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5145 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5146 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5147 if (Shift < 0)
5148 Shift = ExpectedShift;
5149 else if (Shift != ExpectedShift)
5150 return false;
5151 // Make sure that the operand mappings are consistent with previous
5152 // elements.
5153 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5154 return false;
5155 OpNos[ModelOpNo] = RealOpNo;
5156 }
5157 }
5158 StartIndex = Shift;
5159 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5160}
5161
5162// Create a node that performs P on operands Op0 and Op1, casting the
5163// operands to the appropriate type. The type of the result is determined by P.
5164 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5165 const Permute &P, SDValue Op0, SDValue Op1) {
5166 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5167 // elements of a PACK are twice as wide as the outputs.
5168 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5169 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5170 P.Operand);
5171 // Cast both operands to the appropriate type.
5172 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5173 SystemZ::VectorBytes / InBytes);
5174 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5175 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5176 SDValue Op;
5177 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5178 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5179 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5180 } else if (P.Opcode == SystemZISD::PACK) {
5181 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5182 SystemZ::VectorBytes / P.Operand);
5183 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5184 } else {
5185 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5186 }
5187 return Op;
5188}
5189
5190static bool isZeroVector(SDValue N) {
5191 if (N->getOpcode() == ISD::BITCAST)
5192 N = N->getOperand(0);
5193 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5194 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5195 return Op->getZExtValue() == 0;
5196 return ISD::isBuildVectorAllZeros(N.getNode());
5197}
5198
5199// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5200static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5201 for (unsigned I = 0; I < Num ; I++)
5202 if (isZeroVector(Ops[I]))
5203 return I;
5204 return UINT32_MAX;
5205}
5206
5207// Bytes is a VPERM-like permute vector, except that -1 is used for
5208// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5209// VSLDB or VPERM.
5210 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5211 SDValue *Ops,
5212 const SmallVectorImpl<int> &Bytes) {
5213 for (unsigned I = 0; I < 2; ++I)
5214 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5215
5216 // First see whether VSLDB can be used.
5217 unsigned StartIndex, OpNo0, OpNo1;
5218 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5219 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5220 Ops[OpNo1],
5221 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5222
5223 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5224 // eliminate a zero vector by reusing any zero index in the permute vector.
5225 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5226 if (ZeroVecIdx != UINT32_MAX) {
5227 bool MaskFirst = true;
5228 int ZeroIdx = -1;
5229 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5230 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5231 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5232 if (OpNo == ZeroVecIdx && I == 0) {
5233 // If the first byte is zero, use mask as first operand.
5234 ZeroIdx = 0;
5235 break;
5236 }
5237 if (OpNo != ZeroVecIdx && Byte == 0) {
5238 // If mask contains a zero, use it by placing that vector first.
5239 ZeroIdx = I + SystemZ::VectorBytes;
5240 MaskFirst = false;
5241 break;
5242 }
5243 }
5244 if (ZeroIdx != -1) {
5245 SDValue IndexNodes[SystemZ::VectorBytes];
5246 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5247 if (Bytes[I] >= 0) {
5248 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5249 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5250 if (OpNo == ZeroVecIdx)
5251 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5252 else {
5253 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5254 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5255 }
5256 } else
5257 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5258 }
5259 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5260 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5261 if (MaskFirst)
5262 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5263 Mask);
5264 else
5265 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5266 Mask);
5267 }
5268 }
5269
5270 SDValue IndexNodes[SystemZ::VectorBytes];
5271 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5272 if (Bytes[I] >= 0)
5273 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5274 else
5275 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5276 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5277 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5278 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5279}
5280
5281namespace {
5282// Describes a general N-operand vector shuffle.
5283struct GeneralShuffle {
5284 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
5285 void addUndef();
5286 bool add(SDValue, unsigned);
5287 SDValue getNode(SelectionDAG &, const SDLoc &);
5288 void tryPrepareForUnpack();
5289 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5290 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5291
5292 // The operands of the shuffle.
5293 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5294
5295 // Index I is -1 if byte I of the result is undefined. Otherwise the
5296 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5297 // Bytes[I] / SystemZ::VectorBytes.
5298 SmallVector<int, SystemZ::VectorBytes> Bytes;
5299
5300 // The type of the shuffle result.
5301 EVT VT;
5302
5303 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5304 unsigned UnpackFromEltSize;
5305};
5306}
5307
5308// Add an extra undefined element to the shuffle.
5309void GeneralShuffle::addUndef() {
5310 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5311 for (unsigned I = 0; I < BytesPerElement; ++I)
5312 Bytes.push_back(-1);
5313}
5314
5315// Add an extra element to the shuffle, taking it from element Elem of Op.
5316// A null Op indicates a vector input whose value will be calculated later;
5317// there is at most one such input per shuffle and it always has the same
5318// type as the result. Aborts and returns false if the source vector elements
5319// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5320 // LLVM semantics they become implicitly extended, but this case is rare and not optimized.
5321bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5322 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5323
5324 // The source vector can have wider elements than the result,
5325 // either through an explicit TRUNCATE or because of type legalization.
5326 // We want the least significant part.
5327 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5328 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5329
5330 // Return false if the source elements are smaller than their destination
5331 // elements.
5332 if (FromBytesPerElement < BytesPerElement)
5333 return false;
5334
5335 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5336 (FromBytesPerElement - BytesPerElement));
5337
5338 // Look through things like shuffles and bitcasts.
5339 while (Op.getNode()) {
5340 if (Op.getOpcode() == ISD::BITCAST)
5341 Op = Op.getOperand(0);
5342 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5343 // See whether the bytes we need come from a contiguous part of one
5344 // operand.
5345 SmallVector<int, SystemZ::VectorBytes> OpBytes;
5346 if (!getVPermMask(Op, OpBytes))
5347 break;
5348 int NewByte;
5349 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5350 break;
5351 if (NewByte < 0) {
5352 addUndef();
5353 return true;
5354 }
5355 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5356 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5357 } else if (Op.isUndef()) {
5358 addUndef();
5359 return true;
5360 } else
5361 break;
5362 }
5363
5364 // Make sure that the source of the extraction is in Ops.
5365 unsigned OpNo = 0;
5366 for (; OpNo < Ops.size(); ++OpNo)
5367 if (Ops[OpNo] == Op)
5368 break;
5369 if (OpNo == Ops.size())
5370 Ops.push_back(Op);
5371
5372 // Add the element to Bytes.
5373 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5374 for (unsigned I = 0; I < BytesPerElement; ++I)
5375 Bytes.push_back(Base + I);
5376
5377 return true;
5378}
5379
5380// Return SDNodes for the completed shuffle.
5381SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
5382 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
5383
5384 if (Ops.size() == 0)
5385 return DAG.getUNDEF(VT);
5386
5387 // Use a single unpack if possible as the last operation.
5388 tryPrepareForUnpack();
5389
5390 // Make sure that there are at least two shuffle operands.
5391 if (Ops.size() == 1)
5392 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
5393
5394 // Create a tree of shuffles, deferring root node until after the loop.
5395 // Try to redistribute the undefined elements of non-root nodes so that
5396 // the non-root shuffles match something like a pack or merge, then adjust
5397 // the parent node's permute vector to compensate for the new order.
5398 // Among other things, this copes with vectors like <2 x i16> that were
5399 // padded with undefined elements during type legalization.
5400 //
5401 // In the best case this redistribution will lead to the whole tree
5402 // using packs and merges. It should rarely be a loss in other cases.
5403 unsigned Stride = 1;
5404 for (; Stride * 2 < Ops.size(); Stride *= 2) {
5405 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
5406 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5407
5408 // Create a mask for just these two operands.
5409 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
5410 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5411 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
5412 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
5413 if (OpNo == I)
5414 NewBytes[J] = Byte;
5415 else if (OpNo == I + Stride)
5416 NewBytes[J] = SystemZ::VectorBytes + Byte;
5417 else
5418 NewBytes[J] = -1;
5419 }
5420 // See if it would be better to reorganize NewMask to avoid using VPERM.
5421 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
5422 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
5423 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
5424 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
5425 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5426 if (NewBytes[J] >= 0) {
5427 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
5428 "Invalid double permute");
5429 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
5430 } else
5431 assert(NewBytesMap[J] < 0 && "Invalid double permute");
5432 }
5433 } else {
5434 // Just use NewBytes on the operands.
5435 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
5436 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
5437 if (NewBytes[J] >= 0)
5438 Bytes[J] = I * SystemZ::VectorBytes + J;
5439 }
5440 }
5441 }
5442
5443 // Now we just have 2 inputs. Put the second operand in Ops[1].
5444 if (Stride > 1) {
5445 Ops[1] = Ops[Stride];
5446 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5447 if (Bytes[I] >= int(SystemZ::VectorBytes))
5448 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5449 }
5450
5451 // Look for an instruction that can do the permute without resorting
5452 // to VPERM.
5453 unsigned OpNo0, OpNo1;
5454 SDValue Op;
5455 if (unpackWasPrepared() && Ops[1].isUndef())
5456 Op = Ops[0];
5457 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5458 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5459 else
5460 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5461
5462 Op = insertUnpackIfPrepared(DAG, DL, Op);
5463
5464 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5465}
5466
5467#ifndef NDEBUG
5468static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5469 dbgs() << Msg.c_str() << " { ";
5470 for (unsigned i = 0; i < Bytes.size(); i++)
5471 dbgs() << Bytes[i] << " ";
5472 dbgs() << "}\n";
5473}
5474#endif
5475
5476// If the Bytes vector matches an unpack operation, prepare to do the unpack
5477// after all else by removing the zero vector and the effect of the unpack on
5478// Bytes.
5479void GeneralShuffle::tryPrepareForUnpack() {
5480 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5481 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5482 return;
5483
5484 // Only do this if removing the zero vector reduces the depth, otherwise
5485 // the critical path will increase with the final unpack.
5486 if (Ops.size() > 2 &&
5487 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5488 return;
5489
5490 // Find an unpack that would allow removing the zero vector from Ops.
5491 UnpackFromEltSize = 1;
5492 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5493 bool MatchUnpack = true;
5494 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
5495 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5496 unsigned ToEltSize = UnpackFromEltSize * 2;
5497 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5498 if (!IsZextByte)
5499 SrcBytes.push_back(Bytes[Elt]);
5500 if (Bytes[Elt] != -1) {
5501 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5502 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5503 MatchUnpack = false;
5504 break;
5505 }
5506 }
5507 }
5508 if (MatchUnpack) {
5509 if (Ops.size() == 2) {
5510 // Don't use unpack if a single source operand needs rearrangement.
5511 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5512 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5513 UnpackFromEltSize = UINT_MAX;
5514 return;
5515 }
5516 }
5517 break;
5518 }
5519 }
5520 if (UnpackFromEltSize > 4)
5521 return;
5522
5523 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5524 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5525 << ".\n";
5526 dumpBytes(Bytes, "Original Bytes vector:"););
5527
5528 // Apply the unpack in reverse to the Bytes array.
5529 unsigned B = 0;
5530 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5531 Elt += UnpackFromEltSize;
5532 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5533 Bytes[B] = Bytes[Elt];
5534 }
5535 while (B < SystemZ::VectorBytes)
5536 Bytes[B++] = -1;
5537
5538 // Remove the zero vector from Ops
5539 Ops.erase(&Ops[ZeroVecOpNo]);
5540 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5541 if (Bytes[I] >= 0) {
5542 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5543 if (OpNo > ZeroVecOpNo)
5544 Bytes[I] -= SystemZ::VectorBytes;
5545 }
5546
5547 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5548 dbgs() << "\n";);
5549}
5550
5551SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5552 const SDLoc &DL,
5553 SDValue Op) {
5554 if (!unpackWasPrepared())
5555 return Op;
5556 unsigned InBits = UnpackFromEltSize * 8;
5557 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5558 SystemZ::VectorBits / InBits);
5559 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5560 unsigned OutBits = InBits * 2;
5561 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5562 SystemZ::VectorBits / OutBits);
5563 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5564}
5565
5566// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5567 static bool isScalarToVector(SDValue Op) {
5568 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5569 if (!Op.getOperand(I).isUndef())
5570 return false;
5571 return true;
5572}
5573
5574// Return a vector of type VT that contains Value in the first element.
5575// The other elements don't matter.
5576 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5577 SDValue Value) {
5578 // If we have a constant, replicate it to all elements and let the
5579 // BUILD_VECTOR lowering take care of it.
5580 if (Value.getOpcode() == ISD::Constant ||
5581 Value.getOpcode() == ISD::ConstantFP) {
5582 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
5583 return DAG.getBuildVector(VT, DL, Ops);
5584 }
5585 if (Value.isUndef())
5586 return DAG.getUNDEF(VT);
5587 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5588}
5589
5590// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5591// element 1. Used for cases in which replication is cheap.
5592 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5593 SDValue Op0, SDValue Op1) {
5594 if (Op0.isUndef()) {
5595 if (Op1.isUndef())
5596 return DAG.getUNDEF(VT);
5597 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5598 }
5599 if (Op1.isUndef())
5600 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5601 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5602 buildScalarToVector(DAG, DL, VT, Op0),
5603 buildScalarToVector(DAG, DL, VT, Op1));
5604}
5605
5606// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5607// vector for them.
5608 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
5609 SDValue Op1) {
5610 if (Op0.isUndef() && Op1.isUndef())
5611 return DAG.getUNDEF(MVT::v2i64);
5612 // If one of the two inputs is undefined then replicate the other one,
5613 // in order to avoid using another register unnecessarily.
5614 if (Op0.isUndef())
5615 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5616 else if (Op1.isUndef())
5617 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5618 else {
5619 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5620 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5621 }
5622 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5623}
5624
5625// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5626// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5627// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5628// would benefit from this representation and return it if so.
5629 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
5630 BuildVectorSDNode *BVN) {
5631 EVT VT = BVN->getValueType(0);
5632 unsigned NumElements = VT.getVectorNumElements();
5633
5634 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5635 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5636 // need a BUILD_VECTOR, add an additional placeholder operand for that
5637 // BUILD_VECTOR and store its operands in ResidueOps.
5638 GeneralShuffle GS(VT);
5639 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
5640 bool FoundOne = false;
5641 for (unsigned I = 0; I < NumElements; ++I) {
5642 SDValue Op = BVN->getOperand(I);
5643 if (Op.getOpcode() == ISD::TRUNCATE)
5644 Op = Op.getOperand(0);
5645 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5646 Op.getOperand(1).getOpcode() == ISD::Constant) {
5647 unsigned Elem = Op.getConstantOperandVal(1);
5648 if (!GS.add(Op.getOperand(0), Elem))
5649 return SDValue();
5650 FoundOne = true;
5651 } else if (Op.isUndef()) {
5652 GS.addUndef();
5653 } else {
5654 if (!GS.add(SDValue(), ResidueOps.size()))
5655 return SDValue();
5656 ResidueOps.push_back(BVN->getOperand(I));
5657 }
5658 }
5659
5660 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5661 if (!FoundOne)
5662 return SDValue();
5663
5664 // Create the BUILD_VECTOR for the remaining elements, if any.
5665 if (!ResidueOps.empty()) {
5666 while (ResidueOps.size() < NumElements)
5667 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5668 for (auto &Op : GS.Ops) {
5669 if (!Op.getNode()) {
5670 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5671 break;
5672 }
5673 }
5674 }
5675 return GS.getNode(DAG, SDLoc(BVN));
5676}
5677
5678bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5679 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5680 return true;
5681 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
5682 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
5683 return true;
5684 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5685 return true;
5686 return false;
5687}
5688
5689// Combine GPR scalar values Elems into a vector of type VT.
5690SDValue
5691SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5692 SmallVectorImpl<SDValue> &Elems) const {
5693 // See whether there is a single replicated value.
5694 SDValue Single;
5695 unsigned int NumElements = Elems.size();
5696 unsigned int Count = 0;
5697 for (auto Elem : Elems) {
5698 if (!Elem.isUndef()) {
5699 if (!Single.getNode())
5700 Single = Elem;
5701 else if (Elem != Single) {
5702 Single = SDValue();
5703 break;
5704 }
5705 Count += 1;
5706 }
5707 }
5708 // There are three cases here:
5709 //
5710 // - if the only defined element is a loaded one, the best sequence
5711 // is a replicating load.
5712 //
5713 // - otherwise, if the only defined element is an i64 value, we will
5714 // end up with the same VLVGP sequence regardless of whether we short-cut
5715 // for replication or fall through to the later code.
5716 //
5717 // - otherwise, if the only defined element is an i32 or smaller value,
5718 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5719 // This is only a win if the single defined element is used more than once.
5720 // In other cases we're better off using a single VLVGx.
5721 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5722 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5723
5724 // If all elements are loads, use VLREP/VLEs (below).
5725 bool AllLoads = true;
5726 for (auto Elem : Elems)
5727 if (!isVectorElementLoad(Elem)) {
5728 AllLoads = false;
5729 break;
5730 }
5731
5732 // The best way of building a v2i64 from two i64s is to use VLVGP.
5733 if (VT == MVT::v2i64 && !AllLoads)
5734 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5735
5736 // Use a 64-bit merge high to combine two doubles.
5737 if (VT == MVT::v2f64 && !AllLoads)
5738 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5739
5740 // Build v4f32 values directly from the FPRs:
5741 //
5742 // <Axxx> <Bxxx> <Cxxx> <Dxxx>
5743 // V V VMRHF
5744 // <ABxx> <CDxx>
5745 // V VMRHG
5746 // <ABCD>
5747 if (VT == MVT::v4f32 && !AllLoads) {
5748 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5749 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
5750 // Avoid unnecessary undefs by reusing the other operand.
5751 if (Op01.isUndef())
5752 Op01 = Op23;
5753 else if (Op23.isUndef())
5754 Op23 = Op01;
5755 // Merging identical replications is a no-op.
5756 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
5757 return Op01;
5758 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
5759 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
5760 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
5761 DL, MVT::v2i64, Op01, Op23);
5762 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5763 }
5764
5765 // Collect the constant terms.
5766 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
5767 SmallVector<bool, 16> Done(NumElements, false);
5768
5769 unsigned NumConstants = 0;
5770 for (unsigned I = 0; I < NumElements; ++I) {
5771 SDValue Elem = Elems[I];
5772 if (Elem.getOpcode() == ISD::Constant ||
5773 Elem.getOpcode() == ISD::ConstantFP) {
5774 NumConstants += 1;
5775 Constants[I] = Elem;
5776 Done[I] = true;
5777 }
5778 }
5779 // If there was at least one constant, fill in the other elements of
5780 // Constants with undefs to get a full vector constant and use that
5781 // as the starting point.
5782 SDValue Result;
5783 SDValue ReplicatedVal;
5784 if (NumConstants > 0) {
5785 for (unsigned I = 0; I < NumElements; ++I)
5786 if (!Constants[I].getNode())
5787 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
5788 Result = DAG.getBuildVector(VT, DL, Constants);
5789 } else {
5790 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
5791 // avoid a false dependency on any previous contents of the vector
5792 // register.
5793
5794 // Use a VLREP if at least one element is a load. Make sure to replicate
5795 // the load with the most elements having its value.
5796 std::map<const SDNode*, unsigned> UseCounts;
5797 SDNode *LoadMaxUses = nullptr;
5798 for (unsigned I = 0; I < NumElements; ++I)
5799 if (isVectorElementLoad(Elems[I])) {
5800 SDNode *Ld = Elems[I].getNode();
5801 UseCounts[Ld]++;
5802 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
5803 LoadMaxUses = Ld;
5804 }
5805 if (LoadMaxUses != nullptr) {
5806 ReplicatedVal = SDValue(LoadMaxUses, 0);
5807 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
5808 } else {
5809 // Try to use VLVGP.
5810 unsigned I1 = NumElements / 2 - 1;
5811 unsigned I2 = NumElements - 1;
5812 bool Def1 = !Elems[I1].isUndef();
5813 bool Def2 = !Elems[I2].isUndef();
5814 if (Def1 || Def2) {
5815 SDValue Elem1 = Elems[Def1 ? I1 : I2];
5816 SDValue Elem2 = Elems[Def2 ? I2 : I1];
5817 Result = DAG.getNode(ISD::BITCAST, DL, VT,
5818 joinDwords(DAG, DL, Elem1, Elem2));
5819 Done[I1] = true;
5820 Done[I2] = true;
5821 } else
5822 Result = DAG.getUNDEF(VT);
5823 }
5824 }
5825
5826 // Use VLVGx to insert the other elements.
5827 for (unsigned I = 0; I < NumElements; ++I)
5828 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
5829 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
5830 DAG.getConstant(I, DL, MVT::i32));
5831 return Result;
5832}
5833
5834SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
5835 SelectionDAG &DAG) const {
5836 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
5837 SDLoc DL(Op);
5838 EVT VT = Op.getValueType();
5839
5840 if (BVN->isConstant()) {
5841 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
5842 return Op;
5843
5844 // Fall back to loading it from memory.
5845 return SDValue();
5846 }
5847
5848 // See if we should use shuffles to construct the vector from other vectors.
5849 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
5850 return Res;
5851
5852 // Detect SCALAR_TO_VECTOR conversions.
5853 if (isScalarToVector(Op))
5854 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
5855
5856 // Otherwise use buildVector to build the vector up from GPRs.
5857 unsigned NumElements = Op.getNumOperands();
5858 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
5859 for (unsigned I = 0; I < NumElements; ++I)
5860 Ops[I] = Op.getOperand(I);
5861 return buildVector(DAG, DL, VT, Ops);
5862}
5863
5864SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
5865 SelectionDAG &DAG) const {
5866 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
5867 SDLoc DL(Op);
5868 EVT VT = Op.getValueType();
5869 unsigned NumElements = VT.getVectorNumElements();
5870
5871 if (VSN->isSplat()) {
5872 SDValue Op0 = Op.getOperand(0);
5873 unsigned Index = VSN->getSplatIndex();
5874 assert(Index < VT.getVectorNumElements() &&
5875 "Splat index should be defined and in first operand");
5876 // See whether the value we're splatting is directly available as a scalar.
5877 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5878 Op0.getOpcode() == ISD::BUILD_VECTOR)
5879 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
5880 // Otherwise keep it as a vector-to-vector operation.
5881 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
5882 DAG.getTargetConstant(Index, DL, MVT::i32));
5883 }
5884
5885 GeneralShuffle GS(VT);
5886 for (unsigned I = 0; I < NumElements; ++I) {
5887 int Elt = VSN->getMaskElt(I);
5888 if (Elt < 0)
5889 GS.addUndef();
5890 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
5891 unsigned(Elt) % NumElements))
5892 return SDValue();
5893 }
5894 return GS.getNode(DAG, SDLoc(VSN));
5895}
5896
5897SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
5898 SelectionDAG &DAG) const {
5899 SDLoc DL(Op);
5900 // Just insert the scalar into element 0 of an undefined vector.
5901 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
5902 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
5903 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
5904}
5905
5906SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
5907 SelectionDAG &DAG) const {
5908 // Handle insertions of floating-point values.
5909 SDLoc DL(Op);
5910 SDValue Op0 = Op.getOperand(0);
5911 SDValue Op1 = Op.getOperand(1);
5912 SDValue Op2 = Op.getOperand(2);
5913 EVT VT = Op.getValueType();
5914
5915 // Insertions into constant indices of a v2f64 can be done using VPDI.
5916 // However, if the inserted value is a bitcast or a constant then it's
5917 // better to use GPRs, as below.
5918 if (VT == MVT::v2f64 &&
5919 Op1.getOpcode() != ISD::BITCAST &&
5920 Op1.getOpcode() != ISD::ConstantFP &&
5921 Op2.getOpcode() == ISD::Constant) {
5922 uint64_t Index = Op2->getAsZExtVal();
5923 unsigned Mask = VT.getVectorNumElements() - 1;
5924 if (Index <= Mask)
5925 return Op;
5926 }
5927
5928 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
5929 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
5930 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
5931 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
5932 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
5933 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
5934 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5935}
5936
5937SDValue
5938SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
5939 SelectionDAG &DAG) const {
5940 // Handle extractions of floating-point values.
5941 SDLoc DL(Op);
5942 SDValue Op0 = Op.getOperand(0);
5943 SDValue Op1 = Op.getOperand(1);
5944 EVT VT = Op.getValueType();
5945 EVT VecVT = Op0.getValueType();
5946
5947 // Extractions of constant indices can be done directly.
5948 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
5949 uint64_t Index = CIndexN->getZExtValue();
5950 unsigned Mask = VecVT.getVectorNumElements() - 1;
5951 if (Index <= Mask)
5952 return Op;
5953 }
5954
5955 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
5956 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
5957 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
5958 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
5959 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
5960 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5961}
5962
5963SDValue SystemZTargetLowering::
5964lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5965 SDValue PackedOp = Op.getOperand(0);
5966 EVT OutVT = Op.getValueType();
5967 EVT InVT = PackedOp.getValueType();
5968 unsigned ToBits = OutVT.getScalarSizeInBits();
5969 unsigned FromBits = InVT.getScalarSizeInBits();
5970 do {
5971 FromBits *= 2;
5972 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5973 SystemZ::VectorBits / FromBits);
5974 PackedOp =
5975 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
5976 } while (FromBits != ToBits);
5977 return PackedOp;
5978}
5979
5980// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
5981SDValue SystemZTargetLowering::
5982lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5983 SDValue PackedOp = Op.getOperand(0);
5984 SDLoc DL(Op);
5985 EVT OutVT = Op.getValueType();
5986 EVT InVT = PackedOp.getValueType();
5987 unsigned InNumElts = InVT.getVectorNumElements();
5988 unsigned OutNumElts = OutVT.getVectorNumElements();
5989 unsigned NumInPerOut = InNumElts / OutNumElts;
5990
5991 SDValue ZeroVec =
5992 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5993
5994 SmallVector<int, 16> Mask(InNumElts);
5995 unsigned ZeroVecElt = InNumElts;
5996 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
5997 unsigned MaskElt = PackedElt * NumInPerOut;
5998 unsigned End = MaskElt + NumInPerOut - 1;
5999 for (; MaskElt < End; MaskElt++)
6000 Mask[MaskElt] = ZeroVecElt++;
6001 Mask[MaskElt] = PackedElt;
6002 }
6003 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6004 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6005}
6006
6007SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6008 unsigned ByScalar) const {
6009 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6010 SDValue Op0 = Op.getOperand(0);
6011 SDValue Op1 = Op.getOperand(1);
6012 SDLoc DL(Op);
6013 EVT VT = Op.getValueType();
6014 unsigned ElemBitSize = VT.getScalarSizeInBits();
6015
6016 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6017 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6018 APInt SplatBits, SplatUndef;
6019 unsigned SplatBitSize;
6020 bool HasAnyUndefs;
6021 // Check for constant splats. Use ElemBitSize as the minimum element
6022 // width and reject splats that need wider elements.
6023 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6024 ElemBitSize, true) &&
6025 SplatBitSize == ElemBitSize) {
6026 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6027 DL, MVT::i32);
6028 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6029 }
6030 // Check for variable splats.
6031 BitVector UndefElements;
6032 SDValue Splat = BVN->getSplatValue(&UndefElements);
6033 if (Splat) {
6034 // Since i32 is the smallest legal type, we either need a no-op
6035 // or a truncation.
6036 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6037 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6038 }
6039 }
6040
6041 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6042 // and the shift amount is directly available in a GPR.
6043 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6044 if (VSN->isSplat()) {
6045 SDValue VSNOp0 = VSN->getOperand(0);
6046 unsigned Index = VSN->getSplatIndex();
6047 assert(Index < VT.getVectorNumElements() &&
6048 "Splat index should be defined and in first operand");
6049 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6050 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6051 // Since i32 is the smallest legal type, we either need a no-op
6052 // or a truncation.
6053 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6054 VSNOp0.getOperand(Index));
6055 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6056 }
6057 }
6058 }
6059
6060 // Otherwise just treat the current form as legal.
6061 return Op;
6062}
6063
6064SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6065 SelectionDAG &DAG) const {
6066 SDLoc DL(Op);
6067 MVT ResultVT = Op.getSimpleValueType();
6068 SDValue Arg = Op.getOperand(0);
6069 unsigned Check = Op.getConstantOperandVal(1);
6070
6071 unsigned TDCMask = 0;
6072 if (Check & fcSNan)
6073 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
6074 if (Check & fcQNan)
6075 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
6076 if (Check & fcPosInf)
6077 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
6078 if (Check & fcNegInf)
6079 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
6080 if (Check & fcPosNormal)
6081 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
6082 if (Check & fcNegNormal)
6083 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
6084 if (Check & fcPosSubnormal)
6085 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
6086 if (Check & fcNegSubnormal)
6087 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
6088 if (Check & fcPosZero)
6089 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6090 if (Check & fcNegZero)
6091 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
6092 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
6093
6094 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
6095 return getCCResult(DAG, Intr);
6096}
6097
6098SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
6099 SelectionDAG &DAG) const {
6100 SDLoc DL(Op);
6101 SDValue Chain = Op.getOperand(0);
6102
6103 // STCKF only supports a memory operand, so we have to use a temporary.
6104 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
6105 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6106 MachinePointerInfo MPI =
6107 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
6108
6109 // Use STCKF to store the TOD clock into the temporary.
6110 SDValue StoreOps[] = {Chain, StackPtr};
6111 Chain = DAG.getMemIntrinsicNode(
6112 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
6113 MPI, MaybeAlign(), MachineMemOperand::MOStore);
6114
6115 // And read it back from there.
6116 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
6117}
6118
6119 SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
6120 SelectionDAG &DAG) const {
6121 switch (Op.getOpcode()) {
6122 case ISD::FRAMEADDR:
6123 return lowerFRAMEADDR(Op, DAG);
6124 case ISD::RETURNADDR:
6125 return lowerRETURNADDR(Op, DAG);
6126 case ISD::BR_CC:
6127 return lowerBR_CC(Op, DAG);
6128 case ISD::SELECT_CC:
6129 return lowerSELECT_CC(Op, DAG);
6130 case ISD::SETCC:
6131 return lowerSETCC(Op, DAG);
6132 case ISD::STRICT_FSETCC:
6133 return lowerSTRICT_FSETCC(Op, DAG, false);
6134 case ISD::STRICT_FSETCCS:
6135 return lowerSTRICT_FSETCC(Op, DAG, true);
6136 case ISD::GlobalAddress:
6137 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
6138 case ISD::GlobalTLSAddress:
6139 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
6140 case ISD::BlockAddress:
6141 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
6142 case ISD::JumpTable:
6143 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
6144 case ISD::ConstantPool:
6145 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
6146 case ISD::BITCAST:
6147 return lowerBITCAST(Op, DAG);
6148 case ISD::VASTART:
6149 return lowerVASTART(Op, DAG);
6150 case ISD::VACOPY:
6151 return lowerVACOPY(Op, DAG);
6152 case ISD::DYNAMIC_STACKALLOC:
6153 return lowerDYNAMIC_STACKALLOC(Op, DAG);
6154 case ISD::GET_DYNAMIC_AREA_OFFSET:
6155 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
6156 case ISD::SMUL_LOHI:
6157 return lowerSMUL_LOHI(Op, DAG);
6158 case ISD::UMUL_LOHI:
6159 return lowerUMUL_LOHI(Op, DAG);
6160 case ISD::SDIVREM:
6161 return lowerSDIVREM(Op, DAG);
6162 case ISD::UDIVREM:
6163 return lowerUDIVREM(Op, DAG);
6164 case ISD::SADDO:
6165 case ISD::SSUBO:
6166 case ISD::UADDO:
6167 case ISD::USUBO:
6168 return lowerXALUO(Op, DAG);
6169 case ISD::UADDO_CARRY:
6170 case ISD::USUBO_CARRY:
6171 return lowerUADDSUBO_CARRY(Op, DAG);
6172 case ISD::OR:
6173 return lowerOR(Op, DAG);
6174 case ISD::CTPOP:
6175 return lowerCTPOP(Op, DAG);
6176 case ISD::VECREDUCE_ADD:
6177 return lowerVECREDUCE_ADD(Op, DAG);
6178 case ISD::ATOMIC_FENCE:
6179 return lowerATOMIC_FENCE(Op, DAG);
6180 case ISD::ATOMIC_SWAP:
6181 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
6182 case ISD::ATOMIC_STORE:
6183 case ISD::ATOMIC_LOAD:
6184 return lowerATOMIC_LDST_I128(Op, DAG);
6185 case ISD::ATOMIC_LOAD_ADD:
6186 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
6187 case ISD::ATOMIC_LOAD_SUB:
6188 return lowerATOMIC_LOAD_SUB(Op, DAG);
6189 case ISD::ATOMIC_LOAD_AND:
6190 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
6191 case ISD::ATOMIC_LOAD_OR:
6192 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
6193 case ISD::ATOMIC_LOAD_XOR:
6194 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
6195 case ISD::ATOMIC_LOAD_NAND:
6196 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
6197 case ISD::ATOMIC_LOAD_MIN:
6198 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
6199 case ISD::ATOMIC_LOAD_MAX:
6200 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
6201 case ISD::ATOMIC_LOAD_UMIN:
6202 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
6203 case ISD::ATOMIC_LOAD_UMAX:
6204 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
6205 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
6206 return lowerATOMIC_CMP_SWAP(Op, DAG);
6207 case ISD::STACKSAVE:
6208 return lowerSTACKSAVE(Op, DAG);
6209 case ISD::STACKRESTORE:
6210 return lowerSTACKRESTORE(Op, DAG);
6211 case ISD::PREFETCH:
6212 return lowerPREFETCH(Op, DAG);
6213 case ISD::INTRINSIC_W_CHAIN:
6214 return lowerINTRINSIC_W_CHAIN(Op, DAG);
6215 case ISD::INTRINSIC_WO_CHAIN:
6216 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
6217 case ISD::BUILD_VECTOR:
6218 return lowerBUILD_VECTOR(Op, DAG);
6219 case ISD::VECTOR_SHUFFLE:
6220 return lowerVECTOR_SHUFFLE(Op, DAG);
6221 case ISD::SCALAR_TO_VECTOR:
6222 return lowerSCALAR_TO_VECTOR(Op, DAG);
6223 case ISD::INSERT_VECTOR_ELT:
6224 return lowerINSERT_VECTOR_ELT(Op, DAG);
6225 case ISD::EXTRACT_VECTOR_ELT:
6226 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6227 case ISD::SIGN_EXTEND_VECTOR_INREG:
6228 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
6229 case ISD::ZERO_EXTEND_VECTOR_INREG:
6230 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
6231 case ISD::SHL:
6232 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
6233 case ISD::SRL:
6234 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
6235 case ISD::SRA:
6236 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
6237 case ISD::ROTL:
6238 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
6239 case ISD::IS_FPCLASS:
6240 return lowerIS_FPCLASS(Op, DAG);
6241 case ISD::GET_ROUNDING:
6242 return lowerGET_ROUNDING(Op, DAG);
6243 case ISD::READCYCLECOUNTER:
6244 return lowerREADCYCLECOUNTER(Op, DAG);
6245 default:
6246 llvm_unreachable("Unexpected node to lower");
6247 }
6248}
6249
6250// Manually lower a bitcast to avoid introducing illegal types after type
6251// legalization.
6252 static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
6253 SDValue Chain, const SDLoc &SL) {
6254 SDValue Hi =
6255 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::i64, Src);
6256 SDValue Lo =
6257 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::i64, Src);
6258
6259 Hi = DAG.getBitcast(MVT::f64, Hi);
6260 Lo = DAG.getBitcast(MVT::f64, Lo);
6261
6262 SDNode *Pair = DAG.getMachineNode(
6263 SystemZ::REG_SEQUENCE, SL, MVT::f128,
6264 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
6265 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
6266 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
6267 return SDValue(Pair, 0);
6268}
6269
6270static std::pair<SDValue, SDValue>
6271 expandBitCastF128ToI128Parts(SelectionDAG &DAG, SDValue Src, const SDLoc &SL) {
6272 SDValue LoFP =
6273 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
6274 SDValue HiFP =
6275 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
6276 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
6277 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
6278
6279 return {Hi, Lo};
6280}
6281
6282 static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
6283 const SDLoc &SL) {
6284
6285 auto [Hi, Lo] = expandBitCastF128ToI128Parts(DAG, Src, SL);
6286 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, SL, MVT::Untyped, Hi, Lo);
6287 return SDValue(Pair, 0);
6288}
6289
6290// Lower operations with invalid operand or result types (currently used
6291// only for 128-bit integer types).
6292void
6293 SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
6294 SmallVectorImpl<SDValue> &Results,
6295 SelectionDAG &DAG) const {
6296 switch (N->getOpcode()) {
6297 case ISD::ATOMIC_LOAD: {
6298 SDLoc DL(N);
6299 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
6300 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
6301 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6302 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
6303 DL, Tys, Ops, MVT::i128, MMO);
6304
6305 EVT VT = N->getValueType(0);
6306
6307 if (VT == MVT::i128 || isTypeLegal(MVT::i128)) {
6308 SDValue Lowered = lowerGR128ToI128(DAG, Res);
6309 Results.push_back(DAG.getBitcast(VT, Lowered));
6310 Results.push_back(Res.getValue(1));
6311 } else {
6312 // For the f128 case, after type legalization, we cannot produce a bitcast
6313 // with an illegal type (i.e. i128), so manually lower it.
6314 //
6315 // FIXME: Really v2i64 should be legal, and should be used in place of
6316 // untyped. Then we could emit the bitcast which will potentially fold
6317 // into the use.
6318 SDValue Cast = expandBitCastI128ToF128(DAG, Res, Res.getValue(1), DL);
6319 Results.push_back(Cast);
6320 Results.push_back(Res.getValue(1));
6321 }
6322
6323 break;
6324 }
6325 case ISD::ATOMIC_STORE: {
6326 SDLoc DL(N);
6327 SDVTList Tys = DAG.getVTList(MVT::Other);
6328 SDValue Val = N->getOperand(1);
6329 EVT VT = Val.getValueType();
6330
6331 if (VT == MVT::i128 || isTypeLegal(MVT::i128)) {
6332 Val = DAG.getBitcast(MVT::i128, Val);
6333 Val = lowerI128ToGR128(DAG, Val);
6334 } else {
6335 Val = expandBitCastF128ToI128(DAG, Val, DL);
6336 }
6337
6338 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
6339 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6340 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
6341 DL, Tys, Ops, MVT::i128, MMO);
6342 // We have to enforce sequential consistency by performing a
6343 // serialization operation after the store.
6344 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
6345 AtomicOrdering::SequentiallyConsistent)
6346 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
6347 MVT::Other, Res), 0);
6348 Results.push_back(Res);
6349 break;
6350 }
6351 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
6352 SDLoc DL(N);
6353 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
6354 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
6355 lowerI128ToGR128(DAG, N->getOperand(2)),
6356 lowerI128ToGR128(DAG, N->getOperand(3)) };
6357 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6358 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
6359 DL, Tys, Ops, MVT::i128, MMO);
6360 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
6361 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
6362 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
6363 Results.push_back(lowerGR128ToI128(DAG, Res));
6364 Results.push_back(Success);
6365 Results.push_back(Res.getValue(2));
6366 break;
6367 }
6368 case ISD::BITCAST: {
6369 SDValue Src = N->getOperand(0);
6370 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
6371 !useSoftFloat()) {
6372 SDLoc DL(N);
6373 SDValue Lo, Hi;
6374 if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
6375 SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
6376 Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
6377 DAG.getConstant(1, DL, MVT::i32));
6378 Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
6379 DAG.getConstant(0, DL, MVT::i32));
6380 } else {
6381 // FIXME: Assert should be moved into expandBitCastF128ToI128Parts
6382 assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
6383 "Unrecognized register class for f128.");
6384 std::tie(Hi, Lo) = expandBitCastF128ToI128Parts(DAG, Src, DL);
6385 }
6386
6387 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
6388 }
6389 break;
6390 }
6391 default:
6392 llvm_unreachable("Unexpected node to lower");
6393 }
6394}
6395
6396void
6397 SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
6398 SmallVectorImpl<SDValue> &Results,
6399 SelectionDAG &DAG) const {
6400 return LowerOperationWrapper(N, Results, DAG);
6401}
6402
6403const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
6404#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
6405 switch ((SystemZISD::NodeType)Opcode) {
6406 case SystemZISD::FIRST_NUMBER: break;
6407 OPCODE(RET_GLUE);
6408 OPCODE(CALL);
6409 OPCODE(SIBCALL);
6410 OPCODE(TLS_GDCALL);
6411 OPCODE(TLS_LDCALL);
6412 OPCODE(PCREL_WRAPPER);
6413 OPCODE(PCREL_OFFSET);
6414 OPCODE(ICMP);
6415 OPCODE(FCMP);
6416 OPCODE(STRICT_FCMP);
6417 OPCODE(STRICT_FCMPS);
6418 OPCODE(TM);
6419 OPCODE(BR_CCMASK);
6420 OPCODE(SELECT_CCMASK);
6421 OPCODE(ADJDYNALLOC);
6422 OPCODE(PROBED_ALLOCA);
6423 OPCODE(POPCNT);
6424 OPCODE(SMUL_LOHI);
6425 OPCODE(UMUL_LOHI);
6426 OPCODE(SDIVREM);
6427 OPCODE(UDIVREM);
6428 OPCODE(SADDO);
6429 OPCODE(SSUBO);
6430 OPCODE(UADDO);
6431 OPCODE(USUBO);
6432 OPCODE(ADDCARRY);
6433 OPCODE(SUBCARRY);
6434 OPCODE(GET_CCMASK);
6435 OPCODE(MVC);
6436 OPCODE(NC);
6437 OPCODE(OC);
6438 OPCODE(XC);
6439 OPCODE(CLC);
6440 OPCODE(MEMSET_MVC);
6441 OPCODE(STPCPY);
6442 OPCODE(STRCMP);
6443 OPCODE(SEARCH_STRING);
6444 OPCODE(IPM);
6445 OPCODE(TBEGIN);
6446 OPCODE(TBEGIN_NOFLOAT);
6447 OPCODE(TEND);
6448 OPCODE(BYTE_MASK);
6449 OPCODE(ROTATE_MASK);
6450 OPCODE(REPLICATE);
6451 OPCODE(JOIN_DWORDS);
6452 OPCODE(SPLAT);
6453 OPCODE(MERGE_HIGH);
6454 OPCODE(MERGE_LOW);
6455 OPCODE(SHL_DOUBLE);
6456 OPCODE(PERMUTE_DWORDS);
6457 OPCODE(PERMUTE);
6458 OPCODE(PACK);
6459 OPCODE(PACKS_CC);
6460 OPCODE(PACKLS_CC);
6461 OPCODE(UNPACK_HIGH);
6462 OPCODE(UNPACKL_HIGH);
6463 OPCODE(UNPACK_LOW);
6464 OPCODE(UNPACKL_LOW);
6465 OPCODE(VSHL_BY_SCALAR);
6466 OPCODE(VSRL_BY_SCALAR);
6467 OPCODE(VSRA_BY_SCALAR);
6468 OPCODE(VROTL_BY_SCALAR);
6469 OPCODE(VSUM);
6470 OPCODE(VACC);
6471 OPCODE(VSCBI);
6472 OPCODE(VAC);
6473 OPCODE(VSBI);
6474 OPCODE(VACCC);
6475 OPCODE(VSBCBI);
6476 OPCODE(VICMPE);
6477 OPCODE(VICMPH);
6478 OPCODE(VICMPHL);
6479 OPCODE(VICMPES);
6480 OPCODE(VICMPHS);
6481 OPCODE(VICMPHLS);
6482 OPCODE(VFCMPE);
6483 OPCODE(STRICT_VFCMPE);
6484 OPCODE(STRICT_VFCMPES);
6485 OPCODE(VFCMPH);
6486 OPCODE(STRICT_VFCMPH);
6487 OPCODE(STRICT_VFCMPHS);
6488 OPCODE(VFCMPHE);
6489 OPCODE(STRICT_VFCMPHE);
6490 OPCODE(STRICT_VFCMPHES);
6491 OPCODE(VFCMPES);
6492 OPCODE(VFCMPHS);
6493 OPCODE(VFCMPHES);
6494 OPCODE(VFTCI);
6495 OPCODE(VEXTEND);
6496 OPCODE(STRICT_VEXTEND);
6497 OPCODE(VROUND);
6498 OPCODE(STRICT_VROUND);
6499 OPCODE(VTM);
6500 OPCODE(SCMP128HI);
6501 OPCODE(UCMP128HI);
6502 OPCODE(VFAE_CC);
6503 OPCODE(VFAEZ_CC);
6504 OPCODE(VFEE_CC);
6505 OPCODE(VFEEZ_CC);
6506 OPCODE(VFENE_CC);
6507 OPCODE(VFENEZ_CC);
6508 OPCODE(VISTR_CC);
6509 OPCODE(VSTRC_CC);
6510 OPCODE(VSTRCZ_CC);
6511 OPCODE(VSTRS_CC);
6512 OPCODE(VSTRSZ_CC);
6513 OPCODE(TDC);
6514 OPCODE(ATOMIC_SWAPW);
6515 OPCODE(ATOMIC_LOADW_ADD);
6516 OPCODE(ATOMIC_LOADW_SUB);
6517 OPCODE(ATOMIC_LOADW_AND);
6518 OPCODE(ATOMIC_LOADW_OR);
6519 OPCODE(ATOMIC_LOADW_XOR);
6520 OPCODE(ATOMIC_LOADW_NAND);
6521 OPCODE(ATOMIC_LOADW_MIN);
6522 OPCODE(ATOMIC_LOADW_MAX);
6523 OPCODE(ATOMIC_LOADW_UMIN);
6524 OPCODE(ATOMIC_LOADW_UMAX);
6525 OPCODE(ATOMIC_CMP_SWAPW);
6526 OPCODE(ATOMIC_CMP_SWAP);
6527 OPCODE(ATOMIC_LOAD_128);
6528 OPCODE(ATOMIC_STORE_128);
6529 OPCODE(ATOMIC_CMP_SWAP_128);
6530 OPCODE(LRV);
6531 OPCODE(STRV);
6532 OPCODE(VLER);
6533 OPCODE(VSTER);
6534 OPCODE(STCKF);
6535 OPCODE(PREFETCH);
6536 OPCODE(ADA_ENTRY);
6537 }
6538 return nullptr;
6539#undef OPCODE
6540}
6541
6542// Return true if VT is a vector whose elements are a whole number of bytes
6543// in width. Also check for presence of vector support.
6544bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
6545 if (!Subtarget.hasVector())
6546 return false;
6547
6548 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
6549}
6550
6551// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
6552// producing a result of type ResVT. Op is a possibly bitcast version
6553// of the input vector and Index is the index (based on type VecVT) that
6554// should be extracted. Return the new extraction if a simplification
6555// was possible or if Force is true.
6556SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
6557 EVT VecVT, SDValue Op,
6558 unsigned Index,
6559 DAGCombinerInfo &DCI,
6560 bool Force) const {
6561 SelectionDAG &DAG = DCI.DAG;
6562
6563 // The number of bytes being extracted.
6564 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6565
6566 for (;;) {
6567 unsigned Opcode = Op.getOpcode();
6568 if (Opcode == ISD::BITCAST)
6569 // Look through bitcasts.
6570 Op = Op.getOperand(0);
6571 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
6572 canTreatAsByteVector(Op.getValueType())) {
6573 // Get a VPERM-like permute mask and see whether the bytes covered
6574 // by the extracted element are a contiguous sequence from one
6575 // source operand.
6576 SmallVector<int, SystemZ::VectorBytes> Bytes;
6577 if (!getVPermMask(Op, Bytes))
6578 break;
6579 int First;
6580 if (!getShuffleInput(Bytes, Index * BytesPerElement,
6581 BytesPerElement, First))
6582 break;
6583 if (First < 0)
6584 return DAG.getUNDEF(ResVT);
6585 // Make sure the contiguous sequence starts at a multiple of the
6586 // original element size.
6587 unsigned Byte = unsigned(First) % Bytes.size();
6588 if (Byte % BytesPerElement != 0)
6589 break;
6590 // We can get the extracted value directly from an input.
6591 Index = Byte / BytesPerElement;
6592 Op = Op.getOperand(unsigned(First) / Bytes.size());
6593 Force = true;
6594 } else if (Opcode == ISD::BUILD_VECTOR &&
6595 canTreatAsByteVector(Op.getValueType())) {
6596 // We can only optimize this case if the BUILD_VECTOR elements are
6597 // at least as wide as the extracted value.
6598 EVT OpVT = Op.getValueType();
6599 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6600 if (OpBytesPerElement < BytesPerElement)
6601 break;
6602 // Make sure that the least-significant bit of the extracted value
6603 // is the least significant bit of an input.
6604 unsigned End = (Index + 1) * BytesPerElement;
6605 if (End % OpBytesPerElement != 0)
6606 break;
6607 // We're extracting the low part of one operand of the BUILD_VECTOR.
6608 Op = Op.getOperand(End / OpBytesPerElement - 1);
6609 if (!Op.getValueType().isInteger()) {
6610 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
6611 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
6612 DCI.AddToWorklist(Op.getNode());
6613 }
6614 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
6615 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
6616 if (VT != ResVT) {
6617 DCI.AddToWorklist(Op.getNode());
6618 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
6619 }
6620 return Op;
6621 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6622 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
6623 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6624 canTreatAsByteVector(Op.getValueType()) &&
6625 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
6626 // Make sure that only the unextended bits are significant.
6627 EVT ExtVT = Op.getValueType();
6628 EVT OpVT = Op.getOperand(0).getValueType();
6629 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
6630 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6631 unsigned Byte = Index * BytesPerElement;
6632 unsigned SubByte = Byte % ExtBytesPerElement;
6633 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
6634 if (SubByte < MinSubByte ||
6635 SubByte + BytesPerElement > ExtBytesPerElement)
6636 break;
6637 // Get the byte offset of the unextended element
6638 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
6639 // ...then add the byte offset relative to that element.
6640 Byte += SubByte - MinSubByte;
6641 if (Byte % BytesPerElement != 0)
6642 break;
6643 Op = Op.getOperand(0);
6644 Index = Byte / BytesPerElement;
6645 Force = true;
6646 } else
6647 break;
6648 }
6649 if (Force) {
6650 if (Op.getValueType() != VecVT) {
6651 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
6652 DCI.AddToWorklist(Op.getNode());
6653 }
6654 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
6655 DAG.getConstant(Index, DL, MVT::i32));
6656 }
6657 return SDValue();
6658}
6659
6660// Optimize vector operations in scalar value Op on the basis that Op
6661// is truncated to TruncVT.
6662SDValue SystemZTargetLowering::combineTruncateExtract(
6663 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
6664 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
6665 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
6666 // of type TruncVT.
6667 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6668 TruncVT.getSizeInBits() % 8 == 0) {
6669 SDValue Vec = Op.getOperand(0);
6670 EVT VecVT = Vec.getValueType();
6671 if (canTreatAsByteVector(VecVT)) {
6672 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6673 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6674 unsigned TruncBytes = TruncVT.getStoreSize();
6675 if (BytesPerElement % TruncBytes == 0) {
6676 // Calculate the value of Y' in the above description. We are
6677 // splitting the original elements into Scale equal-sized pieces
6678 // and for truncation purposes want the last (least-significant)
6679 // of these pieces for IndexN. This is easiest to do by calculating
6680 // the start index of the following element and then subtracting 1.
6681 unsigned Scale = BytesPerElement / TruncBytes;
6682 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
6683
6684 // Defer the creation of the bitcast from X to combineExtract,
6685 // which might be able to optimize the extraction.
6686 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
6687 VecVT.getStoreSize() / TruncBytes);
6688 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
6689 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
6690 }
6691 }
6692 }
6693 }
6694 return SDValue();
6695}
6696
6697SDValue SystemZTargetLowering::combineZERO_EXTEND(
6698 SDNode *N, DAGCombinerInfo &DCI) const {
6699 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
6700 SelectionDAG &DAG = DCI.DAG;
6701 SDValue N0 = N->getOperand(0);
6702 EVT VT = N->getValueType(0);
6703 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
6704 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
6705 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6706 if (TrueOp && FalseOp) {
6707 SDLoc DL(N0);
6708 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
6709 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
6710 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
6711 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
6712 // If N0 has multiple uses, change other uses as well.
6713 if (!N0.hasOneUse()) {
6714 SDValue TruncSelect =
6715 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
6716 DCI.CombineTo(N0.getNode(), TruncSelect);
6717 }
6718 return NewSelect;
6719 }
6720 }
6721 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
6722 // of the result is smaller than the size of X and all the truncated bits
6723 // of X are already zero.
6724 if (N0.getOpcode() == ISD::XOR &&
6725 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
6726 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6727 N0.getOperand(1).getOpcode() == ISD::Constant) {
6728 SDValue X = N0.getOperand(0).getOperand(0);
6729 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
6730 KnownBits Known = DAG.computeKnownBits(X);
6731 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
6732 N0.getValueSizeInBits(),
6733 VT.getSizeInBits());
6734 if (TruncatedBits.isSubsetOf(Known.Zero)) {
6735 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
6736 APInt Mask = N0.getConstantOperandAPInt(1).trunc(VT.getSizeInBits());
6737 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
6738 X, DAG.getConstant(Mask, SDLoc(N0), VT));
6739 }
6740 }
6741 }
6742
6743 return SDValue();
6744}
6745
6746SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
6747 SDNode *N, DAGCombinerInfo &DCI) const {
6748 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
6749 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
6750 // into (select_cc LHS, RHS, -1, 0, COND)
6751 SelectionDAG &DAG = DCI.DAG;
6752 SDValue N0 = N->getOperand(0);
6753 EVT VT = N->getValueType(0);
6754 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6755 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
6756 N0 = N0.getOperand(0);
6757 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
6758 SDLoc DL(N0);
6759 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
6760 DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
6761 N0.getOperand(2) };
6762 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
6763 }
6764 return SDValue();
6765}
6766
6767SDValue SystemZTargetLowering::combineSIGN_EXTEND(
6768 SDNode *N, DAGCombinerInfo &DCI) const {
6769 // Convert (sext (ashr (shl X, C1), C2)) to
6770 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
6771 // cheap as narrower ones.
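// E.g. with X : i32, (sext_i64 (ashr (shl X, 24), 24)) becomes
// (ashr (shl (anyext_i64 X), 56), 56).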
6772 SelectionDAG &DAG = DCI.DAG;
6773 SDValue N0 = N->getOperand(0);
6774 EVT VT = N->getValueType(0);
6775 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
6776 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6777 SDValue Inner = N0.getOperand(0);
6778 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
6779 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
6780 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
6781 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
6782 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
6783 EVT ShiftVT = N0.getOperand(1).getValueType();
6784 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
6785 Inner.getOperand(0));
6786 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
6787 DAG.getConstant(NewShlAmt, SDLoc(Inner),
6788 ShiftVT));
6789 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
6790 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
6791 }
6792 }
6793 }
6794
6795 return SDValue();
6796}
6797
6798SDValue SystemZTargetLowering::combineMERGE(
6799 SDNode *N, DAGCombinerInfo &DCI) const {
6800 SelectionDAG &DAG = DCI.DAG;
6801 unsigned Opcode = N->getOpcode();
6802 SDValue Op0 = N->getOperand(0);
6803 SDValue Op1 = N->getOperand(1);
6804 if (Op0.getOpcode() == ISD::BITCAST)
6805 Op0 = Op0.getOperand(0);
6806 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
6807 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
6808 // for v4f32.
6809 if (Op1 == N->getOperand(0))
6810 return Op1;
6811 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
6812 EVT VT = Op1.getValueType();
6813 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
6814 if (ElemBytes <= 4) {
6815 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
6816 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
6817 EVT InVT = VT.changeVectorElementTypeToInteger();
6818 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
6819 SystemZ::VectorBytes / ElemBytes / 2);
6820 if (VT != InVT) {
6821 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
6822 DCI.AddToWorklist(Op1.getNode());
6823 }
6824 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
6825 DCI.AddToWorklist(Op.getNode());
6826 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
6827 }
6828 }
6829 return SDValue();
6830}
6831
6832SDValue SystemZTargetLowering::combineLOAD(
6833 SDNode *N, DAGCombinerInfo &DCI) const {
6834 SelectionDAG &DAG = DCI.DAG;
6835 EVT LdVT = N->getValueType(0);
6836 SDLoc DL(N);
6837
6838 // Replace an i128 load that is used solely to move its value into GPRs
6839 // by separate loads of both halves.
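// E.g. (roughly, in IR terms) %lo = trunc i128 %val to i64 together with
// %hi = trunc i128 (lshr i128 %val, 64) to i64 become an i64 load at
// offset 8 (low half) and one at offset 0 (high half), matching the
// big-endian layout.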
6840 if (LdVT == MVT::i128) {
6841 LoadSDNode *LD = cast<LoadSDNode>(N);
6842 if (!LD->isSimple() || !ISD::isNormalLoad(LD))
6843 return SDValue();
6844
6845 // Scan through all users.
6846 SmallVector<std::pair<SDNode *, int>, 2> Users;
6847 int UsedElements = 0;
6848 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6849 UI != UIEnd; ++UI) {
6850 // Skip the uses of the chain.
6851 if (UI.getUse().getResNo() != 0)
6852 continue;
6853
6854 // Verify every user is a TRUNCATE to i64 of the low or high half ...
6855 SDNode *User = *UI;
6856 int Index = 1;
6857 if (User->getOpcode() == ISD::SRL &&
6858 User->getOperand(1).getOpcode() == ISD::Constant &&
6859 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
6860 User = *User->use_begin();
6861 Index = 0;
6862 }
6863 if (User->getOpcode() != ISD::TRUNCATE ||
6864 User->getValueType(0) != MVT::i64)
6865 return SDValue();
6866
6867 // ... and no half is extracted twice.
6868 if (UsedElements & (1 << Index))
6869 return SDValue();
6870
6871 UsedElements |= 1 << Index;
6872 Users.push_back(std::make_pair(User, Index));
6873 }
6874
6875 // Rewrite each extraction as an independent load.
6876 SmallVector<SDValue, 2> ArgChains;
6877 for (auto UserAndIndex : Users) {
6878 SDNode *User = UserAndIndex.first;
6879 unsigned Offset = User->getValueType(0).getStoreSize() * UserAndIndex.second;
6880 SDValue Ptr =
6881 DAG.getMemBasePlusOffset(LD->getBasePtr(), TypeSize::getFixed(Offset), DL);
6882 SDValue EltLoad =
6883 DAG.getLoad(User->getValueType(0), DL, LD->getChain(), Ptr,
6884 LD->getPointerInfo().getWithOffset(Offset),
6885 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
6886 LD->getAAInfo());
6887
6888 DCI.CombineTo(User, EltLoad, true);
6889 ArgChains.push_back(EltLoad.getValue(1));
6890 }
6891
6892 // Collect all chains via TokenFactor.
6893 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
6894 ArgChains);
6895 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
6896 DCI.AddToWorklist(Chain.getNode());
6897 return SDValue(N, 0);
6898 }
6899
6900 if (LdVT.isVector() || LdVT.isInteger())
6901 return SDValue();
6902 // Transform a scalar load that is REPLICATEd and also has other use(s),
6903 // so that those other use(s) read the first element of the REPLICATE
6904 // instead of the load. Otherwise instruction selection will not
6905 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
6906 // point loads.
6907
6908 SDValue Replicate;
6909 SmallVector<SDNode*, 8> OtherUses;
6910 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6911 UI != UE; ++UI) {
6912 if (UI->getOpcode() == SystemZISD::REPLICATE) {
6913 if (Replicate)
6914 return SDValue(); // Should never happen
6915 Replicate = SDValue(*UI, 0);
6916 }
6917 else if (UI.getUse().getResNo() == 0)
6918 OtherUses.push_back(*UI);
6919 }
6920 if (!Replicate || OtherUses.empty())
6921 return SDValue();
6922
6923 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
6924 Replicate, DAG.getConstant(0, DL, MVT::i32));
6925 // Update uses of the loaded Value while preserving old chains.
6926 for (SDNode *U : OtherUses) {
6927 SmallVector<SDValue, 8> Ops;
6928 for (SDValue Op : U->ops())
6929 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
6930 DAG.UpdateNodeOperands(U, Ops);
6931 }
6932 return SDValue(N, 0);
6933}
6934
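// Return true if a load or store of type VT can be done directly in
// byte-swapped form (LRVH/LRV/LRVG and STRVH/STRV/STRVG; the vector and
// i128 forms VLBR/VSTBR require vector-enhancements-2).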
6935bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
6936 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
6937 return true;
6938 if (Subtarget.hasVectorEnhancements2())
6939 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
6940 return true;
6941 return false;
6942}
6943
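// Return true if shuffle mask M reverses the element order of the 128-bit
// vector type VT (undef mask entries are allowed to match any position).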
6944 static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
6945 if (!VT.isVector() || !VT.isSimple() ||
6946 VT.getSizeInBits() != 128 ||
6947 VT.getScalarSizeInBits() % 8 != 0)
6948 return false;
6949
6950 unsigned NumElts = VT.getVectorNumElements();
6951 for (unsigned i = 0; i < NumElts; ++i) {
6952 if (M[i] < 0) continue; // ignore UNDEF indices
6953 if ((unsigned) M[i] != NumElts - 1 - i)
6954 return false;
6955 }
6956
6957 return true;
6958}
6959
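// Return true if StoredVal is used only by stores of a round type of at
// most 16 bytes, possibly via a splat BUILD_VECTOR that itself feeds only
// such stores.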
6960static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
6961 for (auto *U : StoredVal->uses()) {
6962 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
6963 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
6964 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
6965 continue;
6966 } else if (isa<BuildVectorSDNode>(U)) {
6967 SDValue BuildVector = SDValue(U, 0);
6968 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
6969 isOnlyUsedByStores(BuildVector, DAG))
6970 continue;
6971 }
6972 return false;
6973 }
6974 return true;
6975}
6976
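// Match an i128 value built from two i64 GPR halves, i.e.
// (or (zext Lo), (shl (anyext Hi), 64)). On success, return the halves
// in LoPart and HiPart.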
6977static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) {
6978 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
6979 return false;
6980
6981 SDValue Op0 = Val.getOperand(0);
6982 SDValue Op1 = Val.getOperand(1);
6983
6984 if (Op0.getOpcode() == ISD::SHL)
6985 std::swap(Op0, Op1);
6986 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
6987 Op1.getOperand(1).getOpcode() != ISD::Constant ||
6988 Op1.getConstantOperandVal(1) != 64)
6989 return false;
6990 Op1 = Op1.getOperand(0);
6991
6992 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
6993 Op0.getOperand(0).getValueType() != MVT::i64)
6994 return false;
6995 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
6996 Op1.getOperand(0).getValueType() != MVT::i64)
6997 return false;
6998
6999 LoPart = Op0.getOperand(0);
7000 HiPart = Op1.getOperand(0);
7001 return true;
7002}
7003
7004SDValue SystemZTargetLowering::combineSTORE(
7005 SDNode *N, DAGCombinerInfo &DCI) const {
7006 SelectionDAG &DAG = DCI.DAG;
7007 auto *SN = cast<StoreSDNode>(N);
7008 auto &Op1 = N->getOperand(1);
7009 EVT MemVT = SN->getMemoryVT();
7010 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
7011 // for the extraction to be done on a vMiN value, so that we can use VSTE.
7012 // If X has wider elements then convert it to:
7013 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
7014 if (MemVT.isInteger() && SN->isTruncatingStore()) {
7015 if (SDValue Value =
7016 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
7017 DCI.AddToWorklist(Value.getNode());
7018
7019 // Rewrite the store with the new form of stored value.
7020 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
7021 SN->getBasePtr(), SN->getMemoryVT(),
7022 SN->getMemOperand());
7023 }
7024 }
7025 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
7026 if (!SN->isTruncatingStore() &&
7027 Op1.getOpcode() == ISD::BSWAP &&
7028 Op1.getNode()->hasOneUse() &&
7029 canLoadStoreByteSwapped(Op1.getValueType())) {
7030
7031 SDValue BSwapOp = Op1.getOperand(0);
7032
7033 if (BSwapOp.getValueType() == MVT::i16)
7034 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
7035
7036 SDValue Ops[] = {
7037 N->getOperand(0), BSwapOp, N->getOperand(2)
7038 };
7039
7040 return
7041 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
7042 Ops, MemVT, SN->getMemOperand());
7043 }
7044 // Combine STORE (element-swap) into VSTER
7045 if (!SN->isTruncatingStore() &&
7046 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
7047 Op1.getNode()->hasOneUse() &&
7048 Subtarget.hasVectorEnhancements2()) {
7049 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
7050 ArrayRef<int> ShuffleMask = SVN->getMask();
7051 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
7052 SDValue Ops[] = {
7053 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
7054 };
7055
7056 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
7057 DAG.getVTList(MVT::Other),
7058 Ops, MemVT, SN->getMemOperand());
7059 }
7060 }
7061
7062 // Combine STORE (READCYCLECOUNTER) into STCKF.
7063 if (!SN->isTruncatingStore() &&
7064 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
7065 Op1.hasOneUse() &&
7066 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
7067 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
7068 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
7069 DAG.getVTList(MVT::Other),
7070 Ops, MemVT, SN->getMemOperand());
7071 }
7072
7073 // Transform a store of an i128 moved from GPRs into two separate stores.
7074 if (MemVT == MVT::i128 && SN->isSimple() && ISD::isNormalStore(SN)) {
7075 SDValue LoPart, HiPart;
7076 if (isMovedFromParts(Op1, LoPart, HiPart)) {
7077 SDLoc DL(SN);
7078 SDValue Chain0 =
7079 DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
7080 SN->getPointerInfo(), SN->getOriginalAlign(),
7081 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7082 SDValue Chain1 =
7083 DAG.getStore(SN->getChain(), DL, LoPart,
7084 DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
7085 TypeSize::getFixed(8)),
7086 SN->getPointerInfo().getWithOffset(8),
7087 SN->getOriginalAlign(),
7088 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7089
7090 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
7091 }
7092 }
7093
7094 // Replicate a reg or immediate with VREP instead of scalar multiply or
7095 // immediate load. It seems best to do this during the first DAGCombine as
7096 // it is straightforward to handle the zero-extend node in the initial
7097 // DAG, and also not worry about keeping the new MemVT legal (e.g. when
7098 // extracting an i16 element from a v16i8 vector).
7099 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
7100 isOnlyUsedByStores(Op1, DAG)) {
7101 SDValue Word = SDValue();
7102 EVT WordVT;
7103
7104 // Find a replicated immediate and return it if found in Word and its
7105 // type in WordVT.
7106 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
7107 // Some constants are better handled with a scalar store.
7108 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
7109 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
7110 return;
7111 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue()));
7112 if (VCI.isVectorConstantLegal(Subtarget) &&
7113 VCI.Opcode == SystemZISD::REPLICATE) {
7114 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
7115 WordVT = VCI.VecVT.getScalarType();
7116 }
7117 };
7118
7119 // Find a replicated register and return it if found in Word and its type
7120 // in WordVT.
7121 auto FindReplicatedReg = [&](SDValue MulOp) {
7122 EVT MulVT = MulOp.getValueType();
7123 if (MulOp->getOpcode() == ISD::MUL &&
7124 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
7125 // Find a zero extended value and its type.
7126 SDValue LHS = MulOp->getOperand(0);
7127 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
7128 WordVT = LHS->getOperand(0).getValueType();
7129 else if (LHS->getOpcode() == ISD::AssertZext)
7130 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
7131 else
7132 return;
7133 // Find a replicating constant, e.g. 0x00010001.
7134 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
7135 SystemZVectorConstantInfo VCI(
7136 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
7137 if (VCI.isVectorConstantLegal(Subtarget) &&
7138 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
7139 WordVT == VCI.VecVT.getScalarType())
7140 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
7141 }
7142 }
7143 };
7144
7145 if (isa<BuildVectorSDNode>(Op1) &&
7146 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
7147 SDValue SplatVal = Op1->getOperand(0);
7148 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
7149 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
7150 else
7151 FindReplicatedReg(SplatVal);
7152 } else {
7153 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
7154 FindReplicatedImm(C, MemVT.getStoreSize());
7155 else
7156 FindReplicatedReg(Op1);
7157 }
7158
7159 if (Word != SDValue()) {
7160 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
7161 "Bad type handling");
7162 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
7163 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
7164 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
7165 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
7166 SN->getBasePtr(), SN->getMemOperand());
7167 }
7168 }
7169
7170 return SDValue();
7171}
7172
7173SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
7174 SDNode *N, DAGCombinerInfo &DCI) const {
7175 SelectionDAG &DAG = DCI.DAG;
7176 // Combine element-swap (LOAD) into VLER
7177 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7178 N->getOperand(0).hasOneUse() &&
7179 Subtarget.hasVectorEnhancements2()) {
7180 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
7181 ArrayRef<int> ShuffleMask = SVN->getMask();
7182 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
7183 SDValue Load = N->getOperand(0);
7184 LoadSDNode *LD = cast<LoadSDNode>(Load);
7185
7186 // Create the element-swapping load.
7187 SDValue Ops[] = {
7188 LD->getChain(), // Chain
7189 LD->getBasePtr() // Ptr
7190 };
7191 SDValue ESLoad =
7192 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
7193 DAG.getVTList(LD->getValueType(0), MVT::Other),
7194 Ops, LD->getMemoryVT(), LD->getMemOperand());
7195
7196 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
7197 // by the load dead.
7198 DCI.CombineTo(N, ESLoad);
7199
7200 // Next, combine the load away; we give it a bogus result value but a real
7201 // chain result. The result value is dead because the shuffle is dead.
7202 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
7203
7204 // Return N so it doesn't get rechecked!
7205 return SDValue(N, 0);
7206 }
7207 }
7208
7209 return SDValue();
7210}
7211
7212SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
7213 SDNode *N, DAGCombinerInfo &DCI) const {
7214 SelectionDAG &DAG = DCI.DAG;
7215
7216 if (!Subtarget.hasVector())
7217 return SDValue();
7218
7219 // Look through bitcasts that retain the number of vector elements.
7220 SDValue Op = N->getOperand(0);
7221 if (Op.getOpcode() == ISD::BITCAST &&
7222 Op.getValueType().isVector() &&
7223 Op.getOperand(0).getValueType().isVector() &&
7224 Op.getValueType().getVectorNumElements() ==
7225 Op.getOperand(0).getValueType().getVectorNumElements())
7226 Op = Op.getOperand(0);
7227
7228 // Pull BSWAP out of a vector extraction.
7229 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
7230 EVT VecVT = Op.getValueType();
7231 EVT EltVT = VecVT.getVectorElementType();
7232 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
7233 Op.getOperand(0), N->getOperand(1));
7234 DCI.AddToWorklist(Op.getNode());
7235 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
7236 if (EltVT != N->getValueType(0)) {
7237 DCI.AddToWorklist(Op.getNode());
7238 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
7239 }
7240 return Op;
7241 }
7242
7243 // Try to simplify a vector extraction.
7244 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
7245 SDValue Op0 = N->getOperand(0);
7246 EVT VecVT = Op0.getValueType();
7247 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
7248 IndexN->getZExtValue(), DCI, false);
7249 }
7250 return SDValue();
7251}
7252
7253SDValue SystemZTargetLowering::combineJOIN_DWORDS(
7254 SDNode *N, DAGCombinerInfo &DCI) const {
7255 SelectionDAG &DAG = DCI.DAG;
7256 // (join_dwords X, X) == (replicate X)
7257 if (N->getOperand(0) == N->getOperand(1))
7258 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
7259 N->getOperand(0));
7260 return SDValue();
7261}
7262
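// Return the input chain that two strict FP nodes N1 and N2 can both use,
// or an empty SDValue if the chains cannot trivially be merged.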
7263 static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
7264 SDValue Chain1 = N1->getOperand(0);
7265 SDValue Chain2 = N2->getOperand(0);
7266
7267 // Trivial case: both nodes take the same chain.
7268 if (Chain1 == Chain2)
7269 return Chain1;
7270
7271 // FIXME - we could handle more complex cases via TokenFactor,
7272 // assuming we can verify that this would not create a cycle.
7273 return SDValue();
7274}
7275
7276SDValue SystemZTargetLowering::combineFP_ROUND(
7277 SDNode *N, DAGCombinerInfo &DCI) const {
7278
7279 if (!Subtarget.hasVector())
7280 return SDValue();
7281
7282 // (fpround (extract_vector_elt X 0))
7283 // (fpround (extract_vector_elt X 1)) ->
7284 // (extract_vector_elt (VROUND X) 0)
7285 // (extract_vector_elt (VROUND X) 2)
7286 //
7287 // This is a special case since the target doesn't really support v2f32s.
7288 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7289 SelectionDAG &DAG = DCI.DAG;
7290 SDValue Op0 = N->getOperand(OpNo);
7291 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
7292 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7293 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
7294 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7295 Op0.getConstantOperandVal(1) == 0) {
7296 SDValue Vec = Op0.getOperand(0);
7297 for (auto *U : Vec->uses()) {
7298 if (U != Op0.getNode() && U->hasOneUse() &&
7299 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7300 U->getOperand(0) == Vec &&
7301 U->getOperand(1).getOpcode() == ISD::Constant &&
7302 U->getConstantOperandVal(1) == 1) {
7303 SDValue OtherRound = SDValue(*U->use_begin(), 0);
7304 if (OtherRound.getOpcode() == N->getOpcode() &&
7305 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
7306 OtherRound.getValueType() == MVT::f32) {
7307 SDValue VRound, Chain;
7308 if (N->isStrictFPOpcode()) {
7309 Chain = MergeInputChains(N, OtherRound.getNode());
7310 if (!Chain)
7311 continue;
7312 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
7313 {MVT::v4f32, MVT::Other}, {Chain, Vec});
7314 Chain = VRound.getValue(1);
7315 } else
7316 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
7317 MVT::v4f32, Vec);
7318 DCI.AddToWorklist(VRound.getNode());
7319 SDValue Extract1 =
7320 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
7321 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
7322 DCI.AddToWorklist(Extract1.getNode());
7323 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
7324 if (Chain)
7325 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
7326 SDValue Extract0 =
7327 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
7328 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7329 if (Chain)
7330 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7331 N->getVTList(), Extract0, Chain);
7332 return Extract0;
7333 }
7334 }
7335 }
7336 }
7337 return SDValue();
7338}
7339
7340SDValue SystemZTargetLowering::combineFP_EXTEND(
7341 SDNode *N, DAGCombinerInfo &DCI) const {
7342
7343 if (!Subtarget.hasVector())
7344 return SDValue();
7345
7346 // (fpextend (extract_vector_elt X 0))
7347 // (fpextend (extract_vector_elt X 2)) ->
7348 // (extract_vector_elt (VEXTEND X) 0)
7349 // (extract_vector_elt (VEXTEND X) 1)
7350 //
7351 // This is a special case since the target doesn't really support v2f32s.
7352 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7353 SelectionDAG &DAG = DCI.DAG;
7354 SDValue Op0 = N->getOperand(OpNo);
7355 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
7356 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7357 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
7358 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7359 Op0.getConstantOperandVal(1) == 0) {
7360 SDValue Vec = Op0.getOperand(0);
7361 for (auto *U : Vec->uses()) {
7362 if (U != Op0.getNode() && U->hasOneUse() &&
7363 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7364 U->getOperand(0) == Vec &&
7365 U->getOperand(1).getOpcode() == ISD::Constant &&
7366 U->getConstantOperandVal(1) == 2) {
7367 SDValue OtherExtend = SDValue(*U->use_begin(), 0);
7368 if (OtherExtend.getOpcode() == N->getOpcode() &&
7369 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
7370 OtherExtend.getValueType() == MVT::f64) {
7371 SDValue VExtend, Chain;
7372 if (N->isStrictFPOpcode()) {
7373 Chain = MergeInputChains(N, OtherExtend.getNode());
7374 if (!Chain)
7375 continue;
7376 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
7377 {MVT::v2f64, MVT::Other}, {Chain, Vec});
7378 Chain = VExtend.getValue(1);
7379 } else
7380 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
7381 MVT::v2f64, Vec);
7382 DCI.AddToWorklist(VExtend.getNode());
7383 SDValue Extract1 =
7384 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
7385 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
7386 DCI.AddToWorklist(Extract1.getNode());
7387 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
7388 if (Chain)
7389 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
7390 SDValue Extract0 =
7391 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
7392 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7393 if (Chain)
7394 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7395 N->getVTList(), Extract0, Chain);
7396 return Extract0;
7397 }
7398 }
7399 }
7400 }
7401 return SDValue();
7402}
7403
7404SDValue SystemZTargetLowering::combineINT_TO_FP(
7405 SDNode *N, DAGCombinerInfo &DCI) const {
7406 if (DCI.Level != BeforeLegalizeTypes)
7407 return SDValue();
7408 SelectionDAG &DAG = DCI.DAG;
7409 LLVMContext &Ctx = *DAG.getContext();
7410 unsigned Opcode = N->getOpcode();
7411 EVT OutVT = N->getValueType(0);
7412 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
7413 SDValue Op = N->getOperand(0);
7414 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
7415 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
7416
7417 // Insert an extension before type-legalization to avoid scalarization, e.g.:
7418 // v2f64 = uint_to_fp v2i16
7419 // =>
7420 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
7421 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
7422 OutScalarBits <= 64) {
7423 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
7424 EVT ExtVT = EVT::getVectorVT(
7425 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
7426 unsigned ExtOpcode =
7427 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
7428 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
7429 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
7430 }
7431 return SDValue();
7432}
7433
7434SDValue SystemZTargetLowering::combineBSWAP(
7435 SDNode *N, DAGCombinerInfo &DCI) const {
7436 SelectionDAG &DAG = DCI.DAG;
7437 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
7438 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7439 N->getOperand(0).hasOneUse() &&
7440 canLoadStoreByteSwapped(N->getValueType(0))) {
7441 SDValue Load = N->getOperand(0);
7442 LoadSDNode *LD = cast<LoadSDNode>(Load);
7443
7444 // Create the byte-swapping load.
7445 SDValue Ops[] = {
7446 LD->getChain(), // Chain
7447 LD->getBasePtr() // Ptr
7448 };
7449 EVT LoadVT = N->getValueType(0);
7450 if (LoadVT == MVT::i16)
7451 LoadVT = MVT::i32;
7452 SDValue BSLoad =
7453 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
7454 DAG.getVTList(LoadVT, MVT::Other),
7455 Ops, LD->getMemoryVT(), LD->getMemOperand());
7456
7457 // If this is an i16 load, insert the truncate.
7458 SDValue ResVal = BSLoad;
7459 if (N->getValueType(0) == MVT::i16)
7460 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
7461
7462 // First, combine the bswap away. This makes the value produced by the
7463 // load dead.
7464 DCI.CombineTo(N, ResVal);
7465
7466 // Next, combine the load away; we give it a bogus result value but a real
7467 // chain result. The result value is dead because the bswap is dead.
7468 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
7469
7470 // Return N so it doesn't get rechecked!
7471 return SDValue(N, 0);
7472 }
7473
7474 // Look through bitcasts that retain the number of vector elements.
7475 SDValue Op = N->getOperand(0);
7476 if (Op.getOpcode() == ISD::BITCAST &&
7477 Op.getValueType().isVector() &&
7478 Op.getOperand(0).getValueType().isVector() &&
7479 Op.getValueType().getVectorNumElements() ==
7480 Op.getOperand(0).getValueType().getVectorNumElements())
7481 Op = Op.getOperand(0);
7482
7483 // Push BSWAP into a vector insertion if at least one side then simplifies.
7484 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
7485 SDValue Vec = Op.getOperand(0);
7486 SDValue Elt = Op.getOperand(1);
7487 SDValue Idx = Op.getOperand(2);
7488
7489 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
7490 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
7491 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
7492 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
7493 (canLoadStoreByteSwapped(N->getValueType(0)) &&
7494 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
7495 EVT VecVT = N->getValueType(0);
7496 EVT EltVT = N->getValueType(0).getVectorElementType();
7497 if (VecVT != Vec.getValueType()) {
7498 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
7499 DCI.AddToWorklist(Vec.getNode());
7500 }
7501 if (EltVT != Elt.getValueType()) {
7502 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
7503 DCI.AddToWorklist(Elt.getNode());
7504 }
7505 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
7506 DCI.AddToWorklist(Vec.getNode());
7507 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
7508 DCI.AddToWorklist(Elt.getNode());
7509 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
7510 Vec, Elt, Idx);
7511 }
7512 }
7513
7514 // Push BSWAP into a vector shuffle if at least one side then simplifies.
7515 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
7516 if (SV && Op.hasOneUse()) {
7517 SDValue Op0 = Op.getOperand(0);
7518 SDValue Op1 = Op.getOperand(1);
7519
7520 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
7521 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
7522 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
7523 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
7524 EVT VecVT = N->getValueType(0);
7525 if (VecVT != Op0.getValueType()) {
7526 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
7527 DCI.AddToWorklist(Op0.getNode());
7528 }
7529 if (VecVT != Op1.getValueType()) {
7530 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
7531 DCI.AddToWorklist(Op1.getNode());
7532 }
7533 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
7534 DCI.AddToWorklist(Op0.getNode());
7535 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
7536 DCI.AddToWorklist(Op1.getNode());
7537 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
7538 }
7539 }
7540
7541 return SDValue();
7542}
7543
7544static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
7545 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
7546 // set by the CCReg instruction using the CCValid / CCMask masks. If
7547 // the CCReg instruction is itself an ICMP testing the condition
7548 // code set by some other instruction, see whether we can directly
7549 // use that condition code.
7550
7551 // Verify that we have an ICMP against some constant.
7552 if (CCValid != SystemZ::CCMASK_ICMP)
7553 return false;
7554 auto *ICmp = CCReg.getNode();
7555 if (ICmp->getOpcode() != SystemZISD::ICMP)
7556 return false;
7557 auto *CompareLHS = ICmp->getOperand(0).getNode();
7558 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
7559 if (!CompareRHS)
7560 return false;
7561
7562 // Optimize the case where CompareLHS is a SELECT_CCMASK.
7563 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
7564 // Verify that we have an appropriate mask for an EQ or NE comparison.
7565 bool Invert = false;
7566 if (CCMask == SystemZ::CCMASK_CMP_NE)
7567 Invert = !Invert;
7568 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
7569 return false;
7570
7571 // Verify that the ICMP compares against one of the select values.
7572 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
7573 if (!TrueVal)
7574 return false;
7575 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7576 if (!FalseVal)
7577 return false;
7578 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
7579 Invert = !Invert;
7580 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
7581 return false;
7582
7583 // Compute the effective CC mask for the new branch or select.
7584 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
7585 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
7586 if (!NewCCValid || !NewCCMask)
7587 return false;
7588 CCValid = NewCCValid->getZExtValue();
7589 CCMask = NewCCMask->getZExtValue();
7590 if (Invert)
7591 CCMask ^= CCValid;
7592
7593 // Return the updated CCReg link.
7594 CCReg = CompareLHS->getOperand(4);
7595 return true;
7596 }
7597
7598 // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
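// This sequence sign-extends the two-bit CC field produced by IPM across
// the register, so a signed comparison of the result against zero is in
// effect a test of CC itself; the CC mask just needs to be reversed.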
7599 if (CompareLHS->getOpcode() == ISD::SRA) {
7600 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7601 if (!SRACount || SRACount->getZExtValue() != 30)
7602 return false;
7603 auto *SHL = CompareLHS->getOperand(0).getNode();
7604 if (SHL->getOpcode() != ISD::SHL)
7605 return false;
7606 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
7607 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
7608 return false;
7609 auto *IPM = SHL->getOperand(0).getNode();
7610 if (IPM->getOpcode() != SystemZISD::IPM)
7611 return false;
7612
7613 // Avoid introducing CC spills (because SRA would clobber CC).
7614 if (!CompareLHS->hasOneUse())
7615 return false;
7616 // Verify that the ICMP compares against zero.
7617 if (CompareRHS->getZExtValue() != 0)
7618 return false;
7619
7620 // Compute the effective CC mask for the new branch or select.
7621 CCMask = SystemZ::reverseCCMask(CCMask);
7622
7623 // Return the updated CCReg link.
7624 CCReg = IPM->getOperand(0);
7625 return true;
7626 }
7627
7628 return false;
7629}
7630
7631SDValue SystemZTargetLowering::combineBR_CCMASK(
7632 SDNode *N, DAGCombinerInfo &DCI) const {
7633 SelectionDAG &DAG = DCI.DAG;
7634
7635 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
7636 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7637 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7638 if (!CCValid || !CCMask)
7639 return SDValue();
7640
7641 int CCValidVal = CCValid->getZExtValue();
7642 int CCMaskVal = CCMask->getZExtValue();
7643 SDValue Chain = N->getOperand(0);
7644 SDValue CCReg = N->getOperand(4);
7645
7646 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7647 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
7648 Chain,
7649 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7650 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7651 N->getOperand(3), CCReg);
7652 return SDValue();
7653}
7654
7655SDValue SystemZTargetLowering::combineSELECT_CCMASK(
7656 SDNode *N, DAGCombinerInfo &DCI) const {
7657 SelectionDAG &DAG = DCI.DAG;
7658
7659 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
7660 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
7661 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
7662 if (!CCValid || !CCMask)
7663 return SDValue();
7664
7665 int CCValidVal = CCValid->getZExtValue();
7666 int CCMaskVal = CCMask->getZExtValue();
7667 SDValue CCReg = N->getOperand(4);
7668
7669 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7670 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
7671 N->getOperand(0), N->getOperand(1),
7672 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7673 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7674 CCReg);
7675 return SDValue();
7676}
7677
7678
7679SDValue SystemZTargetLowering::combineGET_CCMASK(
7680 SDNode *N, DAGCombinerInfo &DCI) const {
7681
7682 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
7683 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7684 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7685 if (!CCValid || !CCMask)
7686 return SDValue();
7687 int CCValidVal = CCValid->getZExtValue();
7688 int CCMaskVal = CCMask->getZExtValue();
7689
7690 SDValue Select = N->getOperand(0);
7691 if (Select->getOpcode() == ISD::TRUNCATE)
7692 Select = Select->getOperand(0);
7693 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
7694 return SDValue();
7695
7696 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
7697 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
7698 if (!SelectCCValid || !SelectCCMask)
7699 return SDValue();
7700 int SelectCCValidVal = SelectCCValid->getZExtValue();
7701 int SelectCCMaskVal = SelectCCMask->getZExtValue();
7702
7703 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
7704 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
7705 if (!TrueVal || !FalseVal)
7706 return SDValue();
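// A (TrueVal, FalseVal) of (1, 0) exposes CC unchanged, while (0, 1)
// selects the inverted mask.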
7707 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
7708 ;
7709 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
7710 SelectCCMaskVal ^= SelectCCValidVal;
7711 else
7712 return SDValue();
7713
7714 if (SelectCCValidVal & ~CCValidVal)
7715 return SDValue();
7716 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
7717 return SDValue();
7718
7719 return Select->getOperand(4);
7720}
7721
7722SDValue SystemZTargetLowering::combineIntDIVREM(
7723 SDNode *N, DAGCombinerInfo &DCI) const {
7724 SelectionDAG &DAG = DCI.DAG;
7725 EVT VT = N->getValueType(0);
7726 // In the case where the divisor is a vector of constants a cheaper
7727 // sequence of instructions can replace the divide. BuildSDIV is called to
7728 // do this during DAG combining, but it only succeeds when it can build a
7729 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
7730 // since it is not Legal but Custom it can only happen before
7731 // legalization. Therefore we must scalarize this early, before DAG Combine 1.
7732 // For widened vectors, this is already the result of type legalization.
7733 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
7734 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
7735 return DAG.UnrollVectorOp(N);
7736 return SDValue();
7737}
7738
7739SDValue SystemZTargetLowering::combineINTRINSIC(
7740 SDNode *N, DAGCombinerInfo &DCI) const {
7741 SelectionDAG &DAG = DCI.DAG;
7742
7743 unsigned Id = N->getConstantOperandVal(1);
7744 switch (Id) {
7745 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
7746 // or larger is simply a vector load.
7747 case Intrinsic::s390_vll:
7748 case Intrinsic::s390_vlrl:
7749 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
7750 if (C->getZExtValue() >= 15)
7751 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
7752 N->getOperand(3), MachinePointerInfo());
7753 break;
7754 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
7755 case Intrinsic::s390_vstl:
7756 case Intrinsic::s390_vstrl:
7757 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
7758 if (C->getZExtValue() >= 15)
7759 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
7760 N->getOperand(4), MachinePointerInfo());
7761 break;
7762 }
7763
7764 return SDValue();
7765}
7766
7767SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
7768 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
7769 return N->getOperand(0);
7770 return N;
7771}
7772
7773 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
7774 DAGCombinerInfo &DCI) const {
7775 switch(N->getOpcode()) {
7776 default: break;
7777 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
7778 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
7779 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
7780 case SystemZISD::MERGE_HIGH:
7781 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
7782 case ISD::LOAD: return combineLOAD(N, DCI);
7783 case ISD::STORE: return combineSTORE(N, DCI);
7784 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
7785 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
7786 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
7787 case ISD::STRICT_FP_ROUND:
7788 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
7789 case ISD::STRICT_FP_EXTEND:
7790 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
7791 case ISD::SINT_TO_FP:
7792 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
7793 case ISD::BSWAP: return combineBSWAP(N, DCI);
7794 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
7795 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
7796 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
7797 case ISD::SDIV:
7798 case ISD::UDIV:
7799 case ISD::SREM:
7800 case ISD::UREM: return combineIntDIVREM(N, DCI);
7801 case ISD::INTRINSIC_W_CHAIN:
7802 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
7803 }
7804
7805 return SDValue();
7806}
7807
7808// Return the demanded elements for the OpNo source operand of Op. DemandedElts
7809// are for Op.
7810static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
7811 unsigned OpNo) {
7812 EVT VT = Op.getValueType();
7813 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
7814 APInt SrcDemE;
7815 unsigned Opcode = Op.getOpcode();
7816 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7817 unsigned Id = Op.getConstantOperandVal(0);
7818 switch (Id) {
7819 case Intrinsic::s390_vpksh: // PACKS
7820 case Intrinsic::s390_vpksf:
7821 case Intrinsic::s390_vpksg:
7822 case Intrinsic::s390_vpkshs: // PACKS_CC
7823 case Intrinsic::s390_vpksfs:
7824 case Intrinsic::s390_vpksgs:
7825 case Intrinsic::s390_vpklsh: // PACKLS
7826 case Intrinsic::s390_vpklsf:
7827 case Intrinsic::s390_vpklsg:
7828 case Intrinsic::s390_vpklshs: // PACKLS_CC
7829 case Intrinsic::s390_vpklsfs:
7830 case Intrinsic::s390_vpklsgs:
7831 // VECTOR PACK truncates the elements of two source vectors into one.
7832 SrcDemE = DemandedElts;
7833 if (OpNo == 2)
7834 SrcDemE.lshrInPlace(NumElts / 2);
7835 SrcDemE = SrcDemE.trunc(NumElts / 2);
7836 break;
7837 // VECTOR UNPACK extends half the elements of the source vector.
7838 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7839 case Intrinsic::s390_vuphh:
7840 case Intrinsic::s390_vuphf:
7841 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7842 case Intrinsic::s390_vuplhh:
7843 case Intrinsic::s390_vuplhf:
7844 SrcDemE = APInt(NumElts * 2, 0);
7845 SrcDemE.insertBits(DemandedElts, 0);
7846 break;
7847 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7848 case Intrinsic::s390_vuplhw:
7849 case Intrinsic::s390_vuplf:
7850 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7851 case Intrinsic::s390_vupllh:
7852 case Intrinsic::s390_vupllf:
7853 SrcDemE = APInt(NumElts * 2, 0);
7854 SrcDemE.insertBits(DemandedElts, NumElts);
7855 break;
7856 case Intrinsic::s390_vpdi: {
7857 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
7858 SrcDemE = APInt(NumElts, 0);
7859 if (!DemandedElts[OpNo - 1])
7860 break;
7861 unsigned Mask = Op.getConstantOperandVal(3);
7862 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
7863 // Demand input element 0 or 1, given by the mask bit value.
7864 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
7865 break;
7866 }
7867 case Intrinsic::s390_vsldb: {
7868 // VECTOR SHIFT LEFT DOUBLE BY BYTE
7869 assert(VT == MVT::v16i8 && "Unexpected type.");
7870 unsigned FirstIdx = Op.getConstantOperandVal(3);
7871 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
7872 unsigned NumSrc0Els = 16 - FirstIdx;
7873 SrcDemE = APInt(NumElts, 0);
7874 if (OpNo == 1) {
7875 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
7876 SrcDemE.insertBits(DemEls, FirstIdx);
7877 } else {
7878 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
7879 SrcDemE.insertBits(DemEls, 0);
7880 }
7881 break;
7882 }
7883 case Intrinsic::s390_vperm:
7884 SrcDemE = APInt(NumElts, -1);
7885 break;
7886 default:
7887 llvm_unreachable("Unhandled intrinsic.");
7888 break;
7889 }
7890 } else {
7891 switch (Opcode) {
7892 case SystemZISD::JOIN_DWORDS:
7893 // Scalar operand.
7894 SrcDemE = APInt(1, 1);
7895 break;
7896 case SystemZISD::SELECT_CCMASK:
7897 SrcDemE = DemandedElts;
7898 break;
7899 default:
7900 llvm_unreachable("Unhandled opcode.");
7901 break;
7902 }
7903 }
7904 return SrcDemE;
7905}
7906
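// Compute the known bits of a binary operation as the intersection of
// the known bits of its two demanded source operands.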
7907static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
7908 const APInt &DemandedElts,
7909 const SelectionDAG &DAG, unsigned Depth,
7910 unsigned OpNo) {
7911 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7912 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7913 KnownBits LHSKnown =
7914 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7915 KnownBits RHSKnown =
7916 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7917 Known = LHSKnown.intersectWith(RHSKnown);
7918}
7919
7920void
7921 SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
7922 KnownBits &Known,
7923 const APInt &DemandedElts,
7924 const SelectionDAG &DAG,
7925 unsigned Depth) const {
7926 Known.resetAll();
7927
7928 // Intrinsic CC result is returned in the two low bits.
7929 unsigned tmp0, tmp1; // not used
7930 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
7931 Known.Zero.setBitsFrom(2);
7932 return;
7933 }
7934 EVT VT = Op.getValueType();
7935 if (Op.getResNo() != 0 || VT == MVT::Untyped)
7936 return;
7937 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
7938 "KnownBits does not match VT in bitwidth");
7939 assert ((!VT.isVector() ||
7940 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
7941 "DemandedElts does not match VT number of elements");
7942 unsigned BitWidth = Known.getBitWidth();
7943 unsigned Opcode = Op.getOpcode();
7944 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7945 bool IsLogical = false;
7946 unsigned Id = Op.getConstantOperandVal(0);
7947 switch (Id) {
7948 case Intrinsic::s390_vpksh: // PACKS
7949 case Intrinsic::s390_vpksf:
7950 case Intrinsic::s390_vpksg:
7951 case Intrinsic::s390_vpkshs: // PACKS_CC
7952 case Intrinsic::s390_vpksfs:
7953 case Intrinsic::s390_vpksgs:
7954 case Intrinsic::s390_vpklsh: // PACKLS
7955 case Intrinsic::s390_vpklsf:
7956 case Intrinsic::s390_vpklsg:
7957 case Intrinsic::s390_vpklshs: // PACKLS_CC
7958 case Intrinsic::s390_vpklsfs:
7959 case Intrinsic::s390_vpklsgs:
7960 case Intrinsic::s390_vpdi:
7961 case Intrinsic::s390_vsldb:
7962 case Intrinsic::s390_vperm:
7963 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
7964 break;
7965 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7966 case Intrinsic::s390_vuplhh:
7967 case Intrinsic::s390_vuplhf:
7968 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7969 case Intrinsic::s390_vupllh:
7970 case Intrinsic::s390_vupllf:
7971 IsLogical = true;
7972 [[fallthrough]];
7973 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7974 case Intrinsic::s390_vuphh:
7975 case Intrinsic::s390_vuphf:
7976 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7977 case Intrinsic::s390_vuplhw:
7978 case Intrinsic::s390_vuplf: {
7979 SDValue SrcOp = Op.getOperand(1);
7980 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
7981 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
7982 if (IsLogical) {
7983 Known = Known.zext(BitWidth);
7984 } else
7985 Known = Known.sext(BitWidth);
7986 break;
7987 }
7988 default:
7989 break;
7990 }
7991 } else {
7992 switch (Opcode) {
7993 case SystemZISD::JOIN_DWORDS:
7994 case SystemZISD::SELECT_CCMASK:
7995 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
7996 break;
7997 case SystemZISD::REPLICATE: {
7998 SDValue SrcOp = Op.getOperand(0);
7999 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
8000 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
8001 Known = Known.sext(BitWidth); // VREPI sign-extends the immediate.
8002 break;
8003 }
8004 default:
8005 break;
8006 }
8007 }
8008
8009 // Known has the width of the source operand(s). Adjust if needed to match
8010 // the passed bitwidth.
8011 if (Known.getBitWidth() != BitWidth)
8012 Known = Known.anyextOrTrunc(BitWidth);
8013}
8014
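// Compute the number of sign bits of a binary operation as the minimum
// over its two demanded source operands, adjusting for PACK-type
// operations whose source elements are wider than the result elements.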
8015static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
8016 const SelectionDAG &DAG, unsigned Depth,
8017 unsigned OpNo) {
8018 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
8019 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
8020 if (LHS == 1) return 1; // Early out.
8021 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
8022 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
8023 if (RHS == 1) return 1; // Early out.
8024 unsigned Common = std::min(LHS, RHS);
8025 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
8026 EVT VT = Op.getValueType();
8027 unsigned VTBits = VT.getScalarSizeInBits();
8028 if (SrcBitWidth > VTBits) { // PACK
8029 unsigned SrcExtraBits = SrcBitWidth - VTBits;
8030 if (Common > SrcExtraBits)
8031 return (Common - SrcExtraBits);
8032 return 1;
8033 }
8034 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
8035 return Common;
8036}
8037
8038unsigned
8039 SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
8040 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
8041 unsigned Depth) const {
8042 if (Op.getResNo() != 0)
8043 return 1;
8044 unsigned Opcode = Op.getOpcode();
8045 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
8046 unsigned Id = Op.getConstantOperandVal(0);
8047 switch (Id) {
8048 case Intrinsic::s390_vpksh: // PACKS
8049 case Intrinsic::s390_vpksf:
8050 case Intrinsic::s390_vpksg:
8051 case Intrinsic::s390_vpkshs: // PACKS_CC
8052 case Intrinsic::s390_vpksfs:
8053 case Intrinsic::s390_vpksgs:
8054 case Intrinsic::s390_vpklsh: // PACKLS
8055 case Intrinsic::s390_vpklsf:
8056 case Intrinsic::s390_vpklsg:
8057 case Intrinsic::s390_vpklshs: // PACKLS_CC
8058 case Intrinsic::s390_vpklsfs:
8059 case Intrinsic::s390_vpklsgs:
8060 case Intrinsic::s390_vpdi:
8061 case Intrinsic::s390_vsldb:
8062 case Intrinsic::s390_vperm:
8063 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
8064 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8065 case Intrinsic::s390_vuphh:
8066 case Intrinsic::s390_vuphf:
8067 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8068 case Intrinsic::s390_vuplhw:
8069 case Intrinsic::s390_vuplf: {
8070 SDValue PackedOp = Op.getOperand(1);
8071 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
8072 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
8073 EVT VT = Op.getValueType();
8074 unsigned VTBits = VT.getScalarSizeInBits();
8075 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
8076 return Tmp;
8077 }
8078 default:
8079 break;
8080 }
8081 } else {
8082 switch (Opcode) {
8083 case SystemZISD::SELECT_CCMASK:
8084 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
8085 default:
8086 break;
8087 }
8088 }
8089
8090 return 1;
8091}
8092
8093 bool SystemZTargetLowering::
8094 isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op,
8095 const APInt &DemandedElts, const SelectionDAG &DAG,
8096 bool PoisonOnly, unsigned Depth) const {
8097 switch (Op->getOpcode()) {
8098 case SystemZISD::PCREL_WRAPPER:
8099 case SystemZISD::PCREL_OFFSET:
8100 return true;
8101 }
8102 return false;
8103}
8104
8105unsigned
8106 SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
8107 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
8108 unsigned StackAlign = TFI->getStackAlignment();
8109 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
8110 "Unexpected stack alignment");
8111 // The default stack probe size is 4096 if the function has no
8112 // stack-probe-size attribute.
8113 unsigned StackProbeSize =
8114 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
8115 // Round down to the stack alignment.
8116 StackProbeSize &= ~(StackAlign - 1);
8117 return StackProbeSize ? StackProbeSize : StackAlign;
8118}
8119
8120//===----------------------------------------------------------------------===//
8121// Custom insertion
8122//===----------------------------------------------------------------------===//
8123
8124// Force base value Base into a register before MI. Return the register.
8125 static Register forceReg(MachineInstr &MI, MachineOperand &Base,
8126 const SystemZInstrInfo *TII) {
8127 MachineBasicBlock *MBB = MI.getParent();
8128 MachineFunction &MF = *MBB->getParent();
8129 MachineRegisterInfo &MRI = MF.getRegInfo();
8130 
8131 if (Base.isReg()) {
8132 // Copy Base into a new virtual register to help register coalescing in
8133 // cases with multiple uses.
8134 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8135 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
8136 .add(Base);
8137 return Reg;
8138 }
8139
8140 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8141 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
8142 .add(Base)
8143 .addImm(0)
8144 .addReg(0);
8145 return Reg;
8146}
8147
8148// The CC operand of MI might be missing a kill marker because there
8149// were multiple uses of CC, and ISel didn't know which to mark.
8150// Figure out whether MI should have had a kill marker.
8151 static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
8152 // Scan forward through BB for a use/def of CC.
8153 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
8154 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
8155 const MachineInstr& mi = *miI;
8156 if (mi.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
8157 return false;
8158 if (mi.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
8159 break; // Should have kill-flag - update below.
8160 }
8161
8162 // If we hit the end of the block, check whether CC is live into a
8163 // successor.
8164 if (miI == MBB->end()) {
8165 for (const MachineBasicBlock *Succ : MBB->successors())
8166 if (Succ->isLiveIn(SystemZ::CC))
8167 return false;
8168 }
8169
8170 return true;
8171}
8172
8173// Return true if it is OK for this Select pseudo-opcode to be cascaded
8174// together with other Select pseudo-opcodes into a single basic-block with
8175// a conditional jump around it.
8176 static bool isSelectPseudo(MachineInstr &MI) {
8177 switch (MI.getOpcode()) {
8178 case SystemZ::Select32:
8179 case SystemZ::Select64:
8180 case SystemZ::Select128:
8181 case SystemZ::SelectF32:
8182 case SystemZ::SelectF64:
8183 case SystemZ::SelectF128:
8184 case SystemZ::SelectVR32:
8185 case SystemZ::SelectVR64:
8186 case SystemZ::SelectVR128:
8187 return true;
8188
8189 default:
8190 return false;
8191 }
8192}
8193
8194// Helper function, which inserts PHI functions into SinkMBB:
8195// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
8196// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
8197 static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects,
8198 MachineBasicBlock *TrueMBB,
8199 MachineBasicBlock *FalseMBB,
8200 MachineBasicBlock *SinkMBB) {
8201 MachineFunction *MF = TrueMBB->getParent();
8202 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
8203 
8204 MachineInstr *FirstMI = Selects.front();
8205 unsigned CCValid = FirstMI->getOperand(3).getImm();
8206 unsigned CCMask = FirstMI->getOperand(4).getImm();
8207
8208 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
8209
8210 // As we are creating the PHIs, we have to be careful if there is more than
8211 // one. Later Selects may reference the results of earlier Selects, but later
8212 // PHIs have to reference the individual true/false inputs from earlier PHIs.
8213 // That also means that PHI construction must work forward from earlier to
8214 // later, and that the code must maintain a mapping from each earlier
8215 // PHI's destination register to the registers that went into the PHI.
8216 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
8217 
8218 for (auto *MI : Selects) {
8219 Register DestReg = MI->getOperand(0).getReg();
8220 Register TrueReg = MI->getOperand(1).getReg();
8221 Register FalseReg = MI->getOperand(2).getReg();
8222
8223 // If this Select we are generating is the opposite condition from
8224 // the jump we generated, then we have to swap the operands for the
8225 // PHI that is going to be generated.
8226 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
8227 std::swap(TrueReg, FalseReg);
8228
8229 if (RegRewriteTable.contains(TrueReg))
8230 TrueReg = RegRewriteTable[TrueReg].first;
8231
8232 if (RegRewriteTable.contains(FalseReg))
8233 FalseReg = RegRewriteTable[FalseReg].second;
8234
8235 DebugLoc DL = MI->getDebugLoc();
8236 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
8237 .addReg(TrueReg).addMBB(TrueMBB)
8238 .addReg(FalseReg).addMBB(FalseMBB);
8239
8240 // Add this PHI to the rewrite table.
8241 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
8242 }
8243
8244 MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
8245}
8246
8247 MachineBasicBlock *
8248 SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
8249 MachineBasicBlock *BB) const {
8250 MachineFunction &MF = *BB->getParent();
8251 MachineFrameInfo &MFI = MF.getFrameInfo();
8252 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
8253 assert(TFL->hasReservedCallFrame(MF) &&
8254 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
8255 (void)TFL;
8256 // Get the MaxCallFrameSize value and erase MI since it serves no further
8257 // purpose as the call frame is statically reserved in the prolog. Set
8258 // AdjustsStack as MI is *not* mapped as a frame instruction.
8259 uint32_t NumBytes = MI.getOperand(0).getImm();
8260 if (NumBytes > MFI.getMaxCallFrameSize())
8261 MFI.setMaxCallFrameSize(NumBytes);
8262 MFI.setAdjustsStack(true);
8263
8264 MI.eraseFromParent();
8265 return BB;
8266}
8267
8268// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
8269 MachineBasicBlock *
8270 SystemZTargetLowering::emitSelect(MachineInstr &MI,
8271 MachineBasicBlock *MBB) const {
8272 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
8273 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8274
8275 unsigned CCValid = MI.getOperand(3).getImm();
8276 unsigned CCMask = MI.getOperand(4).getImm();
8277
8278 // If we have a sequence of Select* pseudo instructions using the
8279 // same condition code value, we want to expand all of them into
8280 // a single pair of basic blocks using the same condition.
8281 SmallVector<MachineInstr*, 8> Selects;
8282 SmallVector<MachineInstr*, 8> DbgValues;
8283 Selects.push_back(&MI);
8284 unsigned Count = 0;
8285 for (MachineInstr &NextMI : llvm::make_range(
8286 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
8287 if (isSelectPseudo(NextMI)) {
8288 assert(NextMI.getOperand(3).getImm() == CCValid &&
8289 "Bad CCValid operands since CC was not redefined.");
8290 if (NextMI.getOperand(4).getImm() == CCMask ||
8291 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
8292 Selects.push_back(&NextMI);
8293 continue;
8294 }
8295 break;
8296 }
8297 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8298 NextMI.usesCustomInsertionHook())
8299 break;
8300 bool User = false;
8301 for (auto *SelMI : Selects)
8302 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
8303 User = true;
8304 break;
8305 }
8306 if (NextMI.isDebugInstr()) {
8307 if (User) {
8308 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
8309 DbgValues.push_back(&NextMI);
8310 }
8311 } else if (User || ++Count > 20)
8312 break;
8313 }
8314
8315 MachineInstr *LastMI = Selects.back();
8316 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
8317 checkCCKill(*LastMI, MBB));
8318 MachineBasicBlock *StartMBB = MBB;
8319 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
8320 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8321
8322 // Unless CC was killed in the last Select instruction, mark it as
8323 // live-in to both FalseMBB and JoinMBB.
8324 if (!CCKilled) {
8325 FalseMBB->addLiveIn(SystemZ::CC);
8326 JoinMBB->addLiveIn(SystemZ::CC);
8327 }
8328
8329 // StartMBB:
8330 // BRC CCMask, JoinMBB
8331 // # fallthrough to FalseMBB
8332 MBB = StartMBB;
8333 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8334 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8335 MBB->addSuccessor(JoinMBB);
8336 MBB->addSuccessor(FalseMBB);
8337
8338 // FalseMBB:
8339 // # fallthrough to JoinMBB
8340 MBB = FalseMBB;
8341 MBB->addSuccessor(JoinMBB);
8342
8343 // JoinMBB:
8344 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
8345 // ...
8346 MBB = JoinMBB;
8347 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
8348 for (auto *SelMI : Selects)
8349 SelMI->eraseFromParent();
8350
8351 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
8352 for (auto *DbgMI : DbgValues)
8353 MBB->splice(InsertPos, StartMBB, DbgMI);
8354
8355 return JoinMBB;
8356}
8357
8358// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
8359// StoreOpcode is the store to use and Invert says whether the store should
8360// happen when the condition is false rather than true. If a STORE ON
8361// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
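 // When the subtarget has load/store-on-condition and the address has
 // no index register, a single STOC-style instruction suffices;
 // otherwise the plain store is emitted in FalseMBB and a BRC branches
 // around it whenever the store should not happen.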
8362MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
8363 MachineBasicBlock *MBB,
8364 unsigned StoreOpcode,
8365 unsigned STOCOpcode,
8366 bool Invert) const {
8367 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8368
8369 Register SrcReg = MI.getOperand(0).getReg();
8370 MachineOperand Base = MI.getOperand(1);
8371 int64_t Disp = MI.getOperand(2).getImm();
8372 Register IndexReg = MI.getOperand(3).getReg();
8373 unsigned CCValid = MI.getOperand(4).getImm();
8374 unsigned CCMask = MI.getOperand(5).getImm();
8375 DebugLoc DL = MI.getDebugLoc();
8376
8377 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
8378
8379 // ISel pattern matching also adds a load memory operand of the same
8380 // address, so take special care to find the storing memory operand.
8381 MachineMemOperand *MMO = nullptr;
8382 for (auto *I : MI.memoperands())
8383 if (I->isStore()) {
8384 MMO = I;
8385 break;
8386 }
8387
8388 // Use STOCOpcode if possible. We could use different store patterns in
8389 // order to avoid matching the index register, but the performance trade-offs
8390 // might be more complicated in that case.
8391 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
8392 if (Invert)
8393 CCMask ^= CCValid;
8394
8395 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
8396 .addReg(SrcReg)
8397 .add(Base)
8398 .addImm(Disp)
8399 .addImm(CCValid)
8400 .addImm(CCMask)
8401 .addMemOperand(MMO);
8402
8403 MI.eraseFromParent();
8404 return MBB;
8405 }
8406
8407 // Get the condition needed to branch around the store.
8408 if (!Invert)
8409 CCMask ^= CCValid;
8410
8411 MachineBasicBlock *StartMBB = MBB;
8412 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
8413 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8414
8415 // Unless CC was killed in the CondStore instruction, mark it as
8416 // live-in to both FalseMBB and JoinMBB.
8417 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
8418 !checkCCKill(MI, JoinMBB)) {
8419 FalseMBB->addLiveIn(SystemZ::CC);
8420 JoinMBB->addLiveIn(SystemZ::CC);
8421 }
8422
8423 // StartMBB:
8424 // BRC CCMask, JoinMBB
8425 // # fallthrough to FalseMBB
8426 MBB = StartMBB;
8427 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8428 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8429 MBB->addSuccessor(JoinMBB);
8430 MBB->addSuccessor(FalseMBB);
8431
8432 // FalseMBB:
8433 // store %SrcReg, %Disp(%Index,%Base)
8434 // # fallthrough to JoinMBB
8435 MBB = FalseMBB;
8436 BuildMI(MBB, DL, TII->get(StoreOpcode))
8437 .addReg(SrcReg)
8438 .add(Base)
8439 .addImm(Disp)
8440 .addReg(IndexReg)
8441 .addMemOperand(MMO);
8442 MBB->addSuccessor(JoinMBB);
8443
8444 MI.eraseFromParent();
8445 return JoinMBB;
8446}
8447
8448// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
8449MachineBasicBlock *
8450SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
8451 MachineBasicBlock *MBB,
8452 bool Unsigned) const {
8453 MachineFunction &MF = *MBB->getParent();
8454 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8455 MachineRegisterInfo &MRI = MF.getRegInfo();
8456
8457 // Synthetic instruction to compare 128-bit values.
8458 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
8459 Register Op0 = MI.getOperand(0).getReg();
8460 Register Op1 = MI.getOperand(1).getReg();
8461
8462 MachineBasicBlock *StartMBB = MBB;
8463 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
8464 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
8465
8466 // StartMBB:
8467 //
8468 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
8469 // Swap the inputs to get:
8470 // CC 1 if high(Op0) > high(Op1)
8471 // CC 2 if high(Op0) < high(Op1)
8472 // CC 0 if high(Op0) == high(Op1)
8473 //
8474 // If CC != 0, we're done, so jump over the next instruction.
8475 //
8476 // VEC[L]G Op1, Op0
8477 // JNE JoinMBB
8478 // # fallthrough to HiEqMBB
8479 MBB = StartMBB;
8480 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
8481 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
8482 .addReg(Op1).addReg(Op0);
8483 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8484 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
8485 MBB->addSuccessor(JoinMBB);
8486 MBB->addSuccessor(HiEqMBB);
8487
8488 // HiEqMBB:
8489 //
8490 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
8491 // Since we already know the high parts are equal, the CC
8492 // result will only depend on the low parts:
8493 // CC 1 if low(Op0) > low(Op1)
8494 // CC 3 if low(Op0) <= low(Op1)
8495 //
8496 // VCHLGS Tmp, Op0, Op1
8497 // # fallthrough to JoinMBB
8498 MBB = HiEqMBB;
8499 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
8500 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
8501 .addReg(Op0).addReg(Op1);
8502 MBB->addSuccessor(JoinMBB);
8503
8504 // Mark CC as live-in to JoinMBB.
8505 JoinMBB->addLiveIn(SystemZ::CC);
8506
8507 MI.eraseFromParent();
8508 return JoinMBB;
8509}
8510
8511// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
8512// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
8513// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
8514// whether the field should be inverted after performing BinOpcode (e.g. for
8515// NAND).
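 // The containing aligned 32-bit word is updated in a CS retry loop:
 // RLL (by BitShift) rotates the loaded word so the field occupies the
 // high bits, the operation is applied there, RLL (by NegBitShift)
 // rotates the result back, and CS stores it only if the word in
 // memory is still unchanged; otherwise the loop repeats with the
 // value that CS reloaded.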
8516MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
8517 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
8518 bool Invert) const {
8519 MachineFunction &MF = *MBB->getParent();
8520 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8521 MachineRegisterInfo &MRI = MF.getRegInfo();
8522
8523 // Extract the operands. Base can be a register or a frame index.
8524 // Src2 can be a register or immediate.
8525 Register Dest = MI.getOperand(0).getReg();
8526 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8527 int64_t Disp = MI.getOperand(2).getImm();
8528 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
8529 Register BitShift = MI.getOperand(4).getReg();
8530 Register NegBitShift = MI.getOperand(5).getReg();
8531 unsigned BitSize = MI.getOperand(6).getImm();
8532 DebugLoc DL = MI.getDebugLoc();
8533
8534 // Get the right opcodes for the displacement.
8535 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8536 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8537 assert(LOpcode && CSOpcode && "Displacement out of range");
8538
8539 // Create virtual registers for temporary results.
8540 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8541 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8542 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8543 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8544 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8545
8546 // Insert a basic block for the main loop.
8547 MachineBasicBlock *StartMBB = MBB;
8548 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8549 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8550
8551 // StartMBB:
8552 // ...
8553 // %OrigVal = L Disp(%Base)
8554 // # fall through to LoopMBB
8555 MBB = StartMBB;
8556 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8557 MBB->addSuccessor(LoopMBB);
8558
8559 // LoopMBB:
8560 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
8561 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8562 // %RotatedNewVal = OP %RotatedOldVal, %Src2
8563 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8564 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8565 // JNE LoopMBB
8566 // # fall through to DoneMBB
8567 MBB = LoopMBB;
8568 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8569 .addReg(OrigVal).addMBB(StartMBB)
8570 .addReg(Dest).addMBB(LoopMBB);
8571 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8572 .addReg(OldVal).addReg(BitShift).addImm(0);
8573 if (Invert) {
8574 // Perform the operation normally and then invert every bit of the field.
8575 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8576 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
8577 // XILF with the upper BitSize bits set.
8578 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
8579 .addReg(Tmp).addImm(-1U << (32 - BitSize));
8580 } else if (BinOpcode)
8581 // A simple binary operation.
8582 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
8583 .addReg(RotatedOldVal)
8584 .add(Src2);
8585 else
8586 // Use RISBG to rotate Src2 into position and use it to replace the
8587 // field in RotatedOldVal.
8588 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
8589 .addReg(RotatedOldVal).addReg(Src2.getReg())
8590 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
8591 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8592 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8593 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8594 .addReg(OldVal)
8595 .addReg(NewVal)
8596 .add(Base)
8597 .addImm(Disp);
8598 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8599 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
8600 MBB->addSuccessor(LoopMBB);
8601 MBB->addSuccessor(DoneMBB);
8602
8603 MI.eraseFromParent();
8604 return DoneMBB;
8605}
8606
8607// Implement EmitInstrWithCustomInserter for subword pseudo
8608// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
8609// instruction that should be used to compare the current field with the
8610// minimum or maximum value. KeepOldMask is the BRC condition-code mask
8611// for when the current field should be kept.
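 // E.g. ATOMIC_LOADW_MIN passes CompareOpcode = CR and KeepOldMask =
 // CCMASK_CMP_LE: the old field is kept when it is already <= Src2,
 // otherwise UseAltMBB splices Src2 into the word before the CS loop
 // stores the result back.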
8612MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
8613 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
8614 unsigned KeepOldMask) const {
8615 MachineFunction &MF = *MBB->getParent();
8616 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8617 MachineRegisterInfo &MRI = MF.getRegInfo();
8618
8619 // Extract the operands. Base can be a register or a frame index.
8620 Register Dest = MI.getOperand(0).getReg();
8621 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8622 int64_t Disp = MI.getOperand(2).getImm();
8623 Register Src2 = MI.getOperand(3).getReg();
8624 Register BitShift = MI.getOperand(4).getReg();
8625 Register NegBitShift = MI.getOperand(5).getReg();
8626 unsigned BitSize = MI.getOperand(6).getImm();
8627 DebugLoc DL = MI.getDebugLoc();
8628
8629 // Get the right opcodes for the displacement.
8630 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8631 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8632 assert(LOpcode && CSOpcode && "Displacement out of range");
8633
8634 // Create virtual registers for temporary results.
8635 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8636 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8637 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8638 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8639 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8640 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8641
8642 // Insert 3 basic blocks for the loop.
8643 MachineBasicBlock *StartMBB = MBB;
8644 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8645 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8646 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
8647 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
8648
8649 // StartMBB:
8650 // ...
8651 // %OrigVal = L Disp(%Base)
8652 // # fall through to LoopMBB
8653 MBB = StartMBB;
8654 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8655 MBB->addSuccessor(LoopMBB);
8656
8657 // LoopMBB:
8658 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
8659 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8660 // CompareOpcode %RotatedOldVal, %Src2
8661 // BRC KeepOldMask, UpdateMBB
8662 MBB = LoopMBB;
8663 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8664 .addReg(OrigVal).addMBB(StartMBB)
8665 .addReg(Dest).addMBB(UpdateMBB);
8666 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8667 .addReg(OldVal).addReg(BitShift).addImm(0);
8668 BuildMI(MBB, DL, TII->get(CompareOpcode))
8669 .addReg(RotatedOldVal).addReg(Src2);
8670 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8671 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
8672 MBB->addSuccessor(UpdateMBB);
8673 MBB->addSuccessor(UseAltMBB);
8674
8675 // UseAltMBB:
8676 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
8677 // # fall through to UpdateMBB
8678 MBB = UseAltMBB;
8679 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
8680 .addReg(RotatedOldVal).addReg(Src2)
8681 .addImm(32).addImm(31 + BitSize).addImm(0);
8682 MBB->addSuccessor(UpdateMBB);
8683
8684 // UpdateMBB:
8685 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
8686 // [ %RotatedAltVal, UseAltMBB ]
8687 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8688 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8689 // JNE LoopMBB
8690 // # fall through to DoneMBB
8691 MBB = UpdateMBB;
8692 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
8693 .addReg(RotatedOldVal).addMBB(LoopMBB)
8694 .addReg(RotatedAltVal).addMBB(UseAltMBB);
8695 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8696 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8697 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8698 .addReg(OldVal)
8699 .addReg(NewVal)
8700 .add(Base)
8701 .addImm(Disp);
8702 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8703 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
8704 MBB->addSuccessor(LoopMBB);
8705 MBB->addSuccessor(DoneMBB);
8706
8707 MI.eraseFromParent();
8708 return DoneMBB;
8709}
8710
8711// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
8712// instruction MI.
8713MachineBasicBlock *
8714SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
8715 MachineBasicBlock *MBB) const {
8716 MachineFunction &MF = *MBB->getParent();
8717 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8718 MachineRegisterInfo &MRI = MF.getRegInfo();
8719
8720 // Extract the operands. Base can be a register or a frame index.
8721 Register Dest = MI.getOperand(0).getReg();
8722 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8723 int64_t Disp = MI.getOperand(2).getImm();
8724 Register CmpVal = MI.getOperand(3).getReg();
8725 Register OrigSwapVal = MI.getOperand(4).getReg();
8726 Register BitShift = MI.getOperand(5).getReg();
8727 Register NegBitShift = MI.getOperand(6).getReg();
8728 int64_t BitSize = MI.getOperand(7).getImm();
8729 DebugLoc DL = MI.getDebugLoc();
8730
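 // Strategy: rotate the containing word so the field sits in the low
 // BitSize bits, splice the swap value around it with RISBG32, and
 // compare the zero-extended old field against CmpVal before
 // attempting the CS; if the CS fails, loop again with the word that
 // CS reloaded.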
8731 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
8732
8733 // Get the right opcodes for the displacement and zero-extension.
8734 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8735 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8736 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
8737 assert(LOpcode && CSOpcode && "Displacement out of range");
8738
8739 // Create virtual registers for temporary results.
8740 Register OrigOldVal = MRI.createVirtualRegister(RC);
8741 Register OldVal = MRI.createVirtualRegister(RC);
8742 Register SwapVal = MRI.createVirtualRegister(RC);
8743 Register StoreVal = MRI.createVirtualRegister(RC);
8744 Register OldValRot = MRI.createVirtualRegister(RC);
8745 Register RetryOldVal = MRI.createVirtualRegister(RC);
8746 Register RetrySwapVal = MRI.createVirtualRegister(RC);
8747
8748 // Insert 2 basic blocks for the loop.
8749 MachineBasicBlock *StartMBB = MBB;
8750 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8751 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8752 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
8753
8754 // StartMBB:
8755 // ...
8756 // %OrigOldVal = L Disp(%Base)
8757 // # fall through to LoopMBB
8758 MBB = StartMBB;
8759 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
8760 .add(Base)
8761 .addImm(Disp)
8762 .addReg(0);
8763 MBB->addSuccessor(LoopMBB);
8764
8765 // LoopMBB:
8766 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
8767 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
8768 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
8769 // ^^ The low BitSize bits contain the field
8770 // of interest.
8771 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
8772 // ^^ Replace the upper 32-BitSize bits of the
8773 // swap value with those that we loaded and rotated.
8774 // %Dest = LL[CH] %OldValRot
8775 // CR %Dest, %CmpVal
8776 // JNE DoneMBB
8777 // # Fall through to SetMBB
8778 MBB = LoopMBB;
8779 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8780 .addReg(OrigOldVal).addMBB(StartMBB)
8781 .addReg(RetryOldVal).addMBB(SetMBB);
8782 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
8783 .addReg(OrigSwapVal).addMBB(StartMBB)
8784 .addReg(RetrySwapVal).addMBB(SetMBB);
8785 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
8786 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
8787 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
8788 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
8789 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
8790 .addReg(OldValRot);
8791 BuildMI(MBB, DL, TII->get(SystemZ::CR))
8792 .addReg(Dest).addReg(CmpVal);
8793 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8794 .addImm(SystemZ::CCMASK_ICMP)
8795 .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
8796 MBB->addSuccessor(DoneMBB);
8797 MBB->addSuccessor(SetMBB);
8798
8799 // SetMBB:
8800 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
8801 // ^^ Rotate the new field to its proper position.
8802 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
8803 // JNE LoopMBB
8804 // # fall through to ExitMBB
8805 MBB = SetMBB;
8806 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
8807 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
8808 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
8809 .addReg(OldVal)
8810 .addReg(StoreVal)
8811 .add(Base)
8812 .addImm(Disp);
8813 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8814 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(LoopMBB);
8815 MBB->addSuccessor(LoopMBB);
8816 MBB->addSuccessor(DoneMBB);
8817
8818 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
8819 // to the block after the loop. At this point, CC may have been defined
8820 // either by the CR in LoopMBB or by the CS in SetMBB.
8821 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
8822 DoneMBB->addLiveIn(SystemZ::CC);
8823
8824 MI.eraseFromParent();
8825 return DoneMBB;
8826}
8827
8828// Emit a move from two GR64s to a GR128.
8829MachineBasicBlock *
8830SystemZTargetLowering::emitPair128(MachineInstr &MI,
8831 MachineBasicBlock *MBB) const {
8832 MachineFunction &MF = *MBB->getParent();
8833 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8834 MachineRegisterInfo &MRI = MF.getRegInfo();
8835 DebugLoc DL = MI.getDebugLoc();
8836
8837 Register Dest = MI.getOperand(0).getReg();
8838 Register Hi = MI.getOperand(1).getReg();
8839 Register Lo = MI.getOperand(2).getReg();
8840 Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8841 Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8842
8843 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
8844 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
8845 .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
8846 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8847 .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);
8848
8849 MI.eraseFromParent();
8850 return MBB;
8851}
8852
8853// Emit an extension from a GR64 to a GR128. ClearEven is true
8854// if the high register of the GR128 value must be cleared or false if
8855// it's "don't care".
8856MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
8857 MachineBasicBlock *MBB,
8858 bool ClearEven) const {
8859 MachineFunction &MF = *MBB->getParent();
8860 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8861 MachineRegisterInfo &MRI = MF.getRegInfo();
8862 DebugLoc DL = MI.getDebugLoc();
8863
8864 Register Dest = MI.getOperand(0).getReg();
8865 Register Src = MI.getOperand(1).getReg();
8866 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8867
8868 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
8869 if (ClearEven) {
8870 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8871 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8872
8873 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
8874 .addImm(0);
8875 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
8876 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
8877 In128 = NewIn128;
8878 }
8879 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8880 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
8881
8882 MI.eraseFromParent();
8883 return MBB;
8884}
8885
8886MachineBasicBlock *
8887SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
8888 MachineBasicBlock *MBB,
8889 unsigned Opcode, bool IsMemset) const {
8890 MachineFunction &MF = *MBB->getParent();
8891 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8892 MachineRegisterInfo &MRI = MF.getRegInfo();
8893 DebugLoc DL = MI.getDebugLoc();
8894
8895 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
8896 uint64_t DestDisp = MI.getOperand(1).getImm();
8897 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
8898 uint64_t SrcDisp;
8899
8900 // Fold the displacement Disp if it is out of range.
8901 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
8902 if (!isUInt<12>(Disp)) {
8903 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8904 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
8905 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
8906 .add(Base).addImm(Disp).addReg(0);
8907 Base = MachineOperand::CreateReg(Reg, false);
8908 Disp = 0;
8909 }
8910 };
8911
8912 if (!IsMemset) {
8913 SrcBase = earlyUseOperand(MI.getOperand(2));
8914 SrcDisp = MI.getOperand(3).getImm();
8915 } else {
8916 SrcBase = DestBase;
8917 SrcDisp = DestDisp++;
8918 foldDisplIfNeeded(DestBase, DestDisp);
8919 }
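 // For memset, SrcDisp now addresses the byte that insertMemMemOp
 // stores first and DestDisp the byte after it, so the overlapping MVC
 // propagates that byte through the rest of the range.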
8920
8921 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
8922 bool IsImmForm = LengthMO.isImm();
8923 bool IsRegForm = !IsImmForm;
8924
8925 // Build and insert one Opcode of Length, with special treatment for memset.
8926 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
8927 MachineBasicBlock::iterator InsPos,
8928 MachineOperand DBase, uint64_t DDisp,
8929 MachineOperand SBase, uint64_t SDisp,
8930 unsigned Length) -> void {
8931 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
8932 if (IsMemset) {
8933 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
8934 if (ByteMO.isImm())
8935 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
8936 .add(SBase).addImm(SDisp).add(ByteMO);
8937 else
8938 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
8939 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
8940 if (--Length == 0)
8941 return;
8942 }
8943 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
8944 .add(DBase).addImm(DDisp).addImm(Length)
8945 .add(SBase).addImm(SDisp)
8946 .setMemRefs(MI.memoperands());
8947 };
8948
8949 bool NeedsLoop = false;
8950 uint64_t ImmLength = 0;
8951 Register LenAdjReg = SystemZ::NoRegister;
8952 if (IsImmForm) {
8953 ImmLength = LengthMO.getImm();
8954 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
8955 if (ImmLength == 0) {
8956 MI.eraseFromParent();
8957 return MBB;
8958 }
8959 if (Opcode == SystemZ::CLC) {
8960 if (ImmLength > 3 * 256)
8961 // A two-CLC sequence is a clear win over a loop, not least because
8962 // it needs only one branch. A three-CLC sequence needs the same
8963 // number of branches as a loop (i.e. 2), but is shorter. That
8964 // brings us to lengths greater than 768 bytes. It seems relatively
8965 // likely that a difference will be found within the first 768 bytes,
8966 // so we just optimize for the smallest number of branch
8967 // instructions, in order to avoid polluting the prediction buffer
8968 // too much.
8969 NeedsLoop = true;
8970 } else if (ImmLength > 6 * 256)
8971 // The heuristic we use is to prefer loops for anything that would
8972 // require 7 or more MVCs. With these kinds of sizes there isn't much
8973 // to choose between straight-line code and looping code, since the
8974 // time will be dominated by the MVCs themselves.
8975 NeedsLoop = true;
8976 } else {
8977 NeedsLoop = true;
8978 LenAdjReg = LengthMO.getReg();
8979 }
8980
8981 // When generating more than one CLC, all but the last will need to
8982 // branch to the end when a difference is found.
8983 MachineBasicBlock *EndMBB =
8984 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
8985 ? SystemZ::splitBlockAfter(MI, MBB)
8986 : nullptr);
8987
8988 if (NeedsLoop) {
8989 Register StartCountReg =
8990 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8991 if (IsImmForm) {
8992 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
8993 ImmLength &= 255;
8994 } else {
8995 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
8996 .addReg(LenAdjReg)
8997 .addReg(0)
8998 .addImm(8);
8999 }
9000
9001 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
9002 auto loadZeroAddress = [&]() -> MachineOperand {
9003 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9004 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
9005 return MachineOperand::CreateReg(Reg, false);
9006 };
9007 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
9008 DestBase = loadZeroAddress();
9009 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
9010 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
9011
9012 MachineBasicBlock *StartMBB = nullptr;
9013 MachineBasicBlock *LoopMBB = nullptr;
9014 MachineBasicBlock *NextMBB = nullptr;
9015 MachineBasicBlock *DoneMBB = nullptr;
9016 MachineBasicBlock *AllDoneMBB = nullptr;
9017
9018 Register StartSrcReg = forceReg(MI, SrcBase, TII);
9019 Register StartDestReg =
9020 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
9021
9022 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
9023 Register ThisSrcReg = MRI.createVirtualRegister(RC);
9024 Register ThisDestReg =
9025 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
9026 Register NextSrcReg = MRI.createVirtualRegister(RC);
9027 Register NextDestReg =
9028 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
9029 RC = &SystemZ::GR64BitRegClass;
9030 Register ThisCountReg = MRI.createVirtualRegister(RC);
9031 Register NextCountReg = MRI.createVirtualRegister(RC);
9032
9033 if (IsRegForm) {
9034 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9035 StartMBB = SystemZ::emitBlockAfter(MBB);
9036 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9037 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9038 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
9039
9040 // MBB:
9041 // # Jump to AllDoneMBB if LenAdjReg encodes a length of 0, or fall thru to StartMBB.
9042 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9043 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
9044 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9045 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9046 .addMBB(AllDoneMBB);
9047 MBB->addSuccessor(AllDoneMBB);
9048 if (!IsMemset)
9049 MBB->addSuccessor(StartMBB);
9050 else {
9051 // MemsetOneCheckMBB:
9052 // # Jump to MemsetOneMBB for a memset of length 1, or
9053 // # fall thru to StartMBB.
9054 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
9055 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
9056 MBB->addSuccessor(MemsetOneCheckMBB);
9057 MBB = MemsetOneCheckMBB;
9058 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9059 .addReg(LenAdjReg).addImm(-1);
9060 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9061 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9062 .addMBB(MemsetOneMBB);
9063 MBB->addSuccessor(MemsetOneMBB, {10, 100});
9064 MBB->addSuccessor(StartMBB, {90, 100});
9065
9066 // MemsetOneMBB:
9067 // # Jump back to AllDoneMBB after a single MVI or STC.
9068 MBB = MemsetOneMBB;
9069 insertMemMemOp(MBB, MBB->end(),
9070 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
9071 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
9072 1);
9073 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
9074 MBB->addSuccessor(AllDoneMBB);
9075 }
9076
9077 // StartMBB:
9078 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
9079 MBB = StartMBB;
9080 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9081 .addReg(StartCountReg).addImm(0);
9082 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9083 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9084 .addMBB(DoneMBB);
9085 MBB->addSuccessor(DoneMBB);
9086 MBB->addSuccessor(LoopMBB);
9087 }
9088 else {
9089 StartMBB = MBB;
9090 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9091 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9092 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9093
9094 // StartMBB:
9095 // # fall through to LoopMBB
9096 MBB->addSuccessor(LoopMBB);
9097
9098 DestBase = MachineOperand::CreateReg(NextDestReg, false);
9099 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
9100 if (EndMBB && !ImmLength)
9101 // If the loop handled the whole CLC range, DoneMBB will be empty with
9102 // CC live-through into EndMBB, so add it as live-in.
9103 DoneMBB->addLiveIn(SystemZ::CC);
9104 }
9105
9106 // LoopMBB:
9107 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
9108 // [ %NextDestReg, NextMBB ]
9109 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
9110 // [ %NextSrcReg, NextMBB ]
9111 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
9112 // [ %NextCountReg, NextMBB ]
9113 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
9114 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
9115 // ( JLH EndMBB )
9116 //
9117 // The prefetch is used only for MVC. The JLH is used only for CLC.
9118 MBB = LoopMBB;
9119 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
9120 .addReg(StartDestReg).addMBB(StartMBB)
9121 .addReg(NextDestReg).addMBB(NextMBB);
9122 if (!HaveSingleBase)
9123 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
9124 .addReg(StartSrcReg).addMBB(StartMBB)
9125 .addReg(NextSrcReg).addMBB(NextMBB);
9126 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
9127 .addReg(StartCountReg).addMBB(StartMBB)
9128 .addReg(NextCountReg).addMBB(NextMBB);
9129 if (Opcode == SystemZ::MVC)
9130 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
9131 .addImm(SystemZ::PFD_WRITE)
9132 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
9133 insertMemMemOp(MBB, MBB->end(),
9134 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
9135 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
9136 if (EndMBB) {
9137 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9138 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
9139 .addMBB(EndMBB);
9140 MBB->addSuccessor(EndMBB);
9141 MBB->addSuccessor(NextMBB);
9142 }
9143
9144 // NextMBB:
9145 // %NextDestReg = LA 256(%ThisDestReg)
9146 // %NextSrcReg = LA 256(%ThisSrcReg)
9147 // %NextCountReg = AGHI %ThisCountReg, -1
9148 // CGHI %NextCountReg, 0
9149 // JLH LoopMBB
9150 // # fall through to DoneMBB
9151 //
9152 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
9153 MBB = NextMBB;
9154 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
9155 .addReg(ThisDestReg).addImm(256).addReg(0);
9156 if (!HaveSingleBase)
9157 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
9158 .addReg(ThisSrcReg).addImm(256).addReg(0);
9159 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
9160 .addReg(ThisCountReg).addImm(-1);
9161 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9162 .addReg(NextCountReg).addImm(0);
9163 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9164 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
9165 .addMBB(LoopMBB);
9166 MBB->addSuccessor(LoopMBB);
9167 MBB->addSuccessor(DoneMBB);
9168
9169 MBB = DoneMBB;
9170 if (IsRegForm) {
9171 // DoneMBB:
9172 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
9173 // # Use EXecute Relative Long for the remainder of the bytes. The target
9174 // instruction of the EXRL will have a length field of 1 since 0 is an
9175 // illegal value. The number of bytes processed becomes (%LenAdjReg &
9176 // 0xff) + 1.
9177 // # Fall through to AllDoneMBB.
9178 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9179 Register RemDestReg = HaveSingleBase ? RemSrcReg
9180 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9181 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
9182 .addReg(StartDestReg).addMBB(StartMBB)
9183 .addReg(NextDestReg).addMBB(NextMBB);
9184 if (!HaveSingleBase)
9185 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
9186 .addReg(StartSrcReg).addMBB(StartMBB)
9187 .addReg(NextSrcReg).addMBB(NextMBB);
9188 if (IsMemset)
9189 insertMemMemOp(MBB, MBB->end(),
9190 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
9191 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
9192 MachineInstrBuilder EXRL_MIB =
9193 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
9194 .addImm(Opcode)
9195 .addReg(LenAdjReg)
9196 .addReg(RemDestReg).addImm(DestDisp)
9197 .addReg(RemSrcReg).addImm(SrcDisp);
9198 MBB->addSuccessor(AllDoneMBB);
9199 MBB = AllDoneMBB;
9200 if (Opcode != SystemZ::MVC) {
9201 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
9202 if (EndMBB)
9203 MBB->addLiveIn(SystemZ::CC);
9204 }
9205 }
9206 MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
9207 }
9208
9209 // Handle any remaining bytes with straight-line code.
9210 while (ImmLength > 0) {
9211 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
9212 // The previous iteration might have created out-of-range displacements.
9213 // Apply them using LA/LAY if so.
9214 foldDisplIfNeeded(DestBase, DestDisp);
9215 foldDisplIfNeeded(SrcBase, SrcDisp);
9216 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
9217 DestDisp += ThisLength;
9218 SrcDisp += ThisLength;
9219 ImmLength -= ThisLength;
9220 // If there's another CLC to go, branch to the end if a difference
9221 // was found.
9222 if (EndMBB && ImmLength > 0) {
9223 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
9224 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9225 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
9226 .addMBB(EndMBB);
9227 MBB->addSuccessor(EndMBB);
9228 MBB->addSuccessor(NextMBB);
9229 MBB = NextMBB;
9230 }
9231 }
9232 if (EndMBB) {
9233 MBB->addSuccessor(EndMBB);
9234 MBB = EndMBB;
9235 MBB->addLiveIn(SystemZ::CC);
9236 }
9237
9238 MI.eraseFromParent();
9239 return MBB;
9240}
9241
9242// Decompose string pseudo-instruction MI into a loop that continually performs
9243// Opcode until CC != 3.
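 // CC 3 means the instruction stopped after processing a
 // CPU-determined number of bytes without completing, so the loop
 // simply reissues it with the updated addresses until CC != 3.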
9244MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
9245 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9246 MachineFunction &MF = *MBB->getParent();
9247 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9248 MachineRegisterInfo &MRI = MF.getRegInfo();
9249 DebugLoc DL = MI.getDebugLoc();
9250
9251 uint64_t End1Reg = MI.getOperand(0).getReg();
9252 uint64_t Start1Reg = MI.getOperand(1).getReg();
9253 uint64_t Start2Reg = MI.getOperand(2).getReg();
9254 uint64_t CharReg = MI.getOperand(3).getReg();
9255
9256 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
9257 uint64_t This1Reg = MRI.createVirtualRegister(RC);
9258 uint64_t This2Reg = MRI.createVirtualRegister(RC);
9259 uint64_t End2Reg = MRI.createVirtualRegister(RC);
9260
9261 MachineBasicBlock *StartMBB = MBB;
9262 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9263 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9264
9265 // StartMBB:
9266 // # fall through to LoopMBB
9267 MBB->addSuccessor(LoopMBB);
9268
9269 // LoopMBB:
9270 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
9271 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
9272 // R0L = %CharReg
9273 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
9274 // JO LoopMBB
9275 // # fall through to DoneMBB
9276 //
9277 // The load of R0L can be hoisted by post-RA LICM.
9278 MBB = LoopMBB;
9279
9280 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
9281 .addReg(Start1Reg).addMBB(StartMBB)
9282 .addReg(End1Reg).addMBB(LoopMBB);
9283 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
9284 .addReg(Start2Reg).addMBB(StartMBB)
9285 .addReg(End2Reg).addMBB(LoopMBB);
9286 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
9287 BuildMI(MBB, DL, TII->get(Opcode))
9288 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
9289 .addReg(This1Reg).addReg(This2Reg);
9290 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9291 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
9292 MBB->addSuccessor(LoopMBB);
9293 MBB->addSuccessor(DoneMBB);
9294
9295 DoneMBB->addLiveIn(SystemZ::CC);
9296
9297 MI.eraseFromParent();
9298 return DoneMBB;
9299}
9300
9301// Update TBEGIN instruction with final opcode and register clobbers.
9302MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
9303 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
9304 bool NoFloat) const {
9305 MachineFunction &MF = *MBB->getParent();
9306 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9307 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9308
9309 // Update opcode.
9310 MI.setDesc(TII->get(Opcode));
9311
9312 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
9313 // Make sure to add the corresponding GRSM bits if they are missing.
9314 uint64_t Control = MI.getOperand(2).getImm();
9315 static const unsigned GPRControlBit[16] = {
9316 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
9317 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
9318 };
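 // Each GRSM bit covers an even/odd GPR pair, which is why every value
 // above appears twice. Pairs whose bit is clear are not restored on
 // transaction abort and are therefore modeled as clobbers below; the
 // stack pointer pair (and the frame pointer pair, if used) must
 // instead be preserved.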
9319 Control |= GPRControlBit[15];
9320 if (TFI->hasFP(MF))
9321 Control |= GPRControlBit[11];
9322 MI.getOperand(2).setImm(Control);
9323
9324 // Add GPR clobbers.
9325 for (int I = 0; I < 16; I++) {
9326 if ((Control & GPRControlBit[I]) == 0) {
9327 unsigned Reg = SystemZMC::GR64Regs[I];
9328 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9329 }
9330 }
9331
9332 // Add FPR/VR clobbers.
9333 if (!NoFloat && (Control & 4) != 0) {
9334 if (Subtarget.hasVector()) {
9335 for (unsigned Reg : SystemZMC::VR128Regs) {
9336 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9337 }
9338 } else {
9339 for (unsigned Reg : SystemZMC::FP64Regs) {
9340 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9341 }
9342 }
9343 }
9344
9345 return MBB;
9346}
9347
9348MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
9349 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9350 MachineFunction &MF = *MBB->getParent();
9352 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9353 DebugLoc DL = MI.getDebugLoc();
9354
9355 Register SrcReg = MI.getOperand(0).getReg();
9356
9357 // Create new virtual register of the same class as source.
9358 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
9359 Register DstReg = MRI->createVirtualRegister(RC);
9360
9361 // Replace pseudo with a normal load-and-test that models the def as
9362 // well.
9363 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
9364 .addReg(SrcReg)
9365 .setMIFlags(MI.getFlags());
9366 MI.eraseFromParent();
9367
9368 return MBB;
9369}
9370
9371MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
9372 MachineInstr &MI, MachineBasicBlock *MBB) const {
9373 MachineFunction &MF = *MBB->getParent();
9374 MachineRegisterInfo *MRI = &MF.getRegInfo();
9375 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9376 DebugLoc DL = MI.getDebugLoc();
9377 const unsigned ProbeSize = getStackProbeSize(MF);
9378 Register DstReg = MI.getOperand(0).getReg();
9379 Register SizeReg = MI.getOperand(2).getReg();
9380
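 // The allocation is probed in ProbeSize chunks: each loop iteration
 // drops %r15 by ProbeSize and touches the newly exposed stack area
 // with a volatile load (CG); the tail blocks handle the remaining
 // SizeReg % ProbeSize bytes the same way.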
9381 MachineBasicBlock *StartMBB = MBB;
9382 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9383 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
9384 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
9385 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
9386 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
9387
9388 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
9389 MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
9390
9391 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9392 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9393
9394 // LoopTestMBB
9395 // BRC TailTestMBB
9396 // # fallthrough to LoopBodyMBB
9397 StartMBB->addSuccessor(LoopTestMBB);
9398 MBB = LoopTestMBB;
9399 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
9400 .addReg(SizeReg)
9401 .addMBB(StartMBB)
9402 .addReg(IncReg)
9403 .addMBB(LoopBodyMBB);
9404 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
9405 .addReg(PHIReg)
9406 .addImm(ProbeSize);
9407 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9408 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
9409 .addMBB(TailTestMBB);
9410 MBB->addSuccessor(LoopBodyMBB);
9411 MBB->addSuccessor(TailTestMBB);
9412
9413 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
9414 // J LoopTestMBB
9415 MBB = LoopBodyMBB;
9416 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
9417 .addReg(PHIReg)
9418 .addImm(ProbeSize);
9419 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
9420 .addReg(SystemZ::R15D)
9421 .addImm(ProbeSize);
9422 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9423 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
9424 .setMemRefs(VolLdMMO);
9425 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
9426 MBB->addSuccessor(LoopTestMBB);
9427
9428 // TailTestMBB
9429 // BRC DoneMBB
9430 // # fallthrough to TailMBB
9431 MBB = TailTestMBB;
9432 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9433 .addReg(PHIReg)
9434 .addImm(0);
9435 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9436 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
9437 .addMBB(DoneMBB);
9438 MBB->addSuccessor(TailMBB);
9439 MBB->addSuccessor(DoneMBB);
9440
9441 // TailMBB
9442 // # fallthrough to DoneMBB
9443 MBB = TailMBB;
9444 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
9445 .addReg(SystemZ::R15D)
9446 .addReg(PHIReg);
9447 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9448 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
9449 .setMemRefs(VolLdMMO);
9450 MBB->addSuccessor(DoneMBB);
9451
9452 // DoneMBB
9453 MBB = DoneMBB;
9454 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
9455 .addReg(SystemZ::R15D);
9456
9457 MI.eraseFromParent();
9458 return DoneMBB;
9459}
9460
9461SDValue SystemZTargetLowering::
9462getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
9463 MachineFunction &MF = DAG.getMachineFunction();
9464 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
9465 SDLoc DL(SP);
9466 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
9467 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
9468}
9469
9470MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
9471 MachineInstr &MI, MachineBasicBlock *MBB) const {
9472 switch (MI.getOpcode()) {
9473 case SystemZ::ADJCALLSTACKDOWN:
9474 case SystemZ::ADJCALLSTACKUP:
9475 return emitAdjCallStack(MI, MBB);
9476
9477 case SystemZ::Select32:
9478 case SystemZ::Select64:
9479 case SystemZ::Select128:
9480 case SystemZ::SelectF32:
9481 case SystemZ::SelectF64:
9482 case SystemZ::SelectF128:
9483 case SystemZ::SelectVR32:
9484 case SystemZ::SelectVR64:
9485 case SystemZ::SelectVR128:
9486 return emitSelect(MI, MBB);
9487
9488 case SystemZ::CondStore8Mux:
9489 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
9490 case SystemZ::CondStore8MuxInv:
9491 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
9492 case SystemZ::CondStore16Mux:
9493 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
9494 case SystemZ::CondStore16MuxInv:
9495 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
9496 case SystemZ::CondStore32Mux:
9497 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
9498 case SystemZ::CondStore32MuxInv:
9499 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
9500 case SystemZ::CondStore8:
9501 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
9502 case SystemZ::CondStore8Inv:
9503 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
9504 case SystemZ::CondStore16:
9505 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
9506 case SystemZ::CondStore16Inv:
9507 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
9508 case SystemZ::CondStore32:
9509 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
9510 case SystemZ::CondStore32Inv:
9511 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
9512 case SystemZ::CondStore64:
9513 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
9514 case SystemZ::CondStore64Inv:
9515 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
9516 case SystemZ::CondStoreF32:
9517 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
9518 case SystemZ::CondStoreF32Inv:
9519 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
9520 case SystemZ::CondStoreF64:
9521 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
9522 case SystemZ::CondStoreF64Inv:
9523 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
9524
9525 case SystemZ::SCmp128Hi:
9526 return emitICmp128Hi(MI, MBB, false);
9527 case SystemZ::UCmp128Hi:
9528 return emitICmp128Hi(MI, MBB, true);
9529
9530 case SystemZ::PAIR128:
9531 return emitPair128(MI, MBB);
9532 case SystemZ::AEXT128:
9533 return emitExt128(MI, MBB, false);
9534 case SystemZ::ZEXT128:
9535 return emitExt128(MI, MBB, true);
9536
9537 case SystemZ::ATOMIC_SWAPW:
9538 return emitAtomicLoadBinary(MI, MBB, 0);
9539
9540 case SystemZ::ATOMIC_LOADW_AR:
9541 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
9542 case SystemZ::ATOMIC_LOADW_AFI:
9543 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
9544
9545 case SystemZ::ATOMIC_LOADW_SR:
9546 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
9547
9548 case SystemZ::ATOMIC_LOADW_NR:
9549 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
9550 case SystemZ::ATOMIC_LOADW_NILH:
9551 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
9552
9553 case SystemZ::ATOMIC_LOADW_OR:
9554 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
9555 case SystemZ::ATOMIC_LOADW_OILH:
9556 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
9557
9558 case SystemZ::ATOMIC_LOADW_XR:
9559 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
9560 case SystemZ::ATOMIC_LOADW_XILF:
9561 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
9562
9563 case SystemZ::ATOMIC_LOADW_NRi:
9564 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
9565 case SystemZ::ATOMIC_LOADW_NILHi:
9566 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
9567
9568 case SystemZ::ATOMIC_LOADW_MIN:
9569 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
9570 case SystemZ::ATOMIC_LOADW_MAX:
9571 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
9572 case SystemZ::ATOMIC_LOADW_UMIN:
9573 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
9574 case SystemZ::ATOMIC_LOADW_UMAX:
9575 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
9576
9577 case SystemZ::ATOMIC_CMP_SWAPW:
9578 return emitAtomicCmpSwapW(MI, MBB);
9579 case SystemZ::MVCImm:
9580 case SystemZ::MVCReg:
9581 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
9582 case SystemZ::NCImm:
9583 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
9584 case SystemZ::OCImm:
9585 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
9586 case SystemZ::XCImm:
9587 case SystemZ::XCReg:
9588 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
9589 case SystemZ::CLCImm:
9590 case SystemZ::CLCReg:
9591 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
9592 case SystemZ::MemsetImmImm:
9593 case SystemZ::MemsetImmReg:
9594 case SystemZ::MemsetRegImm:
9595 case SystemZ::MemsetRegReg:
9596 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
9597 case SystemZ::CLSTLoop:
9598 return emitStringWrapper(MI, MBB, SystemZ::CLST);
9599 case SystemZ::MVSTLoop:
9600 return emitStringWrapper(MI, MBB, SystemZ::MVST);
9601 case SystemZ::SRSTLoop:
9602 return emitStringWrapper(MI, MBB, SystemZ::SRST);
9603 case SystemZ::TBEGIN:
9604 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
9605 case SystemZ::TBEGIN_nofloat:
9606 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
9607 case SystemZ::TBEGINC:
9608 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
9609 case SystemZ::LTEBRCompare_Pseudo:
9610 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
9611 case SystemZ::LTDBRCompare_Pseudo:
9612 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
9613 case SystemZ::LTXBRCompare_Pseudo:
9614 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
9615
9616 case SystemZ::PROBED_ALLOCA:
9617 return emitProbedAlloca(MI, MBB);
9618
9619 case TargetOpcode::STACKMAP:
9620 case TargetOpcode::PATCHPOINT:
9621 return emitPatchPoint(MI, MBB);
9622
9623 default:
9624 llvm_unreachable("Unexpected instr type to insert");
9625 }
9626}
9627
9628// This is only used by the isel schedulers, and is needed only to prevent
9629 // the compiler from crashing when list-ilp is used.
9630const TargetRegisterClass *
9631SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
9632 if (VT == MVT::Untyped)
9633 return &SystemZ::ADDR128BitRegClass;
9634 return TargetLowering::getRepRegClassFor(VT);
9635}
9636
9637SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
9638 SelectionDAG &DAG) const {
9639 SDLoc dl(Op);
9640 /*
9641 The rounding method is in FPC Byte 3 bits 6-7, and has the following
9642 settings:
9643 00 Round to nearest
9644 01 Round to 0
9645 10 Round to +inf
9646 11 Round to -inf
9647
9648 FLT_ROUNDS, on the other hand, expects the following:
9649 -1 Undefined
9650 0 Round to 0
9651 1 Round to nearest
9652 2 Round to +inf
9653 3 Round to -inf
9654 */
9655
9656 // Save FPC to register.
9657 SDValue Chain = Op.getOperand(0);
9658 SDValue EFPC(
9659 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
9660 Chain = EFPC.getValue(1);
9661
9662 // Transform as necessary
9663 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
9664 DAG.getConstant(3, dl, MVT::i32));
9665 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
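 // Checking the transform against the tables above:
 //   FPC 0 -> (0^0)^1 = 1 (nearest)   FPC 1 -> (1^0)^1 = 0 (to 0)
 //   FPC 2 -> (2^1)^1 = 2 (+inf)      FPC 3 -> (3^1)^1 = 3 (-inf)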
9666 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
9667 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
9668 DAG.getConstant(1, dl, MVT::i32)));
9669
9670 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
9671 DAG.getConstant(1, dl, MVT::i32));
9672 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
9673
9674 return DAG.getMergeValues({RetVal, Chain}, dl);
9675}
9676
9677SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
9678 SelectionDAG &DAG) const {
9679 EVT VT = Op.getValueType();
9680 Op = Op.getOperand(0);
9681 EVT OpVT = Op.getValueType();
9682
9683 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
9684
9685 SDLoc DL(Op);
9686
9687 // load a 0 vector for the third operand of VSUM.
9688 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
9689
9690 // execute VSUM.
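 // For v16i8/v8i16 the first VSUM widens the partial sums into v4i32
 // lanes; every element size then funnels through a final VSUM that
 // leaves the total in an i128.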
9691 switch (OpVT.getScalarSizeInBits()) {
9692 case 8:
9693 case 16:
9694 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
9695 [[fallthrough]];
9696 case 32:
9697 case 64:
9698 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
9699 DAG.getBitcast(Op.getValueType(), Zero));
9700 break;
9701 case 128:
9702 break; // VSUM over v1i128 should not happen and would be a noop
9703 default:
9704 llvm_unreachable("Unexpected scalar size.");
9705 }
9706 // Cast to original vector type, retrieve last element.
9707 return DAG.getNode(
9708 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
9709 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
9710}
unsigned const MachineRegisterInfo * MRI
#define Success
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
amdgpu AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
Function Alias Analysis Results
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
IRTranslator LLVM IR MI
iv Induction Variable Users
Definition: IVUsers.cpp:48
#define RegName(no)
lazy value info
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
uint64_t High
LLVMContext & Context
#define P(N)
const char LLVMTargetMachineRef TM
static bool isSelectPseudo(MachineInstr &MI)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
#define OPCODE(NAME)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, SDValue Chain, const SDLoc &SL)
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static std::pair< SDValue, SDValue > expandBitCastF128ToI128Parts(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set to 1 the bit whose position is given by bitPosition.
Definition: APInt.h:1308
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition: APInt.h:300
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
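For orientation, a minimal sketch of how the APInt operations above compose (assuming the llvm namespace; the widths and values are illustrative, not taken from the lowering code):

  APInt Mask = APInt::getBitsSet(32, 8, 16); // bits [8, 16) set
  APInt Wide = Mask.zext(64);                // zero-extend to 64 bits
  Wide.setBit(63);                           // set the top bit
  Wide.lshrInPlace(8);                       // logical shift right in place
  uint64_t Raw = Wide.getZExtValue();        // single-word value fits in 64 bits
  APInt Low = Wide.trunc(32);                // drop the high half again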
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
An instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
@ Add
*p = old + v
Definition: Instructions.h:764
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ Xor
*p = old ^ v
Definition: Instructions.h:774
BinOp getOperation() const
Definition: Instructions.h:845
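The BinOp enumerators above are typically consumed by switching on getOperation(); a hedged sketch of that dispatch pattern (the helper name is made up for illustration):

  static bool isBitwiseRMW(const AtomicRMWInst *RMW) {
    switch (RMW->getOperation()) {
    case AtomicRMWInst::And: // *p = old & v
    case AtomicRMWInst::Or:  // *p = old | v
    case AtomicRMWInst::Xor: // *p = old ^ v
      return true;
    default:
      return false;
    }
  }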
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
The address of a basic block.
Definition: Constants.h:889
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
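A hedged usage sketch of isConstantSplat, following the signature above and assuming a BuildVectorSDNode *BVN is in hand (the 8-bit minimum splat width and the big-endian flag are illustrative choices):

  APInt SplatValue, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  bool IsSplat = BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
                                      HasAnyUndefs, /*MinSplatBits=*/8,
                                      /*isBigEndian=*/true);
  // On success, SplatValue holds the smallest repeating element and
  // SplatBitSize its width in bits.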
CCState - This class holds information needed while lowering arguments and return values.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:703
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:715
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:264
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:563
bool hasPrivateLinkage() const
Definition: GlobalValue.h:527
bool hasInternalLinkage() const
Definition: GlobalValue.h:526
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:184
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setMaxCallFrameSize(unsigned S)
MachineFunctionProperties & reset(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr reads the specified register.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:568
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUses uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
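Most SelectionDAG entries above follow one build pattern: create operand nodes with the constant helpers, then combine them with getNode. A minimal, self-contained sketch (the helper name and the ADD operation are illustrative):

  static SDValue emitAddOne(SelectionDAG &DAG, const SDLoc &DL, SDValue V) {
    // Build "V + 1" from a ConstantSDNode and the generic node factory.
    SDValue One = DAG.getConstant(1, DL, V.getValueType());
    return DAG.getNode(ISD::ADD, DL, V.getValueType(), V, One);
  }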
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void reserve(size_type N)
Definition: SmallVector.h:676
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:462
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:676
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
iterator end() const
Definition: StringRef.h:113
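A short sketch of the StringRef helpers listed above, in the style of inline-asm constraint parsing (the "{r5}" constraint is a made-up example):

  StringRef Constraint("{r5}");
  unsigned RegNo = 0;
  if (Constraint.starts_with("{") && Constraint.size() > 3 &&
      // slice() keeps [Start, End); getAsInteger returns true on failure.
      !Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, RegNo)) {
    // RegNo == 5 here.
  }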
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
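StringSwitch chains Case calls and terminates with Default; a minimal sketch (the strings and values are illustrative):

  StringRef Name = "store";
  int Kind = StringSwitch<int>(Name)
                 .Case("load", 0)
                 .Case("store", 1)
                 .Default(-1); // Kind == 1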
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
A SystemZ-specific class detailing special use registers particular to calling conventions.
A SystemZ-specific constant pool value.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const
const TargetFrameLowering * getFrameLowering() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
const SystemZRegisterInfo * getRegisterInfo() const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
bool useSoftFloat() const override
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers, particular to z/OS in 64-bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
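These hooks are normally invoked from a target's TargetLowering constructor; a hedged sketch of the configuration pattern (the operations and actions shown are illustrative, not the actual SystemZ choices):

  // Inside a hypothetical target's TargetLowering constructor:
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);  // split into div + rem
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);  // route to LowerOperation
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);     // no truncating FP store
  setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i1, Promote);
  setBooleanContents(ZeroOrOneBooleanContent);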
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:199
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:751
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:237
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to; it returns an output chain.
Definition: ISDOpcodes.h:1133
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1129
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:724
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:477
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:251
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1276
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:715
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1162
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1278
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1248
@ STRICT_FCEIL
Definition: ISDOpcodes.h:427
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1279
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:240
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1038
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:784
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:484
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
Definition: ISDOpcodes.h:1261
@ STRICT_FMINIMUM
Definition: ISDOpcodes.h:437
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:791
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:391
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:689
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1235
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1240
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:821
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:256
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:478
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1274
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:904
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:230
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1275
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:412
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1407
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:775
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:451
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:621
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1228
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:995
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:931
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1084
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:328
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1277
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1063
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:508
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:728
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1244
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:212
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:223
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:628
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1158
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:324
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:431
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1371
@ GET_ROUNDING
Returns the current rounding mode: -1 Undefined, 0 Round to 0, 1 Round to nearest (ties to even), 2 Round to ...
Definition: ISDOpcodes.h:881
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:652
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:706
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:601
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1272
@ STRICT_FMAXIMUM
Definition: ISDOpcodes.h:436
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:425
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:536
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:781
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:426
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:743
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1280
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:972
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1048
@ ConstantPool
Definition: ISDOpcodes.h:82
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:810
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:799
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:889
@ STRICT_FROUND
Definition: ISDOpcodes.h:429
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:737
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:304
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:450
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:428
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1270
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:444
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:466
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:443
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:991
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1271
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:837
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1189
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:471
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:681
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1215
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:401
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:525
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1269
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:870
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:832
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:424
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:787
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1077
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:764
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:494
@ AssertZext
Definition: ISDOpcodes.h:62
@ STRICT_FRINT
Definition: ISDOpcodes.h:423
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1327
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:516
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
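A small sketch of the two CondCode helpers just above (the i32 type is illustrative):

  ISD::CondCode CC = ISD::SETLT;
  ISD::CondCode Inv = ISD::getSetCCInverse(CC, MVT::i32);   // ISD::SETGE
  ISD::CondCode Swapped = ISD::getSetCCSwappedOperands(CC); // ISD::SETGT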
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1530
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1510
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition: SystemZ.h:40
static bool isImmHH(uint64_t Val)
Definition: SystemZ.h:176
const unsigned CCMASK_TEND
Definition: SystemZ.h:97
const unsigned CCMASK_CS_EQ
Definition: SystemZ.h:67
const unsigned CCMASK_TBEGIN
Definition: SystemZ.h:92
const unsigned CCMASK_0
Definition: SystemZ.h:27
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition: SystemZ.h:82
const unsigned CCMASK_LOGICAL_CARRY
Definition: SystemZ.h:60
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:122
const unsigned CCMASK_TDC
Definition: SystemZ.h:109
const unsigned CCMASK_FCMP
Definition: SystemZ.h:48
const unsigned CCMASK_TM_SOME_0
Definition: SystemZ.h:81
static bool isImmHL(uint64_t Val)
Definition: SystemZ.h:171
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:124
const unsigned PFD_READ
Definition: SystemZ.h:115
const unsigned CCMASK_1
Definition: SystemZ.h:28
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:121
const unsigned PFD_WRITE
Definition: SystemZ.h:116
const unsigned CCMASK_CMP_GT
Definition: SystemZ.h:37
const unsigned TDCMASK_QNAN_MINUS
Definition: SystemZ.h:128
const unsigned CCMASK_CS
Definition: SystemZ.h:69
const unsigned CCMASK_ANY
Definition: SystemZ.h:31
const unsigned CCMASK_ARITH
Definition: SystemZ.h:55
const unsigned CCMASK_TM_MIXED_MSB_0
Definition: SystemZ.h:78
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:123
static bool isImmLL(uint64_t Val)
Definition: SystemZ.h:161
const unsigned VectorBits
Definition: SystemZ.h:154
static bool isImmLH(uint64_t Val)
Definition: SystemZ.h:166
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:125
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition: SystemZ.h:77
const unsigned IPM_CC
Definition: SystemZ.h:112
const unsigned CCMASK_CMP_LE
Definition: SystemZ.h:39
const unsigned CCMASK_CMP_O
Definition: SystemZ.h:44
const unsigned CCMASK_CMP_EQ
Definition: SystemZ.h:35
const unsigned VectorBytes
Definition: SystemZ.h:158
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:126
const unsigned CCMASK_ICMP
Definition: SystemZ.h:47
const unsigned CCMASK_VCMP_ALL
Definition: SystemZ.h:101
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition: SystemZ.h:104
const unsigned CCMASK_TM_MIXED_MSB_1
Definition: SystemZ.h:79
const unsigned CCMASK_TM_MSB_0
Definition: SystemZ.h:83
const unsigned CCMASK_ARITH_OVERFLOW
Definition: SystemZ.h:54
const unsigned CCMASK_CS_NE
Definition: SystemZ.h:68
const unsigned TDCMASK_SNAN_PLUS
Definition: SystemZ.h:129
const unsigned CCMASK_TM
Definition: SystemZ.h:85
const unsigned CCMASK_3
Definition: SystemZ.h:30
const unsigned CCMASK_CMP_LT
Definition: SystemZ.h:36
const unsigned CCMASK_CMP_NE
Definition: SystemZ.h:38
const unsigned TDCMASK_ZERO_PLUS
Definition: SystemZ.h:119
const unsigned TDCMASK_QNAN_PLUS
Definition: SystemZ.h:127
const unsigned TDCMASK_ZERO_MINUS
Definition: SystemZ.h:120
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition: SystemZ.h:80
const unsigned CCMASK_LOGICAL_BORROW
Definition: SystemZ.h:62
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition: SystemZ.h:43
const unsigned CCMASK_LOGICAL
Definition: SystemZ.h:64
const unsigned CCMASK_TM_MSB_1
Definition: SystemZ.h:84
const unsigned TDCMASK_SNAN_MINUS
Definition: SystemZ.h:130
@ GeneralDynamic
Definition: CodeGen.h:46
@ GS
Definition: X86.h:205
Reg
All possible values of the reg field in the ModR/M byte.
support::ulittle32_t Word
Definition: IRSymtab.h:52
NodeAddr< CodeNode * > Code
Definition: RDFGraph.h:388
constexpr const char32_t SBase
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:337
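Worked values, for reference:
Log2_32_Ceil(1);  // == 0, since 2^0 = 1 >= 1
Log2_32_Ceil(5);  // == 3, since 2^3 = 8 is the first power of two >= 5
Log2_32_Ceil(8);  // == 3 exactly
Log2_32_Ceil(0);  // == 32 by definition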
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
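A hedged sketch of the builder pattern, assuming MBB, MI, DL, TII, DestReg and SrcReg are hypothetical values already in scope:
// Insert a SystemZ 32-bit register-to-register copy (LR) before MI.
BuildMI(MBB, MI, DL, TII->get(SystemZ::LR), DestReg)
    .addReg(SrcReg);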
@ Done
Definition: Threading.h:61
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
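For example (a sketch assuming <iterator> is available), a raw iterator pair becomes usable in a range-based for loop:
int Vals[] = {1, 2, 3, 4};
// Visit all but the first element: prints "2 3 4 ".
for (int V : make_range(std::begin(Vals) + 1, std::end(Vals)))
  dbgs() << V << ' ';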
void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert 'Bytes' to a hex string and output to 'OS'.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
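A few concrete values:
bit_ceil(1u);   // == 1
bit_ceil(5u);   // == 8, the smallest power of two >= 5
bit_ceil(64u);  // == 64, already a power of two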
int countr_zero(T Val)
Count the number of 0s from the least significant bit to the most significant, stopping at the first 1.
Definition: bit.h:215
int countl_zero(T Val)
Count the number of 0s from the most significant bit to the least significant, stopping at the first 1.
Definition: bit.h:281
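Worked values for a 32-bit argument, covering both counting directions:
countr_zero(0x00000008u); // == 3: three zero bits below the lone 1
countl_zero(0x00000008u); // == 28: twenty-eight zero bits above it
countr_zero(0u);          // == 32: no 1 bit is ever found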
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:275
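Quick examples:
isPowerOf2_32(64); // true
isPowerOf2_32(6);  // false
isPowerOf2_32(0);  // false: zero is explicitly excluded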
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:465
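Worked values for the template form, which takes the bit count B as a template argument:
SignExtend64<8>(0x7F);  // == 127: bit 7 is clear, so the value is unchanged
SignExtend64<8>(0x80);  // == -128: bit 7 is set, upper bits fill with 1s
SignExtend64<8>(0xFF);  // == -1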
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
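Concrete values, mirroring bit_ceil above:
bit_floor(1u);  // == 1
bit_floor(5u);  // == 4, the largest power of two <= 5
bit_floor(7u);  // == 4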
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define EQ(a, b)
Definition: regexec.c:112
#define NC
Definition: regutils.h:42
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
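A short sketch tying several of these EVT queries together; Ctx stands for a hypothetical LLVMContext in scope:
EVT VecVT = EVT::getVectorVT(Ctx, MVT::i32, 4); // <4 x i32>
assert(VecVT.isVector() && VecVT.isInteger());
assert(VecVT.getVectorNumElements() == 4);
assert(VecVT.getVectorElementType() == MVT::i32);
assert(VecVT.getFixedSizeInBits() == 128);
assert(VecVT.isRound()); // 16 bytes, a power of two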
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:182
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:141
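A sketch of how tracked bits propagate through an extension (the widths are chosen arbitrarily for illustration):
KnownBits Known(8);              // 8-bit value, nothing known yet
Known.Zero.setHighBits(4);       // top four bits now known to be 0
KnownBits Wide = Known.zext(16); // zero extension: top 12 bits known 0
assert(Wide.getBitWidth() == 16);
assert(Wide.getMaxValue() == 0x0F); // largest value the known bits allow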
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
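As an illustrative use (DAG, Chain, DL, Val, FIPtr, MF and FI are all assumed to be in scope), a spill to a stack slot tags its memory operand like so:
// Attach frame-index provenance so alias analysis can reason about it.
SDValue Store = DAG.getStore(Chain, DL, Val, FIPtr,
                             MachinePointerInfo::getFixedStack(MF, FI));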
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
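The setters return the CallLoweringInfo itself, so lowerings typically chain them. A hedged sketch from inside a TargetLowering member, with DAG, DL, Chain, RetTy, Callee and Args assumed in scope:
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL)
   .setChain(Chain)
   .setCallee(CallingConv::C, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> Result = LowerCallTo(CLI);
// Result.first is the call's value, Result.second the output chain.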