LLVM 19.0.0git
SystemZISelLowering.cpp
Go to the documentation of this file.
1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
23#include "llvm/IR/Intrinsics.h"
24#include "llvm/IR/IntrinsicsS390.h"
27#include <cctype>
28#include <optional>
29
30using namespace llvm;
31
32#define DEBUG_TYPE "systemz-lower"
33
34namespace {
35// Represents information about a comparison.
36struct Comparison {
37 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
38 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
39 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
40
41 // The operands to the comparison.
42 SDValue Op0, Op1;
43
44 // Chain if this is a strict floating-point comparison.
45 SDValue Chain;
46
47 // The opcode that should be used to compare Op0 and Op1.
48 unsigned Opcode;
49
50 // A SystemZICMP value. Only used for integer comparisons.
51 unsigned ICmpType;
52
53 // The mask of CC values that Opcode can produce.
54 unsigned CCValid;
55
56 // The mask of CC values for which the original condition is true.
57 unsigned CCMask;
58};
59} // end anonymous namespace
60
61// Classify VT as either 32 or 64 bit.
62static bool is32Bit(EVT VT) {
63 switch (VT.getSimpleVT().SimpleTy) {
64 case MVT::i32:
65 return true;
66 case MVT::i64:
67 return false;
68 default:
69 llvm_unreachable("Unsupported type");
70 }
71}
72
73// Return a version of MachineOperand that can be safely used before the
74// final use.
76 if (Op.isReg())
77 Op.setIsKill(false);
78 return Op;
79}
80
82 const SystemZSubtarget &STI)
83 : TargetLowering(TM), Subtarget(STI) {
84 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
85
86 auto *Regs = STI.getSpecialRegisters();
87
88 // Set up the register classes.
89 if (Subtarget.hasHighWord())
90 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
91 else
92 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
93 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
94 if (!useSoftFloat()) {
95 if (Subtarget.hasVector()) {
96 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
97 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
98 } else {
99 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
100 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
101 }
102 if (Subtarget.hasVectorEnhancements1())
103 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
104 else
105 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
106
107 if (Subtarget.hasVector()) {
108 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
109 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
110 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
111 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
112 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
113 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
114 }
115
116 if (Subtarget.hasVector())
117 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
118 }
119
120 // Compute derived properties from the register classes
122
123 // Set up special registers.
124 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
125
126 // TODO: It may be better to default to latency-oriented scheduling, however
127 // LLVM's current latency-oriented scheduler can't handle physreg definitions
128 // such as SystemZ has with CC, so set this to the register-pressure
129 // scheduler, because it can.
131
134
136
137 // Instructions are strings of 2-byte aligned 2-byte values.
139 // For performance reasons we prefer 16-byte alignment.
141
142 // Handle operations that are handled in a similar way for all types.
143 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
144 I <= MVT::LAST_FP_VALUETYPE;
145 ++I) {
147 if (isTypeLegal(VT)) {
148 // Lower SET_CC into an IPM-based sequence.
152
153 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
155
156 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
159 }
160 }
161
162 // Expand jump table branches as address arithmetic followed by an
163 // indirect jump.
165
166 // Expand BRCOND into a BR_CC (see above).
168
169 // Handle integer types except i128.
170 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
171 I <= MVT::LAST_INTEGER_VALUETYPE;
172 ++I) {
174 if (isTypeLegal(VT) && VT != MVT::i128) {
176
177 // Expand individual DIV and REMs into DIVREMs.
184
185 // Support addition/subtraction with overflow.
188
189 // Support addition/subtraction with carry.
192
193 // Support carry in as value rather than glue.
196
197 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
198 // available, or if the operand is constant.
200
201 // Use POPCNT on z196 and above.
202 if (Subtarget.hasPopulationCount())
204 else
206
207 // No special instructions for these.
210
211 // Use *MUL_LOHI where possible instead of MULH*.
216
217 // Only z196 and above have native support for conversions to unsigned.
218 // On z10, promoting to i64 doesn't generate an inexact condition for
219 // values that are outside the i32 range but in the i64 range, so use
220 // the default expansion.
221 if (!Subtarget.hasFPExtension())
223
224 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
225 // default to Expand, so need to be modified to Legal where appropriate.
227 if (Subtarget.hasFPExtension())
229
230 // And similarly for STRICT_[SU]INT_TO_FP.
232 if (Subtarget.hasFPExtension())
234 }
235 }
236
237 // Handle i128 if legal.
238 if (isTypeLegal(MVT::i128)) {
239 // No special instructions for these.
255
256 // Support addition/subtraction with carry.
261
262 // Use VPOPCT and add up partial results.
264
265 // We have to use libcalls for these.
274 }
275
276 // Type legalization will convert 8- and 16-bit atomic operations into
277 // forms that operate on i32s (but still keeping the original memory VT).
278 // Lower them into full i32 operations.
290
291 // Whether or not i128 is not a legal type, we need to custom lower
292 // the atomic operations in order to exploit SystemZ instructions.
295
296 // We can use the CC result of compare-and-swap to implement
297 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
301
303
304 // Traps are legal, as we will convert them to "j .+2".
305 setOperationAction(ISD::TRAP, MVT::Other, Legal);
306
307 // z10 has instructions for signed but not unsigned FP conversion.
308 // Handle unsigned 32-bit types as signed 64-bit types.
309 if (!Subtarget.hasFPExtension()) {
314 }
315
316 // We have native support for a 64-bit CTLZ, via FLOGR.
320
321 // On z15 we have native support for a 64-bit CTPOP.
322 if (Subtarget.hasMiscellaneousExtensions3()) {
325 }
326
327 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
329
330 // Expand 128 bit shifts without using a libcall.
334 setLibcallName(RTLIB::SRL_I128, nullptr);
335 setLibcallName(RTLIB::SHL_I128, nullptr);
336 setLibcallName(RTLIB::SRA_I128, nullptr);
337
338 // Also expand 256 bit shifts if i128 is a legal type.
339 if (isTypeLegal(MVT::i128)) {
343 }
344
345 // Handle bitcast from fp128 to i128.
346 if (!isTypeLegal(MVT::i128))
348
349 // We have native instructions for i8, i16 and i32 extensions, but not i1.
351 for (MVT VT : MVT::integer_valuetypes()) {
355 }
356
357 // Handle the various types of symbolic address.
363
364 // We need to handle dynamic allocations specially because of the
365 // 160-byte area at the bottom of the stack.
368
371
372 // Handle prefetches with PFD or PFDRL.
374
375 // Handle readcyclecounter with STCKF.
377
379 // Assume by default that all vector operations need to be expanded.
380 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
381 if (getOperationAction(Opcode, VT) == Legal)
382 setOperationAction(Opcode, VT, Expand);
383
384 // Likewise all truncating stores and extending loads.
385 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
386 setTruncStoreAction(VT, InnerVT, Expand);
389 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
390 }
391
392 if (isTypeLegal(VT)) {
393 // These operations are legal for anything that can be stored in a
394 // vector register, even if there is no native support for the format
395 // as such. In particular, we can do these for v4f32 even though there
396 // are no specific instructions for that format.
402
403 // Likewise, except that we need to replace the nodes with something
404 // more specific.
407 }
408 }
409
410 // Handle integer vector types.
412 if (isTypeLegal(VT)) {
413 // These operations have direct equivalents.
418 if (VT != MVT::v2i64)
424 if (Subtarget.hasVectorEnhancements1())
426 else
430
431 // Convert a GPR scalar to a vector by inserting it into element 0.
433
434 // Use a series of unpacks for extensions.
437
438 // Detect shifts/rotates by a scalar amount and convert them into
439 // V*_BY_SCALAR.
444
445 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
446 // and inverting the result as necessary.
448 }
449 }
450
451 if (Subtarget.hasVector()) {
452 // There should be no need to check for float types other than v2f64
453 // since <2 x f32> isn't a legal type.
462
471 }
472
473 if (Subtarget.hasVectorEnhancements2()) {
482
491 }
492
493 // Handle floating-point types.
494 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
495 I <= MVT::LAST_FP_VALUETYPE;
496 ++I) {
498 if (isTypeLegal(VT)) {
499 // We can use FI for FRINT.
501
502 // We can use the extended form of FI for other rounding operations.
503 if (Subtarget.hasFPExtension()) {
509 }
510
511 // No special instructions for these.
517
518 // Special treatment.
520
521 // Handle constrained floating-point operations.
531 if (Subtarget.hasFPExtension()) {
537 }
538 }
539 }
540
541 // Handle floating-point vector types.
542 if (Subtarget.hasVector()) {
543 // Scalar-to-vector conversion is just a subreg.
546
547 // Some insertions and extractions can be done directly but others
548 // need to go via integers.
553
554 // These operations have direct equivalents.
555 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
556 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
557 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
558 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
559 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
560 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
561 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
562 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
563 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
566 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
569
570 // Handle constrained floating-point operations.
583
588 if (Subtarget.hasVectorEnhancements1()) {
591 }
592 }
593
594 // The vector enhancements facility 1 has instructions for these.
595 if (Subtarget.hasVectorEnhancements1()) {
596 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
597 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
598 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
599 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
600 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
601 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
602 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
603 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
604 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
607 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
610
615
620
625
630
635
636 // Handle constrained floating-point operations.
649 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
650 MVT::v4f32, MVT::v2f64 }) {
655 }
656 }
657
658 // We only have fused f128 multiply-addition on vector registers.
659 if (!Subtarget.hasVectorEnhancements1()) {
662 }
663
664 // We don't have a copysign instruction on vector registers.
665 if (Subtarget.hasVectorEnhancements1())
667
668 // Needed so that we don't try to implement f128 constant loads using
669 // a load-and-extend of a f80 constant (in cases where the constant
670 // would fit in an f80).
671 for (MVT VT : MVT::fp_valuetypes())
672 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
673
674 // We don't have extending load instruction on vector registers.
675 if (Subtarget.hasVectorEnhancements1()) {
676 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
677 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
678 }
679
680 // Floating-point truncation and stores need to be done separately.
681 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
682 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
683 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
684
685 // We have 64-bit FPR<->GPR moves, but need special handling for
686 // 32-bit forms.
687 if (!Subtarget.hasVector()) {
690 }
691
692 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
693 // structure, but VAEND is a no-op.
697
699
700 // Codes for which we want to perform some z-specific combinations.
704 ISD::LOAD,
715 ISD::SDIV,
716 ISD::UDIV,
717 ISD::SREM,
718 ISD::UREM,
721
722 // Handle intrinsics.
725
726 // We want to use MVC in preference to even a single load/store pair.
727 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
729
730 // The main memset sequence is a byte store followed by an MVC.
731 // Two STC or MV..I stores win over that, but the kind of fused stores
732 // generated by target-independent code don't when the byte value is
733 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
734 // than "STC;MVC". Handle the choice in target-specific code instead.
735 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
737
738 // Default to having -disable-strictnode-mutation on
739 IsStrictFPEnabled = true;
740
741 if (Subtarget.isTargetzOS()) {
742 struct RTLibCallMapping {
743 RTLIB::Libcall Code;
744 const char *Name;
745 };
746 static RTLibCallMapping RTLibCallCommon[] = {
747#define HANDLE_LIBCALL(code, name) {RTLIB::code, name},
748#include "ZOSLibcallNames.def"
749 };
750 for (auto &E : RTLibCallCommon)
751 setLibcallName(E.Code, E.Name);
752 }
753}
754
756 return Subtarget.hasSoftFloat();
757}
758
760 LLVMContext &, EVT VT) const {
761 if (!VT.isVector())
762 return MVT::i32;
764}
765
767 const MachineFunction &MF, EVT VT) const {
768 VT = VT.getScalarType();
769
770 if (!VT.isSimple())
771 return false;
772
773 switch (VT.getSimpleVT().SimpleTy) {
774 case MVT::f32:
775 case MVT::f64:
776 return true;
777 case MVT::f128:
778 return Subtarget.hasVectorEnhancements1();
779 default:
780 break;
781 }
782
783 return false;
784}
785
786// Return true if the constant can be generated with a vector instruction,
787// such as VGM, VGMB or VREPI.
789 const SystemZSubtarget &Subtarget) {
790 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
791 if (!Subtarget.hasVector() ||
792 (isFP128 && !Subtarget.hasVectorEnhancements1()))
793 return false;
794
795 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
796 // preferred way of creating all-zero and all-one vectors so give it
797 // priority over other methods below.
798 unsigned Mask = 0;
799 unsigned I = 0;
800 for (; I < SystemZ::VectorBytes; ++I) {
801 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
802 if (Byte == 0xff)
803 Mask |= 1ULL << I;
804 else if (Byte != 0)
805 break;
806 }
807 if (I == SystemZ::VectorBytes) {
809 OpVals.push_back(Mask);
811 return true;
812 }
813
814 if (SplatBitSize > 64)
815 return false;
816
817 auto tryValue = [&](uint64_t Value) -> bool {
818 // Try VECTOR REPLICATE IMMEDIATE
819 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
820 if (isInt<16>(SignedValue)) {
821 OpVals.push_back(((unsigned) SignedValue));
824 SystemZ::VectorBits / SplatBitSize);
825 return true;
826 }
827 // Try VECTOR GENERATE MASK
828 unsigned Start, End;
829 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
830 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
831 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
832 // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
833 OpVals.push_back(Start - (64 - SplatBitSize));
834 OpVals.push_back(End - (64 - SplatBitSize));
837 SystemZ::VectorBits / SplatBitSize);
838 return true;
839 }
840 return false;
841 };
842
843 // First try assuming that any undefined bits above the highest set bit
844 // and below the lowest set bit are 1s. This increases the likelihood of
845 // being able to use a sign-extended element value in VECTOR REPLICATE
846 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
847 uint64_t SplatBitsZ = SplatBits.getZExtValue();
848 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
849 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
850 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
851 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
852 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
853 if (tryValue(SplatBitsZ | Upper | Lower))
854 return true;
855
856 // Now try assuming that any undefined bits between the first and
857 // last defined set bits are set. This increases the chances of
858 // using a non-wraparound mask.
859 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
860 return tryValue(SplatBitsZ | Middle);
861}
862
864 if (IntImm.isSingleWord()) {
865 IntBits = APInt(128, IntImm.getZExtValue());
866 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
867 } else
868 IntBits = IntImm;
869 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
870
871 // Find the smallest splat.
872 SplatBits = IntImm;
873 unsigned Width = SplatBits.getBitWidth();
874 while (Width > 8) {
875 unsigned HalfSize = Width / 2;
876 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
877 APInt LowValue = SplatBits.trunc(HalfSize);
878
879 // If the two halves do not match, stop here.
880 if (HighValue != LowValue || 8 > HalfSize)
881 break;
882
883 SplatBits = HighValue;
884 Width = HalfSize;
885 }
886 SplatUndef = 0;
887 SplatBitSize = Width;
888}
889
891 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
892 bool HasAnyUndefs;
893
894 // Get IntBits by finding the 128 bit splat.
895 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
896 true);
897
898 // Get SplatBits by finding the 8 bit or greater splat.
899 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
900 true);
901}
902
904 bool ForCodeSize) const {
905 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
906 if (Imm.isZero() || Imm.isNegZero())
907 return true;
908
910}
911
912/// Returns true if stack probing through inline assembly is requested.
914 // If the function specifically requests inline stack probes, emit them.
915 if (MF.getFunction().hasFnAttribute("probe-stack"))
916 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
917 "inline-asm";
918 return false;
919}
920
923 // Lower fp128 the same way as i128.
924 if (LI->getType()->isFP128Ty())
927}
928
931 // Lower fp128 the same way as i128.
932 if (SI->getValueOperand()->getType()->isFP128Ty())
935}
936
939 // Don't expand subword operations as they require special treatment.
940 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
942
943 // Don't expand if there is a target instruction available.
944 if (Subtarget.hasInterlockedAccess1() &&
945 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
952
954}
955
957 // We can use CGFI or CLGFI.
958 return isInt<32>(Imm) || isUInt<32>(Imm);
959}
960
962 // We can use ALGFI or SLGFI.
963 return isUInt<32>(Imm) || isUInt<32>(-Imm);
964}
965
967 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
968 // Unaligned accesses should never be slower than the expanded version.
969 // We check specifically for aligned accesses in the few cases where
970 // they are required.
971 if (Fast)
972 *Fast = 1;
973 return true;
974}
975
976// Information about the addressing mode for a memory access.
978 // True if a long displacement is supported.
980
981 // True if use of index register is supported.
983
984 AddressingMode(bool LongDispl, bool IdxReg) :
985 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
986};
987
988// Return the desired addressing mode for a Load which has only one use (in
989// the same block) which is a Store.
991 Type *Ty) {
992 // With vector support a Load->Store combination may be combined to either
993 // an MVC or vector operations and it seems to work best to allow the
994 // vector addressing mode.
995 if (HasVector)
996 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
997
998 // Otherwise only the MVC case is special.
999 bool MVC = Ty->isIntegerTy(8);
1000 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1001}
1002
1003// Return the addressing mode which seems most desirable given an LLVM
1004// Instruction pointer.
1005static AddressingMode
1007 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1008 switch (II->getIntrinsicID()) {
1009 default: break;
1010 case Intrinsic::memset:
1011 case Intrinsic::memmove:
1012 case Intrinsic::memcpy:
1013 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1014 }
1015 }
1016
1017 if (isa<LoadInst>(I) && I->hasOneUse()) {
1018 auto *SingleUser = cast<Instruction>(*I->user_begin());
1019 if (SingleUser->getParent() == I->getParent()) {
1020 if (isa<ICmpInst>(SingleUser)) {
1021 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1022 if (C->getBitWidth() <= 64 &&
1023 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1024 // Comparison of memory with 16 bit signed / unsigned immediate
1025 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1026 } else if (isa<StoreInst>(SingleUser))
1027 // Load->Store
1028 return getLoadStoreAddrMode(HasVector, I->getType());
1029 }
1030 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1031 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1032 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1033 // Load->Store
1034 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1035 }
1036
1037 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1038
1039 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1040 // dependencies (LDE only supports small offsets).
1041 // * Utilize the vector registers to hold floating point
1042 // values (vector load / store instructions only support small
1043 // offsets).
1044
1045 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1046 I->getOperand(0)->getType());
1047 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1048 bool IsVectorAccess = MemAccessTy->isVectorTy();
1049
1050 // A store of an extracted vector element will be combined into a VSTE type
1051 // instruction.
1052 if (!IsVectorAccess && isa<StoreInst>(I)) {
1053 Value *DataOp = I->getOperand(0);
1054 if (isa<ExtractElementInst>(DataOp))
1055 IsVectorAccess = true;
1056 }
1057
1058 // A load which gets inserted into a vector element will be combined into a
1059 // VLE type instruction.
1060 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1061 User *LoadUser = *I->user_begin();
1062 if (isa<InsertElementInst>(LoadUser))
1063 IsVectorAccess = true;
1064 }
1065
1066 if (IsFPAccess || IsVectorAccess)
1067 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1068 }
1069
1070 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1071}
1072
1074 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1075 // Punt on globals for now, although they can be used in limited
1076 // RELATIVE LONG cases.
1077 if (AM.BaseGV)
1078 return false;
1079
1080 // Require a 20-bit signed offset.
1081 if (!isInt<20>(AM.BaseOffs))
1082 return false;
1083
1084 bool RequireD12 =
1085 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1086 AddressingMode SupportedAM(!RequireD12, true);
1087 if (I != nullptr)
1088 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1089
1090 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1091 return false;
1092
1093 if (!SupportedAM.IndexReg)
1094 // No indexing allowed.
1095 return AM.Scale == 0;
1096 else
1097 // Indexing is OK but no scale factor can be applied.
1098 return AM.Scale == 0 || AM.Scale == 1;
1099}
1100
1102 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
1103 unsigned SrcAS, const AttributeList &FuncAttributes) const {
1104 const int MVCFastLen = 16;
1105
1106 if (Limit != ~unsigned(0)) {
1107 // Don't expand Op into scalar loads/stores in these cases:
1108 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1109 return false; // Small memcpy: Use MVC
1110 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1111 return false; // Small memset (first byte with STC/MVI): Use MVC
1112 if (Op.isZeroMemset())
1113 return false; // Memset zero: Use XC
1114 }
1115
1116 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
1117 SrcAS, FuncAttributes);
1118}
1119
1121 const AttributeList &FuncAttributes) const {
1122 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1123}
1124
1125bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1126 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1127 return false;
1128 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1129 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1130 return FromBits > ToBits;
1131}
1132
1134 if (!FromVT.isInteger() || !ToVT.isInteger())
1135 return false;
1136 unsigned FromBits = FromVT.getFixedSizeInBits();
1137 unsigned ToBits = ToVT.getFixedSizeInBits();
1138 return FromBits > ToBits;
1139}
1140
1141//===----------------------------------------------------------------------===//
1142// Inline asm support
1143//===----------------------------------------------------------------------===//
1144
1147 if (Constraint.size() == 1) {
1148 switch (Constraint[0]) {
1149 case 'a': // Address register
1150 case 'd': // Data register (equivalent to 'r')
1151 case 'f': // Floating-point register
1152 case 'h': // High-part register
1153 case 'r': // General-purpose register
1154 case 'v': // Vector register
1155 return C_RegisterClass;
1156
1157 case 'Q': // Memory with base and unsigned 12-bit displacement
1158 case 'R': // Likewise, plus an index
1159 case 'S': // Memory with base and signed 20-bit displacement
1160 case 'T': // Likewise, plus an index
1161 case 'm': // Equivalent to 'T'.
1162 return C_Memory;
1163
1164 case 'I': // Unsigned 8-bit constant
1165 case 'J': // Unsigned 12-bit constant
1166 case 'K': // Signed 16-bit constant
1167 case 'L': // Signed 20-bit displacement (on all targets we support)
1168 case 'M': // 0x7fffffff
1169 return C_Immediate;
1170
1171 default:
1172 break;
1173 }
1174 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1175 switch (Constraint[1]) {
1176 case 'Q': // Address with base and unsigned 12-bit displacement
1177 case 'R': // Likewise, plus an index
1178 case 'S': // Address with base and signed 20-bit displacement
1179 case 'T': // Likewise, plus an index
1180 return C_Address;
1181
1182 default:
1183 break;
1184 }
1185 }
1186 return TargetLowering::getConstraintType(Constraint);
1187}
1188
1191 const char *constraint) const {
1193 Value *CallOperandVal = info.CallOperandVal;
1194 // If we don't have a value, we can't do a match,
1195 // but allow it at the lowest weight.
1196 if (!CallOperandVal)
1197 return CW_Default;
1198 Type *type = CallOperandVal->getType();
1199 // Look at the constraint type.
1200 switch (*constraint) {
1201 default:
1203 break;
1204
1205 case 'a': // Address register
1206 case 'd': // Data register (equivalent to 'r')
1207 case 'h': // High-part register
1208 case 'r': // General-purpose register
1209 weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1210 break;
1211
1212 case 'f': // Floating-point register
1213 if (!useSoftFloat())
1214 weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1215 break;
1216
1217 case 'v': // Vector register
1218 if (Subtarget.hasVector())
1219 weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1220 : CW_Default;
1221 break;
1222
1223 case 'I': // Unsigned 8-bit constant
1224 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1225 if (isUInt<8>(C->getZExtValue()))
1226 weight = CW_Constant;
1227 break;
1228
1229 case 'J': // Unsigned 12-bit constant
1230 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1231 if (isUInt<12>(C->getZExtValue()))
1232 weight = CW_Constant;
1233 break;
1234
1235 case 'K': // Signed 16-bit constant
1236 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1237 if (isInt<16>(C->getSExtValue()))
1238 weight = CW_Constant;
1239 break;
1240
1241 case 'L': // Signed 20-bit displacement (on all targets we support)
1242 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1243 if (isInt<20>(C->getSExtValue()))
1244 weight = CW_Constant;
1245 break;
1246
1247 case 'M': // 0x7fffffff
1248 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1249 if (C->getZExtValue() == 0x7fffffff)
1250 weight = CW_Constant;
1251 break;
1252 }
1253 return weight;
1254}
1255
1256// Parse a "{tNNN}" register constraint for which the register type "t"
1257// has already been verified. MC is the class associated with "t" and
1258// Map maps 0-based register numbers to LLVM register numbers.
1259static std::pair<unsigned, const TargetRegisterClass *>
1261 const unsigned *Map, unsigned Size) {
1262 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1263 if (isdigit(Constraint[2])) {
1264 unsigned Index;
1265 bool Failed =
1266 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1267 if (!Failed && Index < Size && Map[Index])
1268 return std::make_pair(Map[Index], RC);
1269 }
1270 return std::make_pair(0U, nullptr);
1271}
1272
1273std::pair<unsigned, const TargetRegisterClass *>
1275 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1276 if (Constraint.size() == 1) {
1277 // GCC Constraint Letters
1278 switch (Constraint[0]) {
1279 default: break;
1280 case 'd': // Data register (equivalent to 'r')
1281 case 'r': // General-purpose register
1282 if (VT.getSizeInBits() == 64)
1283 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1284 else if (VT.getSizeInBits() == 128)
1285 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1286 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1287
1288 case 'a': // Address register
1289 if (VT == MVT::i64)
1290 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1291 else if (VT == MVT::i128)
1292 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1293 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1294
1295 case 'h': // High-part register (an LLVM extension)
1296 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1297
1298 case 'f': // Floating-point register
1299 if (!useSoftFloat()) {
1300 if (VT.getSizeInBits() == 64)
1301 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1302 else if (VT.getSizeInBits() == 128)
1303 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1304 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1305 }
1306 break;
1307
1308 case 'v': // Vector register
1309 if (Subtarget.hasVector()) {
1310 if (VT.getSizeInBits() == 32)
1311 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1312 if (VT.getSizeInBits() == 64)
1313 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1314 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1315 }
1316 break;
1317 }
1318 }
1319 if (Constraint.starts_with("{")) {
1320
1321 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1322 // to check the size on.
1323 auto getVTSizeInBits = [&VT]() {
1324 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1325 };
1326
1327 // We need to override the default register parsing for GPRs and FPRs
1328 // because the interpretation depends on VT. The internal names of
1329 // the registers are also different from the external names
1330 // (F0D and F0S instead of F0, etc.).
1331 if (Constraint[1] == 'r') {
1332 if (getVTSizeInBits() == 32)
1333 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1335 if (getVTSizeInBits() == 128)
1336 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1338 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1340 }
1341 if (Constraint[1] == 'f') {
1342 if (useSoftFloat())
1343 return std::make_pair(
1344 0u, static_cast<const TargetRegisterClass *>(nullptr));
1345 if (getVTSizeInBits() == 32)
1346 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1348 if (getVTSizeInBits() == 128)
1349 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1351 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1353 }
1354 if (Constraint[1] == 'v') {
1355 if (!Subtarget.hasVector())
1356 return std::make_pair(
1357 0u, static_cast<const TargetRegisterClass *>(nullptr));
1358 if (getVTSizeInBits() == 32)
1359 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1361 if (getVTSizeInBits() == 64)
1362 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1364 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1366 }
1367 }
1368 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1369}
1370
1371// FIXME? Maybe this could be a TableGen attribute on some registers and
1372// this table could be generated automatically from RegInfo.
1375 const MachineFunction &MF) const {
1376 Register Reg =
1378 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D : 0)
1379 .Case("r15", Subtarget.isTargetELF() ? SystemZ::R15D : 0)
1380 .Default(0);
1381
1382 if (Reg)
1383 return Reg;
1384 report_fatal_error("Invalid register name global variable");
1385}
1386
1388 const Constant *PersonalityFn) const {
1389 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1390}
1391
1393 const Constant *PersonalityFn) const {
1394 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1395}
1396
1398 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1399 SelectionDAG &DAG) const {
1400 // Only support length 1 constraints for now.
1401 if (Constraint.size() == 1) {
1402 switch (Constraint[0]) {
1403 case 'I': // Unsigned 8-bit constant
1404 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1405 if (isUInt<8>(C->getZExtValue()))
1406 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1407 Op.getValueType()));
1408 return;
1409
1410 case 'J': // Unsigned 12-bit constant
1411 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1412 if (isUInt<12>(C->getZExtValue()))
1413 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1414 Op.getValueType()));
1415 return;
1416
1417 case 'K': // Signed 16-bit constant
1418 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1419 if (isInt<16>(C->getSExtValue()))
1420 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1421 Op.getValueType()));
1422 return;
1423
1424 case 'L': // Signed 20-bit displacement (on all targets we support)
1425 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1426 if (isInt<20>(C->getSExtValue()))
1427 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1428 Op.getValueType()));
1429 return;
1430
1431 case 'M': // 0x7fffffff
1432 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1433 if (C->getZExtValue() == 0x7fffffff)
1434 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1435 Op.getValueType()));
1436 return;
1437 }
1438 }
1439 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1440}
1441
1442//===----------------------------------------------------------------------===//
1443// Calling conventions
1444//===----------------------------------------------------------------------===//
1445
1446#include "SystemZGenCallingConv.inc"
1447
1449 CallingConv::ID) const {
1450 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1451 SystemZ::R14D, 0 };
1452 return ScratchRegs;
1453}
1454
1456 Type *ToType) const {
1457 return isTruncateFree(FromType, ToType);
1458}
1459
1461 return CI->isTailCall();
1462}
1463
1464// Value is a value that has been passed to us in the location described by VA
1465// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1466// any loads onto Chain.
1468 CCValAssign &VA, SDValue Chain,
1469 SDValue Value) {
1470 // If the argument has been promoted from a smaller type, insert an
1471 // assertion to capture this.
1472 if (VA.getLocInfo() == CCValAssign::SExt)
1474 DAG.getValueType(VA.getValVT()));
1475 else if (VA.getLocInfo() == CCValAssign::ZExt)
1477 DAG.getValueType(VA.getValVT()));
1478
1479 if (VA.isExtInLoc())
1480 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1481 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1482 // If this is a short vector argument loaded from the stack,
1483 // extend from i64 to full vector size and then bitcast.
1484 assert(VA.getLocVT() == MVT::i64);
1485 assert(VA.getValVT().isVector());
1486 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1487 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1488 } else
1489 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1490 return Value;
1491}
1492
1493// Value is a value of type VA.getValVT() that we need to copy into
1494// the location described by VA. Return a copy of Value converted to
1495// VA.getValVT(). The caller is responsible for handling indirect values.
1497 CCValAssign &VA, SDValue Value) {
1498 switch (VA.getLocInfo()) {
1499 case CCValAssign::SExt:
1500 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1501 case CCValAssign::ZExt:
1502 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1503 case CCValAssign::AExt:
1504 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1505 case CCValAssign::BCvt: {
1506 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1507 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1508 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1509 // For an f32 vararg we need to first promote it to an f64 and then
1510 // bitcast it to an i64.
1511 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1512 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1513 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1514 ? MVT::v2i64
1515 : VA.getLocVT();
1516 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1517 // For ELF, this is a short vector argument to be stored to the stack,
1518 // bitcast to v2i64 and then extract first element.
1519 if (BitCastToType == MVT::v2i64)
1520 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1521 DAG.getConstant(0, DL, MVT::i32));
1522 return Value;
1523 }
1524 case CCValAssign::Full:
1525 return Value;
1526 default:
1527 llvm_unreachable("Unhandled getLocInfo()");
1528 }
1529}
1530
1532 SDLoc DL(In);
1533 SDValue Lo, Hi;
1534 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1535 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1536 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1537 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1538 DAG.getConstant(64, DL, MVT::i32)));
1539 } else {
1540 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1541 }
1542
1543 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1544 MVT::Untyped, Hi, Lo);
1545 return SDValue(Pair, 0);
1546}
1547
1549 SDLoc DL(In);
1550 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1551 DL, MVT::i64, In);
1552 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1553 DL, MVT::i64, In);
1554
1555 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1556 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1557 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1558 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1559 DAG.getConstant(64, DL, MVT::i32));
1560 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1561 } else {
1562 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1563 }
1564}
1565
1567 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1568 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1569 EVT ValueVT = Val.getValueType();
1570 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1571 // Inline assembly operand.
1572 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1573 return true;
1574 }
1575
1576 return false;
1577}
1578
1580 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1581 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1582 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1583 // Inline assembly operand.
1584 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1585 return DAG.getBitcast(ValueVT, Res);
1586 }
1587
1588 return SDValue();
1589}
1590
1592 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1593 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1594 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1596 MachineFrameInfo &MFI = MF.getFrameInfo();
1598 SystemZMachineFunctionInfo *FuncInfo =
1600 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1601 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1602
1603 // Assign locations to all of the incoming arguments.
1605 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1606 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1607 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1608
1609 unsigned NumFixedGPRs = 0;
1610 unsigned NumFixedFPRs = 0;
1611 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1612 SDValue ArgValue;
1613 CCValAssign &VA = ArgLocs[I];
1614 EVT LocVT = VA.getLocVT();
1615 if (VA.isRegLoc()) {
1616 // Arguments passed in registers
1617 const TargetRegisterClass *RC;
1618 switch (LocVT.getSimpleVT().SimpleTy) {
1619 default:
1620 // Integers smaller than i64 should be promoted to i64.
1621 llvm_unreachable("Unexpected argument type");
1622 case MVT::i32:
1623 NumFixedGPRs += 1;
1624 RC = &SystemZ::GR32BitRegClass;
1625 break;
1626 case MVT::i64:
1627 NumFixedGPRs += 1;
1628 RC = &SystemZ::GR64BitRegClass;
1629 break;
1630 case MVT::f32:
1631 NumFixedFPRs += 1;
1632 RC = &SystemZ::FP32BitRegClass;
1633 break;
1634 case MVT::f64:
1635 NumFixedFPRs += 1;
1636 RC = &SystemZ::FP64BitRegClass;
1637 break;
1638 case MVT::f128:
1639 NumFixedFPRs += 2;
1640 RC = &SystemZ::FP128BitRegClass;
1641 break;
1642 case MVT::v16i8:
1643 case MVT::v8i16:
1644 case MVT::v4i32:
1645 case MVT::v2i64:
1646 case MVT::v4f32:
1647 case MVT::v2f64:
1648 RC = &SystemZ::VR128BitRegClass;
1649 break;
1650 }
1651
1652 Register VReg = MRI.createVirtualRegister(RC);
1653 MRI.addLiveIn(VA.getLocReg(), VReg);
1654 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1655 } else {
1656 assert(VA.isMemLoc() && "Argument not register or memory");
1657
1658 // Create the frame index object for this incoming parameter.
1659 // FIXME: Pre-include call frame size in the offset, should not
1660 // need to manually add it here.
1661 int64_t ArgSPOffset = VA.getLocMemOffset();
1662 if (Subtarget.isTargetXPLINK64()) {
1663 auto &XPRegs =
1665 ArgSPOffset += XPRegs.getCallFrameSize();
1666 }
1667 int FI =
1668 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
1669
1670 // Create the SelectionDAG nodes corresponding to a load
1671 // from this parameter. Unpromoted ints and floats are
1672 // passed as right-justified 8-byte values.
1673 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1674 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1675 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1676 DAG.getIntPtrConstant(4, DL));
1677 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1679 }
1680
1681 // Convert the value of the argument register into the value that's
1682 // being passed.
1683 if (VA.getLocInfo() == CCValAssign::Indirect) {
1684 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1686 // If the original argument was split (e.g. i128), we need
1687 // to load all parts of it here (using the same address).
1688 unsigned ArgIndex = Ins[I].OrigArgIndex;
1689 assert (Ins[I].PartOffset == 0);
1690 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1691 CCValAssign &PartVA = ArgLocs[I + 1];
1692 unsigned PartOffset = Ins[I + 1].PartOffset;
1693 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1694 DAG.getIntPtrConstant(PartOffset, DL));
1695 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1697 ++I;
1698 }
1699 } else
1700 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1701 }
1702
1703 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
1704 // Save the number of non-varargs registers for later use by va_start, etc.
1705 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1706 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1707
1708 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1709 Subtarget.getSpecialRegisters());
1710
1711 // Likewise the address (in the form of a frame index) of where the
1712 // first stack vararg would be. The 1-byte size here is arbitrary.
1713 // FIXME: Pre-include call frame size in the offset, should not
1714 // need to manually add it here.
1715 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
1716 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
1717 FuncInfo->setVarArgsFrameIndex(FI);
1718 }
1719
1720 if (IsVarArg && Subtarget.isTargetELF()) {
1721 // Save the number of non-varargs registers for later use by va_start, etc.
1722 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1723 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1724
1725 // Likewise the address (in the form of a frame index) of where the
1726 // first stack vararg would be. The 1-byte size here is arbitrary.
1727 int64_t VarArgsOffset = CCInfo.getStackSize();
1728 FuncInfo->setVarArgsFrameIndex(
1729 MFI.CreateFixedObject(1, VarArgsOffset, true));
1730
1731 // ...and a similar frame index for the caller-allocated save area
1732 // that will be used to store the incoming registers.
1733 int64_t RegSaveOffset =
1734 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1735 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1736 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1737
1738 // Store the FPR varargs in the reserved frame slots. (We store the
1739 // GPRs as part of the prologue.)
1740 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1742 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1743 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1744 int FI =
1746 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1748 &SystemZ::FP64BitRegClass);
1749 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1750 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1752 }
1753 // Join the stores, which are independent of one another.
1754 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1755 ArrayRef(&MemOps[NumFixedFPRs],
1756 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
1757 }
1758 }
1759
1760 if (Subtarget.isTargetXPLINK64()) {
1761 // Create virual register for handling incoming "ADA" special register (R5)
1762 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
1763 Register ADAvReg = MRI.createVirtualRegister(RC);
1764 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
1765 Subtarget.getSpecialRegisters());
1766 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
1767 FuncInfo->setADAVirtualRegister(ADAvReg);
1768 }
1769 return Chain;
1770}
1771
1772static bool canUseSiblingCall(const CCState &ArgCCInfo,
1775 // Punt if there are any indirect or stack arguments, or if the call
1776 // needs the callee-saved argument register R6, or if the call uses
1777 // the callee-saved register arguments SwiftSelf and SwiftError.
1778 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1779 CCValAssign &VA = ArgLocs[I];
1781 return false;
1782 if (!VA.isRegLoc())
1783 return false;
1784 Register Reg = VA.getLocReg();
1785 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1786 return false;
1787 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1788 return false;
1789 }
1790 return true;
1791}
1792
1794 unsigned Offset, bool LoadAdr = false) {
1797 unsigned ADAvReg = MFI->getADAVirtualRegister();
1799
1800 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
1801 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
1802
1803 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
1804 if (!LoadAdr)
1805 Result = DAG.getLoad(
1806 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
1808
1809 return Result;
1810}
1811
1812// ADA access using Global value
1813// Note: for functions, address of descriptor is returned
1815 EVT PtrVT) {
1816 unsigned ADAtype;
1817 bool LoadAddr = false;
1818 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
1819 bool IsFunction =
1820 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
1821 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
1822
1823 if (IsFunction) {
1824 if (IsInternal) {
1826 LoadAddr = true;
1827 } else
1829 } else {
1831 }
1832 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
1833
1834 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
1835}
1836
1837static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
1838 SDLoc &DL, SDValue &Chain) {
1839 unsigned ADADelta = 0; // ADA offset in desc.
1840 unsigned EPADelta = 8; // EPA offset in desc.
1843
1844 // XPLink calling convention.
1845 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1846 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
1847 G->getGlobal()->hasPrivateLinkage());
1848 if (IsInternal) {
1851 unsigned ADAvReg = MFI->getADAVirtualRegister();
1852 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
1853 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1854 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1855 return true;
1856 } else {
1858 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1859 ADA = getADAEntry(DAG, GA, DL, ADADelta);
1860 Callee = getADAEntry(DAG, GA, DL, EPADelta);
1861 }
1862 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1864 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
1865 ADA = getADAEntry(DAG, ES, DL, ADADelta);
1866 Callee = getADAEntry(DAG, ES, DL, EPADelta);
1867 } else {
1868 // Function pointer case
1869 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1870 DAG.getConstant(ADADelta, DL, PtrVT));
1871 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
1873 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
1874 DAG.getConstant(EPADelta, DL, PtrVT));
1875 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
1877 }
1878 return false;
1879}
1880
1881SDValue
1883 SmallVectorImpl<SDValue> &InVals) const {
1884 SelectionDAG &DAG = CLI.DAG;
1885 SDLoc &DL = CLI.DL;
1887 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1889 SDValue Chain = CLI.Chain;
1890 SDValue Callee = CLI.Callee;
1891 bool &IsTailCall = CLI.IsTailCall;
1892 CallingConv::ID CallConv = CLI.CallConv;
1893 bool IsVarArg = CLI.IsVarArg;
1895 EVT PtrVT = getPointerTy(MF.getDataLayout());
1896 LLVMContext &Ctx = *DAG.getContext();
1898
1899 // FIXME: z/OS support to be added in later.
1900 if (Subtarget.isTargetXPLINK64())
1901 IsTailCall = false;
1902
1903 // Analyze the operands of the call, assigning locations to each operand.
1905 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1906 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1907
1908 // We don't support GuaranteedTailCallOpt, only automatically-detected
1909 // sibling calls.
1910 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1911 IsTailCall = false;
1912
1913 // Get a count of how many bytes are to be pushed on the stack.
1914 unsigned NumBytes = ArgCCInfo.getStackSize();
1915
1916 // Mark the start of the call.
1917 if (!IsTailCall)
1918 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1919
1920 // Copy argument values to their designated locations.
1922 SmallVector<SDValue, 8> MemOpChains;
1923 SDValue StackPtr;
1924 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1925 CCValAssign &VA = ArgLocs[I];
1926 SDValue ArgValue = OutVals[I];
1927
1928 if (VA.getLocInfo() == CCValAssign::Indirect) {
1929 // Store the argument in a stack slot and pass its address.
1930 unsigned ArgIndex = Outs[I].OrigArgIndex;
1931 EVT SlotVT;
1932 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1933 // Allocate the full stack space for a promoted (and split) argument.
1934 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1935 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1936 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1937 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1938 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1939 } else {
1940 SlotVT = Outs[I].VT;
1941 }
1942 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1943 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1944 MemOpChains.push_back(
1945 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1947 // If the original argument was split (e.g. i128), we need
1948 // to store all parts of it here (and pass just one address).
1949 assert (Outs[I].PartOffset == 0);
1950 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1951 SDValue PartValue = OutVals[I + 1];
1952 unsigned PartOffset = Outs[I + 1].PartOffset;
1953 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1954 DAG.getIntPtrConstant(PartOffset, DL));
1955 MemOpChains.push_back(
1956 DAG.getStore(Chain, DL, PartValue, Address,
1958 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1959 SlotVT.getStoreSize()) && "Not enough space for argument part!");
1960 ++I;
1961 }
1962 ArgValue = SpillSlot;
1963 } else
1964 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1965
1966 if (VA.isRegLoc()) {
1967 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
1968 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
1969 // and low values.
1970 if (VA.getLocVT() == MVT::i128)
1971 ArgValue = lowerI128ToGR128(DAG, ArgValue);
1972 // Queue up the argument copies and emit them at the end.
1973 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1974 } else {
1975 assert(VA.isMemLoc() && "Argument not register or memory");
1976
1977 // Work out the address of the stack slot. Unpromoted ints and
1978 // floats are passed as right-justified 8-byte values.
1979 if (!StackPtr.getNode())
1980 StackPtr = DAG.getCopyFromReg(Chain, DL,
1981 Regs->getStackPointerRegister(), PtrVT);
1982 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1983 VA.getLocMemOffset();
1984 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1985 Offset += 4;
1986 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1988
1989 // Emit the store.
1990 MemOpChains.push_back(
1991 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1992
1993 // Although long doubles or vectors are passed through the stack when
1994 // they are vararg (non-fixed arguments), if a long double or vector
1995 // occupies the third and fourth slot of the argument list GPR3 should
1996 // still shadow the third slot of the argument list.
1997 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
1998 SDValue ShadowArgValue =
1999 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2000 DAG.getIntPtrConstant(1, DL));
2001 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2002 }
2003 }
2004 }
2005
2006 // Join the stores, which are independent of one another.
2007 if (!MemOpChains.empty())
2008 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2009
2010 // Accept direct calls by converting symbolic call addresses to the
2011 // associated Target* opcodes. Force %r1 to be used for indirect
2012 // tail calls.
2013 SDValue Glue;
2014
2015 if (Subtarget.isTargetXPLINK64()) {
2016 SDValue ADA;
2017 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2018 if (!IsBRASL) {
2019 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2020 ->getAddressOfCalleeRegister();
2021 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2022 Glue = Chain.getValue(1);
2023 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2024 }
2025 RegsToPass.push_back(std::make_pair(
2026 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2027 } else {
2028 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2029 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2030 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2031 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2032 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2033 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2034 } else if (IsTailCall) {
2035 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2036 Glue = Chain.getValue(1);
2037 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2038 }
2039 }
2040
2041 // Build a sequence of copy-to-reg nodes, chained and glued together.
2042 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
2043 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
2044 RegsToPass[I].second, Glue);
2045 Glue = Chain.getValue(1);
2046 }
2047
2048 // The first call operand is the chain and the second is the target address.
2050 Ops.push_back(Chain);
2051 Ops.push_back(Callee);
2052
2053 // Add argument registers to the end of the list so that they are
2054 // known live into the call.
2055 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
2056 Ops.push_back(DAG.getRegister(RegsToPass[I].first,
2057 RegsToPass[I].second.getValueType()));
2058
2059 // Add a register mask operand representing the call-preserved registers.
2060 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2061 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2062 assert(Mask && "Missing call preserved mask for calling convention");
2063 Ops.push_back(DAG.getRegisterMask(Mask));
2064
2065 // Glue the call to the argument copies, if any.
2066 if (Glue.getNode())
2067 Ops.push_back(Glue);
2068
2069 // Emit the call.
2070 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2071 if (IsTailCall) {
2072 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2073 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2074 return Ret;
2075 }
2076 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2077 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2078 Glue = Chain.getValue(1);
2079
2080 // Mark the end of the call, which is glued to the call itself.
2081 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2082 Glue = Chain.getValue(1);
2083
2084 // Assign locations to each value returned by this call.
2086 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2087 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2088
2089 // Copy all of the result registers out of their specified physreg.
2090 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2091 CCValAssign &VA = RetLocs[I];
2092
2093 // Copy the value out, gluing the copy to the end of the call sequence.
2094 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2095 VA.getLocVT(), Glue);
2096 Chain = RetValue.getValue(1);
2097 Glue = RetValue.getValue(2);
2098
2099 // Convert the value of the return register into the value that's
2100 // being returned.
2101 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2102 }
2103
2104 return Chain;
2105}
2106
2107// Generate a call taking the given operands as arguments and returning a
2108// result of type RetVT.
2110 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2111 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2112 bool DoesNotReturn, bool IsReturnValueUsed) const {
2114 Args.reserve(Ops.size());
2115
2117 for (SDValue Op : Ops) {
2118 Entry.Node = Op;
2119 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
2120 Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2121 Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
2122 Args.push_back(Entry);
2123 }
2124
2125 SDValue Callee =
2126 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2127
2128 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2130 bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
2131 CLI.setDebugLoc(DL)
2132 .setChain(Chain)
2133 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2134 .setNoReturn(DoesNotReturn)
2135 .setDiscardResult(!IsReturnValueUsed)
2136 .setSExtResult(SignExtend)
2137 .setZExtResult(!SignExtend);
2138 return LowerCallTo(CLI);
2139}
2140
2143 MachineFunction &MF, bool isVarArg,
2145 LLVMContext &Context) const {
2146 // Special case that we cannot easily detect in RetCC_SystemZ since
2147 // i128 may not be a legal type.
2148 for (auto &Out : Outs)
2149 if (Out.ArgVT == MVT::i128)
2150 return false;
2151
2153 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
2154 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2155}
2156
2157SDValue
2159 bool IsVarArg,
2161 const SmallVectorImpl<SDValue> &OutVals,
2162 const SDLoc &DL, SelectionDAG &DAG) const {
2164
2165 // Assign locations to each returned value.
2167 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2168 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2169
2170 // Quick exit for void returns
2171 if (RetLocs.empty())
2172 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2173
2174 if (CallConv == CallingConv::GHC)
2175 report_fatal_error("GHC functions return void only");
2176
2177 // Copy the result values into the output registers.
2178 SDValue Glue;
2180 RetOps.push_back(Chain);
2181 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2182 CCValAssign &VA = RetLocs[I];
2183 SDValue RetValue = OutVals[I];
2184
2185 // Make the return register live on exit.
2186 assert(VA.isRegLoc() && "Can only return in registers!");
2187
2188 // Promote the value as required.
2189 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2190
2191 // Chain and glue the copies together.
2192 Register Reg = VA.getLocReg();
2193 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2194 Glue = Chain.getValue(1);
2195 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2196 }
2197
2198 // Update chain and glue.
2199 RetOps[0] = Chain;
2200 if (Glue.getNode())
2201 RetOps.push_back(Glue);
2202
2203 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2204}
2205
2206// Return true if Op is an intrinsic node with chain that returns the CC value
2207// as its only (other) argument. Provide the associated SystemZISD opcode and
2208// the mask of valid CC values if so.
2209static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2210 unsigned &CCValid) {
2211 unsigned Id = Op.getConstantOperandVal(1);
2212 switch (Id) {
2213 case Intrinsic::s390_tbegin:
2214 Opcode = SystemZISD::TBEGIN;
2215 CCValid = SystemZ::CCMASK_TBEGIN;
2216 return true;
2217
2218 case Intrinsic::s390_tbegin_nofloat:
2220 CCValid = SystemZ::CCMASK_TBEGIN;
2221 return true;
2222
2223 case Intrinsic::s390_tend:
2224 Opcode = SystemZISD::TEND;
2225 CCValid = SystemZ::CCMASK_TEND;
2226 return true;
2227
2228 default:
2229 return false;
2230 }
2231}
2232
2233// Return true if Op is an intrinsic node without chain that returns the
2234// CC value as its final argument. Provide the associated SystemZISD
2235// opcode and the mask of valid CC values if so.
2236static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2237 unsigned Id = Op.getConstantOperandVal(0);
2238 switch (Id) {
2239 case Intrinsic::s390_vpkshs:
2240 case Intrinsic::s390_vpksfs:
2241 case Intrinsic::s390_vpksgs:
2242 Opcode = SystemZISD::PACKS_CC;
2243 CCValid = SystemZ::CCMASK_VCMP;
2244 return true;
2245
2246 case Intrinsic::s390_vpklshs:
2247 case Intrinsic::s390_vpklsfs:
2248 case Intrinsic::s390_vpklsgs:
2249 Opcode = SystemZISD::PACKLS_CC;
2250 CCValid = SystemZ::CCMASK_VCMP;
2251 return true;
2252
2253 case Intrinsic::s390_vceqbs:
2254 case Intrinsic::s390_vceqhs:
2255 case Intrinsic::s390_vceqfs:
2256 case Intrinsic::s390_vceqgs:
2257 Opcode = SystemZISD::VICMPES;
2258 CCValid = SystemZ::CCMASK_VCMP;
2259 return true;
2260
2261 case Intrinsic::s390_vchbs:
2262 case Intrinsic::s390_vchhs:
2263 case Intrinsic::s390_vchfs:
2264 case Intrinsic::s390_vchgs:
2265 Opcode = SystemZISD::VICMPHS;
2266 CCValid = SystemZ::CCMASK_VCMP;
2267 return true;
2268
2269 case Intrinsic::s390_vchlbs:
2270 case Intrinsic::s390_vchlhs:
2271 case Intrinsic::s390_vchlfs:
2272 case Intrinsic::s390_vchlgs:
2273 Opcode = SystemZISD::VICMPHLS;
2274 CCValid = SystemZ::CCMASK_VCMP;
2275 return true;
2276
2277 case Intrinsic::s390_vtm:
2278 Opcode = SystemZISD::VTM;
2279 CCValid = SystemZ::CCMASK_VCMP;
2280 return true;
2281
2282 case Intrinsic::s390_vfaebs:
2283 case Intrinsic::s390_vfaehs:
2284 case Intrinsic::s390_vfaefs:
2285 Opcode = SystemZISD::VFAE_CC;
2286 CCValid = SystemZ::CCMASK_ANY;
2287 return true;
2288
2289 case Intrinsic::s390_vfaezbs:
2290 case Intrinsic::s390_vfaezhs:
2291 case Intrinsic::s390_vfaezfs:
2292 Opcode = SystemZISD::VFAEZ_CC;
2293 CCValid = SystemZ::CCMASK_ANY;
2294 return true;
2295
2296 case Intrinsic::s390_vfeebs:
2297 case Intrinsic::s390_vfeehs:
2298 case Intrinsic::s390_vfeefs:
2299 Opcode = SystemZISD::VFEE_CC;
2300 CCValid = SystemZ::CCMASK_ANY;
2301 return true;
2302
2303 case Intrinsic::s390_vfeezbs:
2304 case Intrinsic::s390_vfeezhs:
2305 case Intrinsic::s390_vfeezfs:
2306 Opcode = SystemZISD::VFEEZ_CC;
2307 CCValid = SystemZ::CCMASK_ANY;
2308 return true;
2309
2310 case Intrinsic::s390_vfenebs:
2311 case Intrinsic::s390_vfenehs:
2312 case Intrinsic::s390_vfenefs:
2313 Opcode = SystemZISD::VFENE_CC;
2314 CCValid = SystemZ::CCMASK_ANY;
2315 return true;
2316
2317 case Intrinsic::s390_vfenezbs:
2318 case Intrinsic::s390_vfenezhs:
2319 case Intrinsic::s390_vfenezfs:
2320 Opcode = SystemZISD::VFENEZ_CC;
2321 CCValid = SystemZ::CCMASK_ANY;
2322 return true;
2323
2324 case Intrinsic::s390_vistrbs:
2325 case Intrinsic::s390_vistrhs:
2326 case Intrinsic::s390_vistrfs:
2327 Opcode = SystemZISD::VISTR_CC;
2329 return true;
2330
2331 case Intrinsic::s390_vstrcbs:
2332 case Intrinsic::s390_vstrchs:
2333 case Intrinsic::s390_vstrcfs:
2334 Opcode = SystemZISD::VSTRC_CC;
2335 CCValid = SystemZ::CCMASK_ANY;
2336 return true;
2337
2338 case Intrinsic::s390_vstrczbs:
2339 case Intrinsic::s390_vstrczhs:
2340 case Intrinsic::s390_vstrczfs:
2341 Opcode = SystemZISD::VSTRCZ_CC;
2342 CCValid = SystemZ::CCMASK_ANY;
2343 return true;
2344
2345 case Intrinsic::s390_vstrsb:
2346 case Intrinsic::s390_vstrsh:
2347 case Intrinsic::s390_vstrsf:
2348 Opcode = SystemZISD::VSTRS_CC;
2349 CCValid = SystemZ::CCMASK_ANY;
2350 return true;
2351
2352 case Intrinsic::s390_vstrszb:
2353 case Intrinsic::s390_vstrszh:
2354 case Intrinsic::s390_vstrszf:
2355 Opcode = SystemZISD::VSTRSZ_CC;
2356 CCValid = SystemZ::CCMASK_ANY;
2357 return true;
2358
2359 case Intrinsic::s390_vfcedbs:
2360 case Intrinsic::s390_vfcesbs:
2361 Opcode = SystemZISD::VFCMPES;
2362 CCValid = SystemZ::CCMASK_VCMP;
2363 return true;
2364
2365 case Intrinsic::s390_vfchdbs:
2366 case Intrinsic::s390_vfchsbs:
2367 Opcode = SystemZISD::VFCMPHS;
2368 CCValid = SystemZ::CCMASK_VCMP;
2369 return true;
2370
2371 case Intrinsic::s390_vfchedbs:
2372 case Intrinsic::s390_vfchesbs:
2373 Opcode = SystemZISD::VFCMPHES;
2374 CCValid = SystemZ::CCMASK_VCMP;
2375 return true;
2376
2377 case Intrinsic::s390_vftcidb:
2378 case Intrinsic::s390_vftcisb:
2379 Opcode = SystemZISD::VFTCI;
2380 CCValid = SystemZ::CCMASK_VCMP;
2381 return true;
2382
2383 case Intrinsic::s390_tdc:
2384 Opcode = SystemZISD::TDC;
2385 CCValid = SystemZ::CCMASK_TDC;
2386 return true;
2387
2388 default:
2389 return false;
2390 }
2391}
2392
2393// Emit an intrinsic with chain and an explicit CC register result.
2395 unsigned Opcode) {
2396 // Copy all operands except the intrinsic ID.
2397 unsigned NumOps = Op.getNumOperands();
2399 Ops.reserve(NumOps - 1);
2400 Ops.push_back(Op.getOperand(0));
2401 for (unsigned I = 2; I < NumOps; ++I)
2402 Ops.push_back(Op.getOperand(I));
2403
2404 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2405 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2406 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2407 SDValue OldChain = SDValue(Op.getNode(), 1);
2408 SDValue NewChain = SDValue(Intr.getNode(), 1);
2409 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2410 return Intr.getNode();
2411}
2412
2413// Emit an intrinsic with an explicit CC register result.
2415 unsigned Opcode) {
2416 // Copy all operands except the intrinsic ID.
2417 unsigned NumOps = Op.getNumOperands();
2419 Ops.reserve(NumOps - 1);
2420 for (unsigned I = 1; I < NumOps; ++I)
2421 Ops.push_back(Op.getOperand(I));
2422
2423 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2424 return Intr.getNode();
2425}
2426
2427// CC is a comparison that will be implemented using an integer or
2428// floating-point comparison. Return the condition code mask for
2429// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2430// unsigned comparisons and clear for signed ones. In the floating-point
2431// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2433#define CONV(X) \
2434 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2435 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2436 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2437
2438 switch (CC) {
2439 default:
2440 llvm_unreachable("Invalid integer condition!");
2441
2442 CONV(EQ);
2443 CONV(NE);
2444 CONV(GT);
2445 CONV(GE);
2446 CONV(LT);
2447 CONV(LE);
2448
2449 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2451 }
2452#undef CONV
2453}
2454
2455// If C can be converted to a comparison against zero, adjust the operands
2456// as necessary.
2457static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2458 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2459 return;
2460
2461 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2462 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2463 return;
2464
2465 int64_t Value = ConstOp1->getSExtValue();
2466 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2467 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2468 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2469 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2470 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2471 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2472 }
2473}
2474
2475// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2476// adjust the operands as necessary.
2477static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2478 Comparison &C) {
2479 // For us to make any changes, it must a comparison between a single-use
2480 // load and a constant.
2481 if (!C.Op0.hasOneUse() ||
2482 C.Op0.getOpcode() != ISD::LOAD ||
2483 C.Op1.getOpcode() != ISD::Constant)
2484 return;
2485
2486 // We must have an 8- or 16-bit load.
2487 auto *Load = cast<LoadSDNode>(C.Op0);
2488 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2489 if ((NumBits != 8 && NumBits != 16) ||
2490 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2491 return;
2492
2493 // The load must be an extending one and the constant must be within the
2494 // range of the unextended value.
2495 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2496 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2497 return;
2498 uint64_t Value = ConstOp1->getZExtValue();
2499 uint64_t Mask = (1 << NumBits) - 1;
2500 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2501 // Make sure that ConstOp1 is in range of C.Op0.
2502 int64_t SignedValue = ConstOp1->getSExtValue();
2503 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2504 return;
2505 if (C.ICmpType != SystemZICMP::SignedOnly) {
2506 // Unsigned comparison between two sign-extended values is equivalent
2507 // to unsigned comparison between two zero-extended values.
2508 Value &= Mask;
2509 } else if (NumBits == 8) {
2510 // Try to treat the comparison as unsigned, so that we can use CLI.
2511 // Adjust CCMask and Value as necessary.
2512 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2513 // Test whether the high bit of the byte is set.
2514 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2515 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2516 // Test whether the high bit of the byte is clear.
2517 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2518 else
2519 // No instruction exists for this combination.
2520 return;
2521 C.ICmpType = SystemZICMP::UnsignedOnly;
2522 }
2523 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2524 if (Value > Mask)
2525 return;
2526 // If the constant is in range, we can use any comparison.
2527 C.ICmpType = SystemZICMP::Any;
2528 } else
2529 return;
2530
2531 // Make sure that the first operand is an i32 of the right extension type.
2532 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2535 if (C.Op0.getValueType() != MVT::i32 ||
2536 Load->getExtensionType() != ExtType) {
2537 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2538 Load->getBasePtr(), Load->getPointerInfo(),
2539 Load->getMemoryVT(), Load->getAlign(),
2540 Load->getMemOperand()->getFlags());
2541 // Update the chain uses.
2542 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2543 }
2544
2545 // Make sure that the second operand is an i32 with the right value.
2546 if (C.Op1.getValueType() != MVT::i32 ||
2547 Value != ConstOp1->getZExtValue())
2548 C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2549}
2550
2551// Return true if Op is either an unextended load, or a load suitable
2552// for integer register-memory comparisons of type ICmpType.
2553static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2554 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2555 if (Load) {
2556 // There are no instructions to compare a register with a memory byte.
2557 if (Load->getMemoryVT() == MVT::i8)
2558 return false;
2559 // Otherwise decide on extension type.
2560 switch (Load->getExtensionType()) {
2561 case ISD::NON_EXTLOAD:
2562 return true;
2563 case ISD::SEXTLOAD:
2564 return ICmpType != SystemZICMP::UnsignedOnly;
2565 case ISD::ZEXTLOAD:
2566 return ICmpType != SystemZICMP::SignedOnly;
2567 default:
2568 break;
2569 }
2570 }
2571 return false;
2572}
2573
2574// Return true if it is better to swap the operands of C.
2575static bool shouldSwapCmpOperands(const Comparison &C) {
2576 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2577 if (C.Op0.getValueType() == MVT::i128)
2578 return false;
2579 if (C.Op0.getValueType() == MVT::f128)
2580 return false;
2581
2582 // Always keep a floating-point constant second, since comparisons with
2583 // zero can use LOAD TEST and comparisons with other constants make a
2584 // natural memory operand.
2585 if (isa<ConstantFPSDNode>(C.Op1))
2586 return false;
2587
2588 // Never swap comparisons with zero since there are many ways to optimize
2589 // those later.
2590 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2591 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2592 return false;
2593
2594 // Also keep natural memory operands second if the loaded value is
2595 // only used here. Several comparisons have memory forms.
2596 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2597 return false;
2598
2599 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2600 // In that case we generally prefer the memory to be second.
2601 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2602 // The only exceptions are when the second operand is a constant and
2603 // we can use things like CHHSI.
2604 if (!ConstOp1)
2605 return true;
2606 // The unsigned memory-immediate instructions can handle 16-bit
2607 // unsigned integers.
2608 if (C.ICmpType != SystemZICMP::SignedOnly &&
2609 isUInt<16>(ConstOp1->getZExtValue()))
2610 return false;
2611 // The signed memory-immediate instructions can handle 16-bit
2612 // signed integers.
2613 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2614 isInt<16>(ConstOp1->getSExtValue()))
2615 return false;
2616 return true;
2617 }
2618
2619 // Try to promote the use of CGFR and CLGFR.
2620 unsigned Opcode0 = C.Op0.getOpcode();
2621 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2622 return true;
2623 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2624 return true;
2625 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
2626 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2627 C.Op0.getConstantOperandVal(1) == 0xffffffff)
2628 return true;
2629
2630 return false;
2631}
2632
2633// Check whether C tests for equality between X and Y and whether X - Y
2634// or Y - X is also computed. In that case it's better to compare the
2635// result of the subtraction against zero.
2637 Comparison &C) {
2638 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2639 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2640 for (SDNode *N : C.Op0->uses()) {
2641 if (N->getOpcode() == ISD::SUB &&
2642 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2643 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2644 // Disable the nsw and nuw flags: the backend needs to handle
2645 // overflow as well during comparison elimination.
2646 SDNodeFlags Flags = N->getFlags();
2647 Flags.setNoSignedWrap(false);
2648 Flags.setNoUnsignedWrap(false);
2649 N->setFlags(Flags);
2650 C.Op0 = SDValue(N, 0);
2651 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2652 return;
2653 }
2654 }
2655 }
2656}
2657
2658// Check whether C compares a floating-point value with zero and if that
2659// floating-point value is also negated. In this case we can use the
2660// negation to set CC, so avoiding separate LOAD AND TEST and
2661// LOAD (NEGATIVE/COMPLEMENT) instructions.
2662static void adjustForFNeg(Comparison &C) {
2663 // This optimization is invalid for strict comparisons, since FNEG
2664 // does not raise any exceptions.
2665 if (C.Chain)
2666 return;
2667 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2668 if (C1 && C1->isZero()) {
2669 for (SDNode *N : C.Op0->uses()) {
2670 if (N->getOpcode() == ISD::FNEG) {
2671 C.Op0 = SDValue(N, 0);
2672 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2673 return;
2674 }
2675 }
2676 }
2677}
2678
2679// Check whether C compares (shl X, 32) with 0 and whether X is
2680// also sign-extended. In that case it is better to test the result
2681// of the sign extension using LTGFR.
2682//
2683// This case is important because InstCombine transforms a comparison
2684// with (sext (trunc X)) into a comparison with (shl X, 32).
2685static void adjustForLTGFR(Comparison &C) {
2686 // Check for a comparison between (shl X, 32) and 0.
2687 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
2688 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
2689 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2690 if (C1 && C1->getZExtValue() == 32) {
2691 SDValue ShlOp0 = C.Op0.getOperand(0);
2692 // See whether X has any SIGN_EXTEND_INREG uses.
2693 for (SDNode *N : ShlOp0->uses()) {
2694 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2695 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2696 C.Op0 = SDValue(N, 0);
2697 return;
2698 }
2699 }
2700 }
2701 }
2702}
2703
2704// If C compares the truncation of an extending load, try to compare
2705// the untruncated value instead. This exposes more opportunities to
2706// reuse CC.
2707static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2708 Comparison &C) {
2709 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2710 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2711 C.Op1.getOpcode() == ISD::Constant &&
2712 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
2713 C.Op1->getAsZExtVal() == 0) {
2714 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2715 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
2716 C.Op0.getValueSizeInBits().getFixedValue()) {
2717 unsigned Type = L->getExtensionType();
2718 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2719 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2720 C.Op0 = C.Op0.getOperand(0);
2721 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2722 }
2723 }
2724 }
2725}
2726
2727// Return true if shift operation N has an in-range constant shift value.
2728// Store it in ShiftVal if so.
2729static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2730 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2731 if (!Shift)
2732 return false;
2733
2734 uint64_t Amount = Shift->getZExtValue();
2735 if (Amount >= N.getValueSizeInBits())
2736 return false;
2737
2738 ShiftVal = Amount;
2739 return true;
2740}
2741
2742// Check whether an AND with Mask is suitable for a TEST UNDER MASK
2743// instruction and whether the CC value is descriptive enough to handle
2744// a comparison of type Opcode between the AND result and CmpVal.
2745// CCMask says which comparison result is being tested and BitSize is
2746// the number of bits in the operands. If TEST UNDER MASK can be used,
2747// return the corresponding CC mask, otherwise return 0.
2748static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2749 uint64_t Mask, uint64_t CmpVal,
2750 unsigned ICmpType) {
2751 assert(Mask != 0 && "ANDs with zero should have been removed by now");
2752
2753 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2754 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2755 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2756 return 0;
2757
2758 // Work out the masks for the lowest and highest bits.
2760 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
2761
2762 // Signed ordered comparisons are effectively unsigned if the sign
2763 // bit is dropped.
2764 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2765
2766 // Check for equality comparisons with 0, or the equivalent.
2767 if (CmpVal == 0) {
2768 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2770 if (CCMask == SystemZ::CCMASK_CMP_NE)
2772 }
2773 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2774 if (CCMask == SystemZ::CCMASK_CMP_LT)
2776 if (CCMask == SystemZ::CCMASK_CMP_GE)
2778 }
2779 if (EffectivelyUnsigned && CmpVal < Low) {
2780 if (CCMask == SystemZ::CCMASK_CMP_LE)
2782 if (CCMask == SystemZ::CCMASK_CMP_GT)
2784 }
2785
2786 // Check for equality comparisons with the mask, or the equivalent.
2787 if (CmpVal == Mask) {
2788 if (CCMask == SystemZ::CCMASK_CMP_EQ)
2790 if (CCMask == SystemZ::CCMASK_CMP_NE)
2792 }
2793 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2794 if (CCMask == SystemZ::CCMASK_CMP_GT)
2796 if (CCMask == SystemZ::CCMASK_CMP_LE)
2798 }
2799 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2800 if (CCMask == SystemZ::CCMASK_CMP_GE)
2802 if (CCMask == SystemZ::CCMASK_CMP_LT)
2804 }
2805
2806 // Check for ordered comparisons with the top bit.
2807 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2808 if (CCMask == SystemZ::CCMASK_CMP_LE)
2810 if (CCMask == SystemZ::CCMASK_CMP_GT)
2812 }
2813 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2814 if (CCMask == SystemZ::CCMASK_CMP_LT)
2816 if (CCMask == SystemZ::CCMASK_CMP_GE)
2818 }
2819
2820 // If there are just two bits, we can do equality checks for Low and High
2821 // as well.
2822 if (Mask == Low + High) {
2823 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2825 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2827 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2829 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2831 }
2832
2833 // Looks like we've exhausted our options.
2834 return 0;
2835}
2836
2837// See whether C can be implemented as a TEST UNDER MASK instruction.
2838// Update the arguments with the TM version if so.
2840 Comparison &C) {
2841 // Use VECTOR TEST UNDER MASK for i128 operations.
2842 if (C.Op0.getValueType() == MVT::i128) {
2843 // We can use VTM for EQ/NE comparisons of x & y against 0.
2844 if (C.Op0.getOpcode() == ISD::AND &&
2845 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2846 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
2847 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
2848 if (Mask && Mask->getAPIntValue() == 0) {
2849 C.Opcode = SystemZISD::VTM;
2850 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
2851 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
2852 C.CCValid = SystemZ::CCMASK_VCMP;
2853 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2854 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2855 else
2856 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2857 }
2858 }
2859 return;
2860 }
2861
2862 // Check that we have a comparison with a constant.
2863 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2864 if (!ConstOp1)
2865 return;
2866 uint64_t CmpVal = ConstOp1->getZExtValue();
2867
2868 // Check whether the nonconstant input is an AND with a constant mask.
2869 Comparison NewC(C);
2870 uint64_t MaskVal;
2871 ConstantSDNode *Mask = nullptr;
2872 if (C.Op0.getOpcode() == ISD::AND) {
2873 NewC.Op0 = C.Op0.getOperand(0);
2874 NewC.Op1 = C.Op0.getOperand(1);
2875 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2876 if (!Mask)
2877 return;
2878 MaskVal = Mask->getZExtValue();
2879 } else {
2880 // There is no instruction to compare with a 64-bit immediate
2881 // so use TMHH instead if possible. We need an unsigned ordered
2882 // comparison with an i64 immediate.
2883 if (NewC.Op0.getValueType() != MVT::i64 ||
2884 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2885 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2886 NewC.ICmpType == SystemZICMP::SignedOnly)
2887 return;
2888 // Convert LE and GT comparisons into LT and GE.
2889 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2890 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2891 if (CmpVal == uint64_t(-1))
2892 return;
2893 CmpVal += 1;
2894 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2895 }
2896 // If the low N bits of Op1 are zero than the low N bits of Op0 can
2897 // be masked off without changing the result.
2898 MaskVal = -(CmpVal & -CmpVal);
2899 NewC.ICmpType = SystemZICMP::UnsignedOnly;
2900 }
2901 if (!MaskVal)
2902 return;
2903
2904 // Check whether the combination of mask, comparison value and comparison
2905 // type are suitable.
2906 unsigned BitSize = NewC.Op0.getValueSizeInBits();
2907 unsigned NewCCMask, ShiftVal;
2908 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2909 NewC.Op0.getOpcode() == ISD::SHL &&
2910 isSimpleShift(NewC.Op0, ShiftVal) &&
2911 (MaskVal >> ShiftVal != 0) &&
2912 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2913 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2914 MaskVal >> ShiftVal,
2915 CmpVal >> ShiftVal,
2916 SystemZICMP::Any))) {
2917 NewC.Op0 = NewC.Op0.getOperand(0);
2918 MaskVal >>= ShiftVal;
2919 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2920 NewC.Op0.getOpcode() == ISD::SRL &&
2921 isSimpleShift(NewC.Op0, ShiftVal) &&
2922 (MaskVal << ShiftVal != 0) &&
2923 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2924 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2925 MaskVal << ShiftVal,
2926 CmpVal << ShiftVal,
2928 NewC.Op0 = NewC.Op0.getOperand(0);
2929 MaskVal <<= ShiftVal;
2930 } else {
2931 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2932 NewC.ICmpType);
2933 if (!NewCCMask)
2934 return;
2935 }
2936
2937 // Go ahead and make the change.
2938 C.Opcode = SystemZISD::TM;
2939 C.Op0 = NewC.Op0;
2940 if (Mask && Mask->getZExtValue() == MaskVal)
2941 C.Op1 = SDValue(Mask, 0);
2942 else
2943 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2944 C.CCValid = SystemZ::CCMASK_TM;
2945 C.CCMask = NewCCMask;
2946}
2947
2948// Implement i128 comparison in vector registers.
2949static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
2950 Comparison &C) {
2951 if (C.Opcode != SystemZISD::ICMP)
2952 return;
2953 if (C.Op0.getValueType() != MVT::i128)
2954 return;
2955
2956 // (In-)Equality comparisons can be implemented via VCEQGS.
2957 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2958 C.CCMask == SystemZ::CCMASK_CMP_NE) {
2959 C.Opcode = SystemZISD::VICMPES;
2960 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
2961 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
2962 C.CCValid = SystemZ::CCMASK_VCMP;
2963 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
2964 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
2965 else
2966 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
2967 return;
2968 }
2969
2970 // Normalize other comparisons to GT.
2971 bool Swap = false, Invert = false;
2972 switch (C.CCMask) {
2973 case SystemZ::CCMASK_CMP_GT: break;
2974 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
2975 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
2976 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
2977 default: llvm_unreachable("Invalid integer condition!");
2978 }
2979 if (Swap)
2980 std::swap(C.Op0, C.Op1);
2981
2982 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2983 C.Opcode = SystemZISD::UCMP128HI;
2984 else
2985 C.Opcode = SystemZISD::SCMP128HI;
2986 C.CCValid = SystemZ::CCMASK_ANY;
2987 C.CCMask = SystemZ::CCMASK_1;
2988
2989 if (Invert)
2990 C.CCMask ^= C.CCValid;
2991}
2992
2993// See whether the comparison argument contains a redundant AND
2994// and remove it if so. This sometimes happens due to the generic
2995// BRCOND expansion.
2997 Comparison &C) {
2998 if (C.Op0.getOpcode() != ISD::AND)
2999 return;
3000 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3001 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3002 return;
3003 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3004 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3005 return;
3006
3007 C.Op0 = C.Op0.getOperand(0);
3008}
3009
3010// Return a Comparison that tests the condition-code result of intrinsic
3011// node Call against constant integer CC using comparison code Cond.
3012// Opcode is the opcode of the SystemZISD operation for the intrinsic
3013// and CCValid is the set of possible condition-code results.
3014static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3015 SDValue Call, unsigned CCValid, uint64_t CC,
3017 Comparison C(Call, SDValue(), SDValue());
3018 C.Opcode = Opcode;
3019 C.CCValid = CCValid;
3020 if (Cond == ISD::SETEQ)
3021 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3022 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3023 else if (Cond == ISD::SETNE)
3024 // ...and the inverse of that.
3025 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3026 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3027 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3028 // always true for CC>3.
3029 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3030 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3031 // ...and the inverse of that.
3032 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3033 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3034 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3035 // always true for CC>3.
3036 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3037 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3038 // ...and the inverse of that.
3039 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3040 else
3041 llvm_unreachable("Unexpected integer comparison type");
3042 C.CCMask &= CCValid;
3043 return C;
3044}
3045
3046// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
3047static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3048 ISD::CondCode Cond, const SDLoc &DL,
3049 SDValue Chain = SDValue(),
3050 bool IsSignaling = false) {
3051 if (CmpOp1.getOpcode() == ISD::Constant) {
3052 assert(!Chain);
3053 unsigned Opcode, CCValid;
3054 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3055 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3056 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3057 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3058 CmpOp1->getAsZExtVal(), Cond);
3059 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3060 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3061 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3062 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3063 CmpOp1->getAsZExtVal(), Cond);
3064 }
3065 Comparison C(CmpOp0, CmpOp1, Chain);
3066 C.CCMask = CCMaskForCondCode(Cond);
3067 if (C.Op0.getValueType().isFloatingPoint()) {
3068 C.CCValid = SystemZ::CCMASK_FCMP;
3069 if (!C.Chain)
3070 C.Opcode = SystemZISD::FCMP;
3071 else if (!IsSignaling)
3072 C.Opcode = SystemZISD::STRICT_FCMP;
3073 else
3074 C.Opcode = SystemZISD::STRICT_FCMPS;
3076 } else {
3077 assert(!C.Chain);
3078 C.CCValid = SystemZ::CCMASK_ICMP;
3079 C.Opcode = SystemZISD::ICMP;
3080 // Choose the type of comparison. Equality and inequality tests can
3081 // use either signed or unsigned comparisons. The choice also doesn't
3082 // matter if both sign bits are known to be clear. In those cases we
3083 // want to give the main isel code the freedom to choose whichever
3084 // form fits best.
3085 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3086 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3087 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3088 C.ICmpType = SystemZICMP::Any;
3089 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3090 C.ICmpType = SystemZICMP::UnsignedOnly;
3091 else
3092 C.ICmpType = SystemZICMP::SignedOnly;
3093 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3094 adjustForRedundantAnd(DAG, DL, C);
3095 adjustZeroCmp(DAG, DL, C);
3096 adjustSubwordCmp(DAG, DL, C);
3097 adjustForSubtraction(DAG, DL, C);
3099 adjustICmpTruncate(DAG, DL, C);
3100 }
3101
3102 if (shouldSwapCmpOperands(C)) {
3103 std::swap(C.Op0, C.Op1);
3104 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3105 }
3106
3108 adjustICmp128(DAG, DL, C);
3109 return C;
3110}
3111
3112// Emit the comparison instruction described by C.
3113static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3114 if (!C.Op1.getNode()) {
3115 SDNode *Node;
3116 switch (C.Op0.getOpcode()) {
3118 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3119 return SDValue(Node, 0);
3121 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3122 return SDValue(Node, Node->getNumValues() - 1);
3123 default:
3124 llvm_unreachable("Invalid comparison operands");
3125 }
3126 }
3127 if (C.Opcode == SystemZISD::ICMP)
3128 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3129 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3130 if (C.Opcode == SystemZISD::TM) {
3131 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3133 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3134 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3135 }
3136 if (C.Opcode == SystemZISD::VICMPES) {
3137 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32);
3138 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3139 return SDValue(Val.getNode(), 1);
3140 }
3141 if (C.Chain) {
3142 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3143 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3144 }
3145 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3146}
3147
3148// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3149// 64 bits. Extend is the extension type to use. Store the high part
3150// in Hi and the low part in Lo.
3151static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3152 SDValue Op0, SDValue Op1, SDValue &Hi,
3153 SDValue &Lo) {
3154 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3155 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3156 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3157 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3158 DAG.getConstant(32, DL, MVT::i64));
3159 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3160 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3161}
3162
3163// Lower a binary operation that produces two VT results, one in each
3164// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3165// and Opcode performs the GR128 operation. Store the even register result
3166// in Even and the odd register result in Odd.
3167static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3168 unsigned Opcode, SDValue Op0, SDValue Op1,
3169 SDValue &Even, SDValue &Odd) {
3170 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3171 bool Is32Bit = is32Bit(VT);
3172 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3173 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3174}
3175
3176// Return an i32 value that is 1 if the CC value produced by CCReg is
3177// in the mask CCMask and 0 otherwise. CC is known to have a value
3178// in CCValid, so other values can be ignored.
3179static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3180 unsigned CCValid, unsigned CCMask) {
3181 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3182 DAG.getConstant(0, DL, MVT::i32),
3183 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3184 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3185 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3186}
3187
3188// Return the SystemISD vector comparison operation for CC, or 0 if it cannot
3189// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3190// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3191// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3192// floating-point comparisons.
3195 switch (CC) {
3196 case ISD::SETOEQ:
3197 case ISD::SETEQ:
3198 switch (Mode) {
3199 case CmpMode::Int: return SystemZISD::VICMPE;
3200 case CmpMode::FP: return SystemZISD::VFCMPE;
3201 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3202 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3203 }
3204 llvm_unreachable("Bad mode");
3205
3206 case ISD::SETOGE:
3207 case ISD::SETGE:
3208 switch (Mode) {
3209 case CmpMode::Int: return 0;
3210 case CmpMode::FP: return SystemZISD::VFCMPHE;
3211 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3212 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3213 }
3214 llvm_unreachable("Bad mode");
3215
3216 case ISD::SETOGT:
3217 case ISD::SETGT:
3218 switch (Mode) {
3219 case CmpMode::Int: return SystemZISD::VICMPH;
3220 case CmpMode::FP: return SystemZISD::VFCMPH;
3221 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3222 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3223 }
3224 llvm_unreachable("Bad mode");
3225
3226 case ISD::SETUGT:
3227 switch (Mode) {
3228 case CmpMode::Int: return SystemZISD::VICMPHL;
3229 case CmpMode::FP: return 0;
3230 case CmpMode::StrictFP: return 0;
3231 case CmpMode::SignalingFP: return 0;
3232 }
3233 llvm_unreachable("Bad mode");
3234
3235 default:
3236 return 0;
3237 }
3238}
3239
3240// Return the SystemZISD vector comparison operation for CC or its inverse,
3241// or 0 if neither can be done directly. Indicate in Invert whether the
3242// result is for the inverse of CC. Mode is as above.
3244 bool &Invert) {
3245 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3246 Invert = false;
3247 return Opcode;
3248 }
3249
3250 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3251 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3252 Invert = true;
3253 return Opcode;
3254 }
3255
3256 return 0;
3257}
3258
3259// Return a v2f64 that contains the extended form of elements Start and Start+1
3260// of v4f32 value Op. If Chain is nonnull, return the strict form.
3261static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3262 SDValue Op, SDValue Chain) {
3263 int Mask[] = { Start, -1, Start + 1, -1 };
3264 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3265 if (Chain) {
3266 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3267 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3268 }
3269 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3270}
3271
3272// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3273// producing a result of type VT. If Chain is nonnull, return the strict form.
3274SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3275 const SDLoc &DL, EVT VT,
3276 SDValue CmpOp0,
3277 SDValue CmpOp1,
3278 SDValue Chain) const {
3279 // There is no hardware support for v4f32 (unless we have the vector
3280 // enhancements facility 1), so extend the vector into two v2f64s
3281 // and compare those.
3282 if (CmpOp0.getValueType() == MVT::v4f32 &&
3283 !Subtarget.hasVectorEnhancements1()) {
3284 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3285 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3286 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3287 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3288 if (Chain) {
3289 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3290 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3291 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3292 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3293 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3294 H1.getValue(1), L1.getValue(1),
3295 HRes.getValue(1), LRes.getValue(1) };
3296 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3297 SDValue Ops[2] = { Res, NewChain };
3298 return DAG.getMergeValues(Ops, DL);
3299 }
3300 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3301 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3302 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3303 }
3304 if (Chain) {
3305 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3306 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3307 }
3308 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3309}
3310
3311// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3312// an integer mask of type VT. If Chain is nonnull, we have a strict
3313// floating-point comparison. If in addition IsSignaling is true, we have
3314// a strict signaling floating-point comparison.
3315SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3316 const SDLoc &DL, EVT VT,
3318 SDValue CmpOp0,
3319 SDValue CmpOp1,
3320 SDValue Chain,
3321 bool IsSignaling) const {
3322 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3323 assert (!Chain || IsFP);
3324 assert (!IsSignaling || Chain);
3325 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3326 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3327 bool Invert = false;
3328 SDValue Cmp;
3329 switch (CC) {
3330 // Handle tests for order using (or (ogt y x) (oge x y)).
3331 case ISD::SETUO:
3332 Invert = true;
3333 [[fallthrough]];
3334 case ISD::SETO: {
3335 assert(IsFP && "Unexpected integer comparison");
3336 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3337 DL, VT, CmpOp1, CmpOp0, Chain);
3338 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3339 DL, VT, CmpOp0, CmpOp1, Chain);
3340 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3341 if (Chain)
3342 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3343 LT.getValue(1), GE.getValue(1));
3344 break;
3345 }
3346
3347 // Handle <> tests using (or (ogt y x) (ogt x y)).
3348 case ISD::SETUEQ:
3349 Invert = true;
3350 [[fallthrough]];
3351 case ISD::SETONE: {
3352 assert(IsFP && "Unexpected integer comparison");
3353 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3354 DL, VT, CmpOp1, CmpOp0, Chain);
3355 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3356 DL, VT, CmpOp0, CmpOp1, Chain);
3357 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3358 if (Chain)
3359 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3360 LT.getValue(1), GT.getValue(1));
3361 break;
3362 }
3363
3364 // Otherwise a single comparison is enough. It doesn't really
3365 // matter whether we try the inversion or the swap first, since
3366 // there are no cases where both work.
3367 default:
3368 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3369 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3370 else {
3372 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3373 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3374 else
3375 llvm_unreachable("Unhandled comparison");
3376 }
3377 if (Chain)
3378 Chain = Cmp.getValue(1);
3379 break;
3380 }
3381 if (Invert) {
3382 SDValue Mask =
3383 DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3384 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3385 }
3386 if (Chain && Chain.getNode() != Cmp.getNode()) {
3387 SDValue Ops[2] = { Cmp, Chain };
3388 Cmp = DAG.getMergeValues(Ops, DL);
3389 }
3390 return Cmp;
3391}
3392
3393SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3394 SelectionDAG &DAG) const {
3395 SDValue CmpOp0 = Op.getOperand(0);
3396 SDValue CmpOp1 = Op.getOperand(1);
3397 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3398 SDLoc DL(Op);
3399 EVT VT = Op.getValueType();
3400 if (VT.isVector())
3401 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3402
3403 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3404 SDValue CCReg = emitCmp(DAG, DL, C);
3405 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3406}
3407
3408SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3409 SelectionDAG &DAG,
3410 bool IsSignaling) const {
3411 SDValue Chain = Op.getOperand(0);
3412 SDValue CmpOp0 = Op.getOperand(1);
3413 SDValue CmpOp1 = Op.getOperand(2);
3414 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3415 SDLoc DL(Op);
3416 EVT VT = Op.getNode()->getValueType(0);
3417 if (VT.isVector()) {
3418 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3419 Chain, IsSignaling);
3420 return Res.getValue(Op.getResNo());
3421 }
3422
3423 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3424 SDValue CCReg = emitCmp(DAG, DL, C);
3425 CCReg->setFlags(Op->getFlags());
3426 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3427 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3428 return DAG.getMergeValues(Ops, DL);
3429}
3430
3431SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3432 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3433 SDValue CmpOp0 = Op.getOperand(2);
3434 SDValue CmpOp1 = Op.getOperand(3);
3435 SDValue Dest = Op.getOperand(4);
3436 SDLoc DL(Op);
3437
3438 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3439 SDValue CCReg = emitCmp(DAG, DL, C);
3440 return DAG.getNode(
3441 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3442 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3443 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3444}
3445
3446// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3447// allowing Pos and Neg to be wider than CmpOp.
3448static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3449 return (Neg.getOpcode() == ISD::SUB &&
3450 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3451 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3452 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3453 Pos.getOperand(0) == CmpOp)));
3454}
3455
3456// Return the absolute or negative absolute of Op; IsNegative decides which.
3458 bool IsNegative) {
3459 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3460 if (IsNegative)
3461 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3462 DAG.getConstant(0, DL, Op.getValueType()), Op);
3463 return Op;
3464}
3465
3466SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3467 SelectionDAG &DAG) const {
3468 SDValue CmpOp0 = Op.getOperand(0);
3469 SDValue CmpOp1 = Op.getOperand(1);
3470 SDValue TrueOp = Op.getOperand(2);
3471 SDValue FalseOp = Op.getOperand(3);
3472 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3473 SDLoc DL(Op);
3474
3475 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3476
3477 // Check for absolute and negative-absolute selections, including those
3478 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3479 // This check supplements the one in DAGCombiner.
3480 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3481 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3482 C.Op1.getOpcode() == ISD::Constant &&
3483 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3484 C.Op1->getAsZExtVal() == 0) {
3485 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3486 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3487 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3488 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3489 }
3490
3491 SDValue CCReg = emitCmp(DAG, DL, C);
3492 SDValue Ops[] = {TrueOp, FalseOp,
3493 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3494 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3495
3496 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3497}
3498
3499SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3500 SelectionDAG &DAG) const {
3501 SDLoc DL(Node);
3502 const GlobalValue *GV = Node->getGlobal();
3503 int64_t Offset = Node->getOffset();
3504 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3506
3508 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3509 if (isInt<32>(Offset)) {
3510 // Assign anchors at 1<<12 byte boundaries.
3511 uint64_t Anchor = Offset & ~uint64_t(0xfff);
3512 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3513 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3514
3515 // The offset can be folded into the address if it is aligned to a
3516 // halfword.
3517 Offset -= Anchor;
3518 if (Offset != 0 && (Offset & 1) == 0) {
3519 SDValue Full =
3520 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3521 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3522 Offset = 0;
3523 }
3524 } else {
3525 // Conservatively load a constant offset greater than 32 bits into a
3526 // register below.
3527 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3528 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3529 }
3530 } else if (Subtarget.isTargetELF()) {
3531 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3532 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3533 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3535 } else if (Subtarget.isTargetzOS()) {
3536 Result = getADAEntry(DAG, GV, DL, PtrVT);
3537 } else
3538 llvm_unreachable("Unexpected Subtarget");
3539
3540 // If there was a non-zero offset that we didn't fold, create an explicit
3541 // addition for it.
3542 if (Offset != 0)
3543 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3544 DAG.getConstant(Offset, DL, PtrVT));
3545
3546 return Result;
3547}
3548
3549SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3550 SelectionDAG &DAG,
3551 unsigned Opcode,
3552 SDValue GOTOffset) const {
3553 SDLoc DL(Node);
3554 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3555 SDValue Chain = DAG.getEntryNode();
3556 SDValue Glue;
3557
3560 report_fatal_error("In GHC calling convention TLS is not supported");
3561
3562 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3563 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3564 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3565 Glue = Chain.getValue(1);
3566 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3567 Glue = Chain.getValue(1);
3568
3569 // The first call operand is the chain and the second is the TLS symbol.
3571 Ops.push_back(Chain);
3572 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3573 Node->getValueType(0),
3574 0, 0));
3575
3576 // Add argument registers to the end of the list so that they are
3577 // known live into the call.
3578 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3579 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3580
3581 // Add a register mask operand representing the call-preserved registers.
3582 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3583 const uint32_t *Mask =
3584 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3585 assert(Mask && "Missing call preserved mask for calling convention");
3586 Ops.push_back(DAG.getRegisterMask(Mask));
3587
3588 // Glue the call to the argument copies.
3589 Ops.push_back(Glue);
3590
3591 // Emit the call.
3592 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3593 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3594 Glue = Chain.getValue(1);
3595
3596 // Copy the return value from %r2.
3597 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3598}
3599
3600SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3601 SelectionDAG &DAG) const {
3602 SDValue Chain = DAG.getEntryNode();
3603 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3604
3605 // The high part of the thread pointer is in access register 0.
3606 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3607 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3608
3609 // The low part of the thread pointer is in access register 1.
3610 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3611 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3612
3613 // Merge them into a single 64-bit address.
3614 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3615 DAG.getConstant(32, DL, PtrVT));
3616 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3617}
3618
3619SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3620 SelectionDAG &DAG) const {
3621 if (DAG.getTarget().useEmulatedTLS())
3622 return LowerToTLSEmulatedModel(Node, DAG);
3623 SDLoc DL(Node);
3624 const GlobalValue *GV = Node->getGlobal();
3625 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3626 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3627
3630 report_fatal_error("In GHC calling convention TLS is not supported");
3631
3632 SDValue TP = lowerThreadPointer(DL, DAG);
3633
3634 // Get the offset of GA from the thread pointer, based on the TLS model.
3636 switch (model) {
3638 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3641
3642 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3643 Offset = DAG.getLoad(
3644 PtrVT, DL, DAG.getEntryNode(), Offset,
3646
3647 // Call __tls_get_offset to retrieve the offset.
3648 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3649 break;
3650 }
3651
3653 // Load the GOT offset of the module ID.
3656
3657 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3658 Offset = DAG.getLoad(
3659 PtrVT, DL, DAG.getEntryNode(), Offset,
3661
3662 // Call __tls_get_offset to retrieve the module base offset.
3663 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3664
3665 // Note: The SystemZLDCleanupPass will remove redundant computations
3666 // of the module base offset. Count total number of local-dynamic
3667 // accesses to trigger execution of that pass.
3671
3672 // Add the per-symbol offset.
3674
3675 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3676 DTPOffset = DAG.getLoad(
3677 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3679
3680 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3681 break;
3682 }
3683
3684 case TLSModel::InitialExec: {
3685 // Load the offset from the GOT.
3686 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3689 Offset =
3690 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3692 break;
3693 }
3694
3695 case TLSModel::LocalExec: {
3696 // Force the offset into the constant pool and load it from there.
3699
3700 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3701 Offset = DAG.getLoad(
3702 PtrVT, DL, DAG.getEntryNode(), Offset,
3704 break;
3705 }
3706 }
3707
3708 // Add the base and offset together.
3709 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3710}
3711
3712SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3713 SelectionDAG &DAG) const {
3714 SDLoc DL(Node);
3715 const BlockAddress *BA = Node->getBlockAddress();
3716 int64_t Offset = Node->getOffset();
3717 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3718
3719 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3720 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3721 return Result;
3722}
3723
3724SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3725 SelectionDAG &DAG) const {
3726 SDLoc DL(JT);
3727 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3728 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3729
3730 // Use LARL to load the address of the table.
3731 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3732}
3733
3734SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3735 SelectionDAG &DAG) const {
3736 SDLoc DL(CP);
3737 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3738
3740 if (CP->isMachineConstantPoolEntry())
3741 Result =
3742 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3743 else
3744 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3745 CP->getOffset());
3746
3747 // Use LARL to load the address of the constant pool entry.
3748 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3749}
3750
3751SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3752 SelectionDAG &DAG) const {
3753 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3755 MachineFrameInfo &MFI = MF.getFrameInfo();
3756 MFI.setFrameAddressIsTaken(true);
3757
3758 SDLoc DL(Op);
3759 unsigned Depth = Op.getConstantOperandVal(0);
3760 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3761
3762 // By definition, the frame address is the address of the back chain. (In
3763 // the case of packed stack without backchain, return the address where the
3764 // backchain would have been stored. This will either be an unused space or
3765 // contain a saved register).
3766 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3767 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3768
3769 if (Depth > 0) {
3770 // FIXME The frontend should detect this case.
3771 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3772 report_fatal_error("Unsupported stack frame traversal count");
3773
3774 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
3775 while (Depth--) {
3776 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
3778 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
3779 }
3780 }
3781
3782 return BackChain;
3783}
3784
3785SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3786 SelectionDAG &DAG) const {
3788 MachineFrameInfo &MFI = MF.getFrameInfo();
3789 MFI.setReturnAddressIsTaken(true);
3790
3792 return SDValue();
3793
3794 SDLoc DL(Op);
3795 unsigned Depth = Op.getConstantOperandVal(0);
3796 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3797
3798 if (Depth > 0) {
3799 // FIXME The frontend should detect this case.
3800 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
3801 report_fatal_error("Unsupported stack frame traversal count");
3802
3803 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
3804 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3805 int Offset = (TFL->usePackedStack(MF) ? -2 : 14) *
3807 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
3808 DAG.getConstant(Offset, DL, PtrVT));
3809 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
3811 }
3812
3813 // Return R14D, which has the return address. Mark it an implicit live-in.
3814 Register LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3815 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3816}
3817
3818SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3819 SelectionDAG &DAG) const {
3820 SDLoc DL(Op);
3821 SDValue In = Op.getOperand(0);
3822 EVT InVT = In.getValueType();
3823 EVT ResVT = Op.getValueType();
3824
3825 // Convert loads directly. This is normally done by DAGCombiner,
3826 // but we need this case for bitcasts that are created during lowering
3827 // and which are then lowered themselves.
3828 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3829 if (ISD::isNormalLoad(LoadN)) {
3830 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3831 LoadN->getBasePtr(), LoadN->getMemOperand());
3832 // Update the chain uses.
3833 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3834 return NewLoad;
3835 }
3836
3837 if (InVT == MVT::i32 && ResVT == MVT::f32) {
3838 SDValue In64;
3839 if (Subtarget.hasHighWord()) {
3840 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3841 MVT::i64);
3842 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3843 MVT::i64, SDValue(U64, 0), In);
3844 } else {
3845 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3846 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3847 DAG.getConstant(32, DL, MVT::i64));
3848 }
3849 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3850 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3851 DL, MVT::f32, Out64);
3852 }
3853 if (InVT == MVT::f32 && ResVT == MVT::i32) {
3854 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3855 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3856 MVT::f64, SDValue(U64, 0), In);
3857 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3858 if (Subtarget.hasHighWord())
3859 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3860 MVT::i32, Out64);
3861 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3862 DAG.getConstant(32, DL, MVT::i64));
3863 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3864 }
3865 llvm_unreachable("Unexpected bitcast combination");
3866}
3867
3868SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3869 SelectionDAG &DAG) const {
3870
3871 if (Subtarget.isTargetXPLINK64())
3872 return lowerVASTART_XPLINK(Op, DAG);
3873 else
3874 return lowerVASTART_ELF(Op, DAG);
3875}
3876
3877SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3878 SelectionDAG &DAG) const {
3880 SystemZMachineFunctionInfo *FuncInfo =
3882
3883 SDLoc DL(Op);
3884
3885 // vastart just stores the address of the VarArgsFrameIndex slot into the
3886 // memory location argument.
3887 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3888 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3889 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3890 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3891 MachinePointerInfo(SV));
3892}
3893
3894SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3895 SelectionDAG &DAG) const {
3897 SystemZMachineFunctionInfo *FuncInfo =
3899 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3900
3901 SDValue Chain = Op.getOperand(0);
3902 SDValue Addr = Op.getOperand(1);
3903 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3904 SDLoc DL(Op);
3905
3906 // The initial values of each field.
3907 const unsigned NumFields = 4;
3908 SDValue Fields[NumFields] = {
3909 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3910 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3911 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3912 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3913 };
3914
3915 // Store each field into its respective slot.
3916 SDValue MemOps[NumFields];
3917 unsigned Offset = 0;
3918 for (unsigned I = 0; I < NumFields; ++I) {
3919 SDValue FieldAddr = Addr;
3920 if (Offset != 0)
3921 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3923 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3925 Offset += 8;
3926 }
3927 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3928}
3929
3930SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3931 SelectionDAG &DAG) const {
3932 SDValue Chain = Op.getOperand(0);
3933 SDValue DstPtr = Op.getOperand(1);
3934 SDValue SrcPtr = Op.getOperand(2);
3935 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3936 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3937 SDLoc DL(Op);
3938
3939 uint32_t Sz =
3940 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
3941 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3942 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3943 /*isTailCall*/ false, MachinePointerInfo(DstSV),
3944 MachinePointerInfo(SrcSV));
3945}
3946
3947SDValue
3948SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3949 SelectionDAG &DAG) const {
3950 if (Subtarget.isTargetXPLINK64())
3951 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3952 else
3953 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3954}
3955
3956SDValue
3957SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3958 SelectionDAG &DAG) const {
3959 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3961 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3962 SDValue Chain = Op.getOperand(0);
3963 SDValue Size = Op.getOperand(1);
3964 SDValue Align = Op.getOperand(2);
3965 SDLoc DL(Op);
3966
3967 // If user has set the no alignment function attribute, ignore
3968 // alloca alignments.
3969 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
3970
3971 uint64_t StackAlign = TFI->getStackAlignment();
3972 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3973 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3974
3975 SDValue NeededSpace = Size;
3976
3977 // Add extra space for alignment if needed.
3978 EVT PtrVT = getPointerTy(MF.getDataLayout());
3979 if (ExtraAlignSpace)
3980 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3981 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3982
3983 bool IsSigned = false;
3984 bool DoesNotReturn = false;
3985 bool IsReturnValueUsed = false;
3986 EVT VT = Op.getValueType();
3987 SDValue AllocaCall =
3988 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
3989 CallingConv::C, IsSigned, DL, DoesNotReturn,
3990 IsReturnValueUsed)
3991 .first;
3992
3993 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
3994 // to end of call in order to ensure it isn't broken up from the call
3995 // sequence.
3996 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
3997 Register SPReg = Regs.getStackPointerRegister();
3998 Chain = AllocaCall.getValue(1);
3999 SDValue Glue = AllocaCall.getValue(2);
4000 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4001 Chain = NewSPRegNode.getValue(1);
4002
4003 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4004 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4005 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4006
4007 // Dynamically realign if needed.
4008 if (ExtraAlignSpace) {
4009 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4010 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4011 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4012 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4013 }
4014
4015 SDValue Ops[2] = {Result, Chain};
4016 return DAG.getMergeValues(Ops, DL);
4017}
4018
4019SDValue
4020SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4021 SelectionDAG &DAG) const {
4022 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4024 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4025 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4026
4027 SDValue Chain = Op.getOperand(0);
4028 SDValue Size = Op.getOperand(1);
4029 SDValue Align = Op.getOperand(2);
4030 SDLoc DL(Op);
4031
4032 // If user has set the no alignment function attribute, ignore
4033 // alloca alignments.
4034 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4035
4036 uint64_t StackAlign = TFI->getStackAlignment();
4037 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4038 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4039
4041 SDValue NeededSpace = Size;
4042
4043 // Get a reference to the stack pointer.
4044 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4045
4046 // If we need a backchain, save it now.
4047 SDValue Backchain;
4048 if (StoreBackchain)
4049 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4051
4052 // Add extra space for alignment if needed.
4053 if (ExtraAlignSpace)
4054 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4055 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4056
4057 // Get the new stack pointer value.
4058 SDValue NewSP;
4059 if (hasInlineStackProbe(MF)) {
4061 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4062 Chain = NewSP.getValue(1);
4063 }
4064 else {
4065 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4066 // Copy the new stack pointer back.
4067 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4068 }
4069
4070 // The allocated data lives above the 160 bytes allocated for the standard
4071 // frame, plus any outgoing stack arguments. We don't know how much that
4072 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4073 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4074 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4075
4076 // Dynamically realign if needed.
4077 if (RequiredAlign > StackAlign) {
4078 Result =
4079 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4080 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4081 Result =
4082 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4083 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4084 }
4085
4086 if (StoreBackchain)
4087 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4089
4090 SDValue Ops[2] = { Result, Chain };
4091 return DAG.getMergeValues(Ops, DL);
4092}
4093
4094SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4095 SDValue Op, SelectionDAG &DAG) const {
4096 SDLoc DL(Op);
4097
4098 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4099}
4100
4101SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4102 SelectionDAG &DAG) const {
4103 EVT VT = Op.getValueType();
4104 SDLoc DL(Op);
4105 SDValue Ops[2];
4106 if (is32Bit(VT))
4107 // Just do a normal 64-bit multiplication and extract the results.
4108 // We define this so that it can be used for constant division.
4109 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4110 Op.getOperand(1), Ops[1], Ops[0]);
4111 else if (Subtarget.hasMiscellaneousExtensions2())
4112 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4113 // the high result in the even register. ISD::SMUL_LOHI is defined to
4114 // return the low half first, so the results are in reverse order.
4116 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4117 else {
4118 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4119 //
4120 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4121 //
4122 // but using the fact that the upper halves are either all zeros
4123 // or all ones:
4124 //
4125 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4126 //
4127 // and grouping the right terms together since they are quicker than the
4128 // multiplication:
4129 //
4130 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
4131 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4132 SDValue LL = Op.getOperand(0);
4133 SDValue RL = Op.getOperand(1);
4134 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4135 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4136 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4137 // the high result in the even register. ISD::SMUL_LOHI is defined to
4138 // return the low half first, so the results are in reverse order.
4140 LL, RL, Ops[1], Ops[0]);
4141 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4142 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4143 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4144 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4145 }
4146 return DAG.getMergeValues(Ops, DL);
4147}
4148
4149SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4150 SelectionDAG &DAG) const {
4151 EVT VT = Op.getValueType();
4152 SDLoc DL(Op);
4153 SDValue Ops[2];
4154 if (is32Bit(VT))
4155 // Just do a normal 64-bit multiplication and extract the results.
4156 // We define this so that it can be used for constant division.
4157 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4158 Op.getOperand(1), Ops[1], Ops[0]);
4159 else
4160 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4161 // the high result in the even register. ISD::UMUL_LOHI is defined to
4162 // return the low half first, so the results are in reverse order.
4164 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4165 return DAG.getMergeValues(Ops, DL);
4166}
4167
4168SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4169 SelectionDAG &DAG) const {
4170 SDValue Op0 = Op.getOperand(0);
4171 SDValue Op1 = Op.getOperand(1);
4172 EVT VT = Op.getValueType();
4173 SDLoc DL(Op);
4174
4175 // We use DSGF for 32-bit division. This means the first operand must
4176 // always be 64-bit, and the second operand should be 32-bit whenever
4177 // that is possible, to improve performance.
4178 if (is32Bit(VT))
4179 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4180 else if (DAG.ComputeNumSignBits(Op1) > 32)
4181 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4182
4183 // DSG(F) returns the remainder in the even register and the
4184 // quotient in the odd register.
4185 SDValue Ops[2];
4186 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4187 return DAG.getMergeValues(Ops, DL);
4188}
4189
4190SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4191 SelectionDAG &DAG) const {
4192 EVT VT = Op.getValueType();
4193 SDLoc DL(Op);
4194
4195 // DL(G) returns the remainder in the even register and the
4196 // quotient in the odd register.
4197 SDValue Ops[2];
4199 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4200 return DAG.getMergeValues(Ops, DL);
4201}
4202
4203SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4204 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4205
4206 // Get the known-zero masks for each operand.
4207 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4208 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4209 DAG.computeKnownBits(Ops[1])};
4210
4211 // See if the upper 32 bits of one operand and the lower 32 bits of the
4212 // other are known zero. They are the low and high operands respectively.
4213 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4214 Known[1].Zero.getZExtValue() };
4215 unsigned High, Low;
4216 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4217 High = 1, Low = 0;
4218 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4219 High = 0, Low = 1;
4220 else
4221 return Op;
4222
4223 SDValue LowOp = Ops[Low];
4224 SDValue HighOp = Ops[High];
4225
4226 // If the high part is a constant, we're better off using IILH.
4227 if (HighOp.getOpcode() == ISD::Constant)
4228 return Op;
4229
4230 // If the low part is a constant that is outside the range of LHI,
4231 // then we're better off using IILF.
4232 if (LowOp.getOpcode() == ISD::Constant) {
4233 int64_t Value = int32_t(LowOp->getAsZExtVal());
4234 if (!isInt<16>(Value))
4235 return Op;
4236 }
4237
4238 // Check whether the high part is an AND that doesn't change the
4239 // high 32 bits and just masks out low bits. We can skip it if so.
4240 if (HighOp.getOpcode() == ISD::AND &&
4241 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4242 SDValue HighOp0 = HighOp.getOperand(0);
4244 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4245 HighOp = HighOp0;
4246 }
4247
4248 // Take advantage of the fact that all GR32 operations only change the
4249 // low 32 bits by truncating Low to an i32 and inserting it directly
4250 // using a subreg. The interesting cases are those where the truncation
4251 // can be folded.
4252 SDLoc DL(Op);
4253 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4254 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4255 MVT::i64, HighOp, Low32);
4256}
4257
4258// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4259SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4260 SelectionDAG &DAG) const {
4261 SDNode *N = Op.getNode();
4262 SDValue LHS = N->getOperand(0);
4263 SDValue RHS = N->getOperand(1);
4264 SDLoc DL(N);
4265
4266 if (N->getValueType(0) == MVT::i128) {
4267 unsigned BaseOp = 0;
4268 unsigned FlagOp = 0;
4269 bool IsBorrow = false;
4270 switch (Op.getOpcode()) {
4271 default: llvm_unreachable("Unknown instruction!");
4272 case ISD::UADDO:
4273 BaseOp = ISD::ADD;
4274 FlagOp = SystemZISD::VACC;
4275 break;
4276 case ISD::USUBO:
4277 BaseOp = ISD::SUB;
4278 FlagOp = SystemZISD::VSCBI;
4279 IsBorrow = true;
4280 break;
4281 }
4282 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4283 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4284 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4285 DAG.getValueType(MVT::i1));
4286 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4287 if (IsBorrow)
4288 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4289 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4290 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4291 }
4292
4293 unsigned BaseOp = 0;
4294 unsigned CCValid = 0;
4295 unsigned CCMask = 0;
4296
4297 switch (Op.getOpcode()) {
4298 default: llvm_unreachable("Unknown instruction!");
4299 case ISD::SADDO:
4300 BaseOp = SystemZISD::SADDO;
4301 CCValid = SystemZ::CCMASK_ARITH;
4303 break;
4304 case ISD::SSUBO:
4305 BaseOp = SystemZISD::SSUBO;
4306 CCValid = SystemZ::CCMASK_ARITH;
4308 break;
4309 case ISD::UADDO:
4310 BaseOp = SystemZISD::UADDO;
4311 CCValid = SystemZ::CCMASK_LOGICAL;
4313 break;
4314 case ISD::USUBO:
4315 BaseOp = SystemZISD::USUBO;
4316 CCValid = SystemZ::CCMASK_LOGICAL;
4318 break;
4319 }
4320
4321 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4322 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4323
4324 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4325 if (N->getValueType(1) == MVT::i1)
4326 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4327
4328 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4329}
4330
4331static bool isAddCarryChain(SDValue Carry) {
4332 while (Carry.getOpcode() == ISD::UADDO_CARRY)
4333 Carry = Carry.getOperand(2);
4334 return Carry.getOpcode() == ISD::UADDO;
4335}
4336
4337static bool isSubBorrowChain(SDValue Carry) {
4338 while (Carry.getOpcode() == ISD::USUBO_CARRY)
4339 Carry = Carry.getOperand(2);
4340 return Carry.getOpcode() == ISD::USUBO;
4341}
4342
4343// Lower UADDO_CARRY/USUBO_CARRY nodes.
4344SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4345 SelectionDAG &DAG) const {
4346
4347 SDNode *N = Op.getNode();
4348 MVT VT = N->getSimpleValueType(0);
4349
4350 // Let legalize expand this if it isn't a legal type yet.
4351 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4352 return SDValue();
4353
4354 SDValue LHS = N->getOperand(0);
4355 SDValue RHS = N->getOperand(1);
4356 SDValue Carry = Op.getOperand(2);
4357 SDLoc DL(N);
4358
4359 if (VT == MVT::i128) {
4360 unsigned BaseOp = 0;
4361 unsigned FlagOp = 0;
4362 bool IsBorrow = false;
4363 switch (Op.getOpcode()) {
4364 default: llvm_unreachable("Unknown instruction!");
4365 case ISD::UADDO_CARRY:
4366 BaseOp = SystemZISD::VAC;
4367 FlagOp = SystemZISD::VACCC;
4368 break;
4369 case ISD::USUBO_CARRY:
4370 BaseOp = SystemZISD::VSBI;
4371 FlagOp = SystemZISD::VSBCBI;
4372 IsBorrow = true;
4373 break;
4374 }
4375 if (IsBorrow)
4376 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4377 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4378 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4379 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4380 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4381 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4382 DAG.getValueType(MVT::i1));
4383 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4384 if (IsBorrow)
4385 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4386 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4387 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4388 }
4389
4390 unsigned BaseOp = 0;
4391 unsigned CCValid = 0;
4392 unsigned CCMask = 0;
4393
4394 switch (Op.getOpcode()) {
4395 default: llvm_unreachable("Unknown instruction!");
4396 case ISD::UADDO_CARRY:
4397 if (!isAddCarryChain(Carry))
4398 return SDValue();
4399
4400 BaseOp = SystemZISD::ADDCARRY;
4401 CCValid = SystemZ::CCMASK_LOGICAL;
4403 break;
4404 case ISD::USUBO_CARRY:
4405 if (!isSubBorrowChain(Carry))
4406 return SDValue();
4407
4408 BaseOp = SystemZISD::SUBCARRY;
4409 CCValid = SystemZ::CCMASK_LOGICAL;
4411 break;
4412 }
4413
4414 // Set the condition code from the carry flag.
4415 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4416 DAG.getConstant(CCValid, DL, MVT::i32),
4417 DAG.getConstant(CCMask, DL, MVT::i32));
4418
4419 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4420 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4421
4422 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4423 if (N->getValueType(1) == MVT::i1)
4424 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4425
4426 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4427}
4428
4429SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4430 SelectionDAG &DAG) const {
4431 EVT VT = Op.getValueType();
4432 SDLoc DL(Op);
4433 Op = Op.getOperand(0);
4434
4435 if (VT.getScalarSizeInBits() == 128) {
4436 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4437 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4438 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4439 DAG.getConstant(0, DL, MVT::i64));
4440 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4441 return Op;
4442 }
4443
4444 // Handle vector types via VPOPCT.
4445 if (VT.isVector()) {
4446 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4447 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4448 switch (VT.getScalarSizeInBits()) {
4449 case 8:
4450 break;
4451 case 16: {
4452 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4453 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4454 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4455 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4456 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4457 break;
4458 }
4459 case 32: {
4460 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4461 DAG.getConstant(0, DL, MVT::i32));
4462 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4463 break;
4464 }
4465 case 64: {
4466 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4467 DAG.getConstant(0, DL, MVT::i32));
4468 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4469 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4470 break;
4471 }
4472 default:
4473 llvm_unreachable("Unexpected type");
4474 }
4475 return Op;
4476 }
4477
4478 // Get the known-zero mask for the operand.
4479 KnownBits Known = DAG.computeKnownBits(Op);
4480 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
4481 if (NumSignificantBits == 0)
4482 return DAG.getConstant(0, DL, VT);
4483
4484 // Skip known-zero high parts of the operand.
4485 int64_t OrigBitSize = VT.getSizeInBits();
4486 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
4487 BitSize = std::min(BitSize, OrigBitSize);
4488
4489 // The POPCNT instruction counts the number of bits in each byte.
4490 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
4491 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
4492 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
4493
4494 // Add up per-byte counts in a binary tree. All bits of Op at
4495 // position larger than BitSize remain zero throughout.
4496 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
4497 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
4498 if (BitSize != OrigBitSize)
4499 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
4500 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
4501 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4502 }
4503
4504 // Extract overall result from high byte.
4505 if (BitSize > 8)
4506 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
4507 DAG.getConstant(BitSize - 8, DL, VT));
4508
4509 return Op;
4510}
4511
4512SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
4513 SelectionDAG &DAG) const {
4514 SDLoc DL(Op);
4515 AtomicOrdering FenceOrdering =
4516 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
4517 SyncScope::ID FenceSSID =
4518 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
4519
4520 // The only fence that needs an instruction is a sequentially-consistent
4521 // cross-thread fence.
4522 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
4523 FenceSSID == SyncScope::System) {
4524 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
4525 Op.getOperand(0)),
4526 0);
4527 }
4528
4529 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
4530 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
4531}
4532
4533SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
4534 SelectionDAG &DAG) const {
4535 auto *Node = cast<AtomicSDNode>(Op.getNode());
4536 assert(Node->getMemoryVT() == MVT::i128 && "Only custom lowering i128.");
4537 // Use same code to handle both legal and non-legal i128 types.
4540 return DAG.getMergeValues(Results, SDLoc(Op));
4541}
4542
4543// Prepare for a Compare And Swap for a subword operation. This needs to be
4544// done in memory with 4 bytes at natural alignment.
4546 SDValue &AlignedAddr, SDValue &BitShift,
4547 SDValue &NegBitShift) {
4548 EVT PtrVT = Addr.getValueType();
4549 EVT WideVT = MVT::i32;
4550
4551 // Get the address of the containing word.
4552 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4553 DAG.getConstant(-4, DL, PtrVT));
4554
4555 // Get the number of bits that the word must be rotated left in order
4556 // to bring the field to the top bits of a GR32.
4557 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
4558 DAG.getConstant(3, DL, PtrVT));
4559 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
4560
4561 // Get the complementing shift amount, for rotating a field in the top
4562 // bits back to its proper position.
4563 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
4564 DAG.getConstant(0, DL, WideVT), BitShift);
4565
4566}
4567
4568// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
4569// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
4570SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
4571 SelectionDAG &DAG,
4572 unsigned Opcode) const {
4573 auto *Node = cast<AtomicSDNode>(Op.getNode());
4574
4575 // 32-bit operations need no special handling.
4576 EVT NarrowVT = Node->getMemoryVT();
4577 EVT WideVT = MVT::i32;
4578 if (NarrowVT == WideVT)
4579 return Op;
4580
4581 int64_t BitSize = NarrowVT.getSizeInBits();
4582 SDValue ChainIn = Node->getChain();
4583 SDValue Addr = Node->getBasePtr();
4584 SDValue Src2 = Node->getVal();
4585 MachineMemOperand *MMO = Node->getMemOperand();
4586 SDLoc DL(Node);
4587
4588 // Convert atomic subtracts of constants into additions.
4589 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4590 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4592 Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4593 }
4594
4595 SDValue AlignedAddr, BitShift, NegBitShift;
4596 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4597
4598 // Extend the source operand to 32 bits and prepare it for the inner loop.
4599 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
4600 // operations require the source to be shifted in advance. (This shift
4601 // can be folded if the source is constant.) For AND and NAND, the lower
4602 // bits must be set, while for other opcodes they should be left clear.
4603 if (Opcode != SystemZISD::ATOMIC_SWAPW)
4604 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
4605 DAG.getConstant(32 - BitSize, DL, WideVT));
4606 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
4608 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
4609 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
4610
4611 // Construct the ATOMIC_LOADW_* node.
4612 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
4613 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4614 DAG.getConstant(BitSize, DL, WideVT) };
4615 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4616 NarrowVT, MMO);
4617
4618 // Rotate the result of the final CS so that the field is in the lower
4619 // bits of a GR32, then truncate it.
4620 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4621 DAG.getConstant(BitSize, DL, WideVT));
4622 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4623
4624 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4625 return DAG.getMergeValues(RetOps, DL);
4626}
4627
4628// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
4629// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
4630SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
4631 SelectionDAG &DAG) const {
4632 auto *Node = cast<AtomicSDNode>(Op.getNode());
4633 EVT MemVT = Node->getMemoryVT();
4634 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
4635 // A full-width operation: negate and use LAA(G).
4636 assert(Op.getValueType() == MemVT && "Mismatched VTs");
4637 assert(Subtarget.hasInterlockedAccess1() &&
4638 "Should have been expanded by AtomicExpand pass.");
4639 SDValue Src2 = Node->getVal();
4640 SDLoc DL(Src2);
4641 SDValue NegSrc2 =
4642 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
4643 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
4644 Node->getChain(), Node->getBasePtr(), NegSrc2,
4645 Node->getMemOperand());
4646 }
4647
4648 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
4649}
4650
4651// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4652SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4653 SelectionDAG &DAG) const {
4654 auto *Node = cast<AtomicSDNode>(Op.getNode());
4655 SDValue ChainIn = Node->getOperand(0);
4656 SDValue Addr = Node->getOperand(1);
4657 SDValue CmpVal = Node->getOperand(2);
4658 SDValue SwapVal = Node->getOperand(3);
4659 MachineMemOperand *MMO = Node->getMemOperand();
4660 SDLoc DL(Node);
4661
4662 if (Node->getMemoryVT() == MVT::i128) {
4663 // Use same code to handle both legal and non-legal i128 types.
4666 return DAG.getMergeValues(Results, DL);
4667 }
4668
4669 // We have native support for 32-bit and 64-bit compare and swap, but we
4670 // still need to expand extracting the "success" result from the CC.
4671 EVT NarrowVT = Node->getMemoryVT();
4672 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4673 if (NarrowVT == WideVT) {
4674 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4675 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
4677 DL, Tys, Ops, NarrowVT, MMO);
4678 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4680
4681 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4682 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4683 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4684 return SDValue();
4685 }
4686
4687 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4688 // via a fullword ATOMIC_CMP_SWAPW operation.
4689 int64_t BitSize = NarrowVT.getSizeInBits();
4690
4691 SDValue AlignedAddr, BitShift, NegBitShift;
4692 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
4693
4694 // Construct the ATOMIC_CMP_SWAPW node.
4695 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4696 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
4697 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
4699 VTList, Ops, NarrowVT, MMO);
4700 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
4702
4703 // emitAtomicCmpSwapW() will zero extend the result (original value).
4704 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
4705 DAG.getValueType(NarrowVT));
4706 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
4707 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4708 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4709 return SDValue();
4710}
4711
4713SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
4714 // Because of how we convert atomic_load and atomic_store to normal loads and
4715 // stores in the DAG, we need to ensure that the MMOs are marked volatile
4716 // since DAGCombine hasn't been updated to account for atomic, but non
4717 // volatile loads. (See D57601)
4718 if (auto *SI = dyn_cast<StoreInst>(&I))
4719 if (SI->isAtomic())
4721 if (auto *LI = dyn_cast<LoadInst>(&I))
4722 if (LI->isAtomic())
4724 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
4725 if (AI->isAtomic())
4727 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
4728 if (AI->isAtomic())
4731}
4732
4733SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
4734 SelectionDAG &DAG) const {
4736 auto *Regs = Subtarget.getSpecialRegisters();
4738 report_fatal_error("Variable-sized stack allocations are not supported "
4739 "in GHC calling convention");
4740 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
4741 Regs->getStackPointerRegister(), Op.getValueType());
4742}
4743
4744SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
4745 SelectionDAG &DAG) const {
4747 auto *Regs = Subtarget.getSpecialRegisters();
4748 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4749
4751 report_fatal_error("Variable-sized stack allocations are not supported "
4752 "in GHC calling convention");
4753
4754 SDValue Chain = Op.getOperand(0);
4755 SDValue NewSP = Op.getOperand(1);
4756 SDValue Backchain;
4757 SDLoc DL(Op);
4758
4759 if (StoreBackchain) {
4760 SDValue OldSP = DAG.getCopyFromReg(
4761 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
4762 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4764 }
4765
4766 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
4767
4768 if (StoreBackchain)
4769 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4771
4772 return Chain;
4773}
4774
4775SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
4776 SelectionDAG &DAG) const {
4777 bool IsData = Op.getConstantOperandVal(4);
4778 if (!IsData)
4779 // Just preserve the chain.
4780 return Op.getOperand(0);
4781
4782 SDLoc DL(Op);
4783 bool IsWrite = Op.getConstantOperandVal(2);
4784 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
4785 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
4786 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
4787 Op.getOperand(1)};
4789 Node->getVTList(), Ops,
4790 Node->getMemoryVT(), Node->getMemOperand());
4791}
4792
4793// Convert condition code in CCReg to an i32 value.
4795 SDLoc DL(CCReg);
4796 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
4797 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
4798 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
4799}
4800
4801SDValue
4802SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
4803 SelectionDAG &DAG) const {
4804 unsigned Opcode, CCValid;
4805 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
4806 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
4807 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
4808 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
4809 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
4810 return SDValue();
4811 }
4812
4813 return SDValue();
4814}
4815
4816SDValue
4817SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
4818 SelectionDAG &DAG) const {
4819 unsigned Opcode, CCValid;
4820 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
4821 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
4822 if (Op->getNumValues() == 1)
4823 return getCCResult(DAG, SDValue(Node, 0));
4824 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
4825 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
4826 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
4827 }
4828
4829 unsigned Id = Op.getConstantOperandVal(0);
4830 switch (Id) {
4831 case Intrinsic::thread_pointer:
4832 return lowerThreadPointer(SDLoc(Op), DAG);
4833
4834 case Intrinsic::s390_vpdi:
4835 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
4836 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4837
4838 case Intrinsic::s390_vperm:
4839 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
4840 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4841
4842 case Intrinsic::s390_vuphb:
4843 case Intrinsic::s390_vuphh:
4844 case Intrinsic::s390_vuphf:
4845 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
4846 Op.getOperand(1));
4847
4848 case Intrinsic::s390_vuplhb:
4849 case Intrinsic::s390_vuplhh:
4850 case Intrinsic::s390_vuplhf:
4851 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
4852 Op.getOperand(1));
4853
4854 case Intrinsic::s390_vuplb:
4855 case Intrinsic::s390_vuplhw:
4856 case Intrinsic::s390_vuplf:
4857 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
4858 Op.getOperand(1));
4859
4860 case Intrinsic::s390_vupllb:
4861 case Intrinsic::s390_vupllh:
4862 case Intrinsic::s390_vupllf:
4863 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
4864 Op.getOperand(1));
4865
4866 case Intrinsic::s390_vsumb:
4867 case Intrinsic::s390_vsumh:
4868 case Intrinsic::s390_vsumgh:
4869 case Intrinsic::s390_vsumgf:
4870 case Intrinsic::s390_vsumqf:
4871 case Intrinsic::s390_vsumqg:
4872 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
4873 Op.getOperand(1), Op.getOperand(2));
4874
4875 case Intrinsic::s390_vaq:
4876 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
4877 Op.getOperand(1), Op.getOperand(2));
4878 case Intrinsic::s390_vaccb:
4879 case Intrinsic::s390_vacch:
4880 case Intrinsic::s390_vaccf:
4881 case Intrinsic::s390_vaccg:
4882 case Intrinsic::s390_vaccq:
4883 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
4884 Op.getOperand(1), Op.getOperand(2));
4885 case Intrinsic::s390_vacq:
4886 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
4887 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4888 case Intrinsic::s390_vacccq:
4889 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
4890 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4891
4892 case Intrinsic::s390_vsq:
4893 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
4894 Op.getOperand(1), Op.getOperand(2));
4895 case Intrinsic::s390_vscbib:
4896 case Intrinsic::s390_vscbih:
4897 case Intrinsic::s390_vscbif:
4898 case Intrinsic::s390_vscbig:
4899 case Intrinsic::s390_vscbiq:
4900 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
4901 Op.getOperand(1), Op.getOperand(2));
4902 case Intrinsic::s390_vsbiq:
4903 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
4904 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4905 case Intrinsic::s390_vsbcbiq:
4906 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
4907 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
4908 }
4909
4910 return SDValue();
4911}
4912
4913namespace {
4914// Says that SystemZISD operation Opcode can be used to perform the equivalent
4915// of a VPERM with permute vector Bytes. If Opcode takes three operands,
4916// Operand is the constant third operand, otherwise it is the number of
4917// bytes in each element of the result.
4918struct Permute {
4919 unsigned Opcode;
4920 unsigned Operand;
4921 unsigned char Bytes[SystemZ::VectorBytes];
4922};
4923}
4924
4925static const Permute PermuteForms[] = {
4926 // VMRHG
4928 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
4929 // VMRHF
4931 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
4932 // VMRHH
4934 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
4935 // VMRHB
4937 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
4938 // VMRLG
4940 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
4941 // VMRLF
4943 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
4944 // VMRLH
4946 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
4947 // VMRLB
4949 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
4950 // VPKG
4951 { SystemZISD::PACK, 4,
4952 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
4953 // VPKF
4954 { SystemZISD::PACK, 2,
4955 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
4956 // VPKH
4957 { SystemZISD::PACK, 1,
4958 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
4959 // VPDI V1, V2, 4 (low half of V1, high half of V2)
4961 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
4962 // VPDI V1, V2, 1 (high half of V1, low half of V2)
4964 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
4965};
4966
4967// Called after matching a vector shuffle against a particular pattern.
4968// Both the original shuffle and the pattern have two vector operands.
4969// OpNos[0] is the operand of the original shuffle that should be used for
4970// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
4971// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
4972// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
4973// for operands 0 and 1 of the pattern.
4974static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
4975 if (OpNos[0] < 0) {
4976 if (OpNos[1] < 0)
4977 return false;
4978 OpNo0 = OpNo1 = OpNos[1];
4979 } else if (OpNos[1] < 0) {
4980 OpNo0 = OpNo1 = OpNos[0];
4981 } else {
4982 OpNo0 = OpNos[0];
4983 OpNo1 = OpNos[1];
4984 }
4985 return true;
4986}
4987
4988// Bytes is a VPERM-like permute vector, except that -1 is used for
4989// undefined bytes. Return true if the VPERM can be implemented using P.
4990// When returning true set OpNo0 to the VPERM operand that should be
4991// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
4992//
4993// For example, if swapping the VPERM operands allows P to match, OpNo0
4994// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
4995// operand, but rewriting it to use two duplicated operands allows it to
4996// match P, then OpNo0 and OpNo1 will be the same.
4997static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
4998 unsigned &OpNo0, unsigned &OpNo1) {
4999 int OpNos[] = { -1, -1 };
5000 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5001 int Elt = Bytes[I];
5002 if (Elt >= 0) {
5003 // Make sure that the two permute vectors use the same suboperand
5004 // byte number. Only the operand numbers (the high bits) are
5005 // allowed to differ.
5006 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5007 return false;
5008 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5009 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5010 // Make sure that the operand mappings are consistent with previous
5011 // elements.
5012 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5013 return false;
5014 OpNos[ModelOpNo] = RealOpNo;
5015 }
5016 }
5017 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5018}
5019
5020// As above, but search for a matching permute.
5021static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5022 unsigned &OpNo0, unsigned &OpNo1) {
5023 for (auto &P : PermuteForms)
5024 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5025 return &P;
5026 return nullptr;
5027}
5028
5029// Bytes is a VPERM-like permute vector, except that -1 is used for
5030// undefined bytes. This permute is an operand of an outer permute.
5031// See whether redistributing the -1 bytes gives a shuffle that can be
5032// implemented using P. If so, set Transform to a VPERM-like permute vector
5033// that, when applied to the result of P, gives the original permute in Bytes.
5035 const Permute &P,
5036 SmallVectorImpl<int> &Transform) {
5037 unsigned To = 0;
5038 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5039 int Elt = Bytes[From];
5040 if (Elt < 0)
5041 // Byte number From of the result is undefined.
5042 Transform[From] = -1;
5043 else {
5044 while (P.Bytes[To] != Elt) {
5045 To += 1;
5046 if (To == SystemZ::VectorBytes)
5047 return false;
5048 }
5049 Transform[From] = To;
5050 }
5051 }
5052 return true;
5053}
5054
5055// As above, but search for a matching permute.
5056static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5057 SmallVectorImpl<int> &Transform) {
5058 for (auto &P : PermuteForms)
5059 if (matchDoublePermute(Bytes, P, Transform))
5060 return &P;
5061 return nullptr;
5062}
5063
5064// Convert the mask of the given shuffle op into a byte-level mask,
5065// as if it had type vNi8.
5066static bool getVPermMask(SDValue ShuffleOp,
5067 SmallVectorImpl<int> &Bytes) {
5068 EVT VT = ShuffleOp.getValueType();
5069 unsigned NumElements = VT.getVectorNumElements();
5070 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5071
5072 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5073 Bytes.resize(NumElements * BytesPerElement, -1);
5074 for (unsigned I = 0; I < NumElements; ++I) {
5075 int Index = VSN->getMaskElt(I);
5076 if (Index >= 0)
5077 for (unsigned J = 0; J < BytesPerElement; ++J)
5078 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5079 }
5080 return true;
5081 }
5082 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5083 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5084 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5085 Bytes.resize(NumElements * BytesPerElement, -1);
5086 for (unsigned I = 0; I < NumElements; ++I)
5087 for (unsigned J = 0; J < BytesPerElement; ++J)
5088 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5089 return true;
5090 }
5091 return false;
5092}
5093
5094// Bytes is a VPERM-like permute vector, except that -1 is used for
5095// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5096// the result come from a contiguous sequence of bytes from one input.
5097// Set Base to the selector for the first byte if so.
5098static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5099 unsigned BytesPerElement, int &Base) {
5100 Base = -1;
5101 for (unsigned I = 0; I < BytesPerElement; ++I) {
5102 if (Bytes[Start + I] >= 0) {
5103 unsigned Elem = Bytes[Start + I];
5104 if (Base < 0) {
5105 Base = Elem - I;
5106 // Make sure the bytes would come from one input operand.
5107 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5108 return false;
5109 } else if (unsigned(Base) != Elem - I)
5110 return false;
5111 }
5112 }
5113 return true;
5114}
5115
5116// Bytes is a VPERM-like permute vector, except that -1 is used for
5117// undefined bytes. Return true if it can be performed using VSLDB.
5118// When returning true, set StartIndex to the shift amount and OpNo0
5119// and OpNo1 to the VPERM operands that should be used as the first
5120// and second shift operand respectively.
5122 unsigned &StartIndex, unsigned &OpNo0,
5123 unsigned &OpNo1) {
5124 int OpNos[] = { -1, -1 };
5125 int Shift = -1;
5126 for (unsigned I = 0; I < 16; ++I) {
5127 int Index = Bytes[I];
5128 if (Index >= 0) {
5129 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5130 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5131 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5132 if (Shift < 0)
5133 Shift = ExpectedShift;
5134 else if (Shift != ExpectedShift)
5135 return false;
5136 // Make sure that the operand mappings are consistent with previous
5137 // elements.
5138 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5139 return false;
5140 OpNos[ModelOpNo] = RealOpNo;
5141 }
5142 }
5143 StartIndex = Shift;
5144 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5145}
5146
5147// Create a node that performs P on operands Op0 and Op1, casting the
5148// operands to the appropriate type. The type of the result is determined by P.
5150 const Permute &P, SDValue Op0, SDValue Op1) {
5151 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5152 // elements of a PACK are twice as wide as the outputs.
5153 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5154 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5155 P.Operand);
5156 // Cast both operands to the appropriate type.
5157 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5158 SystemZ::VectorBytes / InBytes);
5159 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5160 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5161 SDValue Op;
5162 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5163 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5164 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5165 } else if (P.Opcode == SystemZISD::PACK) {
5166 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5167 SystemZ::VectorBytes / P.Operand);
5168 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5169 } else {
5170 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5171 }
5172 return Op;
5173}
5174
5175static bool isZeroVector(SDValue N) {
5176 if (N->getOpcode() == ISD::BITCAST)
5177 N = N->getOperand(0);
5178 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5179 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5180 return Op->getZExtValue() == 0;
5181 return ISD::isBuildVectorAllZeros(N.getNode());
5182}
5183
5184// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5185static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5186 for (unsigned I = 0; I < Num ; I++)
5187 if (isZeroVector(Ops[I]))
5188 return I;
5189 return UINT32_MAX;
5190}
5191
5192// Bytes is a VPERM-like permute vector, except that -1 is used for
5193// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5194// VSLDB or VPERM.
5196 SDValue *Ops,
5197 const SmallVectorImpl<int> &Bytes) {
5198 for (unsigned I = 0; I < 2; ++I)
5199 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5200
5201 // First see whether VSLDB can be used.
5202 unsigned StartIndex, OpNo0, OpNo1;
5203 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5204 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5205 Ops[OpNo1],
5206 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5207
5208 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5209 // eliminate a zero vector by reusing any zero index in the permute vector.
5210 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5211 if (ZeroVecIdx != UINT32_MAX) {
5212 bool MaskFirst = true;
5213 int ZeroIdx = -1;
5214 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5215 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5216 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5217 if (OpNo == ZeroVecIdx && I == 0) {
5218 // If the first byte is zero, use mask as first operand.
5219 ZeroIdx = 0;
5220 break;
5221 }
5222 if (OpNo != ZeroVecIdx && Byte == 0) {
5223 // If mask contains a zero, use it by placing that vector first.
5224 ZeroIdx = I + SystemZ::VectorBytes;
5225 MaskFirst = false;
5226 break;
5227 }
5228 }
5229 if (ZeroIdx != -1) {
5230 SDValue IndexNodes[SystemZ::VectorBytes];
5231 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5232 if (Bytes[I] >= 0) {
5233 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5234 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5235 if (OpNo == ZeroVecIdx)
5236 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5237 else {
5238 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5239 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5240 }
5241 } else
5242 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5243 }
5244 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5245 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5246 if (MaskFirst)
5247 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5248 Mask);
5249 else
5250 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5251 Mask);
5252 }
5253 }
5254
5255 SDValue IndexNodes[SystemZ::VectorBytes];
5256 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5257 if (Bytes[I] >= 0)
5258 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5259 else
5260 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5261 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5262 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5263 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5264}
5265
5266namespace {
5267// Describes a general N-operand vector shuffle.
5268struct GeneralShuffle {
5269 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {}
5270 void addUndef();
5271 bool add(SDValue, unsigned);
5272 SDValue getNode(SelectionDAG &, const SDLoc &);
5273 void tryPrepareForUnpack();
5274 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5275 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5276
5277 // The operands of the shuffle.
5279
5280 // Index I is -1 if byte I of the result is undefined. Otherwise the
5281 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5282 // Bytes[I] / SystemZ::VectorBytes.
5284
5285 // The type of the shuffle result.
5286 EVT VT;
5287
5288 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5289 unsigned UnpackFromEltSize;
5290};
5291}
5292
5293// Add an extra undefined element to the shuffle.
5294void GeneralShuffle::addUndef() {
5295 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5296 for (unsigned I = 0; I < BytesPerElement; ++I)
5297 Bytes.push_back(-1);
5298}
5299
5300// Add an extra element to the shuffle, taking it from element Elem of Op.
5301// A null Op indicates a vector input whose value will be calculated later;
5302// there is at most one such input per shuffle and it always has the same
5303// type as the result. Aborts and returns false if the source vector elements
5304// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5305// LLVM they become implicitly extended, but this is rare and not optimized.
5306bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5307 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5308
5309 // The source vector can have wider elements than the result,
5310 // either through an explicit TRUNCATE or because of type legalization.
5311 // We want the least significant part.
5312 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5313 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5314
5315 // Return false if the source elements are smaller than their destination
5316 // elements.
5317 if (FromBytesPerElement < BytesPerElement)
5318 return false;
5319
5320 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5321 (FromBytesPerElement - BytesPerElement));
5322
5323 // Look through things like shuffles and bitcasts.
5324 while (Op.getNode()) {
5325 if (Op.getOpcode() == ISD::BITCAST)
5326 Op = Op.getOperand(0);
5327 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5328 // See whether the bytes we need come from a contiguous part of one
5329 // operand.
5331 if (!getVPermMask(Op, OpBytes))
5332 break;
5333 int NewByte;
5334 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5335 break;
5336 if (NewByte < 0) {
5337 addUndef();
5338 return true;
5339 }
5340 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5341 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5342 } else if (Op.isUndef()) {
5343 addUndef();
5344 return true;
5345 } else
5346 break;
5347 }
5348
5349 // Make sure that the source of the extraction is in Ops.
5350 unsigned OpNo = 0;
5351 for (; OpNo < Ops.size(); ++OpNo)
5352 if (Ops[OpNo] == Op)
5353 break;
5354 if (OpNo == Ops.size())
5355 Ops.push_back(Op);
5356
5357 // Add the element to Bytes.
5358 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5359 for (unsigned I = 0; I < BytesPerElement; ++I)
5360 Bytes.push_back(Base + I);
5361
5362 return true;
5363}
5364
5365// Return SDNodes for the completed shuffle.
5366SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
5367 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
5368
5369 if (Ops.size() == 0)
5370 return DAG.getUNDEF(VT);
5371
5372 // Use a single unpack if possible as the last operation.
5373 tryPrepareForUnpack();
5374
5375 // Make sure that there are at least two shuffle operands.
5376 if (Ops.size() == 1)
5377 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
5378
5379 // Create a tree of shuffles, deferring root node until after the loop.
5380 // Try to redistribute the undefined elements of non-root nodes so that
5381 // the non-root shuffles match something like a pack or merge, then adjust
5382 // the parent node's permute vector to compensate for the new order.
5383 // Among other things, this copes with vectors like <2 x i16> that were
5384 // padded with undefined elements during type legalization.
5385 //
5386 // In the best case this redistribution will lead to the whole tree
5387 // using packs and merges. It should rarely be a loss in other cases.
5388 unsigned Stride = 1;
5389 for (; Stride * 2 < Ops.size(); Stride *= 2) {
5390 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
5391 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
5392
5393 // Create a mask for just these two operands.
5395 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5396 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
5397 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
5398 if (OpNo == I)
5399 NewBytes[J] = Byte;
5400 else if (OpNo == I + Stride)
5401 NewBytes[J] = SystemZ::VectorBytes + Byte;
5402 else
5403 NewBytes[J] = -1;
5404 }
5405 // See if it would be better to reorganize NewMask to avoid using VPERM.
5407 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
5408 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
5409 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
5410 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
5411 if (NewBytes[J] >= 0) {
5412 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
5413 "Invalid double permute");
5414 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
5415 } else
5416 assert(NewBytesMap[J] < 0 && "Invalid double permute");
5417 }
5418 } else {
5419 // Just use NewBytes on the operands.
5420 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
5421 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
5422 if (NewBytes[J] >= 0)
5423 Bytes[J] = I * SystemZ::VectorBytes + J;
5424 }
5425 }
5426 }
5427
5428 // Now we just have 2 inputs. Put the second operand in Ops[1].
5429 if (Stride > 1) {
5430 Ops[1] = Ops[Stride];
5431 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5432 if (Bytes[I] >= int(SystemZ::VectorBytes))
5433 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
5434 }
5435
5436 // Look for an instruction that can do the permute without resorting
5437 // to VPERM.
5438 unsigned OpNo0, OpNo1;
5439 SDValue Op;
5440 if (unpackWasPrepared() && Ops[1].isUndef())
5441 Op = Ops[0];
5442 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
5443 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
5444 else
5445 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
5446
5447 Op = insertUnpackIfPrepared(DAG, DL, Op);
5448
5449 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5450}
5451
5452#ifndef NDEBUG
5453static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
5454 dbgs() << Msg.c_str() << " { ";
5455 for (unsigned i = 0; i < Bytes.size(); i++)
5456 dbgs() << Bytes[i] << " ";
5457 dbgs() << "}\n";
5458}
5459#endif
5460
5461// If the Bytes vector matches an unpack operation, prepare to do the unpack
5462// after all else by removing the zero vector and the effect of the unpack on
5463// Bytes.
5464void GeneralShuffle::tryPrepareForUnpack() {
5465 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
5466 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
5467 return;
5468
5469 // Only do this if removing the zero vector reduces the depth, otherwise
5470 // the critical path will increase with the final unpack.
5471 if (Ops.size() > 2 &&
5472 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
5473 return;
5474
5475 // Find an unpack that would allow removing the zero vector from Ops.
5476 UnpackFromEltSize = 1;
5477 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
5478 bool MatchUnpack = true;
5480 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
5481 unsigned ToEltSize = UnpackFromEltSize * 2;
5482 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
5483 if (!IsZextByte)
5484 SrcBytes.push_back(Bytes[Elt]);
5485 if (Bytes[Elt] != -1) {
5486 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
5487 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
5488 MatchUnpack = false;
5489 break;
5490 }
5491 }
5492 }
5493 if (MatchUnpack) {
5494 if (Ops.size() == 2) {
5495 // Don't use unpack if a single source operand needs rearrangement.
5496 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++)
5497 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) {
5498 UnpackFromEltSize = UINT_MAX;
5499 return;
5500 }
5501 }
5502 break;
5503 }
5504 }
5505 if (UnpackFromEltSize > 4)
5506 return;
5507
5508 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
5509 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
5510 << ".\n";
5511 dumpBytes(Bytes, "Original Bytes vector:"););
5512
5513 // Apply the unpack in reverse to the Bytes array.
5514 unsigned B = 0;
5515 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
5516 Elt += UnpackFromEltSize;
5517 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
5518 Bytes[B] = Bytes[Elt];
5519 }
5520 while (B < SystemZ::VectorBytes)
5521 Bytes[B++] = -1;
5522
5523 // Remove the zero vector from Ops
5524 Ops.erase(&Ops[ZeroVecOpNo]);
5525 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5526 if (Bytes[I] >= 0) {
5527 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5528 if (OpNo > ZeroVecOpNo)
5529 Bytes[I] -= SystemZ::VectorBytes;
5530 }
5531
5532 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
5533 dbgs() << "\n";);
5534}
5535
5536SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
5537 const SDLoc &DL,
5538 SDValue Op) {
5539 if (!unpackWasPrepared())
5540 return Op;
5541 unsigned InBits = UnpackFromEltSize * 8;
5542 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
5543 SystemZ::VectorBits / InBits);
5544 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
5545 unsigned OutBits = InBits * 2;
5546 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
5547 SystemZ::VectorBits / OutBits);
5548 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp);
5549}
5550
5551// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
5553 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
5554 if (!Op.getOperand(I).isUndef())
5555 return false;
5556 return true;
5557}
5558
5559// Return a vector of type VT that contains Value in the first element.
5560// The other elements don't matter.
5562 SDValue Value) {
5563 // If we have a constant, replicate it to all elements and let the
5564 // BUILD_VECTOR lowering take care of it.
5565 if (Value.getOpcode() == ISD::Constant ||
5566 Value.getOpcode() == ISD::ConstantFP) {
5568 return DAG.getBuildVector(VT, DL, Ops);
5569 }
5570 if (Value.isUndef())
5571 return DAG.getUNDEF(VT);
5572 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
5573}
5574
5575// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
5576// element 1. Used for cases in which replication is cheap.
5578 SDValue Op0, SDValue Op1) {
5579 if (Op0.isUndef()) {
5580 if (Op1.isUndef())
5581 return DAG.getUNDEF(VT);
5582 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
5583 }
5584 if (Op1.isUndef())
5585 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
5586 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
5587 buildScalarToVector(DAG, DL, VT, Op0),
5588 buildScalarToVector(DAG, DL, VT, Op1));
5589}
5590
5591// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
5592// vector for them.
5594 SDValue Op1) {
5595 if (Op0.isUndef() && Op1.isUndef())
5596 return DAG.getUNDEF(MVT::v2i64);
5597 // If one of the two inputs is undefined then replicate the other one,
5598 // in order to avoid using another register unnecessarily.
5599 if (Op0.isUndef())
5600 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5601 else if (Op1.isUndef())
5602 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5603 else {
5604 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
5605 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
5606 }
5607 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
5608}
5609
5610// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
5611// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
5612// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
5613// would benefit from this representation and return it if so.
5615 BuildVectorSDNode *BVN) {
5616 EVT VT = BVN->getValueType(0);
5617 unsigned NumElements = VT.getVectorNumElements();
5618
5619 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
5620 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
5621 // need a BUILD_VECTOR, add an additional placeholder operand for that
5622 // BUILD_VECTOR and store its operands in ResidueOps.
5623 GeneralShuffle GS(VT);
5625 bool FoundOne = false;
5626 for (unsigned I = 0; I < NumElements; ++I) {
5627 SDValue Op = BVN->getOperand(I);
5628 if (Op.getOpcode() == ISD::TRUNCATE)
5629 Op = Op.getOperand(0);
5630 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5631 Op.getOperand(1).getOpcode() == ISD::Constant) {
5632 unsigned Elem = Op.getConstantOperandVal(1);
5633 if (!GS.add(Op.getOperand(0), Elem))
5634 return SDValue();
5635 FoundOne = true;
5636 } else if (Op.isUndef()) {
5637 GS.addUndef();
5638 } else {
5639 if (!GS.add(SDValue(), ResidueOps.size()))
5640 return SDValue();
5641 ResidueOps.push_back(BVN->getOperand(I));
5642 }
5643 }
5644
5645 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
5646 if (!FoundOne)
5647 return SDValue();
5648
5649 // Create the BUILD_VECTOR for the remaining elements, if any.
5650 if (!ResidueOps.empty()) {
5651 while (ResidueOps.size() < NumElements)
5652 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
5653 for (auto &Op : GS.Ops) {
5654 if (!Op.getNode()) {
5655 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
5656 break;
5657 }
5658 }
5659 }
5660 return GS.getNode(DAG, SDLoc(BVN));
5661}
5662
5663bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
5664 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
5665 return true;
5666 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
5667 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
5668 return true;
5669 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
5670 return true;
5671 return false;
5672}
5673
5674// Combine GPR scalar values Elems into a vector of type VT.
5675SDValue
5676SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
5677 SmallVectorImpl<SDValue> &Elems) const {
5678 // See whether there is a single replicated value.
5680 unsigned int NumElements = Elems.size();
5681 unsigned int Count = 0;
5682 for (auto Elem : Elems) {
5683 if (!Elem.isUndef()) {
5684 if (!Single.getNode())
5685 Single = Elem;
5686 else if (Elem != Single) {
5687 Single = SDValue();
5688 break;
5689 }
5690 Count += 1;
5691 }
5692 }
5693 // There are three cases here:
5694 //
5695 // - if the only defined element is a loaded one, the best sequence
5696 // is a replicating load.
5697 //
5698 // - otherwise, if the only defined element is an i64 value, we will
5699 // end up with the same VLVGP sequence regardless of whether we short-cut
5700 // for replication or fall through to the later code.
5701 //
5702 // - otherwise, if the only defined element is an i32 or smaller value,
5703 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
5704 // This is only a win if the single defined element is used more than once.
5705 // In other cases we're better off using a single VLVGx.
5706 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
5707 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
5708
5709 // If all elements are loads, use VLREP/VLEs (below).
5710 bool AllLoads = true;
5711 for (auto Elem : Elems)
5712 if (!isVectorElementLoad(Elem)) {
5713 AllLoads = false;
5714 break;
5715 }
5716
5717 // The best way of building a v2i64 from two i64s is to use VLVGP.
5718 if (VT == MVT::v2i64 && !AllLoads)
5719 return joinDwords(DAG, DL, Elems[0], Elems[1]);
5720
5721 // Use a 64-bit merge high to combine two doubles.
5722 if (VT == MVT::v2f64 && !AllLoads)
5723 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5724
5725 // Build v4f32 values directly from the FPRs:
5726 //
5727 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
5728 // V V VMRHF
5729 // <ABxx> <CDxx>
5730 // V VMRHG
5731 // <ABCD>
5732 if (VT == MVT::v4f32 && !AllLoads) {
5733 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
5734 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
5735 // Avoid unnecessary undefs by reusing the other operand.
5736 if (Op01.isUndef())
5737 Op01 = Op23;
5738 else if (Op23.isUndef())
5739 Op23 = Op01;
5740 // Merging identical replications is a no-op.
5741 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
5742 return Op01;
5743 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
5744 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
5746 DL, MVT::v2i64, Op01, Op23);
5747 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
5748 }
5749
5750 // Collect the constant terms.
5753
5754 unsigned NumConstants = 0;
5755 for (unsigned I = 0; I < NumElements; ++I) {
5756 SDValue Elem = Elems[I];
5757 if (Elem.getOpcode() == ISD::Constant ||
5758 Elem.getOpcode() == ISD::ConstantFP) {
5759 NumConstants += 1;
5760 Constants[I] = Elem;
5761 Done[I] = true;
5762 }
5763 }
5764 // If there was at least one constant, fill in the other elements of
5765 // Constants with undefs to get a full vector constant and use that
5766 // as the starting point.
5768 SDValue ReplicatedVal;
5769 if (NumConstants > 0) {
5770 for (unsigned I = 0; I < NumElements; ++I)
5771 if (!Constants[I].getNode())
5772 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
5773 Result = DAG.getBuildVector(VT, DL, Constants);
5774 } else {
5775 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
5776 // avoid a false dependency on any previous contents of the vector
5777 // register.
5778
5779 // Use a VLREP if at least one element is a load. Make sure to replicate
5780 // the load with the most elements having its value.
5781 std::map<const SDNode*, unsigned> UseCounts;
5782 SDNode *LoadMaxUses = nullptr;
5783 for (unsigned I = 0; I < NumElements; ++I)
5784 if (isVectorElementLoad(Elems[I])) {
5785 SDNode *Ld = Elems[I].getNode();
5786 UseCounts[Ld]++;
5787 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
5788 LoadMaxUses = Ld;
5789 }
5790 if (LoadMaxUses != nullptr) {
5791 ReplicatedVal = SDValue(LoadMaxUses, 0);
5792 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
5793 } else {
5794 // Try to use VLVGP.
5795 unsigned I1 = NumElements / 2 - 1;
5796 unsigned I2 = NumElements - 1;
5797 bool Def1 = !Elems[I1].isUndef();
5798 bool Def2 = !Elems[I2].isUndef();
5799 if (Def1 || Def2) {
5800 SDValue Elem1 = Elems[Def1 ? I1 : I2];
5801 SDValue Elem2 = Elems[Def2 ? I2 : I1];
5802 Result = DAG.getNode(ISD::BITCAST, DL, VT,
5803 joinDwords(DAG, DL, Elem1, Elem2));
5804 Done[I1] = true;
5805 Done[I2] = true;
5806 } else
5807 Result = DAG.getUNDEF(VT);
5808 }
5809 }
5810
5811 // Use VLVGx to insert the other elements.
5812 for (unsigned I = 0; I < NumElements; ++I)
5813 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
5814 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
5815 DAG.getConstant(I, DL, MVT::i32));
5816 return Result;
5817}
5818
5819SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
5820 SelectionDAG &DAG) const {
5821 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
5822 SDLoc DL(Op);
5823 EVT VT = Op.getValueType();
5824
5825 if (BVN->isConstant()) {
5826 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
5827 return Op;
5828
5829 // Fall back to loading it from memory.
5830 return SDValue();
5831 }
5832
5833 // See if we should use shuffles to construct the vector from other vectors.
5834 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
5835 return Res;
5836
5837 // Detect SCALAR_TO_VECTOR conversions.
5839 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
5840
5841 // Otherwise use buildVector to build the vector up from GPRs.
5842 unsigned NumElements = Op.getNumOperands();
5844 for (unsigned I = 0; I < NumElements; ++I)
5845 Ops[I] = Op.getOperand(I);
5846 return buildVector(DAG, DL, VT, Ops);
5847}
5848
5849SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
5850 SelectionDAG &DAG) const {
5851 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
5852 SDLoc DL(Op);
5853 EVT VT = Op.getValueType();
5854 unsigned NumElements = VT.getVectorNumElements();
5855
5856 if (VSN->isSplat()) {
5857 SDValue Op0 = Op.getOperand(0);
5858 unsigned Index = VSN->getSplatIndex();
5860 "Splat index should be defined and in first operand");
5861 // See whether the value we're splatting is directly available as a scalar.
5862 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
5864 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
5865 // Otherwise keep it as a vector-to-vector operation.
5866 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
5867 DAG.getTargetConstant(Index, DL, MVT::i32));
5868 }
5869
5870 GeneralShuffle GS(VT);
5871 for (unsigned I = 0; I < NumElements; ++I) {
5872 int Elt = VSN->getMaskElt(I);
5873 if (Elt < 0)
5874 GS.addUndef();
5875 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
5876 unsigned(Elt) % NumElements))
5877 return SDValue();
5878 }
5879 return GS.getNode(DAG, SDLoc(VSN));
5880}
5881
5882SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
5883 SelectionDAG &DAG) const {
5884 SDLoc DL(Op);
5885 // Just insert the scalar into element 0 of an undefined vector.
5886 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
5887 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
5888 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
5889}
5890
5891SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
5892 SelectionDAG &DAG) const {
5893 // Handle insertions of floating-point values.
5894 SDLoc DL(Op);
5895 SDValue Op0 = Op.getOperand(0);
5896 SDValue Op1 = Op.getOperand(1);
5897 SDValue Op2 = Op.getOperand(2);
5898 EVT VT = Op.getValueType();
5899
5900 // Insertions into constant indices of a v2f64 can be done using VPDI.
5901 // However, if the inserted value is a bitcast or a constant then it's
5902 // better to use GPRs, as below.
5903 if (VT == MVT::v2f64 &&
5904 Op1.getOpcode() != ISD::BITCAST &&
5905 Op1.getOpcode() != ISD::ConstantFP &&
5906 Op2.getOpcode() == ISD::Constant) {
5907 uint64_t Index = Op2->getAsZExtVal();
5908 unsigned Mask = VT.getVectorNumElements() - 1;
5909 if (Index <= Mask)
5910 return Op;
5911 }
5912
5913 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
5915 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
5916 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
5917 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
5918 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
5919 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5920}
5921
5922SDValue
5923SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
5924 SelectionDAG &DAG) const {
5925 // Handle extractions of floating-point values.
5926 SDLoc DL(Op);
5927 SDValue Op0 = Op.getOperand(0);
5928 SDValue Op1 = Op.getOperand(1);
5929 EVT VT = Op.getValueType();
5930 EVT VecVT = Op0.getValueType();
5931
5932 // Extractions of constant indices can be done directly.
5933 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
5934 uint64_t Index = CIndexN->getZExtValue();
5935 unsigned Mask = VecVT.getVectorNumElements() - 1;
5936 if (Index <= Mask)
5937 return Op;
5938 }
5939
5940 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
5941 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
5942 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
5943 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
5944 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
5945 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
5946}
5947
5948SDValue SystemZTargetLowering::
5949lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5950 SDValue PackedOp = Op.getOperand(0);
5951 EVT OutVT = Op.getValueType();
5952 EVT InVT = PackedOp.getValueType();
5953 unsigned ToBits = OutVT.getScalarSizeInBits();
5954 unsigned FromBits = InVT.getScalarSizeInBits();
5955 do {
5956 FromBits *= 2;
5957 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
5958 SystemZ::VectorBits / FromBits);
5959 PackedOp =
5960 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp);
5961 } while (FromBits != ToBits);
5962 return PackedOp;
5963}
5964
5965// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
5966SDValue SystemZTargetLowering::
5967lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
5968 SDValue PackedOp = Op.getOperand(0);
5969 SDLoc DL(Op);
5970 EVT OutVT = Op.getValueType();
5971 EVT InVT = PackedOp.getValueType();
5972 unsigned InNumElts = InVT.getVectorNumElements();
5973 unsigned OutNumElts = OutVT.getVectorNumElements();
5974 unsigned NumInPerOut = InNumElts / OutNumElts;
5975
5976 SDValue ZeroVec =
5977 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
5978
5979 SmallVector<int, 16> Mask(InNumElts);
5980 unsigned ZeroVecElt = InNumElts;
5981 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
5982 unsigned MaskElt = PackedElt * NumInPerOut;
5983 unsigned End = MaskElt + NumInPerOut - 1;
5984 for (; MaskElt < End; MaskElt++)
5985 Mask[MaskElt] = ZeroVecElt++;
5986 Mask[MaskElt] = PackedElt;
5987 }
5988 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
5989 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
5990}
5991
5992SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
5993 unsigned ByScalar) const {
5994 // Look for cases where a vector shift can use the *_BY_SCALAR form.
5995 SDValue Op0 = Op.getOperand(0);
5996 SDValue Op1 = Op.getOperand(1);
5997 SDLoc DL(Op);
5998 EVT VT = Op.getValueType();
5999 unsigned ElemBitSize = VT.getScalarSizeInBits();
6000
6001 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6002 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6003 APInt SplatBits, SplatUndef;
6004 unsigned SplatBitSize;
6005 bool HasAnyUndefs;
6006 // Check for constant splats. Use ElemBitSize as the minimum element
6007 // width and reject splats that need wider elements.
6008 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6009 ElemBitSize, true) &&
6010 SplatBitSize == ElemBitSize) {
6011 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6012 DL, MVT::i32);
6013 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6014 }
6015 // Check for variable splats.
6016 BitVector UndefElements;
6017 SDValue Splat = BVN->getSplatValue(&UndefElements);
6018 if (Splat) {
6019 // Since i32 is the smallest legal type, we either need a no-op
6020 // or a truncation.
6021 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6022 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6023 }
6024 }
6025
6026 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6027 // and the shift amount is directly available in a GPR.
6028 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6029 if (VSN->isSplat()) {
6030 SDValue VSNOp0 = VSN->getOperand(0);
6031 unsigned Index = VSN->getSplatIndex();
6033 "Splat index should be defined and in first operand");
6034 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6035 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6036 // Since i32 is the smallest legal type, we either need a no-op
6037 // or a truncation.
6038 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6039 VSNOp0.getOperand(Index));
6040 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6041 }
6042 }
6043 }
6044
6045 // Otherwise just treat the current form as legal.
6046 return Op;
6047}
6048
6049SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6050 SelectionDAG &DAG) const {
6051 SDLoc DL(Op);
6052 MVT ResultVT = Op.getSimpleValueType();
6053 SDValue Arg = Op.getOperand(0);
6054 unsigned Check = Op.getConstantOperandVal(1);
6055
6056 unsigned TDCMask = 0;
6057 if (Check & fcSNan)
6059 if (Check & fcQNan)
6061 if (Check & fcPosInf)
6063 if (Check & fcNegInf)
6065 if (Check & fcPosNormal)
6067 if (Check & fcNegNormal)
6069 if (Check & fcPosSubnormal)
6071 if (Check & fcNegSubnormal)
6073 if (Check & fcPosZero)
6074 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6075 if (Check & fcNegZero)
6076 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
6077 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
6078
6079 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
6080 return getCCResult(DAG, Intr);
6081}
6082
6083SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
6084 SelectionDAG &DAG) const {
6085 SDLoc DL(Op);
6086 SDValue Chain = Op.getOperand(0);
6087
6088 // STCKF only supports a memory operand, so we have to use a temporary.
6089 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
6090 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6091 MachinePointerInfo MPI =
6093
6094 // Use STCFK to store the TOD clock into the temporary.
6095 SDValue StoreOps[] = {Chain, StackPtr};
6096 Chain = DAG.getMemIntrinsicNode(
6097 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
6099
6100 // And read it back from there.
6101 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
6102}
6103
6105 SelectionDAG &DAG) const {
6106 switch (Op.getOpcode()) {
6107 case ISD::FRAMEADDR:
6108 return lowerFRAMEADDR(Op, DAG);
6109 case ISD::RETURNADDR:
6110 return lowerRETURNADDR(Op, DAG);
6111 case ISD::BR_CC:
6112 return lowerBR_CC(Op, DAG);
6113 case ISD::SELECT_CC:
6114 return lowerSELECT_CC(Op, DAG);
6115 case ISD::SETCC:
6116 return lowerSETCC(Op, DAG);
6117 case ISD::STRICT_FSETCC:
6118 return lowerSTRICT_FSETCC(Op, DAG, false);
6120 return lowerSTRICT_FSETCC(Op, DAG, true);
6121 case ISD::GlobalAddress:
6122 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
6124 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
6125 case ISD::BlockAddress:
6126 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
6127 case ISD::JumpTable:
6128 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
6129 case ISD::ConstantPool:
6130 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
6131 case ISD::BITCAST:
6132 return lowerBITCAST(Op, DAG);
6133 case ISD::VASTART:
6134 return lowerVASTART(Op, DAG);
6135 case ISD::VACOPY:
6136 return lowerVACOPY(Op, DAG);
6138 return lowerDYNAMIC_STACKALLOC(Op, DAG);
6140 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
6141 case ISD::SMUL_LOHI:
6142 return lowerSMUL_LOHI(Op, DAG);
6143 case ISD::UMUL_LOHI:
6144 return lowerUMUL_LOHI(Op, DAG);
6145 case ISD::SDIVREM:
6146 return lowerSDIVREM(Op, DAG);
6147 case ISD::UDIVREM:
6148 return lowerUDIVREM(Op, DAG);
6149 case ISD::SADDO:
6150 case ISD::SSUBO:
6151 case ISD::UADDO:
6152 case ISD::USUBO:
6153 return lowerXALUO(Op, DAG);
6154 case ISD::UADDO_CARRY:
6155 case ISD::USUBO_CARRY:
6156 return lowerUADDSUBO_CARRY(Op, DAG);
6157 case ISD::OR:
6158 return lowerOR(Op, DAG);
6159 case ISD::CTPOP:
6160 return lowerCTPOP(Op, DAG);
6161 case ISD::ATOMIC_FENCE:
6162 return lowerATOMIC_FENCE(Op, DAG);
6163 case ISD::ATOMIC_SWAP:
6164 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
6165 case ISD::ATOMIC_STORE:
6166 case ISD::ATOMIC_LOAD:
6167 return lowerATOMIC_LDST_I128(Op, DAG);
6169 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
6171 return lowerATOMIC_LOAD_SUB(Op, DAG);
6173 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
6175 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
6177 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
6179 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
6181 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
6183 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
6185 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
6187 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
6189 return lowerATOMIC_CMP_SWAP(Op, DAG);
6190 case ISD::STACKSAVE:
6191 return lowerSTACKSAVE(Op, DAG);
6192 case ISD::STACKRESTORE:
6193 return lowerSTACKRESTORE(Op, DAG);
6194 case ISD::PREFETCH:
6195 return lowerPREFETCH(Op, DAG);
6197 return lowerINTRINSIC_W_CHAIN(Op, DAG);
6199 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
6200 case ISD::BUILD_VECTOR:
6201 return lowerBUILD_VECTOR(Op, DAG);
6203 return lowerVECTOR_SHUFFLE(Op, DAG);
6205 return lowerSCALAR_TO_VECTOR(Op, DAG);
6207 return lowerINSERT_VECTOR_ELT(Op, DAG);
6209 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
6211 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
6213 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
6214 case ISD::SHL:
6215 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
6216 case ISD::SRL:
6217 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
6218 case ISD::SRA:
6219 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
6220 case ISD::ROTL:
6221 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
6222 case ISD::IS_FPCLASS:
6223 return lowerIS_FPCLASS(Op, DAG);
6224 case ISD::GET_ROUNDING:
6225 return lowerGET_ROUNDING(Op, DAG);
6227 return lowerREADCYCLECOUNTER(Op, DAG);
6228 default:
6229 llvm_unreachable("Unexpected node to lower");
6230 }
6231}
6232
6233// Lower operations with invalid operand or result types (currently used
6234// only for 128-bit integer types).
6235void
6238 SelectionDAG &DAG) const {
6239 switch (N->getOpcode()) {
6240 case ISD::ATOMIC_LOAD: {
6241 SDLoc DL(N);
6242 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
6243 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
6244 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6246 DL, Tys, Ops, MVT::i128, MMO);
6247 Results.push_back(lowerGR128ToI128(DAG, Res));
6248 Results.push_back(Res.getValue(1));
6249 break;
6250 }
6251 case ISD::ATOMIC_STORE: {
6252 SDLoc DL(N);
6253 SDVTList Tys = DAG.getVTList(MVT::Other);
6254 SDValue Ops[] = {N->getOperand(0), lowerI128ToGR128(DAG, N->getOperand(1)),
6255 N->getOperand(2)};
6256 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6258 DL, Tys, Ops, MVT::i128, MMO);
6259 // We have to enforce sequential consistency by performing a
6260 // serialization operation after the store.
6261 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
6263 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
6264 MVT::Other, Res), 0);
6265 Results.push_back(Res);
6266 break;
6267 }
6269 SDLoc DL(N);
6270 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
6271 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
6272 lowerI128ToGR128(DAG, N->getOperand(2)),
6273 lowerI128ToGR128(DAG, N->getOperand(3)) };
6274 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
6276 DL, Tys, Ops, MVT::i128, MMO);
6277 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
6279 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
6280 Results.push_back(lowerGR128ToI128(DAG, Res));
6281 Results.push_back(Success);
6282 Results.push_back(Res.getValue(2));
6283 break;
6284 }
6285 case ISD::BITCAST: {
6286 SDValue Src = N->getOperand(0);
6287 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 &&
6288 !useSoftFloat()) {
6289 SDLoc DL(N);
6290 SDValue Lo, Hi;
6291 if (getRepRegClassFor(MVT::f128) == &SystemZ::VR128BitRegClass) {
6292 SDValue VecBC = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Src);
6293 Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
6294 DAG.getConstant(1, DL, MVT::i32));
6295 Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, VecBC,
6296 DAG.getConstant(0, DL, MVT::i32));
6297 } else {
6298 assert(getRepRegClassFor(MVT::f128) == &SystemZ::FP128BitRegClass &&
6299 "Unrecognized register class for f128.");
6300 SDValue LoFP = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
6301 DL, MVT::f64, Src);
6302 SDValue HiFP = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
6303 DL, MVT::f64, Src);
6304 Lo = DAG.getNode(ISD::BITCAST, DL, MVT::i64, LoFP);
6305 Hi = DAG.getNode(ISD::BITCAST, DL, MVT::i64, HiFP);
6306 }
6307 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi));
6308 }
6309 break;
6310 }
6311 default:
6312 llvm_unreachable("Unexpected node to lower");
6313 }
6314}
6315
6316void
6319 SelectionDAG &DAG) const {
6320 return LowerOperationWrapper(N, Results, DAG);
6321}
6322
6323const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
6324#define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME
6325 switch ((SystemZISD::NodeType)Opcode) {
6326 case SystemZISD::FIRST_NUMBER: break;
6327 OPCODE(RET_GLUE);
6328 OPCODE(CALL);
6329 OPCODE(SIBCALL);
6330 OPCODE(TLS_GDCALL);
6331 OPCODE(TLS_LDCALL);
6332 OPCODE(PCREL_WRAPPER);
6333 OPCODE(PCREL_OFFSET);
6334 OPCODE(ICMP);
6335 OPCODE(FCMP);
6336 OPCODE(STRICT_FCMP);
6337 OPCODE(STRICT_FCMPS);
6338 OPCODE(TM);
6339 OPCODE(BR_CCMASK);
6340 OPCODE(SELECT_CCMASK);
6341 OPCODE(ADJDYNALLOC);
6342 OPCODE(PROBED_ALLOCA);
6343 OPCODE(POPCNT);
6344 OPCODE(SMUL_LOHI);
6345 OPCODE(UMUL_LOHI);
6346 OPCODE(SDIVREM);
6347 OPCODE(UDIVREM);
6348 OPCODE(SADDO);
6349 OPCODE(SSUBO);
6350 OPCODE(UADDO);
6351 OPCODE(USUBO);
6352 OPCODE(ADDCARRY);
6353 OPCODE(SUBCARRY);
6354 OPCODE(GET_CCMASK);
6355 OPCODE(MVC);
6356 OPCODE(NC);
6357 OPCODE(OC);
6358 OPCODE(XC);
6359 OPCODE(CLC);
6360 OPCODE(MEMSET_MVC);
6361 OPCODE(STPCPY);
6362 OPCODE(STRCMP);
6363 OPCODE(SEARCH_STRING);
6364 OPCODE(IPM);
6365 OPCODE(TBEGIN);
6366 OPCODE(TBEGIN_NOFLOAT);
6367 OPCODE(TEND);
6368 OPCODE(BYTE_MASK);
6369 OPCODE(ROTATE_MASK);
6370 OPCODE(REPLICATE);
6371 OPCODE(JOIN_DWORDS);
6372 OPCODE(SPLAT);
6373 OPCODE(MERGE_HIGH);
6374 OPCODE(MERGE_LOW);
6375 OPCODE(SHL_DOUBLE);
6376 OPCODE(PERMUTE_DWORDS);
6377 OPCODE(PERMUTE);
6378 OPCODE(PACK);
6379 OPCODE(PACKS_CC);
6380 OPCODE(PACKLS_CC);
6381 OPCODE(UNPACK_HIGH);
6382 OPCODE(UNPACKL_HIGH);
6383 OPCODE(UNPACK_LOW);
6384 OPCODE(UNPACKL_LOW);
6385 OPCODE(VSHL_BY_SCALAR);
6386 OPCODE(VSRL_BY_SCALAR);
6387 OPCODE(VSRA_BY_SCALAR);
6388 OPCODE(VROTL_BY_SCALAR);
6389 OPCODE(VSUM);
6390 OPCODE(VACC);
6391 OPCODE(VSCBI);
6392 OPCODE(VAC);
6393 OPCODE(VSBI);
6394 OPCODE(VACCC);
6395 OPCODE(VSBCBI);
6396 OPCODE(VICMPE);
6397 OPCODE(VICMPH);
6398 OPCODE(VICMPHL);
6399 OPCODE(VICMPES);
6400 OPCODE(VICMPHS);
6401 OPCODE(VICMPHLS);
6402 OPCODE(VFCMPE);
6403 OPCODE(STRICT_VFCMPE);
6404 OPCODE(STRICT_VFCMPES);
6405 OPCODE(VFCMPH);
6406 OPCODE(STRICT_VFCMPH);
6407 OPCODE(STRICT_VFCMPHS);
6408 OPCODE(VFCMPHE);
6409 OPCODE(STRICT_VFCMPHE);
6410 OPCODE(STRICT_VFCMPHES);
6411 OPCODE(VFCMPES);
6412 OPCODE(VFCMPHS);
6413 OPCODE(VFCMPHES);
6414 OPCODE(VFTCI);
6415 OPCODE(VEXTEND);
6416 OPCODE(STRICT_VEXTEND);
6417 OPCODE(VROUND);
6418 OPCODE(STRICT_VROUND);
6419 OPCODE(VTM);
6420 OPCODE(SCMP128HI);
6421 OPCODE(UCMP128HI);
6422 OPCODE(VFAE_CC);
6423 OPCODE(VFAEZ_CC);
6424 OPCODE(VFEE_CC);
6425 OPCODE(VFEEZ_CC);
6426 OPCODE(VFENE_CC);
6427 OPCODE(VFENEZ_CC);
6428 OPCODE(VISTR_CC);
6429 OPCODE(VSTRC_CC);
6430 OPCODE(VSTRCZ_CC);
6431 OPCODE(VSTRS_CC);
6432 OPCODE(VSTRSZ_CC);
6433 OPCODE(TDC);
6434 OPCODE(ATOMIC_SWAPW);
6435 OPCODE(ATOMIC_LOADW_ADD);
6436 OPCODE(ATOMIC_LOADW_SUB);
6437 OPCODE(ATOMIC_LOADW_AND);
6438 OPCODE(ATOMIC_LOADW_OR);
6439 OPCODE(ATOMIC_LOADW_XOR);
6440 OPCODE(ATOMIC_LOADW_NAND);
6441 OPCODE(ATOMIC_LOADW_MIN);
6442 OPCODE(ATOMIC_LOADW_MAX);
6443 OPCODE(ATOMIC_LOADW_UMIN);
6444 OPCODE(ATOMIC_LOADW_UMAX);
6445 OPCODE(ATOMIC_CMP_SWAPW);
6446 OPCODE(ATOMIC_CMP_SWAP);
6447 OPCODE(ATOMIC_LOAD_128);
6448 OPCODE(ATOMIC_STORE_128);
6449 OPCODE(ATOMIC_CMP_SWAP_128);
6450 OPCODE(LRV);
6451 OPCODE(STRV);
6452 OPCODE(VLER);
6453 OPCODE(VSTER);
6454 OPCODE(STCKF);
6456 OPCODE(ADA_ENTRY);
6457 }
6458 return nullptr;
6459#undef OPCODE
6460}
6461
6462// Return true if VT is a vector whose elements are a whole number of bytes
6463// in width. Also check for presence of vector support.
6464bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
6465 if (!Subtarget.hasVector())
6466 return false;
6467
6468 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
6469}
6470
6471// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
6472// producing a result of type ResVT. Op is a possibly bitcast version
6473// of the input vector and Index is the index (based on type VecVT) that
6474// should be extracted. Return the new extraction if a simplification
6475// was possible or if Force is true.
6476SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
6477 EVT VecVT, SDValue Op,
6478 unsigned Index,
6479 DAGCombinerInfo &DCI,
6480 bool Force) const {
6481 SelectionDAG &DAG = DCI.DAG;
6482
6483 // The number of bytes being extracted.
6484 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6485
6486 for (;;) {
6487 unsigned Opcode = Op.getOpcode();
6488 if (Opcode == ISD::BITCAST)
6489 // Look through bitcasts.
6490 Op = Op.getOperand(0);
6491 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
6492 canTreatAsByteVector(Op.getValueType())) {
6493 // Get a VPERM-like permute mask and see whether the bytes covered
6494 // by the extracted element are a contiguous sequence from one
6495 // source operand.
6497 if (!getVPermMask(Op, Bytes))
6498 break;
6499 int First;
6500 if (!getShuffleInput(Bytes, Index * BytesPerElement,
6501 BytesPerElement, First))
6502 break;
6503 if (First < 0)
6504 return DAG.getUNDEF(ResVT);
6505 // Make sure the contiguous sequence starts at a multiple of the
6506 // original element size.
6507 unsigned Byte = unsigned(First) % Bytes.size();
6508 if (Byte % BytesPerElement != 0)
6509 break;
6510 // We can get the extracted value directly from an input.
6511 Index = Byte / BytesPerElement;
6512 Op = Op.getOperand(unsigned(First) / Bytes.size());
6513 Force = true;
6514 } else if (Opcode == ISD::BUILD_VECTOR &&
6515 canTreatAsByteVector(Op.getValueType())) {
6516 // We can only optimize this case if the BUILD_VECTOR elements are
6517 // at least as wide as the extracted value.
6518 EVT OpVT = Op.getValueType();
6519 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6520 if (OpBytesPerElement < BytesPerElement)
6521 break;
6522 // Make sure that the least-significant bit of the extracted value
6523 // is the least significant bit of an input.
6524 unsigned End = (Index + 1) * BytesPerElement;
6525 if (End % OpBytesPerElement != 0)
6526 break;
6527 // We're extracting the low part of one operand of the BUILD_VECTOR.
6528 Op = Op.getOperand(End / OpBytesPerElement - 1);
6529 if (!Op.getValueType().isInteger()) {
6530 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
6531 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
6532 DCI.AddToWorklist(Op.getNode());
6533 }
6534 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
6535 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
6536 if (VT != ResVT) {
6537 DCI.AddToWorklist(Op.getNode());
6538 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
6539 }
6540 return Op;
6541 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
6543 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
6544 canTreatAsByteVector(Op.getValueType()) &&
6545 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
6546 // Make sure that only the unextended bits are significant.
6547 EVT ExtVT = Op.getValueType();
6548 EVT OpVT = Op.getOperand(0).getValueType();
6549 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
6550 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
6551 unsigned Byte = Index * BytesPerElement;
6552 unsigned SubByte = Byte % ExtBytesPerElement;
6553 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
6554 if (SubByte < MinSubByte ||
6555 SubByte + BytesPerElement > ExtBytesPerElement)
6556 break;
6557 // Get the byte offset of the unextended element
6558 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
6559 // ...then add the byte offset relative to that element.
6560 Byte += SubByte - MinSubByte;
6561 if (Byte % BytesPerElement != 0)
6562 break;
6563 Op = Op.getOperand(0);
6564 Index = Byte / BytesPerElement;
6565 Force = true;
6566 } else
6567 break;
6568 }
6569 if (Force) {
6570 if (Op.getValueType() != VecVT) {
6571 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
6572 DCI.AddToWorklist(Op.getNode());
6573 }
6574 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
6575 DAG.getConstant(Index, DL, MVT::i32));
6576 }
6577 return SDValue();
6578}
6579
6580// Optimize vector operations in scalar value Op on the basis that Op
6581// is truncated to TruncVT.
6582SDValue SystemZTargetLowering::combineTruncateExtract(
6583 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
6584 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
6585 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
6586 // of type TruncVT.
6587 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6588 TruncVT.getSizeInBits() % 8 == 0) {
6589 SDValue Vec = Op.getOperand(0);
6590 EVT VecVT = Vec.getValueType();
6591 if (canTreatAsByteVector(VecVT)) {
6592 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
6593 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
6594 unsigned TruncBytes = TruncVT.getStoreSize();
6595 if (BytesPerElement % TruncBytes == 0) {
6596 // Calculate the value of Y' in the above description. We are
6597 // splitting the original elements into Scale equal-sized pieces
6598 // and for truncation purposes want the last (least-significant)
6599 // of these pieces for IndexN. This is easiest to do by calculating
6600 // the start index of the following element and then subtracting 1.
6601 unsigned Scale = BytesPerElement / TruncBytes;
6602 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
6603
6604 // Defer the creation of the bitcast from X to combineExtract,
6605 // which might be able to optimize the extraction.
6606 VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
6607 VecVT.getStoreSize() / TruncBytes);
6608 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
6609 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
6610 }
6611 }
6612 }
6613 }
6614 return SDValue();
6615}
6616
6617// Replace ALoad with a new ATOMIC_LOAD with a result that is extended to VT
6618// per ETy.
6620 ISD::LoadExtType ETy) {
6621 if (VT.getSizeInBits() > 64)
6622 return SDValue();
6623 EVT OrigVT = ALoad->getValueType(0);
6624 assert(OrigVT.getSizeInBits() < VT.getSizeInBits() && "VT should be wider.");
6625 EVT MemoryVT = ALoad->getMemoryVT();
6626 auto *NewALoad = dyn_cast<AtomicSDNode>(DAG.getAtomic(
6627 ISD::ATOMIC_LOAD, SDLoc(ALoad), MemoryVT, VT, ALoad->getChain(),
6628 ALoad->getBasePtr(), ALoad->getMemOperand()));
6629 NewALoad->setExtensionType(ETy);
6631 SDValue(ALoad, 0),
6632 DAG.getNode(ISD::TRUNCATE, SDLoc(ALoad), OrigVT, SDValue(NewALoad, 0)));
6633 // Update the chain uses.
6634 DAG.ReplaceAllUsesOfValueWith(SDValue(ALoad, 1), SDValue(NewALoad, 1));
6635 return SDValue(NewALoad, 0);
6636}
6637
6638SDValue SystemZTargetLowering::combineZERO_EXTEND(
6639 SDNode *N, DAGCombinerInfo &DCI) const {
6640 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
6641 SelectionDAG &DAG = DCI.DAG;
6642 SDValue N0 = N->getOperand(0);
6643 EVT VT = N->getValueType(0);
6645 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
6646 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6647 if (TrueOp && FalseOp) {
6648 SDLoc DL(N0);
6649 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
6650 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
6651 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
6652 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
6653 // If N0 has multiple uses, change other uses as well.
6654 if (!N0.hasOneUse()) {
6655 SDValue TruncSelect =
6656 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
6657 DCI.CombineTo(N0.getNode(), TruncSelect);
6658 }
6659 return NewSelect;
6660 }
6661 }
6662 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
6663 // of the result is smaller than the size of X and all the truncated bits
6664 // of X are already zero.
6665 if (N0.getOpcode() == ISD::XOR &&
6666 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
6667 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
6668 N0.getOperand(1).getOpcode() == ISD::Constant) {
6669 SDValue X = N0.getOperand(0).getOperand(0);
6670 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
6671 KnownBits Known = DAG.computeKnownBits(X);
6672 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
6673 N0.getValueSizeInBits(),
6674 VT.getSizeInBits());
6675 if (TruncatedBits.isSubsetOf(Known.Zero)) {
6676 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
6678 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
6679 X, DAG.getConstant(Mask, SDLoc(N0), VT));
6680 }
6681 }
6682 }
6683
6684 // Fold into ATOMIC_LOAD unless it is already sign extending.
6685 if (auto *ALoad = dyn_cast<AtomicSDNode>(N0))
6686 if (ALoad->getOpcode() == ISD::ATOMIC_LOAD &&
6687 ALoad->getExtensionType() != ISD::SEXTLOAD)
6688 return extendAtomicLoad(ALoad, VT, DAG, ISD::ZEXTLOAD);
6689
6690 return SDValue();
6691}
6692
6693SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
6694 SDNode *N, DAGCombinerInfo &DCI) const {
6695 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
6696 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
6697 // into (select_cc LHS, RHS, -1, 0, COND)
6698 SelectionDAG &DAG = DCI.DAG;
6699 SDValue N0 = N->getOperand(0);
6700 EVT VT = N->getValueType(0);
6701 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
6702 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
6703 N0 = N0.getOperand(0);
6704 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
6705 SDLoc DL(N0);
6706 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
6707 DAG.getConstant(-1, DL, VT), DAG.getConstant(0, DL, VT),
6708 N0.getOperand(2) };
6709 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
6710 }
6711 return SDValue();
6712}
6713
6714SDValue SystemZTargetLowering::combineSIGN_EXTEND(
6715 SDNode *N, DAGCombinerInfo &DCI) const {
6716 // Convert (sext (ashr (shl X, C1), C2)) to
6717 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
6718 // cheap as narrower ones.
6719 SelectionDAG &DAG = DCI.DAG;
6720 SDValue N0 = N->getOperand(0);
6721 EVT VT = N->getValueType(0);
6722 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
6723 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
6724 SDValue Inner = N0.getOperand(0);
6725 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
6726 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
6727 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
6728 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
6729 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
6730 EVT ShiftVT = N0.getOperand(1).getValueType();
6731 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
6732 Inner.getOperand(0));
6733 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
6734 DAG.getConstant(NewShlAmt, SDLoc(Inner),
6735 ShiftVT));
6736 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
6737 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
6738 }
6739 }
6740 }
6741
6742 // Fold into ATOMIC_LOAD unless it is already zero extending.
6743 if (auto *ALoad = dyn_cast<AtomicSDNode>(N0))
6744 if (ALoad->getOpcode() == ISD::ATOMIC_LOAD &&
6745 ALoad->getExtensionType() != ISD::ZEXTLOAD)
6746 return extendAtomicLoad(ALoad, VT, DAG, ISD::SEXTLOAD);
6747
6748 return SDValue();
6749}
6750
6751SDValue SystemZTargetLowering::combineMERGE(
6752 SDNode *N, DAGCombinerInfo &DCI) const {
6753 SelectionDAG &DAG = DCI.DAG;
6754 unsigned Opcode = N->getOpcode();
6755 SDValue Op0 = N->getOperand(0);
6756 SDValue Op1 = N->getOperand(1);
6757 if (Op0.getOpcode() == ISD::BITCAST)
6758 Op0 = Op0.getOperand(0);
6760 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
6761 // for v4f32.
6762 if (Op1 == N->getOperand(0))
6763 return Op1;
6764 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
6765 EVT VT = Op1.getValueType();
6766 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
6767 if (ElemBytes <= 4) {
6768 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
6771 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
6772 SystemZ::VectorBytes / ElemBytes / 2);
6773 if (VT != InVT) {
6774 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
6775 DCI.AddToWorklist(Op1.getNode());
6776 }
6777 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
6778 DCI.AddToWorklist(Op.getNode());
6779 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
6780 }
6781 }
6782 return SDValue();
6783}
6784
6785SDValue SystemZTargetLowering::combineLOAD(
6786 SDNode *N, DAGCombinerInfo &DCI) const {
6787 SelectionDAG &DAG = DCI.DAG;
6788 EVT LdVT = N->getValueType(0);
6789 SDLoc DL(N);
6790
6791 // Replace an i128 load that is used solely to move its value into GPRs
6792 // by separate loads of both halves.
6793 if (LdVT == MVT::i128) {
6794 LoadSDNode *LD = cast<LoadSDNode>(N);
6795 if (!LD->isSimple() || !ISD::isNormalLoad(LD))
6796 return SDValue();
6797
6798 // Scan through all users.
6800 int UsedElements = 0;
6801 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
6802 UI != UIEnd; ++UI) {
6803 // Skip the uses of the chain.
6804 if (UI.getUse().getResNo() != 0)
6805 continue;
6806
6807 // Verify every user is a TRUNCATE to i64 of the low or high half ...
6808 SDNode *User = *UI;
6809 int Index = 1;
6810 if (User->getOpcode() == ISD::SRL &&
6811 User->getOperand(1).getOpcode() == ISD::Constant &&
6812 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
6813 User = *User->use_begin();
6814 Index = 0;
6815 }
6816 if (User->getOpcode() != ISD::TRUNCATE ||
6817 User->getValueType(0) != MVT::i64)
6818 return SDValue();
6819
6820 // ... and no half is extracted twice.
6821 if (UsedElements & (1 << Index))
6822 return SDValue();
6823
6824 UsedElements |= 1 << Index;
6825 Users.push_back(std::make_pair(User, Index));
6826 }
6827
6828 // Rewrite each extraction as an independent load.
6829 SmallVector<SDValue, 2> ArgChains;
6830 for (auto UserAndIndex : Users) {
6831 SDNode *User = UserAndIndex.first;
6832 unsigned Offset = User->getValueType(0).getStoreSize() * UserAndIndex.second;
6833 SDValue Ptr =
6834 DAG.getMemBasePlusOffset(LD->getBasePtr(), TypeSize::getFixed(Offset), DL);
6835 SDValue EltLoad =
6836 DAG.getLoad(User->getValueType(0), DL, LD->getChain(), Ptr,
6837 LD->getPointerInfo().getWithOffset(Offset),
6838 LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
6839 LD->getAAInfo());
6840
6841 DCI.CombineTo(User, EltLoad, true);
6842 ArgChains.push_back(EltLoad.getValue(1));
6843 }
6844
6845 // Collect all chains via TokenFactor.
6846 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
6847 ArgChains);
6848 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
6849 DCI.AddToWorklist(Chain.getNode());
6850 return SDValue(N, 0);
6851 }
6852
6853 if (LdVT.isVector() || LdVT.isInteger())
6854 return SDValue();
6855 // Transform a scalar load that is REPLICATEd as well as having other
6856 // use(s) to the form where the other use(s) use the first element of the
6857 // REPLICATE instead of the load. Otherwise instruction selection will not
6858 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
6859 // point loads.
6860
6861 SDValue Replicate;
6862 SmallVector<SDNode*, 8> OtherUses;
6863 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
6864 UI != UE; ++UI) {
6865 if (UI->getOpcode() == SystemZISD::REPLICATE) {
6866 if (Replicate)
6867 return SDValue(); // Should never happen
6868 Replicate = SDValue(*UI, 0);
6869 }
6870 else if (UI.getUse().getResNo() == 0)
6871 OtherUses.push_back(*UI);
6872 }
6873 if (!Replicate || OtherUses.empty())
6874 return SDValue();
6875
6876 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
6877 Replicate, DAG.getConstant(0, DL, MVT::i32));
6878 // Update uses of the loaded Value while preserving old chains.
6879 for (SDNode *U : OtherUses) {
6881 for (SDValue Op : U->ops())
6882 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
6883 DAG.UpdateNodeOperands(U, Ops);
6884 }
6885 return SDValue(N, 0);
6886}
6887
6888bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
6889 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
6890 return true;
6891 if (Subtarget.hasVectorEnhancements2())
6892 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
6893 return true;
6894 return false;
6895}
6896
6898 if (!VT.isVector() || !VT.isSimple() ||
6899 VT.getSizeInBits() != 128 ||
6900 VT.getScalarSizeInBits() % 8 != 0)
6901 return false;
6902
6903 unsigned NumElts = VT.getVectorNumElements();
6904 for (unsigned i = 0; i < NumElts; ++i) {
6905 if (M[i] < 0) continue; // ignore UNDEF indices
6906 if ((unsigned) M[i] != NumElts - 1 - i)
6907 return false;
6908 }
6909
6910 return true;
6911}
6912
6913static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
6914 for (auto *U : StoredVal->uses()) {
6915 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
6916 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
6917 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
6918 continue;
6919 } else if (isa<BuildVectorSDNode>(U)) {
6920 SDValue BuildVector = SDValue(U, 0);
6921 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
6922 isOnlyUsedByStores(BuildVector, DAG))
6923 continue;
6924 }
6925 return false;
6926 }
6927 return true;
6928}
6929
6930static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart) {
6931 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
6932 return false;
6933
6934 SDValue Op0 = Val.getOperand(0);
6935 SDValue Op1 = Val.getOperand(1);
6936
6937 if (Op0.getOpcode() == ISD::SHL)
6938 std::swap(Op0, Op1);
6939 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
6940 Op1.getOperand(1).getOpcode() != ISD::Constant ||
6941 Op1.getConstantOperandVal(1) != 64)
6942 return false;
6943 Op1 = Op1.getOperand(0);
6944
6945 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
6946 Op0.getOperand(0).getValueType() != MVT::i64)
6947 return false;
6948 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
6949 Op1.getOperand(0).getValueType() != MVT::i64)
6950 return false;
6951
6952 LoPart = Op0.getOperand(0);
6953 HiPart = Op1.getOperand(0);
6954 return true;
6955}
6956
6957SDValue SystemZTargetLowering::combineSTORE(
6958 SDNode *N, DAGCombinerInfo &DCI) const {
6959 SelectionDAG &DAG = DCI.DAG;
6960 auto *SN = cast<StoreSDNode>(N);
6961 auto &Op1 = N->getOperand(1);
6962 EVT MemVT = SN->getMemoryVT();
6963 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
6964 // for the extraction to be done on a vMiN value, so that we can use VSTE.
6965 // If X has wider elements then convert it to:
6966 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
6967 if (MemVT.isInteger() && SN->isTruncatingStore()) {
6968 if (SDValue Value =
6969 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
6970 DCI.AddToWorklist(Value.getNode());
6971
6972 // Rewrite the store with the new form of stored value.
6973 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
6974 SN->getBasePtr(), SN->getMemoryVT(),
6975 SN->getMemOperand());
6976 }
6977 }
6978 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
6979 if (!SN->isTruncatingStore() &&
6980 Op1.getOpcode() == ISD::BSWAP &&
6981 Op1.getNode()->hasOneUse() &&
6982 canLoadStoreByteSwapped(Op1.getValueType())) {
6983
6984 SDValue BSwapOp = Op1.getOperand(0);
6985
6986 if (BSwapOp.getValueType() == MVT::i16)
6987 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
6988
6989 SDValue Ops[] = {
6990 N->getOperand(0), BSwapOp, N->getOperand(2)
6991 };
6992
6993 return
6994 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
6995 Ops, MemVT, SN->getMemOperand());
6996 }
6997 // Combine STORE (element-swap) into VSTER
6998 if (!SN->isTruncatingStore() &&
6999 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
7000 Op1.getNode()->hasOneUse() &&
7001 Subtarget.hasVectorEnhancements2()) {
7002 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
7003 ArrayRef<int> ShuffleMask = SVN->getMask();
7004 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
7005 SDValue Ops[] = {
7006 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
7007 };
7008
7010 DAG.getVTList(MVT::Other),
7011 Ops, MemVT, SN->getMemOperand());
7012 }
7013 }
7014
7015 // Combine STORE (READCYCLECOUNTER) into STCKF.
7016 if (!SN->isTruncatingStore() &&
7018 Op1.hasOneUse() &&
7019 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
7020 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
7022 DAG.getVTList(MVT::Other),
7023 Ops, MemVT, SN->getMemOperand());
7024 }
7025
7026 // Transform a store of an i128 moved from GPRs into two separate stores.
7027 if (MemVT == MVT::i128 && SN->isSimple() && ISD::isNormalStore(SN)) {
7028 SDValue LoPart, HiPart;
7029 if (isMovedFromParts(Op1, LoPart, HiPart)) {
7030 SDLoc DL(SN);
7031 SDValue Chain0 =
7032 DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
7033 SN->getPointerInfo(), SN->getOriginalAlign(),
7034 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7035 SDValue Chain1 =
7036 DAG.getStore(SN->getChain(), DL, LoPart,
7037 DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
7039 SN->getPointerInfo().getWithOffset(8),
7040 SN->getOriginalAlign(),
7041 SN->getMemOperand()->getFlags(), SN->getAAInfo());
7042
7043 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
7044 }
7045 }
7046
7047 // Replicate a reg or immediate with VREP instead of scalar multiply or
7048 // immediate load. It seems best to do this during the first DAGCombine as
7049 // it is straight-forward to handle the zero-extend node in the initial
7050 // DAG, and also not worry about the keeping the new MemVT legal (e.g. when
7051 // extracting an i16 element from a v16i8 vector).
7052 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
7053 isOnlyUsedByStores(Op1, DAG)) {
7054 SDValue Word = SDValue();
7055 EVT WordVT;
7056
7057 // Find a replicated immediate and return it if found in Word and its
7058 // type in WordVT.
7059 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
7060 // Some constants are better handled with a scalar store.
7061 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
7062 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
7063 return;
7064 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, C->getZExtValue()));
7065 if (VCI.isVectorConstantLegal(Subtarget) &&
7066 VCI.Opcode == SystemZISD::REPLICATE) {
7067 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
7068 WordVT = VCI.VecVT.getScalarType();
7069 }
7070 };
7071
7072 // Find a replicated register and return it if found in Word and its type
7073 // in WordVT.
7074 auto FindReplicatedReg = [&](SDValue MulOp) {
7075 EVT MulVT = MulOp.getValueType();
7076 if (MulOp->getOpcode() == ISD::MUL &&
7077 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
7078 // Find a zero extended value and its type.
7079 SDValue LHS = MulOp->getOperand(0);
7080 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
7081 WordVT = LHS->getOperand(0).getValueType();
7082 else if (LHS->getOpcode() == ISD::AssertZext)
7083 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
7084 else
7085 return;
7086 // Find a replicating constant, e.g. 0x00010001.
7087 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
7089 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
7090 if (VCI.isVectorConstantLegal(Subtarget) &&
7091 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
7092 WordVT == VCI.VecVT.getScalarType())
7093 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
7094 }
7095 }
7096 };
7097
7098 if (isa<BuildVectorSDNode>(Op1) &&
7099 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
7100 SDValue SplatVal = Op1->getOperand(0);
7101 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
7102 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
7103 else
7104 FindReplicatedReg(SplatVal);
7105 } else {
7106 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
7107 FindReplicatedImm(C, MemVT.getStoreSize());
7108 else
7109 FindReplicatedReg(Op1);
7110 }
7111
7112 if (Word != SDValue()) {
7113 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
7114 "Bad type handling");
7115 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
7116 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
7117 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
7118 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
7119 SN->getBasePtr(), SN->getMemOperand());
7120 }
7121 }
7122
7123 return SDValue();
7124}
7125
7126SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
7127 SDNode *N, DAGCombinerInfo &DCI) const {
7128 SelectionDAG &DAG = DCI.DAG;
7129 // Combine element-swap (LOAD) into VLER
7130 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7131 N->getOperand(0).hasOneUse() &&
7132 Subtarget.hasVectorEnhancements2()) {
7133 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
7134 ArrayRef<int> ShuffleMask = SVN->getMask();
7135 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
7136 SDValue Load = N->getOperand(0);
7137 LoadSDNode *LD = cast<LoadSDNode>(Load);
7138
7139 // Create the element-swapping load.
7140 SDValue Ops[] = {
7141 LD->getChain(), // Chain
7142 LD->getBasePtr() // Ptr
7143 };
7144 SDValue ESLoad =
7146 DAG.getVTList(LD->getValueType(0), MVT::Other),
7147 Ops, LD->getMemoryVT(), LD->getMemOperand());
7148
7149 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
7150 // by the load dead.
7151 DCI.CombineTo(N, ESLoad);
7152
7153 // Next, combine the load away, we give it a bogus result value but a real
7154 // chain result. The result value is dead because the shuffle is dead.
7155 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
7156
7157 // Return N so it doesn't get rechecked!
7158 return SDValue(N, 0);
7159 }
7160 }
7161
7162 return SDValue();
7163}
7164
7165SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
7166 SDNode *N, DAGCombinerInfo &DCI) const {
7167 SelectionDAG &DAG = DCI.DAG;
7168
7169 if (!Subtarget.hasVector())
7170 return SDValue();
7171
7172 // Look through bitcasts that retain the number of vector elements.
7173 SDValue Op = N->getOperand(0);
7174 if (Op.getOpcode() == ISD::BITCAST &&
7175 Op.getValueType().isVector() &&
7176 Op.getOperand(0).getValueType().isVector() &&
7177 Op.getValueType().getVectorNumElements() ==
7178 Op.getOperand(0).getValueType().getVectorNumElements())
7179 Op = Op.getOperand(0);
7180
7181 // Pull BSWAP out of a vector extraction.
7182 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
7183 EVT VecVT = Op.getValueType();
7184 EVT EltVT = VecVT.getVectorElementType();
7185 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
7186 Op.getOperand(0), N->getOperand(1));
7187 DCI.AddToWorklist(Op.getNode());
7188 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
7189 if (EltVT != N->getValueType(0)) {
7190 DCI.AddToWorklist(Op.getNode());
7191 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
7192 }
7193 return Op;
7194 }
7195
7196 // Try to simplify a vector extraction.
7197 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
7198 SDValue Op0 = N->getOperand(0);
7199 EVT VecVT = Op0.getValueType();
7200 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
7201 IndexN->getZExtValue(), DCI, false);
7202 }
7203 return SDValue();
7204}
7205
7206SDValue SystemZTargetLowering::combineJOIN_DWORDS(
7207 SDNode *N, DAGCombinerInfo &DCI) const {
7208 SelectionDAG &DAG = DCI.DAG;
7209 // (join_dwords X, X) == (replicate X)
7210 if (N->getOperand(0) == N->getOperand(1))
7211 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
7212 N->getOperand(0));
7213 return SDValue();
7214}
7215
7217 SDValue Chain1 = N1->getOperand(0);
7218 SDValue Chain2 = N2->getOperand(0);
7219
7220 // Trivial case: both nodes take the same chain.
7221 if (Chain1 == Chain2)
7222 return Chain1;
7223
7224 // FIXME - we could handle more complex cases via TokenFactor,
7225 // assuming we can verify that this would not create a cycle.
7226 return SDValue();
7227}
7228
7229SDValue SystemZTargetLowering::combineFP_ROUND(
7230 SDNode *N, DAGCombinerInfo &DCI) const {
7231
7232 if (!Subtarget.hasVector())
7233 return SDValue();
7234
7235 // (fpround (extract_vector_elt X 0))
7236 // (fpround (extract_vector_elt X 1)) ->
7237 // (extract_vector_elt (VROUND X) 0)
7238 // (extract_vector_elt (VROUND X) 2)
7239 //
7240 // This is a special case since the target doesn't really support v2f32s.
7241 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7242 SelectionDAG &DAG = DCI.DAG;
7243 SDValue Op0 = N->getOperand(OpNo);
7244 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
7246 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
7247 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7248 Op0.getConstantOperandVal(1) == 0) {
7249 SDValue Vec = Op0.getOperand(0);
7250 for (auto *U : Vec->uses()) {
7251 if (U != Op0.getNode() && U->hasOneUse() &&
7252 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7253 U->getOperand(0) == Vec &&
7254 U->getOperand(1).getOpcode() == ISD::Constant &&
7255 U->getConstantOperandVal(1) == 1) {
7256 SDValue OtherRound = SDValue(*U->use_begin(), 0);
7257 if (OtherRound.getOpcode() == N->getOpcode() &&
7258 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
7259 OtherRound.getValueType() == MVT::f32) {
7260 SDValue VRound, Chain;
7261 if (N->isStrictFPOpcode()) {
7262 Chain = MergeInputChains(N, OtherRound.getNode());
7263 if (!Chain)
7264 continue;
7266 {MVT::v4f32, MVT::Other}, {Chain, Vec});
7267 Chain = VRound.getValue(1);
7268 } else
7269 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
7270 MVT::v4f32, Vec);
7271 DCI.AddToWorklist(VRound.getNode());
7272 SDValue Extract1 =
7273 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
7274 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
7275 DCI.AddToWorklist(Extract1.getNode());
7276 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
7277 if (Chain)
7278 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
7279 SDValue Extract0 =
7280 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
7281 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7282 if (Chain)
7283 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7284 N->getVTList(), Extract0, Chain);
7285 return Extract0;
7286 }
7287 }
7288 }
7289 }
7290 return SDValue();
7291}
7292
7293SDValue SystemZTargetLowering::combineFP_EXTEND(
7294 SDNode *N, DAGCombinerInfo &DCI) const {
7295
7296 if (!Subtarget.hasVector())
7297 return SDValue();
7298
7299 // (fpextend (extract_vector_elt X 0))
7300 // (fpextend (extract_vector_elt X 2)) ->
7301 // (extract_vector_elt (VEXTEND X) 0)
7302 // (extract_vector_elt (VEXTEND X) 1)
7303 //
7304 // This is a special case since the target doesn't really support v2f32s.
7305 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
7306 SelectionDAG &DAG = DCI.DAG;
7307 SDValue Op0 = N->getOperand(OpNo);
7308 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
7310 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
7311 Op0.getOperand(1).getOpcode() == ISD::Constant &&
7312 Op0.getConstantOperandVal(1) == 0) {
7313 SDValue Vec = Op0.getOperand(0);
7314 for (auto *U : Vec->uses()) {
7315 if (U != Op0.getNode() && U->hasOneUse() &&
7316 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7317 U->getOperand(0) == Vec &&
7318 U->getOperand(1).getOpcode() == ISD::Constant &&
7319 U->getConstantOperandVal(1) == 2) {
7320 SDValue OtherExtend = SDValue(*U->use_begin(), 0);
7321 if (OtherExtend.getOpcode() == N->getOpcode() &&
7322 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
7323 OtherExtend.getValueType() == MVT::f64) {
7324 SDValue VExtend, Chain;
7325 if (N->isStrictFPOpcode()) {
7326 Chain = MergeInputChains(N, OtherExtend.getNode());
7327 if (!Chain)
7328 continue;
7329 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
7330 {MVT::v2f64, MVT::Other}, {Chain, Vec});
7331 Chain = VExtend.getValue(1);
7332 } else
7333 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
7334 MVT::v2f64, Vec);
7335 DCI.AddToWorklist(VExtend.getNode());
7336 SDValue Extract1 =
7337 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
7338 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
7339 DCI.AddToWorklist(Extract1.getNode());
7340 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
7341 if (Chain)
7342 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
7343 SDValue Extract0 =
7344 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
7345 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
7346 if (Chain)
7347 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
7348 N->getVTList(), Extract0, Chain);
7349 return Extract0;
7350 }
7351 }
7352 }
7353 }
7354 return SDValue();
7355}
7356
7357SDValue SystemZTargetLowering::combineINT_TO_FP(
7358 SDNode *N, DAGCombinerInfo &DCI) const {
7359 if (DCI.Level != BeforeLegalizeTypes)
7360 return SDValue();
7361 SelectionDAG &DAG = DCI.DAG;
7362 LLVMContext &Ctx = *DAG.getContext();
7363 unsigned Opcode = N->getOpcode();
7364 EVT OutVT = N->getValueType(0);
7365 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
7366 SDValue Op = N->getOperand(0);
7367 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
7368 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
7369
7370 // Insert an extension before type-legalization to avoid scalarization, e.g.:
7371 // v2f64 = uint_to_fp v2i16
7372 // =>
7373 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
7374 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
7375 OutScalarBits <= 64) {
7376 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
7377 EVT ExtVT = EVT::getVectorVT(
7378 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
7379 unsigned ExtOpcode =
7381 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
7382 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
7383 }
7384 return SDValue();
7385}
7386
7387SDValue SystemZTargetLowering::combineBSWAP(
7388 SDNode *N, DAGCombinerInfo &DCI) const {
7389 SelectionDAG &DAG = DCI.DAG;
7390 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
7391 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
7392 N->getOperand(0).hasOneUse() &&
7393 canLoadStoreByteSwapped(N->getValueType(0))) {
7394 SDValue Load = N->getOperand(0);
7395 LoadSDNode *LD = cast<LoadSDNode>(Load);
7396
7397 // Create the byte-swapping load.
7398 SDValue Ops[] = {
7399 LD->getChain(), // Chain
7400 LD->getBasePtr() // Ptr
7401 };
7402 EVT LoadVT = N->getValueType(0);
7403 if (LoadVT == MVT::i16)
7404 LoadVT = MVT::i32;
7405 SDValue BSLoad =
7407 DAG.getVTList(LoadVT, MVT::Other),
7408 Ops, LD->getMemoryVT(), LD->getMemOperand());
7409
7410 // If this is an i16 load, insert the truncate.
7411 SDValue ResVal = BSLoad;
7412 if (N->getValueType(0) == MVT::i16)
7413 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
7414
7415 // First, combine the bswap away. This makes the value produced by the
7416 // load dead.
7417 DCI.CombineTo(N, ResVal);
7418
7419 // Next, combine the load away, we give it a bogus result value but a real
7420 // chain result. The result value is dead because the bswap is dead.
7421 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
7422
7423 // Return N so it doesn't get rechecked!
7424 return SDValue(N, 0);
7425 }
7426
7427 // Look through bitcasts that retain the number of vector elements.
7428 SDValue Op = N->getOperand(0);
7429 if (Op.getOpcode() == ISD::BITCAST &&
7430 Op.getValueType().isVector() &&
7431 Op.getOperand(0).getValueType().isVector() &&
7432 Op.getValueType().getVectorNumElements() ==
7433 Op.getOperand(0).getValueType().getVectorNumElements())
7434 Op = Op.getOperand(0);
7435
7436 // Push BSWAP into a vector insertion if at least one side then simplifies.
7437 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
7438 SDValue Vec = Op.getOperand(0);
7439 SDValue Elt = Op.getOperand(1);
7440 SDValue Idx = Op.getOperand(2);
7441
7443 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
7445 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
7446 (canLoadStoreByteSwapped(N->getValueType(0)) &&
7447 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
7448 EVT VecVT = N->getValueType(0);
7449 EVT EltVT = N->getValueType(0).getVectorElementType();
7450 if (VecVT != Vec.getValueType()) {
7451 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
7452 DCI.AddToWorklist(Vec.getNode());
7453 }
7454 if (EltVT != Elt.getValueType()) {
7455 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
7456 DCI.AddToWorklist(Elt.getNode());
7457 }
7458 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
7459 DCI.AddToWorklist(Vec.getNode());
7460 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
7461 DCI.AddToWorklist(Elt.getNode());
7462 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
7463 Vec, Elt, Idx);
7464 }
7465 }
7466
7467 // Push BSWAP into a vector shuffle if at least one side then simplifies.
7468 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
7469 if (SV && Op.hasOneUse()) {
7470 SDValue Op0 = Op.getOperand(0);
7471 SDValue Op1 = Op.getOperand(1);
7472
7474 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
7476 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
7477 EVT VecVT = N->getValueType(0);
7478 if (VecVT != Op0.getValueType()) {
7479 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
7480 DCI.AddToWorklist(Op0.getNode());
7481 }
7482 if (VecVT != Op1.getValueType()) {
7483 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
7484 DCI.AddToWorklist(Op1.getNode());
7485 }
7486 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
7487 DCI.AddToWorklist(Op0.getNode());
7488 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
7489 DCI.AddToWorklist(Op1.getNode());
7490 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
7491 }
7492 }
7493
7494 return SDValue();
7495}
7496
7497static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
7498 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
7499 // set by the CCReg instruction using the CCValid / CCMask masks,
7500 // If the CCReg instruction is itself a ICMP testing the condition
7501 // code set by some other instruction, see whether we can directly
7502 // use that condition code.
7503
7504 // Verify that we have an ICMP against some constant.
7505 if (CCValid != SystemZ::CCMASK_ICMP)
7506 return false;
7507 auto *ICmp = CCReg.getNode();
7508 if (ICmp->getOpcode() != SystemZISD::ICMP)
7509 return false;
7510 auto *CompareLHS = ICmp->getOperand(0).getNode();
7511 auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
7512 if (!CompareRHS)
7513 return false;
7514
7515 // Optimize the case where CompareLHS is a SELECT_CCMASK.
7516 if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
7517 // Verify that we have an appropriate mask for a EQ or NE comparison.
7518 bool Invert = false;
7519 if (CCMask == SystemZ::CCMASK_CMP_NE)
7520 Invert = !Invert;
7521 else if (CCMask != SystemZ::CCMASK_CMP_EQ)
7522 return false;
7523
7524 // Verify that the ICMP compares against one of select values.
7525 auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
7526 if (!TrueVal)
7527 return false;
7528 auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7529 if (!FalseVal)
7530 return false;
7531 if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
7532 Invert = !Invert;
7533 else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
7534 return false;
7535
7536 // Compute the effective CC mask for the new branch or select.
7537 auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
7538 auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
7539 if (!NewCCValid || !NewCCMask)
7540 return false;
7541 CCValid = NewCCValid->getZExtValue();
7542 CCMask = NewCCMask->getZExtValue();
7543 if (Invert)
7544 CCMask ^= CCValid;
7545
7546 // Return the updated CCReg link.
7547 CCReg = CompareLHS->getOperand(4);
7548 return true;
7549 }
7550
7551 // Optimize the case where CompareRHS is (SRA (SHL (IPM))).
7552 if (CompareLHS->getOpcode() == ISD::SRA) {
7553 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
7554 if (!SRACount || SRACount->getZExtValue() != 30)
7555 return false;
7556 auto *SHL = CompareLHS->getOperand(0).getNode();
7557 if (SHL->getOpcode() != ISD::SHL)
7558 return false;
7559 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1));
7560 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC)
7561 return false;
7562 auto *IPM = SHL->getOperand(0).getNode();
7563 if (IPM->getOpcode() != SystemZISD::IPM)
7564 return false;
7565
7566 // Avoid introducing CC spills (because SRA would clobber CC).
7567 if (!CompareLHS->hasOneUse())
7568 return false;
7569 // Verify that the ICMP compares against zero.
7570 if (CompareRHS->getZExtValue() != 0)
7571 return false;
7572
7573 // Compute the effective CC mask for the new branch or select.
7574 CCMask = SystemZ::reverseCCMask(CCMask);
7575
7576 // Return the updated CCReg link.
7577 CCReg = IPM->getOperand(0);
7578 return true;
7579 }
7580
7581 return false;
7582}
7583
7584SDValue SystemZTargetLowering::combineBR_CCMASK(
7585 SDNode *N, DAGCombinerInfo &DCI) const {
7586 SelectionDAG &DAG = DCI.DAG;
7587
7588 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
7589 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7590 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7591 if (!CCValid || !CCMask)
7592 return SDValue();
7593
7594 int CCValidVal = CCValid->getZExtValue();
7595 int CCMaskVal = CCMask->getZExtValue();
7596 SDValue Chain = N->getOperand(0);
7597 SDValue CCReg = N->getOperand(4);
7598
7599 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7600 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
7601 Chain,
7602 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7603 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7604 N->getOperand(3), CCReg);
7605 return SDValue();
7606}
7607
7608SDValue SystemZTargetLowering::combineSELECT_CCMASK(
7609 SDNode *N, DAGCombinerInfo &DCI) const {
7610 SelectionDAG &DAG = DCI.DAG;
7611
7612 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
7613 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
7614 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
7615 if (!CCValid || !CCMask)
7616 return SDValue();
7617
7618 int CCValidVal = CCValid->getZExtValue();
7619 int CCMaskVal = CCMask->getZExtValue();
7620 SDValue CCReg = N->getOperand(4);
7621
7622 if (combineCCMask(CCReg, CCValidVal, CCMaskVal))
7623 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0),
7624 N->getOperand(0), N->getOperand(1),
7625 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
7626 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
7627 CCReg);
7628 return SDValue();
7629}
7630
7631
7632SDValue SystemZTargetLowering::combineGET_CCMASK(
7633 SDNode *N, DAGCombinerInfo &DCI) const {
7634
7635 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
7636 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
7637 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
7638 if (!CCValid || !CCMask)
7639 return SDValue();
7640 int CCValidVal = CCValid->getZExtValue();
7641 int CCMaskVal = CCMask->getZExtValue();
7642
7643 SDValue Select = N->getOperand(0);
7644 if (Select->getOpcode() == ISD::TRUNCATE)
7645 Select = Select->getOperand(0);
7646 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
7647 return SDValue();
7648
7649 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
7650 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
7651 if (!SelectCCValid || !SelectCCMask)
7652 return SDValue();
7653 int SelectCCValidVal = SelectCCValid->getZExtValue();
7654 int SelectCCMaskVal = SelectCCMask->getZExtValue();
7655
7656 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
7657 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
7658 if (!TrueVal || !FalseVal)
7659 return SDValue();
7660 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
7661 ;
7662 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
7663 SelectCCMaskVal ^= SelectCCValidVal;
7664 else
7665 return SDValue();
7666
7667 if (SelectCCValidVal & ~CCValidVal)
7668 return SDValue();
7669 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
7670 return SDValue();
7671
7672 return Select->getOperand(4);
7673}
7674
7675SDValue SystemZTargetLowering::combineIntDIVREM(
7676 SDNode *N, DAGCombinerInfo &DCI) const {
7677 SelectionDAG &DAG = DCI.DAG;
7678 EVT VT = N->getValueType(0);
7679 // In the case where the divisor is a vector of constants a cheaper
7680 // sequence of instructions can replace the divide. BuildSDIV is called to
7681 // do this during DAG combining, but it only succeeds when it can build a
7682 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
7683 // since it is not Legal but Custom it can only happen before
7684 // legalization. Therefore we must scalarize this early before Combine
7685 // 1. For widened vectors, this is already the result of type legalization.
7686 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
7687 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
7688 return DAG.UnrollVectorOp(N);
7689 return SDValue();
7690}
7691
7692SDValue SystemZTargetLowering::combineINTRINSIC(
7693 SDNode *N, DAGCombinerInfo &DCI) const {
7694 SelectionDAG &DAG = DCI.DAG;
7695
7696 unsigned Id = N->getConstantOperandVal(1);
7697 switch (Id) {
7698 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
7699 // or larger is simply a vector load.
7700 case Intrinsic::s390_vll:
7701 case Intrinsic::s390_vlrl:
7702 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
7703 if (C->getZExtValue() >= 15)
7704 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
7705 N->getOperand(3), MachinePointerInfo());
7706 break;
7707 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
7708 case Intrinsic::s390_vstl:
7709 case Intrinsic::s390_vstrl:
7710 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
7711 if (C->getZExtValue() >= 15)
7712 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
7713 N->getOperand(4), MachinePointerInfo());
7714 break;
7715 }
7716
7717 return SDValue();
7718}
7719
7720SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
7721 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
7722 return N->getOperand(0);
7723 return N;
7724}
7725
7727 DAGCombinerInfo &DCI) const {
7728 switch(N->getOpcode()) {
7729 default: break;
7730 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
7731 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
7732 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
7734 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
7735 case ISD::LOAD: return combineLOAD(N, DCI);
7736 case ISD::STORE: return combineSTORE(N, DCI);
7737 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
7738 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
7739 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
7741 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
7743 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
7744 case ISD::SINT_TO_FP:
7745 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
7746 case ISD::BSWAP: return combineBSWAP(N, DCI);
7747 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
7748 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
7749 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
7750 case ISD::SDIV:
7751 case ISD::UDIV:
7752 case ISD::SREM:
7753 case ISD::UREM: return combineIntDIVREM(N, DCI);
7755 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
7756 }
7757
7758 return SDValue();
7759}
7760
7761// Return the demanded elements for the OpNo source operand of Op. DemandedElts
7762// are for Op.
7763static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
7764 unsigned OpNo) {
7765 EVT VT = Op.getValueType();
7766 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
7767 APInt SrcDemE;
7768 unsigned Opcode = Op.getOpcode();
7769 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7770 unsigned Id = Op.getConstantOperandVal(0);
7771 switch (Id) {
7772 case Intrinsic::s390_vpksh: // PACKS
7773 case Intrinsic::s390_vpksf:
7774 case Intrinsic::s390_vpksg:
7775 case Intrinsic::s390_vpkshs: // PACKS_CC
7776 case Intrinsic::s390_vpksfs:
7777 case Intrinsic::s390_vpksgs:
7778 case Intrinsic::s390_vpklsh: // PACKLS
7779 case Intrinsic::s390_vpklsf:
7780 case Intrinsic::s390_vpklsg:
7781 case Intrinsic::s390_vpklshs: // PACKLS_CC
7782 case Intrinsic::s390_vpklsfs:
7783 case Intrinsic::s390_vpklsgs:
7784 // VECTOR PACK truncates the elements of two source vectors into one.
7785 SrcDemE = DemandedElts;
7786 if (OpNo == 2)
7787 SrcDemE.lshrInPlace(NumElts / 2);
7788 SrcDemE = SrcDemE.trunc(NumElts / 2);
7789 break;
7790 // VECTOR UNPACK extends half the elements of the source vector.
7791 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7792 case Intrinsic::s390_vuphh:
7793 case Intrinsic::s390_vuphf:
7794 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7795 case Intrinsic::s390_vuplhh:
7796 case Intrinsic::s390_vuplhf:
7797 SrcDemE = APInt(NumElts * 2, 0);
7798 SrcDemE.insertBits(DemandedElts, 0);
7799 break;
7800 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7801 case Intrinsic::s390_vuplhw:
7802 case Intrinsic::s390_vuplf:
7803 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7804 case Intrinsic::s390_vupllh:
7805 case Intrinsic::s390_vupllf:
7806 SrcDemE = APInt(NumElts * 2, 0);
7807 SrcDemE.insertBits(DemandedElts, NumElts);
7808 break;
7809 case Intrinsic::s390_vpdi: {
7810 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
7811 SrcDemE = APInt(NumElts, 0);
7812 if (!DemandedElts[OpNo - 1])
7813 break;
7814 unsigned Mask = Op.getConstantOperandVal(3);
7815 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
7816 // Demand input element 0 or 1, given by the mask bit value.
7817 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
7818 break;
7819 }
7820 case Intrinsic::s390_vsldb: {
7821 // VECTOR SHIFT LEFT DOUBLE BY BYTE
7822 assert(VT == MVT::v16i8 && "Unexpected type.");
7823 unsigned FirstIdx = Op.getConstantOperandVal(3);
7824 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
7825 unsigned NumSrc0Els = 16 - FirstIdx;
7826 SrcDemE = APInt(NumElts, 0);
7827 if (OpNo == 1) {
7828 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
7829 SrcDemE.insertBits(DemEls, FirstIdx);
7830 } else {
7831 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
7832 SrcDemE.insertBits(DemEls, 0);
7833 }
7834 break;
7835 }
7836 case Intrinsic::s390_vperm:
7837 SrcDemE = APInt(NumElts, 1);
7838 break;
7839 default:
7840 llvm_unreachable("Unhandled intrinsic.");
7841 break;
7842 }
7843 } else {
7844 switch (Opcode) {
7846 // Scalar operand.
7847 SrcDemE = APInt(1, 1);
7848 break;
7850 SrcDemE = DemandedElts;
7851 break;
7852 default:
7853 llvm_unreachable("Unhandled opcode.");
7854 break;
7855 }
7856 }
7857 return SrcDemE;
7858}
7859
7860static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
7861 const APInt &DemandedElts,
7862 const SelectionDAG &DAG, unsigned Depth,
7863 unsigned OpNo) {
7864 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7865 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7866 KnownBits LHSKnown =
7867 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7868 KnownBits RHSKnown =
7869 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7870 Known = LHSKnown.intersectWith(RHSKnown);
7871}
7872
7873void
7875 KnownBits &Known,
7876 const APInt &DemandedElts,
7877 const SelectionDAG &DAG,
7878 unsigned Depth) const {
7879 Known.resetAll();
7880
7881 // Intrinsic CC result is returned in the two low bits.
7882 unsigned tmp0, tmp1; // not used
7883 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) {
7884 Known.Zero.setBitsFrom(2);
7885 return;
7886 }
7887 EVT VT = Op.getValueType();
7888 if (Op.getResNo() != 0 || VT == MVT::Untyped)
7889 return;
7890 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
7891 "KnownBits does not match VT in bitwidth");
7892 assert ((!VT.isVector() ||
7893 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
7894 "DemandedElts does not match VT number of elements");
7895 unsigned BitWidth = Known.getBitWidth();
7896 unsigned Opcode = Op.getOpcode();
7897 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7898 bool IsLogical = false;
7899 unsigned Id = Op.getConstantOperandVal(0);
7900 switch (Id) {
7901 case Intrinsic::s390_vpksh: // PACKS
7902 case Intrinsic::s390_vpksf:
7903 case Intrinsic::s390_vpksg:
7904 case Intrinsic::s390_vpkshs: // PACKS_CC
7905 case Intrinsic::s390_vpksfs:
7906 case Intrinsic::s390_vpksgs:
7907 case Intrinsic::s390_vpklsh: // PACKLS
7908 case Intrinsic::s390_vpklsf:
7909 case Intrinsic::s390_vpklsg:
7910 case Intrinsic::s390_vpklshs: // PACKLS_CC
7911 case Intrinsic::s390_vpklsfs:
7912 case Intrinsic::s390_vpklsgs:
7913 case Intrinsic::s390_vpdi:
7914 case Intrinsic::s390_vsldb:
7915 case Intrinsic::s390_vperm:
7916 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
7917 break;
7918 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
7919 case Intrinsic::s390_vuplhh:
7920 case Intrinsic::s390_vuplhf:
7921 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
7922 case Intrinsic::s390_vupllh:
7923 case Intrinsic::s390_vupllf:
7924 IsLogical = true;
7925 [[fallthrough]];
7926 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
7927 case Intrinsic::s390_vuphh:
7928 case Intrinsic::s390_vuphf:
7929 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
7930 case Intrinsic::s390_vuplhw:
7931 case Intrinsic::s390_vuplf: {
7932 SDValue SrcOp = Op.getOperand(1);
7933 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
7934 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
7935 if (IsLogical) {
7936 Known = Known.zext(BitWidth);
7937 } else
7938 Known = Known.sext(BitWidth);
7939 break;
7940 }
7941 default:
7942 break;
7943 }
7944 } else {
7945 switch (Opcode) {
7948 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
7949 break;
7950 case SystemZISD::REPLICATE: {
7951 SDValue SrcOp = Op.getOperand(0);
7952 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
7953 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
7954 Known = Known.sext(BitWidth); // VREPI sign extends the immedate.
7955 break;
7956 }
7957 default:
7958 break;
7959 }
7960 }
7961
7962 // Known has the width of the source operand(s). Adjust if needed to match
7963 // the passed bitwidth.
7964 if (Known.getBitWidth() != BitWidth)
7965 Known = Known.anyextOrTrunc(BitWidth);
7966}
7967
7968static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
7969 const SelectionDAG &DAG, unsigned Depth,
7970 unsigned OpNo) {
7971 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
7972 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
7973 if (LHS == 1) return 1; // Early out.
7974 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
7975 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
7976 if (RHS == 1) return 1; // Early out.
7977 unsigned Common = std::min(LHS, RHS);
7978 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
7979 EVT VT = Op.getValueType();
7980 unsigned VTBits = VT.getScalarSizeInBits();
7981 if (SrcBitWidth > VTBits) { // PACK
7982 unsigned SrcExtraBits = SrcBitWidth - VTBits;
7983 if (Common > SrcExtraBits)
7984 return (Common - SrcExtraBits);
7985 return 1;
7986 }
7987 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
7988 return Common;
7989}
7990
7991unsigned
7993 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
7994 unsigned Depth) const {
7995 if (Op.getResNo() != 0)
7996 return 1;
7997 unsigned Opcode = Op.getOpcode();
7998 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
7999 unsigned Id = Op.getConstantOperandVal(0);
8000 switch (Id) {
8001 case Intrinsic::s390_vpksh: // PACKS
8002 case Intrinsic::s390_vpksf:
8003 case Intrinsic::s390_vpksg:
8004 case Intrinsic::s390_vpkshs: // PACKS_CC
8005 case Intrinsic::s390_vpksfs:
8006 case Intrinsic::s390_vpksgs:
8007 case Intrinsic::s390_vpklsh: // PACKLS
8008 case Intrinsic::s390_vpklsf:
8009 case Intrinsic::s390_vpklsg:
8010 case Intrinsic::s390_vpklshs: // PACKLS_CC
8011 case Intrinsic::s390_vpklsfs:
8012 case Intrinsic::s390_vpklsgs:
8013 case Intrinsic::s390_vpdi:
8014 case Intrinsic::s390_vsldb:
8015 case Intrinsic::s390_vperm:
8016 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
8017 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
8018 case Intrinsic::s390_vuphh:
8019 case Intrinsic::s390_vuphf:
8020 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
8021 case Intrinsic::s390_vuplhw:
8022 case Intrinsic::s390_vuplf: {
8023 SDValue PackedOp = Op.getOperand(1);
8024 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
8025 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
8026 EVT VT = Op.getValueType();
8027 unsigned VTBits = VT.getScalarSizeInBits();
8028 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
8029 return Tmp;
8030 }
8031 default:
8032 break;
8033 }
8034 } else {
8035 switch (Opcode) {
8037 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
8038 default:
8039 break;
8040 }
8041 }
8042
8043 return 1;
8044}
8045
8048 const APInt &DemandedElts, const SelectionDAG &DAG,
8049 bool PoisonOnly, unsigned Depth) const {
8050 switch (Op->getOpcode()) {
8053 return true;
8054 }
8055 return false;
8056}
8057
8058unsigned
8060 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
8061 unsigned StackAlign = TFI->getStackAlignment();
8062 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
8063 "Unexpected stack alignment");
8064 // The default stack probe size is 4096 if the function has no
8065 // stack-probe-size attribute.
8066 unsigned StackProbeSize =
8067 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
8068 // Round down to the stack alignment.
8069 StackProbeSize &= ~(StackAlign - 1);
8070 return StackProbeSize ? StackProbeSize : StackAlign;
8071}
8072
8073//===----------------------------------------------------------------------===//
8074// Custom insertion
8075//===----------------------------------------------------------------------===//
8076
8077// Force base value Base into a register before MI. Return the register.
8079 const SystemZInstrInfo *TII) {
8080 MachineBasicBlock *MBB = MI.getParent();
8081 MachineFunction &MF = *MBB->getParent();
8083
8084 if (Base.isReg()) {
8085 // Copy Base into a new virtual register to help register coalescing in
8086 // cases with multiple uses.
8087 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8088 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
8089 .add(Base);
8090 return Reg;
8091 }
8092
8093 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8094 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
8095 .add(Base)
8096 .addImm(0)
8097 .addReg(0);
8098 return Reg;
8099}
8100
8101// The CC operand of MI might be missing a kill marker because there
8102// were multiple uses of CC, and ISel didn't know which to mark.
8103// Figure out whether MI should have had a kill marker.
8105 // Scan forward through BB for a use/def of CC.
8107 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
8108 const MachineInstr& mi = *miI;
8109 if (mi.readsRegister(SystemZ::CC))
8110 return false;
8111 if (mi.definesRegister(SystemZ::CC))
8112 break; // Should have kill-flag - update below.
8113 }
8114
8115 // If we hit the end of the block, check whether CC is live into a
8116 // successor.
8117 if (miI == MBB->end()) {
8118 for (const MachineBasicBlock *Succ : MBB->successors())
8119 if (Succ->isLiveIn(SystemZ::CC))
8120 return false;
8121 }
8122
8123 return true;
8124}
8125
8126// Return true if it is OK for this Select pseudo-opcode to be cascaded
8127// together with other Select pseudo-opcodes into a single basic-block with
8128// a conditional jump around it.
8130 switch (MI.getOpcode()) {
8131 case SystemZ::Select32:
8132 case SystemZ::Select64:
8133 case SystemZ::Select128:
8134 case SystemZ::SelectF32:
8135 case SystemZ::SelectF64:
8136 case SystemZ::SelectF128:
8137 case SystemZ::SelectVR32:
8138 case SystemZ::SelectVR64:
8139 case SystemZ::SelectVR128:
8140 return true;
8141
8142 default:
8143 return false;
8144 }
8145}
8146
8147// Helper function, which inserts PHI functions into SinkMBB:
8148// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
8149// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
8151 MachineBasicBlock *TrueMBB,
8152 MachineBasicBlock *FalseMBB,
8153 MachineBasicBlock *SinkMBB) {
8154 MachineFunction *MF = TrueMBB->getParent();
8156
8157 MachineInstr *FirstMI = Selects.front();
8158 unsigned CCValid = FirstMI->getOperand(3).getImm();
8159 unsigned CCMask = FirstMI->getOperand(4).getImm();
8160
8161 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
8162
8163 // As we are creating the PHIs, we have to be careful if there is more than
8164 // one. Later Selects may reference the results of earlier Selects, but later
8165 // PHIs have to reference the individual true/false inputs from earlier PHIs.
8166 // That also means that PHI construction must work forward from earlier to
8167 // later, and that the code must maintain a mapping from earlier PHI's
8168 // destination registers, and the registers that went into the PHI.
8170
8171 for (auto *MI : Selects) {
8172 Register DestReg = MI->getOperand(0).getReg();
8173 Register TrueReg = MI->getOperand(1).getReg();
8174 Register FalseReg = MI->getOperand(2).getReg();
8175
8176 // If this Select we are generating is the opposite condition from
8177 // the jump we generated, then we have to swap the operands for the
8178 // PHI that is going to be generated.
8179 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
8180 std::swap(TrueReg, FalseReg);
8181
8182 if (RegRewriteTable.contains(TrueReg))
8183 TrueReg = RegRewriteTable[TrueReg].first;
8184
8185 if (RegRewriteTable.contains(FalseReg))
8186 FalseReg = RegRewriteTable[FalseReg].second;
8187
8188 DebugLoc DL = MI->getDebugLoc();
8189 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
8190 .addReg(TrueReg).addMBB(TrueMBB)
8191 .addReg(FalseReg).addMBB(FalseMBB);
8192
8193 // Add this PHI to the rewrite table.
8194 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
8195 }
8196
8198}
8199
8200// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
8202SystemZTargetLowering::emitSelect(MachineInstr &MI,
8203 MachineBasicBlock *MBB) const {
8204 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
8205 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8206
8207 unsigned CCValid = MI.getOperand(3).getImm();
8208 unsigned CCMask = MI.getOperand(4).getImm();
8209
8210 // If we have a sequence of Select* pseudo instructions using the
8211 // same condition code value, we want to expand all of them into
8212 // a single pair of basic blocks using the same condition.
8215 Selects.push_back(&MI);
8216 unsigned Count = 0;
8217 for (MachineInstr &NextMI : llvm::make_range(
8218 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
8219 if (isSelectPseudo(NextMI)) {
8220 assert(NextMI.getOperand(3).getImm() == CCValid &&
8221 "Bad CCValid operands since CC was not redefined.");
8222 if (NextMI.getOperand(4).getImm() == CCMask ||
8223 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
8224 Selects.push_back(&NextMI);
8225 continue;
8226 }
8227 break;
8228 }
8229 if (NextMI.definesRegister(SystemZ::CC) || NextMI.usesCustomInsertionHook())
8230 break;
8231 bool User = false;
8232 for (auto *SelMI : Selects)
8233 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
8234 User = true;
8235 break;
8236 }
8237 if (NextMI.isDebugInstr()) {
8238 if (User) {
8239 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
8240 DbgValues.push_back(&NextMI);
8241 }
8242 } else if (User || ++Count > 20)
8243 break;
8244 }
8245
8246 MachineInstr *LastMI = Selects.back();
8247 bool CCKilled =
8248 (LastMI->killsRegister(SystemZ::CC) || checkCCKill(*LastMI, MBB));
8249 MachineBasicBlock *StartMBB = MBB;
8251 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8252
8253 // Unless CC was killed in the last Select instruction, mark it as
8254 // live-in to both FalseMBB and JoinMBB.
8255 if (!CCKilled) {
8256 FalseMBB->addLiveIn(SystemZ::CC);
8257 JoinMBB->addLiveIn(SystemZ::CC);
8258 }
8259
8260 // StartMBB:
8261 // BRC CCMask, JoinMBB
8262 // # fallthrough to FalseMBB
8263 MBB = StartMBB;
8264 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8265 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8266 MBB->addSuccessor(JoinMBB);
8267 MBB->addSuccessor(FalseMBB);
8268
8269 // FalseMBB:
8270 // # fallthrough to JoinMBB
8271 MBB = FalseMBB;
8272 MBB->addSuccessor(JoinMBB);
8273
8274 // JoinMBB:
8275 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
8276 // ...
8277 MBB = JoinMBB;
8278 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
8279 for (auto *SelMI : Selects)
8280 SelMI->eraseFromParent();
8281
8283 for (auto *DbgMI : DbgValues)
8284 MBB->splice(InsertPos, StartMBB, DbgMI);
8285
8286 return JoinMBB;
8287}
8288
8289// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
8290// StoreOpcode is the store to use and Invert says whether the store should
8291// happen when the condition is false rather than true. If a STORE ON
8292// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
8293MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
8295 unsigned StoreOpcode,
8296 unsigned STOCOpcode,
8297 bool Invert) const {
8298 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8299
8300 Register SrcReg = MI.getOperand(0).getReg();
8301 MachineOperand Base = MI.getOperand(1);
8302 int64_t Disp = MI.getOperand(2).getImm();
8303 Register IndexReg = MI.getOperand(3).getReg();
8304 unsigned CCValid = MI.getOperand(4).getImm();
8305 unsigned CCMask = MI.getOperand(5).getImm();
8306 DebugLoc DL = MI.getDebugLoc();
8307
8308 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
8309
8310 // ISel pattern matching also adds a load memory operand of the same
8311 // address, so take special care to find the storing memory operand.
8312 MachineMemOperand *MMO = nullptr;
8313 for (auto *I : MI.memoperands())
8314 if (I->isStore()) {
8315 MMO = I;
8316 break;
8317 }
8318
8319 // Use STOCOpcode if possible. We could use different store patterns in
8320 // order to avoid matching the index register, but the performance trade-offs
8321 // might be more complicated in that case.
8322 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
8323 if (Invert)
8324 CCMask ^= CCValid;
8325
8326 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
8327 .addReg(SrcReg)
8328 .add(Base)
8329 .addImm(Disp)
8330 .addImm(CCValid)
8331 .addImm(CCMask)
8332 .addMemOperand(MMO);
8333
8334 MI.eraseFromParent();
8335 return MBB;
8336 }
8337
8338 // Get the condition needed to branch around the store.
8339 if (!Invert)
8340 CCMask ^= CCValid;
8341
8342 MachineBasicBlock *StartMBB = MBB;
8344 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
8345
8346 // Unless CC was killed in the CondStore instruction, mark it as
8347 // live-in to both FalseMBB and JoinMBB.
8348 if (!MI.killsRegister(SystemZ::CC) && !checkCCKill(MI, JoinMBB)) {
8349 FalseMBB->addLiveIn(SystemZ::CC);
8350 JoinMBB->addLiveIn(SystemZ::CC);
8351 }
8352
8353 // StartMBB:
8354 // BRC CCMask, JoinMBB
8355 // # fallthrough to FalseMBB
8356 MBB = StartMBB;
8357 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8358 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
8359 MBB->addSuccessor(JoinMBB);
8360 MBB->addSuccessor(FalseMBB);
8361
8362 // FalseMBB:
8363 // store %SrcReg, %Disp(%Index,%Base)
8364 // # fallthrough to JoinMBB
8365 MBB = FalseMBB;
8366 BuildMI(MBB, DL, TII->get(StoreOpcode))
8367 .addReg(SrcReg)
8368 .add(Base)
8369 .addImm(Disp)
8370 .addReg(IndexReg)
8371 .addMemOperand(MMO);
8372 MBB->addSuccessor(JoinMBB);
8373
8374 MI.eraseFromParent();
8375 return JoinMBB;
8376}
8377
8378// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
8380SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
8382 bool Unsigned) const {
8383 MachineFunction &MF = *MBB->getParent();
8384 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8386
8387 // Synthetic instruction to compare 128-bit values.
8388 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
8389 Register Op0 = MI.getOperand(0).getReg();
8390 Register Op1 = MI.getOperand(1).getReg();
8391
8392 MachineBasicBlock *StartMBB = MBB;
8394 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
8395
8396 // StartMBB:
8397 //
8398 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
8399 // Swap the inputs to get:
8400 // CC 1 if high(Op0) > high(Op1)
8401 // CC 2 if high(Op0) < high(Op1)
8402 // CC 0 if high(Op0) == high(Op1)
8403 //
8404 // If CC != 0, we'd done, so jump over the next instruction.
8405 //
8406 // VEC[L]G Op1, Op0
8407 // JNE JoinMBB
8408 // # fallthrough to HiEqMBB
8409 MBB = StartMBB;
8410 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
8411 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
8412 .addReg(Op1).addReg(Op0);
8413 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8415 MBB->addSuccessor(JoinMBB);
8416 MBB->addSuccessor(HiEqMBB);
8417
8418 // HiEqMBB:
8419 //
8420 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
8421 // Since we already know the high parts are equal, the CC
8422 // result will only depend on the low parts:
8423 // CC 1 if low(Op0) > low(Op1)
8424 // CC 3 if low(Op0) <= low(Op1)
8425 //
8426 // VCHLGS Tmp, Op0, Op1
8427 // # fallthrough to JoinMBB
8428 MBB = HiEqMBB;
8429 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
8430 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
8431 .addReg(Op0).addReg(Op1);
8432 MBB->addSuccessor(JoinMBB);
8433
8434 // Mark CC as live-in to JoinMBB.
8435 JoinMBB->addLiveIn(SystemZ::CC);
8436
8437 MI.eraseFromParent();
8438 return JoinMBB;
8439}
8440
8441// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
8442// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
8443// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
8444// whether the field should be inverted after performing BinOpcode (e.g. for
8445// NAND).
8446MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
8447 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
8448 bool Invert) const {
8449 MachineFunction &MF = *MBB->getParent();
8450 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8452
8453 // Extract the operands. Base can be a register or a frame index.
8454 // Src2 can be a register or immediate.
8455 Register Dest = MI.getOperand(0).getReg();
8456 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8457 int64_t Disp = MI.getOperand(2).getImm();
8458 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
8459 Register BitShift = MI.getOperand(4).getReg();
8460 Register NegBitShift = MI.getOperand(5).getReg();
8461 unsigned BitSize = MI.getOperand(6).getImm();
8462 DebugLoc DL = MI.getDebugLoc();
8463
8464 // Get the right opcodes for the displacement.
8465 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8466 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8467 assert(LOpcode && CSOpcode && "Displacement out of range");
8468
8469 // Create virtual registers for temporary results.
8470 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8471 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8472 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8473 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8474 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8475
8476 // Insert a basic block for the main loop.
8477 MachineBasicBlock *StartMBB = MBB;
8479 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8480
8481 // StartMBB:
8482 // ...
8483 // %OrigVal = L Disp(%Base)
8484 // # fall through to LoopMBB
8485 MBB = StartMBB;
8486 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8487 MBB->addSuccessor(LoopMBB);
8488
8489 // LoopMBB:
8490 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
8491 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8492 // %RotatedNewVal = OP %RotatedOldVal, %Src2
8493 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8494 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8495 // JNE LoopMBB
8496 // # fall through to DoneMBB
8497 MBB = LoopMBB;
8498 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8499 .addReg(OrigVal).addMBB(StartMBB)
8500 .addReg(Dest).addMBB(LoopMBB);
8501 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8502 .addReg(OldVal).addReg(BitShift).addImm(0);
8503 if (Invert) {
8504 // Perform the operation normally and then invert every bit of the field.
8505 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8506 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
8507 // XILF with the upper BitSize bits set.
8508 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
8509 .addReg(Tmp).addImm(-1U << (32 - BitSize));
8510 } else if (BinOpcode)
8511 // A simply binary operation.
8512 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
8513 .addReg(RotatedOldVal)
8514 .add(Src2);
8515 else
8516 // Use RISBG to rotate Src2 into position and use it to replace the
8517 // field in RotatedOldVal.
8518 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
8519 .addReg(RotatedOldVal).addReg(Src2.getReg())
8520 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
8521 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8522 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8523 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8524 .addReg(OldVal)
8525 .addReg(NewVal)
8526 .add(Base)
8527 .addImm(Disp);
8528 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8530 MBB->addSuccessor(LoopMBB);
8531 MBB->addSuccessor(DoneMBB);
8532
8533 MI.eraseFromParent();
8534 return DoneMBB;
8535}
8536
8537// Implement EmitInstrWithCustomInserter for subword pseudo
8538// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
8539// instruction that should be used to compare the current field with the
8540// minimum or maximum value. KeepOldMask is the BRC condition-code mask
8541// for when the current field should be kept.
8542MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
8543 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
8544 unsigned KeepOldMask) const {
8545 MachineFunction &MF = *MBB->getParent();
8546 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8548
8549 // Extract the operands. Base can be a register or a frame index.
8550 Register Dest = MI.getOperand(0).getReg();
8551 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8552 int64_t Disp = MI.getOperand(2).getImm();
8553 Register Src2 = MI.getOperand(3).getReg();
8554 Register BitShift = MI.getOperand(4).getReg();
8555 Register NegBitShift = MI.getOperand(5).getReg();
8556 unsigned BitSize = MI.getOperand(6).getImm();
8557 DebugLoc DL = MI.getDebugLoc();
8558
8559 // Get the right opcodes for the displacement.
8560 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8561 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8562 assert(LOpcode && CSOpcode && "Displacement out of range");
8563
8564 // Create virtual registers for temporary results.
8565 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8566 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8567 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8568 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8569 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8570 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
8571
8572 // Insert 3 basic blocks for the loop.
8573 MachineBasicBlock *StartMBB = MBB;
8575 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8576 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
8577 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
8578
8579 // StartMBB:
8580 // ...
8581 // %OrigVal = L Disp(%Base)
8582 // # fall through to LoopMBB
8583 MBB = StartMBB;
8584 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
8585 MBB->addSuccessor(LoopMBB);
8586
8587 // LoopMBB:
8588 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
8589 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
8590 // CompareOpcode %RotatedOldVal, %Src2
8591 // BRC KeepOldMask, UpdateMBB
8592 MBB = LoopMBB;
8593 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8594 .addReg(OrigVal).addMBB(StartMBB)
8595 .addReg(Dest).addMBB(UpdateMBB);
8596 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
8597 .addReg(OldVal).addReg(BitShift).addImm(0);
8598 BuildMI(MBB, DL, TII->get(CompareOpcode))
8599 .addReg(RotatedOldVal).addReg(Src2);
8600 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8601 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
8602 MBB->addSuccessor(UpdateMBB);
8603 MBB->addSuccessor(UseAltMBB);
8604
8605 // UseAltMBB:
8606 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
8607 // # fall through to UpdateMBB
8608 MBB = UseAltMBB;
8609 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
8610 .addReg(RotatedOldVal).addReg(Src2)
8611 .addImm(32).addImm(31 + BitSize).addImm(0);
8612 MBB->addSuccessor(UpdateMBB);
8613
8614 // UpdateMBB:
8615 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
8616 // [ %RotatedAltVal, UseAltMBB ]
8617 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
8618 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
8619 // JNE LoopMBB
8620 // # fall through to DoneMBB
8621 MBB = UpdateMBB;
8622 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
8623 .addReg(RotatedOldVal).addMBB(LoopMBB)
8624 .addReg(RotatedAltVal).addMBB(UseAltMBB);
8625 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
8626 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
8627 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
8628 .addReg(OldVal)
8629 .addReg(NewVal)
8630 .add(Base)
8631 .addImm(Disp);
8632 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8634 MBB->addSuccessor(LoopMBB);
8635 MBB->addSuccessor(DoneMBB);
8636
8637 MI.eraseFromParent();
8638 return DoneMBB;
8639}
8640
8641// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
8642// instruction MI.
8644SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
8645 MachineBasicBlock *MBB) const {
8646 MachineFunction &MF = *MBB->getParent();
8647 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8649
8650 // Extract the operands. Base can be a register or a frame index.
8651 Register Dest = MI.getOperand(0).getReg();
8652 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
8653 int64_t Disp = MI.getOperand(2).getImm();
8654 Register CmpVal = MI.getOperand(3).getReg();
8655 Register OrigSwapVal = MI.getOperand(4).getReg();
8656 Register BitShift = MI.getOperand(5).getReg();
8657 Register NegBitShift = MI.getOperand(6).getReg();
8658 int64_t BitSize = MI.getOperand(7).getImm();
8659 DebugLoc DL = MI.getDebugLoc();
8660
8661 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
8662
8663 // Get the right opcodes for the displacement and zero-extension.
8664 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
8665 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
8666 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
8667 assert(LOpcode && CSOpcode && "Displacement out of range");
8668
8669 // Create virtual registers for temporary results.
8670 Register OrigOldVal = MRI.createVirtualRegister(RC);
8671 Register OldVal = MRI.createVirtualRegister(RC);
8672 Register SwapVal = MRI.createVirtualRegister(RC);
8673 Register StoreVal = MRI.createVirtualRegister(RC);
8674 Register OldValRot = MRI.createVirtualRegister(RC);
8675 Register RetryOldVal = MRI.createVirtualRegister(RC);
8676 Register RetrySwapVal = MRI.createVirtualRegister(RC);
8677
8678 // Insert 2 basic blocks for the loop.
8679 MachineBasicBlock *StartMBB = MBB;
8681 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8682 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
8683
8684 // StartMBB:
8685 // ...
8686 // %OrigOldVal = L Disp(%Base)
8687 // # fall through to LoopMBB
8688 MBB = StartMBB;
8689 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
8690 .add(Base)
8691 .addImm(Disp)
8692 .addReg(0);
8693 MBB->addSuccessor(LoopMBB);
8694
8695 // LoopMBB:
8696 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
8697 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
8698 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
8699 // ^^ The low BitSize bits contain the field
8700 // of interest.
8701 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
8702 // ^^ Replace the upper 32-BitSize bits of the
8703 // swap value with those that we loaded and rotated.
8704 // %Dest = LL[CH] %OldValRot
8705 // CR %Dest, %CmpVal
8706 // JNE DoneMBB
8707 // # Fall through to SetMBB
8708 MBB = LoopMBB;
8709 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
8710 .addReg(OrigOldVal).addMBB(StartMBB)
8711 .addReg(RetryOldVal).addMBB(SetMBB);
8712 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
8713 .addReg(OrigSwapVal).addMBB(StartMBB)
8714 .addReg(RetrySwapVal).addMBB(SetMBB);
8715 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
8716 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
8717 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
8718 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
8719 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
8720 .addReg(OldValRot);
8721 BuildMI(MBB, DL, TII->get(SystemZ::CR))
8722 .addReg(Dest).addReg(CmpVal);
8723 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8726 MBB->addSuccessor(DoneMBB);
8727 MBB->addSuccessor(SetMBB);
8728
8729 // SetMBB:
8730 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
8731 // ^^ Rotate the new field to its proper position.
8732 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
8733 // JNE LoopMBB
8734 // # fall through to ExitMBB
8735 MBB = SetMBB;
8736 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
8737 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
8738 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
8739 .addReg(OldVal)
8740 .addReg(StoreVal)
8741 .add(Base)
8742 .addImm(Disp);
8743 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8745 MBB->addSuccessor(LoopMBB);
8746 MBB->addSuccessor(DoneMBB);
8747
8748 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
8749 // to the block after the loop. At this point, CC may have been defined
8750 // either by the CR in LoopMBB or by the CS in SetMBB.
8751 if (!MI.registerDefIsDead(SystemZ::CC))
8752 DoneMBB->addLiveIn(SystemZ::CC);
8753
8754 MI.eraseFromParent();
8755 return DoneMBB;
8756}
8757
8758// Emit a move from two GR64s to a GR128.
8760SystemZTargetLowering::emitPair128(MachineInstr &MI,
8761 MachineBasicBlock *MBB) const {
8762 MachineFunction &MF = *MBB->getParent();
8763 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8765 DebugLoc DL = MI.getDebugLoc();
8766
8767 Register Dest = MI.getOperand(0).getReg();
8768 Register Hi = MI.getOperand(1).getReg();
8769 Register Lo = MI.getOperand(2).getReg();
8770 Register Tmp1 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8771 Register Tmp2 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8772
8773 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Tmp1);
8774 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Tmp2)
8775 .addReg(Tmp1).addReg(Hi).addImm(SystemZ::subreg_h64);
8776 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8777 .addReg(Tmp2).addReg(Lo).addImm(SystemZ::subreg_l64);
8778
8779 MI.eraseFromParent();
8780 return MBB;
8781}
8782
8783// Emit an extension from a GR64 to a GR128. ClearEven is true
8784// if the high register of the GR128 value must be cleared or false if
8785// it's "don't care".
8786MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
8788 bool ClearEven) const {
8789 MachineFunction &MF = *MBB->getParent();
8790 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8792 DebugLoc DL = MI.getDebugLoc();
8793
8794 Register Dest = MI.getOperand(0).getReg();
8795 Register Src = MI.getOperand(1).getReg();
8796 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8797
8798 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
8799 if (ClearEven) {
8800 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
8801 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8802
8803 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
8804 .addImm(0);
8805 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
8806 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
8807 In128 = NewIn128;
8808 }
8809 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
8810 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
8811
8812 MI.eraseFromParent();
8813 return MBB;
8814}
8815
8817SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
8819 unsigned Opcode, bool IsMemset) const {
8820 MachineFunction &MF = *MBB->getParent();
8821 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
8823 DebugLoc DL = MI.getDebugLoc();
8824
8825 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
8826 uint64_t DestDisp = MI.getOperand(1).getImm();
8827 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
8828 uint64_t SrcDisp;
8829
8830 // Fold the displacement Disp if it is out of range.
8831 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
8832 if (!isUInt<12>(Disp)) {
8833 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8834 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
8835 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
8836 .add(Base).addImm(Disp).addReg(0);
8837 Base = MachineOperand::CreateReg(Reg, false);
8838 Disp = 0;
8839 }
8840 };
8841
8842 if (!IsMemset) {
8843 SrcBase = earlyUseOperand(MI.getOperand(2));
8844 SrcDisp = MI.getOperand(3).getImm();
8845 } else {
8846 SrcBase = DestBase;
8847 SrcDisp = DestDisp++;
8848 foldDisplIfNeeded(DestBase, DestDisp);
8849 }
8850
8851 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
8852 bool IsImmForm = LengthMO.isImm();
8853 bool IsRegForm = !IsImmForm;
8854
8855 // Build and insert one Opcode of Length, with special treatment for memset.
8856 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
8858 MachineOperand DBase, uint64_t DDisp,
8860 unsigned Length) -> void {
8861 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
8862 if (IsMemset) {
8863 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
8864 if (ByteMO.isImm())
8865 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
8866 .add(SBase).addImm(SDisp).add(ByteMO);
8867 else
8868 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
8869 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
8870 if (--Length == 0)
8871 return;
8872 }
8873 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
8874 .add(DBase).addImm(DDisp).addImm(Length)
8875 .add(SBase).addImm(SDisp)
8876 .setMemRefs(MI.memoperands());
8877 };
8878
8879 bool NeedsLoop = false;
8880 uint64_t ImmLength = 0;
8881 Register LenAdjReg = SystemZ::NoRegister;
8882 if (IsImmForm) {
8883 ImmLength = LengthMO.getImm();
8884 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
8885 if (ImmLength == 0) {
8886 MI.eraseFromParent();
8887 return MBB;
8888 }
8889 if (Opcode == SystemZ::CLC) {
8890 if (ImmLength > 3 * 256)
8891 // A two-CLC sequence is a clear win over a loop, not least because
8892 // it needs only one branch. A three-CLC sequence needs the same
8893 // number of branches as a loop (i.e. 2), but is shorter. That
8894 // brings us to lengths greater than 768 bytes. It seems relatively
8895 // likely that a difference will be found within the first 768 bytes,
8896 // so we just optimize for the smallest number of branch
8897 // instructions, in order to avoid polluting the prediction buffer
8898 // too much.
8899 NeedsLoop = true;
8900 } else if (ImmLength > 6 * 256)
8901 // The heuristic we use is to prefer loops for anything that would
8902 // require 7 or more MVCs. With these kinds of sizes there isn't much
8903 // to choose between straight-line code and looping code, since the
8904 // time will be dominated by the MVCs themselves.
8905 NeedsLoop = true;
8906 } else {
8907 NeedsLoop = true;
8908 LenAdjReg = LengthMO.getReg();
8909 }
8910
8911 // When generating more than one CLC, all but the last will need to
8912 // branch to the end when a difference is found.
8913 MachineBasicBlock *EndMBB =
8914 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
8916 : nullptr);
8917
8918 if (NeedsLoop) {
8919 Register StartCountReg =
8920 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
8921 if (IsImmForm) {
8922 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
8923 ImmLength &= 255;
8924 } else {
8925 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
8926 .addReg(LenAdjReg)
8927 .addReg(0)
8928 .addImm(8);
8929 }
8930
8931 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
8932 auto loadZeroAddress = [&]() -> MachineOperand {
8933 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
8934 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
8935 return MachineOperand::CreateReg(Reg, false);
8936 };
8937 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
8938 DestBase = loadZeroAddress();
8939 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
8940 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
8941
8942 MachineBasicBlock *StartMBB = nullptr;
8943 MachineBasicBlock *LoopMBB = nullptr;
8944 MachineBasicBlock *NextMBB = nullptr;
8945 MachineBasicBlock *DoneMBB = nullptr;
8946 MachineBasicBlock *AllDoneMBB = nullptr;
8947
8948 Register StartSrcReg = forceReg(MI, SrcBase, TII);
8949 Register StartDestReg =
8950 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
8951
8952 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
8953 Register ThisSrcReg = MRI.createVirtualRegister(RC);
8954 Register ThisDestReg =
8955 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
8956 Register NextSrcReg = MRI.createVirtualRegister(RC);
8957 Register NextDestReg =
8958 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
8959 RC = &SystemZ::GR64BitRegClass;
8960 Register ThisCountReg = MRI.createVirtualRegister(RC);
8961 Register NextCountReg = MRI.createVirtualRegister(RC);
8962
8963 if (IsRegForm) {
8964 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
8965 StartMBB = SystemZ::emitBlockAfter(MBB);
8966 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
8967 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
8968 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
8969
8970 // MBB:
8971 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
8972 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8973 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
8974 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8976 .addMBB(AllDoneMBB);
8977 MBB->addSuccessor(AllDoneMBB);
8978 if (!IsMemset)
8979 MBB->addSuccessor(StartMBB);
8980 else {
8981 // MemsetOneCheckMBB:
8982 // # Jump to MemsetOneMBB for a memset of length 1, or
8983 // # fall thru to StartMBB.
8984 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
8985 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
8986 MBB->addSuccessor(MemsetOneCheckMBB);
8987 MBB = MemsetOneCheckMBB;
8988 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
8989 .addReg(LenAdjReg).addImm(-1);
8990 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
8992 .addMBB(MemsetOneMBB);
8993 MBB->addSuccessor(MemsetOneMBB, {10, 100});
8994 MBB->addSuccessor(StartMBB, {90, 100});
8995
8996 // MemsetOneMBB:
8997 // # Jump back to AllDoneMBB after a single MVI or STC.
8998 MBB = MemsetOneMBB;
8999 insertMemMemOp(MBB, MBB->end(),
9000 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
9001 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
9002 1);
9003 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
9004 MBB->addSuccessor(AllDoneMBB);
9005 }
9006
9007 // StartMBB:
9008 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
9009 MBB = StartMBB;
9010 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9011 .addReg(StartCountReg).addImm(0);
9012 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9014 .addMBB(DoneMBB);
9015 MBB->addSuccessor(DoneMBB);
9016 MBB->addSuccessor(LoopMBB);
9017 }
9018 else {
9019 StartMBB = MBB;
9020 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9021 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9022 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
9023
9024 // StartMBB:
9025 // # fall through to LoopMBB
9026 MBB->addSuccessor(LoopMBB);
9027
9028 DestBase = MachineOperand::CreateReg(NextDestReg, false);
9029 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
9030 if (EndMBB && !ImmLength)
9031 // If the loop handled the whole CLC range, DoneMBB will be empty with
9032 // CC live-through into EndMBB, so add it as live-in.
9033 DoneMBB->addLiveIn(SystemZ::CC);
9034 }
9035
9036 // LoopMBB:
9037 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
9038 // [ %NextDestReg, NextMBB ]
9039 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
9040 // [ %NextSrcReg, NextMBB ]
9041 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
9042 // [ %NextCountReg, NextMBB ]
9043 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
9044 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
9045 // ( JLH EndMBB )
9046 //
9047 // The prefetch is used only for MVC. The JLH is used only for CLC.
9048 MBB = LoopMBB;
9049 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
9050 .addReg(StartDestReg).addMBB(StartMBB)
9051 .addReg(NextDestReg).addMBB(NextMBB);
9052 if (!HaveSingleBase)
9053 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
9054 .addReg(StartSrcReg).addMBB(StartMBB)
9055 .addReg(NextSrcReg).addMBB(NextMBB);
9056 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
9057 .addReg(StartCountReg).addMBB(StartMBB)
9058 .addReg(NextCountReg).addMBB(NextMBB);
9059 if (Opcode == SystemZ::MVC)
9060 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
9062 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
9063 insertMemMemOp(MBB, MBB->end(),
9064 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
9065 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
9066 if (EndMBB) {
9067 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9069 .addMBB(EndMBB);
9070 MBB->addSuccessor(EndMBB);
9071 MBB->addSuccessor(NextMBB);
9072 }
9073
9074 // NextMBB:
9075 // %NextDestReg = LA 256(%ThisDestReg)
9076 // %NextSrcReg = LA 256(%ThisSrcReg)
9077 // %NextCountReg = AGHI %ThisCountReg, -1
9078 // CGHI %NextCountReg, 0
9079 // JLH LoopMBB
9080 // # fall through to DoneMBB
9081 //
9082 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
9083 MBB = NextMBB;
9084 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
9085 .addReg(ThisDestReg).addImm(256).addReg(0);
9086 if (!HaveSingleBase)
9087 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
9088 .addReg(ThisSrcReg).addImm(256).addReg(0);
9089 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
9090 .addReg(ThisCountReg).addImm(-1);
9091 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9092 .addReg(NextCountReg).addImm(0);
9093 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9095 .addMBB(LoopMBB);
9096 MBB->addSuccessor(LoopMBB);
9097 MBB->addSuccessor(DoneMBB);
9098
9099 MBB = DoneMBB;
9100 if (IsRegForm) {
9101 // DoneMBB:
9102 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
9103 // # Use EXecute Relative Long for the remainder of the bytes. The target
9104 // instruction of the EXRL will have a length field of 1 since 0 is an
9105 // illegal value. The number of bytes processed becomes (%LenAdjReg &
9106 // 0xff) + 1.
9107 // # Fall through to AllDoneMBB.
9108 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9109 Register RemDestReg = HaveSingleBase ? RemSrcReg
9110 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9111 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
9112 .addReg(StartDestReg).addMBB(StartMBB)
9113 .addReg(NextDestReg).addMBB(NextMBB);
9114 if (!HaveSingleBase)
9115 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
9116 .addReg(StartSrcReg).addMBB(StartMBB)
9117 .addReg(NextSrcReg).addMBB(NextMBB);
9118 if (IsMemset)
9119 insertMemMemOp(MBB, MBB->end(),
9120 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
9121 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
9122 MachineInstrBuilder EXRL_MIB =
9123 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
9124 .addImm(Opcode)
9125 .addReg(LenAdjReg)
9126 .addReg(RemDestReg).addImm(DestDisp)
9127 .addReg(RemSrcReg).addImm(SrcDisp);
9128 MBB->addSuccessor(AllDoneMBB);
9129 MBB = AllDoneMBB;
9130 if (Opcode != SystemZ::MVC) {
9131 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
9132 if (EndMBB)
9133 MBB->addLiveIn(SystemZ::CC);
9134 }
9135 }
9137 }
9138
9139 // Handle any remaining bytes with straight-line code.
9140 while (ImmLength > 0) {
9141 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
9142 // The previous iteration might have created out-of-range displacements.
9143 // Apply them using LA/LAY if so.
9144 foldDisplIfNeeded(DestBase, DestDisp);
9145 foldDisplIfNeeded(SrcBase, SrcDisp);
9146 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
9147 DestDisp += ThisLength;
9148 SrcDisp += ThisLength;
9149 ImmLength -= ThisLength;
9150 // If there's another CLC to go, branch to the end if a difference
9151 // was found.
9152 if (EndMBB && ImmLength > 0) {
9154 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9156 .addMBB(EndMBB);
9157 MBB->addSuccessor(EndMBB);
9158 MBB->addSuccessor(NextMBB);
9159 MBB = NextMBB;
9160 }
9161 }
9162 if (EndMBB) {
9163 MBB->addSuccessor(EndMBB);
9164 MBB = EndMBB;
9165 MBB->addLiveIn(SystemZ::CC);
9166 }
9167
9168 MI.eraseFromParent();
9169 return MBB;
9170}
9171
9172// Decompose string pseudo-instruction MI into a loop that continually performs
9173// Opcode until CC != 3.
9174MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
9175 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9176 MachineFunction &MF = *MBB->getParent();
9177 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9179 DebugLoc DL = MI.getDebugLoc();
9180
9181 uint64_t End1Reg = MI.getOperand(0).getReg();
9182 uint64_t Start1Reg = MI.getOperand(1).getReg();
9183 uint64_t Start2Reg = MI.getOperand(2).getReg();
9184 uint64_t CharReg = MI.getOperand(3).getReg();
9185
9186 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
9187 uint64_t This1Reg = MRI.createVirtualRegister(RC);
9188 uint64_t This2Reg = MRI.createVirtualRegister(RC);
9189 uint64_t End2Reg = MRI.createVirtualRegister(RC);
9190
9191 MachineBasicBlock *StartMBB = MBB;
9193 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9194
9195 // StartMBB:
9196 // # fall through to LoopMBB
9197 MBB->addSuccessor(LoopMBB);
9198
9199 // LoopMBB:
9200 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
9201 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
9202 // R0L = %CharReg
9203 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
9204 // JO LoopMBB
9205 // # fall through to DoneMBB
9206 //
9207 // The load of R0L can be hoisted by post-RA LICM.
9208 MBB = LoopMBB;
9209
9210 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
9211 .addReg(Start1Reg).addMBB(StartMBB)
9212 .addReg(End1Reg).addMBB(LoopMBB);
9213 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
9214 .addReg(Start2Reg).addMBB(StartMBB)
9215 .addReg(End2Reg).addMBB(LoopMBB);
9216 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
9217 BuildMI(MBB, DL, TII->get(Opcode))
9218 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
9219 .addReg(This1Reg).addReg(This2Reg);
9220 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9222 MBB->addSuccessor(LoopMBB);
9223 MBB->addSuccessor(DoneMBB);
9224
9225 DoneMBB->addLiveIn(SystemZ::CC);
9226
9227 MI.eraseFromParent();
9228 return DoneMBB;
9229}
9230
9231// Update TBEGIN instruction with final opcode and register clobbers.
9232MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
9233 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
9234 bool NoFloat) const {
9235 MachineFunction &MF = *MBB->getParent();
9236 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9237 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9238
9239 // Update opcode.
9240 MI.setDesc(TII->get(Opcode));
9241
9242 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
9243 // Make sure to add the corresponding GRSM bits if they are missing.
9244 uint64_t Control = MI.getOperand(2).getImm();
9245 static const unsigned GPRControlBit[16] = {
9246 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
9247 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
9248 };
9249 Control |= GPRControlBit[15];
9250 if (TFI->hasFP(MF))
9251 Control |= GPRControlBit[11];
9252 MI.getOperand(2).setImm(Control);
9253
9254 // Add GPR clobbers.
9255 for (int I = 0; I < 16; I++) {
9256 if ((Control & GPRControlBit[I]) == 0) {
9257 unsigned Reg = SystemZMC::GR64Regs[I];
9258 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9259 }
9260 }
9261
9262 // Add FPR/VR clobbers.
9263 if (!NoFloat && (Control & 4) != 0) {
9264 if (Subtarget.hasVector()) {
9265 for (unsigned Reg : SystemZMC::VR128Regs) {
9266 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9267 }
9268 } else {
9269 for (unsigned Reg : SystemZMC::FP64Regs) {
9270 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
9271 }
9272 }
9273 }
9274
9275 return MBB;
9276}
9277
9278MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
9279 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
9280 MachineFunction &MF = *MBB->getParent();
9282 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9283 DebugLoc DL = MI.getDebugLoc();
9284
9285 Register SrcReg = MI.getOperand(0).getReg();
9286
9287 // Create new virtual register of the same class as source.
9288 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
9289 Register DstReg = MRI->createVirtualRegister(RC);
9290
9291 // Replace pseudo with a normal load-and-test that models the def as
9292 // well.
9293 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
9294 .addReg(SrcReg)
9295 .setMIFlags(MI.getFlags());
9296 MI.eraseFromParent();
9297
9298 return MBB;
9299}
9300
9301MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
9303 MachineFunction &MF = *MBB->getParent();
9305 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9306 DebugLoc DL = MI.getDebugLoc();
9307 const unsigned ProbeSize = getStackProbeSize(MF);
9308 Register DstReg = MI.getOperand(0).getReg();
9309 Register SizeReg = MI.getOperand(2).getReg();
9310
9311 MachineBasicBlock *StartMBB = MBB;
9313 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
9314 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
9315 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
9316 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
9317
9320
9321 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9322 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9323
9324 // LoopTestMBB
9325 // BRC TailTestMBB
9326 // # fallthrough to LoopBodyMBB
9327 StartMBB->addSuccessor(LoopTestMBB);
9328 MBB = LoopTestMBB;
9329 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
9330 .addReg(SizeReg)
9331 .addMBB(StartMBB)
9332 .addReg(IncReg)
9333 .addMBB(LoopBodyMBB);
9334 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
9335 .addReg(PHIReg)
9336 .addImm(ProbeSize);
9337 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9339 .addMBB(TailTestMBB);
9340 MBB->addSuccessor(LoopBodyMBB);
9341 MBB->addSuccessor(TailTestMBB);
9342
9343 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
9344 // J LoopTestMBB
9345 MBB = LoopBodyMBB;
9346 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
9347 .addReg(PHIReg)
9348 .addImm(ProbeSize);
9349 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
9350 .addReg(SystemZ::R15D)
9351 .addImm(ProbeSize);
9352 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9353 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
9354 .setMemRefs(VolLdMMO);
9355 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
9356 MBB->addSuccessor(LoopTestMBB);
9357
9358 // TailTestMBB
9359 // BRC DoneMBB
9360 // # fallthrough to TailMBB
9361 MBB = TailTestMBB;
9362 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
9363 .addReg(PHIReg)
9364 .addImm(0);
9365 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9367 .addMBB(DoneMBB);
9368 MBB->addSuccessor(TailMBB);
9369 MBB->addSuccessor(DoneMBB);
9370
9371 // TailMBB
9372 // # fallthrough to DoneMBB
9373 MBB = TailMBB;
9374 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
9375 .addReg(SystemZ::R15D)
9376 .addReg(PHIReg);
9377 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
9378 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
9379 .setMemRefs(VolLdMMO);
9380 MBB->addSuccessor(DoneMBB);
9381
9382 // DoneMBB
9383 MBB = DoneMBB;
9384 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
9385 .addReg(SystemZ::R15D);
9386
9387 MI.eraseFromParent();
9388 return DoneMBB;
9389}
9390
9391SDValue SystemZTargetLowering::
9392getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
9394 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
9395 SDLoc DL(SP);
9396 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
9397 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
9398}
9399
9402 switch (MI.getOpcode()) {
9403 case SystemZ::Select32:
9404 case SystemZ::Select64:
9405 case SystemZ::Select128:
9406 case SystemZ::SelectF32:
9407 case SystemZ::SelectF64:
9408 case SystemZ::SelectF128:
9409 case SystemZ::SelectVR32:
9410 case SystemZ::SelectVR64:
9411 case SystemZ::SelectVR128:
9412 return emitSelect(MI, MBB);
9413
9414 case SystemZ::CondStore8Mux:
9415 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
9416 case SystemZ::CondStore8MuxInv:
9417 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
9418 case SystemZ::CondStore16Mux:
9419 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
9420 case SystemZ::CondStore16MuxInv:
9421 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
9422 case SystemZ::CondStore32Mux:
9423 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
9424 case SystemZ::CondStore32MuxInv:
9425 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
9426 case SystemZ::CondStore8:
9427 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
9428 case SystemZ::CondStore8Inv:
9429 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
9430 case SystemZ::CondStore16:
9431 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
9432 case SystemZ::CondStore16Inv:
9433 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
9434 case SystemZ::CondStore32:
9435 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
9436 case SystemZ::CondStore32Inv:
9437 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
9438 case SystemZ::CondStore64:
9439 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
9440 case SystemZ::CondStore64Inv:
9441 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
9442 case SystemZ::CondStoreF32:
9443 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
9444 case SystemZ::CondStoreF32Inv:
9445 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
9446 case SystemZ::CondStoreF64:
9447 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
9448 case SystemZ::CondStoreF64Inv:
9449 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
9450
9451 case SystemZ::SCmp128Hi:
9452 return emitICmp128Hi(MI, MBB, false);
9453 case SystemZ::UCmp128Hi:
9454 return emitICmp128Hi(MI, MBB, true);
9455
9456 case SystemZ::PAIR128:
9457 return emitPair128(MI, MBB);
9458 case SystemZ::AEXT128:
9459 return emitExt128(MI, MBB, false);
9460 case SystemZ::ZEXT128:
9461 return emitExt128(MI, MBB, true);
9462
9463 case SystemZ::ATOMIC_SWAPW:
9464 return emitAtomicLoadBinary(MI, MBB, 0);
9465
9466 case SystemZ::ATOMIC_LOADW_AR:
9467 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
9468 case SystemZ::ATOMIC_LOADW_AFI:
9469 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
9470
9471 case SystemZ::ATOMIC_LOADW_SR:
9472 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
9473
9474 case SystemZ::ATOMIC_LOADW_NR:
9475 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
9476 case SystemZ::ATOMIC_LOADW_NILH:
9477 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
9478
9479 case SystemZ::ATOMIC_LOADW_OR:
9480 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
9481 case SystemZ::ATOMIC_LOADW_OILH:
9482 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
9483
9484 case SystemZ::ATOMIC_LOADW_XR:
9485 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
9486 case SystemZ::ATOMIC_LOADW_XILF:
9487 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
9488
9489 case SystemZ::ATOMIC_LOADW_NRi:
9490 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
9491 case SystemZ::ATOMIC_LOADW_NILHi:
9492 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
9493
9494 case SystemZ::ATOMIC_LOADW_MIN:
9495 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
9496 case SystemZ::ATOMIC_LOADW_MAX:
9497 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
9498 case SystemZ::ATOMIC_LOADW_UMIN:
9499 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
9500 case SystemZ::ATOMIC_LOADW_UMAX:
9501 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
9502
9503 case SystemZ::ATOMIC_CMP_SWAPW:
9504 return emitAtomicCmpSwapW(MI, MBB);
9505 case SystemZ::MVCImm:
9506 case SystemZ::MVCReg:
9507 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
9508 case SystemZ::NCImm:
9509 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
9510 case SystemZ::OCImm:
9511 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
9512 case SystemZ::XCImm:
9513 case SystemZ::XCReg:
9514 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
9515 case SystemZ::CLCImm:
9516 case SystemZ::CLCReg:
9517 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
9518 case SystemZ::MemsetImmImm:
9519 case SystemZ::MemsetImmReg:
9520 case SystemZ::MemsetRegImm:
9521 case SystemZ::MemsetRegReg:
9522 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
9523 case SystemZ::CLSTLoop:
9524 return emitStringWrapper(MI, MBB, SystemZ::CLST);
9525 case SystemZ::MVSTLoop:
9526 return emitStringWrapper(MI, MBB, SystemZ::MVST);
9527 case SystemZ::SRSTLoop:
9528 return emitStringWrapper(MI, MBB, SystemZ::SRST);
9529 case SystemZ::TBEGIN:
9530 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
9531 case SystemZ::TBEGIN_nofloat:
9532 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
9533 case SystemZ::TBEGINC:
9534 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
9535 case SystemZ::LTEBRCompare_Pseudo:
9536 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
9537 case SystemZ::LTDBRCompare_Pseudo:
9538 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
9539 case SystemZ::LTXBRCompare_Pseudo:
9540 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
9541
9542 case SystemZ::PROBED_ALLOCA:
9543 return emitProbedAlloca(MI, MBB);
9544
9545 case TargetOpcode::STACKMAP:
9546 case TargetOpcode::PATCHPOINT:
9547 return emitPatchPoint(MI, MBB);
9548
9549 default:
9550 llvm_unreachable("Unexpected instr type to insert");
9551 }
9552}
9553
9554// This is only used by the isel schedulers, and is needed only to prevent
9555// compiler from crashing when list-ilp is used.
9556const TargetRegisterClass *
9557SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
9558 if (VT == MVT::Untyped)
9559 return &SystemZ::ADDR128BitRegClass;
9561}
9562
9563SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
9564 SelectionDAG &DAG) const {
9565 SDLoc dl(Op);
9566 /*
9567 The rounding method is in FPC Byte 3 bits 6-7, and has the following
9568 settings:
9569 00 Round to nearest
9570 01 Round to 0
9571 10 Round to +inf
9572 11 Round to -inf
9573
9574 FLT_ROUNDS, on the other hand, expects the following:
9575 -1 Undefined
9576 0 Round to 0
9577 1 Round to nearest
9578 2 Round to +inf
9579 3 Round to -inf
9580 */
9581
9582 // Save FPC to register.
9583 SDValue Chain = Op.getOperand(0);
9584 SDValue EFPC(
9585 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
9586 Chain = EFPC.getValue(1);
9587
9588 // Transform as necessary
9589 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
9590 DAG.getConstant(3, dl, MVT::i32));
9591 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
9592 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
9593 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
9594 DAG.getConstant(1, dl, MVT::i32)));
9595
9596 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
9597 DAG.getConstant(1, dl, MVT::i32));
9598 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
9599
9600 return DAG.getMergeValues({RetVal, Chain}, dl);
9601}
unsigned const MachineRegisterInfo * MRI
#define Success
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
amdgpu AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
Function Alias Analysis Results
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
#define Check(C,...)
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
IRTranslator LLVM IR MI
iv Induction Variable Users
Definition: IVUsers.cpp:48
#define RegName(no)
lazy value info
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
uint64_t High
LLVMContext & Context
#define P(N)
const char LLVMTargetMachineRef TM
static bool isSelectPseudo(MachineInstr &MI)
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
#define OPCODE(NAME)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static bool isMovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static SDValue extendAtomicLoad(AtomicSDNode *ALoad, EVT VT, SelectionDAG &DAG, ISD::LoadExtType ETy)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1491
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition: APInt.h:1364
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1308
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition: APInt.h:236
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1439
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition: APInt.h:300
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition: APInt.cpp:368
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition: APInt.h:1235
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition: APInt.h:836
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
@ Add
*p = old + v
Definition: Instructions.h:764
@ Or
*p = old | v
Definition: Instructions.h:772
@ Sub
*p = old - v
Definition: Instructions.h:766
@ And
*p = old & v
Definition: Instructions.h:768
@ Xor
*p = old ^ v
Definition: Instructions.h:774
BinOp getOperation() const
Definition: Instructions.h:845
This is an SDNode representing atomic operations.
const SDValue & getBasePtr() const
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:349
The address of a basic block.
Definition: Constants.h:889
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:703
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition: Function.cpp:715
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:262
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:556
bool hasPrivateLinkage() const
Definition: GlobalValue.h:526
bool hasInternalLinkage() const
Definition: GlobalValue.h:525
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:184
This class is used to represent ISD::LOAD nodes.
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
MachineFunctionProperties & reset(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr kills the specified register.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr fully defines the specified register.
bool readsRegister(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Return true if the MachineInstr reads the specified register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
bool hasOneUse() const
Return true if there is exactly one use of this node.
iterator_range< use_iterator > uses()
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
const SDValue & getOperand(unsigned Num) const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
void setFlags(SDNodeFlags NewFlags)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDNode * isConstantIntBuildVectorOrConstantInt(SDValue N) const
Test whether the given value is a constant int or similar node.
SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:478
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:732
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
Definition: SelectionDAG.h:862
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:845
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:739
SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void reserve(size_type N)
Definition: SmallVector.h:676
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
An instruction for storing to memory.
Definition: Instructions.h:317
This class is used to represent ISD::STORE nodes.
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:466
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition: StringRef.h:257
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition: StringRef.h:680
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
iterator end() const
Definition: StringRef.h:113
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
A SystemZ-specific class detailing special use registers particular for calling conventions.
A SystemZ-specific constant pool value.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const
const TargetFrameLowering * getFrameLowering() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
const SystemZRegisterInfo * getRegisterInfo() const override
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
bool useSoftFloat() const override
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual bool findOptimalMemOpLowering(std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
bool isFP128Ty() const
Return true if this is 'fp128'.
Definition: Type.h:163
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Value * getOperand(unsigned i) const
Definition: User.h:169
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
use_iterator use_begin()
Definition: Value.h:360
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1126
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1122
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:723
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:476
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1269
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:714
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1155
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1271
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1241
@ STRICT_FCEIL
Definition: ISDOpcodes.h:426
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1272
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1031
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b.
Definition: ISDOpcodes.h:1254
@ STRICT_FMINIMUM
Definition: ISDOpcodes.h:436
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:790
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:688
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1228
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1233
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition: ISDOpcodes.h:820
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:477
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1267
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1268
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:411
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
Definition: ISDOpcodes.h:1400
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:450
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:620
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1221
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:988
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:930
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1077
@ SSUBO
Same for subtraction.
Definition: ISDOpcodes.h:327
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1270
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1056
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition: ISDOpcodes.h:507
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1237
@ UNDEF
UNDEF - An undefined node.
Definition: ISDOpcodes.h:211
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition: ISDOpcodes.h:627
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1151
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition: ISDOpcodes.h:323
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:430
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:880
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:651
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1265
@ STRICT_FMAXIMUM
Definition: ISDOpcodes.h:435
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:424
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:780
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:425
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ ATOMIC_LOAD_UMAX
Definition: ISDOpcodes.h:1273
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:971
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:1041
@ ConstantPool
Definition: ISDOpcodes.h:82
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition: ISDOpcodes.h:809
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:888
@ STRICT_FROUND
Definition: ISDOpcodes.h:428
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:736
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:303
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:449
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:427
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ ATOMIC_LOAD_ADD
Definition: ISDOpcodes.h:1263
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:443
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:465
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:442
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
Definition: ISDOpcodes.h:984
@ ATOMIC_LOAD_SUB
Definition: ISDOpcodes.h:1264
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1182
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:470
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1208
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:400
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ATOMIC_SWAP
Val, OUTCHAIN = ATOMIC_SWAP(INCHAIN, ptr, amt) Val, OUTCHAIN = ATOMIC_LOAD_[OpName](INCHAIN,...
Definition: ISDOpcodes.h:1262
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:831
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:423
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1070
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:763
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:493
@ AssertZext
Definition: ISDOpcodes.h:62
@ STRICT_FRINT
Definition: ISDOpcodes.h:422
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1320
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1523
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1503
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:148
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition: LLVMContext.h:57
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition: SystemZ.h:40
static bool isImmHH(uint64_t Val)
Definition: SystemZ.h:176
const unsigned CCMASK_TEND
Definition: SystemZ.h:97
const unsigned CCMASK_CS_EQ
Definition: SystemZ.h:67
const unsigned CCMASK_TBEGIN
Definition: SystemZ.h:92
const unsigned CCMASK_0
Definition: SystemZ.h:27
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition: SystemZ.h:82
const unsigned CCMASK_LOGICAL_CARRY
Definition: SystemZ.h:60
const unsigned TDCMASK_NORMAL_MINUS
Definition: SystemZ.h:122
const unsigned CCMASK_TDC
Definition: SystemZ.h:109
const unsigned CCMASK_FCMP
Definition: SystemZ.h:48
const unsigned CCMASK_TM_SOME_0
Definition: SystemZ.h:81
static bool isImmHL(uint64_t Val)
Definition: SystemZ.h:171
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition: SystemZ.h:124
const unsigned PFD_READ
Definition: SystemZ.h:115
const unsigned CCMASK_1
Definition: SystemZ.h:28
const unsigned TDCMASK_NORMAL_PLUS
Definition: SystemZ.h:121
const unsigned PFD_WRITE
Definition: SystemZ.h:116
const unsigned CCMASK_CMP_GT
Definition: SystemZ.h:37
const unsigned TDCMASK_QNAN_MINUS
Definition: SystemZ.h:128
const unsigned CCMASK_CS
Definition: SystemZ.h:69
const unsigned CCMASK_ANY
Definition: SystemZ.h:31
const unsigned CCMASK_ARITH
Definition: SystemZ.h:55
const unsigned CCMASK_TM_MIXED_MSB_0
Definition: SystemZ.h:78
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition: SystemZ.h:123
static bool isImmLL(uint64_t Val)
Definition: SystemZ.h:161
const unsigned VectorBits
Definition: SystemZ.h:154
static bool isImmLH(uint64_t Val)
Definition: SystemZ.h:166
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition: SystemZ.h:125
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition: SystemZ.h:77
const unsigned IPM_CC
Definition: SystemZ.h:112
const unsigned CCMASK_CMP_LE
Definition: SystemZ.h:39
const unsigned CCMASK_CMP_O
Definition: SystemZ.h:44
const unsigned CCMASK_CMP_EQ
Definition: SystemZ.h:35
const unsigned VectorBytes
Definition: SystemZ.h:158
const unsigned TDCMASK_INFINITY_MINUS
Definition: SystemZ.h:126
const unsigned CCMASK_ICMP
Definition: SystemZ.h:47
const unsigned CCMASK_VCMP_ALL
Definition: SystemZ.h:101
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition: SystemZ.h:104
const unsigned CCMASK_TM_MIXED_MSB_1
Definition: SystemZ.h:79
const unsigned CCMASK_TM_MSB_0
Definition: SystemZ.h:83
const unsigned CCMASK_ARITH_OVERFLOW
Definition: SystemZ.h:54
const unsigned CCMASK_CS_NE
Definition: SystemZ.h:68
const unsigned TDCMASK_SNAN_PLUS
Definition: SystemZ.h:129
const unsigned CCMASK_TM
Definition: SystemZ.h:85
const unsigned CCMASK_3
Definition: SystemZ.h:30
const unsigned CCMASK_CMP_LT
Definition: SystemZ.h:36
const unsigned CCMASK_CMP_NE
Definition: SystemZ.h:38
const unsigned TDCMASK_ZERO_PLUS
Definition: SystemZ.h:119
const unsigned TDCMASK_QNAN_PLUS
Definition: SystemZ.h:127
const unsigned TDCMASK_ZERO_MINUS
Definition: SystemZ.h:120
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition: SystemZ.h:80
const unsigned CCMASK_LOGICAL_BORROW
Definition: SystemZ.h:62
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition: SystemZ.h:43
const unsigned CCMASK_LOGICAL
Definition: SystemZ.h:64
const unsigned CCMASK_TM_MSB_1
Definition: SystemZ.h:84
const unsigned TDCMASK_SNAN_MINUS
Definition: SystemZ.h:130
@ GeneralDynamic
Definition: CodeGen.h:46
@ GS
Definition: X86.h:205
Reg
All possible values of the reg field in the ModR/M byte.
support::ulittle32_t Word
Definition: IRSymtab.h:52
NodeAddr< CodeNode * > Code
Definition: RDFGraph.h:388
constexpr const char32_t SBase
@ FalseVal
Definition: TGLexer.h:59
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition: MathExtras.h:326
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Done
Definition: Threading.h:61
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition: Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert ‘Bytes’ to a hex string and output to ‘OS’.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition: bit.h:342
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: bit.h:215
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition: bit.h:281
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
@ BeforeLegalizeTypes
Definition: DAGCombine.h:16
@ Mul
Product of integers.
DWARFExpression::Operation Op
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:452
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
#define EQ(a, b)
Definition: regexec.c:112
#define NC
Definition: regutils.h:42
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:136
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:146
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:370
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:366
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:313
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:156
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition: KnownBits.h:182
unsigned getBitWidth() const
Get the bit width of this value.
Definition: KnownBits.h:40
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition: KnownBits.h:168
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:71
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition: KnownBits.h:307
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition: KnownBits.h:176
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition: KnownBits.h:141
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
These are IR-level optimization flags that may be propagated to SDNodes.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})