SystemZISelLowering.cpp
1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SystemZTargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "SystemZISelLowering.h"
14 #include "SystemZCallingConv.h"
17 #include "SystemZTargetMachine.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/Intrinsics.h"
24 #include "llvm/IR/IntrinsicsS390.h"
26 #include "llvm/Support/KnownBits.h"
27 #include <cctype>
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "systemz-lower"
32 
33 namespace {
34 // Represents information about a comparison.
35 struct Comparison {
36  Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
37  : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
38  Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
39 
40  // The operands to the comparison.
41  SDValue Op0, Op1;
42 
43  // Chain if this is a strict floating-point comparison.
44  SDValue Chain;
45 
46  // The opcode that should be used to compare Op0 and Op1.
47  unsigned Opcode;
48 
49  // A SystemZICMP value. Only used for integer comparisons.
50  unsigned ICmpType;
51 
52  // The mask of CC values that Opcode can produce.
53  unsigned CCValid;
54 
55  // The mask of CC values for which the original condition is true.
56  unsigned CCMask;
57 };
58 } // end anonymous namespace
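// Illustrative example (a sketch; the exact mask constants live in SystemZ.h):
// for an integer equality comparison, Opcode would be SystemZISD::ICMP,
// CCValid the set of CCs an integer compare can produce (CC 0-2), and CCMask
// just the value meaning "equal" (CC 0), so the user of the comparison tests
// "CC == 0" within CCValid.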
59 
60 // Classify VT as either 32 or 64 bit.
61 static bool is32Bit(EVT VT) {
62  switch (VT.getSimpleVT().SimpleTy) {
63  case MVT::i32:
64  return true;
65  case MVT::i64:
66  return false;
67  default:
68  llvm_unreachable("Unsupported type");
69  }
70 }
71 
72 // Return a version of MachineOperand that can be safely used before the
73 // final use.
74 static MachineOperand earlyUseOperand(MachineOperand Op) {
75  if (Op.isReg())
76  Op.setIsKill(false);
77  return Op;
78 }
79 
80 SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
81  const SystemZSubtarget &STI)
82  : TargetLowering(TM), Subtarget(STI) {
83  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
84 
85  auto *Regs = STI.getSpecialRegisters();
86 
87  // Set up the register classes.
88  if (Subtarget.hasHighWord())
89  addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
90  else
91  addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
92  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
93  if (!useSoftFloat()) {
94  if (Subtarget.hasVector()) {
95  addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
96  addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
97  } else {
98  addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
99  addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
100  }
101  if (Subtarget.hasVectorEnhancements1())
102  addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
103  else
104  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
105 
106  if (Subtarget.hasVector()) {
107  addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
108  addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
109  addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
110  addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
111  addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
112  addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
113  }
114  }
115 
116  // Compute derived properties from the register classes
117  computeRegisterProperties(Subtarget.getRegisterInfo());
118 
119  // Set up special registers.
120  setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
121 
122  // TODO: It may be better to default to latency-oriented scheduling, but
123  // LLVM's current latency-oriented scheduler can't handle physreg
124  // definitions such as SystemZ has with CC, so use the register-pressure
125  // scheduler instead, which can.
127 
130 
131  // Instructions are strings of 2-byte aligned 2-byte values.
133  // For performance reasons we prefer 16-byte alignment.
135 
136  // Handle operations that are handled in a similar way for all types.
137  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
139  ++I) {
140  MVT VT = MVT::SimpleValueType(I);
141  if (isTypeLegal(VT)) {
142  // Lower SET_CC into an IPM-based sequence.
146 
147  // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
149 
150  // Lower SELECT_CC and BR_CC into separate comparisons and branches.
153  }
154  }
155 
156  // Expand jump table branches as address arithmetic followed by an
157  // indirect jump.
159 
160  // Expand BRCOND into a BR_CC (see above).
162 
163  // Handle integer types.
164  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
166  ++I) {
167  MVT VT = MVT::SimpleValueType(I);
168  if (isTypeLegal(VT)) {
170 
171  // Expand individual DIV and REMs into DIVREMs.
178 
179  // Support addition/subtraction with overflow.
182 
183  // Support addition/subtraction with carry.
186 
187  // Support carry in as value rather than glue.
190 
191  // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
192  // stores, putting a serialization instruction after the stores.
195 
196  // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
197  // available, or if the operand is constant.
199 
200  // Use POPCNT on z196 and above.
201  if (Subtarget.hasPopulationCount())
203  else
205 
206  // No special instructions for these.
209 
210  // Use *MUL_LOHI where possible instead of MULH*.
215 
216  // Only z196 and above have native support for conversions to unsigned.
217  // On z10, promoting to i64 doesn't generate an inexact condition for
218  // values that are outside the i32 range but in the i64 range, so use
219  // the default expansion.
220  if (!Subtarget.hasFPExtension())
222 
223  // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
224  // default to Expand, so need to be modified to Legal where appropriate.
226  if (Subtarget.hasFPExtension())
228 
229  // And similarly for STRICT_[SU]INT_TO_FP.
231  if (Subtarget.hasFPExtension())
233  }
234  }
235 
236  // Type legalization will convert 8- and 16-bit atomic operations into
237  // forms that operate on i32s (but still keeping the original memory VT).
238  // Lower them into full i32 operations.
250 
251  // Even though i128 is not a legal type, we still need to custom lower
252  // the atomic operations in order to exploit SystemZ instructions.
255 
256  // We can use the CC result of compare-and-swap to implement
257  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
261 
263 
264  // Traps are legal, as we will convert them to "j .+2".
266 
267  // z10 has instructions for signed but not unsigned FP conversion.
268  // Handle unsigned 32-bit types as signed 64-bit types.
269  if (!Subtarget.hasFPExtension()) {
274  }
275 
276  // We have native support for a 64-bit CTLZ, via FLOGR.
280 
281  // On z15 we have native support for a 64-bit CTPOP.
282  if (Subtarget.hasMiscellaneousExtensions3()) {
285  }
286 
287  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
289 
290  // Expand 128 bit shifts without using a libcall.
294  setLibcallName(RTLIB::SRL_I128, nullptr);
295  setLibcallName(RTLIB::SHL_I128, nullptr);
296  setLibcallName(RTLIB::SRA_I128, nullptr);
297 
298  // Handle bitcast from fp128 to i128.
300 
301  // We have native instructions for i8, i16 and i32 extensions, but not i1.
303  for (MVT VT : MVT::integer_valuetypes()) {
307  }
308 
309  // Handle the various types of symbolic address.
315 
316  // We need to handle dynamic allocations specially because of the
317  // 160-byte area at the bottom of the stack.
320 
321  // Use custom expanders so that we can force the function to use
322  // a frame pointer.
325 
326  // Handle prefetches with PFD or PFDRL.
328 
329  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
330  // Assume by default that all vector operations need to be expanded.
331  for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
332  if (getOperationAction(Opcode, VT) == Legal)
333  setOperationAction(Opcode, VT, Expand);
334 
335  // Likewise all truncating stores and extending loads.
336  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
337  setTruncStoreAction(VT, InnerVT, Expand);
338  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
339  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
340  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
341  }
342 
343  if (isTypeLegal(VT)) {
344  // These operations are legal for anything that can be stored in a
345  // vector register, even if there is no native support for the format
346  // as such. In particular, we can do these for v4f32 even though there
347  // are no specific instructions for that format.
353 
354  // Likewise, except that we need to replace the nodes with something
355  // more specific.
358  }
359  }
360 
361  // Handle integer vector types.
363  if (isTypeLegal(VT)) {
364  // These operations have direct equivalents.
369  if (VT != MVT::v2i64)
375  if (Subtarget.hasVectorEnhancements1())
377  else
381 
382  // Convert a GPR scalar to a vector by inserting it into element 0.
384 
385  // Use a series of unpacks for extensions.
388 
389  // Detect shifts by a scalar amount and convert them into
390  // V*_BY_SCALAR.
394 
395  // At present ROTL isn't matched by DAGCombiner. ROTR should be
396  // converted into ROTL.
399 
400  // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
401  // and inverting the result as necessary.
404  if (Subtarget.hasVectorEnhancements1())
406  }
407  }
408 
409  if (Subtarget.hasVector()) {
410  // There should be no need to check for float types other than v2f64
411  // since <2 x f32> isn't a legal type.
420 
429  }
430 
431  if (Subtarget.hasVectorEnhancements2()) {
440 
449  }
450 
451  // Handle floating-point types.
452  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
454  ++I) {
455  MVT VT = MVT::SimpleValueType(I);
456  if (isTypeLegal(VT)) {
457  // We can use FI for FRINT.
459 
460  // We can use the extended form of FI for other rounding operations.
461  if (Subtarget.hasFPExtension()) {
467  }
468 
469  // No special instructions for these.
475 
476  // Handle constrained floating-point operations.
486  if (Subtarget.hasFPExtension()) {
492  }
493  }
494  }
495 
496  // Handle floating-point vector types.
497  if (Subtarget.hasVector()) {
498  // Scalar-to-vector conversion is just a subreg.
501 
502  // Some insertions and extractions can be done directly but others
503  // need to go via integers.
508 
509  // These operations have direct equivalents.
524 
525  // Handle constrained floating-point operations.
538  }
539 
540  // The vector enhancements facility 1 has instructions for these.
541  if (Subtarget.hasVectorEnhancements1()) {
556 
561 
566 
571 
576 
581 
582  // Handle constrained floating-point operations.
595  for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
596  MVT::v4f32, MVT::v2f64 }) {
601  }
602  }
603 
604  // We only have fused f128 multiply-addition on vector registers.
605  if (!Subtarget.hasVectorEnhancements1()) {
608  }
609 
610  // We don't have a copysign instruction on vector registers.
611  if (Subtarget.hasVectorEnhancements1())
613 
614  // Needed so that we don't try to implement f128 constant loads using
615  // a load-and-extend of an f80 constant (in cases where the constant
616  // would fit in an f80).
617  for (MVT VT : MVT::fp_valuetypes())
619 
620  // We don't have extending load instructions on vector registers.
621  if (Subtarget.hasVectorEnhancements1()) {
624  }
625 
626  // Floating-point truncation and stores need to be done separately.
630 
631  // We have 64-bit FPR<->GPR moves, but need special handling for
632  // 32-bit forms.
633  if (!Subtarget.hasVector()) {
636  }
637 
638  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
639  // structure, but VAEND is a no-op.
643 
644  // Codes for which we want to perform some z-specific combinations.
665 
666  // Handle intrinsics.
669 
670  // We want to use MVC in preference to even a single load/store pair.
671  MaxStoresPerMemcpy = 0;
673 
674  // The main memset sequence is a byte store followed by an MVC.
675  // Two STC or MV..I stores win over that, but the kind of fused stores
676  // generated by target-independent code don't when the byte value is
677  // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
678  // than "STC;MVC". Handle the choice in target-specific code instead.
679  MaxStoresPerMemset = 0;
681 
682  // Default to having -disable-strictnode-mutation on
683  IsStrictFPEnabled = true;
684 }
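// Note on the MaxStoresPerMemcpy/MaxStoresPerMemset settings above: leaving
// them at 0 keeps the target-independent lowering from expanding small
// memcpy/memset calls into inline store sequences, so such calls reach the
// SystemZ-specific DAG lowering (SystemZSelectionDAGInfo), which can choose
// MVC- or MVI/STC-based sequences itself. This is a sketch of the intent, not
// of the exact expansion logic.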
685 
686 bool SystemZTargetLowering::useSoftFloat() const {
687  return Subtarget.hasSoftFloat();
688 }
689 
690 EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
691  LLVMContext &, EVT VT) const {
692  if (!VT.isVector())
693  return MVT::i32;
694  return VT.changeVectorElementTypeToInteger();
695 }
696 
698  const MachineFunction &MF, EVT VT) const {
699  VT = VT.getScalarType();
700 
701  if (!VT.isSimple())
702  return false;
703 
704  switch (VT.getSimpleVT().SimpleTy) {
705  case MVT::f32:
706  case MVT::f64:
707  return true;
708  case MVT::f128:
709  return Subtarget.hasVectorEnhancements1();
710  default:
711  break;
712  }
713 
714  return false;
715 }
716 
717 // Return true if the constant can be generated with a vector instruction,
718 // such as VGM, VGMB or VREPI.
720  const SystemZSubtarget &Subtarget) {
721  const SystemZInstrInfo *TII =
722  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
723  if (!Subtarget.hasVector() ||
724  (isFP128 && !Subtarget.hasVectorEnhancements1()))
725  return false;
726 
727  // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
728  // preferred way of creating all-zero and all-one vectors so give it
729  // priority over other methods below.
730  unsigned Mask = 0;
731  unsigned I = 0;
732  for (; I < SystemZ::VectorBytes; ++I) {
733  uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
734  if (Byte == 0xff)
735  Mask |= 1ULL << I;
736  else if (Byte != 0)
737  break;
738  }
739  if (I == SystemZ::VectorBytes) {
741  OpVals.push_back(Mask);
743  return true;
744  }
745 
746  if (SplatBitSize > 64)
747  return false;
748 
749  auto tryValue = [&](uint64_t Value) -> bool {
750  // Try VECTOR REPLICATE IMMEDIATE
751  int64_t SignedValue = SignExtend64(Value, SplatBitSize);
752  if (isInt<16>(SignedValue)) {
753  OpVals.push_back(((unsigned) SignedValue));
755  VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
756  SystemZ::VectorBits / SplatBitSize);
757  return true;
758  }
759  // Try VECTOR GENERATE MASK
760  unsigned Start, End;
761  if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
762  // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
763  // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
764  // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1).
765  OpVals.push_back(Start - (64 - SplatBitSize));
766  OpVals.push_back(End - (64 - SplatBitSize));
768  VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
769  SystemZ::VectorBits / SplatBitSize);
770  return true;
771  }
772  return false;
773  };
774 
775  // First try assuming that any undefined bits above the highest set bit
776  // and below the lowest set bit are 1s. This increases the likelihood of
777  // being able to use a sign-extended element value in VECTOR REPLICATE
778  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
779  uint64_t SplatBitsZ = SplatBits.getZExtValue();
780  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
781  uint64_t Lower =
782  (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
783  uint64_t Upper =
784  (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
785  if (tryValue(SplatBitsZ | Upper | Lower))
786  return true;
787 
788  // Now try assuming that any undefined bits between the first and
789  // last defined set bits are set. This increases the chances of
790  // using a non-wraparound mask.
791  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
792  return tryValue(SplatBitsZ | Middle);
793 }
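// Worked examples (illustrative only): an all-ones splat sets every Mask bit,
// so VECTOR GENERATE BYTE MASK (VGBM 0xffff) is used; a splat of the i32
// value 7 fits a signed 16-bit immediate, so VECTOR REPLICATE IMMEDIATE
// (VREPIF 7) is used; a splat of 0x00ffff00 is a contiguous (wraparound-free)
// run of ones, so VECTOR GENERATE MASK (roughly VGMF 8,23) covers it.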
794 
795 SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
796  IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
797  isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
798  SplatBits = FPImm.bitcastToAPInt();
799  unsigned Width = SplatBits.getBitWidth();
800  IntBits <<= (SystemZ::VectorBits - Width);
801 
802  // Find the smallest splat.
803  while (Width > 8) {
804  unsigned HalfSize = Width / 2;
805  APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
806  APInt LowValue = SplatBits.trunc(HalfSize);
807 
808  // If the two halves do not match, stop here.
809  if (HighValue != LowValue || 8 > HalfSize)
810  break;
811 
812  SplatBits = HighValue;
813  Width = HalfSize;
814  }
815  SplatUndef = 0;
816  SplatBitSize = Width;
817 }
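// For example, the 64-bit pattern 0x4040404040404040 halves repeatedly down to
// the 8-bit splat 0x40, so SplatBitSize ends up as 8, whereas a pattern whose
// two halves differ, such as 0x12345678aabbccdd, stops at the full 64 bits.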
818 
819 SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
820  assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
821  bool HasAnyUndefs;
822 
823  // Get IntBits by finding the 128 bit splat.
824  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
825  true);
826 
827  // Get SplatBits by finding the 8 bit or greater splat.
828  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
829  true);
830 }
831 
833  bool ForCodeSize) const {
834  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
835  if (Imm.isZero() || Imm.isNegZero())
836  return true;
837 
838  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
839 }
840 
841 /// Returns true if stack probing through inline assembly is requested.
843  // If the function specifically requests inline stack probes, emit them.
844  if (MF.getFunction().hasFnAttribute("probe-stack"))
845  return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
846  "inline-asm";
847  return false;
848 }
849 
851  // We can use CGFI or CLGFI.
852  return isInt<32>(Imm) || isUInt<32>(Imm);
853 }
854 
856  // We can use ALGFI or SLGFI.
857  return isUInt<32>(Imm) || isUInt<32>(-Imm);
858 }
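// For example (illustrative): an add of -100 is legal because 100 fits the
// unsigned 32-bit immediate of SLGFI, while an add of (1ULL << 40) does not
// fit either form and must be materialized into a register first.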
859 
861  EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
862  // Unaligned accesses should never be slower than the expanded version.
863  // We check specifically for aligned accesses in the few cases where
864  // they are required.
865  if (Fast)
866  *Fast = true;
867  return true;
868 }
869 
870 // Information about the addressing mode for a memory access.
871 struct AddressingMode {
872  // True if a long displacement is supported.
873  bool LongDisplacement;
874 
875  // True if use of index register is supported.
876  bool IndexReg;
877 
878  AddressingMode(bool LongDispl, bool IdxReg) :
879  LongDisplacement(LongDispl), IndexReg(IdxReg) {}
880 };
881 
882 // Return the desired addressing mode for a Load whose only use (in the same
883 // block) is a Store.
884 static AddressingMode getLoadStoreAddrMode(bool HasVector,
885  Type *Ty) {
886  // With vector support a Load->Store combination may be combined to either
887  // an MVC or vector operations and it seems to work best to allow the
888  // vector addressing mode.
889  if (HasVector)
890  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
891 
892  // Otherwise only the MVC case is special.
893  bool MVC = Ty->isIntegerTy(8);
894  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
895 }
896 
897 // Return the addressing mode which seems most desirable given an LLVM
898 // Instruction pointer.
899 static AddressingMode
901  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
902  switch (II->getIntrinsicID()) {
903  default: break;
904  case Intrinsic::memset:
905  case Intrinsic::memmove:
906  case Intrinsic::memcpy:
907  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
908  }
909  }
910 
911  if (isa<LoadInst>(I) && I->hasOneUse()) {
912  auto *SingleUser = cast<Instruction>(*I->user_begin());
913  if (SingleUser->getParent() == I->getParent()) {
914  if (isa<ICmpInst>(SingleUser)) {
915  if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
916  if (C->getBitWidth() <= 64 &&
917  (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
918  // Comparison of memory with 16 bit signed / unsigned immediate
919  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
920  } else if (isa<StoreInst>(SingleUser))
921  // Load->Store
922  return getLoadStoreAddrMode(HasVector, I->getType());
923  }
924  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
925  if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
926  if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
927  // Load->Store
928  return getLoadStoreAddrMode(HasVector, LoadI->getType());
929  }
930 
931  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
932 
933  // * Use LDE instead of LE/LEY for z13 to avoid partial register
934  // dependencies (LDE only supports small offsets).
935  // * Utilize the vector registers to hold floating point
936  // values (vector load / store instructions only support small
937  // offsets).
938 
939  Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
940  I->getOperand(0)->getType());
941  bool IsFPAccess = MemAccessTy->isFloatingPointTy();
942  bool IsVectorAccess = MemAccessTy->isVectorTy();
943 
944  // A store of an extracted vector element will be combined into a VSTE type
945  // instruction.
946  if (!IsVectorAccess && isa<StoreInst>(I)) {
947  Value *DataOp = I->getOperand(0);
948  if (isa<ExtractElementInst>(DataOp))
949  IsVectorAccess = true;
950  }
951 
952  // A load which gets inserted into a vector element will be combined into a
953  // VLE type instruction.
954  if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
955  User *LoadUser = *I->user_begin();
956  if (isa<InsertElementInst>(LoadUser))
957  IsVectorAccess = true;
958  }
959 
960  if (IsFPAccess || IsVectorAccess)
961  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
962  }
963 
964  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
965 }
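// A rough sketch of the heuristic: an i8 load whose only use is a store in the
// same block (and no vector support) is likely to become an MVC, which takes
// neither an index register nor a long displacement, hence (false, false);
// with vector support, a floating-point load or store prefers (false, true) so
// that the short-displacement vector/LDE forms remain usable.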
966 
968  const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
969  // Punt on globals for now, although they can be used in limited
970  // RELATIVE LONG cases.
971  if (AM.BaseGV)
972  return false;
973 
974  // Require a 20-bit signed offset.
975  if (!isInt<20>(AM.BaseOffs))
976  return false;
977 
978  AddressingMode SupportedAM(true, true);
979  if (I != nullptr)
980  SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
981 
982  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
983  return false;
984 
985  if (!SupportedAM.IndexReg)
986  // No indexing allowed.
987  return AM.Scale == 0;
988  else
989  // Indexing is OK but no scale factor can be applied.
990  return AM.Scale == 0 || AM.Scale == 1;
991 }
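// A couple of illustrative cases: "base + index + 4092" is accepted,
// "base + 2*index" never is (SystemZ has no scaled indexing), and a
// displacement of 600000 is rejected because it does not fit the signed
// 20-bit field.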
992 
994  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
995  return false;
996  unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedSize();
997  unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedSize();
998  return FromBits > ToBits;
999 }
1000 
1002  if (!FromVT.isInteger() || !ToVT.isInteger())
1003  return false;
1004  unsigned FromBits = FromVT.getFixedSizeInBits();
1005  unsigned ToBits = ToVT.getFixedSizeInBits();
1006  return FromBits > ToBits;
1007 }
1008 
1009 //===----------------------------------------------------------------------===//
1010 // Inline asm support
1011 //===----------------------------------------------------------------------===//
1012 
1015  if (Constraint.size() == 1) {
1016  switch (Constraint[0]) {
1017  case 'a': // Address register
1018  case 'd': // Data register (equivalent to 'r')
1019  case 'f': // Floating-point register
1020  case 'h': // High-part register
1021  case 'r': // General-purpose register
1022  case 'v': // Vector register
1023  return C_RegisterClass;
1024 
1025  case 'Q': // Memory with base and unsigned 12-bit displacement
1026  case 'R': // Likewise, plus an index
1027  case 'S': // Memory with base and signed 20-bit displacement
1028  case 'T': // Likewise, plus an index
1029  case 'm': // Equivalent to 'T'.
1030  return C_Memory;
1031 
1032  case 'I': // Unsigned 8-bit constant
1033  case 'J': // Unsigned 12-bit constant
1034  case 'K': // Signed 16-bit constant
1035  case 'L': // Signed 20-bit displacement (on all targets we support)
1036  case 'M': // 0x7fffffff
1037  return C_Immediate;
1038 
1039  default:
1040  break;
1041  }
1042  }
1043  return TargetLowering::getConstraintType(Constraint);
1044 }
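// An illustrative (hypothetical) use of these constraints:
//   asm("lhi %0,%1" : "=d"(Result) : "K"(42));
// where 'd' asks for a general-purpose register (C_RegisterClass) and 'K'
// requires a signed 16-bit constant (C_Immediate), matching LHI's immediate.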
1045 
1048  const char *constraint) const {
1049  ConstraintWeight weight = CW_Invalid;
1050  Value *CallOperandVal = info.CallOperandVal;
1051  // If we don't have a value, we can't do a match,
1052  // but allow it at the lowest weight.
1053  if (!CallOperandVal)
1054  return CW_Default;
1055  Type *type = CallOperandVal->getType();
1056  // Look at the constraint type.
1057  switch (*constraint) {
1058  default:
1060  break;
1061 
1062  case 'a': // Address register
1063  case 'd': // Data register (equivalent to 'r')
1064  case 'h': // High-part register
1065  case 'r': // General-purpose register
1066  if (CallOperandVal->getType()->isIntegerTy())
1067  weight = CW_Register;
1068  break;
1069 
1070  case 'f': // Floating-point register
1071  if (type->isFloatingPointTy())
1072  weight = CW_Register;
1073  break;
1074 
1075  case 'v': // Vector register
1076  if ((type->isVectorTy() || type->isFloatingPointTy()) &&
1077  Subtarget.hasVector())
1078  weight = CW_Register;
1079  break;
1080 
1081  case 'I': // Unsigned 8-bit constant
1082  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1083  if (isUInt<8>(C->getZExtValue()))
1084  weight = CW_Constant;
1085  break;
1086 
1087  case 'J': // Unsigned 12-bit constant
1088  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1089  if (isUInt<12>(C->getZExtValue()))
1090  weight = CW_Constant;
1091  break;
1092 
1093  case 'K': // Signed 16-bit constant
1094  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1095  if (isInt<16>(C->getSExtValue()))
1096  weight = CW_Constant;
1097  break;
1098 
1099  case 'L': // Signed 20-bit displacement (on all targets we support)
1100  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1101  if (isInt<20>(C->getSExtValue()))
1102  weight = CW_Constant;
1103  break;
1104 
1105  case 'M': // 0x7fffffff
1106  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1107  if (C->getZExtValue() == 0x7fffffff)
1108  weight = CW_Constant;
1109  break;
1110  }
1111  return weight;
1112 }
1113 
1114 // Parse a "{tNNN}" register constraint for which the register type "t"
1115 // has already been verified. RC is the register class associated with "t" and
1116 // Map maps 0-based register numbers to LLVM register numbers.
1117 static std::pair<unsigned, const TargetRegisterClass *>
1118 parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
1119  const unsigned *Map, unsigned Size) {
1120  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1121  if (isdigit(Constraint[2])) {
1122  unsigned Index;
1123  bool Failed =
1124  Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1125  if (!Failed && Index < Size && Map[Index])
1126  return std::make_pair(Map[Index], RC);
1127  }
1128  return std::make_pair(0U, nullptr);
1129 }
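// For example (assuming the SystemZMC register tables passed in below), the
// constraint "{r5}" with VT == MVT::i64 parses Index 5 into GR64Regs and
// yields SystemZ::R5D, while "{f2}" with VT == MVT::f32 yields SystemZ::F2S.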
1130 
1131 std::pair<unsigned, const TargetRegisterClass *>
1133  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1134  if (Constraint.size() == 1) {
1135  // GCC Constraint Letters
1136  switch (Constraint[0]) {
1137  default: break;
1138  case 'd': // Data register (equivalent to 'r')
1139  case 'r': // General-purpose register
1140  if (VT == MVT::i64)
1141  return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1142  else if (VT == MVT::i128)
1143  return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1144  return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1145 
1146  case 'a': // Address register
1147  if (VT == MVT::i64)
1148  return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1149  else if (VT == MVT::i128)
1150  return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1151  return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1152 
1153  case 'h': // High-part register (an LLVM extension)
1154  return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1155 
1156  case 'f': // Floating-point register
1157  if (!useSoftFloat()) {
1158  if (VT == MVT::f64)
1159  return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1160  else if (VT == MVT::f128)
1161  return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1162  return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1163  }
1164  break;
1165  case 'v': // Vector register
1166  if (Subtarget.hasVector()) {
1167  if (VT == MVT::f32)
1168  return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1169  if (VT == MVT::f64)
1170  return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1171  return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1172  }
1173  break;
1174  }
1175  }
1176  if (Constraint.size() > 0 && Constraint[0] == '{') {
1177  // We need to override the default register parsing for GPRs and FPRs
1178  // because the interpretation depends on VT. The internal names of
1179  // the registers are also different from the external names
1180  // (F0D and F0S instead of F0, etc.).
1181  if (Constraint[1] == 'r') {
1182  if (VT == MVT::i32)
1183  return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1184  SystemZMC::GR32Regs, 16);
1185  if (VT == MVT::i128)
1186  return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1187  SystemZMC::GR128Regs, 16);
1188  return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1189  SystemZMC::GR64Regs, 16);
1190  }
1191  if (Constraint[1] == 'f') {
1192  if (useSoftFloat())
1193  return std::make_pair(
1194  0u, static_cast<const TargetRegisterClass *>(nullptr));
1195  if (VT == MVT::f32)
1196  return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1197  SystemZMC::FP32Regs, 16);
1198  if (VT == MVT::f128)
1199  return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1200  SystemZMC::FP128Regs, 16);
1201  return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1202  SystemZMC::FP64Regs, 16);
1203  }
1204  if (Constraint[1] == 'v') {
1205  if (!Subtarget.hasVector())
1206  return std::make_pair(
1207  0u, static_cast<const TargetRegisterClass *>(nullptr));
1208  if (VT == MVT::f32)
1209  return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1210  SystemZMC::VR32Regs, 32);
1211  if (VT == MVT::f64)
1212  return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1213  SystemZMC::VR64Regs, 32);
1214  return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1215  SystemZMC::VR128Regs, 32);
1216  }
1217  }
1218  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1219 }
1220 
1221 // FIXME? Maybe this could be a TableGen attribute on some registers and
1222 // this table could be generated automatically from RegInfo.
1224  const MachineFunction &MF) const {
1225 
1227  .Case("r15", SystemZ::R15D)
1228  .Default(0);
1229  if (Reg)
1230  return Reg;
1231  report_fatal_error("Invalid register name global variable");
1232 }
1233 
1235 LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1236  std::vector<SDValue> &Ops,
1237  SelectionDAG &DAG) const {
1238  // Only support length 1 constraints for now.
1239  if (Constraint.length() == 1) {
1240  switch (Constraint[0]) {
1241  case 'I': // Unsigned 8-bit constant
1242  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1243  if (isUInt<8>(C->getZExtValue()))
1244  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1245  Op.getValueType()));
1246  return;
1247 
1248  case 'J': // Unsigned 12-bit constant
1249  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1250  if (isUInt<12>(C->getZExtValue()))
1251  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1252  Op.getValueType()));
1253  return;
1254 
1255  case 'K': // Signed 16-bit constant
1256  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1257  if (isInt<16>(C->getSExtValue()))
1258  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1259  Op.getValueType()));
1260  return;
1261 
1262  case 'L': // Signed 20-bit displacement (on all targets we support)
1263  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1264  if (isInt<20>(C->getSExtValue()))
1265  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1266  Op.getValueType()));
1267  return;
1268 
1269  case 'M': // 0x7fffffff
1270  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1271  if (C->getZExtValue() == 0x7fffffff)
1272  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1273  Op.getValueType()));
1274  return;
1275  }
1276  }
1277  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1278 }
1279 
1280 //===----------------------------------------------------------------------===//
1281 // Calling conventions
1282 //===----------------------------------------------------------------------===//
1283 
1284 #include "SystemZGenCallingConv.inc"
1285 
1287  CallingConv::ID) const {
1288  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1289  SystemZ::R14D, 0 };
1290  return ScratchRegs;
1291 }
1292 
1294  Type *ToType) const {
1295  return isTruncateFree(FromType, ToType);
1296 }
1297 
1299  return CI->isTailCall();
1300 }
1301 
1302 // We do not yet support 128-bit single-element vector types. If the user
1303 // attempts to use such a type as a function argument or return type, prefer
1304 // to error out instead of emitting code violating the ABI.
1305 static void VerifyVectorType(MVT VT, EVT ArgVT) {
1306  if (ArgVT.isVector() && !VT.isVector())
1307  report_fatal_error("Unsupported vector argument or return type");
1308 }
1309 
1311  for (unsigned i = 0; i < Ins.size(); ++i)
1312  VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1313 }
1314 
1316  for (unsigned i = 0; i < Outs.size(); ++i)
1317  VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1318 }
1319 
1320 // Value is a value that has been passed to us in the location described by VA
1321 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1322 // any loads onto Chain.
1324  CCValAssign &VA, SDValue Chain,
1325  SDValue Value) {
1326  // If the argument has been promoted from a smaller type, insert an
1327  // assertion to capture this.
1328  if (VA.getLocInfo() == CCValAssign::SExt)
1329  Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1330  DAG.getValueType(VA.getValVT()));
1331  else if (VA.getLocInfo() == CCValAssign::ZExt)
1332  Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1333  DAG.getValueType(VA.getValVT()));
1334 
1335  if (VA.isExtInLoc())
1336  Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1337  else if (VA.getLocInfo() == CCValAssign::BCvt) {
1338  // If this is a short vector argument loaded from the stack,
1339  // extend from i64 to full vector size and then bitcast.
1340  assert(VA.getLocVT() == MVT::i64);
1341  assert(VA.getValVT().isVector());
1343  Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1344  } else
1345  assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1346  return Value;
1347 }
1348 
1349 // Value is a value of type VA.getValVT() that we need to copy into
1350 // the location described by VA. Return a copy of Value converted to
1351 // VA.getValVT(). The caller is responsible for handling indirect values.
1353  CCValAssign &VA, SDValue Value) {
1354  switch (VA.getLocInfo()) {
1355  case CCValAssign::SExt:
1356  return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1357  case CCValAssign::ZExt:
1358  return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1359  case CCValAssign::AExt:
1360  return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1361  case CCValAssign::BCvt: {
1362  assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1363  assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 ||
1364  VA.getValVT() == MVT::f128);
1365  MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1366  ? MVT::v2i64
1367  : VA.getLocVT();
1368  Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1369  // For ELF, this is a short vector argument to be stored to the stack,
1370  // bitcast to v2i64 and then extract first element.
1371  if (BitCastToType == MVT::v2i64)
1372  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1373  DAG.getConstant(0, DL, MVT::i32));
1374  return Value;
1375  }
1376  case CCValAssign::Full:
1377  return Value;
1378  default:
1379  llvm_unreachable("Unhandled getLocInfo()");
1380  }
1381 }
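// A small end-to-end sketch: an integer argument whose LocInfo is SExt is
// widened with SIGN_EXTEND by convertValVTToLocVT on the caller side, and
// convertLocVTToValVT on the callee side re-derives the original value via
// AssertSext followed by a TRUNCATE back to the value type.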
1382 
1384  SDLoc DL(In);
1386  DAG.getIntPtrConstant(0, DL));
1388  DAG.getIntPtrConstant(1, DL));
1389  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1390  MVT::Untyped, Hi, Lo);
1391  return SDValue(Pair, 0);
1392 }
1393 
1395  SDLoc DL(In);
1396  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1397  DL, MVT::i64, In);
1398  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1399  DL, MVT::i64, In);
1400  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1401 }
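// In other words, an i128 value is modelled as a 64-bit register pair:
// lowerI128ToGR128 packs the high and low halves into an untyped PAIR128
// node, and lowerGR128ToI128 extracts them again via the subreg_h64 and
// subreg_l64 subregisters and rebuilds the i128 with BUILD_PAIR. An i128
// inline-asm operand in an "r" constraint, for example, takes this round trip.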
1402 
1404  SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1405  unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
1406  EVT ValueVT = Val.getValueType();
1407  assert((ValueVT != MVT::i128 ||
1408  ((NumParts == 1 && PartVT == MVT::Untyped) ||
1409  (NumParts == 2 && PartVT == MVT::i64))) &&
1410  "Unknown handling of i128 value.");
1411  if (ValueVT == MVT::i128 && NumParts == 1) {
1412  // Inline assembly operand.
1413  Parts[0] = lowerI128ToGR128(DAG, Val);
1414  return true;
1415  }
1416  return false;
1417 }
1418 
1420  SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1421  MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
1422  assert((ValueVT != MVT::i128 ||
1423  ((NumParts == 1 && PartVT == MVT::Untyped) ||
1424  (NumParts == 2 && PartVT == MVT::i64))) &&
1425  "Unknown handling of i128 value.");
1426  if (ValueVT == MVT::i128 && NumParts == 1)
1427  // Inline assembly operand.
1428  return lowerGR128ToI128(DAG, Parts[0]);
1429  return SDValue();
1430 }
1431 
1433  SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1434  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1435  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1436  MachineFunction &MF = DAG.getMachineFunction();
1437  MachineFrameInfo &MFI = MF.getFrameInfo();
1439  SystemZMachineFunctionInfo *FuncInfo =
1441  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1442  EVT PtrVT = getPointerTy(DAG.getDataLayout());
1443 
1444  // Detect unsupported vector argument types.
1445  if (Subtarget.hasVector())
1447 
1448  // Assign locations to all of the incoming arguments.
1450  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1451  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1452 
1453  unsigned NumFixedGPRs = 0;
1454  unsigned NumFixedFPRs = 0;
1455  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1456  SDValue ArgValue;
1457  CCValAssign &VA = ArgLocs[I];
1458  EVT LocVT = VA.getLocVT();
1459  if (VA.isRegLoc()) {
1460  // Arguments passed in registers
1461  const TargetRegisterClass *RC;
1462  switch (LocVT.getSimpleVT().SimpleTy) {
1463  default:
1464  // Integers smaller than i64 should be promoted to i64.
1465  llvm_unreachable("Unexpected argument type");
1466  case MVT::i32:
1467  NumFixedGPRs += 1;
1468  RC = &SystemZ::GR32BitRegClass;
1469  break;
1470  case MVT::i64:
1471  NumFixedGPRs += 1;
1472  RC = &SystemZ::GR64BitRegClass;
1473  break;
1474  case MVT::f32:
1475  NumFixedFPRs += 1;
1476  RC = &SystemZ::FP32BitRegClass;
1477  break;
1478  case MVT::f64:
1479  NumFixedFPRs += 1;
1480  RC = &SystemZ::FP64BitRegClass;
1481  break;
1482  case MVT::f128:
1483  NumFixedFPRs += 2;
1484  RC = &SystemZ::FP128BitRegClass;
1485  break;
1486  case MVT::v16i8:
1487  case MVT::v8i16:
1488  case MVT::v4i32:
1489  case MVT::v2i64:
1490  case MVT::v4f32:
1491  case MVT::v2f64:
1492  RC = &SystemZ::VR128BitRegClass;
1493  break;
1494  }
1495 
1496  Register VReg = MRI.createVirtualRegister(RC);
1497  MRI.addLiveIn(VA.getLocReg(), VReg);
1498  ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1499  } else {
1500  assert(VA.isMemLoc() && "Argument not register or memory");
1501 
1502  // Create the frame index object for this incoming parameter.
1503  int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
1504  VA.getLocMemOffset(), true);
1505 
1506  // Create the SelectionDAG nodes corresponding to a load
1507  // from this parameter. Unpromoted ints and floats are
1508  // passed as right-justified 8-byte values.
1509  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1510  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1511  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1512  DAG.getIntPtrConstant(4, DL));
1513  ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1515  }
1516 
1517  // Convert the value of the argument register into the value that's
1518  // being passed.
1519  if (VA.getLocInfo() == CCValAssign::Indirect) {
1520  InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1521  MachinePointerInfo()));
1522  // If the original argument was split (e.g. i128), we need
1523  // to load all parts of it here (using the same address).
1524  unsigned ArgIndex = Ins[I].OrigArgIndex;
1525  assert (Ins[I].PartOffset == 0);
1526  while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1527  CCValAssign &PartVA = ArgLocs[I + 1];
1528  unsigned PartOffset = Ins[I + 1].PartOffset;
1529  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1530  DAG.getIntPtrConstant(PartOffset, DL));
1531  InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1532  MachinePointerInfo()));
1533  ++I;
1534  }
1535  } else
1536  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1537  }
1538 
1539  // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
1540  if (IsVarArg && Subtarget.isTargetELF()) {
1541  // Save the number of non-varargs registers for later use by va_start, etc.
1542  FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1543  FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1544 
1545  // Likewise the address (in the form of a frame index) of where the
1546  // first stack vararg would be. The 1-byte size here is arbitrary.
1547  int64_t StackSize = CCInfo.getNextStackOffset();
1548  FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1549 
1550  // ...and a similar frame index for the caller-allocated save area
1551  // that will be used to store the incoming registers.
1552  int64_t RegSaveOffset =
1553  -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1554  unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1555  FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1556 
1557  // Store the FPR varargs in the reserved frame slots. (We store the
1558  // GPRs as part of the prologue.)
1559  if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1561  for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1562  unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1563  int FI =
1565  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1566  unsigned VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
1567  &SystemZ::FP64BitRegClass);
1568  SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1569  MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1571  }
1572  // Join the stores, which are independent of one another.
1573  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1574  makeArrayRef(&MemOps[NumFixedFPRs],
1575  SystemZ::ELFNumArgFPRs-NumFixedFPRs));
1576  }
1577  }
1578 
1579  // FIXME: For XPLINK64, add support for handling the incoming "ADA" special
1580  // register (R5).
1581  return Chain;
1582 }
1583 
1584 static bool canUseSiblingCall(const CCState &ArgCCInfo,
1587  // Punt if there are any indirect or stack arguments, or if the call
1588  // needs the callee-saved argument register R6, or if the call uses
1589  // the callee-saved register arguments SwiftSelf and SwiftError.
1590  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1591  CCValAssign &VA = ArgLocs[I];
1592  if (VA.getLocInfo() == CCValAssign::Indirect)
1593  return false;
1594  if (!VA.isRegLoc())
1595  return false;
1596  Register Reg = VA.getLocReg();
1597  if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1598  return false;
1599  if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1600  return false;
1601  }
1602  return true;
1603 }
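// For example, a call whose argument is passed indirectly (such as an i128
// value spilled to a stack slot and passed by address), or one that needs the
// callee-saved argument register R6, cannot be emitted as a sibling call and
// falls back to the normal call sequence.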
1604 
1605 SDValue
1607  SmallVectorImpl<SDValue> &InVals) const {
1608  SelectionDAG &DAG = CLI.DAG;
1609  SDLoc &DL = CLI.DL;
1611  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1613  SDValue Chain = CLI.Chain;
1614  SDValue Callee = CLI.Callee;
1615  bool &IsTailCall = CLI.IsTailCall;
1616  CallingConv::ID CallConv = CLI.CallConv;
1617  bool IsVarArg = CLI.IsVarArg;
1618  MachineFunction &MF = DAG.getMachineFunction();
1619  EVT PtrVT = getPointerTy(MF.getDataLayout());
1620  LLVMContext &Ctx = *DAG.getContext();
1622 
1623  // FIXME: z/OS support to be added in a later patch.
1624  if (Subtarget.isTargetXPLINK64())
1625  IsTailCall = false;
1626 
1627  // Detect unsupported vector argument and return types.
1628  if (Subtarget.hasVector()) {
1629  VerifyVectorTypes(Outs);
1631  }
1632 
1633  // Analyze the operands of the call, assigning locations to each operand.
1635  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1636  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1637 
1638  // We don't support GuaranteedTailCallOpt, only automatically-detected
1639  // sibling calls.
1640  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1641  IsTailCall = false;
1642 
1643  // Get a count of how many bytes are to be pushed on the stack.
1644  unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1645 
1646  if (Subtarget.isTargetXPLINK64())
1647  // Although the XPLINK specifications for AMODE64 state that the minimum
1648  // size of the param area is 32 bytes and no rounding is otherwise
1649  // specified, we round this area up in 64-byte increments to be compatible
1650  // with existing compilers.
1651  NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));
1652 
1653  // Mark the start of the call.
1654  if (!IsTailCall)
1655  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1656 
1657  // Copy argument values to their designated locations.
1659  SmallVector<SDValue, 8> MemOpChains;
1660  SDValue StackPtr;
1661  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1662  CCValAssign &VA = ArgLocs[I];
1663  SDValue ArgValue = OutVals[I];
1664 
1665  if (VA.getLocInfo() == CCValAssign::Indirect) {
1666  // Store the argument in a stack slot and pass its address.
1667  unsigned ArgIndex = Outs[I].OrigArgIndex;
1668  EVT SlotVT;
1669  if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1670  // Allocate the full stack space for a promoted (and split) argument.
1671  Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1672  EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1673  MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1674  unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1675  SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1676  } else {
1677  SlotVT = Outs[I].ArgVT;
1678  }
1679  SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1680  int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1681  MemOpChains.push_back(
1682  DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1684  // If the original argument was split (e.g. i128), we need
1685  // to store all parts of it here (and pass just one address).
1686  assert (Outs[I].PartOffset == 0);
1687  while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1688  SDValue PartValue = OutVals[I + 1];
1689  unsigned PartOffset = Outs[I + 1].PartOffset;
1690  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1691  DAG.getIntPtrConstant(PartOffset, DL));
1692  MemOpChains.push_back(
1693  DAG.getStore(Chain, DL, PartValue, Address,
1695  assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1696  SlotVT.getStoreSize()) && "Not enough space for argument part!");
1697  ++I;
1698  }
1699  ArgValue = SpillSlot;
1700  } else
1701  ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1702 
1703  if (VA.isRegLoc()) {
1704  // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to an
1705  // MVT::i128 type. We decompose the 128-bit type into a pair of its high
1706  // and low values.
1707  if (VA.getLocVT() == MVT::i128)
1708  ArgValue = lowerI128ToGR128(DAG, ArgValue);
1709  // Queue up the argument copies and emit them at the end.
1710  RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1711  } else {
1712  assert(VA.isMemLoc() && "Argument not register or memory");
1713 
1714  // Work out the address of the stack slot. Unpromoted ints and
1715  // floats are passed as right-justified 8-byte values.
1716  if (!StackPtr.getNode())
1717  StackPtr = DAG.getCopyFromReg(Chain, DL,
1718  Regs->getStackPointerRegister(), PtrVT);
1719  unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
1720  VA.getLocMemOffset();
1721  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1722  Offset += 4;
1723  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1724  DAG.getIntPtrConstant(Offset, DL));
1725 
1726  // Emit the store.
1727  MemOpChains.push_back(
1728  DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1729 
1730  // Although long doubles and vectors are passed on the stack when they are
1731  // vararg (non-fixed) arguments, if a long double or vector occupies the
1732  // third and fourth slots of the argument list, GPR3 should still shadow
1733  // the third slot of the argument list.
1734  if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
1735  SDValue ShadowArgValue =
1736  DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
1737  DAG.getIntPtrConstant(1, DL));
1738  RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
1739  }
1740  }
1741  }
1742 
1743  // Join the stores, which are independent of one another.
1744  if (!MemOpChains.empty())
1745  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1746 
1747  // Accept direct calls by converting symbolic call addresses to the
1748  // associated Target* opcodes. Force %r1 to be used for indirect
1749  // tail calls.
1750  SDValue Glue;
1751  // FIXME: Add support for XPLINK using the ADA register.
1752  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1753  Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1755  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1756  Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1758  } else if (IsTailCall) {
1759  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1760  Glue = Chain.getValue(1);
1761  Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1762  }
1763 
1764  // Build a sequence of copy-to-reg nodes, chained and glued together.
1765  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1766  Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1767  RegsToPass[I].second, Glue);
1768  Glue = Chain.getValue(1);
1769  }
1770 
1771  // The first call operand is the chain and the second is the target address.
1773  Ops.push_back(Chain);
1774  Ops.push_back(Callee);
1775 
1776  // Add argument registers to the end of the list so that they are
1777  // known live into the call.
1778  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1779  Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1780  RegsToPass[I].second.getValueType()));
1781 
1782  // Add a register mask operand representing the call-preserved registers.
1783  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1784  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1785  assert(Mask && "Missing call preserved mask for calling convention");
1786  Ops.push_back(DAG.getRegisterMask(Mask));
1787 
1788  // Glue the call to the argument copies, if any.
1789  if (Glue.getNode())
1790  Ops.push_back(Glue);
1791 
1792  // Emit the call.
1793  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1794  if (IsTailCall)
1795  return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1796  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1797  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
1798  Glue = Chain.getValue(1);
1799 
1800  // Mark the end of the call, which is glued to the call itself.
1801  Chain = DAG.getCALLSEQ_END(Chain,
1802  DAG.getConstant(NumBytes, DL, PtrVT, true),
1803  DAG.getConstant(0, DL, PtrVT, true),
1804  Glue, DL);
1805  Glue = Chain.getValue(1);
1806 
1807  // Assign locations to each value returned by this call.
1809  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
1810  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1811 
1812  // Copy all of the result registers out of their specified physreg.
1813  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1814  CCValAssign &VA = RetLocs[I];
1815 
1816  // Copy the value out, gluing the copy to the end of the call sequence.
1817  SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1818  VA.getLocVT(), Glue);
1819  Chain = RetValue.getValue(1);
1820  Glue = RetValue.getValue(2);
1821 
1822  // Convert the value of the return register into the value that's
1823  // being returned.
1824  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1825  }
1826 
1827  return Chain;
1828 }
1829 
1832  MachineFunction &MF, bool isVarArg,
1833  const SmallVectorImpl<ISD::OutputArg> &Outs,
1834  LLVMContext &Context) const {
1835  // Detect unsupported vector return types.
1836  if (Subtarget.hasVector())
1837  VerifyVectorTypes(Outs);
1838 
1839  // Special case that we cannot easily detect in RetCC_SystemZ since
1840  // i128 is not a legal type.
1841  for (auto &Out : Outs)
1842  if (Out.ArgVT == MVT::i128)
1843  return false;
1844 
1846  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1847  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1848 }
1849 
1850 SDValue
1852  bool IsVarArg,
1853  const SmallVectorImpl<ISD::OutputArg> &Outs,
1854  const SmallVectorImpl<SDValue> &OutVals,
1855  const SDLoc &DL, SelectionDAG &DAG) const {
1856  MachineFunction &MF = DAG.getMachineFunction();
1857 
1858  // Detect unsupported vector return types.
1859  if (Subtarget.hasVector())
1860  VerifyVectorTypes(Outs);
1861 
1862  // Assign locations to each returned value.
1864  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1865  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1866 
1867  // Quick exit for void returns
1868  if (RetLocs.empty())
1869  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1870 
1871  if (CallConv == CallingConv::GHC)
1872  report_fatal_error("GHC functions return void only");
1873 
1874  // Copy the result values into the output registers.
1875  SDValue Glue;
1876  SmallVector<SDValue, 4> RetOps;
1877  RetOps.push_back(Chain);
1878  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1879  CCValAssign &VA = RetLocs[I];
1880  SDValue RetValue = OutVals[I];
1881 
1882  // Make the return register live on exit.
1883  assert(VA.isRegLoc() && "Can only return in registers!");
1884 
1885  // Promote the value as required.
1886  RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1887 
1888  // Chain and glue the copies together.
1889  Register Reg = VA.getLocReg();
1890  Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1891  Glue = Chain.getValue(1);
1892  RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1893  }
1894 
1895  // Update chain and glue.
1896  RetOps[0] = Chain;
1897  if (Glue.getNode())
1898  RetOps.push_back(Glue);
1899 
1900  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1901 }
1902 
1903 // Return true if Op is an intrinsic node with chain that returns the CC value
1904 // as its only (other) argument. Provide the associated SystemZISD opcode and
1905 // the mask of valid CC values if so.
1906 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1907  unsigned &CCValid) {
1908  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1909  switch (Id) {
1910  case Intrinsic::s390_tbegin:
1911  Opcode = SystemZISD::TBEGIN;
1912  CCValid = SystemZ::CCMASK_TBEGIN;
1913  return true;
1914 
1915  case Intrinsic::s390_tbegin_nofloat:
1916  Opcode = SystemZISD::TBEGIN_NOFLOAT;
1917  CCValid = SystemZ::CCMASK_TBEGIN;
1918  return true;
1919 
1920  case Intrinsic::s390_tend:
1921  Opcode = SystemZISD::TEND;
1922  CCValid = SystemZ::CCMASK_TEND;
1923  return true;
1924 
1925  default:
1926  return false;
1927  }
1928 }
1929 
1930 // Return true if Op is an intrinsic node without chain that returns the
1931 // CC value as its final argument. Provide the associated SystemZISD
1932 // opcode and the mask of valid CC values if so.
1933 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
1934  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1935  switch (Id) {
1936  case Intrinsic::s390_vpkshs:
1937  case Intrinsic::s390_vpksfs:
1938  case Intrinsic::s390_vpksgs:
1939  Opcode = SystemZISD::PACKS_CC;
1940  CCValid = SystemZ::CCMASK_VCMP;
1941  return true;
1942 
1943  case Intrinsic::s390_vpklshs:
1944  case Intrinsic::s390_vpklsfs:
1945  case Intrinsic::s390_vpklsgs:
1946  Opcode = SystemZISD::PACKLS_CC;
1947  CCValid = SystemZ::CCMASK_VCMP;
1948  return true;
1949 
1950  case Intrinsic::s390_vceqbs:
1951  case Intrinsic::s390_vceqhs:
1952  case Intrinsic::s390_vceqfs:
1953  case Intrinsic::s390_vceqgs:
1954  Opcode = SystemZISD::VICMPES;
1955  CCValid = SystemZ::CCMASK_VCMP;
1956  return true;
1957 
1958  case Intrinsic::s390_vchbs:
1959  case Intrinsic::s390_vchhs:
1960  case Intrinsic::s390_vchfs:
1961  case Intrinsic::s390_vchgs:
1962  Opcode = SystemZISD::VICMPHS;
1963  CCValid = SystemZ::CCMASK_VCMP;
1964  return true;
1965 
1966  case Intrinsic::s390_vchlbs:
1967  case Intrinsic::s390_vchlhs:
1968  case Intrinsic::s390_vchlfs:
1969  case Intrinsic::s390_vchlgs:
1970  Opcode = SystemZISD::VICMPHLS;
1971  CCValid = SystemZ::CCMASK_VCMP;
1972  return true;
1973 
1974  case Intrinsic::s390_vtm:
1975  Opcode = SystemZISD::VTM;
1976  CCValid = SystemZ::CCMASK_VCMP;
1977  return true;
1978 
1979  case Intrinsic::s390_vfaebs:
1980  case Intrinsic::s390_vfaehs:
1981  case Intrinsic::s390_vfaefs:
1982  Opcode = SystemZISD::VFAE_CC;
1983  CCValid = SystemZ::CCMASK_ANY;
1984  return true;
1985 
1986  case Intrinsic::s390_vfaezbs:
1987  case Intrinsic::s390_vfaezhs:
1988  case Intrinsic::s390_vfaezfs:
1989  Opcode = SystemZISD::VFAEZ_CC;
1990  CCValid = SystemZ::CCMASK_ANY;
1991  return true;
1992 
1993  case Intrinsic::s390_vfeebs:
1994  case Intrinsic::s390_vfeehs:
1995  case Intrinsic::s390_vfeefs:
1996  Opcode = SystemZISD::VFEE_CC;
1997  CCValid = SystemZ::CCMASK_ANY;
1998  return true;
1999 
2000  case Intrinsic::s390_vfeezbs:
2001  case Intrinsic::s390_vfeezhs:
2002  case Intrinsic::s390_vfeezfs:
2003  Opcode = SystemZISD::VFEEZ_CC;
2004  CCValid = SystemZ::CCMASK_ANY;
2005  return true;
2006 
2007  case Intrinsic::s390_vfenebs:
2008  case Intrinsic::s390_vfenehs:
2009  case Intrinsic::s390_vfenefs:
2010  Opcode = SystemZISD::VFENE_CC;
2011  CCValid = SystemZ::CCMASK_ANY;
2012  return true;
2013 
2014  case Intrinsic::s390_vfenezbs:
2015  case Intrinsic::s390_vfenezhs:
2016  case Intrinsic::s390_vfenezfs:
2017  Opcode = SystemZISD::VFENEZ_CC;
2018  CCValid = SystemZ::CCMASK_ANY;
2019  return true;
2020 
2021  case Intrinsic::s390_vistrbs:
2022  case Intrinsic::s390_vistrhs:
2023  case Intrinsic::s390_vistrfs:
2024  Opcode = SystemZISD::VISTR_CC;
2025  CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
2026  return true;
2027 
2028  case Intrinsic::s390_vstrcbs:
2029  case Intrinsic::s390_vstrchs:
2030  case Intrinsic::s390_vstrcfs:
2031  Opcode = SystemZISD::VSTRC_CC;
2032  CCValid = SystemZ::CCMASK_ANY;
2033  return true;
2034 
2035  case Intrinsic::s390_vstrczbs:
2036  case Intrinsic::s390_vstrczhs:
2037  case Intrinsic::s390_vstrczfs:
2038  Opcode = SystemZISD::VSTRCZ_CC;
2039  CCValid = SystemZ::CCMASK_ANY;
2040  return true;
2041 
2042  case Intrinsic::s390_vstrsb:
2043  case Intrinsic::s390_vstrsh:
2044  case Intrinsic::s390_vstrsf:
2045  Opcode = SystemZISD::VSTRS_CC;
2046  CCValid = SystemZ::CCMASK_ANY;
2047  return true;
2048 
2049  case Intrinsic::s390_vstrszb:
2050  case Intrinsic::s390_vstrszh:
2051  case Intrinsic::s390_vstrszf:
2052  Opcode = SystemZISD::VSTRSZ_CC;
2053  CCValid = SystemZ::CCMASK_ANY;
2054  return true;
2055 
2056  case Intrinsic::s390_vfcedbs:
2057  case Intrinsic::s390_vfcesbs:
2058  Opcode = SystemZISD::VFCMPES;
2059  CCValid = SystemZ::CCMASK_VCMP;
2060  return true;
2061 
2062  case Intrinsic::s390_vfchdbs:
2063  case Intrinsic::s390_vfchsbs:
2064  Opcode = SystemZISD::VFCMPHS;
2065  CCValid = SystemZ::CCMASK_VCMP;
2066  return true;
2067 
2068  case Intrinsic::s390_vfchedbs:
2069  case Intrinsic::s390_vfchesbs:
2070  Opcode = SystemZISD::VFCMPHES;
2071  CCValid = SystemZ::CCMASK_VCMP;
2072  return true;
2073 
2074  case Intrinsic::s390_vftcidb:
2075  case Intrinsic::s390_vftcisb:
2076  Opcode = SystemZISD::VFTCI;
2077  CCValid = SystemZ::CCMASK_VCMP;
2078  return true;
2079 
2080  case Intrinsic::s390_tdc:
2081  Opcode = SystemZISD::TDC;
2082  CCValid = SystemZ::CCMASK_TDC;
2083  return true;
2084 
2085  default:
2086  return false;
2087  }
2088 }
2089 
2090 // Emit an intrinsic with chain and an explicit CC register result.
2091 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2092  unsigned Opcode) {
2093  // Copy all operands except the intrinsic ID.
2094  unsigned NumOps = Op.getNumOperands();
2095  SmallVector<SDValue, 6> Ops;
2096  Ops.reserve(NumOps - 1);
2097  Ops.push_back(Op.getOperand(0));
2098  for (unsigned I = 2; I < NumOps; ++I)
2099  Ops.push_back(Op.getOperand(I));
2100 
2101  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2102  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2103  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2104  SDValue OldChain = SDValue(Op.getNode(), 1);
2105  SDValue NewChain = SDValue(Intr.getNode(), 1);
2106  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2107  return Intr.getNode();
2108 }
2109 
2110 // Emit an intrinsic with an explicit CC register result.
2111 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2112  unsigned Opcode) {
2113  // Copy all operands except the intrinsic ID.
2114  unsigned NumOps = Op.getNumOperands();
2115  SmallVector<SDValue, 6> Ops;
2116  Ops.reserve(NumOps - 1);
2117  for (unsigned I = 1; I < NumOps; ++I)
2118  Ops.push_back(Op.getOperand(I));
2119 
2120  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2121  return Intr.getNode();
2122 }
2123 
2124 // CC is a comparison that will be implemented using an integer or
2125 // floating-point comparison. Return the condition code mask for
2126 // a branch on true. In the integer case, CCMASK_CMP_UO is set for
2127 // unsigned comparisons and clear for signed ones. In the floating-point
2128 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
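// For example, ISD::SETUGT maps to CCMASK_CMP_UO | CCMASK_CMP_GT via the
// CONV macro below, while the ordered ISD::SETOGT and plain ISD::SETGT
// both map to CCMASK_CMP_GT alone.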
2129 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2130 #define CONV(X) \
2131  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2132  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2133  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2134 
2135  switch (CC) {
2136  default:
2137  llvm_unreachable("Invalid integer condition!");
2138 
2139  CONV(EQ);
2140  CONV(NE);
2141  CONV(GT);
2142  CONV(GE);
2143  CONV(LT);
2144  CONV(LE);
2145 
2146  case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2147  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2148  }
2149 #undef CONV
2150 }
2151 
2152 // If C can be converted to a comparison against zero, adjust the operands
2153 // as necessary.
2154 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2155  if (C.ICmpType == SystemZICMP::UnsignedOnly)
2156  return;
2157 
2158  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2159  if (!ConstOp1)
2160  return;
2161 
2162  int64_t Value = ConstOp1->getSExtValue();
2163  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2164  (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2165  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2166  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2167  C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2168  C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2169  }
2170 }
2171 
2172 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2173 // adjust the operands as necessary.
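// For example, an equality comparison between a sign-extending i8 load and
// the constant -1 is rewritten here as a comparison of the zero-extended
// byte against 255, which CLI can encode directly.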
2174 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2175  Comparison &C) {
2176  // For us to make any changes, it must be a comparison between a single-use
2177  // load and a constant.
2178  if (!C.Op0.hasOneUse() ||
2179  C.Op0.getOpcode() != ISD::LOAD ||
2180  C.Op1.getOpcode() != ISD::Constant)
2181  return;
2182 
2183  // We must have an 8- or 16-bit load.
2184  auto *Load = cast<LoadSDNode>(C.Op0);
2185  unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2186  if ((NumBits != 8 && NumBits != 16) ||
2187  NumBits != Load->getMemoryVT().getStoreSizeInBits())
2188  return;
2189 
2190  // The load must be an extending one and the constant must be within the
2191  // range of the unextended value.
2192  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2193  uint64_t Value = ConstOp1->getZExtValue();
2194  uint64_t Mask = (1 << NumBits) - 1;
2195  if (Load->getExtensionType() == ISD::SEXTLOAD) {
2196  // Make sure that ConstOp1 is in range of C.Op0.
2197  int64_t SignedValue = ConstOp1->getSExtValue();
2198  if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2199  return;
2200  if (C.ICmpType != SystemZICMP::SignedOnly) {
2201  // Unsigned comparison between two sign-extended values is equivalent
2202  // to unsigned comparison between two zero-extended values.
2203  Value &= Mask;
2204  } else if (NumBits == 8) {
2205  // Try to treat the comparison as unsigned, so that we can use CLI.
2206  // Adjust CCMask and Value as necessary.
2207  if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2208  // Test whether the high bit of the byte is set.
2209  Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2210  else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2211  // Test whether the high bit of the byte is clear.
2212  Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2213  else
2214  // No instruction exists for this combination.
2215  return;
2216  C.ICmpType = SystemZICMP::UnsignedOnly;
2217  }
2218  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2219  if (Value > Mask)
2220  return;
2221  // If the constant is in range, we can use any comparison.
2222  C.ICmpType = SystemZICMP::Any;
2223  } else
2224  return;
2225 
2226  // Make sure that the first operand is an i32 of the right extension type.
2227  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2228  ISD::SEXTLOAD :
2229  ISD::ZEXTLOAD);
2230  if (C.Op0.getValueType() != MVT::i32 ||
2231  Load->getExtensionType() != ExtType) {
2232  C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2233  Load->getBasePtr(), Load->getPointerInfo(),
2234  Load->getMemoryVT(), Load->getAlignment(),
2235  Load->getMemOperand()->getFlags());
2236  // Update the chain uses.
2237  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2238  }
2239 
2240  // Make sure that the second operand is an i32 with the right value.
2241  if (C.Op1.getValueType() != MVT::i32 ||
2242  Value != ConstOp1->getZExtValue())
2243  C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2244 }
2245 
2246 // Return true if Op is either an unextended load, or a load suitable
2247 // for integer register-memory comparisons of type ICmpType.
2248 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2249  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2250  if (Load) {
2251  // There are no instructions to compare a register with a memory byte.
2252  if (Load->getMemoryVT() == MVT::i8)
2253  return false;
2254  // Otherwise decide on extension type.
2255  switch (Load->getExtensionType()) {
2256  case ISD::NON_EXTLOAD:
2257  return true;
2258  case ISD::SEXTLOAD:
2259  return ICmpType != SystemZICMP::UnsignedOnly;
2260  case ISD::ZEXTLOAD:
2261  return ICmpType != SystemZICMP::SignedOnly;
2262  default:
2263  break;
2264  }
2265  }
2266  return false;
2267 }
2268 
2269 // Return true if it is better to swap the operands of C.
2270 static bool shouldSwapCmpOperands(const Comparison &C) {
2271  // Leave f128 comparisons alone, since they have no memory forms.
2272  if (C.Op0.getValueType() == MVT::f128)
2273  return false;
2274 
2275  // Always keep a floating-point constant second, since comparisons with
2276  // zero can use LOAD TEST and comparisons with other constants make a
2277  // natural memory operand.
2278  if (isa<ConstantFPSDNode>(C.Op1))
2279  return false;
2280 
2281  // Never swap comparisons with zero since there are many ways to optimize
2282  // those later.
2283  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2284  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2285  return false;
2286 
2287  // Also keep natural memory operands second if the loaded value is
2288  // only used here. Several comparisons have memory forms.
2289  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2290  return false;
2291 
2292  // Look for cases where C.Op0 is a single-use load and C.Op1 isn't.
2293  // In that case we generally prefer the memory to be second.
2294  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2295  // The only exceptions are when the second operand is a constant and
2296  // we can use things like CHHSI.
2297  if (!ConstOp1)
2298  return true;
2299  // The unsigned memory-immediate instructions can handle 16-bit
2300  // unsigned integers.
2301  if (C.ICmpType != SystemZICMP::SignedOnly &&
2302  isUInt<16>(ConstOp1->getZExtValue()))
2303  return false;
2304  // The signed memory-immediate instructions can handle 16-bit
2305  // signed integers.
2306  if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2307  isInt<16>(ConstOp1->getSExtValue()))
2308  return false;
2309  return true;
2310  }
2311 
2312  // Try to promote the use of CGFR and CLGFR.
2313  unsigned Opcode0 = C.Op0.getOpcode();
2314  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2315  return true;
2316  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2317  return true;
2318  if (C.ICmpType != SystemZICMP::SignedOnly &&
2319  Opcode0 == ISD::AND &&
2320  C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2321  cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2322  return true;
2323 
2324  return false;
2325 }
2326 
2327 // Check whether C tests for equality between X and Y and whether X - Y
2328 // or Y - X is also computed. In that case it's better to compare the
2329 // result of the subtraction against zero.
2330 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2331  Comparison &C) {
2332  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2333  C.CCMask == SystemZ::CCMASK_CMP_NE) {
2334  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2335  SDNode *N = *I;
2336  if (N->getOpcode() == ISD::SUB &&
2337  ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2338  (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2339  C.Op0 = SDValue(N, 0);
2340  C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2341  return;
2342  }
2343  }
2344  }
2345 }
2346 
2347 // Check whether C compares a floating-point value with zero and if that
2348 // floating-point value is also negated. In this case we can use the
2349 // negation to set CC, so avoiding separate LOAD AND TEST and
2350 // LOAD (NEGATIVE/COMPLEMENT) instructions.
2351 static void adjustForFNeg(Comparison &C) {
2352  // This optimization is invalid for strict comparisons, since FNEG
2353  // does not raise any exceptions.
2354  if (C.Chain)
2355  return;
2356  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2357  if (C1 && C1->isZero()) {
2358  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2359  SDNode *N = *I;
2360  if (N->getOpcode() == ISD::FNEG) {
2361  C.Op0 = SDValue(N, 0);
2362  C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2363  return;
2364  }
2365  }
2366  }
2367 }
2368 
2369 // Check whether C compares (shl X, 32) with 0 and whether X is
2370 // also sign-extended. In that case it is better to test the result
2371 // of the sign extension using LTGFR.
2372 //
2373 // This case is important because InstCombine transforms a comparison
2374 // with (sext (trunc X)) into a comparison with (shl X, 32).
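// For example, a test of (shl X, 32) < 0 is redirected here to an existing
// (sign_extend_inreg X, i32) node when X has one, so that instruction
// selection can use a single LTGFR.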
2375 static void adjustForLTGFR(Comparison &C) {
2376  // Check for a comparison between (shl X, 32) and 0.
2377  if (C.Op0.getOpcode() == ISD::SHL &&
2378  C.Op0.getValueType() == MVT::i64 &&
2379  C.Op1.getOpcode() == ISD::Constant &&
2380  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2381  auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2382  if (C1 && C1->getZExtValue() == 32) {
2383  SDValue ShlOp0 = C.Op0.getOperand(0);
2384  // See whether X has any SIGN_EXTEND_INREG uses.
2385  for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
2386  SDNode *N = *I;
2387  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2388  cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2389  C.Op0 = SDValue(N, 0);
2390  return;
2391  }
2392  }
2393  }
2394  }
2395 }
2396 
2397 // If C compares the truncation of an extending load, try to compare
2398 // the untruncated value instead. This exposes more opportunities to
2399 // reuse CC.
2400 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2401  Comparison &C) {
2402  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2403  C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2404  C.Op1.getOpcode() == ISD::Constant &&
2405  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2406  auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2407  if (L->getMemoryVT().getStoreSizeInBits().getFixedSize() <=
2408  C.Op0.getValueSizeInBits().getFixedSize()) {
2409  unsigned Type = L->getExtensionType();
2410  if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2411  (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2412  C.Op0 = C.Op0.getOperand(0);
2413  C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2414  }
2415  }
2416  }
2417 }
2418 
2419 // Return true if shift operation N has an in-range constant shift value.
2420 // Store it in ShiftVal if so.
2421 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2422  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2423  if (!Shift)
2424  return false;
2425 
2426  uint64_t Amount = Shift->getZExtValue();
2427  if (Amount >= N.getValueSizeInBits())
2428  return false;
2429 
2430  ShiftVal = Amount;
2431  return true;
2432 }
2433 
2434 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
2435 // instruction and whether the CC value is descriptive enough to handle
2436 // a comparison of type Opcode between the AND result and CmpVal.
2437 // CCMask says which comparison result is being tested and BitSize is
2438 // the number of bits in the operands. If TEST UNDER MASK can be used,
2439 // return the corresponding CC mask, otherwise return 0.
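// For example, with Mask == 0x8000, CmpVal == 0 and CCMask == CCMASK_CMP_EQ,
// the AND/compare pair can be replaced by a TEST UNDER MASK of bit 0x8000
// with a branch on CCMASK_TM_ALL_0 (the selected bit is zero).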
2440 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2441  uint64_t Mask, uint64_t CmpVal,
2442  unsigned ICmpType) {
2443  assert(Mask != 0 && "ANDs with zero should have been removed by now");
2444 
2445  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2446  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2447  !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2448  return 0;
2449 
2450  // Work out the masks for the lowest and highest bits.
2451  unsigned HighShift = 63 - countLeadingZeros(Mask);
2452  uint64_t High = uint64_t(1) << HighShift;
2453  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2454 
2455  // Signed ordered comparisons are effectively unsigned if the sign
2456  // bit is dropped.
2457  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2458 
2459  // Check for equality comparisons with 0, or the equivalent.
2460  if (CmpVal == 0) {
2461  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2462  return SystemZ::CCMASK_TM_ALL_0;
2463  if (CCMask == SystemZ::CCMASK_CMP_NE)
2464  return SystemZ::CCMASK_TM_SOME_1;
2465  }
2466  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2467  if (CCMask == SystemZ::CCMASK_CMP_LT)
2468  return SystemZ::CCMASK_TM_ALL_0;
2469  if (CCMask == SystemZ::CCMASK_CMP_GE)
2470  return SystemZ::CCMASK_TM_SOME_1;
2471  }
2472  if (EffectivelyUnsigned && CmpVal < Low) {
2473  if (CCMask == SystemZ::CCMASK_CMP_LE)
2474  return SystemZ::CCMASK_TM_ALL_0;
2475  if (CCMask == SystemZ::CCMASK_CMP_GT)
2476  return SystemZ::CCMASK_TM_SOME_1;
2477  }
2478 
2479  // Check for equality comparisons with the mask, or the equivalent.
2480  if (CmpVal == Mask) {
2481  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2482  return SystemZ::CCMASK_TM_ALL_1;
2483  if (CCMask == SystemZ::CCMASK_CMP_NE)
2484  return SystemZ::CCMASK_TM_SOME_0;
2485  }
2486  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2487  if (CCMask == SystemZ::CCMASK_CMP_GT)
2488  return SystemZ::CCMASK_TM_ALL_1;
2489  if (CCMask == SystemZ::CCMASK_CMP_LE)
2490  return SystemZ::CCMASK_TM_SOME_0;
2491  }
2492  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2493  if (CCMask == SystemZ::CCMASK_CMP_GE)
2494  return SystemZ::CCMASK_TM_ALL_1;
2495  if (CCMask == SystemZ::CCMASK_CMP_LT)
2496  return SystemZ::CCMASK_TM_SOME_0;
2497  }
2498 
2499  // Check for ordered comparisons with the top bit.
2500  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2501  if (CCMask == SystemZ::CCMASK_CMP_LE)
2502  return SystemZ::CCMASK_TM_MSB_0;
2503  if (CCMask == SystemZ::CCMASK_CMP_GT)
2504  return SystemZ::CCMASK_TM_MSB_1;
2505  }
2506  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2507  if (CCMask == SystemZ::CCMASK_CMP_LT)
2508  return SystemZ::CCMASK_TM_MSB_0;
2509  if (CCMask == SystemZ::CCMASK_CMP_GE)
2510  return SystemZ::CCMASK_TM_MSB_1;
2511  }
2512 
2513  // If there are just two bits, we can do equality checks for Low and High
2514  // as well.
2515  if (Mask == Low + High) {
2516  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2517  return SystemZ::CCMASK_TM_MIXED_MSB_0;
2518  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2519  return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2520  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2521  return SystemZ::CCMASK_TM_MIXED_MSB_1;
2522  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2523  return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2524  }
2525 
2526  // Looks like we've exhausted our options.
2527  return 0;
2528 }
2529 
2530 // See whether C can be implemented as a TEST UNDER MASK instruction.
2531 // Update the arguments with the TM version if so.
2532 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2533  Comparison &C) {
2534  // Check that we have a comparison with a constant.
2535  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2536  if (!ConstOp1)
2537  return;
2538  uint64_t CmpVal = ConstOp1->getZExtValue();
2539 
2540  // Check whether the nonconstant input is an AND with a constant mask.
2541  Comparison NewC(C);
2542  uint64_t MaskVal;
2543  ConstantSDNode *Mask = nullptr;
2544  if (C.Op0.getOpcode() == ISD::AND) {
2545  NewC.Op0 = C.Op0.getOperand(0);
2546  NewC.Op1 = C.Op0.getOperand(1);
2547  Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2548  if (!Mask)
2549  return;
2550  MaskVal = Mask->getZExtValue();
2551  } else {
2552  // There is no instruction to compare with a 64-bit immediate
2553  // so use TMHH instead if possible. We need an unsigned ordered
2554  // comparison with an i64 immediate.
2555  if (NewC.Op0.getValueType() != MVT::i64 ||
2556  NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2557  NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2558  NewC.ICmpType == SystemZICMP::SignedOnly)
2559  return;
2560  // Convert LE and GT comparisons into LT and GE.
2561  if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2562  NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2563  if (CmpVal == uint64_t(-1))
2564  return;
2565  CmpVal += 1;
2566  NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2567  }
2568  // If the low N bits of Op1 are zero then the low N bits of Op0 can
2569  // be masked off without changing the result.
2570  MaskVal = -(CmpVal & -CmpVal);
2571  NewC.ICmpType = SystemZICMP::UnsignedOnly;
2572  }
2573  if (!MaskVal)
2574  return;
2575 
2576  // Check whether the combination of mask, comparison value and comparison
2577  // type are suitable.
2578  unsigned BitSize = NewC.Op0.getValueSizeInBits();
2579  unsigned NewCCMask, ShiftVal;
2580  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2581  NewC.Op0.getOpcode() == ISD::SHL &&
2582  isSimpleShift(NewC.Op0, ShiftVal) &&
2583  (MaskVal >> ShiftVal != 0) &&
2584  ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2585  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2586  MaskVal >> ShiftVal,
2587  CmpVal >> ShiftVal,
2588  SystemZICMP::Any))) {
2589  NewC.Op0 = NewC.Op0.getOperand(0);
2590  MaskVal >>= ShiftVal;
2591  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2592  NewC.Op0.getOpcode() == ISD::SRL &&
2593  isSimpleShift(NewC.Op0, ShiftVal) &&
2594  (MaskVal << ShiftVal != 0) &&
2595  ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2596  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2597  MaskVal << ShiftVal,
2598  CmpVal << ShiftVal,
2599  SystemZICMP::UnsignedOnly))) {
2600  NewC.Op0 = NewC.Op0.getOperand(0);
2601  MaskVal <<= ShiftVal;
2602  } else {
2603  NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2604  NewC.ICmpType);
2605  if (!NewCCMask)
2606  return;
2607  }
2608 
2609  // Go ahead and make the change.
2610  C.Opcode = SystemZISD::TM;
2611  C.Op0 = NewC.Op0;
2612  if (Mask && Mask->getZExtValue() == MaskVal)
2613  C.Op1 = SDValue(Mask, 0);
2614  else
2615  C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2616  C.CCValid = SystemZ::CCMASK_TM;
2617  C.CCMask = NewCCMask;
2618 }
2619 
2620 // See whether the comparison argument contains a redundant AND
2621 // and remove it if so. This sometimes happens due to the generic
2622 // BRCOND expansion.
2623 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2624  Comparison &C) {
2625  if (C.Op0.getOpcode() != ISD::AND)
2626  return;
2627  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2628  if (!Mask)
2629  return;
2630  KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2631  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2632  return;
2633 
2634  C.Op0 = C.Op0.getOperand(0);
2635 }
2636 
2637 // Return a Comparison that tests the condition-code result of intrinsic
2638 // node Call against constant integer CC using comparison code Cond.
2639 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2640 // and CCValid is the set of possible condition-code results.
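// For example, testing for CC == 1 with SETEQ produces
// CCMask == 1 << (3 - 1) == 0b0100: only the bit for CC 1 is set
// (bit 3 stands for CC 0 and bit 0 for CC 3), before masking with CCValid.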
2641 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2642  SDValue Call, unsigned CCValid, uint64_t CC,
2643  ISD::CondCode Cond) {
2644  Comparison C(Call, SDValue(), SDValue());
2645  C.Opcode = Opcode;
2646  C.CCValid = CCValid;
2647  if (Cond == ISD::SETEQ)
2648  // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2649  C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2650  else if (Cond == ISD::SETNE)
2651  // ...and the inverse of that.
2652  C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2653  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2654  // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2655  // always true for CC>3.
2656  C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2657  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2658  // ...and the inverse of that.
2659  C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2660  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2661  // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2662  // always true for CC>3.
2663  C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2664  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2665  // ...and the inverse of that.
2666  C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2667  else
2668  llvm_unreachable("Unexpected integer comparison type");
2669  C.CCMask &= CCValid;
2670  return C;
2671 }
2672 
2673 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2674 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2675  ISD::CondCode Cond, const SDLoc &DL,
2676  SDValue Chain = SDValue(),
2677  bool IsSignaling = false) {
2678  if (CmpOp1.getOpcode() == ISD::Constant) {
2679  assert(!Chain);
2680  uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2681  unsigned Opcode, CCValid;
2682  if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2683  CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2684  isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2685  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2686  if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2687  CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2688  isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2689  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2690  }
2691  Comparison C(CmpOp0, CmpOp1, Chain);
2692  C.CCMask = CCMaskForCondCode(Cond);
2693  if (C.Op0.getValueType().isFloatingPoint()) {
2694  C.CCValid = SystemZ::CCMASK_FCMP;
2695  if (!C.Chain)
2696  C.Opcode = SystemZISD::FCMP;
2697  else if (!IsSignaling)
2698  C.Opcode = SystemZISD::STRICT_FCMP;
2699  else
2700  C.Opcode = SystemZISD::STRICT_FCMPS;
2701  adjustForFNeg(C);
2702  } else {
2703  assert(!C.Chain);
2704  C.CCValid = SystemZ::CCMASK_ICMP;
2705  C.Opcode = SystemZISD::ICMP;
2706  // Choose the type of comparison. Equality and inequality tests can
2707  // use either signed or unsigned comparisons. The choice also doesn't
2708  // matter if both sign bits are known to be clear. In those cases we
2709  // want to give the main isel code the freedom to choose whichever
2710  // form fits best.
2711  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2712  C.CCMask == SystemZ::CCMASK_CMP_NE ||
2713  (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2714  C.ICmpType = SystemZICMP::Any;
2715  else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2716  C.ICmpType = SystemZICMP::UnsignedOnly;
2717  else
2718  C.ICmpType = SystemZICMP::SignedOnly;
2719  C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2720  adjustForRedundantAnd(DAG, DL, C);
2721  adjustZeroCmp(DAG, DL, C);
2722  adjustSubwordCmp(DAG, DL, C);
2723  adjustForSubtraction(DAG, DL, C);
2724  adjustForLTGFR(C);
2725  adjustICmpTruncate(DAG, DL, C);
2726  }
2727 
2728  if (shouldSwapCmpOperands(C)) {
2729  std::swap(C.Op0, C.Op1);
2730  C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2731  }
2732 
2733  adjustForTestUnderMask(DAG, DL, C);
2734  return C;
2735 }
2736 
2737 // Emit the comparison instruction described by C.
2738 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2739  if (!C.Op1.getNode()) {
2740  SDNode *Node;
2741  switch (C.Op0.getOpcode()) {
2742  case ISD::INTRINSIC_W_CHAIN:
2743  Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2744  return SDValue(Node, 0);
2745  case ISD::INTRINSIC_WO_CHAIN:
2746  Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2747  return SDValue(Node, Node->getNumValues() - 1);
2748  default:
2749  llvm_unreachable("Invalid comparison operands");
2750  }
2751  }
2752  if (C.Opcode == SystemZISD::ICMP)
2753  return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2754  DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
2755  if (C.Opcode == SystemZISD::TM) {
2756  bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2757  bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2758  return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2759  DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
2760  }
2761  if (C.Chain) {
2762  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
2763  return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
2764  }
2765  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2766 }
2767 
2768 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2769 // 64 bits. Extend is the extension type to use. Store the high part
2770 // in Hi and the low part in Lo.
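// For example, with Extend == ISD::ZERO_EXTEND, multiplying 0xFFFFFFFF by
// 0xFFFFFFFF yields the 64-bit product 0xFFFFFFFE00000001, so Hi becomes
// 0xFFFFFFFE and Lo becomes 0x00000001.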
2771 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2772  SDValue Op0, SDValue Op1, SDValue &Hi,
2773  SDValue &Lo) {
2774  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2775  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2776  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2777  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2778  DAG.getConstant(32, DL, MVT::i64));
2779  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2780  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2781 }
2782 
2783 // Lower a binary operation that produces two VT results, one in each
2784 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2785 // and Opcode performs the GR128 operation. Store the even register result
2786 // in Even and the odd register result in Odd.
2787 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2788  unsigned Opcode, SDValue Op0, SDValue Op1,
2789  SDValue &Even, SDValue &Odd) {
2790  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2791  bool Is32Bit = is32Bit(VT);
2792  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2793  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2794 }
2795 
2796 // Return an i32 value that is 1 if the CC value produced by CCReg is
2797 // in the mask CCMask and 0 otherwise. CC is known to have a value
2798 // in CCValid, so other values can be ignored.
2799 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2800  unsigned CCValid, unsigned CCMask) {
2801  SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
2802  DAG.getConstant(0, DL, MVT::i32),
2803  DAG.getTargetConstant(CCValid, DL, MVT::i32),
2804  DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
2805  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2806 }
2807 
2808 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2809 // be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
2810 // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
2811 // floating-point comparisons, and CmpMode::SignalingFP for strict signaling
2812 // floating-point comparisons.
2813 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
2814 static int getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
2815  switch (CC) {
2816  case ISD::SETOEQ:
2817  case ISD::SETEQ:
2818  switch (Mode) {
2819  case CmpMode::Int: return SystemZISD::VICMPE;
2820  case CmpMode::FP: return SystemZISD::VFCMPE;
2821  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
2822  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
2823  }
2824  llvm_unreachable("Bad mode");
2825 
2826  case ISD::SETOGE:
2827  case ISD::SETGE:
2828  switch (Mode) {
2829  case CmpMode::Int: return 0;
2830  case CmpMode::FP: return SystemZISD::VFCMPHE;
2831  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
2832  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
2833  }
2834  llvm_unreachable("Bad mode");
2835 
2836  case ISD::SETOGT:
2837  case ISD::SETGT:
2838  switch (Mode) {
2839  case CmpMode::Int: return SystemZISD::VICMPH;
2840  case CmpMode::FP: return SystemZISD::VFCMPH;
2841  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
2842  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
2843  }
2844  llvm_unreachable("Bad mode");
2845 
2846  case ISD::SETUGT:
2847  switch (Mode) {
2848  case CmpMode::Int: return SystemZISD::VICMPHL;
2849  case CmpMode::FP: return 0;
2850  case CmpMode::StrictFP: return 0;
2851  case CmpMode::SignalingFP: return 0;
2852  }
2853  llvm_unreachable("Bad mode");
2854 
2855  default:
2856  return 0;
2857  }
2858 }
2859 
2860 // Return the SystemZISD vector comparison operation for CC or its inverse,
2861 // or 0 if neither can be done directly. Indicate in Invert whether the
2862 // result is for the inverse of CC. Mode is as above.
2863 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
2864  bool &Invert) {
2865  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2866  Invert = false;
2867  return Opcode;
2868  }
2869 
2870  CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
2871  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2872  Invert = true;
2873  return Opcode;
2874  }
2875 
2876  return 0;
2877 }
2878 
2879 // Return a v2f64 that contains the extended form of elements Start and Start+1
2880 // of v4f32 value Op. If Chain is nonnull, return the strict form.
2881 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2882  SDValue Op, SDValue Chain) {
2883  int Mask[] = { Start, -1, Start + 1, -1 };
2884  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2885  if (Chain) {
2886  SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
2887  return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
2888  }
2889  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2890 }
2891 
2892 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2893 // producing a result of type VT. If Chain is nonnull, return the strict form.
2894 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2895  const SDLoc &DL, EVT VT,
2896  SDValue CmpOp0,
2897  SDValue CmpOp1,
2898  SDValue Chain) const {
2899  // There is no hardware support for v4f32 (unless we have the vector
2900  // enhancements facility 1), so extend the vector into two v2f64s
2901  // and compare those.
2902  if (CmpOp0.getValueType() == MVT::v4f32 &&
2903  !Subtarget.hasVectorEnhancements1()) {
2904  SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
2905  SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
2906  SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
2907  SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
2908  if (Chain) {
2909  SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
2910  SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
2911  SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
2912  SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2913  SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
2914  H1.getValue(1), L1.getValue(1),
2915  HRes.getValue(1), LRes.getValue(1) };
2916  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
2917  SDValue Ops[2] = { Res, NewChain };
2918  return DAG.getMergeValues(Ops, DL);
2919  }
2920  SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2921  SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2922  return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2923  }
2924  if (Chain) {
2925  SDVTList VTs = DAG.getVTList(VT, MVT::Other);
2926  return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
2927  }
2928  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
2929 }
2930 
2931 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2932 // an integer mask of type VT. If Chain is nonnull, we have a strict
2933 // floating-point comparison. If in addition IsSignaling is true, we have
2934 // a strict signaling floating-point comparison.
2935 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
2936  const SDLoc &DL, EVT VT,
2937  ISD::CondCode CC,
2938  SDValue CmpOp0,
2939  SDValue CmpOp1,
2940  SDValue Chain,
2941  bool IsSignaling) const {
2942  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
2943  assert (!Chain || IsFP);
2944  assert (!IsSignaling || Chain);
2945  CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
2946  Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
2947  bool Invert = false;
2948  SDValue Cmp;
2949  switch (CC) {
2950  // Handle tests for order using (or (ogt y x) (oge x y)).
2951  case ISD::SETUO:
2952  Invert = true;
2953  LLVM_FALLTHROUGH;
2954  case ISD::SETO: {
2955  assert(IsFP && "Unexpected integer comparison");
2956  SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2957  DL, VT, CmpOp1, CmpOp0, Chain);
2958  SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
2959  DL, VT, CmpOp0, CmpOp1, Chain);
2960  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
2961  if (Chain)
2962  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2963  LT.getValue(1), GE.getValue(1));
2964  break;
2965  }
2966 
2967  // Handle <> tests using (or (ogt y x) (ogt x y)).
2968  case ISD::SETUEQ:
2969  Invert = true;
2970  LLVM_FALLTHROUGH;
2971  case ISD::SETONE: {
2972  assert(IsFP && "Unexpected integer comparison");
2973  SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2974  DL, VT, CmpOp1, CmpOp0, Chain);
2975  SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2976  DL, VT, CmpOp0, CmpOp1, Chain);
2977  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
2978  if (Chain)
2979  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2980  LT.getValue(1), GT.getValue(1));
2981  break;
2982  }
2983 
2984  // Otherwise a single comparison is enough. It doesn't really
2985  // matter whether we try the inversion or the swap first, since
2986  // there are no cases where both work.
2987  default:
2988  if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
2989  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
2990  else {
2991  CC = ISD::getSetCCSwappedOperands(CC);
2992  if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
2993  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
2994  else
2995  llvm_unreachable("Unhandled comparison");
2996  }
2997  if (Chain)
2998  Chain = Cmp.getValue(1);
2999  break;
3000  }
3001  if (Invert) {
3002  SDValue Mask =
3003  DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3004  Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3005  }
3006  if (Chain && Chain.getNode() != Cmp.getNode()) {
3007  SDValue Ops[2] = { Cmp, Chain };
3008  Cmp = DAG.getMergeValues(Ops, DL);
3009  }
3010  return Cmp;
3011 }
3012 
3013 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3014  SelectionDAG &DAG) const {
3015  SDValue CmpOp0 = Op.getOperand(0);
3016  SDValue CmpOp1 = Op.getOperand(1);
3017  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3018  SDLoc DL(Op);
3019  EVT VT = Op.getValueType();
3020  if (VT.isVector())
3021  return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3022 
3023  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3024  SDValue CCReg = emitCmp(DAG, DL, C);
3025  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3026 }
3027 
3028 SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3029  SelectionDAG &DAG,
3030  bool IsSignaling) const {
3031  SDValue Chain = Op.getOperand(0);
3032  SDValue CmpOp0 = Op.getOperand(1);
3033  SDValue CmpOp1 = Op.getOperand(2);
3034  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3035  SDLoc DL(Op);
3036  EVT VT = Op.getNode()->getValueType(0);
3037  if (VT.isVector()) {
3038  SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3039  Chain, IsSignaling);
3040  return Res.getValue(Op.getResNo());
3041  }
3042 
3043  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3044  SDValue CCReg = emitCmp(DAG, DL, C);
3045  CCReg->setFlags(Op->getFlags());
3046  SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3047  SDValue Ops[2] = { Result, CCReg.getValue(1) };
3048  return DAG.getMergeValues(Ops, DL);
3049 }
3050 
3051 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3052  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3053  SDValue CmpOp0 = Op.getOperand(2);
3054  SDValue CmpOp1 = Op.getOperand(3);
3055  SDValue Dest = Op.getOperand(4);
3056  SDLoc DL(Op);
3057 
3058  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3059  SDValue CCReg = emitCmp(DAG, DL, C);
3060  return DAG.getNode(
3061  SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3062  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3063  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3064 }
3065 
3066 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3067 // allowing Pos and Neg to be wider than CmpOp.
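// In other words, Neg must be (sub 0, Pos) and Pos must be either CmpOp
// itself or (sign_extend CmpOp).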
3068 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3069  return (Neg.getOpcode() == ISD::SUB &&
3070  Neg.getOperand(0).getOpcode() == ISD::Constant &&
3071  cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
3072  Neg.getOperand(1) == Pos &&
3073  (Pos == CmpOp ||
3074  (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3075  Pos.getOperand(0) == CmpOp)));
3076 }
3077 
3078 // Return the absolute or negative absolute of Op; IsNegative decides which.
3079 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3080  bool IsNegative) {
3081  Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3082  if (IsNegative)
3083  Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3084  DAG.getConstant(0, DL, Op.getValueType()), Op);
3085  return Op;
3086 }
3087 
3088 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3089  SelectionDAG &DAG) const {
3090  SDValue CmpOp0 = Op.getOperand(0);
3091  SDValue CmpOp1 = Op.getOperand(1);
3092  SDValue TrueOp = Op.getOperand(2);
3093  SDValue FalseOp = Op.getOperand(3);
3094  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3095  SDLoc DL(Op);
3096 
3097  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3098 
3099  // Check for absolute and negative-absolute selections, including those
3100  // where the comparison value is sign-extended (for LPGFR and LNGFR).
3101  // This check supplements the one in DAGCombiner.
3102  if (C.Opcode == SystemZISD::ICMP &&
3103  C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3104  C.CCMask != SystemZ::CCMASK_CMP_NE &&
3105  C.Op1.getOpcode() == ISD::Constant &&
3106  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
3107  if (isAbsolute(C.Op0, TrueOp, FalseOp))
3108  return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3109  if (isAbsolute(C.Op0, FalseOp, TrueOp))
3110  return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3111  }
3112 
3113  SDValue CCReg = emitCmp(DAG, DL, C);
3114  SDValue Ops[] = {TrueOp, FalseOp,
3115  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3116  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3117 
3118  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3119 }
3120 
3121 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3122  SelectionDAG &DAG) const {
3123  SDLoc DL(Node);
3124  const GlobalValue *GV = Node->getGlobal();
3125  int64_t Offset = Node->getOffset();
3126  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3127  CodeModel::Model CM = DAG.getTarget().getCodeModel();
3128 
3129  SDValue Result;
3130  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3131  if (isInt<32>(Offset)) {
3132  // Assign anchors at 1<<12 byte boundaries.
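// For example, an offset of 0x1234 uses the anchor Offset & ~0xfff == 0x1000;
// the remaining 0x234 is even (halfword-aligned) and is folded below.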
3133  uint64_t Anchor = Offset & ~uint64_t(0xfff);
3134  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3135  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3136 
3137  // The offset can be folded into the address if it is aligned to a
3138  // halfword.
3139  Offset -= Anchor;
3140  if (Offset != 0 && (Offset & 1) == 0) {
3141  SDValue Full =
3142  DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3143  Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3144  Offset = 0;
3145  }
3146  } else {
3147  // Conservatively load a constant offset greater than 32 bits into a
3148  // register below.
3149  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3150  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3151  }
3152  } else {
3153  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3154  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3155  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3156  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3157  }
3158 
3159  // If there was a non-zero offset that we didn't fold, create an explicit
3160  // addition for it.
3161  if (Offset != 0)
3162  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3163  DAG.getConstant(Offset, DL, PtrVT));
3164 
3165  return Result;
3166 }
3167 
3168 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3169  SelectionDAG &DAG,
3170  unsigned Opcode,
3171  SDValue GOTOffset) const {
3172  SDLoc DL(Node);
3173  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3174  SDValue Chain = DAG.getEntryNode();
3175  SDValue Glue;
3176 
3177  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3178  CallingConv::GHC)
3179  report_fatal_error("In GHC calling convention TLS is not supported");
3180 
3181  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3182  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3183  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3184  Glue = Chain.getValue(1);
3185  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3186  Glue = Chain.getValue(1);
3187 
3188  // The first call operand is the chain and the second is the TLS symbol.
3189  SmallVector<SDValue, 8> Ops;
3190  Ops.push_back(Chain);
3191  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3192  Node->getValueType(0),
3193  0, 0));
3194 
3195  // Add argument registers to the end of the list so that they are
3196  // known live into the call.
3197  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3198  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3199 
3200  // Add a register mask operand representing the call-preserved registers.
3201  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3202  const uint32_t *Mask =
3203  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3204  assert(Mask && "Missing call preserved mask for calling convention");
3205  Ops.push_back(DAG.getRegisterMask(Mask));
3206 
3207  // Glue the call to the argument copies.
3208  Ops.push_back(Glue);
3209 
3210  // Emit the call.
3211  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3212  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3213  Glue = Chain.getValue(1);
3214 
3215  // Copy the return value from %r2.
3216  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3217 }
3218 
3219 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3220  SelectionDAG &DAG) const {
3221  SDValue Chain = DAG.getEntryNode();
3222  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3223 
3224  // The high part of the thread pointer is in access register 0.
3225  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3226  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3227 
3228  // The low part of the thread pointer is in access register 1.
3229  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3230  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3231 
3232  // Merge them into a single 64-bit address.
3233  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3234  DAG.getConstant(32, DL, PtrVT));
3235  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3236 }
3237 
3238 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3239  SelectionDAG &DAG) const {
3240  if (DAG.getTarget().useEmulatedTLS())
3241  return LowerToTLSEmulatedModel(Node, DAG);
3242  SDLoc DL(Node);
3243  const GlobalValue *GV = Node->getGlobal();
3244  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3245  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3246 
3247  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3248  CallingConv::GHC)
3249  report_fatal_error("In GHC calling convention TLS is not supported");
3250 
3251  SDValue TP = lowerThreadPointer(DL, DAG);
3252 
3253  // Get the offset of GA from the thread pointer, based on the TLS model.
3254  SDValue Offset;
3255  switch (model) {
3256  case TLSModel::GeneralDynamic: {
3257  // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3258  SystemZConstantPoolValue *CPV =
3259  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3260 
3261  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3262  Offset = DAG.getLoad(
3263  PtrVT, DL, DAG.getEntryNode(), Offset,
3264  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3265 
3266  // Call __tls_get_offset to retrieve the offset.
3267  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3268  break;
3269  }
3270 
3271  case TLSModel::LocalDynamic: {
3272  // Load the GOT offset of the module ID.
3275 
3276  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3277  Offset = DAG.getLoad(
3278  PtrVT, DL, DAG.getEntryNode(), Offset,
3279  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3280 
3281  // Call __tls_get_offset to retrieve the module base offset.
3282  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3283 
3284  // Note: The SystemZLDCleanupPass will remove redundant computations
3285  // of the module base offset. Count total number of local-dynamic
3286  // accesses to trigger execution of that pass.
3287  SystemZMachineFunctionInfo* MFI =
3288  DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3289  MFI->incNumLocalDynamicTLSAccesses();
3290 
3291  // Add the per-symbol offset.
3292  CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3293 
3294  SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3295  DTPOffset = DAG.getLoad(
3296  PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3297  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3298 
3299  Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3300  break;
3301  }
3302 
3303  case TLSModel::InitialExec: {
3304  // Load the offset from the GOT.
3305  Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3306  SystemZII::MO_INDNTPOFF);
3307  Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3308  Offset =
3309  DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3310  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3311  break;
3312  }
3313 
3314  case TLSModel::LocalExec: {
3315  // Force the offset into the constant pool and load it from there.
3316  SystemZConstantPoolValue *CPV =
3317  SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3318 
3319  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3320  Offset = DAG.getLoad(
3321  PtrVT, DL, DAG.getEntryNode(), Offset,
3323  break;
3324  }
3325  }
3326 
3327  // Add the base and offset together.
3328  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3329 }
3330 
3331 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3332  SelectionDAG &DAG) const {
3333  SDLoc DL(Node);
3334  const BlockAddress *BA = Node->getBlockAddress();
3335  int64_t Offset = Node->getOffset();
3336  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3337 
3338  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3339  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3340  return Result;
3341 }
3342 
3343 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3344  SelectionDAG &DAG) const {
3345  SDLoc DL(JT);
3346  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3347  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3348 
3349  // Use LARL to load the address of the table.
3350  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3351 }
3352 
3353 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3354  SelectionDAG &DAG) const {
3355  SDLoc DL(CP);
3356  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3357 
3358  SDValue Result;
3359  if (CP->isMachineConstantPoolEntry())
3360  Result =
3361  DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3362  else
3363  Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3364  CP->getOffset());
3365 
3366  // Use LARL to load the address of the constant pool entry.
3367  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3368 }
3369 
3370 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3371  SelectionDAG &DAG) const {
3372  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3373  MachineFunction &MF = DAG.getMachineFunction();
3374  MachineFrameInfo &MFI = MF.getFrameInfo();
3375  MFI.setFrameAddressIsTaken(true);
3376 
3377  SDLoc DL(Op);
3378  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3379  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3380 
3381  // By definition, the frame address is the address of the back chain. (In
3382  // the case of packed stack without backchain, return the address where the
3383  // backchain would have been stored. This will either be an unused space or
3384  // contain a saved register).
3385  int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3386  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3387 
3388  // FIXME The frontend should detect this case.
3389  if (Depth > 0) {
3390  report_fatal_error("Unsupported stack frame traversal count");
3391  }
3392 
3393  return BackChain;
3394 }
3395 
3396 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3397  SelectionDAG &DAG) const {
3398  MachineFunction &MF = DAG.getMachineFunction();
3399  MachineFrameInfo &MFI = MF.getFrameInfo();
3400  MFI.setReturnAddressIsTaken(true);
3401 
3402  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3403  return SDValue();
3404 
3405  SDLoc DL(Op);
3406  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3407  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3408 
3409  // FIXME The frontend should detect this case.
3410  if (Depth > 0) {
3411  report_fatal_error("Unsupported stack frame traversal count");
3412  }
3413 
3414  // Return R14D, which has the return address. Mark it as an implicit live-in.
3415  unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3416  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3417 }
3418 
3419 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3420  SelectionDAG &DAG) const {
3421  SDLoc DL(Op);
3422  SDValue In = Op.getOperand(0);
3423  EVT InVT = In.getValueType();
3424  EVT ResVT = Op.getValueType();
3425 
3426  // Convert loads directly. This is normally done by DAGCombiner,
3427  // but we need this case for bitcasts that are created during lowering
3428  // and which are then lowered themselves.
3429  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3430  if (ISD::isNormalLoad(LoadN)) {
3431  SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3432  LoadN->getBasePtr(), LoadN->getMemOperand());
3433  // Update the chain uses.
3434  DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3435  return NewLoad;
3436  }
3437 
3438  if (InVT == MVT::i32 && ResVT == MVT::f32) {
3439  SDValue In64;
3440  if (Subtarget.hasHighWord()) {
3441  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3442  MVT::i64);
3443  In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3444  MVT::i64, SDValue(U64, 0), In);
3445  } else {
3446  In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3447  In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3448  DAG.getConstant(32, DL, MVT::i64));
3449  }
3450  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3451  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3452  DL, MVT::f32, Out64);
3453  }
3454  if (InVT == MVT::f32 && ResVT == MVT::i32) {
3455  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3456  SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3457  MVT::f64, SDValue(U64, 0), In);
3458  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3459  if (Subtarget.hasHighWord())
3460  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3461  MVT::i32, Out64);
3462  SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3463  DAG.getConstant(32, DL, MVT::i64));
3464  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3465  }
3466  llvm_unreachable("Unexpected bitcast combination");
3467 }
3468 
3469 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3470  SelectionDAG &DAG) const {
3471  MachineFunction &MF = DAG.getMachineFunction();
3472  SystemZMachineFunctionInfo *FuncInfo =
3473  MF.getInfo<SystemZMachineFunctionInfo>();
3474  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3475 
3476  SDValue Chain = Op.getOperand(0);
3477  SDValue Addr = Op.getOperand(1);
3478  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3479  SDLoc DL(Op);
3480 
3481  // The initial values of each field.
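  // (These correspond to the four 8-byte members of the s390x ELF va_list --
  // __gpr, __fpr, __overflow_arg_area and __reg_save_area -- assuming the
  // standard layout; the stores below fill them in that order.)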
3482  const unsigned NumFields = 4;
3483  SDValue Fields[NumFields] = {
3484  DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3485  DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3486  DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3487  DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3488  };
3489 
3490  // Store each field into its respective slot.
3491  SDValue MemOps[NumFields];
3492  unsigned Offset = 0;
3493  for (unsigned I = 0; I < NumFields; ++I) {
3494  SDValue FieldAddr = Addr;
3495  if (Offset != 0)
3496  FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3497  DAG.getIntPtrConstant(Offset, DL));
3498  MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3499  MachinePointerInfo(SV, Offset));
3500  Offset += 8;
3501  }
3502  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3503 }
3504 
3505 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3506  SelectionDAG &DAG) const {
3507  SDValue Chain = Op.getOperand(0);
3508  SDValue DstPtr = Op.getOperand(1);
3509  SDValue SrcPtr = Op.getOperand(2);
3510  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3511  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3512  SDLoc DL(Op);
3513 
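  // The 32-byte copy below presumably matches sizeof(va_list) on s390x: the
  // four 8-byte fields initialized in lowerVASTART above.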
3514  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
3515  Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3516  /*isTailCall*/ false, MachinePointerInfo(DstSV),
3517  MachinePointerInfo(SrcSV));
3518 }
3519 
3520 SDValue SystemZTargetLowering::
3521 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
3522  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3523  MachineFunction &MF = DAG.getMachineFunction();
3524  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3525  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3526 
3527  SDValue Chain = Op.getOperand(0);
3528  SDValue Size = Op.getOperand(1);
3529  SDValue Align = Op.getOperand(2);
3530  SDLoc DL(Op);
3531 
3532  // If the user has set the "no-realign-stack" function attribute, ignore
3533  // alloca alignments.
3534  uint64_t AlignVal =
3535  (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3536 
3537  uint64_t StackAlign = TFI->getStackAlignment();
3538  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3539  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3540 
3541  unsigned SPReg = getStackPointerRegisterToSaveRestore();
3542  SDValue NeededSpace = Size;
3543 
3544  // Get a reference to the stack pointer.
3545  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3546 
3547  // If we need a backchain, save it now.
3548  SDValue Backchain;
3549  if (StoreBackchain)
3550  Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
3551  MachinePointerInfo());
3552 
3553  // Add extra space for alignment if needed.
3554  if (ExtraAlignSpace)
3555  NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3556  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3557 
3558  // Get the new stack pointer value.
3559  SDValue NewSP;
3560  if (hasInlineStackProbe(MF)) {
3561  NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
3562  DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
3563  Chain = NewSP.getValue(1);
3564  }
3565  else {
3566  NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3567  // Copy the new stack pointer back.
3568  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3569  }
3570 
3571  // The allocated data lives above the 160 bytes allocated for the standard
3572  // frame, plus any outgoing stack arguments. We don't know how much that
3573  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3574  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3575  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3576 
3577  // Dynamically realign if needed.
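  // Worked example (illustrative): for an alloca aligned to 64 bytes with an
  // 8-byte stack alignment, ExtraAlignSpace is 56; adding 56 and masking with
  // ~63 below yields a 64-byte aligned pointer somewhere inside the
  // over-allocated block.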
3578  if (RequiredAlign > StackAlign) {
3579  Result =
3580  DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3581  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3582  Result =
3583  DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3584  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3585  }
3586 
3587  if (StoreBackchain)
3588  Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
3589  MachinePointerInfo());
3590 
3591  SDValue Ops[2] = { Result, Chain };
3592  return DAG.getMergeValues(Ops, DL);
3593 }
3594 
3595 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3596  SDValue Op, SelectionDAG &DAG) const {
3597  SDLoc DL(Op);
3598 
3599  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3600 }
3601 
3602 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3603  SelectionDAG &DAG) const {
3604  EVT VT = Op.getValueType();
3605  SDLoc DL(Op);
3606  SDValue Ops[2];
3607  if (is32Bit(VT))
3608  // Just do a normal 64-bit multiplication and extract the results.
3609  // We define this so that it can be used for constant division.
3610  lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3611  Op.getOperand(1), Ops[1], Ops[0]);
3612  else if (Subtarget.hasMiscellaneousExtensions2())
3613  // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3614  // the high result in the even register. ISD::SMUL_LOHI is defined to
3615  // return the low half first, so the results are in reverse order.
3616  lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3617  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3618  else {
3619  // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3620  //
3621  // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3622  //
3623  // but using the fact that the upper halves are either all zeros
3624  // or all ones:
3625  //
3626  // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3627  //
3628  // and grouping the two subtracted terms together, since they are quicker
3629  // to compute than the multiplication:
3630  //
3631  // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
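  //
  // This works because lh and rh are each either 0 or all ones (i.e. -1), so
  // for example (lh * rl) == -(lh & rl), and likewise for (ll * rh).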
3632  SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3633  SDValue LL = Op.getOperand(0);
3634  SDValue RL = Op.getOperand(1);
3635  SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3636  SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3637  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3638  // the high result in the even register. ISD::SMUL_LOHI is defined to
3639  // return the low half first, so the results are in reverse order.
3640  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3641  LL, RL, Ops[1], Ops[0]);
3642  SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3643  SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3644  SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3645  Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3646  }
3647  return DAG.getMergeValues(Ops, DL);
3648 }
3649 
3650 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3651  SelectionDAG &DAG) const {
3652  EVT VT = Op.getValueType();
3653  SDLoc DL(Op);
3654  SDValue Ops[2];
3655  if (is32Bit(VT))
3656  // Just do a normal 64-bit multiplication and extract the results.
3657  // We define this so that it can be used for constant division.
3658  lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3659  Op.getOperand(1), Ops[1], Ops[0]);
3660  else
3661  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3662  // the high result in the even register. ISD::UMUL_LOHI is defined to
3663  // return the low half first, so the results are in reverse order.
3664  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3665  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3666  return DAG.getMergeValues(Ops, DL);
3667 }
3668 
3669 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3670  SelectionDAG &DAG) const {
3671  SDValue Op0 = Op.getOperand(0);
3672  SDValue Op1 = Op.getOperand(1);
3673  EVT VT = Op.getValueType();
3674  SDLoc DL(Op);
3675 
3676  // We use DSGF for 32-bit division. This means the first operand must
3677  // always be 64-bit, and the second operand should be 32-bit whenever
3678  // that is possible, to improve performance.
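  // (DSGF divides a 64-bit dividend by a sign-extended 32-bit divisor, so an
  // i64 division whose divisor has more than 32 sign bits can be truncated
  // and still use DSGF rather than the full 64-bit DSG.)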
3679  if (is32Bit(VT))
3680  Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3681  else if (DAG.ComputeNumSignBits(Op1) > 32)
3682  Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3683 
3684  // DSG(F) returns the remainder in the even register and the
3685  // quotient in the odd register.
3686  SDValue Ops[2];
3687  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3688  return DAG.getMergeValues(Ops, DL);
3689 }
3690 
3691 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3692  SelectionDAG &DAG) const {
3693  EVT VT = Op.getValueType();
3694  SDLoc DL(Op);
3695 
3696  // DL(G) returns the remainder in the even register and the
3697  // quotient in the odd register.
3698  SDValue Ops[2];
3699  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3700  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3701  return DAG.getMergeValues(Ops, DL);
3702 }
3703 
3704 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3705  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3706 
3707  // Get the known-zero masks for each operand.
3708  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3709  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3710  DAG.computeKnownBits(Ops[1])};
3711 
3712  // See if the upper 32 bits of one operand and the lower 32 bits of the
3713  // other are known zero. They are the low and high operands respectively.
3714  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3715  Known[1].Zero.getZExtValue() };
3716  unsigned High, Low;
3717  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3718  High = 1, Low = 0;
3719  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3720  High = 0, Low = 1;
3721  else
3722  return Op;
3723 
3724  SDValue LowOp = Ops[Low];
3725  SDValue HighOp = Ops[High];
3726 
3727  // If the high part is a constant, we're better off using IILH.
3728  if (HighOp.getOpcode() == ISD::Constant)
3729  return Op;
3730 
3731  // If the low part is a constant that is outside the range of LHI,
3732  // then we're better off using IILF.
3733  if (LowOp.getOpcode() == ISD::Constant) {
3734  int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3735  if (!isInt<16>(Value))
3736  return Op;
3737  }
3738 
3739  // Check whether the high part is an AND that doesn't change the
3740  // high 32 bits and just masks out low bits. We can skip it if so.
3741  if (HighOp.getOpcode() == ISD::AND &&
3742  HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3743  SDValue HighOp0 = HighOp.getOperand(0);
3744  uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3745  if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3746  HighOp = HighOp0;
3747  }
3748 
3749  // Take advantage of the fact that all GR32 operations only change the
3750  // low 32 bits by truncating Low to an i32 and inserting it directly
3751  // using a subreg. The interesting cases are those where the truncation
3752  // can be folded.
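  // For example, (or (and %x, 0xffffffff00000000), (zext i32 %y to i64)) can
  // become a single insertion of %y into the low 32 bits of %x.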
3753  SDLoc DL(Op);
3754  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3755  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3756  MVT::i64, HighOp, Low32);
3757 }
3758 
3759 // Lower SADDO/SSUBO/UADDO/USUBO nodes.
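// These typically come from the overflow intrinsics, e.g.
//   %res = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
// and are mapped onto an add/subtract that sets CC, plus a SETCC on the
// relevant CC mask.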
3760 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3761  SelectionDAG &DAG) const {
3762  SDNode *N = Op.getNode();
3763  SDValue LHS = N->getOperand(0);
3764  SDValue RHS = N->getOperand(1);
3765  SDLoc DL(N);
3766  unsigned BaseOp = 0;
3767  unsigned CCValid = 0;
3768  unsigned CCMask = 0;
3769 
3770  switch (Op.getOpcode()) {
3771  default: llvm_unreachable("Unknown instruction!");
3772  case ISD::SADDO:
3773  BaseOp = SystemZISD::SADDO;
3774  CCValid = SystemZ::CCMASK_ARITH;
3775  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3776  break;
3777  case ISD::SSUBO:
3778  BaseOp = SystemZISD::SSUBO;
3779  CCValid = SystemZ::CCMASK_ARITH;
3780  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3781  break;
3782  case ISD::UADDO:
3783  BaseOp = SystemZISD::UADDO;
3784  CCValid = SystemZ::CCMASK_LOGICAL;
3785  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3786  break;
3787  case ISD::USUBO:
3788  BaseOp = SystemZISD::USUBO;
3789  CCValid = SystemZ::CCMASK_LOGICAL;
3790  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3791  break;
3792  }
3793 
3794  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3795  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3796 
3797  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3798  if (N->getValueType(1) == MVT::i1)
3799  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3800 
3801  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3802 }
3803 
3804 static bool isAddCarryChain(SDValue Carry) {
3805  while (Carry.getOpcode() == ISD::ADDCARRY)
3806  Carry = Carry.getOperand(2);
3807  return Carry.getOpcode() == ISD::UADDO;
3808 }
3809 
3810 static bool isSubBorrowChain(SDValue Carry) {
3811  while (Carry.getOpcode() == ISD::SUBCARRY)
3812  Carry = Carry.getOperand(2);
3813  return Carry.getOpcode() == ISD::USUBO;
3814 }
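// Both helpers walk an ADDCARRY/SUBCARRY chain back through its carry operand
// to check that the carry ultimately comes from a UADDO/USUBO, so that it is
// known to live in CC in the logical carry/borrow form expected below.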
3815 
3816 // Lower ADDCARRY/SUBCARRY nodes.
3817 SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
3818  SelectionDAG &DAG) const {
3819 
3820  SDNode *N = Op.getNode();
3821  MVT VT = N->getSimpleValueType(0);
3822 
3823  // Let legalize expand this if it isn't a legal type yet.
3824  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3825  return SDValue();
3826 
3827  SDValue LHS = N->getOperand(0);
3828  SDValue RHS = N->getOperand(1);
3829  SDValue Carry = Op.getOperand(2);
3830  SDLoc DL(N);
3831  unsigned BaseOp = 0;
3832  unsigned CCValid = 0;
3833  unsigned CCMask = 0;
3834 
3835  switch (Op.getOpcode()) {
3836  default: llvm_unreachable("Unknown instruction!");
3837  case ISD::ADDCARRY:
3838  if (!isAddCarryChain(Carry))
3839  return SDValue();
3840 
3841  BaseOp = SystemZISD::ADDCARRY;
3842  CCValid = SystemZ::CCMASK_LOGICAL;
3843  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3844  break;
3845  case ISD::SUBCARRY:
3846  if (!isSubBorrowChain(Carry))
3847  return SDValue();
3848 
3849  BaseOp = SystemZISD::SUBCARRY;
3850  CCValid = SystemZ::CCMASK_LOGICAL;
3851  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3852  break;
3853  }
3854 
3855  // Set the condition code from the carry flag.
3856  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
3857  DAG.getConstant(CCValid, DL, MVT::i32),
3858  DAG.getConstant(CCMask, DL, MVT::i32));
3859 
3860  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3861  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
3862 
3863  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3864  if (N->getValueType(1) == MVT::i1)
3865  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3866 
3867  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3868 }
3869 
3870 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
3871  SelectionDAG &DAG) const {
3872  EVT VT = Op.getValueType();
3873  SDLoc DL(Op);
3874  Op = Op.getOperand(0);
3875 
3876  // Handle vector types via VPOPCT.
3877  if (VT.isVector()) {
3878  Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
3879  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
3880  switch (VT.getScalarSizeInBits()) {
3881  case 8:
3882  break;
3883  case 16: {
3884  Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
3885  SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
3886  SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
3887  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3888  Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
3889  break;
3890  }
3891  case 32: {
3892  SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
3893  DAG.getConstant(0, DL, MVT::i32));
3894  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3895  break;
3896  }
3897  case 64: {
3898  SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
3899  DAG.getConstant(0, DL, MVT::i32));
3900  Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
3901  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3902  break;
3903  }
3904  default:
3905  llvm_unreachable("Unexpected type");
3906  }
3907  return Op;
3908  }
3909 
3910  // Get the known-zero mask for the operand.
3911  KnownBits Known = DAG.computeKnownBits(Op);
3912  unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
3913  if (NumSignificantBits == 0)
3914  return DAG.getConstant(0, DL, VT);
3915 
3916  // Skip known-zero high parts of the operand.
3917  int64_t OrigBitSize = VT.getSizeInBits();
3918  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
3919  BitSize = std::min(BitSize, OrigBitSize);
3920 
3921  // The POPCNT instruction counts the number of bits in each byte.
3922  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
3923  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
3924  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
3925 
3926  // Add up per-byte counts in a binary tree. All bits of Op at
3927  // position larger than BitSize remain zero throughout.
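  //
  // Illustration for a 32-bit value whose per-byte counts are [b3 b2 b1 b0]:
  // after adding (Op << 16) the top byte holds b3+b1, and after adding
  // (Op << 8) it holds b3+b2+b1+b0, which the final shift then extracts.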
3928  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
3929  SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
3930  if (BitSize != OrigBitSize)
3931  Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
3932  DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
3933  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3934  }
3935 
3936  // Extract overall result from high byte.
3937  if (BitSize > 8)
3938  Op = DAG.getNode(ISD::SRL, DL, VT, Op,
3939  DAG.getConstant(BitSize - 8, DL, VT));
3940 
3941  return Op;
3942 }
3943 
3944 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3945  SelectionDAG &DAG) const {
3946  SDLoc DL(Op);
3947  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
3948  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
3949  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
3950  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
3951 
3952  // The only fence that needs an instruction is a sequentially-consistent
3953  // cross-thread fence.
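  // For example, std::atomic_thread_fence(std::memory_order_seq_cst) takes
  // this path; weaker or single-threaded fences fall through to the
  // compiler-only MEMBARRIER below.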
3954  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
3955  FenceSSID == SyncScope::System) {
3956  return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
3957  Op.getOperand(0)),
3958  0);
3959  }
3960 
3961  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3962  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3963 }
3964 
3965 // Op is an atomic load. Lower it into a normal volatile load.
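// (This is presumably sound because naturally aligned loads of up to 8 bytes
// are already atomic on z/Architecture, so no special instruction is needed.)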
3966 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
3967  SelectionDAG &DAG) const {
3968  auto *Node = cast<AtomicSDNode>(Op.getNode());
3969  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
3970  Node->getChain(), Node->getBasePtr(),
3971  Node->getMemoryVT(), Node->getMemOperand());
3972 }
3973 
3974 // Op is an atomic store. Lower it into a normal volatile store.
3975 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
3976  SelectionDAG &DAG) const {
3977  auto *Node = cast<AtomicSDNode>(Op.getNode());
3978  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
3979  Node->getBasePtr(), Node->getMemoryVT(),
3980  Node->getMemOperand());
3981  // We have to enforce sequential consistency by performing a
3982  // serialization operation after the store.
3983  if (Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
3984  Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
3985  MVT::Other, Chain), 0);
3986  return Chain;
3987 }
3988 
3989 // Op is an 8-, 16- or 32-bit ATOMIC_LOAD_* operation. Lower the first
3990 // two into the fullword ATOMIC_LOADW_* operation given by Opcode.
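// Roughly, the narrow operation is performed on the aligned 4-byte word that
// contains the addressed byte or halfword (e.g. AlignedAddr = Addr & ~3),
// with the value rotated into position and the surrounding bytes preserved
// inside a compare-and-swap loop.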
3991 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
3992  SelectionDAG &DAG,
3993  unsigned Opcode) const {
3994  auto *Node = cast<AtomicSDNode>(Op.getNode());
3995 
3996  // 32-bit operations need no code outside the main loop.
3997  EVT NarrowVT = Node->getMemoryVT();
3998  EVT WideVT = MVT::i32;
3999  if (NarrowVT == WideVT)
4000  return Op;
4001 
4002  int64_t BitSize = NarrowVT.getSizeInBits();
4003  SDValue ChainIn = Node->getChain();
4004  SDValue Addr = Node->getBasePtr();
4005  SDValue Src2 = Node->getVal();
4006  MachineMemOperand *MMO = Node->getMemOperand();
4007  SDLoc DL(Node);
4008  EVT PtrVT = Addr.getValueType();
4009 
4010  // Convert atomic subtracts of constants into additions.
4011  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
4012  if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
4013  Opcode = SystemZISD::ATOMIC_LOADW_ADD;
4014  Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
4015  }
4016 
4017  // Get the address of the containing word.
4018  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
4019  DAG.