1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SystemZTargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "SystemZISelLowering.h"
14 #include "SystemZCallingConv.h"
15 #include "SystemZConstantPoolValue.h"
16 #include "SystemZMachineFunctionInfo.h"
17 #include "SystemZTargetMachine.h"
18 #include "llvm/CodeGen/CallingConvLower.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/Intrinsics.h"
24 #include "llvm/IR/IntrinsicsS390.h"
25 #include "llvm/Support/CommandLine.h"
26 #include "llvm/Support/KnownBits.h"
27 #include <cctype>
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "systemz-lower"
32 
33 namespace {
34 // Represents information about a comparison.
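// Illustrative example (editorial, not in the original source): for a signed
// integer equality test, Opcode would typically be SystemZISD::ICMP, CCValid
// SystemZ::CCMASK_ICMP, and CCMask SystemZ::CCMASK_CMP_EQ.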
35 struct Comparison {
36  Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
37  : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
38  Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
39 
40  // The operands to the comparison.
41  SDValue Op0, Op1;
42 
43  // Chain if this is a strict floating-point comparison.
44  SDValue Chain;
45 
46  // The opcode that should be used to compare Op0 and Op1.
47  unsigned Opcode;
48 
49  // A SystemZICMP value. Only used for integer comparisons.
50  unsigned ICmpType;
51 
52  // The mask of CC values that Opcode can produce.
53  unsigned CCValid;
54 
55  // The mask of CC values for which the original condition is true.
56  unsigned CCMask;
57 };
58 } // end anonymous namespace
59 
60 // Classify VT as either 32 or 64 bit.
61 static bool is32Bit(EVT VT) {
62  switch (VT.getSimpleVT().SimpleTy) {
63  case MVT::i32:
64  return true;
65  case MVT::i64:
66  return false;
67  default:
68  llvm_unreachable("Unsupported type");
69  }
70 }
71 
72 // Return a version of MachineOperand that can be safely used before the
73 // final use.
74 static MachineOperand earlyUseOperand(MachineOperand Op) {
75   if (Op.isReg())
76  Op.setIsKill(false);
77  return Op;
78 }
79 
80 SystemZTargetLowering::SystemZTargetLowering(const SystemZTargetMachine &TM,
81                                              const SystemZSubtarget &STI)
82  : TargetLowering(TM), Subtarget(STI) {
83  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
84 
85  // Set up the register classes.
86  if (Subtarget.hasHighWord())
87  addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
88  else
89  addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
90  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
91  if (!useSoftFloat()) {
92  if (Subtarget.hasVector()) {
93  addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
94  addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
95  } else {
96  addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
97  addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
98  }
99  if (Subtarget.hasVectorEnhancements1())
100  addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
101  else
102  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
103 
104  if (Subtarget.hasVector()) {
105  addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
106  addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
107  addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
108  addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
109  addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
110  addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
111  }
112  }
113 
114   // Compute derived properties from the register classes.
115   computeRegisterProperties(Subtarget.getRegisterInfo());
116 
117   // Set up special registers.
118   setStackPointerRegisterToSaveRestore(SystemZ::R15D);
119 
120   // TODO: It may be better to default to latency-oriented scheduling, however
121   // LLVM's current latency-oriented scheduler can't handle physreg definitions
122   // such as SystemZ has with CC, so set this to the register-pressure
123   // scheduler, because it can.
124   setSchedulingPreference(Sched::RegPressure);
125 
126   setBooleanContents(ZeroOrOneBooleanContent);
127   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
128 
129   // Instructions are strings of 2-byte aligned 2-byte values.
130   setMinFunctionAlignment(Align(2));
131   // For performance reasons we prefer 16-byte alignment.
132   setPrefFunctionAlignment(Align(16));
133 
134  // Handle operations that are handled in a similar way for all types.
135   for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
136        I <= MVT::LAST_INTEGER_VALUETYPE;
137        ++I) {
138  MVT VT = MVT::SimpleValueType(I);
139  if (isTypeLegal(VT)) {
140       // Lower SET_CC into an IPM-based sequence.
141       setOperationAction(ISD::SETCC, VT, Custom);
142       setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
143       setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
144 
145       // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
146       setOperationAction(ISD::SELECT, VT, Expand);
147 
148       // Lower SELECT_CC and BR_CC into separate comparisons and branches.
149       setOperationAction(ISD::SELECT_CC, VT, Custom);
150       setOperationAction(ISD::BR_CC, VT, Custom);
151     }
152  }
153 
154   // Expand jump table branches as address arithmetic followed by an
155   // indirect jump.
156   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
157 
158   // Expand BRCOND into a BR_CC (see above).
159   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
160 
161  // Handle integer types.
162   for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
163        I <= MVT::LAST_INTEGER_VALUETYPE;
164        ++I) {
165  MVT VT = MVT::SimpleValueType(I);
166     if (isTypeLegal(VT)) {
167       setOperationAction(ISD::ABS, VT, Legal);
168 
169       // Expand individual DIV and REMs into DIVREMs.
170       setOperationAction(ISD::SDIV, VT, Expand);
171       setOperationAction(ISD::UDIV, VT, Expand);
172       setOperationAction(ISD::SREM, VT, Expand);
173       setOperationAction(ISD::UREM, VT, Expand);
174       setOperationAction(ISD::SDIVREM, VT, Custom);
175       setOperationAction(ISD::UDIVREM, VT, Custom);
176 
177       // Support addition/subtraction with overflow.
178       setOperationAction(ISD::SADDO, VT, Custom);
179       setOperationAction(ISD::SSUBO, VT, Custom);
180 
181       // Support addition/subtraction with carry.
182       setOperationAction(ISD::UADDO, VT, Custom);
183       setOperationAction(ISD::USUBO, VT, Custom);
184 
185       // Support carry in as value rather than glue.
186       setOperationAction(ISD::ADDCARRY, VT, Custom);
187       setOperationAction(ISD::SUBCARRY, VT, Custom);
188 
189       // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
190       // stores, putting a serialization instruction after the stores.
191       setOperationAction(ISD::ATOMIC_LOAD,  VT, Custom);
192       setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
193 
194       // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
195       // available, or if the operand is constant.
196       setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
197 
198       // Use POPCNT on z196 and above.
199       if (Subtarget.hasPopulationCount())
200         setOperationAction(ISD::CTPOP, VT, Custom);
201       else
202         setOperationAction(ISD::CTPOP, VT, Expand);
203 
204       // No special instructions for these.
205       setOperationAction(ISD::CTTZ, VT, Expand);
206       setOperationAction(ISD::ROTR, VT, Expand);
207 
208       // Use *MUL_LOHI where possible instead of MULH*.
209       setOperationAction(ISD::MULHS, VT, Expand);
210       setOperationAction(ISD::MULHU, VT, Expand);
211       setOperationAction(ISD::SMUL_LOHI, VT, Custom);
212       setOperationAction(ISD::UMUL_LOHI, VT, Custom);
213 
214       // Only z196 and above have native support for conversions to unsigned.
215       // On z10, promoting to i64 doesn't generate an inexact condition for
216       // values that are outside the i32 range but in the i64 range, so use
217       // the default expansion.
218       if (!Subtarget.hasFPExtension())
219         setOperationAction(ISD::FP_TO_UINT, VT, Expand);
220 
221       // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
222       // default to Expand, so need to be modified to Legal where appropriate.
223       setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
224       if (Subtarget.hasFPExtension())
225         setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);
226 
227       // And similarly for STRICT_[SU]INT_TO_FP.
228       setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
229       if (Subtarget.hasFPExtension())
230         setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
231     }
232  }
233 
234   // Type legalization will convert 8- and 16-bit atomic operations into
235   // forms that operate on i32s (but still keeping the original memory VT).
236   // Lower them into full i32 operations.
237   setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Custom);
238   setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Custom);
239   setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Custom);
240   setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Custom);
241   setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Custom);
242   setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Custom);
243   setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
244   setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Custom);
245   setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Custom);
246   setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
247   setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
248 
249   // Even though i128 is not a legal type, we still need to custom lower
250   // the atomic operations in order to exploit SystemZ instructions.
251   setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
252   setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
253 
254   // We can use the CC result of compare-and-swap to implement
255   // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
256   setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
257   setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
258   setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
259 
260   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
261 
262   // Traps are legal, as we will convert them to "j .+2".
263   setOperationAction(ISD::TRAP, MVT::Other, Legal);
264 
265   // z10 has instructions for signed but not unsigned FP conversion.
266   // Handle unsigned 32-bit types as signed 64-bit types.
267   if (!Subtarget.hasFPExtension()) {
268     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
269     setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
270     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote);
271     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand);
272   }
273 
274   // We have native support for a 64-bit CTLZ, via FLOGR.
275   setOperationAction(ISD::CTLZ, MVT::i32, Promote);
276   setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
277   setOperationAction(ISD::CTLZ, MVT::i64, Legal);
278 
279   // On z15 we have native support for a 64-bit CTPOP.
280   if (Subtarget.hasMiscellaneousExtensions3()) {
281     setOperationAction(ISD::CTPOP, MVT::i32, Promote);
282     setOperationAction(ISD::CTPOP, MVT::i64, Legal);
283   }
284 
285   // Give LowerOperation the chance to replace 64-bit ORs with subregs.
286   setOperationAction(ISD::OR, MVT::i64, Custom);
287 
288   // Expand 128 bit shifts without using a libcall.
289   setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
290   setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
291   setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
292   setLibcallName(RTLIB::SRL_I128, nullptr);
293   setLibcallName(RTLIB::SHL_I128, nullptr);
294   setLibcallName(RTLIB::SRA_I128, nullptr);
295 
296   // We have native instructions for i8, i16 and i32 extensions, but not i1.
297   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
298   for (MVT VT : MVT::integer_valuetypes()) {
299     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
300     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
301     setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i1, Promote);
302   }
303 
304   // Handle the various types of symbolic address.
305   setOperationAction(ISD::ConstantPool,     PtrVT, Custom);
306   setOperationAction(ISD::GlobalAddress,    PtrVT, Custom);
307   setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
308   setOperationAction(ISD::BlockAddress,     PtrVT, Custom);
309   setOperationAction(ISD::JumpTable,        PtrVT, Custom);
310 
311   // We need to handle dynamic allocations specially because of the
312   // 160-byte area at the bottom of the stack.
313   setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
314   setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
315 
316   // Use custom expanders so that we can force the function to use
317   // a frame pointer.
318   setOperationAction(ISD::STACKSAVE,    MVT::Other, Custom);
319   setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
320 
321   // Handle prefetches with PFD or PFDRL.
322   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
323 
324  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
325  // Assume by default that all vector operations need to be expanded.
326  for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
327  if (getOperationAction(Opcode, VT) == Legal)
328  setOperationAction(Opcode, VT, Expand);
329 
330  // Likewise all truncating stores and extending loads.
331  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
332  setTruncStoreAction(VT, InnerVT, Expand);
333  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
334  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
335  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
336  }
337 
338  if (isTypeLegal(VT)) {
339  // These operations are legal for anything that can be stored in a
340  // vector register, even if there is no native support for the format
341  // as such. In particular, we can do these for v4f32 even though there
342       // are no specific instructions for that format.
343       setOperationAction(ISD::LOAD, VT, Legal);
344       setOperationAction(ISD::STORE, VT, Legal);
345       setOperationAction(ISD::VSELECT, VT, Legal);
346       setOperationAction(ISD::BITCAST, VT, Legal);
347       setOperationAction(ISD::UNDEF, VT, Legal);
348 
349       // Likewise, except that we need to replace the nodes with something
350       // more specific.
351       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
352       setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
353     }
354  }
355 
356   // Handle integer vector types.
357   for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
358     if (isTypeLegal(VT)) {
359       // These operations have direct equivalents.
360       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
361       setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
362       setOperationAction(ISD::ADD, VT, Legal);
363       setOperationAction(ISD::SUB, VT, Legal);
364       if (VT != MVT::v2i64)
365         setOperationAction(ISD::MUL, VT, Legal);
366       setOperationAction(ISD::ABS, VT, Legal);
367       setOperationAction(ISD::AND, VT, Legal);
368       setOperationAction(ISD::OR, VT, Legal);
369       setOperationAction(ISD::XOR, VT, Legal);
370       if (Subtarget.hasVectorEnhancements1())
371         setOperationAction(ISD::CTPOP, VT, Legal);
372       else
373         setOperationAction(ISD::CTPOP, VT, Custom);
374       setOperationAction(ISD::CTTZ, VT, Legal);
375       setOperationAction(ISD::CTLZ, VT, Legal);
376 
377       // Convert a GPR scalar to a vector by inserting it into element 0.
378       setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
379 
380       // Use a series of unpacks for extensions.
381       setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
382       setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
383 
384       // Detect shifts by a scalar amount and convert them into
385       // V*_BY_SCALAR.
386       setOperationAction(ISD::SHL, VT, Custom);
387       setOperationAction(ISD::SRA, VT, Custom);
388       setOperationAction(ISD::SRL, VT, Custom);
389 
390       // At present ROTL isn't matched by DAGCombiner. ROTR should be
391       // converted into ROTL.
392       setOperationAction(ISD::ROTL, VT, Expand);
393       setOperationAction(ISD::ROTR, VT, Expand);
394 
395       // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
396       // and inverting the result as necessary.
397       setOperationAction(ISD::SETCC, VT, Custom);
398       setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
399       if (Subtarget.hasVectorEnhancements1())
400         setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
401     }
402   }
403 
403 
404   if (Subtarget.hasVector()) {
405     // There should be no need to check for float types other than v2f64
406     // since <2 x f32> isn't a legal type.
407     setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
408     setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
409     setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
410     setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
411     setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
412     setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
413     setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
414     setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
415 
416     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
417     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
418     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
419     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
420     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
421     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
422     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
423     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
424   }
425 
426   if (Subtarget.hasVectorEnhancements2()) {
427     setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
428     setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
429     setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
430     setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
431     setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
432     setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
433     setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
434     setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
435 
436     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
437     setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
438     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
439     setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
440     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
441     setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
442     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
443     setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
444   }
445 
446  // Handle floating-point types.
447   for (unsigned I = MVT::FIRST_FP_VALUETYPE;
448        I <= MVT::LAST_FP_VALUETYPE;
449        ++I) {
450  MVT VT = MVT::SimpleValueType(I);
451  if (isTypeLegal(VT)) {
452       // We can use FI for FRINT.
453       setOperationAction(ISD::FRINT, VT, Legal);
454 
455       // We can use the extended form of FI for other rounding operations.
456       if (Subtarget.hasFPExtension()) {
457         setOperationAction(ISD::FNEARBYINT, VT, Legal);
458         setOperationAction(ISD::FFLOOR, VT, Legal);
459         setOperationAction(ISD::FCEIL, VT, Legal);
460         setOperationAction(ISD::FTRUNC, VT, Legal);
461         setOperationAction(ISD::FROUND, VT, Legal);
462       }
463 
464       // No special instructions for these.
465       setOperationAction(ISD::FSIN, VT, Expand);
466       setOperationAction(ISD::FCOS, VT, Expand);
467       setOperationAction(ISD::FSINCOS, VT, Expand);
468       setOperationAction(ISD::FREM, VT, Expand);
469       setOperationAction(ISD::FPOW, VT, Expand);
470 
471       // Handle constrained floating-point operations.
472       setOperationAction(ISD::STRICT_FADD, VT, Legal);
473       setOperationAction(ISD::STRICT_FSUB, VT, Legal);
474       setOperationAction(ISD::STRICT_FMUL, VT, Legal);
475       setOperationAction(ISD::STRICT_FDIV, VT, Legal);
476       setOperationAction(ISD::STRICT_FMA, VT, Legal);
477       setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
478       setOperationAction(ISD::STRICT_FRINT, VT, Legal);
479       setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
480       setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
481       if (Subtarget.hasFPExtension()) {
482         setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
483         setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
484         setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
485         setOperationAction(ISD::STRICT_FROUND, VT, Legal);
486         setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
487       }
488  }
489  }
490 
491  // Handle floating-point vector types.
492   if (Subtarget.hasVector()) {
493     // Scalar-to-vector conversion is just a subreg.
494     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
495     setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
496 
497     // Some insertions and extractions can be done directly but others
498     // need to go via integers.
499     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
500     setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
501     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
502     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
503 
504     // These operations have direct equivalents.
505     setOperationAction(ISD::FADD, MVT::v2f64, Legal);
506     setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
507     setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
508     setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
509     setOperationAction(ISD::FMA, MVT::v2f64, Legal);
510     setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
511     setOperationAction(ISD::FABS, MVT::v2f64, Legal);
512     setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
513     setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
514     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
515     setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
516     setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
517     setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
518     setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
519 
520     // Handle constrained floating-point operations.
521     setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
522     setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
523     setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
524     setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
525     setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
526     setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
527     setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
528     setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
529     setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
530     setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
531     setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
532     setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
533   }
534 
535  // The vector enhancements facility 1 has instructions for these.
536   if (Subtarget.hasVectorEnhancements1()) {
537     setOperationAction(ISD::FADD, MVT::v4f32, Legal);
538     setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
539     setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
540     setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
541     setOperationAction(ISD::FMA, MVT::v4f32, Legal);
542     setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
543     setOperationAction(ISD::FABS, MVT::v4f32, Legal);
544     setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
545     setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
546     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
547     setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
548     setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
549     setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
550     setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
551 
552     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
553     setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
554     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
555     setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
556 
557     setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
558     setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
559     setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
560     setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
561 
562     setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
563     setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
564     setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
565     setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
566 
567     setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
568     setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
569     setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
570     setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
571 
572     setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
573     setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
574     setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
575     setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
576 
577     // Handle constrained floating-point operations.
578     setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
579     setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
580     setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
581     setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
582     setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
583     setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
584     setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
585     setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
586     setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
587     setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
588     setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
589     setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
590     for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
591                      MVT::v4f32, MVT::v2f64 }) {
592       setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
593       setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
594       setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
595       setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
596     }
597  }
598 
599  // We only have fused f128 multiply-addition on vector registers.
600   if (!Subtarget.hasVectorEnhancements1()) {
601     setOperationAction(ISD::FMA, MVT::f128, Expand);
602     setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
603   }
604 
605  // We don't have a copysign instruction on vector registers.
606   if (Subtarget.hasVectorEnhancements1())
607     setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
608 
609  // Needed so that we don't try to implement f128 constant loads using
610  // a load-and-extend of a f80 constant (in cases where the constant
611  // would fit in an f80).
612   for (MVT VT : MVT::fp_valuetypes())
613     setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
614 
615   // We don't have extending load instructions on vector registers.
616   if (Subtarget.hasVectorEnhancements1()) {
617     setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
618     setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
619   }
620 
621   // Floating-point truncation and stores need to be done separately.
622   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
623   setTruncStoreAction(MVT::f128, MVT::f32, Expand);
624   setTruncStoreAction(MVT::f128, MVT::f64, Expand);
625 
626  // We have 64-bit FPR<->GPR moves, but need special handling for
627  // 32-bit forms.
628   if (!Subtarget.hasVector()) {
629     setOperationAction(ISD::BITCAST, MVT::i32, Custom);
630     setOperationAction(ISD::BITCAST, MVT::f32, Custom);
631   }
632 
633   // VASTART and VACOPY need to deal with the SystemZ-specific varargs
634   // structure, but VAEND is a no-op.
635   setOperationAction(ISD::VASTART, MVT::Other, Custom);
636   setOperationAction(ISD::VACOPY,  MVT::Other, Custom);
637   setOperationAction(ISD::VAEND,   MVT::Other, Expand);
638 
639   // Codes for which we want to perform some z-specific combinations.
640   setTargetDAGCombine(ISD::ZERO_EXTEND);
641   setTargetDAGCombine(ISD::SIGN_EXTEND);
642   setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
643   setTargetDAGCombine(ISD::LOAD);
644   setTargetDAGCombine(ISD::STORE);
645   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
646   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
647   setTargetDAGCombine(ISD::FP_ROUND);
648   setTargetDAGCombine(ISD::STRICT_FP_ROUND);
649   setTargetDAGCombine(ISD::FP_EXTEND);
650   setTargetDAGCombine(ISD::SINT_TO_FP);
651   setTargetDAGCombine(ISD::UINT_TO_FP);
652   setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
653   setTargetDAGCombine(ISD::BSWAP);
654   setTargetDAGCombine(ISD::SDIV);
655   setTargetDAGCombine(ISD::UDIV);
656   setTargetDAGCombine(ISD::SREM);
657   setTargetDAGCombine(ISD::UREM);
658   setTargetDAGCombine(ISD::INTRINSIC_VOID);
659   setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
660 
661   // Handle intrinsics.
662   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
663   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
664 
665  // We want to use MVC in preference to even a single load/store pair.
666   MaxStoresPerMemcpy = 0;
667   MaxStoresPerMemcpyOptSize = 0;
668 
669  // The main memset sequence is a byte store followed by an MVC.
670  // Two STC or MV..I stores win over that, but the kind of fused stores
671  // generated by target-independent code don't when the byte value is
672  // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
673  // than "STC;MVC". Handle the choice in target-specific code instead.
674   MaxStoresPerMemset = 0;
675   MaxStoresPerMemsetOptSize = 0;
676 
677  // Default to having -disable-strictnode-mutation on
678  IsStrictFPEnabled = true;
679 }
680 
681 bool SystemZTargetLowering::useSoftFloat() const {
682   return Subtarget.hasSoftFloat();
683 }
684 
685 EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
686                                               LLVMContext &, EVT VT) const {
687   if (!VT.isVector())
688     return MVT::i32;
689   return VT.changeVectorElementTypeToInteger();
690 }
691 
692 bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
693     const MachineFunction &MF, EVT VT) const {
694  VT = VT.getScalarType();
695 
696  if (!VT.isSimple())
697  return false;
698 
699  switch (VT.getSimpleVT().SimpleTy) {
700  case MVT::f32:
701  case MVT::f64:
702  return true;
703  case MVT::f128:
704  return Subtarget.hasVectorEnhancements1();
705  default:
706  break;
707  }
708 
709  return false;
710 }
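// Editorial note (hedged): scalar f32/f64 have fused multiply-add
// instructions (e.g. MAEBR/MADBR), and f128 only gains a fused form with the
// vector enhancements facility 1, which is what the checks above reflect.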
711 
712 // Return true if the constant can be generated with a vector instruction,
713 // such as VGM, VGMB or VREPI.
714 bool SystemZVectorConstantInfo::isVectorConstantLegal(
715     const SystemZSubtarget &Subtarget) {
716  const SystemZInstrInfo *TII =
717  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
718  if (!Subtarget.hasVector() ||
719  (isFP128 && !Subtarget.hasVectorEnhancements1()))
720  return false;
721 
722  // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
723  // preferred way of creating all-zero and all-one vectors so give it
724  // priority over other methods below.
725  unsigned Mask = 0;
726  unsigned I = 0;
727  for (; I < SystemZ::VectorBytes; ++I) {
728  uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
729  if (Byte == 0xff)
730  Mask |= 1ULL << I;
731  else if (Byte != 0)
732  break;
733  }
734   if (I == SystemZ::VectorBytes) {
735     Opcode = SystemZISD::BYTE_MASK;
736     OpVals.push_back(Mask);
737     VecVT = MVT::v16i8;
738     return true;
739   }
740 
741  if (SplatBitSize > 64)
742  return false;
743 
744  auto tryValue = [&](uint64_t Value) -> bool {
745  // Try VECTOR REPLICATE IMMEDIATE
746  int64_t SignedValue = SignExtend64(Value, SplatBitSize);
747  if (isInt<16>(SignedValue)) {
748       OpVals.push_back(((unsigned) SignedValue));
749       Opcode = SystemZISD::REPLICATE;
750       VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
751  SystemZ::VectorBits / SplatBitSize);
752  return true;
753  }
754  // Try VECTOR GENERATE MASK
755  unsigned Start, End;
756  if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
757  // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
758  // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
759     // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
760       OpVals.push_back(Start - (64 - SplatBitSize));
761       OpVals.push_back(End - (64 - SplatBitSize));
762       Opcode = SystemZISD::ROTATE_MASK;
763       VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
764  SystemZ::VectorBits / SplatBitSize);
765  return true;
766  }
767  return false;
768  };
769 
770  // First try assuming that any undefined bits above the highest set bit
771  // and below the lowest set bit are 1s. This increases the likelihood of
772  // being able to use a sign-extended element value in VECTOR REPLICATE
773  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
774  uint64_t SplatBitsZ = SplatBits.getZExtValue();
775  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
776  uint64_t Lower =
777  (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
778  uint64_t Upper =
779  (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
780  if (tryValue(SplatBitsZ | Upper | Lower))
781  return true;
782 
783  // Now try assuming that any undefined bits between the first and
784  // last defined set bits are set. This increases the chances of
785  // using a non-wraparound mask.
786  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
787  return tryValue(SplatBitsZ | Middle);
788 }
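// Illustrative example (editorial, not in the original source): a v4i32
// splat of 0x00ff0000 passes isRxSBGMask, and after the 64-to-SplatBitSize
// renumbering above OpVals would hold Start == 8 and End == 15 (bit 0 being
// the most significant bit), suitable for VECTOR GENERATE MASK.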
789 
790 SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
791   IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
792  isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
793  SplatBits = FPImm.bitcastToAPInt();
794  unsigned Width = SplatBits.getBitWidth();
795  IntBits <<= (SystemZ::VectorBits - Width);
796 
797  // Find the smallest splat.
798  while (Width > 8) {
799  unsigned HalfSize = Width / 2;
800  APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
801  APInt LowValue = SplatBits.trunc(HalfSize);
802 
803  // If the two halves do not match, stop here.
804  if (HighValue != LowValue || 8 > HalfSize)
805  break;
806 
807  SplatBits = HighValue;
808  Width = HalfSize;
809  }
810  SplatUndef = 0;
811  SplatBitSize = Width;
812 }
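// Illustrative example (editorial): a 128-bit pattern consisting of repeated
// 0x01 bytes halves cleanly at every step of the loop above, ending with
// SplatBits == 0x01 and SplatBitSize == 8.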
813 
814 SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
815   assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
816  bool HasAnyUndefs;
817 
818  // Get IntBits by finding the 128 bit splat.
819  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
820  true);
821 
822  // Get SplatBits by finding the 8 bit or greater splat.
823  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
824  true);
825 }
826 
827 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
828                                          bool ForCodeSize) const {
829  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
830  if (Imm.isZero() || Imm.isNegZero())
831  return true;
832 
833  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
834 }
835 
836 /// Returns true if stack probing through inline assembly is requested.
837 bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
838   // If the function specifically requests inline stack probes, emit them.
839  if (MF.getFunction().hasFnAttribute("probe-stack"))
840  return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
841  "inline-asm";
842  return false;
843 }
844 
845 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
846   // We can use CGFI or CLGFI.
847  return isInt<32>(Imm) || isUInt<32>(Imm);
848 }
849 
850 bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
851   // We can use ALGFI or SLGFI.
852  return isUInt<32>(Imm) || isUInt<32>(-Imm);
853 }
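// Illustrative example (editorial): Imm == -1 is accepted because
// isUInt<32>(-Imm) holds, so the addition can be emitted as a logical
// subtract of 1 (SLGFI).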
854 
855 bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
856     EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
857  // Unaligned accesses should never be slower than the expanded version.
858  // We check specifically for aligned accesses in the few cases where
859  // they are required.
860  if (Fast)
861  *Fast = true;
862  return true;
863 }
864 
865 // Information about the addressing mode for a memory access.
866 struct AddressingMode {
867   // True if a long displacement is supported.
868   bool LongDisplacement;
869 
870  // True if use of index register is supported.
871  bool IndexReg;
872 
873  AddressingMode(bool LongDispl, bool IdxReg) :
874  LongDisplacement(LongDispl), IndexReg(IdxReg) {}
875 };
876 
877 // Return the desired addressing mode for a Load which has only one use (in
878 // the same block) which is a Store.
879 static AddressingMode getLoadStoreAddrMode(bool HasVector,
880  Type *Ty) {
881  // With vector support a Load->Store combination may be combined to either
882  // an MVC or vector operations and it seems to work best to allow the
883  // vector addressing mode.
884  if (HasVector)
885  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
886 
887  // Otherwise only the MVC case is special.
888  bool MVC = Ty->isIntegerTy(8);
889  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
890 }
891 
892 // Return the addressing mode which seems most desirable given an LLVM
893 // Instruction pointer.
894 static AddressingMode
895 supportedAddressingMode(Instruction *I, bool HasVector) {
896   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
897  switch (II->getIntrinsicID()) {
898  default: break;
899  case Intrinsic::memset:
900  case Intrinsic::memmove:
901  case Intrinsic::memcpy:
902  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
903  }
904  }
905 
906  if (isa<LoadInst>(I) && I->hasOneUse()) {
907  auto *SingleUser = cast<Instruction>(*I->user_begin());
908  if (SingleUser->getParent() == I->getParent()) {
909  if (isa<ICmpInst>(SingleUser)) {
910  if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
911  if (C->getBitWidth() <= 64 &&
912  (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
913  // Comparison of memory with 16 bit signed / unsigned immediate
914  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
915  } else if (isa<StoreInst>(SingleUser))
916  // Load->Store
917  return getLoadStoreAddrMode(HasVector, I->getType());
918  }
919  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
920  if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
921  if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
922  // Load->Store
923  return getLoadStoreAddrMode(HasVector, LoadI->getType());
924  }
925 
926  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
927 
928  // * Use LDE instead of LE/LEY for z13 to avoid partial register
929  // dependencies (LDE only supports small offsets).
930  // * Utilize the vector registers to hold floating point
931  // values (vector load / store instructions only support small
932  // offsets).
933 
934  Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
935  I->getOperand(0)->getType());
936  bool IsFPAccess = MemAccessTy->isFloatingPointTy();
937  bool IsVectorAccess = MemAccessTy->isVectorTy();
938 
939  // A store of an extracted vector element will be combined into a VSTE type
940  // instruction.
941  if (!IsVectorAccess && isa<StoreInst>(I)) {
942  Value *DataOp = I->getOperand(0);
943  if (isa<ExtractElementInst>(DataOp))
944  IsVectorAccess = true;
945  }
946 
947  // A load which gets inserted into a vector element will be combined into a
948  // VLE type instruction.
949  if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
950  User *LoadUser = *I->user_begin();
951  if (isa<InsertElementInst>(LoadUser))
952  IsVectorAccess = true;
953  }
954 
955  if (IsFPAccess || IsVectorAccess)
956  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
957  }
958 
959  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
960 }
961 
962 bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
963     const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
964  // Punt on globals for now, although they can be used in limited
965  // RELATIVE LONG cases.
966  if (AM.BaseGV)
967  return false;
968 
969  // Require a 20-bit signed offset.
970  if (!isInt<20>(AM.BaseOffs))
971  return false;
972 
973  AddressingMode SupportedAM(true, true);
974  if (I != nullptr)
975  SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
976 
977  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
978  return false;
979 
980  if (!SupportedAM.IndexReg)
981  // No indexing allowed.
982  return AM.Scale == 0;
983  else
984  // Indexing is OK but no scale factor can be applied.
985  return AM.Scale == 0 || AM.Scale == 1;
986 }
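// Illustrative examples (editorial): "base + index + 100000" is accepted
// (20-bit signed displacement, Scale == 1), while "base + 4 * index" is
// rejected because SystemZ has no scaled-index addressing.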
987 
988 bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
989   if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
990  return false;
991  unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedSize();
992  unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedSize();
993  return FromBits > ToBits;
994 }
995 
996 bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
997   if (!FromVT.isInteger() || !ToVT.isInteger())
998  return false;
999  unsigned FromBits = FromVT.getFixedSizeInBits();
1000  unsigned ToBits = ToVT.getFixedSizeInBits();
1001  return FromBits > ToBits;
1002 }
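// Editorial note: truncating i64 to i32 is free on SystemZ because it simply
// reads the low 32 bits of the 64-bit register.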
1003 
1004 //===----------------------------------------------------------------------===//
1005 // Inline asm support
1006 //===----------------------------------------------------------------------===//
1007 
1008 TargetLowering::ConstraintType
1009 SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
1010   if (Constraint.size() == 1) {
1011  switch (Constraint[0]) {
1012  case 'a': // Address register
1013  case 'd': // Data register (equivalent to 'r')
1014  case 'f': // Floating-point register
1015  case 'h': // High-part register
1016  case 'r': // General-purpose register
1017  case 'v': // Vector register
1018  return C_RegisterClass;
1019 
1020  case 'Q': // Memory with base and unsigned 12-bit displacement
1021  case 'R': // Likewise, plus an index
1022  case 'S': // Memory with base and signed 20-bit displacement
1023  case 'T': // Likewise, plus an index
1024  case 'm': // Equivalent to 'T'.
1025  return C_Memory;
1026 
1027  case 'I': // Unsigned 8-bit constant
1028  case 'J': // Unsigned 12-bit constant
1029  case 'K': // Signed 16-bit constant
1030  case 'L': // Signed 20-bit displacement (on all targets we support)
1031  case 'M': // 0x7fffffff
1032  return C_Immediate;
1033 
1034  default:
1035  break;
1036  }
1037  }
1038  return TargetLowering::getConstraintType(Constraint);
1039 }
1040 
1041 TargetLowering::ConstraintWeight SystemZTargetLowering::
1042 getSingleConstraintMatchWeight(AsmOperandInfo &info,
1043                                const char *constraint) const {
1044  ConstraintWeight weight = CW_Invalid;
1045  Value *CallOperandVal = info.CallOperandVal;
1046  // If we don't have a value, we can't do a match,
1047  // but allow it at the lowest weight.
1048  if (!CallOperandVal)
1049  return CW_Default;
1050  Type *type = CallOperandVal->getType();
1051  // Look at the constraint type.
1052  switch (*constraint) {
1053   default:
1054     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
1055     break;
1056 
1057  case 'a': // Address register
1058  case 'd': // Data register (equivalent to 'r')
1059  case 'h': // High-part register
1060  case 'r': // General-purpose register
1061  if (CallOperandVal->getType()->isIntegerTy())
1062  weight = CW_Register;
1063  break;
1064 
1065  case 'f': // Floating-point register
1066  if (type->isFloatingPointTy())
1067  weight = CW_Register;
1068  break;
1069 
1070  case 'v': // Vector register
1071  if ((type->isVectorTy() || type->isFloatingPointTy()) &&
1072  Subtarget.hasVector())
1073  weight = CW_Register;
1074  break;
1075 
1076  case 'I': // Unsigned 8-bit constant
1077  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1078  if (isUInt<8>(C->getZExtValue()))
1079  weight = CW_Constant;
1080  break;
1081 
1082  case 'J': // Unsigned 12-bit constant
1083  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1084  if (isUInt<12>(C->getZExtValue()))
1085  weight = CW_Constant;
1086  break;
1087 
1088  case 'K': // Signed 16-bit constant
1089  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1090  if (isInt<16>(C->getSExtValue()))
1091  weight = CW_Constant;
1092  break;
1093 
1094  case 'L': // Signed 20-bit displacement (on all targets we support)
1095  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1096  if (isInt<20>(C->getSExtValue()))
1097  weight = CW_Constant;
1098  break;
1099 
1100  case 'M': // 0x7fffffff
1101  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1102  if (C->getZExtValue() == 0x7fffffff)
1103  weight = CW_Constant;
1104  break;
1105  }
1106  return weight;
1107 }
1108 
1109 // Parse a "{tNNN}" register constraint for which the register type "t"
1110 // has already been verified. MC is the class associated with "t" and
1111 // Map maps 0-based register numbers to LLVM register numbers.
1112 static std::pair<unsigned, const TargetRegisterClass *>
1113 parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
1114                     const unsigned *Map, unsigned Size) {
1115  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1116  if (isdigit(Constraint[2])) {
1117  unsigned Index;
1118  bool Failed =
1119  Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1120  if (!Failed && Index < Size && Map[Index])
1121  return std::make_pair(Map[Index], RC);
1122  }
1123  return std::make_pair(0U, nullptr);
1124 }
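// Illustrative usage (editorial, not in the original source): for the
// constraint "{r5}" with VT == MVT::i64, the caller below passes
// SystemZMC::GR64Regs, and this helper returns SystemZ::R5D paired with the
// GR64Bit register class.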
1125 
1126 std::pair<unsigned, const TargetRegisterClass *>
1127 SystemZTargetLowering::getRegForInlineAsmConstraint(
1128     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1129  if (Constraint.size() == 1) {
1130  // GCC Constraint Letters
1131  switch (Constraint[0]) {
1132  default: break;
1133  case 'd': // Data register (equivalent to 'r')
1134  case 'r': // General-purpose register
1135  if (VT == MVT::i64)
1136  return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1137  else if (VT == MVT::i128)
1138  return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1139  return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1140 
1141  case 'a': // Address register
1142  if (VT == MVT::i64)
1143  return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1144  else if (VT == MVT::i128)
1145  return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1146  return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1147 
1148  case 'h': // High-part register (an LLVM extension)
1149  return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1150 
1151  case 'f': // Floating-point register
1152  if (!useSoftFloat()) {
1153  if (VT == MVT::f64)
1154  return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1155  else if (VT == MVT::f128)
1156  return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1157  return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1158  }
1159  break;
1160  case 'v': // Vector register
1161  if (Subtarget.hasVector()) {
1162  if (VT == MVT::f32)
1163  return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1164  if (VT == MVT::f64)
1165  return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1166  return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1167  }
1168  break;
1169  }
1170  }
1171  if (Constraint.size() > 0 && Constraint[0] == '{') {
1172  // We need to override the default register parsing for GPRs and FPRs
1173  // because the interpretation depends on VT. The internal names of
1174  // the registers are also different from the external names
1175  // (F0D and F0S instead of F0, etc.).
1176  if (Constraint[1] == 'r') {
1177  if (VT == MVT::i32)
1178  return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1179  SystemZMC::GR32Regs, 16);
1180  if (VT == MVT::i128)
1181  return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1182  SystemZMC::GR128Regs, 16);
1183  return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1184  SystemZMC::GR64Regs, 16);
1185  }
1186  if (Constraint[1] == 'f') {
1187  if (useSoftFloat())
1188  return std::make_pair(
1189  0u, static_cast<const TargetRegisterClass *>(nullptr));
1190  if (VT == MVT::f32)
1191  return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1192  SystemZMC::FP32Regs, 16);
1193  if (VT == MVT::f128)
1194  return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1195  SystemZMC::FP128Regs, 16);
1196  return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1197  SystemZMC::FP64Regs, 16);
1198  }
1199  if (Constraint[1] == 'v') {
1200  if (!Subtarget.hasVector())
1201  return std::make_pair(
1202  0u, static_cast<const TargetRegisterClass *>(nullptr));
1203  if (VT == MVT::f32)
1204  return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1205  SystemZMC::VR32Regs, 32);
1206  if (VT == MVT::f64)
1207  return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1208  SystemZMC::VR64Regs, 32);
1209  return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1210  SystemZMC::VR128Regs, 32);
1211  }
1212  }
1213  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1214 }
1215 
1216 // FIXME? Maybe this could be a TableGen attribute on some registers and
1217 // this table could be generated automatically from RegInfo.
1218 Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
1219                                                   const MachineFunction &MF) const {
1220 
1221   Register Reg = StringSwitch<Register>(RegName)
1222                      .Case("r15", SystemZ::R15D)
1223  .Default(0);
1224  if (Reg)
1225  return Reg;
1226  report_fatal_error("Invalid register name global variable");
1227 }
1228 
1229 void SystemZTargetLowering::
1230 LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1231  std::vector<SDValue> &Ops,
1232  SelectionDAG &DAG) const {
1233  // Only support length 1 constraints for now.
1234  if (Constraint.length() == 1) {
1235  switch (Constraint[0]) {
1236  case 'I': // Unsigned 8-bit constant
1237  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1238  if (isUInt<8>(C->getZExtValue()))
1239  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1240  Op.getValueType()));
1241  return;
1242 
1243  case 'J': // Unsigned 12-bit constant
1244  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1245  if (isUInt<12>(C->getZExtValue()))
1246  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1247  Op.getValueType()));
1248  return;
1249 
1250  case 'K': // Signed 16-bit constant
1251  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1252  if (isInt<16>(C->getSExtValue()))
1253  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1254  Op.getValueType()));
1255  return;
1256 
1257  case 'L': // Signed 20-bit displacement (on all targets we support)
1258  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1259  if (isInt<20>(C->getSExtValue()))
1260  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1261  Op.getValueType()));
1262  return;
1263 
1264  case 'M': // 0x7fffffff
1265  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1266  if (C->getZExtValue() == 0x7fffffff)
1267  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1268  Op.getValueType()));
1269  return;
1270  }
1271  }
1272  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1273 }
1274 
1275 //===----------------------------------------------------------------------===//
1276 // Calling conventions
1277 //===----------------------------------------------------------------------===//
1278 
1279 #include "SystemZGenCallingConv.inc"
1280 
1281 const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
1282     CallingConv::ID) const {
1283  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1284  SystemZ::R14D, 0 };
1285  return ScratchRegs;
1286 }
1287 
1288 bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
1289                                                      Type *ToType) const {
1290  return isTruncateFree(FromType, ToType);
1291 }
1292 
1293 bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1294   return CI->isTailCall();
1295 }
1296 
1297 // We do not yet support 128-bit single-element vector types. If the user
1298 // attempts to use such types as function argument or return type, prefer
1299 // to error out instead of emitting code violating the ABI.
1300 static void VerifyVectorType(MVT VT, EVT ArgVT) {
1301  if (ArgVT.isVector() && !VT.isVector())
1302  report_fatal_error("Unsupported vector argument or return type");
1303 }
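// For example (editorial): a <1 x i128> argument reaches this check with a
// vector ArgVT but a legalized non-vector VT, triggering the fatal error.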
1304 
1305 static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
1306   for (unsigned i = 0; i < Ins.size(); ++i)
1307  VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1308 }
1309 
1310 static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
1311   for (unsigned i = 0; i < Outs.size(); ++i)
1312  VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1313 }
1314 
1315 // Value is a value that has been passed to us in the location described by VA
1316 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1317 // any loads onto Chain.
1318 static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
1319                                    CCValAssign &VA, SDValue Chain,
1320  SDValue Value) {
1321  // If the argument has been promoted from a smaller type, insert an
1322  // assertion to capture this.
1323  if (VA.getLocInfo() == CCValAssign::SExt)
1324  Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1325  DAG.getValueType(VA.getValVT()));
1326  else if (VA.getLocInfo() == CCValAssign::ZExt)
1327  Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1328  DAG.getValueType(VA.getValVT()));
1329 
1330  if (VA.isExtInLoc())
1331  Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1332  else if (VA.getLocInfo() == CCValAssign::BCvt) {
1333  // If this is a short vector argument loaded from the stack,
1334  // extend from i64 to full vector size and then bitcast.
1335  assert(VA.getLocVT() == MVT::i64);
1336     assert(VA.getValVT().isVector());
1337     Value = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2i64, Value);
1338     Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1339  } else
1340  assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1341  return Value;
1342 }
1343 
1344 // Value is a value of type VA.getValVT() that we need to copy into
1345 // the location described by VA. Return a copy of Value converted to
1346 // VA.getLocVT(). The caller is responsible for handling indirect values.
1347 static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1348                                    CCValAssign &VA, SDValue Value) {
1349  switch (VA.getLocInfo()) {
1350  case CCValAssign::SExt:
1351  return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1352  case CCValAssign::ZExt:
1353  return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1354  case CCValAssign::AExt:
1355  return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1356  case CCValAssign::BCvt:
1357  // If this is a short vector argument to be stored to the stack,
1358  // bitcast to v2i64 and then extract first element.
1359  assert(VA.getLocVT() == MVT::i64);
1360     assert(VA.getValVT().isVector());
1361     Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
1362     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1363  DAG.getConstant(0, DL, MVT::i32));
1364  case CCValAssign::Full:
1365  return Value;
1366  default:
1367  llvm_unreachable("Unhandled getLocInfo()");
1368  }
1369 }
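// Illustrative example (editorial): an i32 value assigned to a 64-bit GPR
// location with LocInfo SExt is widened here via ISD::SIGN_EXTEND to i64.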
1370 
1371 static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
1372   SDLoc DL(In);
1373   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1374                            DAG.getIntPtrConstant(0, DL));
1375   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
1376                            DAG.getIntPtrConstant(1, DL));
1377  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1378  MVT::Untyped, Hi, Lo);
1379  return SDValue(Pair, 0);
1380 }
1381 
1382 static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
1383   SDLoc DL(In);
1384  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1385  DL, MVT::i64, In);
1386  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1387  DL, MVT::i64, In);
1388  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1389 }
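// Editorial note: the MVT::Untyped values above model 128-bit quantities held
// in even/odd GPR pairs (subreg_h64/subreg_l64), the representation SystemZ
// uses for its GR128 register class.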
1390 
1391 bool SystemZTargetLowering::splitValueIntoRegisterParts(
1392     SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1393  unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
1394  EVT ValueVT = Val.getValueType();
1395  assert((ValueVT != MVT::i128 ||
1396  ((NumParts == 1 && PartVT == MVT::Untyped) ||
1397  (NumParts == 2 && PartVT == MVT::i64))) &&
1398  "Unknown handling of i128 value.");
1399  if (ValueVT == MVT::i128 && NumParts == 1) {
1400  // Inline assembly operand.
1401  Parts[0] = lowerI128ToGR128(DAG, Val);
1402  return true;
1403  }
1404  return false;
1405 }
1406 
1407 SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
1408     SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1409  MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
1410  assert((ValueVT != MVT::i128 ||
1411  ((NumParts == 1 && PartVT == MVT::Untyped) ||
1412  (NumParts == 2 && PartVT == MVT::i64))) &&
1413  "Unknown handling of i128 value.");
1414  if (ValueVT == MVT::i128 && NumParts == 1)
1415  // Inline assembly operand.
1416  return lowerGR128ToI128(DAG, Parts[0]);
1417  return SDValue();
1418 }
1419 
1420 SDValue SystemZTargetLowering::LowerFormalArguments(
1421     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1422  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1423  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1424  MachineFunction &MF = DAG.getMachineFunction();
1425  MachineFrameInfo &MFI = MF.getFrameInfo();
1426   MachineRegisterInfo &MRI = MF.getRegInfo();
1427   SystemZMachineFunctionInfo *FuncInfo =
1428       MF.getInfo<SystemZMachineFunctionInfo>();
1429   auto *TFL =
1430  static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
1431  EVT PtrVT = getPointerTy(DAG.getDataLayout());
1432 
1433  // Detect unsupported vector argument types.
1434   if (Subtarget.hasVector())
1435     VerifyVectorTypes(Ins);
1436 
1437   // Assign locations to all of the incoming arguments.
1438   SmallVector<CCValAssign, 16> ArgLocs;
1439   SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1440  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1441 
1442  unsigned NumFixedGPRs = 0;
1443  unsigned NumFixedFPRs = 0;
1444  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1445  SDValue ArgValue;
1446  CCValAssign &VA = ArgLocs[I];
1447  EVT LocVT = VA.getLocVT();
1448  if (VA.isRegLoc()) {
1449  // Arguments passed in registers
1450  const TargetRegisterClass *RC;
1451  switch (LocVT.getSimpleVT().SimpleTy) {
1452  default:
1453  // Integers smaller than i64 should be promoted to i64.
1454  llvm_unreachable("Unexpected argument type");
1455  case MVT::i32:
1456  NumFixedGPRs += 1;
1457  RC = &SystemZ::GR32BitRegClass;
1458  break;
1459  case MVT::i64:
1460  NumFixedGPRs += 1;
1461  RC = &SystemZ::GR64BitRegClass;
1462  break;
1463  case MVT::f32:
1464  NumFixedFPRs += 1;
1465  RC = &SystemZ::FP32BitRegClass;
1466  break;
1467  case MVT::f64:
1468  NumFixedFPRs += 1;
1469  RC = &SystemZ::FP64BitRegClass;
1470  break;
1471  case MVT::v16i8:
1472  case MVT::v8i16:
1473  case MVT::v4i32:
1474  case MVT::v2i64:
1475  case MVT::v4f32:
1476  case MVT::v2f64:
1477  RC = &SystemZ::VR128BitRegClass;
1478  break;
1479  }
1480 
1481  Register VReg = MRI.createVirtualRegister(RC);
1482  MRI.addLiveIn(VA.getLocReg(), VReg);
1483  ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1484  } else {
1485  assert(VA.isMemLoc() && "Argument not register or memory");
1486 
1487  // Create the frame index object for this incoming parameter.
1488  int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
1489  VA.getLocMemOffset(), true);
1490 
1491  // Create the SelectionDAG nodes corresponding to a load
1492  // from this parameter. Unpromoted ints and floats are
1493  // passed as right-justified 8-byte values.
1494  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1495  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1496  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1497  DAG.getIntPtrConstant(4, DL));
1498       ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1499                              MachinePointerInfo::getFixedStack(MF, FI));
1500     }
1501 
1502  // Convert the value of the argument register into the value that's
1503  // being passed.
1504  if (VA.getLocInfo() == CCValAssign::Indirect) {
1505  InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1506  MachinePointerInfo()));
1507  // If the original argument was split (e.g. i128), we need
1508  // to load all parts of it here (using the same address).
1509  unsigned ArgIndex = Ins[I].OrigArgIndex;
1510  assert (Ins[I].PartOffset == 0);
1511  while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1512  CCValAssign &PartVA = ArgLocs[I + 1];
1513  unsigned PartOffset = Ins[I + 1].PartOffset;
1514  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1515  DAG.getIntPtrConstant(PartOffset, DL));
1516  InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1517  MachinePointerInfo()));
1518  ++I;
1519  }
1520  } else
1521  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1522  }
1523 
1524  if (IsVarArg) {
1525  // Save the number of non-varargs registers for later use by va_start, etc.
1526  FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1527  FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1528 
1529  // Likewise the address (in the form of a frame index) of where the
1530  // first stack vararg would be. The 1-byte size here is arbitrary.
1531  int64_t StackSize = CCInfo.getNextStackOffset();
1532  FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1533 
1534  // ...and a similar frame index for the caller-allocated save area
1535  // that will be used to store the incoming registers.
1536  int64_t RegSaveOffset =
1537  -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1538  unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1539  FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1540 
1541  // Store the FPR varargs in the reserved frame slots. (We store the
1542  // GPRs as part of the prologue.)
1543     if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1544       SDValue MemOps[SystemZ::ELFNumArgFPRs];
1545       for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1546  unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1547         int FI =
1548             MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
1549         SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1550  unsigned VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
1551  &SystemZ::FP64BitRegClass);
1552  SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1553         MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1554                                  MachinePointerInfo::getFixedStack(MF, FI));
1555       }
1556  // Join the stores, which are independent of one another.
1557  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1558  makeArrayRef(&MemOps[NumFixedFPRs],
1559  SystemZ::ELFNumArgFPRs-NumFixedFPRs));
1560  }
1561  }
1562 
1563  return Chain;
1564 }
1565 
1566 static bool canUseSiblingCall(const CCState &ArgCCInfo,
1567                               SmallVectorImpl<CCValAssign> &ArgLocs,
1568                               SmallVectorImpl<ISD::OutputArg> &Outs) {
1569   // Punt if there are any indirect or stack arguments, or if the call
1570  // needs the callee-saved argument register R6, or if the call uses
1571  // the callee-saved register arguments SwiftSelf and SwiftError.
1572  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1573  CCValAssign &VA = ArgLocs[I];
1574  if (VA.getLocInfo() == CCValAssign::Indirect)
1575  return false;
1576  if (!VA.isRegLoc())
1577  return false;
1578  Register Reg = VA.getLocReg();
1579  if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1580  return false;
1581  if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1582  return false;
1583  }
1584  return true;
1585 }
1586 
1587 SDValue
1588 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
1589                                  SmallVectorImpl<SDValue> &InVals) const {
1590  SelectionDAG &DAG = CLI.DAG;
1591   SDLoc &DL = CLI.DL;
1592   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1593   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1594   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1595   SDValue Chain = CLI.Chain;
1596  SDValue Callee = CLI.Callee;
1597  bool &IsTailCall = CLI.IsTailCall;
1598  CallingConv::ID CallConv = CLI.CallConv;
1599  bool IsVarArg = CLI.IsVarArg;
1600  MachineFunction &MF = DAG.getMachineFunction();
1601  EVT PtrVT = getPointerTy(MF.getDataLayout());
1602  LLVMContext &Ctx = *DAG.getContext();
1603 
1604  // Detect unsupported vector argument and return types.
1605  if (Subtarget.hasVector()) {
1606     VerifyVectorTypes(Outs);
1607     VerifyVectorTypes(Ins);
1608   }
1609 
1610   // Analyze the operands of the call, assigning locations to each operand.
1611   SmallVector<CCValAssign, 16> ArgLocs;
1612   SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1613  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1614 
1615  // We don't support GuaranteedTailCallOpt, only automatically-detected
1616  // sibling calls.
1617  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1618  IsTailCall = false;
1619 
1620  // Get a count of how many bytes are to be pushed on the stack.
1621  unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1622 
1623  // Mark the start of the call.
1624  if (!IsTailCall)
1625  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1626 
1627   // Copy argument values to their designated locations.
1628   SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
1629   SmallVector<SDValue, 8> MemOpChains;
1630  SDValue StackPtr;
1631  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1632  CCValAssign &VA = ArgLocs[I];
1633  SDValue ArgValue = OutVals[I];
1634 
1635  if (VA.getLocInfo() == CCValAssign::Indirect) {
1636  // Store the argument in a stack slot and pass its address.
1637  unsigned ArgIndex = Outs[I].OrigArgIndex;
1638  EVT SlotVT;
1639  if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1640  // Allocate the full stack space for a promoted (and split) argument.
1641  Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1642  EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1643  MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1644  unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1645  SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1646  } else {
1647  SlotVT = Outs[I].ArgVT;
1648  }
1649  SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1650  int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1651       MemOpChains.push_back(
1652           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1653                        MachinePointerInfo::getFixedStack(MF, FI)));
1654       // If the original argument was split (e.g. i128), we need
1655  // to store all parts of it here (and pass just one address).
1656  assert (Outs[I].PartOffset == 0);
1657  while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1658  SDValue PartValue = OutVals[I + 1];
1659  unsigned PartOffset = Outs[I + 1].PartOffset;
1660  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1661  DAG.getIntPtrConstant(PartOffset, DL));
1662         MemOpChains.push_back(
1663             DAG.getStore(Chain, DL, PartValue, Address,
1664                          MachinePointerInfo::getFixedStack(MF, FI)));
1665         assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1666  SlotVT.getStoreSize()) && "Not enough space for argument part!");
1667  ++I;
1668  }
1669  ArgValue = SpillSlot;
1670  } else
1671  ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1672 
1673  if (VA.isRegLoc())
1674  // Queue up the argument copies and emit them at the end.
1675  RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1676  else {
1677  assert(VA.isMemLoc() && "Argument not register or memory");
1678 
1679  // Work out the address of the stack slot. Unpromoted ints and
1680  // floats are passed as right-justified 8-byte values.
1681  if (!StackPtr.getNode())
1682         StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
1683       unsigned Offset = SystemZMC::ELFCallFrameSize + VA.getLocMemOffset();
1684       if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1685  Offset += 4;
1686  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1687  DAG.getIntPtrConstant(Offset, DL));
1688 
1689  // Emit the store.
1690  MemOpChains.push_back(
1691  DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1692  }
1693  }
1694 
1695  // Join the stores, which are independent of one another.
1696  if (!MemOpChains.empty())
1697  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1698 
1699  // Accept direct calls by converting symbolic call addresses to the
1700  // associated Target* opcodes. Force %r1 to be used for indirect
1701  // tail calls.
1702  SDValue Glue;
1703  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1704     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1705     Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1706   } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1707     Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1708     Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
1709  } else if (IsTailCall) {
1710  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1711  Glue = Chain.getValue(1);
1712  Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1713  }
1714 
1715  // Build a sequence of copy-to-reg nodes, chained and glued together.
1716  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1717  Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1718  RegsToPass[I].second, Glue);
1719  Glue = Chain.getValue(1);
1720  }
1721 
1722   // The first call operand is the chain and the second is the target address.
1723   SmallVector<SDValue, 8> Ops;
1724   Ops.push_back(Chain);
1725  Ops.push_back(Callee);
1726 
1727  // Add argument registers to the end of the list so that they are
1728  // known live into the call.
1729  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1730  Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1731  RegsToPass[I].second.getValueType()));
1732 
1733  // Add a register mask operand representing the call-preserved registers.
1734  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1735  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1736  assert(Mask && "Missing call preserved mask for calling convention");
1737  Ops.push_back(DAG.getRegisterMask(Mask));
1738 
1739  // Glue the call to the argument copies, if any.
1740  if (Glue.getNode())
1741  Ops.push_back(Glue);
1742 
1743  // Emit the call.
1744  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1745  if (IsTailCall)
1746  return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1747  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1748  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
1749  Glue = Chain.getValue(1);
1750 
1751  // Mark the end of the call, which is glued to the call itself.
1752  Chain = DAG.getCALLSEQ_END(Chain,
1753  DAG.getConstant(NumBytes, DL, PtrVT, true),
1754  DAG.getConstant(0, DL, PtrVT, true),
1755  Glue, DL);
1756  Glue = Chain.getValue(1);
1757 
1758   // Assign locations to each value returned by this call.
1759   SmallVector<CCValAssign, 8> RetLocs;
1760   CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
1761  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1762 
1763  // Copy all of the result registers out of their specified physreg.
1764  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1765  CCValAssign &VA = RetLocs[I];
1766 
1767  // Copy the value out, gluing the copy to the end of the call sequence.
1768  SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1769  VA.getLocVT(), Glue);
1770  Chain = RetValue.getValue(1);
1771  Glue = RetValue.getValue(2);
1772 
1773  // Convert the value of the return register into the value that's
1774  // being returned.
1775  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1776  }
1777 
1778  return Chain;
1779 }
1780 
1781 bool SystemZTargetLowering::
1782 CanLowerReturn(CallingConv::ID CallConv,
1783                MachineFunction &MF, bool isVarArg,
1784  const SmallVectorImpl<ISD::OutputArg> &Outs,
1785  LLVMContext &Context) const {
1786  // Detect unsupported vector return types.
1787  if (Subtarget.hasVector())
1788  VerifyVectorTypes(Outs);
1789 
1790  // Special case that we cannot easily detect in RetCC_SystemZ since
1791  // i128 is not a legal type.
1792  for (auto &Out : Outs)
1793  if (Out.ArgVT == MVT::i128)
1794  return false;
1795       return false;
1796   SmallVector<CCValAssign, 16> RetLocs;
1797   CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1798  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1799 }
1800 
1801 SDValue
1802 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1803                                    bool IsVarArg,
1804  const SmallVectorImpl<ISD::OutputArg> &Outs,
1805  const SmallVectorImpl<SDValue> &OutVals,
1806  const SDLoc &DL, SelectionDAG &DAG) const {
1807  MachineFunction &MF = DAG.getMachineFunction();
1808 
1809  // Detect unsupported vector return types.
1810  if (Subtarget.hasVector())
1811  VerifyVectorTypes(Outs);
1812 
1813   // Assign locations to each returned value.
1814   SmallVector<CCValAssign, 16> RetLocs;
1815   CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1816  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1817 
1818  // Quick exit for void returns
1819  if (RetLocs.empty())
1820  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1821 
1822  if (CallConv == CallingConv::GHC)
1823  report_fatal_error("GHC functions return void only");
1824 
1825  // Copy the result values into the output registers.
1826  SDValue Glue;
1827  SmallVector<SDValue, 4> RetOps;
1828  RetOps.push_back(Chain);
1829  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1830  CCValAssign &VA = RetLocs[I];
1831  SDValue RetValue = OutVals[I];
1832 
1833  // Make the return register live on exit.
1834  assert(VA.isRegLoc() && "Can only return in registers!");
1835 
1836  // Promote the value as required.
1837  RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1838 
1839  // Chain and glue the copies together.
1840  Register Reg = VA.getLocReg();
1841  Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1842  Glue = Chain.getValue(1);
1843  RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1844  }
1845 
1846  // Update chain and glue.
1847  RetOps[0] = Chain;
1848  if (Glue.getNode())
1849  RetOps.push_back(Glue);
1850 
1851  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1852 }
1853 
1854 // Return true if Op is an intrinsic node with chain that returns the CC value
1855 // as its only (other) argument. Provide the associated SystemZISD opcode and
1856 // the mask of valid CC values if so.
1857 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1858  unsigned &CCValid) {
1859  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1860  switch (Id) {
1861  case Intrinsic::s390_tbegin:
1862  Opcode = SystemZISD::TBEGIN;
1863  CCValid = SystemZ::CCMASK_TBEGIN;
1864  return true;
1865 
1866  case Intrinsic::s390_tbegin_nofloat:
1867  Opcode = SystemZISD::TBEGIN_NOFLOAT;
1868  CCValid = SystemZ::CCMASK_TBEGIN;
1869  return true;
1870 
1871  case Intrinsic::s390_tend:
1872  Opcode = SystemZISD::TEND;
1873  CCValid = SystemZ::CCMASK_TEND;
1874  return true;
1875 
1876  default:
1877  return false;
1878  }
1879 }
1880 
1881 // Return true if Op is an intrinsic node without chain that returns the
1882 // CC value as its final argument. Provide the associated SystemZISD
1883 // opcode and the mask of valid CC values if so.
1884 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
1885  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1886  switch (Id) {
1887  case Intrinsic::s390_vpkshs:
1888  case Intrinsic::s390_vpksfs:
1889  case Intrinsic::s390_vpksgs:
1890  Opcode = SystemZISD::PACKS_CC;
1891  CCValid = SystemZ::CCMASK_VCMP;
1892  return true;
1893 
1894  case Intrinsic::s390_vpklshs:
1895  case Intrinsic::s390_vpklsfs:
1896  case Intrinsic::s390_vpklsgs:
1897  Opcode = SystemZISD::PACKLS_CC;
1898  CCValid = SystemZ::CCMASK_VCMP;
1899  return true;
1900 
1901  case Intrinsic::s390_vceqbs:
1902  case Intrinsic::s390_vceqhs:
1903  case Intrinsic::s390_vceqfs:
1904  case Intrinsic::s390_vceqgs:
1905  Opcode = SystemZISD::VICMPES;
1906  CCValid = SystemZ::CCMASK_VCMP;
1907  return true;
1908 
1909  case Intrinsic::s390_vchbs:
1910  case Intrinsic::s390_vchhs:
1911  case Intrinsic::s390_vchfs:
1912  case Intrinsic::s390_vchgs:
1913  Opcode = SystemZISD::VICMPHS;
1914  CCValid = SystemZ::CCMASK_VCMP;
1915  return true;
1916 
1917  case Intrinsic::s390_vchlbs:
1918  case Intrinsic::s390_vchlhs:
1919  case Intrinsic::s390_vchlfs:
1920  case Intrinsic::s390_vchlgs:
1921  Opcode = SystemZISD::VICMPHLS;
1922  CCValid = SystemZ::CCMASK_VCMP;
1923  return true;
1924 
1925  case Intrinsic::s390_vtm:
1926  Opcode = SystemZISD::VTM;
1927  CCValid = SystemZ::CCMASK_VCMP;
1928  return true;
1929 
1930  case Intrinsic::s390_vfaebs:
1931  case Intrinsic::s390_vfaehs:
1932  case Intrinsic::s390_vfaefs:
1933  Opcode = SystemZISD::VFAE_CC;
1934  CCValid = SystemZ::CCMASK_ANY;
1935  return true;
1936 
1937  case Intrinsic::s390_vfaezbs:
1938  case Intrinsic::s390_vfaezhs:
1939  case Intrinsic::s390_vfaezfs:
1940  Opcode = SystemZISD::VFAEZ_CC;
1941  CCValid = SystemZ::CCMASK_ANY;
1942  return true;
1943 
1944  case Intrinsic::s390_vfeebs:
1945  case Intrinsic::s390_vfeehs:
1946  case Intrinsic::s390_vfeefs:
1947  Opcode = SystemZISD::VFEE_CC;
1948  CCValid = SystemZ::CCMASK_ANY;
1949  return true;
1950 
1951  case Intrinsic::s390_vfeezbs:
1952  case Intrinsic::s390_vfeezhs:
1953  case Intrinsic::s390_vfeezfs:
1954  Opcode = SystemZISD::VFEEZ_CC;
1955  CCValid = SystemZ::CCMASK_ANY;
1956  return true;
1957 
1958  case Intrinsic::s390_vfenebs:
1959  case Intrinsic::s390_vfenehs:
1960  case Intrinsic::s390_vfenefs:
1961  Opcode = SystemZISD::VFENE_CC;
1962  CCValid = SystemZ::CCMASK_ANY;
1963  return true;
1964 
1965  case Intrinsic::s390_vfenezbs:
1966  case Intrinsic::s390_vfenezhs:
1967  case Intrinsic::s390_vfenezfs:
1968  Opcode = SystemZISD::VFENEZ_CC;
1969  CCValid = SystemZ::CCMASK_ANY;
1970  return true;
1971 
1972  case Intrinsic::s390_vistrbs:
1973  case Intrinsic::s390_vistrhs:
1974  case Intrinsic::s390_vistrfs:
1975  Opcode = SystemZISD::VISTR_CC;
1976  CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
1977  return true;
1978 
1979  case Intrinsic::s390_vstrcbs:
1980  case Intrinsic::s390_vstrchs:
1981  case Intrinsic::s390_vstrcfs:
1982  Opcode = SystemZISD::VSTRC_CC;
1983  CCValid = SystemZ::CCMASK_ANY;
1984  return true;
1985 
1986  case Intrinsic::s390_vstrczbs:
1987  case Intrinsic::s390_vstrczhs:
1988  case Intrinsic::s390_vstrczfs:
1989  Opcode = SystemZISD::VSTRCZ_CC;
1990  CCValid = SystemZ::CCMASK_ANY;
1991  return true;
1992 
1993  case Intrinsic::s390_vstrsb:
1994  case Intrinsic::s390_vstrsh:
1995  case Intrinsic::s390_vstrsf:
1996  Opcode = SystemZISD::VSTRS_CC;
1997  CCValid = SystemZ::CCMASK_ANY;
1998  return true;
1999 
2000  case Intrinsic::s390_vstrszb:
2001  case Intrinsic::s390_vstrszh:
2002  case Intrinsic::s390_vstrszf:
2003  Opcode = SystemZISD::VSTRSZ_CC;
2004  CCValid = SystemZ::CCMASK_ANY;
2005  return true;
2006 
2007  case Intrinsic::s390_vfcedbs:
2008  case Intrinsic::s390_vfcesbs:
2009  Opcode = SystemZISD::VFCMPES;
2010  CCValid = SystemZ::CCMASK_VCMP;
2011  return true;
2012 
2013  case Intrinsic::s390_vfchdbs:
2014  case Intrinsic::s390_vfchsbs:
2015  Opcode = SystemZISD::VFCMPHS;
2016  CCValid = SystemZ::CCMASK_VCMP;
2017  return true;
2018 
2019  case Intrinsic::s390_vfchedbs:
2020  case Intrinsic::s390_vfchesbs:
2021  Opcode = SystemZISD::VFCMPHES;
2022  CCValid = SystemZ::CCMASK_VCMP;
2023  return true;
2024 
2025  case Intrinsic::s390_vftcidb:
2026  case Intrinsic::s390_vftcisb:
2027  Opcode = SystemZISD::VFTCI;
2028  CCValid = SystemZ::CCMASK_VCMP;
2029  return true;
2030 
2031  case Intrinsic::s390_tdc:
2032  Opcode = SystemZISD::TDC;
2033  CCValid = SystemZ::CCMASK_TDC;
2034  return true;
2035 
2036  default:
2037  return false;
2038  }
2039 }
2040 
2041 // Emit an intrinsic with chain and an explicit CC register result.
2042 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2043  unsigned Opcode) {
2044  // Copy all operands except the intrinsic ID.
2045  unsigned NumOps = Op.getNumOperands();
2046  SmallVector<SDValue, 6> Ops;
2047  Ops.reserve(NumOps - 1);
2048  Ops.push_back(Op.getOperand(0));
2049  for (unsigned I = 2; I < NumOps; ++I)
2050  Ops.push_back(Op.getOperand(I));
2051 
2052  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2053  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2054  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2055  SDValue OldChain = SDValue(Op.getNode(), 1);
2056  SDValue NewChain = SDValue(Intr.getNode(), 1);
2057  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2058  return Intr.getNode();
2059 }
2060 
2061 // Emit an intrinsic with an explicit CC register result.
2062 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2063  unsigned Opcode) {
2064  // Copy all operands except the intrinsic ID.
2065  unsigned NumOps = Op.getNumOperands();
2066  SmallVector<SDValue, 6> Ops;
2067  Ops.reserve(NumOps - 1);
2068  for (unsigned I = 1; I < NumOps; ++I)
2069  Ops.push_back(Op.getOperand(I));
2070 
2071  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2072  return Intr.getNode();
2073 }
2074 
2075 // CC is a comparison that will be implemented using an integer or
2076 // floating-point comparison. Return the condition code mask for
2077 // a branch on true. In the integer case, CCMASK_CMP_UO is set for
2078 // unsigned comparisons and clear for signed ones. In the floating-point
2079 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2080 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2081 #define CONV(X) \
2082  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2083  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2084  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2085 
2086  switch (CC) {
2087  default:
2088  llvm_unreachable("Invalid integer condition!");
2089 
2090  CONV(EQ);
2091  CONV(NE);
2092  CONV(GT);
2093  CONV(GE);
2094  CONV(LT);
2095  CONV(LE);
2096 
2097  case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2098  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2099  }
2100 #undef CONV
2101 }
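The CONV macro above leans on the CC-mask bit convention: the mask is a 4-bit set in which bit 3 stands for CC==0 and bit 0 for CC==3 (the same convention getIntrinsicCmp spells out further down). A minimal standalone sketch of how the compare masks compose, using illustrative stand-ins for the SystemZ::CCMASK_* constants (the exact values are assumptions of this sketch):

// Hypothetical stand-ins for SystemZ::CCMASK_* (assumed encoding).
constexpr unsigned CCMASK_0 = 1 << 3; // CC == 0: operands equal
constexpr unsigned CCMASK_1 = 1 << 2; // CC == 1: first operand low
constexpr unsigned CCMASK_2 = 1 << 1; // CC == 2: first operand high
constexpr unsigned CCMASK_3 = 1 << 0; // CC == 3: unordered (FP only)
constexpr unsigned CMP_EQ = CCMASK_0, CMP_LT = CCMASK_1;
constexpr unsigned CMP_GT = CCMASK_2, CMP_UO = CCMASK_3;
// CONV(LT) expands to: SETLT -> CMP_LT, SETOLT -> CMP_LT, and
// SETULT -> CMP_UO | CMP_LT, i.e. "unordered or less".
static_assert((CMP_UO | CMP_LT) == 0b0101, "SETULT accepts CC 1 and CC 3");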
2102 
2103 // If C can be converted to a comparison against zero, adjust the operands
2104 // as necessary.
2105 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2106  if (C.ICmpType == SystemZICMP::UnsignedOnly)
2107  return;
2108 
2109  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2110  if (!ConstOp1)
2111  return;
2112 
2113  int64_t Value = ConstOp1->getSExtValue();
2114  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2115  (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2116  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2117  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2118  C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2119  C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2120  }
2121 }
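As a worked instance of this rewrite (a sketch; the constants stand in for the SystemZ::CCMASK_CMP_* values): for signed x, "x > -1" accepts exactly the same values as "x >= 0", and since the GE mask is the GT mask with the equality bit added, toggling that bit via CCMask ^= CCMASK_CMP_EQ while rewriting Op1 to 0 preserves the condition and exposes compare-with-zero forms such as LOAD AND TEST.

constexpr unsigned CMP_EQ = 1 << 3, CMP_GT = 1 << 1; // assumed encoding
// GT and EQ are disjoint bits, so XOR-ing EQ into GT yields GE.
static_assert((CMP_GT ^ CMP_EQ) == (CMP_GT | CMP_EQ), "x > -1 becomes x >= 0");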
2122 
2123 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2124 // adjust the operands as necessary.
2125 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2126  Comparison &C) {
2127  // For us to make any changes, it must be a comparison between a single-use
2128  // load and a constant.
2129  if (!C.Op0.hasOneUse() ||
2130  C.Op0.getOpcode() != ISD::LOAD ||
2131  C.Op1.getOpcode() != ISD::Constant)
2132  return;
2133 
2134  // We must have an 8- or 16-bit load.
2135  auto *Load = cast<LoadSDNode>(C.Op0);
2136  unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2137  if ((NumBits != 8 && NumBits != 16) ||
2138  NumBits != Load->getMemoryVT().getStoreSizeInBits())
2139  return;
2140 
2141  // The load must be an extending one and the constant must be within the
2142  // range of the unextended value.
2143  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2144  uint64_t Value = ConstOp1->getZExtValue();
2145  uint64_t Mask = (1 << NumBits) - 1;
2146  if (Load->getExtensionType() == ISD::SEXTLOAD) {
2147  // Make sure that ConstOp1 is in range of C.Op0.
2148  int64_t SignedValue = ConstOp1->getSExtValue();
2149  if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2150  return;
2151  if (C.ICmpType != SystemZICMP::SignedOnly) {
2152  // Unsigned comparison between two sign-extended values is equivalent
2153  // to unsigned comparison between two zero-extended values.
2154  Value &= Mask;
2155  } else if (NumBits == 8) {
2156  // Try to treat the comparison as unsigned, so that we can use CLI.
2157  // Adjust CCMask and Value as necessary.
2158  if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2159  // Test whether the high bit of the byte is set.
2160  Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2161  else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2162  // Test whether the high bit of the byte is clear.
2163  Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2164  else
2165  // No instruction exists for this combination.
2166  return;
2167  C.ICmpType = SystemZICMP::UnsignedOnly;
2168  }
2169  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2170  if (Value > Mask)
2171  return;
2172  // If the constant is in range, we can use any comparison.
2173  C.ICmpType = SystemZICMP::Any;
2174  } else
2175  return;
2176 
2177  // Make sure that the first operand is an i32 of the right extension type.
2178  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2179  ISD::SEXTLOAD :
2180  ISD::ZEXTLOAD);
2181  if (C.Op0.getValueType() != MVT::i32 ||
2182  Load->getExtensionType() != ExtType) {
2183  C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2184  Load->getBasePtr(), Load->getPointerInfo(),
2185  Load->getMemoryVT(), Load->getAlignment(),
2186  Load->getMemOperand()->getFlags());
2187  // Update the chain uses.
2188  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2189  }
2190 
2191  // Make sure that the second operand is an i32 with the right value.
2192  if (C.Op1.getValueType() != MVT::i32 ||
2193  Value != ConstOp1->getZExtValue())
2194  C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2195 }
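The 8-bit branch above rests on an equivalence worth seeing on plain values: a sign-extended byte is negative exactly when the byte, viewed as unsigned, exceeds 127. A self-contained sketch (names are illustrative, and the int8_t conversion assumes the usual two's-complement behaviour):

#include <cstdint>
static bool signedLtZero(uint8_t b) { return int8_t(b) < 0; } // "Value == 0, CMP_LT"
static bool unsignedGt127(uint8_t b) { return b > 127; }      // "Value = 127, CMP_GT"
// Exhaustive check over all byte values that the two tests agree,
// which is why the comparison can be retargeted to CLI.
static bool cliRewriteHolds() {
  for (unsigned v = 0; v < 256; ++v)
    if (signedLtZero(uint8_t(v)) != unsignedGt127(uint8_t(v)))
      return false;
  return true;
}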
2196 
2197 // Return true if Op is either an unextended load, or a load suitable
2198 // for integer register-memory comparisons of type ICmpType.
2199 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2200  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2201  if (Load) {
2202  // There are no instructions to compare a register with a memory byte.
2203  if (Load->getMemoryVT() == MVT::i8)
2204  return false;
2205  // Otherwise decide on extension type.
2206  switch (Load->getExtensionType()) {
2207  case ISD::NON_EXTLOAD:
2208  return true;
2209  case ISD::SEXTLOAD:
2210  return ICmpType != SystemZICMP::UnsignedOnly;
2211  case ISD::ZEXTLOAD:
2212  return ICmpType != SystemZICMP::SignedOnly;
2213  default:
2214  break;
2215  }
2216  }
2217  return false;
2218 }
2219 
2220 // Return true if it is better to swap the operands of C.
2221 static bool shouldSwapCmpOperands(const Comparison &C) {
2222  // Leave f128 comparisons alone, since they have no memory forms.
2223  if (C.Op0.getValueType() == MVT::f128)
2224  return false;
2225 
2226  // Always keep a floating-point constant second, since comparisons with
2227  // zero can use LOAD TEST and comparisons with other constants make a
2228  // natural memory operand.
2229  if (isa<ConstantFPSDNode>(C.Op1))
2230  return false;
2231 
2232  // Never swap comparisons with zero since there are many ways to optimize
2233  // those later.
2234  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2235  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2236  return false;
2237 
2238  // Also keep natural memory operands second if the loaded value is
2239  // only used here. Several comparisons have memory forms.
2240  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2241  return false;
2242 
2243  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2244  // In that case we generally prefer the memory to be second.
2245  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2246  // The only exceptions are when the second operand is a constant and
2247  // we can use things like CHHSI.
2248  if (!ConstOp1)
2249  return true;
2250  // The unsigned memory-immediate instructions can handle 16-bit
2251  // unsigned integers.
2252  if (C.ICmpType != SystemZICMP::SignedOnly &&
2253  isUInt<16>(ConstOp1->getZExtValue()))
2254  return false;
2255  // The signed memory-immediate instructions can handle 16-bit
2256  // signed integers.
2257  if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2258  isInt<16>(ConstOp1->getSExtValue()))
2259  return false;
2260  return true;
2261  }
2262 
2263  // Try to promote the use of CGFR and CLGFR.
2264  unsigned Opcode0 = C.Op0.getOpcode();
2265  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2266  return true;
2267  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2268  return true;
2269  if (C.ICmpType != SystemZICMP::SignedOnly &&
2270  Opcode0 == ISD::AND &&
2271  C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2272  cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2273  return true;
2274 
2275  return false;
2276 }
2277 
2278 // Check whether C tests for equality between X and Y and whether X - Y
2279 // or Y - X is also computed. In that case it's better to compare the
2280 // result of the subtraction against zero.
2281 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2282  Comparison &C) {
2283  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2284  C.CCMask == SystemZ::CCMASK_CMP_NE) {
2285  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2286  SDNode *N = *I;
2287  if (N->getOpcode() == ISD::SUB &&
2288  ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2289  (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2290  C.Op0 = SDValue(N, 0);
2291  C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2292  return;
2293  }
2294  }
2295  }
2296 }
2297 
2298 // Check whether C compares a floating-point value with zero and whether
2299 // that floating-point value is also negated. In this case we can use the
2300 // negation to set CC, avoiding separate LOAD AND TEST and
2301 // LOAD (NEGATIVE/COMPLEMENT) instructions.
2302 static void adjustForFNeg(Comparison &C) {
2303  // This optimization is invalid for strict comparisons, since FNEG
2304  // does not raise any exceptions.
2305  if (C.Chain)
2306  return;
2307  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2308  if (C1 && C1->isZero()) {
2309  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2310  SDNode *N = *I;
2311  if (N->getOpcode() == ISD::FNEG) {
2312  C.Op0 = SDValue(N, 0);
2313  C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2314  return;
2315  }
2316  }
2317  }
2318 }
2319 
2320 // Check whether C compares (shl X, 32) with 0 and whether X is
2321 // also sign-extended. In that case it is better to test the result
2322 // of the sign extension using LTGFR.
2323 //
2324 // This case is important because InstCombine transforms a comparison
2325 // with (sext (trunc X)) into a comparison with (shl X, 32).
2326 static void adjustForLTGFR(Comparison &C) {
2327  // Check for a comparison between (shl X, 32) and 0.
2328  if (C.Op0.getOpcode() == ISD::SHL &&
2329  C.Op0.getValueType() == MVT::i64 &&
2330  C.Op1.getOpcode() == ISD::Constant &&
2331  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2332  auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2333  if (C1 && C1->getZExtValue() == 32) {
2334  SDValue ShlOp0 = C.Op0.getOperand(0);
2335  // See whether X has any SIGN_EXTEND_INREG uses.
2336  for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
2337  SDNode *N = *I;
2338  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2339  cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2340  C.Op0 = SDValue(N, 0);
2341  return;
2342  }
2343  }
2344  }
2345  }
2346 }
2347 
2348 // If C compares the truncation of an extending load, try to compare
2349 // the untruncated value instead. This exposes more opportunities to
2350 // reuse CC.
2351 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2352  Comparison &C) {
2353  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2354  C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2355  C.Op1.getOpcode() == ISD::Constant &&
2356  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2357  auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2358  if (L->getMemoryVT().getStoreSizeInBits().getFixedSize() <=
2359  C.Op0.getValueSizeInBits().getFixedSize()) {
2360  unsigned Type = L->getExtensionType();
2361  if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2362  (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2363  C.Op0 = C.Op0.getOperand(0);
2364  C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2365  }
2366  }
2367  }
2368 }
2369 
2370 // Return true if shift operation N has an in-range constant shift value.
2371 // Store it in ShiftVal if so.
2372 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2373  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2374  if (!Shift)
2375  return false;
2376 
2377  uint64_t Amount = Shift->getZExtValue();
2378  if (Amount >= N.getValueSizeInBits())
2379  return false;
2380 
2381  ShiftVal = Amount;
2382  return true;
2383 }
2384 
2385 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
2386 // instruction and whether the CC value is descriptive enough to handle
2387  // a comparison of type ICmpType between the AND result and CmpVal.
2388 // CCMask says which comparison result is being tested and BitSize is
2389 // the number of bits in the operands. If TEST UNDER MASK can be used,
2390 // return the corresponding CC mask, otherwise return 0.
2391 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2392  uint64_t Mask, uint64_t CmpVal,
2393  unsigned ICmpType) {
2394  assert(Mask != 0 && "ANDs with zero should have been removed by now");
2395 
2396  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2397  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2398  !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2399  return 0;
2400 
2401  // Work out the masks for the lowest and highest bits.
2402  unsigned HighShift = 63 - countLeadingZeros(Mask);
2403  uint64_t High = uint64_t(1) << HighShift;
2404  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2405 
2406  // Signed ordered comparisons are effectively unsigned if the sign
2407  // bit is dropped.
2408  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2409 
2410  // Check for equality comparisons with 0, or the equivalent.
2411  if (CmpVal == 0) {
2412  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2413  return SystemZ::CCMASK_TM_ALL_0;
2414  if (CCMask == SystemZ::CCMASK_CMP_NE)
2415  return SystemZ::CCMASK_TM_SOME_1;
2416  }
2417  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2418  if (CCMask == SystemZ::CCMASK_CMP_LT)
2419  return SystemZ::CCMASK_TM_ALL_0;
2420  if (CCMask == SystemZ::CCMASK_CMP_GE)
2421  return SystemZ::CCMASK_TM_SOME_1;
2422  }
2423  if (EffectivelyUnsigned && CmpVal < Low) {
2424  if (CCMask == SystemZ::CCMASK_CMP_LE)
2425  return SystemZ::CCMASK_TM_ALL_0;
2426  if (CCMask == SystemZ::CCMASK_CMP_GT)
2427  return SystemZ::CCMASK_TM_SOME_1;
2428  }
2429 
2430  // Check for equality comparisons with the mask, or the equivalent.
2431  if (CmpVal == Mask) {
2432  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2433  return SystemZ::CCMASK_TM_ALL_1;
2434  if (CCMask == SystemZ::CCMASK_CMP_NE)
2435  return SystemZ::CCMASK_TM_SOME_0;
2436  }
2437  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2438  if (CCMask == SystemZ::CCMASK_CMP_GT)
2439  return SystemZ::CCMASK_TM_ALL_1;
2440  if (CCMask == SystemZ::CCMASK_CMP_LE)
2441  return SystemZ::CCMASK_TM_SOME_0;
2442  }
2443  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2444  if (CCMask == SystemZ::CCMASK_CMP_GE)
2445  return SystemZ::CCMASK_TM_ALL_1;
2446  if (CCMask == SystemZ::CCMASK_CMP_LT)
2447  return SystemZ::CCMASK_TM_SOME_0;
2448  }
2449 
2450  // Check for ordered comparisons with the top bit.
2451  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2452  if (CCMask == SystemZ::CCMASK_CMP_LE)
2453  return SystemZ::CCMASK_TM_MSB_0;
2454  if (CCMask == SystemZ::CCMASK_CMP_GT)
2455  return SystemZ::CCMASK_TM_MSB_1;
2456  }
2457  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2458  if (CCMask == SystemZ::CCMASK_CMP_LT)
2459  return SystemZ::CCMASK_TM_MSB_0;
2460  if (CCMask == SystemZ::CCMASK_CMP_GE)
2461  return SystemZ::CCMASK_TM_MSB_1;
2462  }
2463 
2464  // If there are just two bits, we can do equality checks for Low and High
2465  // as well.
2466  if (Mask == Low + High) {
2467  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2468  return SystemZ::CCMASK_TM_MIXED_MSB_0;
2469  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2470  return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2471  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2472  return SystemZ::CCMASK_TM_MIXED_MSB_1;
2473  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2474  return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2475  }
2476 
2477  // Looks like we've exhausted our options.
2478  return 0;
2479 }
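High and Low above isolate the most and least significant set bits of Mask. A standalone sketch of that arithmetic, using compiler builtins in place of LLVM's countLeadingZeros/countTrailingZeros:

#include <cstdint>
static uint64_t highBit(uint64_t Mask) {
  return uint64_t(1) << (63 - __builtin_clzll(Mask)); // most significant set bit
}
static uint64_t lowBit(uint64_t Mask) {
  return uint64_t(1) << __builtin_ctzll(Mask);        // least significant set bit
}
// For Mask = 0x00f0: highBit() == 0x80 and lowBit() == 0x10. Since
// (x & 0x00f0) is either 0 or at least 0x10, an unsigned "result < CmpVal"
// with 0 < CmpVal <= 0x10 can only hold when all masked bits are 0, which
// is exactly the CCMASK_TM_ALL_0 case tested above.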
2480 
2481 // See whether C can be implemented as a TEST UNDER MASK instruction.
2482 // Update the arguments with the TM version if so.
2483 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2484  Comparison &C) {
2485  // Check that we have a comparison with a constant.
2486  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2487  if (!ConstOp1)
2488  return;
2489  uint64_t CmpVal = ConstOp1->getZExtValue();
2490 
2491  // Check whether the nonconstant input is an AND with a constant mask.
2492  Comparison NewC(C);
2493  uint64_t MaskVal;
2494  ConstantSDNode *Mask = nullptr;
2495  if (C.Op0.getOpcode() == ISD::AND) {
2496  NewC.Op0 = C.Op0.getOperand(0);
2497  NewC.Op1 = C.Op0.getOperand(1);
2498  Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2499  if (!Mask)
2500  return;
2501  MaskVal = Mask->getZExtValue();
2502  } else {
2503  // There is no instruction to compare with a 64-bit immediate
2504  // so use TMHH instead if possible. We need an unsigned ordered
2505  // comparison with an i64 immediate.
2506  if (NewC.Op0.getValueType() != MVT::i64 ||
2507  NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2508  NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2509  NewC.ICmpType == SystemZICMP::SignedOnly)
2510  return;
2511  // Convert LE and GT comparisons into LT and GE.
2512  if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2513  NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2514  if (CmpVal == uint64_t(-1))
2515  return;
2516  CmpVal += 1;
2517  NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2518  }
2519  // If the low N bits of Op1 are zero, then the low N bits of Op0 can
2520  // be masked off without changing the result.
2521  MaskVal = -(CmpVal & -CmpVal);
2522  NewC.ICmpType = SystemZICMP::UnsignedOnly;
2523  }
2524  if (!MaskVal)
2525  return;
2526 
2527  // Check whether the combination of mask, comparison value and comparison
2528  // type is suitable.
2529  unsigned BitSize = NewC.Op0.getValueSizeInBits();
2530  unsigned NewCCMask, ShiftVal;
2531  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2532  NewC.Op0.getOpcode() == ISD::SHL &&
2533  isSimpleShift(NewC.Op0, ShiftVal) &&
2534  (MaskVal >> ShiftVal != 0) &&
2535  ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2536  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2537  MaskVal >> ShiftVal,
2538  CmpVal >> ShiftVal,
2539  SystemZICMP::Any))) {
2540  NewC.Op0 = NewC.Op0.getOperand(0);
2541  MaskVal >>= ShiftVal;
2542  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2543  NewC.Op0.getOpcode() == ISD::SRL &&
2544  isSimpleShift(NewC.Op0, ShiftVal) &&
2545  (MaskVal << ShiftVal != 0) &&
2546  ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2547  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2548  MaskVal << ShiftVal,
2549  CmpVal << ShiftVal,
2550  SystemZICMP::UnsignedOnly))) {
2551  NewC.Op0 = NewC.Op0.getOperand(0);
2552  MaskVal <<= ShiftVal;
2553  } else {
2554  NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2555  NewC.ICmpType);
2556  if (!NewCCMask)
2557  return;
2558  }
2559 
2560  // Go ahead and make the change.
2561  C.Opcode = SystemZISD::TM;
2562  C.Op0 = NewC.Op0;
2563  if (Mask && Mask->getZExtValue() == MaskVal)
2564  C.Op1 = SDValue(Mask, 0);
2565  else
2566  C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2567  C.CCValid = SystemZ::CCMASK_TM;
2568  C.CCMask = NewCCMask;
2569 }
2570 
2571 // See whether the comparison argument contains a redundant AND
2572 // and remove it if so. This sometimes happens due to the generic
2573 // BRCOND expansion.
2574 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2575  Comparison &C) {
2576  if (C.Op0.getOpcode() != ISD::AND)
2577  return;
2578  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2579  if (!Mask)
2580  return;
2581  KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2582  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2583  return;
2584 
2585  C.Op0 = C.Op0.getOperand(0);
2586 }
2587 
2588 // Return a Comparison that tests the condition-code result of intrinsic
2589 // node Call against constant integer CC using comparison code Cond.
2590 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2591 // and CCValid is the set of possible condition-code results.
2592 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2593  SDValue Call, unsigned CCValid, uint64_t CC,
2594  ISD::CondCode Cond) {
2595  Comparison C(Call, SDValue(), SDValue());
2596  C.Opcode = Opcode;
2597  C.CCValid = CCValid;
2598  if (Cond == ISD::SETEQ)
2599  // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2600  C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2601  else if (Cond == ISD::SETNE)
2602  // ...and the inverse of that.
2603  C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2604  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2605  // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2606  // always true for CC>3.
2607  C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2608  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2609  // ...and the inverse of that.
2610  C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2611  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2612  // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2613  // always true for CC>3.
2614  C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2615  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2616  // ...and the inverse of that.
2617  C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2618  else
2619  llvm_unreachable("Unexpected integer comparison type");
2620  C.CCMask &= CCValid;
2621  return C;
2622 }
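A standalone sketch of the mask arithmetic above, under the bit-3-for-CC==0 convention the comments describe (constexpr mirrors of the two most common cases; names are illustrative):

constexpr unsigned eqMask(unsigned CC) { return CC < 4 ? 1u << (3 - CC) : 0; }
constexpr unsigned ltMask(unsigned CC) { return CC < 4 ? ~0u << (4 - CC) : ~0u; }
// SETEQ against CC==1 selects exactly the CC==1 bit:
static_assert((eqMask(1) & 0xF) == 0b0100, "only CC==1");
// SETLT against CC==2 selects CC==0 and CC==1 (everything below 2):
static_assert((ltMask(2) & 0xF) == 0b1100, "CC==0 or CC==1");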
2623 
2624 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2625 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2626  ISD::CondCode Cond, const SDLoc &DL,
2627  SDValue Chain = SDValue(),
2628  bool IsSignaling = false) {
2629  if (CmpOp1.getOpcode() == ISD::Constant) {
2630  assert(!Chain);
2631  uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2632  unsigned Opcode, CCValid;
2633  if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2634  CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2635  isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2636  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2637  if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2638  CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2639  isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2640  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2641  }
2642  Comparison C(CmpOp0, CmpOp1, Chain);
2643  C.CCMask = CCMaskForCondCode(Cond);
2644  if (C.Op0.getValueType().isFloatingPoint()) {
2645  C.CCValid = SystemZ::CCMASK_FCMP;
2646  if (!C.Chain)
2647  C.Opcode = SystemZISD::FCMP;
2648  else if (!IsSignaling)
2649  C.Opcode = SystemZISD::STRICT_FCMP;
2650  else
2651  C.Opcode = SystemZISD::STRICT_FCMPS;
2652  adjustForFNeg(C);
2653  } else {
2654  assert(!C.Chain);
2655  C.CCValid = SystemZ::CCMASK_ICMP;
2656  C.Opcode = SystemZISD::ICMP;
2657  // Choose the type of comparison. Equality and inequality tests can
2658  // use either signed or unsigned comparisons. The choice also doesn't
2659  // matter if both sign bits are known to be clear. In those cases we
2660  // want to give the main isel code the freedom to choose whichever
2661  // form fits best.
2662  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2663  C.CCMask == SystemZ::CCMASK_CMP_NE ||
2664  (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2665  C.ICmpType = SystemZICMP::Any;
2666  else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2667  C.ICmpType = SystemZICMP::UnsignedOnly;
2668  else
2669  C.ICmpType = SystemZICMP::SignedOnly;
2670  C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2671  adjustForRedundantAnd(DAG, DL, C);
2672  adjustZeroCmp(DAG, DL, C);
2673  adjustSubwordCmp(DAG, DL, C);
2674  adjustForSubtraction(DAG, DL, C);
2675  adjustForLTGFR(C);
2676  adjustICmpTruncate(DAG, DL, C);
2677  }
2678 
2679  if (shouldSwapCmpOperands(C)) {
2680  std::swap(C.Op0, C.Op1);
2681  C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2682  }
2683 
2684  adjustForTestUnderMask(DAG, DL, C);
2685  return C;
2686 }
2687 
2688 // Emit the comparison instruction described by C.
2689 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2690  if (!C.Op1.getNode()) {
2691  SDNode *Node;
2692  switch (C.Op0.getOpcode()) {
2693  case ISD::INTRINSIC_W_CHAIN:
2694  Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2695  return SDValue(Node, 0);
2696  case ISD::INTRINSIC_WO_CHAIN:
2697  Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2698  return SDValue(Node, Node->getNumValues() - 1);
2699  default:
2700  llvm_unreachable("Invalid comparison operands");
2701  }
2702  }
2703  if (C.Opcode == SystemZISD::ICMP)
2704  return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2705  DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
2706  if (C.Opcode == SystemZISD::TM) {
2707  bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2708  bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2709  return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2710  DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
2711  }
2712  if (C.Chain) {
2713  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
2714  return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
2715  }
2716  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2717 }
2718 
2719 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2720 // 64 bits. Extend is the extension type to use. Store the high part
2721 // in Hi and the low part in Lo.
2722 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2723  SDValue Op0, SDValue Op1, SDValue &Hi,
2724  SDValue &Lo) {
2725  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2726  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2727  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2728  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2729  DAG.getConstant(32, DL, MVT::i64));
2730  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2731  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2732 }
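The same widening trick on plain integers, as a sanity check of this lowering (a sketch for the unsigned case; a signed Extend would use int64_t instead):

#include <cstdint>
static void mulLoHiU32(uint32_t a, uint32_t b, uint32_t &hi, uint32_t &lo) {
  uint64_t p = uint64_t(a) * uint64_t(b); // both operands extended to 64 bits
  hi = uint32_t(p >> 32);                 // SRL by 32, then TRUNCATE
  lo = uint32_t(p);                       // TRUNCATE of the full product
}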
2733 
2734 // Lower a binary operation that produces two VT results, one in each
2735 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2736 // and Opcode performs the GR128 operation. Store the even register result
2737 // in Even and the odd register result in Odd.
2738 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2739  unsigned Opcode, SDValue Op0, SDValue Op1,
2740  SDValue &Even, SDValue &Odd) {
2741  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2742  bool Is32Bit = is32Bit(VT);
2743  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2744  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2745 }
2746 
2747 // Return an i32 value that is 1 if the CC value produced by CCReg is
2748 // in the mask CCMask and 0 otherwise. CC is known to have a value
2749 // in CCValid, so other values can be ignored.
2750 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2751  unsigned CCValid, unsigned CCMask) {
2752  SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
2753  DAG.getConstant(0, DL, MVT::i32),
2754  DAG.getTargetConstant(CCValid, DL, MVT::i32),
2755  DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
2756  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2757 }
2758 
2759 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2760 // be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
2761 // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
2762 // floating-point comparisons, and CmpMode::SignalingFP for strict signaling
2763 // floating-point comparisons.
2764 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
2765 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
2766  switch (CC) {
2767  case ISD::SETOEQ:
2768  case ISD::SETEQ:
2769  switch (Mode) {
2770  case CmpMode::Int: return SystemZISD::VICMPE;
2771  case CmpMode::FP: return SystemZISD::VFCMPE;
2772  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
2773  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
2774  }
2775  llvm_unreachable("Bad mode");
2776 
2777  case ISD::SETOGE:
2778  case ISD::SETGE:
2779  switch (Mode) {
2780  case CmpMode::Int: return 0;
2781  case CmpMode::FP: return SystemZISD::VFCMPHE;
2782  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
2783  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
2784  }
2785  llvm_unreachable("Bad mode");
2786 
2787  case ISD::SETOGT:
2788  case ISD::SETGT:
2789  switch (Mode) {
2790  case CmpMode::Int: return SystemZISD::VICMPH;
2791  case CmpMode::FP: return SystemZISD::VFCMPH;
2792  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
2793  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
2794  }
2795  llvm_unreachable("Bad mode");
2796 
2797  case ISD::SETUGT:
2798  switch (Mode) {
2799  case CmpMode::Int: return SystemZISD::VICMPHL;
2800  case CmpMode::FP: return 0;
2801  case CmpMode::StrictFP: return 0;
2802  case CmpMode::SignalingFP: return 0;
2803  }
2804  llvm_unreachable("Bad mode");
2805 
2806  default:
2807  return 0;
2808  }
2809 }
2810 
2811 // Return the SystemZISD vector comparison operation for CC or its inverse,
2812 // or 0 if neither can be done directly. Indicate in Invert whether the
2813 // result is for the inverse of CC. Mode is as above.
2814 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
2815  bool &Invert) {
2816  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2817  Invert = false;
2818  return Opcode;
2819  }
2820 
2821  CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
2822  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2823  Invert = true;
2824  return Opcode;
2825  }
2826 
2827  return 0;
2828 }
2829 
2830 // Return a v2f64 that contains the extended form of elements Start and Start+1
2831 // of v4f32 value Op. If Chain is nonnull, return the strict form.
2832 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2833  SDValue Op, SDValue Chain) {
2834  int Mask[] = { Start, -1, Start + 1, -1 };
2835  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2836  if (Chain) {
2837  SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
2838  return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
2839  }
2840  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2841 }
2842 
2843 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2844 // producing a result of type VT. If Chain is nonnull, return the strict form.
2845 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2846  const SDLoc &DL, EVT VT,
2847  SDValue CmpOp0,
2848  SDValue CmpOp1,
2849  SDValue Chain) const {
2850  // There is no hardware support for v4f32 (unless we have the vector
2851  // enhancements facility 1), so extend the vector into two v2f64s
2852  // and compare those.
2853  if (CmpOp0.getValueType() == MVT::v4f32 &&
2854  !Subtarget.hasVectorEnhancements1()) {
2855  SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
2856  SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
2857  SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
2858  SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
2859  if (Chain) {
2860  SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
2861  SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
2862  SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
2863  SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2864  SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
2865  H1.getValue(1), L1.getValue(1),
2866  HRes.getValue(1), LRes.getValue(1) };
2867  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
2868  SDValue Ops[2] = { Res, NewChain };
2869  return DAG.getMergeValues(Ops, DL);
2870  }
2871  SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2872  SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2873  return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2874  }
2875  if (Chain) {
2876  SDVTList VTs = DAG.getVTList(VT, MVT::Other);
2877  return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
2878  }
2879  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
2880 }
2881 
2882 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2883 // an integer mask of type VT. If Chain is nonnull, we have a strict
2884 // floating-point comparison. If in addition IsSignaling is true, we have
2885 // a strict signaling floating-point comparison.
2886 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
2887  const SDLoc &DL, EVT VT,
2888  ISD::CondCode CC,
2889  SDValue CmpOp0,
2890  SDValue CmpOp1,
2891  SDValue Chain,
2892  bool IsSignaling) const {
2893  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
2894  assert (!Chain || IsFP);
2895  assert (!IsSignaling || Chain);
2896  CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
2897  Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
2898  bool Invert = false;
2899  SDValue Cmp;
2900  switch (CC) {
2901  // Handle tests for order using (or (ogt y x) (oge x y)).
2902  case ISD::SETUO:
2903  Invert = true;
2904  LLVM_FALLTHROUGH;
2905  case ISD::SETO: {
2906  assert(IsFP && "Unexpected integer comparison");
2907  SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2908  DL, VT, CmpOp1, CmpOp0, Chain);
2909  SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
2910  DL, VT, CmpOp0, CmpOp1, Chain);
2911  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
2912  if (Chain)
2913  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2914  LT.getValue(1), GE.getValue(1));
2915  break;
2916  }
2917 
2918  // Handle <> tests using (or (ogt y x) (ogt x y)).
2919  case ISD::SETUEQ:
2920  Invert = true;
2921  LLVM_FALLTHROUGH;
2922  case ISD::SETONE: {
2923  assert(IsFP && "Unexpected integer comparison");
2924  SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2925  DL, VT, CmpOp1, CmpOp0, Chain);
2926  SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2927  DL, VT, CmpOp0, CmpOp1, Chain);
2928  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
2929  if (Chain)
2930  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2931  LT.getValue(1), GT.getValue(1));
2932  break;
2933  }
2934 
2935  // Otherwise a single comparison is enough. It doesn't really
2936  // matter whether we try the inversion or the swap first, since
2937  // there are no cases where both work.
2938  default:
2939  if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
2940  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
2941  else {
2942  CC = ISD::getSetCCSwappedOperands(CC);
2943  if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
2944  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
2945  else
2946  llvm_unreachable("Unhandled comparison");
2947  }
2948  if (Chain)
2949  Chain = Cmp.getValue(1);
2950  break;
2951  }
2952  if (Invert) {
2953  SDValue Mask =
2954  DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
2955  Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
2956  }
2957  if (Chain && Chain.getNode() != Cmp.getNode()) {
2958  SDValue Ops[2] = { Cmp, Chain };
2959  Cmp = DAG.getMergeValues(Ops, DL);
2960  }
2961  return Cmp;
2962 }
2963 
2964 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
2965  SelectionDAG &DAG) const {
2966  SDValue CmpOp0 = Op.getOperand(0);
2967  SDValue CmpOp1 = Op.getOperand(1);
2968  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2969  SDLoc DL(Op);
2970  EVT VT = Op.getValueType();
2971  if (VT.isVector())
2972  return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
2973 
2974  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2975  SDValue CCReg = emitCmp(DAG, DL, C);
2976  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
2977 }
2978 
2979 SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
2980  SelectionDAG &DAG,
2981  bool IsSignaling) const {
2982  SDValue Chain = Op.getOperand(0);
2983  SDValue CmpOp0 = Op.getOperand(1);
2984  SDValue CmpOp1 = Op.getOperand(2);
2985  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
2986  SDLoc DL(Op);
2987  EVT VT = Op.getNode()->getValueType(0);
2988  if (VT.isVector()) {
2989  SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
2990  Chain, IsSignaling);
2991  return Res.getValue(Op.getResNo());
2992  }
2993 
2994  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
2995  SDValue CCReg = emitCmp(DAG, DL, C);
2996  CCReg->setFlags(Op->getFlags());
2997  SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
2998  SDValue Ops[2] = { Result, CCReg.getValue(1) };
2999  return DAG.getMergeValues(Ops, DL);
3000 }
3001 
3002 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3003  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3004  SDValue CmpOp0 = Op.getOperand(2);
3005  SDValue CmpOp1 = Op.getOperand(3);
3006  SDValue Dest = Op.getOperand(4);
3007  SDLoc DL(Op);
3008 
3009  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3010  SDValue CCReg = emitCmp(DAG, DL, C);
3011  return DAG.getNode(
3012  SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3013  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3014  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3015 }
3016 
3017 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3018 // allowing Pos and Neg to be wider than CmpOp.
3019 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3020  return (Neg.getOpcode() == ISD::SUB &&
3021  Neg.getOperand(0).getOpcode() == ISD::Constant &&
3022  cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
3023  Neg.getOperand(1) == Pos &&
3024  (Pos == CmpOp ||
3025  (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3026  Pos.getOperand(0) == CmpOp)));
3027 }
3028 
3029 // Return the absolute or negative absolute of Op; IsNegative decides which.
3030 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3031  bool IsNegative) {
3032  Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3033  if (IsNegative)
3034  Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3035  DAG.getConstant(0, DL, Op.getValueType()), Op);
3036  return Op;
3037 }
3038 
3039 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3040  SelectionDAG &DAG) const {
3041  SDValue CmpOp0 = Op.getOperand(0);
3042  SDValue CmpOp1 = Op.getOperand(1);
3043  SDValue TrueOp = Op.getOperand(2);
3044  SDValue FalseOp = Op.getOperand(3);
3045  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3046  SDLoc DL(Op);
3047 
3048  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3049 
3050  // Check for absolute and negative-absolute selections, including those
3051  // where the comparison value is sign-extended (for LPGFR and LNGFR).
3052  // This check supplements the one in DAGCombiner.
3053  if (C.Opcode == SystemZISD::ICMP &&
3054  C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3055  C.CCMask != SystemZ::CCMASK_CMP_NE &&
3056  C.Op1.getOpcode() == ISD::Constant &&
3057  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
3058  if (isAbsolute(C.Op0, TrueOp, FalseOp))
3059  return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3060  if (isAbsolute(C.Op0, FalseOp, TrueOp))
3061  return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3062  }
3063 
3064  SDValue CCReg = emitCmp(DAG, DL, C);
3065  SDValue Ops[] = {TrueOp, FalseOp,
3066  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3067  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3068 
3069  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3070 }
3071 
3072 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3073  SelectionDAG &DAG) const {
3074  SDLoc DL(Node);
3075  const GlobalValue *GV = Node->getGlobal();
3076  int64_t Offset = Node->getOffset();
3077  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3078  CodeModel::Model CM = DAG.getTarget().getCodeModel();
3079 
3080  SDValue Result;
3081  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3082  if (isInt<32>(Offset)) {
3083  // Assign anchors at 1<<12 byte boundaries.
3084  uint64_t Anchor = Offset & ~uint64_t(0xfff);
3085  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3086  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3087 
3088  // The offset can be folded into the address if it is aligned to a
3089  // halfword.
3090  Offset -= Anchor;
3091  if (Offset != 0 && (Offset & 1) == 0) {
3092  SDValue Full =
3093  DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3094  Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3095  Offset = 0;
3096  }
3097  } else {
3098  // Conservatively load a constant offset greater than 32 bits into a
3099  // register below.
3100  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3101  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3102  }
3103  } else {
3104  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3105  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3106  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3107  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3108  }
3109 
3110  // If there was a non-zero offset that we didn't fold, create an explicit
3111  // addition for it.
3112  if (Offset != 0)
3113  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3114  DAG.getConstant(Offset, DL, PtrVT));
3115 
3116  return Result;
3117 }
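The anchoring scheme above, restated on plain integers (a sketch): the anchor is the offset rounded down to a 4096-byte boundary, and the residue can be folded into the pc-relative address only when it is even, because LARL-style relocations address halfwords.

#include <cstdint>
static void splitOffset(int64_t Offset, int64_t &Anchor, int64_t &Residue) {
  Anchor = int64_t(uint64_t(Offset) & ~uint64_t(0xfff)); // 1<<12 byte anchor
  Residue = Offset - Anchor;                             // in [0, 0xfff]
}
// A residue of 0x802 is halfword-aligned and folds into PCREL_OFFSET;
// 0x801 is odd and is left for the explicit ADD emitted at the end.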
3118 
3119 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3120  SelectionDAG &DAG,
3121  unsigned Opcode,
3122  SDValue GOTOffset) const {
3123  SDLoc DL(Node);
3124  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3125  SDValue Chain = DAG.getEntryNode();
3126  SDValue Glue;
3127 
3128  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3129  CallingConv::GHC)
3130  report_fatal_error("In GHC calling convention TLS is not supported");
3131 
3132  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3133  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3134  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3135  Glue = Chain.getValue(1);
3136  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3137  Glue = Chain.getValue(1);
3138 
3139  // The first call operand is the chain and the second is the TLS symbol.
3140  SmallVector<SDValue, 8> Ops;
3141  Ops.push_back(Chain);
3142  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3143  Node->getValueType(0),
3144  0, 0));
3145 
3146  // Add argument registers to the end of the list so that they are
3147  // known live into the call.
3148  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3149  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3150 
3151  // Add a register mask operand representing the call-preserved registers.
3152  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3153  const uint32_t *Mask =
3154  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3155  assert(Mask && "Missing call preserved mask for calling convention");
3156  Ops.push_back(DAG.getRegisterMask(Mask));
3157 
3158  // Glue the call to the argument copies.
3159  Ops.push_back(Glue);
3160 
3161  // Emit the call.
3162  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3163  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3164  Glue = Chain.getValue(1);
3165 
3166  // Copy the return value from %r2.
3167  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3168 }
3169 
3170 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3171  SelectionDAG &DAG) const {
3172  SDValue Chain = DAG.getEntryNode();
3173  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3174 
3175  // The high part of the thread pointer is in access register 0.
3176  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3177  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3178 
3179  // The low part of the thread pointer is in access register 1.
3180  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3181  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3182 
3183  // Merge them into a single 64-bit address.
3184  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3185  DAG.getConstant(32, DL, PtrVT));
3186  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3187 }
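Stripped of the access-register plumbing, the merge at the end is ordinary 32/32 reassembly of a 64-bit value; on plain integers (a sketch):

#include <cstdint>
static uint64_t mergeThreadPointer(uint32_t hi, uint32_t lo) {
  // hi mirrors access register 0, lo mirrors access register 1.
  return (uint64_t(hi) << 32) | uint64_t(lo);
}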
3188 
3189 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3190  SelectionDAG &DAG) const {
3191  if (DAG.getTarget().useEmulatedTLS())
3192  return LowerToTLSEmulatedModel(Node, DAG);
3193  SDLoc DL(Node);
3194  const GlobalValue *GV = Node->getGlobal();
3195  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3196  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3197 
3198  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3199  CallingConv::GHC)
3200  report_fatal_error("In GHC calling convention TLS is not supported");
3201 
3202  SDValue TP = lowerThreadPointer(DL, DAG);
3203 
3204  // Get the offset of GA from the thread pointer, based on the TLS model.
3205  SDValue Offset;
3206  switch (model) {
3207  case TLSModel::GeneralDynamic: {
3208  // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3209  SystemZConstantPoolValue *CPV =
3210  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3211 
3212  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3213  Offset = DAG.getLoad(
3214  PtrVT, DL, DAG.getEntryNode(), Offset,
3215  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3216 
3217  // Call __tls_get_offset to retrieve the offset.
3218  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3219  break;
3220  }
3221 
3222  case TLSModel::LocalDynamic: {
3223  // Load the GOT offset of the module ID.
3224  SystemZConstantPoolValue *CPV =
3225  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3226 
3227  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3228  Offset = DAG.getLoad(
3229  PtrVT, DL, DAG.getEntryNode(), Offset,
3230  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3231 
3232  // Call __tls_get_offset to retrieve the module base offset.
3233  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3234 
3235  // Note: The SystemZLDCleanupPass will remove redundant computations
3236  // of the module base offset. Count total number of local-dynamic
3237  // accesses to trigger execution of that pass.
3238  SystemZMachineFunctionInfo* MFI =
3239  DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3240  MFI->incNumLocalDynamicTLSAccesses();
3241 
3242  // Add the per-symbol offset.
3243  CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3244 
3245  SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3246  DTPOffset = DAG.getLoad(
3247  PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3248  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3249 
3250  Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3251  break;
3252  }
3253 
3254  case TLSModel::InitialExec: {
3255  // Load the offset from the GOT.
3256  Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3257  SystemZII::MO_INDNTPOFF);
3258  Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3259  Offset =
3260  DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3261  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3262  break;
3263  }
3264 
3265  case TLSModel::LocalExec: {
3266  // Force the offset into the constant pool and load it from there.
3267  SystemZConstantPoolValue *CPV =
3268  SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3269 
3270  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3271  Offset = DAG.getLoad(
3272  PtrVT, DL, DAG.getEntryNode(), Offset,
3273  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3274  break;
3275  }
3276  }
3277 
3278  // Add the base and offset together.
3279  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3280 }
3281 
3282 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3283  SelectionDAG &DAG) const {
3284  SDLoc DL(Node);
3285  const BlockAddress *BA = Node->getBlockAddress();
3286  int64_t Offset = Node->getOffset();
3287  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3288 
3289  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3290  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3291  return Result;
3292 }
3293 
3294 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3295  SelectionDAG &DAG) const {
3296  SDLoc DL(JT);
3297  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3298  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3299 
3300  // Use LARL to load the address of the table.
3301  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3302 }
3303 
3304 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3305  SelectionDAG &DAG) const {
3306  SDLoc DL(CP);
3307  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3308 
3309  SDValue Result;
3310  if (CP->isMachineConstantPoolEntry())
3311  Result =
3312  DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3313  else
3314  Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3315  CP->getOffset());
3316 
3317  // Use LARL to load the address of the constant pool entry.
3318  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3319 }
3320 
3321 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3322  SelectionDAG &DAG) const {
3323  auto *TFL =
3324  static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
3325  MachineFunction &MF = DAG.getMachineFunction();
3326  MachineFrameInfo &MFI = MF.getFrameInfo();
3327  MFI.setFrameAddressIsTaken(true);
3328 
3329  SDLoc DL(Op);
3330  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3331  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3332 
3333  // By definition, the frame address is the address of the back chain. (In
3334  // the case of packed stack without backchain, return the address where the
3335  // backchain would have been stored. This will be either unused space or
3336  // a saved register.)
3337  int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3338  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3339 
3340  // FIXME The frontend should detect this case.
3341  if (Depth > 0) {
3342  report_fatal_error("Unsupported stack frame traversal count");
3343  }
3344 
3345  return BackChain;
3346 }
3347 
3348 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3349  SelectionDAG &DAG) const {
3350  MachineFunction &MF = DAG.getMachineFunction();
3351  MachineFrameInfo &MFI = MF.getFrameInfo();
3352  MFI.setReturnAddressIsTaken(true);
3353 
3354  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3355  return SDValue();
3356 
3357  SDLoc DL(Op);
3358  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3359  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3360 
3361  // FIXME The frontend should detect this case.
3362  if (Depth > 0) {
3363  report_fatal_error("Unsupported stack frame traversal count");
3364  }
3365 
3366  // Return R14D, which has the return address. Mark it an implicit live-in.
3367  unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3368  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3369 }
3370 
3371 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3372  SelectionDAG &DAG) const {
3373  SDLoc DL(Op);
3374  SDValue In = Op.getOperand(0);
3375  EVT InVT = In.getValueType();
3376  EVT ResVT = Op.getValueType();
3377 
3378  // Convert loads directly. This is normally done by DAGCombiner,
3379  // but we need this case for bitcasts that are created during lowering
3380  // and which are then lowered themselves.
3381  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3382  if (ISD::isNormalLoad(LoadN)) {
3383  SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3384  LoadN->getBasePtr(), LoadN->getMemOperand());
3385  // Update the chain uses.
3386  DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3387  return NewLoad;
3388  }
3389 
3390  if (InVT == MVT::i32 && ResVT == MVT::f32) {
3391  SDValue In64;
3392  if (Subtarget.hasHighWord()) {
3393  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3394  MVT::i64);
3395  In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3396  MVT::i64, SDValue(U64, 0), In);
3397  } else {
3398  In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3399  In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3400  DAG.getConstant(32, DL, MVT::i64));
3401  }
3402  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3403  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3404  DL, MVT::f32, Out64);
3405  }
3406  if (InVT == MVT::f32 && ResVT == MVT::i32) {
3407  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3408  SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3409  MVT::f64, SDValue(U64, 0), In);
3410  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3411  if (Subtarget.hasHighWord())
3412  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3413  MVT::i32, Out64);
3414  SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3415  DAG.getConstant(32, DL, MVT::i64));
3416  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3417  }
3418  llvm_unreachable("Unexpected bitcast combination");
3419 }
3420 
3421 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3422  SelectionDAG &DAG) const {
3423  MachineFunction &MF = DAG.getMachineFunction();
3424  SystemZMachineFunctionInfo *FuncInfo =
3425  MF.getInfo<SystemZMachineFunctionInfo>();
3426  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3427 
3428  SDValue Chain = Op.getOperand(0);
3429  SDValue Addr = Op.getOperand(1);
3430  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3431  SDLoc DL(Op);
3432 
3433  // The initial values of each field.
3434  const unsigned NumFields = 4;
3435  SDValue Fields[NumFields] = {
3436  DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3437  DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3438  DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3439  DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3440  };
3441 
3442  // Store each field into its respective slot.
3443  SDValue MemOps[NumFields];
3444  unsigned Offset = 0;
3445  for (unsigned I = 0; I < NumFields; ++I) {
3446  SDValue FieldAddr = Addr;
3447  if (Offset != 0)
3448  FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3449  DAG.getIntPtrConstant(Offset, DL));
3450  MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3451  MachinePointerInfo(SV, Offset));
3452  Offset += 8;
3453  }
3454  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3455 }
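The four 8-byte stores above populate the s390x ELF va_list structure at offsets 0, 8, 16 and 24; a sketch of the layout they assume (field names follow the usual ABI description and are illustrative here):

typedef struct {
  long __gpr;                // index of the next unnamed GPR argument
  long __fpr;                // index of the next unnamed FPR argument
  void *__overflow_arg_area; // overflow (stack) argument area
  void *__reg_save_area;     // register save area
} assumed_va_list_tag;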
3456 
3457 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3458  SelectionDAG &DAG) const {
3459  SDValue Chain = Op.getOperand(0);
3460  SDValue DstPtr = Op.getOperand(1);
3461  SDValue SrcPtr = Op.getOperand(2);
3462  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3463  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3464  SDLoc DL(Op);
3465 
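 // A va_list is four 8-byte fields (32 bytes in all; see lowerVASTART
 // above), so va_copy reduces to a fixed-size 32-byte memcpy.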
3466  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
3467  Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3468  /*isTailCall*/ false, MachinePointerInfo(DstSV),
3469  MachinePointerInfo(SrcSV));
3470 }
3471 
3472 SDValue SystemZTargetLowering::
3473 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
3474  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3475  MachineFunction &MF = DAG.getMachineFunction();
3476  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3477  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3478 
3479  SDValue Chain = Op.getOperand(0);
3480  SDValue Size = Op.getOperand(1);
3481  SDValue Align = Op.getOperand(2);
3482  SDLoc DL(Op);
3483 
3484  // If the user has set the "no-realign-stack" function attribute, ignore
3485  // alloca alignments.
3486  uint64_t AlignVal =
3487  (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3488 
3489  uint64_t StackAlign = TFI->getStackAlignment();
3490  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3491  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3492 
3493  unsigned SPReg = getStackPointerRegisterToSaveRestore();
3494  SDValue NeededSpace = Size;
3495 
3496  // Get a reference to the stack pointer.
3497  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3498 
3499  // If we need a backchain, save it now.
3500  SDValue Backchain;
3501  if (StoreBackchain)
3502  Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
3503  MachinePointerInfo());
3504 
3505  // Add extra space for alignment if needed.
3506  if (ExtraAlignSpace)
3507  NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3508  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3509 
3510  // Get the new stack pointer value.
3511  SDValue NewSP;
3512  if (hasInlineStackProbe(MF)) {
3513  NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
3514  DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
3515  Chain = NewSP.getValue(1);
3516  }
3517  else {
3518  NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3519  // Copy the new stack pointer back.
3520  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3521  }
3522 
3523  // The allocated data lives above the 160 bytes allocated for the standard
3524  // frame, plus any outgoing stack arguments. We don't know how much that
3525  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3526  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3527  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3528 
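 // Over-allocating ExtraAlignSpace bytes and rounding the result up with
 // an AND mask guarantees the requested alignment; e.g. a 64-byte alloca
 // alignment on an 8-byte-aligned stack gives ExtraAlignSpace == 56 and
 // a mask of ~63.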
3529  // Dynamically realign if needed.
3530  if (RequiredAlign > StackAlign) {
3531  Result =
3532  DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3533  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3534  Result =
3535  DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3536  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3537  }
3538 
3539  if (StoreBackchain)
3540  Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
3541  MachinePointerInfo());
3542 
3543  SDValue Ops[2] = { Result, Chain };
3544  return DAG.getMergeValues(Ops, DL);
3545 }
3546 
3547 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3548  SDValue Op, SelectionDAG &DAG) const {
3549  SDLoc DL(Op);
3550 
3551  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3552 }
3553 
3554 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3555  SelectionDAG &DAG) const {
3556  EVT VT = Op.getValueType();
3557  SDLoc DL(Op);
3558  SDValue Ops[2];
3559  if (is32Bit(VT))
3560  // Just do a normal 64-bit multiplication and extract the results.
3561  // We define this so that it can be used for constant division.
3562  lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3563  Op.getOperand(1), Ops[1], Ops[0]);
3564  else if (Subtarget.hasMiscellaneousExtensions2())
3565  // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3566  // the high result in the even register. ISD::SMUL_LOHI is defined to
3567  // return the low half first, so the results are in reverse order.
3568  lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3569  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3570  else {
3571  // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3572  //
3573  // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3574  //
3575  // but using the fact that the upper halves are either all zeros
3576  // or all ones:
3577  //
3578  // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3579  //
3580  // and grouping the two subtracted terms together, since additions are
3581  // quicker than the multiplication:
3582  //
3583  // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
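 //
 // The replacement works because lh and rh are each either 0 or all
 // ones: when lh is 0, both (lh * rl) and (lh & rl) are 0, and when lh
 // is all ones, (lh * rl) == -rl while (lh & rl) == rl, so the multiply
 // is just a negated AND (and likewise for rh).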
3584  SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3585  SDValue LL = Op.getOperand(0);
3586  SDValue RL = Op.getOperand(1);
3587  SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3588  SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3589  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3590  // the high result in the even register. ISD::SMUL_LOHI is defined to
3591  // return the low half first, so the results are in reverse order.
3592  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3593  LL, RL, Ops[1], Ops[0]);
3594  SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3595  SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3596  SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3597  Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3598  }
3599  return DAG.getMergeValues(Ops, DL);
3600 }
3601 
3602 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3603  SelectionDAG &DAG) const {
3604  EVT VT = Op.getValueType();
3605  SDLoc DL(Op);
3606  SDValue Ops[2];
3607  if (is32Bit(VT))
3608  // Just do a normal 64-bit multiplication and extract the results.
3609  // We define this so that it can be used for constant division.
3610  lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3611  Op.getOperand(1), Ops[1], Ops[0]);
3612  else
3613  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3614  // the high result in the even register. ISD::UMUL_LOHI is defined to
3615  // return the low half first, so the results are in reverse order.
3616  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3617  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3618  return DAG.getMergeValues(Ops, DL);
3619 }
3620 
3621 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3622  SelectionDAG &DAG) const {
3623  SDValue Op0 = Op.getOperand(0);
3624  SDValue Op1 = Op.getOperand(1);
3625  EVT VT = Op.getValueType();
3626  SDLoc DL(Op);
3627 
3628  // We use DSGF for 32-bit division. This means the first operand must
3629  // always be 64-bit, and the second operand should be 32-bit whenever
3630  // that is possible, to improve performance.
3631  if (is32Bit(VT))
3632  Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3633  else if (DAG.ComputeNumSignBits(Op1) > 32)
3634  Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3635 
3636  // DSG(F) returns the remainder in the even register and the
3637  // quotient in the odd register.
3638  SDValue Ops[2];
3639  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3640  return DAG.getMergeValues(Ops, DL);
3641 }
3642 
3643 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3644  SelectionDAG &DAG) const {
3645  EVT VT = Op.getValueType();
3646  SDLoc DL(Op);
3647 
3648  // DL(G) returns the remainder in the even register and the
3649  // quotient in the odd register.
3650  SDValue Ops[2];
3651  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3652  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3653  return DAG.getMergeValues(Ops, DL);
3654 }
3655 
3656 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3657  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3658 
3659  // Get the known-zero masks for each operand.
3660  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3661  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3662  DAG.computeKnownBits(Ops[1])};
3663 
3664  // See if the upper 32 bits of one operand and the lower 32 bits of the
3665  // other are known zero; they then act as the low and high operands.
3666  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3667  Known[1].Zero.getZExtValue() };
3668  unsigned High, Low;
3669  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3670  High = 1, Low = 0;
3671  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3672  High = 0, Low = 1;
3673  else
3674  return Op;
3675 
3676  SDValue LowOp = Ops[Low];
3677  SDValue HighOp = Ops[High];
3678 
3679  // If the high part is a constant, we're better off using IILH.
3680  if (HighOp.getOpcode() == ISD::Constant)
3681  return Op;
3682 
3683  // If the low part is a constant that is outside the range of LHI,
3684  // then we're better off using IILF.
3685  if (LowOp.getOpcode() == ISD::Constant) {
3686  int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3687  if (!isInt<16>(Value))
3688  return Op;
3689  }
3690 
3691  // Check whether the high part is an AND that doesn't change the
3692  // high 32 bits and just masks out low bits. We can skip it if so.
3693  if (HighOp.getOpcode() == ISD::AND &&
3694  HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3695  SDValue HighOp0 = HighOp.getOperand(0);
3696  uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3697  if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3698  HighOp = HighOp0;
3699  }
3700 
3701  // Take advantage of the fact that all GR32 operations only change the
3702  // low 32 bits by truncating LowOp to an i32 and inserting it directly
3703  // using a subreg. The interesting cases are those where the truncation
3704  // can be folded.
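 // For example, if LowOp is itself a 32-bit load or an AND with a mask in
 // the low 32 bits, the truncation folds into that node and the OR becomes
 // a single insertion into the low word of HighOp.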
3705  SDLoc DL(Op);
3706  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3707  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3708  MVT::i64, HighOp, Low32);
3709 }
3710 
3711 // Lower SADDO/SSUBO/UADDO/USUBO nodes.
3712 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3713  SelectionDAG &DAG) const {
3714  SDNode *N = Op.getNode();
3715  SDValue LHS = N->getOperand(0);
3716  SDValue RHS = N->getOperand(1);
3717  SDLoc DL(N);
3718  unsigned BaseOp = 0;
3719  unsigned CCValid = 0;
3720  unsigned CCMask = 0;
3721 
3722  switch (Op.getOpcode()) {
3723  default: llvm_unreachable("Unknown instruction!");
3724  case ISD::SADDO:
3725  BaseOp = SystemZISD::SADDO;
3726  CCValid = SystemZ::CCMASK_ARITH;
3727  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3728  break;
3729  case ISD::SSUBO:
3730  BaseOp = SystemZISD::SSUBO;
3731  CCValid = SystemZ::CCMASK_ARITH;
3732  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3733  break;
3734  case ISD::UADDO:
3735  BaseOp = SystemZISD::UADDO;
3736  CCValid = SystemZ::CCMASK_LOGICAL;
3737  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3738  break;
3739  case ISD::USUBO:
3740  BaseOp = SystemZISD::USUBO;
3741  CCValid = SystemZ::CCMASK_LOGICAL;
3742  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3743  break;
3744  }
3745 
3746  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3747  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3748 
3749  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3750  if (N->getValueType(1) == MVT::i1)
3751  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3752 
3753  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3754 }
3755 
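// The following two helpers check that a chained carry/borrow value is
// rooted in a UADDO or USUBO node, i.e. that the incoming carry actually
// originates from a logical add/subtract whose CC encodes it.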
3756 static bool isAddCarryChain(SDValue Carry) {
3757  while (Carry.getOpcode() == ISD::ADDCARRY)
3758  Carry = Carry.getOperand(2);
3759  return Carry.getOpcode() == ISD::UADDO;
3760 }
3761 
3762 static bool isSubBorrowChain(SDValue Carry) {
3763  while (Carry.getOpcode() == ISD::SUBCARRY)
3764  Carry = Carry.getOperand(2);
3765  return Carry.getOpcode() == ISD::USUBO;
3766 }
3767 
3768 // Lower ADDCARRY/SUBCARRY nodes.
3769 SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
3770  SelectionDAG &DAG) const {
3771 
3772  SDNode *N = Op.getNode();
3773  MVT VT = N->getSimpleValueType(0);
3774 
3775  // Let legalize expand this if it isn't a legal type yet.
3776  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3777  return SDValue();
3778 
3779  SDValue LHS = N->getOperand(0);
3780  SDValue RHS = N->getOperand(1);
3781  SDValue Carry = Op.getOperand(2);
3782  SDLoc DL(N);
3783  unsigned BaseOp = 0;
3784  unsigned CCValid = 0;
3785  unsigned CCMask = 0;
3786 
3787  switch (Op.getOpcode()) {
3788  default: llvm_unreachable("Unknown instruction!");
3789  case ISD::ADDCARRY:
3790  if (!isAddCarryChain(Carry))
3791  return SDValue();
3792 
3793  BaseOp = SystemZISD::ADDCARRY;
3794  CCValid = SystemZ::CCMASK_LOGICAL;
3795  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3796  break;
3797  case ISD::SUBCARRY:
3798  if (!isSubBorrowChain(Carry))
3799  return SDValue();
3800 
3801  BaseOp = SystemZISD::SUBCARRY;
3802  CCValid = SystemZ::CCMASK_LOGICAL;
3803  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3804  break;
3805  }
3806 
3807  // Set the condition code from the carry flag.
3808  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
3809  DAG.getConstant(CCValid, DL, MVT::i32),
3810  DAG.getConstant(CCMask, DL, MVT::i32));
3811 
3812  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3813  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
3814 
3815  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3816  if (N->getValueType(1) == MVT::i1)
3817  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3818 
3819  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3820 }
3821 
3822 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
3823  SelectionDAG &DAG) const {
3824  EVT VT = Op.getValueType();
3825  SDLoc DL(Op);
3826  Op = Op.getOperand(0);
3827 
3828  // Handle vector types via VPOPCT.
3829  if (VT.isVector()) {
3830  Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
3831  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
3832  switch (VT.getScalarSizeInBits()) {
3833  case 8:
3834  break;
3835  case 16: {
3836  Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
3837  SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
3838  SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
3839  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3840  Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
3841  break;
3842  }
3843  case 32: {
3844  SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
3845  DAG.getConstant(0, DL, MVT::i32));
3846  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3847  break;
3848  }
3849  case 64: {
3850  SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
3851  DAG.getConstant(0, DL, MVT::i32));
3852  Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
3853  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3854  break;
3855  }
3856  default:
3857  llvm_unreachable("Unexpected type");
3858  }
3859  return Op;
3860  }
3861 
3862  // Get the known-zero mask for the operand.
3863  KnownBits Known = DAG.computeKnownBits(Op);
3864  unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
3865  if (NumSignificantBits == 0)
3866  return DAG.getConstant(0, DL, VT);
3867 
3868  // Skip known-zero high parts of the operand.
3869  int64_t OrigBitSize = VT.getSizeInBits();
3870  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
3871  BitSize = std::min(BitSize, OrigBitSize);
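 // E.g. an i64 value with only 20 significant bits is handled as a
 // 32-bit value here, halving the work in the loop below.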
3872 
3873  // The POPCNT instruction counts the number of bits in each byte.
3874  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
3875  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
3876  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
3877 
3878  // Add up per-byte counts in a binary tree. All bits of Op at
3879  // position larger than BitSize remain zero throughout.
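 // For example, with BitSize == 32 and per-byte counts <b3,b2,b1,b0>:
 // I == 16 gives <b3+b1, b2+b0, b1, b0>, and I == 8 then accumulates
 // b3+b2+b1+b0 into the most significant byte.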
3880  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
3881  SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
3882  if (BitSize != OrigBitSize)
3883  Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
3884  DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
3885  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3886  }
3887 
3888  // Extract overall result from high byte.
3889  if (BitSize > 8)
3890  Op = DAG.getNode(ISD::SRL, DL, VT, Op,
3891  DAG.getConstant(BitSize - 8, DL, VT));
3892 
3893  return Op;
3894 }
3895 
3896 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3897  SelectionDAG &DAG) const {
3898  SDLoc DL(Op);
3899  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
3900  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
3901  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
3902  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
3903 
3904  // The only fence that needs an instruction is a sequentially-consistent
3905  // cross-thread fence.
3906  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
3907  FenceSSID == SyncScope::System) {
3908  return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
3909  Op.getOperand(0)),
3910  0);
3911  }
3912 
3913  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3914  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3915 }
3916 
3917 // Op is an atomic load. Lower it into a normal volatile load.
3918 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
3919  SelectionDAG &DAG) const {
3920  auto *Node = cast<AtomicSDNode>(Op.getNode());
3921  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
3922  Node->getChain(), Node->getBasePtr(),
3923  Node->getMemoryVT(), Node->getMemOperand());
3924 }
3925 
3926 // Op is an atomic store. Lower it into a normal volatile store.
3927 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
3928  SelectionDAG &DAG) const {
3929  auto *Node = cast<AtomicSDNode>(Op.getNode());
3930  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
3931  Node->getBasePtr(), Node->getMemoryVT(),
3932  Node->getMemOperand());
3933  // We have to enforce sequential consistency by performing a
3934  // serialization operation after the store.
3935  if (Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent)
3936  Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
3937  MVT::Other, Chain), 0);
3938  return Chain;
3939 }
3940 
3941 // Op is an 8-, 16-, or 32-bit ATOMIC_LOAD_* operation. Lower the first
3942 // two into the fullword ATOMIC_LOADW_* operation given by Opcode.
3943 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
3944  SelectionDAG &DAG,
3945  unsigned Opcode) const {
3946  auto *Node = cast<AtomicSDNode>(Op.getNode());
3947 
3948  // 32-bit operations need no code outside the main loop.
3949  EVT NarrowVT = Node->getMemoryVT();
3950  EVT WideVT = MVT::i32;
3951  if (NarrowVT == WideVT)
3952  return Op;
3953 
3954  int64_t BitSize = NarrowVT.getSizeInBits();
3955  SDValue ChainIn = Node->getChain();
3956  SDValue Addr = Node->getBasePtr();
3957  SDValue Src2 = Node->getVal();
3958  MachineMemOperand *MMO = Node->getMemOperand();
3959  SDLoc DL(Node);
3960  EVT PtrVT = Addr.getValueType();
3961 
3962  // Convert atomic subtracts of constants into additions.
3963  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
3964  if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
3965  Opcode = SystemZISD::ATOMIC_LOADW_ADD;
3966  Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
3967  }
3968 
3969  // Get the address of the containing word.
3970  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3971  DAG.getConstant(-4, DL, PtrVT));
3972 
3973  // Get the number of bits that the word must be rotated left in order
3974  // to bring the field to the top bits of a GR32.
3975  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3976  DAG.getConstant(3, DL, PtrVT));
3977  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3978 
3979  // Get the complementing shift amount, for rotating a field in the top
3980  // bits back to its proper position.
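 // E.g. a halfword at (Addr & 3) == 2 yields BitShift == 16: rotating the
 // containing word left by 16 moves that halfword to the top of the GR32,
 // and rotating by NegBitShift == -16 moves it back.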
3981  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3982  DAG.getConstant(0, DL, WideVT), BitShift);
3983 
3984  // Extend the source operand to 32 bits and prepare it for the inner loop.
3985  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
3986  // operations require the source to be shifted in advance. (This shift
3987  // can be folded if the source is constant.) For AND and NAND, the lower
3988  // bits must be set, while for other opcodes they should be left clear.
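 // E.g. for an 8-bit AND field this ORs 0x00ffffff into Src2, so that
 // the masking in the loop leaves the bits outside the field alone.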
3989  if (Opcode != SystemZISD::ATOMIC_SWAPW)
3990  Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
3991  DAG.getConstant(32 - BitSize, DL, WideVT));
3992  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
3993  Opcode == SystemZISD::ATOMIC_LOADW_NAND)
3994  Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
3995  DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
3996 
3997  // Construct the ATOMIC_LOADW_* node.
3998  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
3999  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
4000  DAG.getConstant(BitSize, DL, WideVT) };
4001  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
4002  NarrowVT, MMO);
4003 
4004  // Rotate the result of the final CS so that the field is in the lower
4005  // bits of a GR32, then truncate it.
4006  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
4007  DAG.getConstant(BitSize, DL, WideVT));
4008  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
4009 
4010  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
4011  return DAG.getMergeValues(RetOps,