1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the SystemZTargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "SystemZISelLowering.h"
14 #include "SystemZCallingConv.h"
17 #include "SystemZTargetMachine.h"
22 #include "llvm/IR/IntrinsicInst.h"
23 #include "llvm/IR/Intrinsics.h"
24 #include "llvm/IR/IntrinsicsS390.h"
26 #include "llvm/Support/KnownBits.h"
27 #include <cctype>
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "systemz-lower"
32 
33 namespace {
34 // Represents information about a comparison.
35 struct Comparison {
36  Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
37  : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
38  Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
39 
40  // The operands to the comparison.
41  SDValue Op0, Op1;
42 
43  // Chain if this is a strict floating-point comparison.
44  SDValue Chain;
45 
46  // The opcode that should be used to compare Op0 and Op1.
47  unsigned Opcode;
48 
49  // A SystemZICMP value. Only used for integer comparisons.
50  unsigned ICmpType;
51 
52  // The mask of CC values that Opcode can produce.
53  unsigned CCValid;
54 
55  // The mask of CC values for which the original condition is true.
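 // For an integer equality test, for instance, CCValid would be
 // SystemZ::CCMASK_ICMP and CCMask SystemZ::CCMASK_CMP_EQ.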
56  unsigned CCMask;
57 };
58 } // end anonymous namespace
59 
60 // Classify VT as either 32 or 64 bit.
61 static bool is32Bit(EVT VT) {
62  switch (VT.getSimpleVT().SimpleTy) {
63  case MVT::i32:
64  return true;
65  case MVT::i64:
66  return false;
67  default:
68  llvm_unreachable("Unsupported type");
69  }
70 }
71 
72 // Return a version of MachineOperand that can be safely used before the
73 // final use.
74 static MachineOperand earlyUseOperand(MachineOperand Op) {
75  if (Op.isReg())
76  Op.setIsKill(false);
77  return Op;
78 }
79 
80 SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
81  const SystemZSubtarget &STI)
82  : TargetLowering(TM), Subtarget(STI) {
83  MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize(0));
84 
85  // Set up the register classes.
86  if (Subtarget.hasHighWord())
87  addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
88  else
89  addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
90  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
91  if (!useSoftFloat()) {
92  if (Subtarget.hasVector()) {
93  addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
94  addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
95  } else {
96  addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
97  addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
98  }
99  if (Subtarget.hasVectorEnhancements1())
100  addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
101  else
102  addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
103 
104  if (Subtarget.hasVector()) {
105  addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
106  addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
107  addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
108  addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
109  addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
110  addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
111  }
112  }
113 
114  // Compute derived properties from the register classes
116 
117  // Set up special registers.
119 
120  // TODO: It may be better to default to latency-oriented scheduling, however
121  // LLVM's current latency-oriented scheduler can't handle physreg definitions
122  // such as SystemZ has with CC, so set this to the register-pressure
123  // scheduler, because it can.
125 
128 
129  // Instructions are strings of 2-byte aligned 2-byte values.
131  // For performance reasons we prefer 16-byte alignment.
133 
134  // Handle operations that are handled in a similar way for all types.
135  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
137  ++I) {
138  MVT VT = MVT::SimpleValueType(I);
139  if (isTypeLegal(VT)) {
140  // Lower SET_CC into an IPM-based sequence.
144 
145  // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
147 
148  // Lower SELECT_CC and BR_CC into separate comparisons and branches.
151  }
152  }
153 
154  // Expand jump table branches as address arithmetic followed by an
155  // indirect jump.
157 
158  // Expand BRCOND into a BR_CC (see above).
160 
161  // Handle integer types.
162  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
164  ++I) {
165  MVT VT = MVT::SimpleValueType(I);
166  if (isTypeLegal(VT)) {
168 
169  // Expand individual DIV and REMs into DIVREMs.
176 
177  // Support addition/subtraction with overflow.
180 
181  // Support addition/subtraction with carry.
184 
185  // Support carry in as value rather than glue.
188 
189  // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
190  // stores, putting a serialization instruction after the stores.
193 
194  // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
195  // available, or if the operand is constant.
197 
198  // Use POPCNT on z196 and above.
199  if (Subtarget.hasPopulationCount())
201  else
203 
204  // No special instructions for these.
207 
208  // Use *MUL_LOHI where possible instead of MULH*.
213 
214  // Only z196 and above have native support for conversions to unsigned.
215  // On z10, promoting to i64 doesn't generate an inexact condition for
216  // values that are outside the i32 range but in the i64 range, so use
217  // the default expansion.
218  if (!Subtarget.hasFPExtension())
220 
221  // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
222  // default to Expand, so need to be modified to Legal where appropriate.
224  if (Subtarget.hasFPExtension())
226 
227  // And similarly for STRICT_[SU]INT_TO_FP.
229  if (Subtarget.hasFPExtension())
231  }
232  }
233 
234  // Type legalization will convert 8- and 16-bit atomic operations into
235  // forms that operate on i32s (but still keeping the original memory VT).
236  // Lower them into full i32 operations.
248 
249  // Even though i128 is not a legal type, we still need to custom lower
250  // the atomic operations in order to exploit SystemZ instructions.
253 
254  // We can use the CC result of compare-and-swap to implement
255  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
259 
261 
262  // Traps are legal, as we will convert them to "j .+2".
264 
265  // z10 has instructions for signed but not unsigned FP conversion.
266  // Handle unsigned 32-bit types as signed 64-bit types.
267  if (!Subtarget.hasFPExtension()) {
272  }
273 
274  // We have native support for a 64-bit CTLZ, via FLOGR.
278 
279  // On z15 we have native support for a 64-bit CTPOP.
280  if (Subtarget.hasMiscellaneousExtensions3()) {
283  }
284 
285  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
287 
288  // FIXME: Can we support these natively?
292 
293  // We have native instructions for i8, i16 and i32 extensions, but not i1.
295  for (MVT VT : MVT::integer_valuetypes()) {
299  }
300 
301  // Handle the various types of symbolic address.
307 
308  // We need to handle dynamic allocations specially because of the
309  // 160-byte area at the bottom of the stack.
312 
313  // Use custom expanders so that we can force the function to use
314  // a frame pointer.
317 
318  // Handle prefetches with PFD or PFDRL.
320 
321  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
322  // Assume by default that all vector operations need to be expanded.
323  for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
324  if (getOperationAction(Opcode, VT) == Legal)
325  setOperationAction(Opcode, VT, Expand);
326 
327  // Likewise all truncating stores and extending loads.
328  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
329  setTruncStoreAction(VT, InnerVT, Expand);
330  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
331  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
332  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
333  }
334 
335  if (isTypeLegal(VT)) {
336  // These operations are legal for anything that can be stored in a
337  // vector register, even if there is no native support for the format
338  // as such. In particular, we can do these for v4f32 even though there
339  // are no specific instructions for that format.
345 
346  // Likewise, except that we need to replace the nodes with something
347  // more specific.
350  }
351  }
352 
353  // Handle integer vector types.
355  if (isTypeLegal(VT)) {
356  // These operations have direct equivalents.
361  if (VT != MVT::v2i64)
367  if (Subtarget.hasVectorEnhancements1())
369  else
373 
374  // Convert a GPR scalar to a vector by inserting it into element 0.
376 
377  // Use a series of unpacks for extensions.
380 
381  // Detect shifts by a scalar amount and convert them into
382  // V*_BY_SCALAR.
386 
387  // At present ROTL isn't matched by DAGCombiner. ROTR should be
388  // converted into ROTL.
391 
392  // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
393  // and inverting the result as necessary.
396  if (Subtarget.hasVectorEnhancements1())
398  }
399  }
400 
401  if (Subtarget.hasVector()) {
402  // There should be no need to check for float types other than v2f64
403  // since <2 x f32> isn't a legal type.
412 
421  }
422 
423  if (Subtarget.hasVectorEnhancements2()) {
432 
441  }
442 
443  // Handle floating-point types.
444  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
446  ++I) {
447  MVT VT = MVT::SimpleValueType(I);
448  if (isTypeLegal(VT)) {
449  // We can use FI for FRINT.
451 
452  // We can use the extended form of FI for other rounding operations.
453  if (Subtarget.hasFPExtension()) {
459  }
460 
461  // No special instructions for these.
467 
468  // Handle constrained floating-point operations.
478  if (Subtarget.hasFPExtension()) {
484  }
485  }
486  }
487 
488  // Handle floating-point vector types.
489  if (Subtarget.hasVector()) {
490  // Scalar-to-vector conversion is just a subreg.
493 
494  // Some insertions and extractions can be done directly but others
495  // need to go via integers.
500 
501  // These operations have direct equivalents.
516 
517  // Handle constrained floating-point operations.
530  }
531 
532  // The vector enhancements facility 1 has instructions for these.
533  if (Subtarget.hasVectorEnhancements1()) {
548 
553 
558 
563 
568 
573 
574  // Handle constrained floating-point operations.
587  for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
588  MVT::v4f32, MVT::v2f64 }) {
593  }
594  }
595 
596  // We only have fused f128 multiply-addition on vector registers.
597  if (!Subtarget.hasVectorEnhancements1()) {
600  }
601 
602  // We don't have a copysign instruction on vector registers.
603  if (Subtarget.hasVectorEnhancements1())
605 
606  // Needed so that we don't try to implement f128 constant loads using
607 // a load-and-extend of an f80 constant (in cases where the constant
608  // would fit in an f80).
609  for (MVT VT : MVT::fp_valuetypes())
611 
612  // We don't have extending load instruction on vector registers.
613  if (Subtarget.hasVectorEnhancements1()) {
616  }
617 
618  // Floating-point truncation and stores need to be done separately.
622 
623  // We have 64-bit FPR<->GPR moves, but need special handling for
624  // 32-bit forms.
625  if (!Subtarget.hasVector()) {
628  }
629 
630  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
631  // structure, but VAEND is a no-op.
635 
636  // Codes for which we want to perform some z-specific combinations.
657 
658  // Handle intrinsics.
661 
662  // We want to use MVC in preference to even a single load/store pair.
663  MaxStoresPerMemcpy = 0;
665 
666  // The main memset sequence is a byte store followed by an MVC.
667  // Two STC or MV..I stores win over that, but the kind of fused stores
668  // generated by target-independent code don't when the byte value is
669  // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
670  // than "STC;MVC". Handle the choice in target-specific code instead.
671  MaxStoresPerMemset = 0;
673 
674  // Default to having -disable-strictnode-mutation on
675  IsStrictFPEnabled = true;
676 }
677 
678 bool SystemZTargetLowering::useSoftFloat() const {
679  return Subtarget.hasSoftFloat();
680 }
681 
682 EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
683  LLVMContext &, EVT VT) const {
684  if (!VT.isVector())
685  return MVT::i32;
686  return VT.changeVectorElementTypeToInteger();
687 }
688 
689 bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
690  const MachineFunction &MF, EVT VT) const {
691  VT = VT.getScalarType();
692 
693  if (!VT.isSimple())
694  return false;
695 
696  switch (VT.getSimpleVT().SimpleTy) {
697  case MVT::f32:
698  case MVT::f64:
699  return true;
700  case MVT::f128:
701  return Subtarget.hasVectorEnhancements1();
702  default:
703  break;
704  }
705 
706  return false;
707 }
708 
709 // Return true if the constant can be generated with a vector instruction,
710 // such as VGM, VGMB or VREPI.
711 bool SystemZVectorConstantInfo::isVectorConstantLegal(
712  const SystemZSubtarget &Subtarget) {
713  const SystemZInstrInfo *TII =
714  static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
715  if (!Subtarget.hasVector() ||
716  (isFP128 && !Subtarget.hasVectorEnhancements1()))
717  return false;
718 
719  // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
720  // preferred way of creating all-zero and all-one vectors so give it
721  // priority over other methods below.
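 // For instance, an all-zero splat leaves Mask == 0 after the loop below,
 // and an all-ones splat yields Mask == 0xffff.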
722  unsigned Mask = 0;
723  unsigned I = 0;
724  for (; I < SystemZ::VectorBytes; ++I) {
725  uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
726  if (Byte == 0xff)
727  Mask |= 1ULL << I;
728  else if (Byte != 0)
729  break;
730  }
731  if (I == SystemZ::VectorBytes) {
733  OpVals.push_back(Mask);
735  return true;
736  }
737 
738  if (SplatBitSize > 64)
739  return false;
740 
741  auto tryValue = [&](uint64_t Value) -> bool {
742  // Try VECTOR REPLICATE IMMEDIATE
743  int64_t SignedValue = SignExtend64(Value, SplatBitSize);
744  if (isInt<16>(SignedValue)) {
745  OpVals.push_back(((unsigned) SignedValue));
747  VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
748  SystemZ::VectorBits / SplatBitSize);
749  return true;
750  }
751  // Try VECTOR GENERATE MASK
752  unsigned Start, End;
753  if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
754  // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
755  // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
756  // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1).
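 // (E.g. with SplatBitSize == 32, a 64-bit Start of 40 maps to bit 8.)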
757  OpVals.push_back(Start - (64 - SplatBitSize));
758  OpVals.push_back(End - (64 - SplatBitSize));
760  VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
761  SystemZ::VectorBits / SplatBitSize);
762  return true;
763  }
764  return false;
765  };
766 
767  // First try assuming that any undefined bits above the highest set bit
768  // and below the lowest set bit are 1s. This increases the likelihood of
769  // being able to use a sign-extended element value in VECTOR REPLICATE
770  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
771  uint64_t SplatBitsZ = SplatBits.getZExtValue();
772  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
773  uint64_t Lower =
774  (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
775  uint64_t Upper =
776  (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
777  if (tryValue(SplatBitsZ | Upper | Lower))
778  return true;
779 
780  // Now try assuming that any undefined bits between the first and
781  // last defined set bits are set. This increases the chances of
782  // using a non-wraparound mask.
783  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
784  return tryValue(SplatBitsZ | Middle);
785 }
786 
787 SystemZVectorConstantInfo::SystemZVectorConstantInfo(APFloat FPImm) {
788  IntBits = FPImm.bitcastToAPInt().zextOrSelf(128);
789  isFP128 = (&FPImm.getSemantics() == &APFloat::IEEEquad());
790  SplatBits = FPImm.bitcastToAPInt();
791  unsigned Width = SplatBits.getBitWidth();
792  IntBits <<= (SystemZ::VectorBits - Width);
793 
794  // Find the smallest splat.
795  while (Width > 8) {
796  unsigned HalfSize = Width / 2;
797  APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
798  APInt LowValue = SplatBits.trunc(HalfSize);
799 
800  // If the two halves do not match, stop here.
801  if (HighValue != LowValue || 8 > HalfSize)
802  break;
803 
804  SplatBits = HighValue;
805  Width = HalfSize;
806  }
807  SplatUndef = 0;
808  SplatBitSize = Width;
809 }
810 
811 SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
812  assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
813  bool HasAnyUndefs;
814 
815  // Get IntBits by finding the 128 bit splat.
816  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
817  true);
818 
819  // Get SplatBits by finding the 8 bit or greater splat.
820  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
821  true);
822 }
823 
824 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
825  bool ForCodeSize) const {
826  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
827  if (Imm.isZero() || Imm.isNegZero())
828  return true;
829 
830  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
831 }
832 
833 /// Returns true if stack probing through inline assembly is requested.
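/// For example, a function carrying the attribute "probe-stack"="inline-asm"
/// makes this return true.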
834 bool SystemZTargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
835  // If the function specifically requests inline stack probes, emit them.
836  if (MF.getFunction().hasFnAttribute("probe-stack"))
837  return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
838  "inline-asm";
839  return false;
840 }
841 
842 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
843  // We can use CGFI or CLGFI.
844  return isInt<32>(Imm) || isUInt<32>(Imm);
845 }
846 
847 bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
848  // We can use ALGFI or SLGFI.
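 // (ALGFI adds and SLGFI subtracts a zero-extended 32-bit immediate, which
 // is why a negative addend is fine as long as its negation fits in 32 bits.)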
849  return isUInt<32>(Imm) || isUInt<32>(-Imm);
850 }
851 
852 bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
853  EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
854  // Unaligned accesses should never be slower than the expanded version.
855  // We check specifically for aligned accesses in the few cases where
856  // they are required.
857  if (Fast)
858  *Fast = true;
859  return true;
860 }
861 
862 // Information about the addressing mode for a memory access.
863 struct AddressingMode {
864  // True if a long displacement is supported.
865  bool LongDisplacement;
866 
867  // True if use of index register is supported.
868  bool IndexReg;
869 
870  AddressingMode(bool LongDispl, bool IdxReg) :
871  LongDisplacement(LongDispl), IndexReg(IdxReg) {}
872 };
873 
874 // Return the desired addressing mode for a Load which has only one use (in
875 // the same block) which is a Store.
876 static AddressingMode getLoadStoreAddrMode(bool HasVector,
877  Type *Ty) {
878  // With vector support a Load->Store combination may be combined to either
879  // an MVC or vector operations and it seems to work best to allow the
880  // vector addressing mode.
881  if (HasVector)
882  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
883 
884  // Otherwise only the MVC case is special.
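 // An i8 access is taken to be an MVC candidate; MVC has only a 12-bit
 // displacement and no index register, hence the restricted mode below.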
885  bool MVC = Ty->isIntegerTy(8);
886  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
887 }
888 
889 // Return the addressing mode which seems most desirable given an LLVM
890 // Instruction pointer.
891 static AddressingMode
892 supportedAddressingMode(Instruction *I, bool HasVector) {
893  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
894  switch (II->getIntrinsicID()) {
895  default: break;
896  case Intrinsic::memset:
897  case Intrinsic::memmove:
898  case Intrinsic::memcpy:
899  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
900  }
901  }
902 
903  if (isa<LoadInst>(I) && I->hasOneUse()) {
904  auto *SingleUser = cast<Instruction>(*I->user_begin());
905  if (SingleUser->getParent() == I->getParent()) {
906  if (isa<ICmpInst>(SingleUser)) {
907  if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
908  if (C->getBitWidth() <= 64 &&
909  (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
910  // Comparison of memory with 16 bit signed / unsigned immediate
911  return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
912  } else if (isa<StoreInst>(SingleUser))
913  // Load->Store
914  return getLoadStoreAddrMode(HasVector, I->getType());
915  }
916  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
917  if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
918  if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
919  // Load->Store
920  return getLoadStoreAddrMode(HasVector, LoadI->getType());
921  }
922 
923  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
924 
925  // * Use LDE instead of LE/LEY for z13 to avoid partial register
926  // dependencies (LDE only supports small offsets).
927  // * Utilize the vector registers to hold floating point
928  // values (vector load / store instructions only support small
929  // offsets).
930 
931  Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
932  I->getOperand(0)->getType());
933  bool IsFPAccess = MemAccessTy->isFloatingPointTy();
934  bool IsVectorAccess = MemAccessTy->isVectorTy();
935 
936  // A store of an extracted vector element will be combined into a VSTE type
937  // instruction.
938  if (!IsVectorAccess && isa<StoreInst>(I)) {
939  Value *DataOp = I->getOperand(0);
940  if (isa<ExtractElementInst>(DataOp))
941  IsVectorAccess = true;
942  }
943 
944  // A load which gets inserted into a vector element will be combined into a
945  // VLE type instruction.
946  if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
947  User *LoadUser = *I->user_begin();
948  if (isa<InsertElementInst>(LoadUser))
949  IsVectorAccess = true;
950  }
951 
952  if (IsFPAccess || IsVectorAccess)
953  return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
954  }
955 
956  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
957 }
958 
959 bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
960  const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
961  // Punt on globals for now, although they can be used in limited
962  // RELATIVE LONG cases.
963  if (AM.BaseGV)
964  return false;
965 
966  // Require a 20-bit signed offset.
967  if (!isInt<20>(AM.BaseOffs))
968  return false;
969 
970  AddressingMode SupportedAM(true, true);
971  if (I != nullptr)
972  SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
973 
974  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
975  return false;
976 
977  if (!SupportedAM.IndexReg)
978  // No indexing allowed.
979  return AM.Scale == 0;
980  else
981  // Indexing is OK but no scale factor can be applied.
982  return AM.Scale == 0 || AM.Scale == 1;
983 }
984 
985 bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
986  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
987  return false;
988  unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedSize();
989  unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedSize();
990  return FromBits > ToBits;
991 }
992 
993 bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
994  if (!FromVT.isInteger() || !ToVT.isInteger())
995  return false;
996  unsigned FromBits = FromVT.getFixedSizeInBits();
997  unsigned ToBits = ToVT.getFixedSizeInBits();
998  return FromBits > ToBits;
999 }
1000 
1001 //===----------------------------------------------------------------------===//
1002 // Inline asm support
1003 //===----------------------------------------------------------------------===//
1004 
1005 TargetLowering::ConstraintType
1006 SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
1007  if (Constraint.size() == 1) {
1008  switch (Constraint[0]) {
1009  case 'a': // Address register
1010  case 'd': // Data register (equivalent to 'r')
1011  case 'f': // Floating-point register
1012  case 'h': // High-part register
1013  case 'r': // General-purpose register
1014  case 'v': // Vector register
1015  return C_RegisterClass;
1016 
1017  case 'Q': // Memory with base and unsigned 12-bit displacement
1018  case 'R': // Likewise, plus an index
1019  case 'S': // Memory with base and signed 20-bit displacement
1020  case 'T': // Likewise, plus an index
1021  case 'm': // Equivalent to 'T'.
1022  return C_Memory;
1023 
1024  case 'I': // Unsigned 8-bit constant
1025  case 'J': // Unsigned 12-bit constant
1026  case 'K': // Signed 16-bit constant
1027  case 'L': // Signed 20-bit displacement (on all targets we support)
1028  case 'M': // 0x7fffffff
1029  return C_Immediate;
1030 
1031  default:
1032  break;
1033  }
1034  }
1035  return TargetLowering::getConstraintType(Constraint);
1036 }
1037 
1038 TargetLowering::ConstraintWeight SystemZTargetLowering::
1039 getSingleConstraintMatchWeight(AsmOperandInfo &info,
1040  const char *constraint) const {
1041  ConstraintWeight weight = CW_Invalid;
1042  Value *CallOperandVal = info.CallOperandVal;
1043  // If we don't have a value, we can't do a match,
1044  // but allow it at the lowest weight.
1045  if (!CallOperandVal)
1046  return CW_Default;
1047  Type *type = CallOperandVal->getType();
1048  // Look at the constraint type.
1049  switch (*constraint) {
1050  default:
1051  weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
1052  break;
1053 
1054  case 'a': // Address register
1055  case 'd': // Data register (equivalent to 'r')
1056  case 'h': // High-part register
1057  case 'r': // General-purpose register
1058  if (CallOperandVal->getType()->isIntegerTy())
1059  weight = CW_Register;
1060  break;
1061 
1062  case 'f': // Floating-point register
1063  if (type->isFloatingPointTy())
1064  weight = CW_Register;
1065  break;
1066 
1067  case 'v': // Vector register
1068  if ((type->isVectorTy() || type->isFloatingPointTy()) &&
1069  Subtarget.hasVector())
1070  weight = CW_Register;
1071  break;
1072 
1073  case 'I': // Unsigned 8-bit constant
1074  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1075  if (isUInt<8>(C->getZExtValue()))
1076  weight = CW_Constant;
1077  break;
1078 
1079  case 'J': // Unsigned 12-bit constant
1080  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1081  if (isUInt<12>(C->getZExtValue()))
1082  weight = CW_Constant;
1083  break;
1084 
1085  case 'K': // Signed 16-bit constant
1086  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1087  if (isInt<16>(C->getSExtValue()))
1088  weight = CW_Constant;
1089  break;
1090 
1091  case 'L': // Signed 20-bit displacement (on all targets we support)
1092  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1093  if (isInt<20>(C->getSExtValue()))
1094  weight = CW_Constant;
1095  break;
1096 
1097  case 'M': // 0x7fffffff
1098  if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1099  if (C->getZExtValue() == 0x7fffffff)
1100  weight = CW_Constant;
1101  break;
1102  }
1103  return weight;
1104 }
1105 
1106 // Parse a "{tNNN}" register constraint for which the register type "t"
1107 // has already been verified. MC is the class associated with "t" and
1108 // Map maps 0-based register numbers to LLVM register numbers.
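// For example, the constraint "{r5}" with the GR64 map resolves to
// SystemZ::R5D.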
1109 static std::pair<unsigned, const TargetRegisterClass *>
1110 parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
1111  const unsigned *Map, unsigned Size) {
1112  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1113  if (isdigit(Constraint[2])) {
1114  unsigned Index;
1115  bool Failed =
1116  Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1117  if (!Failed && Index < Size && Map[Index])
1118  return std::make_pair(Map[Index], RC);
1119  }
1120  return std::make_pair(0U, nullptr);
1121 }
1122 
1123 std::pair<unsigned, const TargetRegisterClass *>
1124 SystemZTargetLowering::getRegForInlineAsmConstraint(
1125  const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1126  if (Constraint.size() == 1) {
1127  // GCC Constraint Letters
1128  switch (Constraint[0]) {
1129  default: break;
1130  case 'd': // Data register (equivalent to 'r')
1131  case 'r': // General-purpose register
1132  if (VT == MVT::i64)
1133  return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1134  else if (VT == MVT::i128)
1135  return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1136  return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1137 
1138  case 'a': // Address register
1139  if (VT == MVT::i64)
1140  return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1141  else if (VT == MVT::i128)
1142  return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1143  return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1144 
1145  case 'h': // High-part register (an LLVM extension)
1146  return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1147 
1148  case 'f': // Floating-point register
1149  if (!useSoftFloat()) {
1150  if (VT == MVT::f64)
1151  return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1152  else if (VT == MVT::f128)
1153  return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1154  return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1155  }
1156  break;
1157  case 'v': // Vector register
1158  if (Subtarget.hasVector()) {
1159  if (VT == MVT::f32)
1160  return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1161  if (VT == MVT::f64)
1162  return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1163  return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1164  }
1165  break;
1166  }
1167  }
1168  if (Constraint.size() > 0 && Constraint[0] == '{') {
1169  // We need to override the default register parsing for GPRs and FPRs
1170  // because the interpretation depends on VT. The internal names of
1171  // the registers are also different from the external names
1172  // (F0D and F0S instead of F0, etc.).
1173  if (Constraint[1] == 'r') {
1174  if (VT == MVT::i32)
1175  return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1176  SystemZMC::GR32Regs, 16);
1177  if (VT == MVT::i128)
1178  return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1179  SystemZMC::GR128Regs, 16);
1180  return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1181  SystemZMC::GR64Regs, 16);
1182  }
1183  if (Constraint[1] == 'f') {
1184  if (useSoftFloat())
1185  return std::make_pair(
1186  0u, static_cast<const TargetRegisterClass *>(nullptr));
1187  if (VT == MVT::f32)
1188  return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1189  SystemZMC::FP32Regs, 16);
1190  if (VT == MVT::f128)
1191  return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1192  SystemZMC::FP128Regs, 16);
1193  return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1194  SystemZMC::FP64Regs, 16);
1195  }
1196  if (Constraint[1] == 'v') {
1197  if (!Subtarget.hasVector())
1198  return std::make_pair(
1199  0u, static_cast<const TargetRegisterClass *>(nullptr));
1200  if (VT == MVT::f32)
1201  return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1202  SystemZMC::VR32Regs, 32);
1203  if (VT == MVT::f64)
1204  return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1205  SystemZMC::VR64Regs, 32);
1206  return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1207  SystemZMC::VR128Regs, 32);
1208  }
1209  }
1210  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1211 }
1212 
1213 // FIXME? Maybe this could be a TableGen attribute on some registers and
1214 // this table could be generated automatically from RegInfo.
1215 Register SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
1216  const MachineFunction &MF) const {
1217 
1219  .Case("r15", SystemZ::R15D)
1220  .Default(0);
1221  if (Reg)
1222  return Reg;
1223  report_fatal_error("Invalid register name global variable");
1224 }
1225 
1226 void SystemZTargetLowering::
1227 LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
1228  std::vector<SDValue> &Ops,
1229  SelectionDAG &DAG) const {
1230  // Only support length 1 constraints for now.
1231  if (Constraint.length() == 1) {
1232  switch (Constraint[0]) {
1233  case 'I': // Unsigned 8-bit constant
1234  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1235  if (isUInt<8>(C->getZExtValue()))
1236  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1237  Op.getValueType()));
1238  return;
1239 
1240  case 'J': // Unsigned 12-bit constant
1241  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1242  if (isUInt<12>(C->getZExtValue()))
1243  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1244  Op.getValueType()));
1245  return;
1246 
1247  case 'K': // Signed 16-bit constant
1248  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1249  if (isInt<16>(C->getSExtValue()))
1250  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1251  Op.getValueType()));
1252  return;
1253 
1254  case 'L': // Signed 20-bit displacement (on all targets we support)
1255  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1256  if (isInt<20>(C->getSExtValue()))
1257  Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
1258  Op.getValueType()));
1259  return;
1260 
1261  case 'M': // 0x7fffffff
1262  if (auto *C = dyn_cast<ConstantSDNode>(Op))
1263  if (C->getZExtValue() == 0x7fffffff)
1264  Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1265  Op.getValueType()));
1266  return;
1267  }
1268  }
1269  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1270 }
1271 
1272 //===----------------------------------------------------------------------===//
1273 // Calling conventions
1274 //===----------------------------------------------------------------------===//
1275 
1276 #include "SystemZGenCallingConv.inc"
1277 
1278 const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
1279  CallingConv::ID) const {
1280  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1281  SystemZ::R14D, 0 };
1282  return ScratchRegs;
1283 }
1284 
1285 bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
1286  Type *ToType) const {
1287  return isTruncateFree(FromType, ToType);
1288 }
1289 
1290 bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1291  return CI->isTailCall();
1292 }
1293 
1294 // We do not yet support 128-bit single-element vector types. If the user
1295 // attempts to use such types as function argument or return type, prefer
1296 // to error out instead of emitting code violating the ABI.
1297 static void VerifyVectorType(MVT VT, EVT ArgVT) {
1298  if (ArgVT.isVector() && !VT.isVector())
1299  report_fatal_error("Unsupported vector argument or return type");
1300 }
1301 
1302 static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
1303  for (unsigned i = 0; i < Ins.size(); ++i)
1304  VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
1305 }
1306 
1307 static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
1308  for (unsigned i = 0; i < Outs.size(); ++i)
1309  VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
1310 }
1311 
1312 // Value is a value that has been passed to us in the location described by VA
1313 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1314 // any loads onto Chain.
1315 static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
1316  CCValAssign &VA, SDValue Chain,
1317  SDValue Value) {
1318  // If the argument has been promoted from a smaller type, insert an
1319  // assertion to capture this.
1320  if (VA.getLocInfo() == CCValAssign::SExt)
1321  Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
1322  DAG.getValueType(VA.getValVT()));
1323  else if (VA.getLocInfo() == CCValAssign::ZExt)
1324  Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
1325  DAG.getValueType(VA.getValVT()));
1326 
1327  if (VA.isExtInLoc())
1328  Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1329  else if (VA.getLocInfo() == CCValAssign::BCvt) {
1330  // If this is a short vector argument loaded from the stack,
1331  // extend from i64 to full vector size and then bitcast.
1332  assert(VA.getLocVT() == MVT::i64);
1333  assert(VA.getValVT().isVector());
1335  Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1336  } else
1337  assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1338  return Value;
1339 }
1340 
1341 // Value is a value of type VA.getValVT() that we need to copy into
1342 // the location described by VA. Return a copy of Value converted to
1343 // VA.getLocVT(). The caller is responsible for handling indirect values.
1344 static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1345  CCValAssign &VA, SDValue Value) {
1346  switch (VA.getLocInfo()) {
1347  case CCValAssign::SExt:
1348  return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1349  case CCValAssign::ZExt:
1350  return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1351  case CCValAssign::AExt:
1352  return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1353  case CCValAssign::BCvt:
1354  // If this is a short vector argument to be stored to the stack,
1355  // bitcast to v2i64 and then extract first element.
1356  assert(VA.getLocVT() == MVT::i64);
1357  assert(VA.getValVT().isVector());
1359  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1360  DAG.getConstant(0, DL, MVT::i32));
1361  case CCValAssign::Full:
1362  return Value;
1363  default:
1364  llvm_unreachable("Unhandled getLocInfo()");
1365  }
1366 }
1367 
1368 SDValue SystemZTargetLowering::LowerFormalArguments(
1369  SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1370  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1371  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1372  MachineFunction &MF = DAG.getMachineFunction();
1373  MachineFrameInfo &MFI = MF.getFrameInfo();
1375  SystemZMachineFunctionInfo *FuncInfo =
1377  auto *TFL =
1378  static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
1379  EVT PtrVT = getPointerTy(DAG.getDataLayout());
1380 
1381  // Detect unsupported vector argument types.
1382  if (Subtarget.hasVector())
1384 
1385  // Assign locations to all of the incoming arguments.
1387  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1388  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1389 
1390  unsigned NumFixedGPRs = 0;
1391  unsigned NumFixedFPRs = 0;
1392  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1393  SDValue ArgValue;
1394  CCValAssign &VA = ArgLocs[I];
1395  EVT LocVT = VA.getLocVT();
1396  if (VA.isRegLoc()) {
1397  // Arguments passed in registers
1398  const TargetRegisterClass *RC;
1399  switch (LocVT.getSimpleVT().SimpleTy) {
1400  default:
1401  // Integers smaller than i64 should be promoted to i64.
1402  llvm_unreachable("Unexpected argument type");
1403  case MVT::i32:
1404  NumFixedGPRs += 1;
1405  RC = &SystemZ::GR32BitRegClass;
1406  break;
1407  case MVT::i64:
1408  NumFixedGPRs += 1;
1409  RC = &SystemZ::GR64BitRegClass;
1410  break;
1411  case MVT::f32:
1412  NumFixedFPRs += 1;
1413  RC = &SystemZ::FP32BitRegClass;
1414  break;
1415  case MVT::f64:
1416  NumFixedFPRs += 1;
1417  RC = &SystemZ::FP64BitRegClass;
1418  break;
1419  case MVT::v16i8:
1420  case MVT::v8i16:
1421  case MVT::v4i32:
1422  case MVT::v2i64:
1423  case MVT::v4f32:
1424  case MVT::v2f64:
1425  RC = &SystemZ::VR128BitRegClass;
1426  break;
1427  }
1428 
1429  Register VReg = MRI.createVirtualRegister(RC);
1430  MRI.addLiveIn(VA.getLocReg(), VReg);
1431  ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
1432  } else {
1433  assert(VA.isMemLoc() && "Argument not register or memory");
1434 
1435  // Create the frame index object for this incoming parameter.
1436  int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
1437  VA.getLocMemOffset(), true);
1438 
1439  // Create the SelectionDAG nodes corresponding to a load
1440  // from this parameter. Unpromoted ints and floats are
1441  // passed as right-justified 8-byte values.
1442  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1443  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1444  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
1445  DAG.getIntPtrConstant(4, DL));
1446  ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
1448  }
1449 
1450  // Convert the value of the argument register into the value that's
1451  // being passed.
1452  if (VA.getLocInfo() == CCValAssign::Indirect) {
1453  InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
1454  MachinePointerInfo()));
1455  // If the original argument was split (e.g. i128), we need
1456  // to load all parts of it here (using the same address).
1457  unsigned ArgIndex = Ins[I].OrigArgIndex;
1458  assert (Ins[I].PartOffset == 0);
1459  while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
1460  CCValAssign &PartVA = ArgLocs[I + 1];
1461  unsigned PartOffset = Ins[I + 1].PartOffset;
1462  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
1463  DAG.getIntPtrConstant(PartOffset, DL));
1464  InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
1465  MachinePointerInfo()));
1466  ++I;
1467  }
1468  } else
1469  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
1470  }
1471 
1472  if (IsVarArg) {
1473  // Save the number of non-varargs registers for later use by va_start, etc.
1474  FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
1475  FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
1476 
1477  // Likewise the address (in the form of a frame index) of where the
1478  // first stack vararg would be. The 1-byte size here is arbitrary.
1479  int64_t StackSize = CCInfo.getNextStackOffset();
1480  FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
1481 
1482  // ...and a similar frame index for the caller-allocated save area
1483  // that will be used to store the incoming registers.
1484  int64_t RegSaveOffset =
1485  -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
1486  unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
1487  FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
1488 
1489  // Store the FPR varargs in the reserved frame slots. (We store the
1490  // GPRs as part of the prologue.)
1491  if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
1493  for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
1494  unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
1495  int FI =
1497  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
1498  unsigned VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
1499  &SystemZ::FP64BitRegClass);
1500  SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
1501  MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
1503  }
1504  // Join the stores, which are independent of one another.
1505  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
1506  makeArrayRef(&MemOps[NumFixedFPRs],
1507  SystemZ::ELFNumArgFPRs-NumFixedFPRs));
1508  }
1509  }
1510 
1511  return Chain;
1512 }
1513 
1514 static bool canUseSiblingCall(const CCState &ArgCCInfo,
1515  SmallVectorImpl<CCValAssign> &ArgLocs,
1516  SmallVectorImpl<ISD::OutputArg> &Outs) {
1517  // Punt if there are any indirect or stack arguments, or if the call
1518  // needs the callee-saved argument register R6, or if the call uses
1519  // the callee-saved register arguments SwiftSelf and SwiftError.
1520  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1521  CCValAssign &VA = ArgLocs[I];
1522  if (VA.getLocInfo() == CCValAssign::Indirect)
1523  return false;
1524  if (!VA.isRegLoc())
1525  return false;
1526  Register Reg = VA.getLocReg();
1527  if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
1528  return false;
1529  if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
1530  return false;
1531  }
1532  return true;
1533 }
1534 
1535 SDValue
1536 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
1537  SmallVectorImpl<SDValue> &InVals) const {
1538  SelectionDAG &DAG = CLI.DAG;
1539  SDLoc &DL = CLI.DL;
1541  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1543  SDValue Chain = CLI.Chain;
1544  SDValue Callee = CLI.Callee;
1545  bool &IsTailCall = CLI.IsTailCall;
1546  CallingConv::ID CallConv = CLI.CallConv;
1547  bool IsVarArg = CLI.IsVarArg;
1548  MachineFunction &MF = DAG.getMachineFunction();
1549  EVT PtrVT = getPointerTy(MF.getDataLayout());
1550  LLVMContext &Ctx = *DAG.getContext();
1551 
1552  // Detect unsupported vector argument and return types.
1553  if (Subtarget.hasVector()) {
1554  VerifyVectorTypes(Outs);
1556  }
1557 
1558  // Analyze the operands of the call, assigning locations to each operand.
1560  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
1561  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
1562 
1563  // We don't support GuaranteedTailCallOpt, only automatically-detected
1564  // sibling calls.
1565  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
1566  IsTailCall = false;
1567 
1568  // Get a count of how many bytes are to be pushed on the stack.
1569  unsigned NumBytes = ArgCCInfo.getNextStackOffset();
1570 
1571  // Mark the start of the call.
1572  if (!IsTailCall)
1573  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
1574 
1575  // Copy argument values to their designated locations.
1577  SmallVector<SDValue, 8> MemOpChains;
1578  SDValue StackPtr;
1579  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1580  CCValAssign &VA = ArgLocs[I];
1581  SDValue ArgValue = OutVals[I];
1582 
1583  if (VA.getLocInfo() == CCValAssign::Indirect) {
1584  // Store the argument in a stack slot and pass its address.
1585  unsigned ArgIndex = Outs[I].OrigArgIndex;
1586  EVT SlotVT;
1587  if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1588  // Allocate the full stack space for a promoted (and split) argument.
1589  Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
1590  EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
1591  MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1592  unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
1593  SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
1594  } else {
1595  SlotVT = Outs[I].ArgVT;
1596  }
1597  SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
1598  int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
1599  MemOpChains.push_back(
1600  DAG.getStore(Chain, DL, ArgValue, SpillSlot,
1602  // If the original argument was split (e.g. i128), we need
1603  // to store all parts of it here (and pass just one address).
1604  assert (Outs[I].PartOffset == 0);
1605  while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
1606  SDValue PartValue = OutVals[I + 1];
1607  unsigned PartOffset = Outs[I + 1].PartOffset;
1608  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
1609  DAG.getIntPtrConstant(PartOffset, DL));
1610  MemOpChains.push_back(
1611  DAG.getStore(Chain, DL, PartValue, Address,
1613  assert((PartOffset + PartValue.getValueType().getStoreSize() <=
1614  SlotVT.getStoreSize()) && "Not enough space for argument part!");
1615  ++I;
1616  }
1617  ArgValue = SpillSlot;
1618  } else
1619  ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
1620 
1621  if (VA.isRegLoc())
1622  // Queue up the argument copies and emit them at the end.
1623  RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
1624  else {
1625  assert(VA.isMemLoc() && "Argument not register or memory");
1626 
1627  // Work out the address of the stack slot. Unpromoted ints and
1628  // floats are passed as right-justified 8-byte values.
1629  if (!StackPtr.getNode())
1630  StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
1632  if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
1633  Offset += 4;
1634  SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
1635  DAG.getIntPtrConstant(Offset, DL));
1636 
1637  // Emit the store.
1638  MemOpChains.push_back(
1639  DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
1640  }
1641  }
1642 
1643  // Join the stores, which are independent of one another.
1644  if (!MemOpChains.empty())
1645  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
1646 
1647  // Accept direct calls by converting symbolic call addresses to the
1648  // associated Target* opcodes. Force %r1 to be used for indirect
1649  // tail calls.
1650  SDValue Glue;
1651  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1652  Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
1654  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1655  Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
1657  } else if (IsTailCall) {
1658  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
1659  Glue = Chain.getValue(1);
1660  Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
1661  }
1662 
1663  // Build a sequence of copy-to-reg nodes, chained and glued together.
1664  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
1665  Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
1666  RegsToPass[I].second, Glue);
1667  Glue = Chain.getValue(1);
1668  }
1669 
1670  // The first call operand is the chain and the second is the target address.
1672  Ops.push_back(Chain);
1673  Ops.push_back(Callee);
1674 
1675  // Add argument registers to the end of the list so that they are
1676  // known live into the call.
1677  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
1678  Ops.push_back(DAG.getRegister(RegsToPass[I].first,
1679  RegsToPass[I].second.getValueType()));
1680 
1681  // Add a register mask operand representing the call-preserved registers.
1682  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1683  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
1684  assert(Mask && "Missing call preserved mask for calling convention");
1685  Ops.push_back(DAG.getRegisterMask(Mask));
1686 
1687  // Glue the call to the argument copies, if any.
1688  if (Glue.getNode())
1689  Ops.push_back(Glue);
1690 
1691  // Emit the call.
1692  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1693  if (IsTailCall)
1694  return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
1695  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
1696  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
1697  Glue = Chain.getValue(1);
1698 
1699  // Mark the end of the call, which is glued to the call itself.
1700  Chain = DAG.getCALLSEQ_END(Chain,
1701  DAG.getConstant(NumBytes, DL, PtrVT, true),
1702  DAG.getConstant(0, DL, PtrVT, true),
1703  Glue, DL);
1704  Glue = Chain.getValue(1);
1705 
1706  // Assign locations to each value returned by this call.
1708  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
1709  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
1710 
1711  // Copy all of the result registers out of their specified physreg.
1712  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1713  CCValAssign &VA = RetLocs[I];
1714 
1715  // Copy the value out, gluing the copy to the end of the call sequence.
1716  SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
1717  VA.getLocVT(), Glue);
1718  Chain = RetValue.getValue(1);
1719  Glue = RetValue.getValue(2);
1720 
1721  // Convert the value of the return register into the value that's
1722  // being returned.
1723  InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
1724  }
1725 
1726  return Chain;
1727 }
1728 
1729 bool SystemZTargetLowering::
1730 CanLowerReturn(CallingConv::ID CallConv,
1731  MachineFunction &MF, bool isVarArg,
1732  const SmallVectorImpl<ISD::OutputArg> &Outs,
1733  LLVMContext &Context) const {
1734  // Detect unsupported vector return types.
1735  if (Subtarget.hasVector())
1736  VerifyVectorTypes(Outs);
1737 
1738  // Special case that we cannot easily detect in RetCC_SystemZ since
1739  // i128 is not a legal type.
1740  for (auto &Out : Outs)
1741  if (Out.ArgVT == MVT::i128)
1742  return false;
1743 
1745  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1746  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1747 }
1748 
1749 SDValue
1750 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1751  bool IsVarArg,
1752  const SmallVectorImpl<ISD::OutputArg> &Outs,
1753  const SmallVectorImpl<SDValue> &OutVals,
1754  const SDLoc &DL, SelectionDAG &DAG) const {
1755  MachineFunction &MF = DAG.getMachineFunction();
1756 
1757  // Detect unsupported vector return types.
1758  if (Subtarget.hasVector())
1759  VerifyVectorTypes(Outs);
1760 
1761  // Assign locations to each returned value.
1763  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1764  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1765 
1766  // Quick exit for void returns
1767  if (RetLocs.empty())
1768  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1769 
1770  if (CallConv == CallingConv::GHC)
1771  report_fatal_error("GHC functions return void only");
1772 
1773  // Copy the result values into the output registers.
1774  SDValue Glue;
1775  SmallVector<SDValue, 4> RetOps;
1776  RetOps.push_back(Chain);
1777  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1778  CCValAssign &VA = RetLocs[I];
1779  SDValue RetValue = OutVals[I];
1780 
1781  // Make the return register live on exit.
1782  assert(VA.isRegLoc() && "Can only return in registers!");
1783 
1784  // Promote the value as required.
1785  RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1786 
1787  // Chain and glue the copies together.
1788  Register Reg = VA.getLocReg();
1789  Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1790  Glue = Chain.getValue(1);
1791  RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1792  }
1793 
1794  // Update chain and glue.
1795  RetOps[0] = Chain;
1796  if (Glue.getNode())
1797  RetOps.push_back(Glue);
1798 
1799  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1800 }
1801 
1802 // Return true if Op is an intrinsic node with chain that returns the CC value
1803 // as its only (other) argument. Provide the associated SystemZISD opcode and
1804 // the mask of valid CC values if so.
1805 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1806  unsigned &CCValid) {
1807  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1808  switch (Id) {
1809  case Intrinsic::s390_tbegin:
1810  Opcode = SystemZISD::TBEGIN;
1811  CCValid = SystemZ::CCMASK_TBEGIN;
1812  return true;
1813 
1814  case Intrinsic::s390_tbegin_nofloat:
1815  Opcode = SystemZISD::TBEGIN_NOFLOAT;
1816  CCValid = SystemZ::CCMASK_TBEGIN;
1817  return true;
1818 
1819  case Intrinsic::s390_tend:
1820  Opcode = SystemZISD::TEND;
1821  CCValid = SystemZ::CCMASK_TEND;
1822  return true;
1823 
1824  default:
1825  return false;
1826  }
1827 }
1828 
1829 // Return true if Op is an intrinsic node without chain that returns the
1830 // CC value as its final argument. Provide the associated SystemZISD
1831 // opcode and the mask of valid CC values if so.
1832 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
1833  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1834  switch (Id) {
1835  case Intrinsic::s390_vpkshs:
1836  case Intrinsic::s390_vpksfs:
1837  case Intrinsic::s390_vpksgs:
1838  Opcode = SystemZISD::PACKS_CC;
1839  CCValid = SystemZ::CCMASK_VCMP;
1840  return true;
1841 
1842  case Intrinsic::s390_vpklshs:
1843  case Intrinsic::s390_vpklsfs:
1844  case Intrinsic::s390_vpklsgs:
1845  Opcode = SystemZISD::PACKLS_CC;
1846  CCValid = SystemZ::CCMASK_VCMP;
1847  return true;
1848 
1849  case Intrinsic::s390_vceqbs:
1850  case Intrinsic::s390_vceqhs:
1851  case Intrinsic::s390_vceqfs:
1852  case Intrinsic::s390_vceqgs:
1853  Opcode = SystemZISD::VICMPES;
1854  CCValid = SystemZ::CCMASK_VCMP;
1855  return true;
1856 
1857  case Intrinsic::s390_vchbs:
1858  case Intrinsic::s390_vchhs:
1859  case Intrinsic::s390_vchfs:
1860  case Intrinsic::s390_vchgs:
1861  Opcode = SystemZISD::VICMPHS;
1862  CCValid = SystemZ::CCMASK_VCMP;
1863  return true;
1864 
1865  case Intrinsic::s390_vchlbs:
1866  case Intrinsic::s390_vchlhs:
1867  case Intrinsic::s390_vchlfs:
1868  case Intrinsic::s390_vchlgs:
1869  Opcode = SystemZISD::VICMPHLS;
1870  CCValid = SystemZ::CCMASK_VCMP;
1871  return true;
1872 
1873  case Intrinsic::s390_vtm:
1874  Opcode = SystemZISD::VTM;
1875  CCValid = SystemZ::CCMASK_VCMP;
1876  return true;
1877 
1878  case Intrinsic::s390_vfaebs:
1879  case Intrinsic::s390_vfaehs:
1880  case Intrinsic::s390_vfaefs:
1881  Opcode = SystemZISD::VFAE_CC;
1882  CCValid = SystemZ::CCMASK_ANY;
1883  return true;
1884 
1885  case Intrinsic::s390_vfaezbs:
1886  case Intrinsic::s390_vfaezhs:
1887  case Intrinsic::s390_vfaezfs:
1888  Opcode = SystemZISD::VFAEZ_CC;
1889  CCValid = SystemZ::CCMASK_ANY;
1890  return true;
1891 
1892  case Intrinsic::s390_vfeebs:
1893  case Intrinsic::s390_vfeehs:
1894  case Intrinsic::s390_vfeefs:
1895  Opcode = SystemZISD::VFEE_CC;
1896  CCValid = SystemZ::CCMASK_ANY;
1897  return true;
1898 
1899  case Intrinsic::s390_vfeezbs:
1900  case Intrinsic::s390_vfeezhs:
1901  case Intrinsic::s390_vfeezfs:
1902  Opcode = SystemZISD::VFEEZ_CC;
1903  CCValid = SystemZ::CCMASK_ANY;
1904  return true;
1905 
1906  case Intrinsic::s390_vfenebs:
1907  case Intrinsic::s390_vfenehs:
1908  case Intrinsic::s390_vfenefs:
1909  Opcode = SystemZISD::VFENE_CC;
1910  CCValid = SystemZ::CCMASK_ANY;
1911  return true;
1912 
1913  case Intrinsic::s390_vfenezbs:
1914  case Intrinsic::s390_vfenezhs:
1915  case Intrinsic::s390_vfenezfs:
1916  Opcode = SystemZISD::VFENEZ_CC;
1917  CCValid = SystemZ::CCMASK_ANY;
1918  return true;
1919 
1920  case Intrinsic::s390_vistrbs:
1921  case Intrinsic::s390_vistrhs:
1922  case Intrinsic::s390_vistrfs:
1923  Opcode = SystemZISD::VISTR_CC;
1925  return true;
1926 
1927  case Intrinsic::s390_vstrcbs:
1928  case Intrinsic::s390_vstrchs:
1929  case Intrinsic::s390_vstrcfs:
1930  Opcode = SystemZISD::VSTRC_CC;
1931  CCValid = SystemZ::CCMASK_ANY;
1932  return true;
1933 
1934  case Intrinsic::s390_vstrczbs:
1935  case Intrinsic::s390_vstrczhs:
1936  case Intrinsic::s390_vstrczfs:
1937  Opcode = SystemZISD::VSTRCZ_CC;
1938  CCValid = SystemZ::CCMASK_ANY;
1939  return true;
1940 
1941  case Intrinsic::s390_vstrsb:
1942  case Intrinsic::s390_vstrsh:
1943  case Intrinsic::s390_vstrsf:
1944  Opcode = SystemZISD::VSTRS_CC;
1945  CCValid = SystemZ::CCMASK_ANY;
1946  return true;
1947 
1948  case Intrinsic::s390_vstrszb:
1949  case Intrinsic::s390_vstrszh:
1950  case Intrinsic::s390_vstrszf:
1951  Opcode = SystemZISD::VSTRSZ_CC;
1952  CCValid = SystemZ::CCMASK_ANY;
1953  return true;
1954 
1955  case Intrinsic::s390_vfcedbs:
1956  case Intrinsic::s390_vfcesbs:
1957  Opcode = SystemZISD::VFCMPES;
1958  CCValid = SystemZ::CCMASK_VCMP;
1959  return true;
1960 
1961  case Intrinsic::s390_vfchdbs:
1962  case Intrinsic::s390_vfchsbs:
1963  Opcode = SystemZISD::VFCMPHS;
1964  CCValid = SystemZ::CCMASK_VCMP;
1965  return true;
1966 
1967  case Intrinsic::s390_vfchedbs:
1968  case Intrinsic::s390_vfchesbs:
1969  Opcode = SystemZISD::VFCMPHES;
1970  CCValid = SystemZ::CCMASK_VCMP;
1971  return true;
1972 
1973  case Intrinsic::s390_vftcidb:
1974  case Intrinsic::s390_vftcisb:
1975  Opcode = SystemZISD::VFTCI;
1976  CCValid = SystemZ::CCMASK_VCMP;
1977  return true;
1978 
1979  case Intrinsic::s390_tdc:
1980  Opcode = SystemZISD::TDC;
1981  CCValid = SystemZ::CCMASK_TDC;
1982  return true;
1983 
1984  default:
1985  return false;
1986  }
1987 }
1988 
1989 // Emit an intrinsic with chain and an explicit CC register result.
1990 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
1991  unsigned Opcode) {
1992  // Copy all operands except the intrinsic ID.
1993  unsigned NumOps = Op.getNumOperands();
1994  SmallVector<SDValue, 6> Ops;
1995  Ops.reserve(NumOps - 1);
1996  Ops.push_back(Op.getOperand(0));
1997  for (unsigned I = 2; I < NumOps; ++I)
1998  Ops.push_back(Op.getOperand(I));
1999 
2000  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2001  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2002  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2003  SDValue OldChain = SDValue(Op.getNode(), 1);
2004  SDValue NewChain = SDValue(Intr.getNode(), 1);
2005  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2006  return Intr.getNode();
2007 }
2008 
2009 // Emit an intrinsic with an explicit CC register result.
2010 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2011  unsigned Opcode) {
2012  // Copy all operands except the intrinsic ID.
2013  unsigned NumOps = Op.getNumOperands();
2014  SmallVector<SDValue, 6> Ops;
2015  Ops.reserve(NumOps - 1);
2016  for (unsigned I = 1; I < NumOps; ++I)
2017  Ops.push_back(Op.getOperand(I));
2018 
2019  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2020  return Intr.getNode();
2021 }
2022 
2023 // CC is a comparison that will be implemented using an integer or
2024 // floating-point comparison. Return the condition code mask for
2025 // a branch on true. In the integer case, CCMASK_CMP_UO is set for
2026 // unsigned comparisons and clear for signed ones. In the floating-point
2027 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2028 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2029 #define CONV(X) \
2030  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2031  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2032  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2033 
2034  switch (CC) {
2035  default:
2036  llvm_unreachable("Invalid integer condition!");
2037 
2038  CONV(EQ);
2039  CONV(NE);
2040  CONV(GT);
2041  CONV(GE);
2042  CONV(LT);
2043  CONV(LE);
2044 
2045  case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2046  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2047  }
2048 #undef CONV
2049 }
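// For example (illustrative note, not part of the upstream file): ISD::SETULT
// expands via CONV to CCMASK_CMP_UO | CCMASK_CMP_LT, i.e. branch if the result
// is "less than" or "unordered", while ISD::SETLT yields CCMASK_CMP_LT alone.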
2050 
2051 // If C can be converted to a comparison against zero, adjust the operands
2052 // as necessary.
2053 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2054  if (C.ICmpType == SystemZICMP::UnsignedOnly)
2055  return;
2056 
2057  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2058  if (!ConstOp1)
2059  return;
2060 
2061  int64_t Value = ConstOp1->getSExtValue();
2062  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2063  (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2064  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2065  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2066  C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2067  C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2068  }
2069 }
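// For example (illustrative note): a signed "x > -1" matches the
// (Value == -1, CCMASK_CMP_GT) case above and is rewritten as "x >= 0",
// toggling the EQ bit of the mask and replacing Op1 with zero.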
2070 
2071 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2072 // adjust the operands as necessary.
2073 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2074  Comparison &C) {
2075  // For us to make any changes, it must be a comparison between a single-use
2076  // load and a constant.
2077  if (!C.Op0.hasOneUse() ||
2078  C.Op0.getOpcode() != ISD::LOAD ||
2079  C.Op1.getOpcode() != ISD::Constant)
2080  return;
2081 
2082  // We must have an 8- or 16-bit load.
2083  auto *Load = cast<LoadSDNode>(C.Op0);
2084  unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2085  if ((NumBits != 8 && NumBits != 16) ||
2086  NumBits != Load->getMemoryVT().getStoreSizeInBits())
2087  return;
2088 
2089  // The load must be an extending one and the constant must be within the
2090  // range of the unextended value.
2091  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2092  uint64_t Value = ConstOp1->getZExtValue();
2093  uint64_t Mask = (1 << NumBits) - 1;
2094  if (Load->getExtensionType() == ISD::SEXTLOAD) {
2095  // Make sure that ConstOp1 is in range of C.Op0.
2096  int64_t SignedValue = ConstOp1->getSExtValue();
2097  if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2098  return;
2099  if (C.ICmpType != SystemZICMP::SignedOnly) {
2100  // Unsigned comparison between two sign-extended values is equivalent
2101  // to unsigned comparison between two zero-extended values.
2102  Value &= Mask;
2103  } else if (NumBits == 8) {
2104  // Try to treat the comparison as unsigned, so that we can use CLI.
2105  // Adjust CCMask and Value as necessary.
2106  if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2107  // Test whether the high bit of the byte is set.
2108  Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2109  else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2110  // Test whether the high bit of the byte is clear.
2111  Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2112  else
2113  // No instruction exists for this combination.
2114  return;
2115  C.ICmpType = SystemZICMP::UnsignedOnly;
2116  }
2117  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2118  if (Value > Mask)
2119  return;
2120  // If the constant is in range, we can use any comparison.
2121  C.ICmpType = SystemZICMP::Any;
2122  } else
2123  return;
2124 
2125  // Make sure that the first operand is an i32 of the right extension type.
2126  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2127  ISD::SEXTLOAD :
2128  ISD::ZEXTLOAD);
2129  if (C.Op0.getValueType() != MVT::i32 ||
2130  Load->getExtensionType() != ExtType) {
2131  C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2132  Load->getBasePtr(), Load->getPointerInfo(),
2133  Load->getMemoryVT(), Load->getAlignment(),
2134  Load->getMemOperand()->getFlags());
2135  // Update the chain uses.
2136  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2137  }
2138 
2139  // Make sure that the second operand is an i32 with the right value.
2140  if (C.Op1.getValueType() != MVT::i32 ||
2141  Value != ConstOp1->getZExtValue())
2142  C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2143 }
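// For example (illustrative note): a signed-only comparison of an 8-bit
// sign-extending load against 0 with CCMASK_CMP_LT is rewritten above as an
// unsigned comparison against 127 with CCMASK_CMP_GT (testing whether the
// sign bit of the byte is set), which allows CLI to be used.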
2144 
2145 // Return true if Op is either an unextended load, or a load suitable
2146 // for integer register-memory comparisons of type ICmpType.
2147 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2148  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2149  if (Load) {
2150  // There are no instructions to compare a register with a memory byte.
2151  if (Load->getMemoryVT() == MVT::i8)
2152  return false;
2153  // Otherwise decide on extension type.
2154  switch (Load->getExtensionType()) {
2155  case ISD::NON_EXTLOAD:
2156  return true;
2157  case ISD::SEXTLOAD:
2158  return ICmpType != SystemZICMP::UnsignedOnly;
2159  case ISD::ZEXTLOAD:
2160  return ICmpType != SystemZICMP::SignedOnly;
2161  default:
2162  break;
2163  }
2164  }
2165  return false;
2166 }
2167 
2168 // Return true if it is better to swap the operands of C.
2169 static bool shouldSwapCmpOperands(const Comparison &C) {
2170  // Leave f128 comparisons alone, since they have no memory forms.
2171  if (C.Op0.getValueType() == MVT::f128)
2172  return false;
2173 
2174  // Always keep a floating-point constant second, since comparisons with
2175  // zero can use LOAD TEST and comparisons with other constants make a
2176  // natural memory operand.
2177  if (isa<ConstantFPSDNode>(C.Op1))
2178  return false;
2179 
2180  // Never swap comparisons with zero since there are many ways to optimize
2181  // those later.
2182  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2183  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2184  return false;
2185 
2186  // Also keep natural memory operands second if the loaded value is
2187  // only used here. Several comparisons have memory forms.
2188  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2189  return false;
2190 
2191  // Look for cases where C.Op0 is a single-use load and C.Op1 isn't.
2192  // In that case we generally prefer the memory to be second.
2193  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2194  // The only exceptions are when the second operand is a constant and
2195  // we can use things like CHHSI.
2196  if (!ConstOp1)
2197  return true;
2198  // The unsigned memory-immediate instructions can handle 16-bit
2199  // unsigned integers.
2200  if (C.ICmpType != SystemZICMP::SignedOnly &&
2201  isUInt<16>(ConstOp1->getZExtValue()))
2202  return false;
2203  // The signed memory-immediate instructions can handle 16-bit
2204  // signed integers.
2205  if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2206  isInt<16>(ConstOp1->getSExtValue()))
2207  return false;
2208  return true;
2209  }
2210 
2211  // Try to promote the use of CGFR and CLGFR.
2212  unsigned Opcode0 = C.Op0.getOpcode();
2213  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2214  return true;
2215  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2216  return true;
2217  if (C.ICmpType != SystemZICMP::SignedOnly &&
2218  Opcode0 == ISD::AND &&
2219  C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2220  cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2221  return true;
2222 
2223  return false;
2224 }
2225 
2226 // Check whether C tests for equality between X and Y and whether X - Y
2227 // or Y - X is also computed. In that case it's better to compare the
2228 // result of the subtraction against zero.
2229 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2230  Comparison &C) {
2231  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2232  C.CCMask == SystemZ::CCMASK_CMP_NE) {
2233  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2234  SDNode *N = *I;
2235  if (N->getOpcode() == ISD::SUB &&
2236  ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2237  (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2238  C.Op0 = SDValue(N, 0);
2239  C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2240  return;
2241  }
2242  }
2243  }
2244 }
2245 
2246 // Check whether C compares a floating-point value with zero and if that
2247 // floating-point value is also negated. In this case we can use the
2248 // negation to set CC, thus avoiding separate LOAD AND TEST and
2249 // LOAD (NEGATIVE/COMPLEMENT) instructions.
2250 static void adjustForFNeg(Comparison &C) {
2251  // This optimization is invalid for strict comparisons, since FNEG
2252  // does not raise any exceptions.
2253  if (C.Chain)
2254  return;
2255  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2256  if (C1 && C1->isZero()) {
2257  for (auto I = C.Op0->use_begin(), E = C.Op0->use_end(); I != E; ++I) {
2258  SDNode *N = *I;
2259  if (N->getOpcode() == ISD::FNEG) {
2260  C.Op0 = SDValue(N, 0);
2261  C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2262  return;
2263  }
2264  }
2265  }
2266 }
2267 
2268 // Check whether C compares (shl X, 32) with 0 and whether X is
2269 // also sign-extended. In that case it is better to test the result
2270 // of the sign extension using LTGFR.
2271 //
2272 // This case is important because InstCombine transforms a comparison
2273 // with (sext (trunc X)) into a comparison with (shl X, 32).
2274 static void adjustForLTGFR(Comparison &C) {
2275  // Check for a comparison between (shl X, 32) and 0.
2276  if (C.Op0.getOpcode() == ISD::SHL &&
2277  C.Op0.getValueType() == MVT::i64 &&
2278  C.Op1.getOpcode() == ISD::Constant &&
2279  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2280  auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2281  if (C1 && C1->getZExtValue() == 32) {
2282  SDValue ShlOp0 = C.Op0.getOperand(0);
2283  // See whether X has any SIGN_EXTEND_INREG uses.
2284  for (auto I = ShlOp0->use_begin(), E = ShlOp0->use_end(); I != E; ++I) {
2285  SDNode *N = *I;
2286  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2287  cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2288  C.Op0 = SDValue(N, 0);
2289  return;
2290  }
2291  }
2292  }
2293  }
2294 }
2295 
2296 // If C compares the truncation of an extending load, try to compare
2297 // the untruncated value instead. This exposes more opportunities to
2298 // reuse CC.
2299 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2300  Comparison &C) {
2301  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2302  C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2303  C.Op1.getOpcode() == ISD::Constant &&
2304  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2305  auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2306  if (L->getMemoryVT().getStoreSizeInBits().getFixedSize() <=
2307  C.Op0.getValueSizeInBits().getFixedSize()) {
2308  unsigned Type = L->getExtensionType();
2309  if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2310  (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2311  C.Op0 = C.Op0.getOperand(0);
2312  C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2313  }
2314  }
2315  }
2316 }
2317 
2318 // Return true if shift operation N has an in-range constant shift value.
2319 // Store it in ShiftVal if so.
2320 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2321  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2322  if (!Shift)
2323  return false;
2324 
2325  uint64_t Amount = Shift->getZExtValue();
2326  if (Amount >= N.getValueSizeInBits())
2327  return false;
2328 
2329  ShiftVal = Amount;
2330  return true;
2331 }
2332 
2333 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
2334 // instruction and whether the CC value is descriptive enough to handle
2335 // a comparison of type Opcode between the AND result and CmpVal.
2336 // CCMask says which comparison result is being tested and BitSize is
2337 // the number of bits in the operands. If TEST UNDER MASK can be used,
2338 // return the corresponding CC mask, otherwise return 0.
2339 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2340  uint64_t Mask, uint64_t CmpVal,
2341  unsigned ICmpType) {
2342  assert(Mask != 0 && "ANDs with zero should have been removed by now");
2343 
2344  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2345  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2346  !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2347  return 0;
2348 
2349  // Work out the masks for the lowest and highest bits.
2350  unsigned HighShift = 63 - countLeadingZeros(Mask);
2351  uint64_t High = uint64_t(1) << HighShift;
2352  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2353 
2354  // Signed ordered comparisons are effectively unsigned if the sign
2355  // bit is dropped.
2356  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2357 
2358  // Check for equality comparisons with 0, or the equivalent.
2359  if (CmpVal == 0) {
2360  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2361  return SystemZ::CCMASK_TM_ALL_0;
2362  if (CCMask == SystemZ::CCMASK_CMP_NE)
2363  return SystemZ::CCMASK_TM_SOME_1;
2364  }
2365  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2366  if (CCMask == SystemZ::CCMASK_CMP_LT)
2367  return SystemZ::CCMASK_TM_ALL_0;
2368  if (CCMask == SystemZ::CCMASK_CMP_GE)
2369  return SystemZ::CCMASK_TM_SOME_1;
2370  }
2371  if (EffectivelyUnsigned && CmpVal < Low) {
2372  if (CCMask == SystemZ::CCMASK_CMP_LE)
2373  return SystemZ::CCMASK_TM_ALL_0;
2374  if (CCMask == SystemZ::CCMASK_CMP_GT)
2375  return SystemZ::CCMASK_TM_SOME_1;
2376  }
2377 
2378  // Check for equality comparisons with the mask, or the equivalent.
2379  if (CmpVal == Mask) {
2380  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2381  return SystemZ::CCMASK_TM_ALL_1;
2382  if (CCMask == SystemZ::CCMASK_CMP_NE)
2383  return SystemZ::CCMASK_TM_SOME_0;
2384  }
2385  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2386  if (CCMask == SystemZ::CCMASK_CMP_GT)
2387  return SystemZ::CCMASK_TM_ALL_1;
2388  if (CCMask == SystemZ::CCMASK_CMP_LE)
2389  return SystemZ::CCMASK_TM_SOME_0;
2390  }
2391  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2392  if (CCMask == SystemZ::CCMASK_CMP_GE)
2393  return SystemZ::CCMASK_TM_ALL_1;
2394  if (CCMask == SystemZ::CCMASK_CMP_LT)
2395  return SystemZ::CCMASK_TM_SOME_0;
2396  }
2397 
2398  // Check for ordered comparisons with the top bit.
2399  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2400  if (CCMask == SystemZ::CCMASK_CMP_LE)
2401  return SystemZ::CCMASK_TM_MSB_0;
2402  if (CCMask == SystemZ::CCMASK_CMP_GT)
2403  return SystemZ::CCMASK_TM_MSB_1;
2404  }
2405  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2406  if (CCMask == SystemZ::CCMASK_CMP_LT)
2407  return SystemZ::CCMASK_TM_MSB_0;
2408  if (CCMask == SystemZ::CCMASK_CMP_GE)
2409  return SystemZ::CCMASK_TM_MSB_1;
2410  }
2411 
2412  // If there are just two bits, we can do equality checks for Low and High
2413  // as well.
2414  if (Mask == Low + High) {
2415  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2416  return SystemZ::CCMASK_TM_MIXED_MSB_0;
2417  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2418  return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2419  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2420  return SystemZ::CCMASK_TM_MIXED_MSB_1;
2421  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2422  return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2423  }
2424 
2425  // Looks like we've exhausted our options.
2426  return 0;
2427 }
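// For example (illustrative note): Mask == 0x8000 with CmpVal == 0 and
// CCMASK_CMP_NE returns CCMASK_TM_SOME_1, so "x & 0x8000 != 0" can be
// implemented as a TEST UNDER MASK with a branch on "some bits one".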
2428 
2429 // See whether C can be implemented as a TEST UNDER MASK instruction.
2430 // Update the arguments with the TM version if so.
2431 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2432  Comparison &C) {
2433  // Check that we have a comparison with a constant.
2434  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2435  if (!ConstOp1)
2436  return;
2437  uint64_t CmpVal = ConstOp1->getZExtValue();
2438 
2439  // Check whether the nonconstant input is an AND with a constant mask.
2440  Comparison NewC(C);
2441  uint64_t MaskVal;
2442  ConstantSDNode *Mask = nullptr;
2443  if (C.Op0.getOpcode() == ISD::AND) {
2444  NewC.Op0 = C.Op0.getOperand(0);
2445  NewC.Op1 = C.Op0.getOperand(1);
2446  Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2447  if (!Mask)
2448  return;
2449  MaskVal = Mask->getZExtValue();
2450  } else {
2451  // There is no instruction to compare with a 64-bit immediate
2452  // so use TMHH instead if possible. We need an unsigned ordered
2453  // comparison with an i64 immediate.
2454  if (NewC.Op0.getValueType() != MVT::i64 ||
2455  NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2456  NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2457  NewC.ICmpType == SystemZICMP::SignedOnly)
2458  return;
2459  // Convert LE and GT comparisons into LT and GE.
2460  if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2461  NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2462  if (CmpVal == uint64_t(-1))
2463  return;
2464  CmpVal += 1;
2465  NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2466  }
2467  // If the low N bits of Op1 are zero then the low N bits of Op0 can
2468  // be masked off without changing the result.
2469  MaskVal = -(CmpVal & -CmpVal);
2470  NewC.ICmpType = SystemZICMP::UnsignedOnly;
2471  }
2472  if (!MaskVal)
2473  return;
2474 
2475  // Check whether the combination of mask, comparison value and comparison
2476  // type are suitable.
2477  unsigned BitSize = NewC.Op0.getValueSizeInBits();
2478  unsigned NewCCMask, ShiftVal;
2479  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2480  NewC.Op0.getOpcode() == ISD::SHL &&
2481  isSimpleShift(NewC.Op0, ShiftVal) &&
2482  (MaskVal >> ShiftVal != 0) &&
2483  ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2484  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2485  MaskVal >> ShiftVal,
2486  CmpVal >> ShiftVal,
2487  SystemZICMP::Any))) {
2488  NewC.Op0 = NewC.Op0.getOperand(0);
2489  MaskVal >>= ShiftVal;
2490  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2491  NewC.Op0.getOpcode() == ISD::SRL &&
2492  isSimpleShift(NewC.Op0, ShiftVal) &&
2493  (MaskVal << ShiftVal != 0) &&
2494  ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2495  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2496  MaskVal << ShiftVal,
2497  CmpVal << ShiftVal,
2498  SystemZICMP::UnsignedOnly))) {
2499  NewC.Op0 = NewC.Op0.getOperand(0);
2500  MaskVal <<= ShiftVal;
2501  } else {
2502  NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2503  NewC.ICmpType);
2504  if (!NewCCMask)
2505  return;
2506  }
2507 
2508  // Go ahead and make the change.
2509  C.Opcode = SystemZISD::TM;
2510  C.Op0 = NewC.Op0;
2511  if (Mask && Mask->getZExtValue() == MaskVal)
2512  C.Op1 = SDValue(Mask, 0);
2513  else
2514  C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2515  C.CCValid = SystemZ::CCMASK_TM;
2516  C.CCMask = NewCCMask;
2517 }
2518 
2519 // See whether the comparison argument contains a redundant AND
2520 // and remove it if so. This sometimes happens due to the generic
2521 // BRCOND expansion.
2522 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2523  Comparison &C) {
2524  if (C.Op0.getOpcode() != ISD::AND)
2525  return;
2526  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2527  if (!Mask)
2528  return;
2529  KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2530  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2531  return;
2532 
2533  C.Op0 = C.Op0.getOperand(0);
2534 }
2535 
2536 // Return a Comparison that tests the condition-code result of intrinsic
2537 // node Call against constant integer CC using comparison code Cond.
2538 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2539 // and CCValid is the set of possible condition-code results.
2540 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2541  SDValue Call, unsigned CCValid, uint64_t CC,
2542  ISD::CondCode Cond) {
2543  Comparison C(Call, SDValue(), SDValue());
2544  C.Opcode = Opcode;
2545  C.CCValid = CCValid;
2546  if (Cond == ISD::SETEQ)
2547  // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2548  C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2549  else if (Cond == ISD::SETNE)
2550  // ...and the inverse of that.
2551  C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2552  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2553  // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2554  // always true for CC>3.
2555  C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2556  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2557  // ...and the inverse of that.
2558  C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2559  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2560  // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2561  // always true for CC>3.
2562  C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2563  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2564  // ...and the inverse of that.
2565  C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2566  else
2567  llvm_unreachable("Unexpected integer comparison type");
2568  C.CCMask &= CCValid;
2569  return C;
2570 }
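// For example (illustrative note): testing for CC == 1 with Cond == ISD::SETEQ
// gives CCMask == 1 << (3 - 1) == 0b0100, which is then restricted to the
// condition codes the instruction can actually produce by "C.CCMask &= CCValid".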
2571 
2572 // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2573 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2574  ISD::CondCode Cond, const SDLoc &DL,
2575  SDValue Chain = SDValue(),
2576  bool IsSignaling = false) {
2577  if (CmpOp1.getOpcode() == ISD::Constant) {
2578  assert(!Chain);
2579  uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2580  unsigned Opcode, CCValid;
2581  if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2582  CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2583  isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2584  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2585  if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2586  CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2587  isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2588  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2589  }
2590  Comparison C(CmpOp0, CmpOp1, Chain);
2591  C.CCMask = CCMaskForCondCode(Cond);
2592  if (C.Op0.getValueType().isFloatingPoint()) {
2593  C.CCValid = SystemZ::CCMASK_FCMP;
2594  if (!C.Chain)
2595  C.Opcode = SystemZISD::FCMP;
2596  else if (!IsSignaling)
2597  C.Opcode = SystemZISD::STRICT_FCMP;
2598  else
2599  C.Opcode = SystemZISD::STRICT_FCMPS;
2600  adjustForFNeg(C);
2601  } else {
2602  assert(!C.Chain);
2603  C.CCValid = SystemZ::CCMASK_ICMP;
2604  C.Opcode = SystemZISD::ICMP;
2605  // Choose the type of comparison. Equality and inequality tests can
2606  // use either signed or unsigned comparisons. The choice also doesn't
2607  // matter if both sign bits are known to be clear. In those cases we
2608  // want to give the main isel code the freedom to choose whichever
2609  // form fits best.
2610  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2611  C.CCMask == SystemZ::CCMASK_CMP_NE ||
2612  (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2613  C.ICmpType = SystemZICMP::Any;
2614  else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2615  C.ICmpType = SystemZICMP::UnsignedOnly;
2616  else
2617  C.ICmpType = SystemZICMP::SignedOnly;
2618  C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2619  adjustForRedundantAnd(DAG, DL, C);
2620  adjustZeroCmp(DAG, DL, C);
2621  adjustSubwordCmp(DAG, DL, C);
2622  adjustForSubtraction(DAG, DL, C);
2623  adjustForLTGFR(C);
2624  adjustICmpTruncate(DAG, DL, C);
2625  }
2626 
2627  if (shouldSwapCmpOperands(C)) {
2628  std::swap(C.Op0, C.Op1);
2629  C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2630  }
2631 
2632  adjustForTestUnderMask(DAG, DL, C);
2633  return C;
2634 }
2635 
2636 // Emit the comparison instruction described by C.
2637 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2638  if (!C.Op1.getNode()) {
2639  SDNode *Node;
2640  switch (C.Op0.getOpcode()) {
2641  case ISD::INTRINSIC_W_CHAIN:
2642  Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2643  return SDValue(Node, 0);
2644  case ISD::INTRINSIC_WO_CHAIN:
2645  Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2646  return SDValue(Node, Node->getNumValues() - 1);
2647  default:
2648  llvm_unreachable("Invalid comparison operands");
2649  }
2650  }
2651  if (C.Opcode == SystemZISD::ICMP)
2652  return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2653  DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
2654  if (C.Opcode == SystemZISD::TM) {
2655  bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2656  bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2657  return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2658  DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
2659  }
2660  if (C.Chain) {
2661  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
2662  return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
2663  }
2664  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2665 }
2666 
2667 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2668 // 64 bits. Extend is the extension type to use. Store the high part
2669 // in Hi and the low part in Lo.
2670 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2671  SDValue Op0, SDValue Op1, SDValue &Hi,
2672  SDValue &Lo) {
2673  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2674  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2675  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2676  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2677  DAG.getConstant(32, DL, MVT::i64));
2678  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2679  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2680 }
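// For example (illustrative note): with Extend == ISD::ZERO_EXTEND and inputs
// 0x80000000 and 4, the 64-bit product is 0x200000000, so Hi becomes 2 and
// Lo becomes 0.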
2681 
2682 // Lower a binary operation that produces two VT results, one in each
2683 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2684 // and Opcode performs the GR128 operation. Store the even register result
2685 // in Even and the odd register result in Odd.
2686 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2687  unsigned Opcode, SDValue Op0, SDValue Op1,
2688  SDValue &Even, SDValue &Odd) {
2689  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2690  bool Is32Bit = is32Bit(VT);
2691  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2692  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2693 }
2694 
2695 // Return an i32 value that is 1 if the CC value produced by CCReg is
2696 // in the mask CCMask and 0 otherwise. CC is known to have a value
2697 // in CCValid, so other values can be ignored.
2698 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2699  unsigned CCValid, unsigned CCMask) {
2700  SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
2701  DAG.getConstant(0, DL, MVT::i32),
2702  DAG.getTargetConstant(CCValid, DL, MVT::i32),
2703  DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
2704  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2705 }
2706 
2707 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2708 // be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
2709 // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
2710 // floating-point comparisons, and CmpMode::SignalingFP for strict signaling
2711 // floating-point comparisons.
2712 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
2713 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
2714  switch (CC) {
2715  case ISD::SETOEQ:
2716  case ISD::SETEQ:
2717  switch (Mode) {
2718  case CmpMode::Int: return SystemZISD::VICMPE;
2719  case CmpMode::FP: return SystemZISD::VFCMPE;
2720  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
2721  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
2722  }
2723  llvm_unreachable("Bad mode");
2724 
2725  case ISD::SETOGE:
2726  case ISD::SETGE:
2727  switch (Mode) {
2728  case CmpMode::Int: return 0;
2729  case CmpMode::FP: return SystemZISD::VFCMPHE;
2730  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
2731  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
2732  }
2733  llvm_unreachable("Bad mode");
2734 
2735  case ISD::SETOGT:
2736  case ISD::SETGT:
2737  switch (Mode) {
2738  case CmpMode::Int: return SystemZISD::VICMPH;
2739  case CmpMode::FP: return SystemZISD::VFCMPH;
2740  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
2741  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
2742  }
2743  llvm_unreachable("Bad mode");
2744 
2745  case ISD::SETUGT:
2746  switch (Mode) {
2747  case CmpMode::Int: return SystemZISD::VICMPHL;
2748  case CmpMode::FP: return 0;
2749  case CmpMode::StrictFP: return 0;
2750  case CmpMode::SignalingFP: return 0;
2751  }
2752  llvm_unreachable("Bad mode");
2753 
2754  default:
2755  return 0;
2756  }
2757 }
2758 
2759 // Return the SystemZISD vector comparison operation for CC or its inverse,
2760 // or 0 if neither can be done directly. Indicate in Invert whether the
2761 // result is for the inverse of CC. Mode is as above.
2762 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
2763  bool &Invert) {
2764  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2765  Invert = false;
2766  return Opcode;
2767  }
2768 
2769  CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
2770  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2771  Invert = true;
2772  return Opcode;
2773  }
2774 
2775  return 0;
2776 }
2777 
2778 // Return a v2f64 that contains the extended form of elements Start and Start+1
2779 // of v4f32 value Op. If Chain is nonnull, return the strict form.
2780 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2781  SDValue Op, SDValue Chain) {
2782  int Mask[] = { Start, -1, Start + 1, -1 };
2783  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, Op, Mask);
2784  if (Chain) {
2785  SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
2786  return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
2787  }
2788  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2789 }
2790 
2791 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2792 // producing a result of type VT. If Chain is nonnull, return the strict form.
2793 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2794  const SDLoc &DL, EVT VT,
2795  SDValue CmpOp0,
2796  SDValue CmpOp1,
2797  SDValue Chain) const {
2798  // There is no hardware support for v4f32 (unless we have the vector
2799  // enhancements facility 1), so extend the vector into two v2f64s
2800  // and compare those.
2801  if (CmpOp0.getValueType() == MVT::v4f32 &&
2802  !Subtarget.hasVectorEnhancements1()) {
2803  SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
2804  SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
2805  SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
2806  SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
2807  if (Chain) {
2808  SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
2809  SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
2810  SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
2811  SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2812  SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
2813  H1.getValue(1), L1.getValue(1),
2814  HRes.getValue(1), LRes.getValue(1) };
2815  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
2816  SDValue Ops[2] = { Res, NewChain };
2817  return DAG.getMergeValues(Ops, DL);
2818  }
2819  SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2820  SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2821  return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2822  }
2823  if (Chain) {
2824  SDVTList VTs = DAG.getVTList(VT, MVT::Other);
2825  return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
2826  }
2827  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
2828 }
2829 
2830 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
2831 // an integer mask of type VT. If Chain is nonnull, we have a strict
2832 // floating-point comparison. If in addition IsSignaling is true, we have
2833 // a strict signaling floating-point comparison.
2834 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
2835  const SDLoc &DL, EVT VT,
2836  ISD::CondCode CC,
2837  SDValue CmpOp0,
2838  SDValue CmpOp1,
2839  SDValue Chain,
2840  bool IsSignaling) const {
2841  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
2842  assert (!Chain || IsFP);
2843  assert (!IsSignaling || Chain);
2844  CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
2845  Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
2846  bool Invert = false;
2847  SDValue Cmp;
2848  switch (CC) {
2849  // Handle tests for order using (or (ogt y x) (oge x y)).
2850  case ISD::SETUO:
2851  Invert = true;
2852  LLVM_FALLTHROUGH;
2853  case ISD::SETO: {
2854  assert(IsFP && "Unexpected integer comparison");
2855  SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2856  DL, VT, CmpOp1, CmpOp0, Chain);
2857  SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
2858  DL, VT, CmpOp0, CmpOp1, Chain);
2859  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
2860  if (Chain)
2861  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2862  LT.getValue(1), GE.getValue(1));
2863  break;
2864  }
2865 
2866  // Handle <> tests using (or (ogt y x) (ogt x y)).
2867  case ISD::SETUEQ:
2868  Invert = true;
2869  LLVM_FALLTHROUGH;
2870  case ISD::SETONE: {
2871  assert(IsFP && "Unexpected integer comparison");
2872  SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2873  DL, VT, CmpOp1, CmpOp0, Chain);
2874  SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
2875  DL, VT, CmpOp0, CmpOp1, Chain);
2876  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
2877  if (Chain)
2878  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2879  LT.getValue(1), GT.getValue(1));
2880  break;
2881  }
2882 
2883  // Otherwise a single comparison is enough. It doesn't really
2884  // matter whether we try the inversion or the swap first, since
2885  // there are no cases where both work.
2886  default:
2887  if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
2888  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
2889  else {
2890  CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
2891  if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
2892  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
2893  else
2894  llvm_unreachable("Unhandled comparison");
2895  }
2896  if (Chain)
2897  Chain = Cmp.getValue(1);
2898  break;
2899  }
2900  if (Invert) {
2901  SDValue Mask =
2902  DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
2903  Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
2904  }
2905  if (Chain && Chain.getNode() != Cmp.getNode()) {
2906  SDValue Ops[2] = { Cmp, Chain };
2907  Cmp = DAG.getMergeValues(Ops, DL);
2908  }
2909  return Cmp;
2910 }
2911 
2912 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
2913  SelectionDAG &DAG) const {
2914  SDValue CmpOp0 = Op.getOperand(0);
2915  SDValue CmpOp1 = Op.getOperand(1);
2916  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
2917  SDLoc DL(Op);
2918  EVT VT = Op.getValueType();
2919  if (VT.isVector())
2920  return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
2921 
2922  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2923  SDValue CCReg = emitCmp(DAG, DL, C);
2924  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
2925 }
2926 
2927 SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
2928  SelectionDAG &DAG,
2929  bool IsSignaling) const {
2930  SDValue Chain = Op.getOperand(0);
2931  SDValue CmpOp0 = Op.getOperand(1);
2932  SDValue CmpOp1 = Op.getOperand(2);
2933  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
2934  SDLoc DL(Op);
2935  EVT VT = Op.getNode()->getValueType(0);
2936  if (VT.isVector()) {
2937  SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
2938  Chain, IsSignaling);
2939  return Res.getValue(Op.getResNo());
2940  }
2941 
2942  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
2943  SDValue CCReg = emitCmp(DAG, DL, C);
2944  CCReg->setFlags(Op->getFlags());
2945  SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
2946  SDValue Ops[2] = { Result, CCReg.getValue(1) };
2947  return DAG.getMergeValues(Ops, DL);
2948 }
2949 
2950 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
2951  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
2952  SDValue CmpOp0 = Op.getOperand(2);
2953  SDValue CmpOp1 = Op.getOperand(3);
2954  SDValue Dest = Op.getOperand(4);
2955  SDLoc DL(Op);
2956 
2957  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2958  SDValue CCReg = emitCmp(DAG, DL, C);
2959  return DAG.getNode(
2960  SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
2961  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
2962  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
2963 }
2964 
2965 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
2966 // allowing Pos and Neg to be wider than CmpOp.
2967 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
2968  return (Neg.getOpcode() == ISD::SUB &&
2969  Neg.getOperand(0).getOpcode() == ISD::Constant &&
2970  cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
2971  Neg.getOperand(1) == Pos &&
2972  (Pos == CmpOp ||
2973  (Pos.getOpcode() == ISD::SIGN_EXTEND &&
2974  Pos.getOperand(0) == CmpOp)));
2975 }
2976 
2977 // Return the absolute or negative absolute of Op; IsNegative decides which.
2978 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
2979  bool IsNegative) {
2980  Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
2981  if (IsNegative)
2982  Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
2983  DAG.getConstant(0, DL, Op.getValueType()), Op);
2984  return Op;
2985 }
2986 
2987 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
2988  SelectionDAG &DAG) const {
2989  SDValue CmpOp0 = Op.getOperand(0);
2990  SDValue CmpOp1 = Op.getOperand(1);
2991  SDValue TrueOp = Op.getOperand(2);
2992  SDValue FalseOp = Op.getOperand(3);
2993  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
2994  SDLoc DL(Op);
2995 
2996  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
2997 
2998  // Check for absolute and negative-absolute selections, including those
2999  // where the comparison value is sign-extended (for LPGFR and LNGFR).
3000  // This check supplements the one in DAGCombiner.
3001  if (C.Opcode == SystemZISD::ICMP &&
3002  C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3003  C.CCMask != SystemZ::CCMASK_CMP_NE &&
3004  C.Op1.getOpcode() == ISD::Constant &&
3005  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
3006  if (isAbsolute(C.Op0, TrueOp, FalseOp))
3007  return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3008  if (isAbsolute(C.Op0, FalseOp, TrueOp))
3009  return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3010  }
3011 
3012  SDValue CCReg = emitCmp(DAG, DL, C);
3013  SDValue Ops[] = {TrueOp, FalseOp,
3014  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3015  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3016 
3017  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3018 }
3019 
3020 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3021  SelectionDAG &DAG) const {
3022  SDLoc DL(Node);
3023  const GlobalValue *GV = Node->getGlobal();
3024  int64_t Offset = Node->getOffset();
3025  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3026  CodeModel::Model CM = DAG.getTarget().getCodeModel();
3027 
3028  SDValue Result;
3029  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3030  if (isInt<32>(Offset)) {
3031  // Assign anchors at 1<<12 byte boundaries.
3032  uint64_t Anchor = Offset & ~uint64_t(0xfff);
3033  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3034  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3035 
3036  // The offset can be folded into the address if it is aligned to a
3037  // halfword.
3038  Offset -= Anchor;
3039  if (Offset != 0 && (Offset & 1) == 0) {
3040  SDValue Full =
3041  DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3042  Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3043  Offset = 0;
3044  }
3045  } else {
3046  // Conservatively load a constant offset greater than 32 bits into a
3047  // register below.
3048  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3049  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3050  }
3051  } else {
3052  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3053  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3054  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3055  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3056  }
3057 
3058  // If there was a non-zero offset that we didn't fold, create an explicit
3059  // addition for it.
3060  if (Offset != 0)
3061  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3062  DAG.getConstant(Offset, DL, PtrVT));
3063 
3064  return Result;
3065 }
3066 
3067 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3068  SelectionDAG &DAG,
3069  unsigned Opcode,
3070  SDValue GOTOffset) const {
3071  SDLoc DL(Node);
3072  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3073  SDValue Chain = DAG.getEntryNode();
3074  SDValue Glue;
3075 
3076  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3077  CallingConv::GHC)
3078  report_fatal_error("In GHC calling convention TLS is not supported");
3079 
3080  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3081  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3082  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3083  Glue = Chain.getValue(1);
3084  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3085  Glue = Chain.getValue(1);
3086 
3087  // The first call operand is the chain and the second is the TLS symbol.
3088  SmallVector<SDValue, 8> Ops;
3089  Ops.push_back(Chain);
3090  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3091  Node->getValueType(0),
3092  0, 0));
3093 
3094  // Add argument registers to the end of the list so that they are
3095  // known live into the call.
3096  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3097  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3098 
3099  // Add a register mask operand representing the call-preserved registers.
3100  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3101  const uint32_t *Mask =
3102  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3103  assert(Mask && "Missing call preserved mask for calling convention");
3104  Ops.push_back(DAG.getRegisterMask(Mask));
3105 
3106  // Glue the call to the argument copies.
3107  Ops.push_back(Glue);
3108 
3109  // Emit the call.
3110  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3111  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3112  Glue = Chain.getValue(1);
3113 
3114  // Copy the return value from %r2.
3115  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3116 }
3117 
3118 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3119  SelectionDAG &DAG) const {
3120  SDValue Chain = DAG.getEntryNode();
3121  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3122 
3123  // The high part of the thread pointer is in access register 0.
3124  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3125  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3126 
3127  // The low part of the thread pointer is in access register 1.
3128  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3129  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3130 
3131  // Merge them into a single 64-bit address.
3132  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3133  DAG.getConstant(32, DL, PtrVT));
3134  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3135 }
3136 
3137 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3138  SelectionDAG &DAG) const {
3139  if (DAG.getTarget().useEmulatedTLS())
3140  return LowerToTLSEmulatedModel(Node, DAG);
3141  SDLoc DL(Node);
3142  const GlobalValue *GV = Node->getGlobal();
3143  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3144  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3145 
3146  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3147  CallingConv::GHC)
3148  report_fatal_error("In GHC calling convention TLS is not supported");
3149 
3150  SDValue TP = lowerThreadPointer(DL, DAG);
3151 
3152  // Get the offset of GA from the thread pointer, based on the TLS model.
3153  SDValue Offset;
3154  switch (model) {
3155  case TLSModel::GeneralDynamic: {
3156  // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3157  SystemZConstantPoolValue *CPV =
3158  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3159 
3160  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3161  Offset = DAG.getLoad(
3162  PtrVT, DL, DAG.getEntryNode(), Offset,
3163  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3164 
3165  // Call __tls_get_offset to retrieve the offset.
3166  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3167  break;
3168  }
3169 
3170  case TLSModel::LocalDynamic: {
3171  // Load the GOT offset of the module ID.
3172  SystemZConstantPoolValue *CPV =
3173  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3174 
3175  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3176  Offset = DAG.getLoad(
3177  PtrVT, DL, DAG.getEntryNode(), Offset,
3178  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3179 
3180  // Call __tls_get_offset to retrieve the module base offset.
3181  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3182 
3183  // Note: The SystemZLDCleanupPass will remove redundant computations
3184  // of the module base offset. Count total number of local-dynamic
3185  // accesses to trigger execution of that pass.
3186  SystemZMachineFunctionInfo* MFI =
3187  DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3188  MFI->incNumLocalDynamicTLSAccesses();
3189 
3190  // Add the per-symbol offset.
3191  CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3192 
3193  SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3194  DTPOffset = DAG.getLoad(
3195  PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3196  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3197 
3198  Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3199  break;
3200  }
3201 
3202  case TLSModel::InitialExec: {
3203  // Load the offset from the GOT.
3204  Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3205  SystemZII::MO_INDNTPOFF);
3206  Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3207  Offset =
3208  DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3209  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3210  break;
3211  }
3212 
3213  case TLSModel::LocalExec: {
3214  // Force the offset into the constant pool and load it from there.
3215  SystemZConstantPoolValue *CPV =
3216  SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3217 
3218  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3219  Offset = DAG.getLoad(
3220  PtrVT, DL, DAG.getEntryNode(), Offset,
3221  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3222  break;
3223  }
3224  }
3225 
3226  // Add the base and offset together.
3227  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3228 }
3229 
3230 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3231  SelectionDAG &DAG) const {
3232  SDLoc DL(Node);
3233  const BlockAddress *BA = Node->getBlockAddress();
3234  int64_t Offset = Node->getOffset();
3235  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3236 
3237  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3238  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3239  return Result;
3240 }
3241 
3242 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3243  SelectionDAG &DAG) const {
3244  SDLoc DL(JT);
3245  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3246  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3247 
3248  // Use LARL to load the address of the table.
3249  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3250 }
3251 
3252 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3253  SelectionDAG &DAG) const {
3254  SDLoc DL(CP);
3255  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3256 
3257  SDValue Result;
3258  if (CP->isMachineConstantPoolEntry())
3259  Result =
3260  DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3261  else
3262  Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3263  CP->getOffset());
3264 
3265  // Use LARL to load the address of the constant pool entry.
3266  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3267 }
3268 
3269 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3270  SelectionDAG &DAG) const {
3271  auto *TFL =
3272  static_cast<const SystemZFrameLowering *>(Subtarget.getFrameLowering());
3273  MachineFunction &MF = DAG.getMachineFunction();
3274  MachineFrameInfo &MFI = MF.getFrameInfo();
3275  MFI.setFrameAddressIsTaken(true);
3276 
3277  SDLoc DL(Op);
3278  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3279  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3280 
3281  // Return null if the back chain is not present.
3282  bool HasBackChain = MF.getFunction().hasFnAttribute("backchain");
3283  if (TFL->usePackedStack(MF) && !HasBackChain)
3284  return DAG.getConstant(0, DL, PtrVT);
3285 
3286  // By definition, the frame address is the address of the back chain.
3287  int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3288  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3289 
3290  // FIXME The frontend should detect this case.
3291  if (Depth > 0) {
3292  report_fatal_error("Unsupported stack frame traversal count");
3293  }
3294 
3295  return BackChain;
3296 }
3297 
3298 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3299  SelectionDAG &DAG) const {
3300  MachineFunction &MF = DAG.getMachineFunction();
3301  MachineFrameInfo &MFI = MF.getFrameInfo();
3302  MFI.setReturnAddressIsTaken(true);
3303 
3304  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3305  return SDValue();
3306 
3307  SDLoc DL(Op);
3308  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3309  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3310 
3311  // FIXME The frontend should detect this case.
3312  if (Depth > 0) {
3313  report_fatal_error("Unsupported stack frame traversal count");
3314  }
3315 
3316  // Return R14D, which has the return address. Mark it an implicit live-in.
3317  unsigned LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3318  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3319 }
3320 
3321 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3322  SelectionDAG &DAG) const {
3323  SDLoc DL(Op);
3324  SDValue In = Op.getOperand(0);
3325  EVT InVT = In.getValueType();
3326  EVT ResVT = Op.getValueType();
3327 
3328  // Convert loads directly. This is normally done by DAGCombiner,
3329  // but we need this case for bitcasts that are created during lowering
3330  // and which are then lowered themselves.
3331  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3332  if (ISD::isNormalLoad(LoadN)) {
3333  SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3334  LoadN->getBasePtr(), LoadN->getMemOperand());
3335  // Update the chain uses.
3336  DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3337  return NewLoad;
3338  }
3339 
3340  if (InVT == MVT::i32 && ResVT == MVT::f32) {
3341  SDValue In64;
3342  if (Subtarget.hasHighWord()) {
3343  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3344  MVT::i64);
3345  In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3346  MVT::i64, SDValue(U64, 0), In);
3347  } else {
3348  In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3349  In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3350  DAG.getConstant(32, DL, MVT::i64));
3351  }
3352  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3353  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3354  DL, MVT::f32, Out64);
3355  }
3356  if (InVT == MVT::f32 && ResVT == MVT::i32) {
3357  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3358  SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3359  MVT::f64, SDValue(U64, 0), In);
3360  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3361  if (Subtarget.hasHighWord())
3362  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3363  MVT::i32, Out64);
3364  SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3365  DAG.getConstant(32, DL, MVT::i64));
3366  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3367  }
3368  llvm_unreachable("Unexpected bitcast combination");
3369 }
3370 
3371 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3372  SelectionDAG &DAG) const {
3373  MachineFunction &MF = DAG.getMachineFunction();
3374  SystemZMachineFunctionInfo *FuncInfo =
3375  MF.getInfo<SystemZMachineFunctionInfo>();
3376  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3377 
3378  SDValue Chain = Op.getOperand(0);
3379  SDValue Addr = Op.getOperand(1);
3380  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3381  SDLoc DL(Op);
3382 
3383  // The initial values of each field.
3384  const unsigned NumFields = 4;
3385  SDValue Fields[NumFields] = {
3386  DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3387  DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3388  DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3389  DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3390  };
3391 
3392  // Store each field into its respective slot.
3393  SDValue MemOps[NumFields];
3394  unsigned Offset = 0;
3395  for (unsigned I = 0; I < NumFields; ++I) {
3396  SDValue FieldAddr = Addr;
3397  if (Offset != 0)
3398  FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3399  DAG.getIntPtrConstant(Offset, DL));
3400  MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3401  MachinePointerInfo(SV, Offset));
3402  Offset += 8;
3403  }
3404  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3405 }
3406 
3407 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3408  SelectionDAG &DAG) const {
3409  SDValue Chain = Op.getOperand(0);
3410  SDValue DstPtr = Op.getOperand(1);
3411  SDValue SrcPtr = Op.getOperand(2);
3412  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3413  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3414  SDLoc DL(Op);
3415 
3416  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32, DL),
3417  Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3418  /*isTailCall*/ false, MachinePointerInfo(DstSV),
3419  MachinePointerInfo(SrcSV));
3420 }
3421 
3422 SDValue SystemZTargetLowering::
3423 lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const {
3424  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3425  MachineFunction &MF = DAG.getMachineFunction();
3426  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3427  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3428 
3429  SDValue Chain = Op.getOperand(0);
3430  SDValue Size = Op.getOperand(1);
3431  SDValue Align = Op.getOperand(2);
3432  SDLoc DL(Op);
3433 
3434  // If the user has set the "no-realign-stack" function attribute, ignore
3435  // alloca alignments.
3436  uint64_t AlignVal =
3437  (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3438 
3439  uint64_t StackAlign = TFI->getStackAlignment();
3440  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3441  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3442 
3443  Register SPReg = getStackPointerRegisterToSaveRestore();
3444  SDValue NeededSpace = Size;
3445 
3446  // Get a reference to the stack pointer.
3447  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3448 
3449  // If we need a backchain, save it now.
3450  SDValue Backchain;
3451  if (StoreBackchain)
3452  Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
3453  MachinePointerInfo());
3454 
3455  // Add extra space for alignment if needed.
3456  if (ExtraAlignSpace)
3457  NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3458  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3459 
3460  // Get the new stack pointer value.
3461  SDValue NewSP;
3462  if (hasInlineStackProbe(MF)) {
3463  NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
3464  DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
3465  Chain = NewSP.getValue(1);
3466  }
3467  else {
3468  NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3469  // Copy the new stack pointer back.
3470  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3471  }
3472 
3473  // The allocated data lives above the 160 bytes allocated for the standard
3474  // frame, plus any outgoing stack arguments. We don't know how much that
3475  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3476  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3477  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3478 
3479  // Dynamically realign if needed.
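  // For example, with an 8-byte-aligned stack and a 16-byte-aligned alloca,
  // ExtraAlignSpace is 8: those bytes were already added to NeededSpace above,
  // and adding them again to Result before masking with ~15 rounds Result up
  // to the next 16-byte boundary while staying inside the over-allocated
  // block.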
3480  if (RequiredAlign > StackAlign) {
3481  Result =
3482  DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3483  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3484  Result =
3485  DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3486  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3487  }
3488 
3489  if (StoreBackchain)
3490  Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
3491  MachinePointerInfo());
3492 
3493  SDValue Ops[2] = { Result, Chain };
3494  return DAG.getMergeValues(Ops, DL);
3495 }
3496 
3497 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3498  SDValue Op, SelectionDAG &DAG) const {
3499  SDLoc DL(Op);
3500 
 3501  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
 3502 }
3503 
3504 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3505  SelectionDAG &DAG) const {
3506  EVT VT = Op.getValueType();
3507  SDLoc DL(Op);
3508  SDValue Ops[2];
3509  if (is32Bit(VT))
3510  // Just do a normal 64-bit multiplication and extract the results.
3511  // We define this so that it can be used for constant division.
3512  lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3513  Op.getOperand(1), Ops[1], Ops[0]);
3514  else if (Subtarget.hasMiscellaneousExtensions2())
3515  // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3516  // the high result in the even register. ISD::SMUL_LOHI is defined to
3517  // return the low half first, so the results are in reverse order.
 3518  lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
 3519  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3520  else {
3521  // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3522  //
3523  // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3524  //
3525  // but using the fact that the upper halves are either all zeros
3526  // or all ones:
3527  //
3528  // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3529  //
 3530  // and grouping the two right-hand terms together, since they are cheaper
 3531  // to compute than the multiplication:
3532  //
3533  // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
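  // This identity can be checked by writing the signed inputs as
  //   l = ll - 2^64 * sl   and   r = rl - 2^64 * sr
  // where sl and sr are the sign bits.  Modulo 2^128 the product is then
  //   ll * rl - ((sl ? rl : 0) + (sr ? ll : 0)) << 64
  // and (sl ? rl : 0) is exactly (lh & rl), because lh (computed below as an
  // arithmetic shift by 63) is either all zeros or all ones.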
3534  SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3535  SDValue LL = Op.getOperand(0);
3536  SDValue RL = Op.getOperand(1);
3537  SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3538  SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3539  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3540  // the high result in the even register. ISD::SMUL_LOHI is defined to
3541  // return the low half first, so the results are in reverse order.
 3542  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
 3543  LL, RL, Ops[1], Ops[0]);
3544  SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3545  SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3546  SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3547  Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3548  }
3549  return DAG.getMergeValues(Ops, DL);
3550 }
3551 
3552 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3553  SelectionDAG &DAG) const {
3554  EVT VT = Op.getValueType();
3555  SDLoc DL(Op);
3556  SDValue Ops[2];
3557  if (is32Bit(VT))
3558  // Just do a normal 64-bit multiplication and extract the results.
3559  // We define this so that it can be used for constant division.
3560  lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3561  Op.getOperand(1), Ops[1], Ops[0]);
3562  else
3563  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3564  // the high result in the even register. ISD::UMUL_LOHI is defined to
3565  // return the low half first, so the results are in reverse order.
 3566  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
 3567  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3568  return DAG.getMergeValues(Ops, DL);
3569 }
3570 
3571 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3572  SelectionDAG &DAG) const {
3573  SDValue Op0 = Op.getOperand(0);
3574  SDValue Op1 = Op.getOperand(1);
3575  EVT VT = Op.getValueType();
3576  SDLoc DL(Op);
3577 
3578  // We use DSGF for 32-bit division. This means the first operand must
3579  // always be 64-bit, and the second operand should be 32-bit whenever
3580  // that is possible, to improve performance.
3581  if (is32Bit(VT))
3582  Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3583  else if (DAG.ComputeNumSignBits(Op1) > 32)
3584  Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3585 
3586  // DSG(F) returns the remainder in the even register and the
3587  // quotient in the odd register.
3588  SDValue Ops[2];
3589  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3590  return DAG.getMergeValues(Ops, DL);
3591 }
3592 
3593 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3594  SelectionDAG &DAG) const {
3595  EVT VT = Op.getValueType();
3596  SDLoc DL(Op);
3597 
3598  // DL(G) returns the remainder in the even register and the
3599  // quotient in the odd register.
3600  SDValue Ops[2];
 3601  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
 3602  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3603  return DAG.getMergeValues(Ops, DL);
3604 }
3605 
3606 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3607  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3608 
3609  // Get the known-zero masks for each operand.
3610  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3611  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3612  DAG.computeKnownBits(Ops[1])};
3613 
3614  // See if the upper 32 bits of one operand and the lower 32 bits of the
3615  // other are known zero. They are the low and high operands respectively.
3616  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3617  Known[1].Zero.getZExtValue() };
3618  unsigned High, Low;
3619  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3620  High = 1, Low = 0;
3621  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3622  High = 0, Low = 1;
3623  else
3624  return Op;
3625 
3626  SDValue LowOp = Ops[Low];
3627  SDValue HighOp = Ops[High];
3628 
3629  // If the high part is a constant, we're better off using IILH.
3630  if (HighOp.getOpcode() == ISD::Constant)
3631  return Op;
3632 
3633  // If the low part is a constant that is outside the range of LHI,
3634  // then we're better off using IILF.
3635  if (LowOp.getOpcode() == ISD::Constant) {
3636  int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3637  if (!isInt<16>(Value))
3638  return Op;
3639  }
3640 
3641  // Check whether the high part is an AND that doesn't change the
3642  // high 32 bits and just masks out low bits. We can skip it if so.
3643  if (HighOp.getOpcode() == ISD::AND &&
3644  HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3645  SDValue HighOp0 = HighOp.getOperand(0);
3646  uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3647  if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3648  HighOp = HighOp0;
3649  }
3650 
3651  // Take advantage of the fact that all GR32 operations only change the
3652  // low 32 bits by truncating Low to an i32 and inserting it directly
3653  // using a subreg. The interesting cases are those where the truncation
3654  // can be folded.
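  // A typical case is (or (and X, 0xffffffff00000000), (zext Y:i32)):
  // the AND is skipped by the check above and the zero-extension folds away,
  // leaving a single insertion of Y into the low 32-bit subregister of X.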
3655  SDLoc DL(Op);
3656  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3657  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3658  MVT::i64, HighOp, Low32);
3659 }
3660 
3661 // Lower SADDO/SSUBO/UADDO/USUBO nodes.
3662 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3663  SelectionDAG &DAG) const {
3664  SDNode *N = Op.getNode();
3665  SDValue LHS = N->getOperand(0);
3666  SDValue RHS = N->getOperand(1);
3667  SDLoc DL(N);
3668  unsigned BaseOp = 0;
3669  unsigned CCValid = 0;
3670  unsigned CCMask = 0;
3671 
3672  switch (Op.getOpcode()) {
3673  default: llvm_unreachable("Unknown instruction!");
3674  case ISD::SADDO:
3675  BaseOp = SystemZISD::SADDO;
3676  CCValid = SystemZ::CCMASK_ARITH;
 3677  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
 3678  break;
3679  case ISD::SSUBO:
3680  BaseOp = SystemZISD::SSUBO;
3681  CCValid = SystemZ::CCMASK_ARITH;
 3682  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
 3683  break;
3684  case ISD::UADDO:
3685  BaseOp = SystemZISD::UADDO;
3686  CCValid = SystemZ::CCMASK_LOGICAL;
 3687  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
 3688  break;
3689  case ISD::USUBO:
3690  BaseOp = SystemZISD::USUBO;
3691  CCValid = SystemZ::CCMASK_LOGICAL;
 3692  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
 3693  break;
3694  }
3695 
3696  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3697  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3698 
3699  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3700  if (N->getValueType(1) == MVT::i1)
3701  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3702 
3703  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3704 }
3705 
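// The two helpers below check that the incoming carry/borrow can be traced
// through a chain of ADDCARRY (or SUBCARRY) nodes back to a UADDO (or USUBO),
// i.e. that it is genuinely available as the logical carry/borrow in CC.
// If not, lowerADDSUBCARRY falls back to the generic expansion.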
3706 static bool isAddCarryChain(SDValue Carry) {
3707  while (Carry.getOpcode() == ISD::ADDCARRY)
3708  Carry = Carry.getOperand(2);
3709  return Carry.getOpcode() == ISD::UADDO;
3710 }
3711 
3712 static bool isSubBorrowChain(SDValue Carry) {
3713  while (Carry.getOpcode() == ISD::SUBCARRY)
3714  Carry = Carry.getOperand(2);
3715  return Carry.getOpcode() == ISD::USUBO;
3716 }
3717 
3718 // Lower ADDCARRY/SUBCARRY nodes.
3719 SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
3720  SelectionDAG &DAG) const {
3721 
3722  SDNode *N = Op.getNode();
3723  MVT VT = N->getSimpleValueType(0);
3724 
3725  // Let legalize expand this if it isn't a legal type yet.
3726  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
3727  return SDValue();
3728 
3729  SDValue LHS = N->getOperand(0);
3730  SDValue RHS = N->getOperand(1);
3731  SDValue Carry = Op.getOperand(2);
3732  SDLoc DL(N);
3733  unsigned BaseOp = 0;
3734  unsigned CCValid = 0;
3735  unsigned CCMask = 0;
3736 
3737  switch (Op.getOpcode()) {
3738  default: llvm_unreachable("Unknown instruction!");
3739  case ISD::ADDCARRY:
3740  if (!isAddCarryChain(Carry))
3741  return SDValue();
3742 
3743  BaseOp = SystemZISD::ADDCARRY;
3744  CCValid = SystemZ::CCMASK_LOGICAL;
 3745  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
 3746  break;
3747  case ISD::SUBCARRY:
3748  if (!isSubBorrowChain(Carry))
3749  return SDValue();
3750 
3751  BaseOp = SystemZISD::SUBCARRY;
3752  CCValid = SystemZ::CCMASK_LOGICAL;
 3753  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
 3754  break;
3755  }
3756 
3757  // Set the condition code from the carry flag.
3758  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
3759  DAG.getConstant(CCValid, DL, MVT::i32),
3760  DAG.getConstant(CCMask, DL, MVT::i32));
3761 
3762  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
3763  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
3764 
3765  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3766  if (N->getValueType(1) == MVT::i1)
3767  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3768 
3769  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3770 }
3771 
3772 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
3773  SelectionDAG &DAG) const {
3774  EVT VT = Op.getValueType();
3775  SDLoc DL(Op);
3776  Op = Op.getOperand(0);
3777 
3778  // Handle vector types via VPOPCT.
3779  if (VT.isVector()) {
3780  Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
 3781  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
 3782  switch (VT.getScalarSizeInBits()) {
3783  case 8:
3784  break;
3785  case 16: {
3786  Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
3787  SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
 3788  SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
 3789  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
 3790  Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
 3791  break;
3792  }
3793  case 32: {
 3794  SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
 3795  DAG.getConstant(0, DL, MVT::i32));
3796  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3797  break;
3798  }
3799  case 64: {
 3800  SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
 3801  DAG.getConstant(0, DL, MVT::i32));
3802  Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
3803  Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
3804  break;
3805  }
3806  default:
3807  llvm_unreachable("Unexpected type");
3808  }
3809  return Op;
3810  }
3811 
3812  // Get the known-zero mask for the operand.
3813  KnownBits Known = DAG.computeKnownBits(Op);
3814  unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
3815  if (NumSignificantBits == 0)
3816  return DAG.getConstant(0, DL, VT);
3817 
3818  // Skip known-zero high parts of the operand.
3819  int64_t OrigBitSize = VT.getSizeInBits();
3820  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
3821  BitSize = std::min(BitSize, OrigBitSize);
3822 
3823  // The POPCNT instruction counts the number of bits in each byte.
3824  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
 3825  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
 3826  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
3827 
3828  // Add up per-byte counts in a binary tree. All bits of Op at
3829  // position larger than BitSize remain zero throughout.
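  // For example, with BitSize == 32 and per-byte counts c3..c0 (c3 in the
  // most significant byte): after the I == 16 step the top byte holds
  // c3 + c1, after the I == 8 step it holds c3 + c2 + c1 + c0, and the final
  // shift right by 24 below extracts that total.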
3830  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
3831  SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
3832  if (BitSize != OrigBitSize)
3833  Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
3834  DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
3835  Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
3836  }
3837 
3838  // Extract overall result from high byte.
3839  if (BitSize > 8)
3840  Op = DAG.getNode(ISD::SRL, DL, VT, Op,
3841  DAG.getConstant(BitSize - 8, DL, VT));
3842 
3843  return Op;
3844 }
3845 
3846 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
3847  SelectionDAG &DAG) const {
3848  SDLoc DL(Op);
3849  AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
3850  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
3851  SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
3852  cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
3853 
3854  // The only fence that needs an instruction is a sequentially-consistent
3855  // cross-thread fence.
3856  if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
3857  FenceSSID == SyncScope::System) {
3858  return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
3859  Op.getOperand(0)),
3860  0);
3861  }
3862 
3863  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
3864  return DAG.getNode(SystemZISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
3865 }
3866 
3867 // Op is an atomic load. Lower it into a normal volatile load.
3868 SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
3869  SelectionDAG &DAG) const {
3870  auto *Node = cast<AtomicSDNode>(Op.getNode());
3871  return DAG.getExtLoad(ISD::EXTLOAD, SDLoc(Op), Op.getValueType(),
3872  Node->getChain(), Node->getBasePtr(),
3873  Node->getMemoryVT(), Node->getMemOperand());
3874 }
3875 
3876 // Op is an atomic store. Lower it into a normal volatile store.
3877 SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
3878  SelectionDAG &DAG) const {
3879  auto *Node = cast<AtomicSDNode>(Op.getNode());
3880  SDValue Chain = DAG.getTruncStore(Node->getChain(), SDLoc(Op), Node->getVal(),
3881  Node->getBasePtr(), Node->getMemoryVT(),
3882  Node->getMemOperand());
3883  // We have to enforce sequential consistency by performing a
3884  // serialization operation after the store.
3885  if (Node->getOrdering() == AtomicOrdering::SequentiallyConsistent)
3886  Chain = SDValue(DAG.getMachineNode(SystemZ::Serialize, SDLoc(Op),
3887  MVT::Other, Chain), 0);
3888  return Chain;
3889 }
3890 
 3891 // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation.  Lower the 8- and
 3892 // 16-bit cases into the fullword ATOMIC_LOADW_* operation given by Opcode.
3893 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
3894  SelectionDAG &DAG,
3895  unsigned Opcode) const {
3896  auto *Node = cast<AtomicSDNode>(Op.getNode());
3897 
3898  // 32-bit operations need no code outside the main loop.
3899  EVT NarrowVT = Node->getMemoryVT();
3900  EVT WideVT = MVT::i32;
3901  if (NarrowVT == WideVT)
3902  return Op;
3903 
3904  int64_t BitSize = NarrowVT.getSizeInBits();
3905  SDValue ChainIn = Node->getChain();
3906  SDValue Addr = Node->getBasePtr();
3907  SDValue Src2 = Node->getVal();
3908  MachineMemOperand *MMO = Node->getMemOperand();
3909  SDLoc DL(Node);
3910  EVT PtrVT = Addr.getValueType();
3911 
3912  // Convert atomic subtracts of constants into additions.
3913  if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
3914  if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
 3915  Opcode = SystemZISD::ATOMIC_LOADW_ADD;
 3916  Src2 = DAG.getConstant(-Const->getSExtValue(), DL, Src2.getValueType());
3917  }
3918 
3919  // Get the address of the containing word.
3920  SDValue AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
3921  DAG.getConstant(-4, DL, PtrVT));
3922 
3923  // Get the number of bits that the word must be rotated left in order
3924  // to bring the field to the top bits of a GR32.
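  // Since the target is big-endian, a field at byte offset N within its
  // aligned word needs a left rotation by 8 * N bits; the SHL by 3 below
  // computes 8 * (Addr & 3) modulo 32, which is the amount ROTL ends up
  // using.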
3925  SDValue BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
3926  DAG.getConstant(3, DL, PtrVT));
3927  BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
3928 
3929  // Get the complementing shift amount, for rotating a field in the top
3930  // bits back to its proper position.
3931  SDValue NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
3932  DAG.getConstant(0, DL, WideVT), BitShift);
3933 
3934  // Extend the source operand to 32 bits and prepare it for the inner loop.
3935  // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
3936  // operations require the source to be shifted in advance. (This shift
3937  // can be folded if the source is constant.) For AND and NAND, the lower
3938  // bits must be set, while for other opcodes they should be left clear.
3939  if (Opcode != SystemZISD::ATOMIC_SWAPW)
3940  Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
3941  DAG.getConstant(32 - BitSize, DL, WideVT));
3942  if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
 3943  Opcode == SystemZISD::ATOMIC_LOADW_NAND)
 3944  Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
3945  DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
3946 
3947  // Construct the ATOMIC_LOADW_* node.
3948  SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
3949  SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
3950  DAG.getConstant(BitSize, DL, WideVT) };
3951  SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
3952  NarrowVT, MMO);
3953 
3954  // Rotate the result of the final CS so that the field is in the lower
3955  // bits of a GR32, then truncate it.
3956  SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
3957  DAG.getConstant(BitSize, DL, WideVT));
3958  SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
3959 
3960  SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
3961  return DAG.getMergeValues(RetOps, DL);
3962 }
3963 
3964 // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations
3965 // into ATOMIC_LOADW_SUBs and decide whether to convert 32- and 64-bit
3966 // operations into additions.
3967 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
3968  SelectionDAG &DAG) const {
3969  auto *Node = cast<AtomicSDNode>(Op.getNode());
3970  EVT MemVT = Node->getMemoryVT();
3971  if (MemVT == MVT::i32 || MemVT == MVT::i64) {
3972  // A full-width operation.
3973  assert(Op.getValueType() == MemVT && "Mismatched VTs");
3974  SDValue Src2 = Node->getVal();
3975  SDValue NegSrc2;
3976  SDLoc DL(Src2);
3977 
3978  if (auto *Op2 = dyn_cast<ConstantSDNode>(Src2)) {
3979  // Use an addition if the operand is constant and either LAA(G) is
3980  // available or the negative value is in the range of A(G)FHI.
3981  int64_t Value = (-Op2->getAPIntValue()).getSExtValue();
3982  if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1())
3983  NegSrc2 = DAG.getConstant(Value, DL, MemVT);
3984  } else if (Subtarget.hasInterlockedAccess1())
3985  // Use LAA(G) if available.
3986  NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT),
3987  Src2);
3988 
3989  if (NegSrc2.getNode())
3990  return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
3991  Node->getChain(), Node->getBasePtr(), NegSrc2,
3992  Node->getMemOperand());
3993 
3994  // Use the node as-is.
3995  return Op;
3996  }
3997 
3998  return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
3999 }
4000 
4001 // Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
4002 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
4003  SelectionDAG &DAG) const {
4004  auto *Node = cast<AtomicSDNode>(Op.getNode());
4005  SDValue ChainIn = Node->getOperand(0);
4006  SDValue Addr = Node->getOperand(1);
4007  SDValue CmpVal = Node->getOperand(2);
4008  SDValue SwapVal = Node->getOperand(3);
4009  MachineMemOperand *MMO = Node->getMemOperand();
4010  SDLoc DL(Node);
4011 
4012  // We have native support for 32-bit and 64-bit compare and swap, but we
4013  // still need to expand extracting the "success" result from the CC.
4014  EVT NarrowVT = Node->getMemoryVT();
4015  EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
4016  if (NarrowVT == WideVT) {
4017  SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
4018  SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
 4019  SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
 4020  DL, Tys, Ops, NarrowVT, MMO);
4021  SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
 4022  SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
 4023 
4024  DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
4025  DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
4026  DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
4027  return SDValue();
4028  }
4029 
4030  // Convert 8-bit and 16-bit compare and swap to a loop, implemented
4031  // via a fullword ATOMIC_CMP_SWAPW operation.
4032  int64_t BitSize = NarrowVT.getSizeInBits();
4033  EVT PtrVT = Addr.getValueType();
4034 
4035  // Get the address of the containing word.