//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the SystemZTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SystemZISelLowering.h"
#include "SystemZCallingConv.h"
#include "SystemZConstantPoolValue.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsS390.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/KnownBits.h"
#include <cctype>

using namespace llvm;

#define DEBUG_TYPE "systemz-lower"

namespace {
// Represents information about a comparison.
struct Comparison {
  Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
    : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
      Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}

  // The operands to the comparison.
  SDValue Op0, Op1;

  // Chain if this is a strict floating-point comparison.
  SDValue Chain;

  // The opcode that should be used to compare Op0 and Op1.
  unsigned Opcode;

  // A SystemZICMP value. Only used for integer comparisons.
  unsigned ICmpType;

  // The mask of CC values that Opcode can produce.
  unsigned CCValid;

  // The mask of CC values for which the original condition is true.
  unsigned CCMask;
};
} // end anonymous namespace
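
// Illustrative sketch (not part of the original file): for a signed i64
// test "a < b", the comparison analysis later in this file would populate
// one of these roughly as follows:
//
//   Comparison C(A, B, SDValue());     // no chain: not a strict FP compare
//   C.Opcode   = SystemZISD::ICMP;
//   C.ICmpType = SystemZICMP::SignedOnly;
//   C.CCValid  = SystemZ::CCMASK_ICMP;
//   C.CCMask   = SystemZ::CCMASK_CMP_LT;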

// Classify VT as either 32 or 64 bit.
static bool is32Bit(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::i32:
    return true;
  case MVT::i64:
    return false;
  default:
    llvm_unreachable("Unsupported type");
  }
}

// Return a version of MachineOperand that can be safely used before the
// final use.
static MachineOperand earlyUseOperand(MachineOperand Op) {
  if (Op.isReg())
    Op.setIsKill(false);
  return Op;
}

SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
                                             const SystemZSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));

  auto *Regs = STI.getSpecialRegisters();

  // Set up the register classes.
  if (Subtarget.hasHighWord())
    addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
  else
    addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
  if (!useSoftFloat()) {
    if (Subtarget.hasVector()) {
      addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
      addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
    } else {
      addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
      addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
    }
    if (Subtarget.hasVectorEnhancements1())
      addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
    else
      addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);

    if (Subtarget.hasVector()) {
      addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
      addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(Subtarget.getRegisterInfo());

  // Set up special registers.
  setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());

  // TODO: It may be better to default to latency-oriented scheduling, however
  // LLVM's current latency-oriented scheduler can't handle physreg definitions
  // such as SystemZ has with CC, so set this to the register-pressure
  // scheduler, because it can.
  setSchedulingPreference(Sched::RegPressure);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  // Instructions are strings of 2-byte aligned 2-byte values.
  setMinFunctionAlignment(Align(2));
  // For performance reasons we prefer 16-byte alignment.
  setPrefFunctionAlignment(Align(16));

  // Handle operations that are handled in a similar way for all types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // Lower SET_CC into an IPM-based sequence.
      setOperationAction(ISD::SETCC, VT, Custom);
      setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
      setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);

      // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
      setOperationAction(ISD::SELECT, VT, Expand);

      // Lower SELECT_CC and BR_CC into separate comparisons and branches.
      setOperationAction(ISD::SELECT_CC, VT, Custom);
      setOperationAction(ISD::BR_CC, VT, Custom);
    }
  }

  // Expand jump table branches as address arithmetic followed by an
  // indirect jump.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // Expand BRCOND into a BR_CC (see above).
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // Handle integer types.
  for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
       I <= MVT::LAST_INTEGER_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      setOperationAction(ISD::ABS, VT, Legal);

      // Expand individual DIV and REMs into DIVREMs.
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Custom);
      setOperationAction(ISD::UDIVREM, VT, Custom);

      // Support addition/subtraction with overflow.
      setOperationAction(ISD::SADDO, VT, Custom);
      setOperationAction(ISD::SSUBO, VT, Custom);

      // Support addition/subtraction with carry.
      setOperationAction(ISD::UADDO, VT, Custom);
      setOperationAction(ISD::USUBO, VT, Custom);

      // Support carry in as value rather than glue.
      setOperationAction(ISD::ADDCARRY, VT, Custom);
      setOperationAction(ISD::SUBCARRY, VT, Custom);

      // Lower ATOMIC_LOAD and ATOMIC_STORE into normal volatile loads and
      // stores, putting a serialization instruction after the stores.
      setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
      setOperationAction(ISD::ATOMIC_STORE, VT, Custom);

      // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
      // available, or if the operand is constant.
      setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);

      // Use POPCNT on z196 and above.
      if (Subtarget.hasPopulationCount())
        setOperationAction(ISD::CTPOP, VT, Custom);
      else
        setOperationAction(ISD::CTPOP, VT, Expand);

      // No special instructions for these.
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Use *MUL_LOHI where possible instead of MULH*.
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Custom);
      setOperationAction(ISD::UMUL_LOHI, VT, Custom);

      // Only z196 and above have native support for conversions to unsigned.
      // On z10, promoting to i64 doesn't generate an inexact condition for
      // values that are outside the i32 range but in the i64 range, so use
      // the default expansion.
      if (!Subtarget.hasFPExtension())
        setOperationAction(ISD::FP_TO_UINT, VT, Expand);

      // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all
      // default to Expand, so need to be modified to Legal where appropriate.
      setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal);
      if (Subtarget.hasFPExtension())
        setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal);

      // And similarly for STRICT_[SU]INT_TO_FP.
      setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal);
      if (Subtarget.hasFPExtension())
        setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal);
    }
  }

  // Type legalization will convert 8- and 16-bit atomic operations into
  // forms that operate on i32s (but still keeping the original memory VT).
  // Lower them into full i32 operations.
  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Custom);

  // Even though i128 is not a legal type, we still need to custom lower
  // the atomic operations in order to exploit SystemZ instructions.
  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i128, Custom);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);

  // We can use the CC result of compare-and-swap to implement
  // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);

  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Traps are legal, as we will convert them to "j .+2".
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // z10 has instructions for signed but not unsigned FP conversion.
  // Handle unsigned 32-bit types as signed 64-bit types.
  if (!Subtarget.hasFPExtension()) {
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  }

  // We have native support for a 64-bit CTLZ, via FLOGR.
  setOperationAction(ISD::CTLZ, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
  setOperationAction(ISD::CTLZ, MVT::i64, Legal);

  // On z15 we have native support for a 64-bit CTPOP.
  if (Subtarget.hasMiscellaneousExtensions3()) {
    setOperationAction(ISD::CTPOP, MVT::i32, Promote);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  }

  // Give LowerOperation the chance to replace 64-bit ORs with subregs.
  setOperationAction(ISD::OR, MVT::i64, Custom);

  // Expand 128 bit shifts without using a libcall.
  setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand);
  setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand);
  setLibcallName(RTLIB::SRL_I128, nullptr);
  setLibcallName(RTLIB::SHL_I128, nullptr);
  setLibcallName(RTLIB::SRA_I128, nullptr);

  // Handle bitcast from fp128 to i128.
  setOperationAction(ISD::BITCAST, MVT::i128, Custom);

  // We have native instructions for i8, i16 and i32 extensions, but not i1.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i1, Promote);
  }

  // Handle the various types of symbolic address.
  setOperationAction(ISD::ConstantPool,     PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress,    PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::BlockAddress,     PtrVT, Custom);
  setOperationAction(ISD::JumpTable,        PtrVT, Custom);

  // We need to handle dynamic allocations specially because of the
  // 160-byte area at the bottom of the stack.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);

  setOperationAction(ISD::STACKSAVE,    MVT::Other, Custom);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);

  // Handle prefetches with PFD or PFDRL.
  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
    // Assume by default that all vector operations need to be expanded.
    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
      if (getOperationAction(Opcode, VT) == Legal)
        setOperationAction(Opcode, VT, Expand);

    // Likewise all truncating stores and extending loads.
    for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
      setTruncStoreAction(VT, InnerVT, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
    }

    if (isTypeLegal(VT)) {
      // These operations are legal for anything that can be stored in a
      // vector register, even if there is no native support for the format
      // as such. In particular, we can do these for v4f32 even though there
      // are no specific instructions for that format.
      setOperationAction(ISD::LOAD, VT, Legal);
      setOperationAction(ISD::STORE, VT, Legal);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::BITCAST, VT, Legal);
      setOperationAction(ISD::UNDEF, VT, Legal);

      // Likewise, except that we need to replace the nodes with something
      // more specific.
      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
    }
  }

  // Handle integer vector types.
  for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
    if (isTypeLegal(VT)) {
      // These operations have direct equivalents.
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      if (VT != MVT::v2i64)
        setOperationAction(ISD::MUL, VT, Legal);
      setOperationAction(ISD::ABS, VT, Legal);
      setOperationAction(ISD::AND, VT, Legal);
      setOperationAction(ISD::OR, VT, Legal);
      setOperationAction(ISD::XOR, VT, Legal);
      if (Subtarget.hasVectorEnhancements1())
        setOperationAction(ISD::CTPOP, VT, Legal);
      else
        setOperationAction(ISD::CTPOP, VT, Custom);
      setOperationAction(ISD::CTTZ, VT, Legal);
      setOperationAction(ISD::CTLZ, VT, Legal);

      // Convert a GPR scalar to a vector by inserting it into element 0.
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);

      // Use a series of unpacks for extensions.
      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);

      // Detect shifts by a scalar amount and convert them into
      // V*_BY_SCALAR.
      setOperationAction(ISD::SHL, VT, Custom);
      setOperationAction(ISD::SRA, VT, Custom);
      setOperationAction(ISD::SRL, VT, Custom);

      // At present ROTL isn't matched by DAGCombiner. ROTR should be
      // converted into ROTL.
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
      // and inverting the result as necessary.
      setOperationAction(ISD::SETCC, VT, Custom);
      setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
      if (Subtarget.hasVectorEnhancements1())
        setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
    }
  }

  if (Subtarget.hasVector()) {
    // There should be no need to check for float types other than v2f64
    // since <2 x f32> isn't a legal type.
    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal);
  }

  if (Subtarget.hasVectorEnhancements2()) {
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);

    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal);
  }

  // Handle floating-point types.
  for (unsigned I = MVT::FIRST_FP_VALUETYPE;
       I <= MVT::LAST_FP_VALUETYPE;
       ++I) {
    MVT VT = MVT::SimpleValueType(I);
    if (isTypeLegal(VT)) {
      // We can use FI for FRINT.
      setOperationAction(ISD::FRINT, VT, Legal);

      // We can use the extended form of FI for other rounding operations.
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::FNEARBYINT, VT, Legal);
        setOperationAction(ISD::FFLOOR, VT, Legal);
        setOperationAction(ISD::FCEIL, VT, Legal);
        setOperationAction(ISD::FTRUNC, VT, Legal);
        setOperationAction(ISD::FROUND, VT, Legal);
      }

      // No special instructions for these.
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FSINCOS, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);

      // Special treatment.
      setOperationAction(ISD::IS_FPCLASS, VT, Custom);

      // Handle constrained floating-point operations.
      setOperationAction(ISD::STRICT_FADD, VT, Legal);
      setOperationAction(ISD::STRICT_FSUB, VT, Legal);
      setOperationAction(ISD::STRICT_FMUL, VT, Legal);
      setOperationAction(ISD::STRICT_FDIV, VT, Legal);
      setOperationAction(ISD::STRICT_FMA, VT, Legal);
      setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
      setOperationAction(ISD::STRICT_FRINT, VT, Legal);
      setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
      setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
      if (Subtarget.hasFPExtension()) {
        setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
        setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
        setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
        setOperationAction(ISD::STRICT_FROUND, VT, Legal);
        setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
      }
    }
  }

  // Handle floating-point vector types.
  if (Subtarget.hasVector()) {
    // Scalar-to-vector conversion is just a subreg.
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);

    // Some insertions and extractions can be done directly but others
    // need to go via integers.
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);

    // These operations have direct equivalents.
    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
  }

  // The vector enhancements facility 1 has instructions for these.
  if (Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::FABS, MVT::v4f32, Legal);
    setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);

    setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
    setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);

    // Handle constrained floating-point operations.
    setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
    for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
                     MVT::v4f32, MVT::v2f64 }) {
      setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal);
      setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal);
    }
  }

  // We only have fused f128 multiply-addition on vector registers.
  if (!Subtarget.hasVectorEnhancements1()) {
    setOperationAction(ISD::FMA, MVT::f128, Expand);
    setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand);
  }

  // We don't have a copysign instruction on vector registers.
  if (Subtarget.hasVectorEnhancements1())
    setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);

  // Needed so that we don't try to implement f128 constant loads using
  // a load-and-extend of a f80 constant (in cases where the constant
  // would fit in an f80).
  for (MVT VT : MVT::fp_valuetypes())
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);

  // We don't have extending load instructions on vector registers.
  if (Subtarget.hasVectorEnhancements1()) {
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
  }

  // Floating-point truncation and stores need to be done separately.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f32, Expand);
  setTruncStoreAction(MVT::f128, MVT::f64, Expand);

  // We have 64-bit FPR<->GPR moves, but need special handling for
  // 32-bit forms.
  if (!Subtarget.hasVector()) {
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
  }

  // VASTART and VACOPY need to deal with the SystemZ-specific varargs
  // structure, but VAEND is a no-op.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  // Codes for which we want to perform some z-specific combinations.
  setTargetDAGCombine({ISD::ZERO_EXTEND,
                       ISD::SIGN_EXTEND,
                       ISD::SIGN_EXTEND_INREG,
                       ISD::LOAD,
                       ISD::STORE,
                       ISD::VECTOR_SHUFFLE,
                       ISD::EXTRACT_VECTOR_ELT,
                       ISD::FP_ROUND,
                       ISD::STRICT_FP_ROUND,
                       ISD::FP_EXTEND,
                       ISD::SINT_TO_FP,
                       ISD::UINT_TO_FP,
                       ISD::STRICT_FP_EXTEND,
                       ISD::BSWAP,
                       ISD::SDIV,
                       ISD::UDIV,
                       ISD::SREM,
                       ISD::UREM,
                       ISD::INTRINSIC_VOID,
                       ISD::INTRINSIC_W_CHAIN});

  // Handle intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // We want to use MVC in preference to even a single load/store pair.
  MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;

  // The main memset sequence is a byte store followed by an MVC.
  // Two STC or MV..I stores win over that, but the kind of fused stores
  // generated by target-independent code don't when the byte value is
  // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
  // than "STC;MVC". Handle the choice in target-specific code instead.
  MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;

  // Default to having -disable-strictnode-mutation on.
  IsStrictFPEnabled = true;
}
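
// A worked example of the action table above (a sketch, not part of the
// original file): ISD::SDIV is Expand while ISD::SDIVREM is Custom for
// legal integer types, so IR such as
//
//   %q = sdiv i64 %a, %b
//   %r = srem i64 %a, %b
//
// is combined into a single SDIVREM node, which the custom lowering then
// maps onto one SystemZ divide that yields both quotient and remainder in
// a register pair.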

bool SystemZTargetLowering::useSoftFloat() const {
  return Subtarget.hasSoftFloat();
}

EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL,
                                              LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
    const MachineFunction &MF, EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return Subtarget.hasVectorEnhancements1();
  default:
    break;
  }

  return false;
}

// Return true if the constant can be generated with a vector instruction,
// such as VGM, VGMB or VREPI.
bool SystemZVectorConstantInfo::isVectorConstantLegal(
    const SystemZSubtarget &Subtarget) {
  const SystemZInstrInfo *TII =
      static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
  if (!Subtarget.hasVector() ||
      (isFP128 && !Subtarget.hasVectorEnhancements1()))
    return false;

  // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
  // preferred way of creating all-zero and all-one vectors so give it
  // priority over other methods below.
  unsigned Mask = 0;
  unsigned I = 0;
  for (; I < SystemZ::VectorBytes; ++I) {
    uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
    if (Byte == 0xff)
      Mask |= 1ULL << I;
    else if (Byte != 0)
      break;
  }
  if (I == SystemZ::VectorBytes) {
    Opcode = SystemZISD::BYTE_MASK;
    OpVals.push_back(Mask);
    VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16);
    return true;
  }

  if (SplatBitSize > 64)
    return false;

  auto tryValue = [&](uint64_t Value) -> bool {
    // Try VECTOR REPLICATE IMMEDIATE.
    int64_t SignedValue = SignExtend64(Value, SplatBitSize);
    if (isInt<16>(SignedValue)) {
      OpVals.push_back(((unsigned) SignedValue));
      Opcode = SystemZISD::REPLICATE;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    // Try VECTOR GENERATE MASK.
    unsigned Start, End;
    if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
      // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
      // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
      // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1).
      OpVals.push_back(Start - (64 - SplatBitSize));
      OpVals.push_back(End - (64 - SplatBitSize));
      Opcode = SystemZISD::ROTATE_MASK;
      VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize),
                               SystemZ::VectorBits / SplatBitSize);
      return true;
    }
    return false;
  };

  // First try assuming that any undefined bits above the highest set bit
  // and below the lowest set bit are 1s. This increases the likelihood of
  // being able to use a sign-extended element value in VECTOR REPLICATE
  // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
  uint64_t SplatBitsZ = SplatBits.getZExtValue();
  uint64_t SplatUndefZ = SplatUndef.getZExtValue();
  uint64_t Lower =
      (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
  uint64_t Upper =
      (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
  if (tryValue(SplatBitsZ | Upper | Lower))
    return true;

  // Now try assuming that any undefined bits between the first and
  // last defined set bits are set. This increases the chances of
  // using a non-wraparound mask.
  uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
  return tryValue(SplatBitsZ | Middle);
}
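
// Worked example (illustrative, not from the original source): for an
// all-ones v16i8 constant every byte is 0xff, so the loop above sets all
// 16 mask bits and the constant is emitted as VGBM with mask 0xffff; the
// all-zero vector sets no bits and becomes VGBM 0. A v8i16 splat of 0xfffe
// fails the byte-mask test but succeeds in tryValue(): it sign-extends to
// -2, fits in 16 bits, and is emitted as VECTOR REPLICATE IMMEDIATE.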

SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) {
  if (IntImm.isSingleWord()) {
    IntBits = APInt(128, IntImm.getZExtValue());
    IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
  } else
    IntBits = IntImm;
  assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");

  // Find the smallest splat.
  SplatBits = IntImm;
  unsigned Width = SplatBits.getBitWidth();
  while (Width > 8) {
    unsigned HalfSize = Width / 2;
    APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
    APInt LowValue = SplatBits.trunc(HalfSize);

    // If the two halves do not match, stop here.
    if (HighValue != LowValue || 8 > HalfSize)
      break;

    SplatBits = HighValue;
    Width = HalfSize;
  }
  SplatUndef = 0;
  SplatBitSize = Width;
}
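
// For example (sketch): the 128-bit immediate 0x0001000100010001... halves
// from 128 to 64 to 32 to 16 bits with matching halves at every step, and
// stops at 16 bits because the two 8-bit halves 0x00 and 0x01 differ, so
// SplatBits == 0x0001 and SplatBitSize == 16.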

SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) {
  assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
  bool HasAnyUndefs;

  // Get IntBits by finding the 128 bit splat.
  BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
                       true);

  // Get SplatBits by finding the 8 bit or greater splat.
  BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
                       true);
}

bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                         bool ForCodeSize) const {
  // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
  if (Imm.isZero() || Imm.isNegZero())
    return true;

  return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget);
}

/// Returns true if stack probing through inline assembly is requested.
bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const {
  // If the function specifically requests inline stack probes, emit them.
  if (MF.getFunction().hasFnAttribute("probe-stack"))
    return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
           "inline-asm";
  return false;
}
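
// In IR, a function opts into inline stack probes with the string
// attribute tested above, e.g. (illustrative):
//
//   define void @f() "probe-stack"="inline-asm" { ... }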

bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // We can use CGFI or CLGFI.
  return isInt<32>(Imm) || isUInt<32>(Imm);
}

bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  // We can use ALGFI or SLGFI.
  return isUInt<32>(Imm) || isUInt<32>(-Imm);
}

bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align, MachineMemOperand::Flags, bool *Fast) const {
  // Unaligned accesses should never be slower than the expanded version.
  // We check specifically for aligned accesses in the few cases where
  // they are required.
  if (Fast)
    *Fast = true;
  return true;
}

// Information about the addressing mode for a memory access.
struct AddressingMode {
  // True if a long displacement is supported.
  bool LongDisplacement;

  // True if use of index register is supported.
  bool IndexReg;

  AddressingMode(bool LongDispl, bool IdxReg) :
    LongDisplacement(LongDispl), IndexReg(IdxReg) {}
};

// Return the desired addressing mode for a Load which has only one use (in
// the same block) which is a Store.
static AddressingMode getLoadStoreAddrMode(bool HasVector,
                                           Type *Ty) {
  // With vector support a Load->Store combination may be combined to either
  // an MVC or vector operations and it seems to work best to allow the
  // vector addressing mode.
  if (HasVector)
    return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);

  // Otherwise only the MVC case is special.
  bool MVC = Ty->isIntegerTy(8);
  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
}

// Return the addressing mode which seems most desirable given an LLVM
// Instruction pointer.
static AddressingMode
supportedAddressingMode(Instruction *I, bool HasVector) {
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::memset:
    case Intrinsic::memmove:
    case Intrinsic::memcpy:
      return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
    }
  }

  if (isa<LoadInst>(I) && I->hasOneUse()) {
    auto *SingleUser = cast<Instruction>(*I->user_begin());
    if (SingleUser->getParent() == I->getParent()) {
      if (isa<ICmpInst>(SingleUser)) {
        if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
          if (C->getBitWidth() <= 64 &&
              (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
            // Comparison of memory with 16 bit signed / unsigned immediate
            return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
      } else if (isa<StoreInst>(SingleUser))
        // Load->Store
        return getLoadStoreAddrMode(HasVector, I->getType());
    }
  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
    if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
      if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
        // Load->Store
        return getLoadStoreAddrMode(HasVector, LoadI->getType());
  }

  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {

    // * Use LDE instead of LE/LEY for z13 to avoid partial register
    //   dependencies (LDE only supports small offsets).
    // * Utilize the vector registers to hold floating point
    //   values (vector load / store instructions only support small
    //   offsets).

    Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
                         I->getOperand(0)->getType());
    bool IsFPAccess = MemAccessTy->isFloatingPointTy();
    bool IsVectorAccess = MemAccessTy->isVectorTy();

    // A store of an extracted vector element will be combined into a VSTE type
    // instruction.
    if (!IsVectorAccess && isa<StoreInst>(I)) {
      Value *DataOp = I->getOperand(0);
      if (isa<ExtractElementInst>(DataOp))
        IsVectorAccess = true;
    }

    // A load which gets inserted into a vector element will be combined into a
    // VLE type instruction.
    if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
      User *LoadUser = *I->user_begin();
      if (isa<InsertElementInst>(LoadUser))
        IsVectorAccess = true;
    }

    if (IsFPAccess || IsVectorAccess)
      return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
  }

  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
}
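
// For instance (a sketch of the cases above): a load only used by a memcpy
// intrinsic gets (LongDispl=false, IdxReg=false) so it stays addressable by
// MVC, while an f64 load on a machine with vector support gets
// (LongDispl=false, IdxReg=true) to suit the short-displacement vector
// load/store forms. Everything else defaults to (true, true).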

bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
    const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
  // Punt on globals for now, although they can be used in limited
  // RELATIVE LONG cases.
  if (AM.BaseGV)
    return false;

  // Require a 20-bit signed offset.
  if (!isInt<20>(AM.BaseOffs))
    return false;

  bool RequireD12 = Subtarget.hasVector() && Ty->isVectorTy();
  AddressingMode SupportedAM(!RequireD12, true);
  if (I != nullptr)
    SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());

  if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
    return false;

  if (!SupportedAM.IndexReg)
    // No indexing allowed.
    return AM.Scale == 0;
  else
    // Indexing is OK but no scale factor can be applied.
    return AM.Scale == 0 || AM.Scale == 1;
}
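
// Examples (illustrative): "base + 100000" is accepted for scalar types
// (it passes isInt<20>), but for a vector access only "base + 0..4095"
// survives the RequireD12 check; "base + index" is fine, while a scaled
// index such as "base + 4*index" is always rejected because AM.Scale must
// be 0 or 1.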

bool SystemZTargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  const int MVCFastLen = 16;

  // Don't expand Op into scalar loads/stores in these cases:
  if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
    return false; // Small memcpy: Use MVC
  if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
    return false; // Small memset (first byte with STC/MVI): Use MVC
  if (Op.isZeroMemset())
    return false; // Memset zero: Use XC

  return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS,
                                                  SrcAS, FuncAttributes);
}
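
// E.g. (sketch): a 16-byte memcpy returns false here and is emitted as a
// single MVC, and a memset of zero becomes XC; larger or variable-length
// operations fall through to the generic lowering.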

EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op,
                                  const AttributeList &FuncAttributes) const {
  return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
}

bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
  if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
    return false;
  unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedSize();
  unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedSize();
  return FromBits > ToBits;
}

bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const {
  if (!FromVT.isInteger() || !ToVT.isInteger())
    return false;
  unsigned FromBits = FromVT.getFixedSizeInBits();
  unsigned ToBits = ToVT.getFixedSizeInBits();
  return FromBits > ToBits;
}

//===----------------------------------------------------------------------===//
// Inline asm support
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
SystemZTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'a': // Address register
    case 'd': // Data register (equivalent to 'r')
    case 'f': // Floating-point register
    case 'h': // High-part register
    case 'r': // General-purpose register
    case 'v': // Vector register
      return C_RegisterClass;

    case 'Q': // Memory with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Memory with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
    case 'm': // Equivalent to 'T'.
      return C_Memory;

    case 'I': // Unsigned 8-bit constant
    case 'J': // Unsigned 12-bit constant
    case 'K': // Signed 16-bit constant
    case 'L': // Signed 20-bit displacement (on all targets we support)
    case 'M': // 0x7fffffff
      return C_Immediate;

    default:
      break;
    }
  } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
    switch (Constraint[1]) {
    case 'Q': // Address with base and unsigned 12-bit displacement
    case 'R': // Likewise, plus an index
    case 'S': // Address with base and signed 20-bit displacement
    case 'T': // Likewise, plus an index
      return C_Address;

    default:
      break;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}
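
// Usage sketch (GCC-style inline asm, not part of this file): the 'K'
// constraint below requests a signed 16-bit immediate, matching AHI:
//
//   asm ("ahi %0,%1" : "+d" (x) : "K" (4095));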

TargetLowering::ConstraintWeight SystemZTargetLowering::
getSingleConstraintMatchWeight(AsmOperandInfo &info,
                               const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;

  case 'a': // Address register
  case 'd': // Data register (equivalent to 'r')
  case 'h': // High-part register
  case 'r': // General-purpose register
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;

  case 'f': // Floating-point register
    if (type->isFloatingPointTy())
      weight = CW_Register;
    break;

  case 'v': // Vector register
    if ((type->isVectorTy() || type->isFloatingPointTy()) &&
        Subtarget.hasVector())
      weight = CW_Register;
    break;

  case 'I': // Unsigned 8-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<8>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'J': // Unsigned 12-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isUInt<12>(C->getZExtValue()))
        weight = CW_Constant;
    break;

  case 'K': // Signed 16-bit constant
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<16>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'L': // Signed 20-bit displacement (on all targets we support)
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (isInt<20>(C->getSExtValue()))
        weight = CW_Constant;
    break;

  case 'M': // 0x7fffffff
    if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
      if (C->getZExtValue() == 0x7fffffff)
        weight = CW_Constant;
    break;
  }
  return weight;
}

// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified. RC is the class associated with "t" and
// Map maps 0-based register numbers to LLVM register numbers.
static std::pair<unsigned, const TargetRegisterClass *>
parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC,
                    const unsigned *Map, unsigned Size) {
  assert(*(Constraint.end()-1) == '}' && "Missing '}'");
  if (isdigit(Constraint[2])) {
    unsigned Index;
    bool Failed =
        Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
    if (!Failed && Index < Size && Map[Index])
      return std::make_pair(Map[Index], RC);
  }
  return std::make_pair(0U, nullptr);
}
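
// For example (sketch): given the constraint "{r5}" with VT == MVT::i64,
// the caller passes SystemZMC::GR64Regs as Map, and index 5 resolves to
// SystemZ::R5D in the GR64BitRegClass.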

std::pair<unsigned, const TargetRegisterClass *>
SystemZTargetLowering::getRegForInlineAsmConstraint(
    const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    default: break;
    case 'd': // Data register (equivalent to 'r')
    case 'r': // General-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::GR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::GR128BitRegClass);
      return std::make_pair(0U, &SystemZ::GR32BitRegClass);

    case 'a': // Address register
      if (VT == MVT::i64)
        return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
      else if (VT == MVT::i128)
        return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
      return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);

    case 'h': // High-part register (an LLVM extension)
      return std::make_pair(0U, &SystemZ::GRH32BitRegClass);

    case 'f': // Floating-point register
      if (!useSoftFloat()) {
        if (VT == MVT::f64)
          return std::make_pair(0U, &SystemZ::FP64BitRegClass);
        else if (VT == MVT::f128)
          return std::make_pair(0U, &SystemZ::FP128BitRegClass);
        return std::make_pair(0U, &SystemZ::FP32BitRegClass);
      }
      break;
    case 'v': // Vector register
      if (Subtarget.hasVector()) {
        if (VT == MVT::f32)
          return std::make_pair(0U, &SystemZ::VR32BitRegClass);
        if (VT == MVT::f64)
          return std::make_pair(0U, &SystemZ::VR64BitRegClass);
        return std::make_pair(0U, &SystemZ::VR128BitRegClass);
      }
      break;
    }
  }
  if (Constraint.size() > 0 && Constraint[0] == '{') {
    // We need to override the default register parsing for GPRs and FPRs
    // because the interpretation depends on VT. The internal names of
    // the registers are also different from the external names
    // (F0D and F0S instead of F0, etc.).
    if (Constraint[1] == 'r') {
      if (VT == MVT::i32)
        return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
                                   SystemZMC::GR32Regs, 16);
      if (VT == MVT::i128)
        return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
                                   SystemZMC::GR128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
                                 SystemZMC::GR64Regs, 16);
    }
    if (Constraint[1] == 'f') {
      if (useSoftFloat())
        return std::make_pair(
            0u, static_cast<const TargetRegisterClass *>(nullptr));
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
                                   SystemZMC::FP32Regs, 16);
      if (VT == MVT::f128)
        return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
                                   SystemZMC::FP128Regs, 16);
      return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
                                 SystemZMC::FP64Regs, 16);
    }
    if (Constraint[1] == 'v') {
      if (!Subtarget.hasVector())
        return std::make_pair(
            0u, static_cast<const TargetRegisterClass *>(nullptr));
      if (VT == MVT::f32)
        return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
                                   SystemZMC::VR32Regs, 32);
      if (VT == MVT::f64)
        return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
                                   SystemZMC::VR64Regs, 32);
      return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
                                 SystemZMC::VR128Regs, 32);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register
SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                         const MachineFunction &MF) const {
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("r15", SystemZ::R15D)
                     .Default(0);
  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name global variable");
}

void SystemZTargetLowering::
LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
                             std::vector<SDValue> &Ops,
                             SelectionDAG &DAG) const {
  // Only support length 1 constraints for now.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I': // Unsigned 8-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<8>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'J': // Unsigned 12-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isUInt<12>(C->getZExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'K': // Signed 16-bit constant
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<16>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'L': // Signed 20-bit displacement (on all targets we support)
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (isInt<20>(C->getSExtValue()))
          Ops.push_back(DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;

    case 'M': // 0x7fffffff
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0x7fffffff)
          Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                              Op.getValueType()));
      return;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

//===----------------------------------------------------------------------===//
// Calling conventions
//===----------------------------------------------------------------------===//

#include "SystemZGenCallingConv.inc"

const MCPhysReg *SystemZTargetLowering::getScratchRegisters(
    CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
                                           SystemZ::R14D, 0 };
  return ScratchRegs;
}

bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType,
                                                     Type *ToType) const {
  return isTruncateFree(FromType, ToType);
}

bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  return CI->isTailCall();
}

// We do not yet support 128-bit single-element vector types. If the user
// attempts to use such types as function argument or return type, prefer
// to error out instead of emitting code violating the ABI.
static void VerifyVectorType(MVT VT, EVT ArgVT) {
  if (ArgVT.isVector() && !VT.isVector())
    report_fatal_error("Unsupported vector argument or return type");
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
  for (unsigned i = 0; i < Ins.size(); ++i)
    VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
}

static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
  for (unsigned i = 0; i < Outs.size(); ++i)
    VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
}

// Value is a value that has been passed to us in the location described by VA
// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
// any loads onto Chain.
static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Chain,
                                   SDValue Value) {
  // If the argument has been promoted from a smaller type, insert an
  // assertion to capture this.
  if (VA.getLocInfo() == CCValAssign::SExt)
    Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));
  else if (VA.getLocInfo() == CCValAssign::ZExt)
    Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value,
                        DAG.getValueType(VA.getValVT()));

  if (VA.isExtInLoc())
    Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
  else if (VA.getLocInfo() == CCValAssign::BCvt) {
    // If this is a short vector argument loaded from the stack,
    // extend from i64 to full vector size and then bitcast.
    assert(VA.getLocVT() == MVT::i64);
    assert(VA.getValVT().isVector());
    Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
    Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
  } else
    assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
  return Value;
}
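
// Example (illustrative): an i32 argument promoted to i64 with SExt comes
// back here as AssertSext(i64, ..., i32) and is truncated to i32; a v4i32
// short-vector argument spilled as i64 takes the BCvt path and is rebuilt
// as BUILD_VECTOR(v2i64, Value, undef) followed by a bitcast to v4i32.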

// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA. Return a copy of Value converted to
// VA.getValVT(). The caller is responsible for handling indirect values.
static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
                                   CCValAssign &VA, SDValue Value) {
  switch (VA.getLocInfo()) {
  case CCValAssign::SExt:
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::ZExt:
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::AExt:
    return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
  case CCValAssign::BCvt: {
    assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
    assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 ||
           VA.getValVT() == MVT::f128);
    MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
                            ? MVT::v2i64
                            : VA.getLocVT();
    Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
    // For ELF, this is a short vector argument to be stored to the stack,
    // bitcast to v2i64 and then extract first element.
    if (BitCastToType == MVT::v2i64)
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
                         DAG.getConstant(0, DL, MVT::i32));
    return Value;
  }
  case CCValAssign::Full:
    return Value;
  default:
    llvm_unreachable("Unhandled getLocInfo()");
  }
}

static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
                           DAG.getIntPtrConstant(0, DL));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, In,
                           DAG.getIntPtrConstant(1, DL));
  SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
                                    MVT::Untyped, Hi, Lo);
  return SDValue(Pair, 0);
}

static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) {
  SDLoc DL(In);
  SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
                                          DL, MVT::i64, In);
  SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
                                          DL, MVT::i64, In);
  return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
}
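
// These two helpers model the fact that an i128 lives in an even/odd GPR
// pair rather than in a single register (sketch): lowerI128ToGR128 splits
// the value into its low and high i64 halves and wraps them in a PAIR128
// machine node of MVT::Untyped, while lowerGR128ToI128 extracts
// subreg_h64/subreg_l64 and reassembles the i128 with BUILD_PAIR.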

bool SystemZTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
  EVT ValueVT = Val.getValueType();
  assert((ValueVT != MVT::i128 ||
          ((NumParts == 1 && PartVT == MVT::Untyped) ||
           (NumParts == 2 && PartVT == MVT::i64))) &&
         "Unknown handling of i128 value.");
  if (ValueVT == MVT::i128 && NumParts == 1) {
    // Inline assembly operand.
    Parts[0] = lowerI128ToGR128(DAG, Val);
    return true;
  }
  return false;
}

SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
  assert((ValueVT != MVT::i128 ||
          ((NumParts == 1 && PartVT == MVT::Untyped) ||
           (NumParts == 2 && PartVT == MVT::i64))) &&
         "Unknown handling of i128 value.");
  if (ValueVT == MVT::i128 && NumParts == 1)
    // Inline assembly operand.
    return lowerGR128ToI128(DAG, Parts[0]);
  return SDValue();
}

SDValue SystemZTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  SystemZMachineFunctionInfo *FuncInfo =
      MF.getInfo<SystemZMachineFunctionInfo>();
  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Detect unsupported vector argument types.
  if (Subtarget.hasVector())
    VerifyVectorTypes(Ins);

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);

  unsigned NumFixedGPRs = 0;
  unsigned NumFixedFPRs = 0;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    SDValue ArgValue;
    CCValAssign &VA = ArgLocs[I];
    EVT LocVT = VA.getLocVT();
    if (VA.isRegLoc()) {
      // Arguments passed in registers
      const TargetRegisterClass *RC;
      switch (LocVT.getSimpleVT().SimpleTy) {
      default:
        // Integers smaller than i64 should be promoted to i64.
        llvm_unreachable("Unexpected argument type");
      case MVT::i32:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR32BitRegClass;
        break;
      case MVT::i64:
        NumFixedGPRs += 1;
        RC = &SystemZ::GR64BitRegClass;
        break;
      case MVT::f32:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP32BitRegClass;
        break;
      case MVT::f64:
        NumFixedFPRs += 1;
        RC = &SystemZ::FP64BitRegClass;
        break;
      case MVT::f128:
        NumFixedFPRs += 2;
        RC = &SystemZ::FP128BitRegClass;
        break;
      case MVT::v16i8:
      case MVT::v8i16:
      case MVT::v4i32:
      case MVT::v2i64:
      case MVT::v4f32:
      case MVT::v2f64:
        RC = &SystemZ::VR128BitRegClass;
        break;
      }

      Register VReg = MRI.createVirtualRegister(RC);
      MRI.addLiveIn(VA.getLocReg(), VReg);
      ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Create the frame index object for this incoming parameter.
      // FIXME: Pre-include call frame size in the offset, should not
      // need to manually add it here.
      int64_t ArgSPOffset = VA.getLocMemOffset();
      if (Subtarget.isTargetXPLINK64()) {
        auto &XPRegs =
            Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
        ArgSPOffset += XPRegs.getCallFrameSize();
      }
      int FI =
          MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);

      // Create the SelectionDAG nodes corresponding to a load
      // from this parameter. Unpromoted ints and floats are
      // passed as right-justified 8-byte values.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                          DAG.getIntPtrConstant(4, DL));
      ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
                             MachinePointerInfo::getFixedStack(MF, FI));
    }

    // Convert the value of the argument register into the value that's
    // being passed.
    if (VA.getLocInfo() == CCValAssign::Indirect) {
      InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
                                   MachinePointerInfo()));
      // If the original argument was split (e.g. i128), we need
      // to load all parts of it here (using the same address).
      unsigned ArgIndex = Ins[I].OrigArgIndex;
      assert(Ins[I].PartOffset == 0);
      while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
        CCValAssign &PartVA = ArgLocs[I + 1];
        unsigned PartOffset = Ins[I + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
                                     MachinePointerInfo()));
        ++I;
      }
    } else
      InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
  }

  // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
  if (IsVarArg && Subtarget.isTargetELF()) {
    // Save the number of non-varargs registers for later use by va_start, etc.
    FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
    FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);

    // Likewise the address (in the form of a frame index) of where the
    // first stack vararg would be. The 1-byte size here is arbitrary.
    int64_t StackSize = CCInfo.getNextStackOffset();
    FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));

    // ...and a similar frame index for the caller-allocated save area
    // that will be used to store the incoming registers.
    int64_t RegSaveOffset =
        -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
    unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
    FuncInfo->setRegSaveFrameIndex(RegSaveIndex);

    // Store the FPR varargs in the reserved frame slots. (We store the
    // GPRs as part of the prologue.)
    if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
      SDValue MemOps[SystemZ::ELFNumArgFPRs];
      for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
        unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
        int FI =
            MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
        SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
        unsigned VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
                                     &SystemZ::FP64BitRegClass);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
        MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
                                 MachinePointerInfo::getFixedStack(MF, FI));
      }
      // Join the stores, which are independent of one another.
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                          makeArrayRef(&MemOps[NumFixedFPRs],
                                       SystemZ::ELFNumArgFPRs-NumFixedFPRs));
    }
  }

  // FIXME: For XPLINK64, Add in support for handling incoming "ADA" special
  // register (R5)
  return Chain;
}
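
// For reference (a summary of the ELF ABI, not from the original file):
// the first integer arguments arrive in r2-r6 and the first FP arguments
// in f0, f2, f4 and f6; anything beyond that is read from the fixed stack
// objects created above, and for varargs the remaining argument registers
// are spilled to the register save area so that va_arg can find them.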

static bool canUseSiblingCall(const CCState &ArgCCInfo,
                              SmallVectorImpl<CCValAssign> &ArgLocs,
                              SmallVectorImpl<ISD::OutputArg> &Outs) {
  // Punt if there are any indirect or stack arguments, or if the call
  // needs the callee-saved argument register R6, or if the call uses
  // the callee-saved register arguments SwiftSelf and SwiftError.
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    if (VA.getLocInfo() == CCValAssign::Indirect)
      return false;
    if (!VA.isRegLoc())
      return false;
    Register Reg = VA.getLocReg();
    if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
      return false;
    if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
      return false;
  }
  return true;
}

SDValue
SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
                                 SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &DL = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(MF.getDataLayout());
  LLVMContext &Ctx = *DAG.getContext();
  SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();

  // FIXME: z/OS support to be added in later.
  if (Subtarget.isTargetXPLINK64())
    IsTailCall = false;

  // Detect unsupported vector argument and return types.
  if (Subtarget.hasVector()) {
    VerifyVectorTypes(Outs);
    VerifyVectorTypes(Ins);
  }

  // Analyze the operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
  ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);

  // We don't support GuaranteedTailCallOpt, only automatically-detected
  // sibling calls.
  if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
    IsTailCall = false;

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = ArgCCInfo.getNextStackOffset();

  if (Subtarget.isTargetXPLINK64())
    // Although the XPLINK specifications for AMODE64 state that the minimum
    // size of the param area is 32 bytes and no rounding is otherwise
    // specified, we round this area up in 64-byte increments to be compatible
    // with existing compilers.
    NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));

  // Mark the start of the call.
  if (!IsTailCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);

  // Copy argument values to their designated locations.
  SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    CCValAssign &VA = ArgLocs[I];
    SDValue ArgValue = OutVals[I];

    if (VA.getLocInfo() == CCValAssign::Indirect) {
      // Store the argument in a stack slot and pass its address.
      unsigned ArgIndex = Outs[I].OrigArgIndex;
      EVT SlotVT;
      if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
        // Allocate the full stack space for a promoted (and split) argument.
        Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
        EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
        MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
        unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
        SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
      } else {
        SlotVT = Outs[I].ArgVT;
      }
      SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, SpillSlot,
                       MachinePointerInfo::getFixedStack(MF, FI)));
      // If the original argument was split (e.g. i128), we need
      // to store all parts of it here (and pass just one address).
      assert(Outs[I].PartOffset == 0);
      while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
        SDValue PartValue = OutVals[I + 1];
        unsigned PartOffset = Outs[I + 1].PartOffset;
        SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
                                      DAG.getIntPtrConstant(PartOffset, DL));
        MemOpChains.push_back(
            DAG.getStore(Chain, DL, PartValue, Address,
                         MachinePointerInfo::getFixedStack(MF, FI, PartOffset)));
        assert((PartOffset + PartValue.getValueType().getStoreSize() <=
                SlotVT.getStoreSize()) && "Not enough space for argument part!");
        ++I;
      }
      ArgValue = SpillSlot;
    } else
      ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);

    if (VA.isRegLoc()) {
      // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
      // MVT::i128 type. We decompose the 128-bit type to a pair of its high
      // and low values.
      if (VA.getLocVT() == MVT::i128)
        ArgValue = lowerI128ToGR128(DAG, ArgValue);
      // Queue up the argument copies and emit them at the end.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
    } else {
      assert(VA.isMemLoc() && "Argument not register or memory");

      // Work out the address of the stack slot. Unpromoted ints and
      // floats are passed as right-justified 8-byte values.
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, DL,
                                      Regs->getStackPointerRegister(), PtrVT);
      unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
                        VA.getLocMemOffset();
      if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
        Offset += 4;
      SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
                                    DAG.getIntPtrConstant(Offset, DL));

      // Emit the store.
      MemOpChains.push_back(
          DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));

      // Although long doubles or vectors are passed through the stack when
      // they are vararg (non-fixed arguments), if a long double or vector
      // occupies the third and fourth slot of the argument list GPR3 should
      // still shadow the third slot of the argument list.
      if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
        SDValue ShadowArgValue =
            DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
                        DAG.getIntPtrConstant(1, DL));
        RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
      }
    }
  }

  // Join the stores, which are independent of one another.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Accept direct calls by converting symbolic call addresses to the
  // associated Target* opcodes. Force %r1 to be used for indirect
  // tail calls.
  SDValue Glue;
  // FIXME: Add support for XPLINK using the ADA register.
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
    Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
  } else if (IsTailCall) {
    Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
    Glue = Chain.getValue(1);
    Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
  }

  // Build a sequence of copy-to-reg nodes, chained and glued together.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
                             RegsToPass[I].second, Glue);
    Glue = Chain.getValue(1);
  }

  // The first call operand is the chain and the second is the target address.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
    Ops.push_back(DAG.getRegister(RegsToPass[I].first,
                                  RegsToPass[I].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Glue the call to the argument copies, if any.
  if (Glue.getNode())
    Ops.push_back(Glue);

  // Emit the call.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (IsTailCall)
    return DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
  Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  Glue = Chain.getValue(1);

  // Mark the end of the call, which is glued to the call itself.
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, DL, PtrVT, true),
                             DAG.getConstant(0, DL, PtrVT, true),
                             Glue, DL);
  Glue = Chain.getValue(1);

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RetLocs;
  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
  RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
    CCValAssign &VA = RetLocs[I];

    // Copy the value out, gluing the copy to the end of the call sequence.
    SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
                                          VA.getLocVT(), Glue);
    Chain = RetValue.getValue(1);
    Glue = RetValue.getValue(2);

    // Convert the value of the return register into the value that's
    // being returned.
    InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
  }

  return Chain;
}
1875 
1876 // Generate a call taking the given operands as arguments and returning a
1877 // result of type RetVT.
1878 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall(
1879  SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
1880  ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
1881  bool DoesNotReturn, bool IsReturnValueUsed) const {
1883  Args.reserve(Ops.size());
1884 
1886  for (SDValue Op : Ops) {
1887  Entry.Node = Op;
1888  Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1889  Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
1890  Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), IsSigned);
1891  Args.push_back(Entry);
1892  }
1893 
1894  SDValue Callee =
1895  DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
1896 
1897  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
1899  bool SignExtend = shouldSignExtendTypeInLibCall(RetVT, IsSigned);
1900  CLI.setDebugLoc(DL)
1901  .setChain(Chain)
1902  .setCallee(CallConv, RetTy, Callee, std::move(Args))
1903  .setNoReturn(DoesNotReturn)
1904  .setDiscardResult(!IsReturnValueUsed)
1905  .setSExtResult(SignExtend)
1906  .setZExtResult(!SignExtend);
1907  return LowerCallTo(CLI);
1908 }
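// A minimal usage sketch (hypothetical callee name and operand, shown only
// to illustrate makeExternalCall's interface, not taken from this file):
//
//   std::pair<SDValue, SDValue> CallResult =
//       makeExternalCall(Chain, DAG, "__example_helper", MVT::i64,
//                        {Op.getOperand(0)}, CallingConv::C,
//                        /*IsSigned=*/true, SDLoc(Op),
//                        /*DoesNotReturn=*/false,
//                        /*IsReturnValueUsed=*/true);
//
// CallResult.first is the call's return value and CallResult.second is the
// updated chain.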
1909 
1910  bool
1911  SystemZTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
1912  MachineFunction &MF, bool isVarArg,
1913  const SmallVectorImpl<ISD::OutputArg> &Outs,
1914  LLVMContext &Context) const {
1915  // Detect unsupported vector return types.
1916  if (Subtarget.hasVector())
1917  VerifyVectorTypes(Outs);
1918 
1919  // Special case that we cannot easily detect in RetCC_SystemZ since
1920  // i128 is not a legal type.
1921  for (auto &Out : Outs)
1922  if (Out.ArgVT == MVT::i128)
1923  return false;
1924 
1925  SmallVector<CCValAssign, 16> RetLocs;
1926  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
1927  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
1928 }
1929 
1930 SDValue
1931  SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
1932  bool IsVarArg,
1933  const SmallVectorImpl<ISD::OutputArg> &Outs,
1934  const SmallVectorImpl<SDValue> &OutVals,
1935  const SDLoc &DL, SelectionDAG &DAG) const {
1936  MachineFunction &MF = DAG.getMachineFunction();
1937 
1938  // Detect unsupported vector return types.
1939  if (Subtarget.hasVector())
1940  VerifyVectorTypes(Outs);
1941 
1942  // Assign locations to each returned value.
1943  SmallVector<CCValAssign, 16> RetLocs;
1944  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
1945  RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
1946 
1947  // Quick exit for void returns
1948  if (RetLocs.empty())
1949  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, Chain);
1950 
1951  if (CallConv == CallingConv::GHC)
1952  report_fatal_error("GHC functions return void only");
1953 
1954  // Copy the result values into the output registers.
1955  SDValue Glue;
1956  SmallVector<SDValue, 4> RetOps;
1957  RetOps.push_back(Chain);
1958  for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
1959  CCValAssign &VA = RetLocs[I];
1960  SDValue RetValue = OutVals[I];
1961 
1962  // Make the return register live on exit.
1963  assert(VA.isRegLoc() && "Can only return in registers!");
1964 
1965  // Promote the value as required.
1966  RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
1967 
1968  // Chain and glue the copies together.
1969  Register Reg = VA.getLocReg();
1970  Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
1971  Glue = Chain.getValue(1);
1972  RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
1973  }
1974 
1975  // Update chain and glue.
1976  RetOps[0] = Chain;
1977  if (Glue.getNode())
1978  RetOps.push_back(Glue);
1979 
1980  return DAG.getNode(SystemZISD::RET_FLAG, DL, MVT::Other, RetOps);
1981 }
1982 
1983 // Return true if Op is an intrinsic node with chain that returns the CC value
1984 // as its only (other) argument. Provide the associated SystemZISD opcode and
1985 // the mask of valid CC values if so.
1986 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
1987  unsigned &CCValid) {
1988  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1989  switch (Id) {
1990  case Intrinsic::s390_tbegin:
1991  Opcode = SystemZISD::TBEGIN;
1992  CCValid = SystemZ::CCMASK_TBEGIN;
1993  return true;
1994 
1995  case Intrinsic::s390_tbegin_nofloat:
1996  Opcode = SystemZISD::TBEGIN_NOFLOAT;
1997  CCValid = SystemZ::CCMASK_TBEGIN;
1998  return true;
1999 
2000  case Intrinsic::s390_tend:
2001  Opcode = SystemZISD::TEND;
2002  CCValid = SystemZ::CCMASK_TEND;
2003  return true;
2004 
2005  default:
2006  return false;
2007  }
2008 }
2009 
2010 // Return true if Op is an intrinsic node without chain that returns the
2011 // CC value as its final argument. Provide the associated SystemZISD
2012 // opcode and the mask of valid CC values if so.
2013 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2014  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2015  switch (Id) {
2016  case Intrinsic::s390_vpkshs:
2017  case Intrinsic::s390_vpksfs:
2018  case Intrinsic::s390_vpksgs:
2019  Opcode = SystemZISD::PACKS_CC;
2020  CCValid = SystemZ::CCMASK_VCMP;
2021  return true;
2022 
2023  case Intrinsic::s390_vpklshs:
2024  case Intrinsic::s390_vpklsfs:
2025  case Intrinsic::s390_vpklsgs:
2026  Opcode = SystemZISD::PACKLS_CC;
2027  CCValid = SystemZ::CCMASK_VCMP;
2028  return true;
2029 
2030  case Intrinsic::s390_vceqbs:
2031  case Intrinsic::s390_vceqhs:
2032  case Intrinsic::s390_vceqfs:
2033  case Intrinsic::s390_vceqgs:
2034  Opcode = SystemZISD::VICMPES;
2035  CCValid = SystemZ::CCMASK_VCMP;
2036  return true;
2037 
2038  case Intrinsic::s390_vchbs:
2039  case Intrinsic::s390_vchhs:
2040  case Intrinsic::s390_vchfs:
2041  case Intrinsic::s390_vchgs:
2042  Opcode = SystemZISD::VICMPHS;
2043  CCValid = SystemZ::CCMASK_VCMP;
2044  return true;
2045 
2046  case Intrinsic::s390_vchlbs:
2047  case Intrinsic::s390_vchlhs:
2048  case Intrinsic::s390_vchlfs:
2049  case Intrinsic::s390_vchlgs:
2050  Opcode = SystemZISD::VICMPHLS;
2051  CCValid = SystemZ::CCMASK_VCMP;
2052  return true;
2053 
2054  case Intrinsic::s390_vtm:
2055  Opcode = SystemZISD::VTM;
2056  CCValid = SystemZ::CCMASK_VCMP;
2057  return true;
2058 
2059  case Intrinsic::s390_vfaebs:
2060  case Intrinsic::s390_vfaehs:
2061  case Intrinsic::s390_vfaefs:
2062  Opcode = SystemZISD::VFAE_CC;
2063  CCValid = SystemZ::CCMASK_ANY;
2064  return true;
2065 
2066  case Intrinsic::s390_vfaezbs:
2067  case Intrinsic::s390_vfaezhs:
2068  case Intrinsic::s390_vfaezfs:
2069  Opcode = SystemZISD::VFAEZ_CC;
2070  CCValid = SystemZ::CCMASK_ANY;
2071  return true;
2072 
2073  case Intrinsic::s390_vfeebs:
2074  case Intrinsic::s390_vfeehs:
2075  case Intrinsic::s390_vfeefs:
2076  Opcode = SystemZISD::VFEE_CC;
2077  CCValid = SystemZ::CCMASK_ANY;
2078  return true;
2079 
2080  case Intrinsic::s390_vfeezbs:
2081  case Intrinsic::s390_vfeezhs:
2082  case Intrinsic::s390_vfeezfs:
2083  Opcode = SystemZISD::VFEEZ_CC;
2084  CCValid = SystemZ::CCMASK_ANY;
2085  return true;
2086 
2087  case Intrinsic::s390_vfenebs:
2088  case Intrinsic::s390_vfenehs:
2089  case Intrinsic::s390_vfenefs:
2090  Opcode = SystemZISD::VFENE_CC;
2091  CCValid = SystemZ::CCMASK_ANY;
2092  return true;
2093 
2094  case Intrinsic::s390_vfenezbs:
2095  case Intrinsic::s390_vfenezhs:
2096  case Intrinsic::s390_vfenezfs:
2097  Opcode = SystemZISD::VFENEZ_CC;
2098  CCValid = SystemZ::CCMASK_ANY;
2099  return true;
2100 
2101  case Intrinsic::s390_vistrbs:
2102  case Intrinsic::s390_vistrhs:
2103  case Intrinsic::s390_vistrfs:
2104  Opcode = SystemZISD::VISTR_CC;
2105  CCValid = SystemZ::CCMASK_ANY;
2106  return true;
2107 
2108  case Intrinsic::s390_vstrcbs:
2109  case Intrinsic::s390_vstrchs:
2110  case Intrinsic::s390_vstrcfs:
2111  Opcode = SystemZISD::VSTRC_CC;
2112  CCValid = SystemZ::CCMASK_ANY;
2113  return true;
2114 
2115  case Intrinsic::s390_vstrczbs:
2116  case Intrinsic::s390_vstrczhs:
2117  case Intrinsic::s390_vstrczfs:
2118  Opcode = SystemZISD::VSTRCZ_CC;
2119  CCValid = SystemZ::CCMASK_ANY;
2120  return true;
2121 
2122  case Intrinsic::s390_vstrsb:
2123  case Intrinsic::s390_vstrsh:
2124  case Intrinsic::s390_vstrsf:
2125  Opcode = SystemZISD::VSTRS_CC;
2126  CCValid = SystemZ::CCMASK_ANY;
2127  return true;
2128 
2129  case Intrinsic::s390_vstrszb:
2130  case Intrinsic::s390_vstrszh:
2131  case Intrinsic::s390_vstrszf:
2132  Opcode = SystemZISD::VSTRSZ_CC;
2133  CCValid = SystemZ::CCMASK_ANY;
2134  return true;
2135 
2136  case Intrinsic::s390_vfcedbs:
2137  case Intrinsic::s390_vfcesbs:
2138  Opcode = SystemZISD::VFCMPES;
2139  CCValid = SystemZ::CCMASK_VCMP;
2140  return true;
2141 
2142  case Intrinsic::s390_vfchdbs:
2143  case Intrinsic::s390_vfchsbs:
2144  Opcode = SystemZISD::VFCMPHS;
2145  CCValid = SystemZ::CCMASK_VCMP;
2146  return true;
2147 
2148  case Intrinsic::s390_vfchedbs:
2149  case Intrinsic::s390_vfchesbs:
2150  Opcode = SystemZISD::VFCMPHES;
2151  CCValid = SystemZ::CCMASK_VCMP;
2152  return true;
2153 
2154  case Intrinsic::s390_vftcidb:
2155  case Intrinsic::s390_vftcisb:
2156  Opcode = SystemZISD::VFTCI;
2157  CCValid = SystemZ::CCMASK_VCMP;
2158  return true;
2159 
2160  case Intrinsic::s390_tdc:
2161  Opcode = SystemZISD::TDC;
2162  CCValid = SystemZ::CCMASK_TDC;
2163  return true;
2164 
2165  default:
2166  return false;
2167  }
2168 }
2169 
2170 // Emit an intrinsic with chain and an explicit CC register result.
2171  static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2172  unsigned Opcode) {
2173  // Copy all operands except the intrinsic ID.
2174  unsigned NumOps = Op.getNumOperands();
2175  SmallVector<SDValue, 6> Ops;
2176  Ops.reserve(NumOps - 1);
2177  Ops.push_back(Op.getOperand(0));
2178  for (unsigned I = 2; I < NumOps; ++I)
2179  Ops.push_back(Op.getOperand(I));
2180 
2181  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2182  SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2183  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2184  SDValue OldChain = SDValue(Op.getNode(), 1);
2185  SDValue NewChain = SDValue(Intr.getNode(), 1);
2186  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2187  return Intr.getNode();
2188 }
2189 
2190 // Emit an intrinsic with an explicit CC register result.
2191  static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2192  unsigned Opcode) {
2193  // Copy all operands except the intrinsic ID.
2194  unsigned NumOps = Op.getNumOperands();
2195  SmallVector<SDValue, 6> Ops;
2196  Ops.reserve(NumOps - 1);
2197  for (unsigned I = 1; I < NumOps; ++I)
2198  Ops.push_back(Op.getOperand(I));
2199 
2200  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops);
2201  return Intr.getNode();
2202 }
2203 
2204 // CC is a comparison that will be implemented using an integer or
2205 // floating-point comparison. Return the condition code mask for
2206 // a branch on true. In the integer case, CCMASK_CMP_UO is set for
2207 // unsigned comparisons and clear for signed ones. In the floating-point
2208 // case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2209 static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2210 #define CONV(X) \
2211  case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2212  case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2213  case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2214 
2215  switch (CC) {
2216  default:
2217  llvm_unreachable("Invalid integer condition!");
2218 
2219  CONV(EQ);
2220  CONV(NE);
2221  CONV(GT);
2222  CONV(GE);
2223  CONV(LT);
2224  CONV(LE);
2225 
2226  case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2227  case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2228  }
2229 #undef CONV
2230 }
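// For example, the CONV(LT) expansion above yields:
//
//   ISD::SETLT  -> SystemZ::CCMASK_CMP_LT
//   ISD::SETOLT -> SystemZ::CCMASK_CMP_LT
//   ISD::SETULT -> SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_LT
//
// so the unordered bit rides along only for the SETU* forms, exactly as
// the comment before the function describes.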
2231 
2232 // If C can be converted to a comparison against zero, adjust the operands
2233 // as necessary.
2234 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2235  if (C.ICmpType == SystemZICMP::UnsignedOnly)
2236  return;
2237 
2238  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2239  if (!ConstOp1)
2240  return;
2241 
2242  int64_t Value = ConstOp1->getSExtValue();
2243  if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2244  (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2245  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2246  (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2247  C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2248  C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2249  }
2250 }
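// Worked example (illustrative): a signed test "X > -1" arrives with
// Value == -1 and CCMask == CCMASK_CMP_GT.  XOR-ing in CCMASK_CMP_EQ turns
// GT into GE, producing the equivalent "X >= 0" against a zero operand,
// which downstream code can implement with compare-with-zero forms such as
// LOAD AND TEST.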
2251 
2252 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2253 // adjust the operands as necessary.
2254 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2255  Comparison &C) {
2256  // For us to make any changes, it must be a comparison between a single-use
2257  // load and a constant.
2258  if (!C.Op0.hasOneUse() ||
2259  C.Op0.getOpcode() != ISD::LOAD ||
2260  C.Op1.getOpcode() != ISD::Constant)
2261  return;
2262 
2263  // We must have an 8- or 16-bit load.
2264  auto *Load = cast<LoadSDNode>(C.Op0);
2265  unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2266  if ((NumBits != 8 && NumBits != 16) ||
2267  NumBits != Load->getMemoryVT().getStoreSizeInBits())
2268  return;
2269 
2270  // The load must be an extending one and the constant must be within the
2271  // range of the unextended value.
2272  auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2273  uint64_t Value = ConstOp1->getZExtValue();
2274  uint64_t Mask = (1 << NumBits) - 1;
2275  if (Load->getExtensionType() == ISD::SEXTLOAD) {
2276  // Make sure that ConstOp1 is in range of C.Op0.
2277  int64_t SignedValue = ConstOp1->getSExtValue();
2278  if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2279  return;
2280  if (C.ICmpType != SystemZICMP::SignedOnly) {
2281  // Unsigned comparison between two sign-extended values is equivalent
2282  // to unsigned comparison between two zero-extended values.
2283  Value &= Mask;
2284  } else if (NumBits == 8) {
2285  // Try to treat the comparison as unsigned, so that we can use CLI.
2286  // Adjust CCMask and Value as necessary.
2287  if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2288  // Test whether the high bit of the byte is set.
2289  Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2290  else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2291  // Test whether the high bit of the byte is clear.
2292  Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2293  else
2294  // No instruction exists for this combination.
2295  return;
2296  C.ICmpType = SystemZICMP::UnsignedOnly;
2297  }
2298  } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2299  if (Value > Mask)
2300  return;
2301  // If the constant is in range, we can use any comparison.
2302  C.ICmpType = SystemZICMP::Any;
2303  } else
2304  return;
2305 
2306  // Make sure that the first operand is an i32 of the right extension type.
2307  ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2308  ISD::SEXTLOAD :
2309  ISD::ZEXTLOAD);
2310  if (C.Op0.getValueType() != MVT::i32 ||
2311  Load->getExtensionType() != ExtType) {
2312  C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2313  Load->getBasePtr(), Load->getPointerInfo(),
2314  Load->getMemoryVT(), Load->getAlignment(),
2315  Load->getMemOperand()->getFlags());
2316  // Update the chain uses.
2317  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2318  }
2319 
2320  // Make sure that the second operand is an i32 with the right value.
2321  if (C.Op1.getValueType() != MVT::i32 ||
2322  Value != ConstOp1->getZExtValue())
2323  C.Op1 = DAG.getConstant(Value, DL, MVT::i32);
2324 }
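// Worked example of the CLI rewrite above (illustrative): for a
// sign-extending i8 load L, the signed test "L < 0" holds exactly for the
// byte values 0x80..0xff, so it can be recast as the unsigned test
// "L > 127" and implemented with COMPARE LOGICAL IMMEDIATE.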
2325 
2326 // Return true if Op is either an unextended load, or a load suitable
2327 // for integer register-memory comparisons of type ICmpType.
2328 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2329  auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2330  if (Load) {
2331  // There are no instructions to compare a register with a memory byte.
2332  if (Load->getMemoryVT() == MVT::i8)
2333  return false;
2334  // Otherwise decide on extension type.
2335  switch (Load->getExtensionType()) {
2336  case ISD::NON_EXTLOAD:
2337  return true;
2338  case ISD::SEXTLOAD:
2339  return ICmpType != SystemZICMP::UnsignedOnly;
2340  case ISD::ZEXTLOAD:
2341  return ICmpType != SystemZICMP::SignedOnly;
2342  default:
2343  break;
2344  }
2345  }
2346  return false;
2347 }
2348 
2349 // Return true if it is better to swap the operands of C.
2350 static bool shouldSwapCmpOperands(const Comparison &C) {
2351  // Leave f128 comparisons alone, since they have no memory forms.
2352  if (C.Op0.getValueType() == MVT::f128)
2353  return false;
2354 
2355  // Always keep a floating-point constant second, since comparisons with
2356  // zero can use LOAD TEST and comparisons with other constants make a
2357  // natural memory operand.
2358  if (isa<ConstantFPSDNode>(C.Op1))
2359  return false;
2360 
2361  // Never swap comparisons with zero since there are many ways to optimize
2362  // those later.
2363  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2364  if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2365  return false;
2366 
2367  // Also keep natural memory operands second if the loaded value is
2368  // only used here. Several comparisons have memory forms.
2369  if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2370  return false;
2371 
2372  // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
2373  // In that case we generally prefer the memory to be second.
2374  if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
2375  // The only exceptions are when the second operand is a constant and
2376  // we can use things like CHHSI.
2377  if (!ConstOp1)
2378  return true;
2379  // The unsigned memory-immediate instructions can handle 16-bit
2380  // unsigned integers.
2381  if (C.ICmpType != SystemZICMP::SignedOnly &&
2382  isUInt<16>(ConstOp1->getZExtValue()))
2383  return false;
2384  // The signed memory-immediate instructions can handle 16-bit
2385  // signed integers.
2386  if (C.ICmpType != SystemZICMP::UnsignedOnly &&
2387  isInt<16>(ConstOp1->getSExtValue()))
2388  return false;
2389  return true;
2390  }
2391 
2392  // Try to promote the use of CGFR and CLGFR.
2393  unsigned Opcode0 = C.Op0.getOpcode();
2394  if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
2395  return true;
2396  if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
2397  return true;
2398  if (C.ICmpType != SystemZICMP::SignedOnly &&
2399  Opcode0 == ISD::AND &&
2400  C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
2401  cast<ConstantSDNode>(C.Op0.getOperand(1))->getZExtValue() == 0xffffffff)
2402  return true;
2403 
2404  return false;
2405 }
2406 
2407 // Check whether C tests for equality between X and Y and whether X - Y
2408 // or Y - X is also computed. In that case it's better to compare the
2409 // result of the subtraction against zero.
2410 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
2411  Comparison &C) {
2412  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2413  C.CCMask == SystemZ::CCMASK_CMP_NE) {
2414  for (SDNode *N : C.Op0->uses()) {
2415  if (N->getOpcode() == ISD::SUB &&
2416  ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
2417  (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
2418  C.Op0 = SDValue(N, 0);
2419  C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
2420  return;
2421  }
2422  }
2423  }
2424 }
2425 
2426 // Check whether C compares a floating-point value with zero and if that
2427 // floating-point value is also negated. In this case we can use the
2428 // negation to set CC, so avoiding separate LOAD AND TEST and
2429 // LOAD (NEGATIVE/COMPLEMENT) instructions.
2430 static void adjustForFNeg(Comparison &C) {
2431  // This optimization is invalid for strict comparisons, since FNEG
2432  // does not raise any exceptions.
2433  if (C.Chain)
2434  return;
2435  auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
2436  if (C1 && C1->isZero()) {
2437  for (SDNode *N : C.Op0->uses()) {
2438  if (N->getOpcode() == ISD::FNEG) {
2439  C.Op0 = SDValue(N, 0);
2440  C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2441  return;
2442  }
2443  }
2444  }
2445 }
2446 
2447 // Check whether C compares (shl X, 32) with 0 and whether X is
2448 // also sign-extended. In that case it is better to test the result
2449 // of the sign extension using LTGFR.
2450 //
2451 // This case is important because InstCombine transforms a comparison
2452 // with (sext (trunc X)) into a comparison with (shl X, 32).
2453 static void adjustForLTGFR(Comparison &C) {
2454  // Check for a comparison between (shl X, 32) and 0.
2455  if (C.Op0.getOpcode() == ISD::SHL &&
2456  C.Op0.getValueType() == MVT::i64 &&
2457  C.Op1.getOpcode() == ISD::Constant &&
2458  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2459  auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2460  if (C1 && C1->getZExtValue() == 32) {
2461  SDValue ShlOp0 = C.Op0.getOperand(0);
2462  // See whether X has any SIGN_EXTEND_INREG uses.
2463  for (SDNode *N : ShlOp0->uses()) {
2464  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
2465  cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
2466  C.Op0 = SDValue(N, 0);
2467  return;
2468  }
2469  }
2470  }
2471  }
2472 }
2473 
2474 // If C compares the truncation of an extending load, try to compare
2475 // the untruncated value instead. This exposes more opportunities to
2476 // reuse CC.
2477 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
2478  Comparison &C) {
2479  if (C.Op0.getOpcode() == ISD::TRUNCATE &&
2480  C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
2481  C.Op1.getOpcode() == ISD::Constant &&
2482  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
2483  auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
2484  if (L->getMemoryVT().getStoreSizeInBits().getFixedSize() <=
2485  C.Op0.getValueSizeInBits().getFixedSize()) {
2486  unsigned Type = L->getExtensionType();
2487  if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
2488  (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
2489  C.Op0 = C.Op0.getOperand(0);
2490  C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
2491  }
2492  }
2493  }
2494 }
2495 
2496 // Return true if shift operation N has an in-range constant shift value.
2497 // Store it in ShiftVal if so.
2498 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
2499  auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
2500  if (!Shift)
2501  return false;
2502 
2503  uint64_t Amount = Shift->getZExtValue();
2504  if (Amount >= N.getValueSizeInBits())
2505  return false;
2506 
2507  ShiftVal = Amount;
2508  return true;
2509 }
2510 
2511 // Check whether an AND with Mask is suitable for a TEST UNDER MASK
2512 // instruction and whether the CC value is descriptive enough to handle
2513 // a comparison of type Opcode between the AND result and CmpVal.
2514 // CCMask says which comparison result is being tested and BitSize is
2515 // the number of bits in the operands. If TEST UNDER MASK can be used,
2516 // return the corresponding CC mask, otherwise return 0.
2517 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
2518  uint64_t Mask, uint64_t CmpVal,
2519  unsigned ICmpType) {
2520  assert(Mask != 0 && "ANDs with zero should have been removed by now");
2521 
2522  // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
2523  if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
2524  !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
2525  return 0;
2526 
2527  // Work out the masks for the lowest and highest bits.
2528  unsigned HighShift = 63 - countLeadingZeros(Mask);
2529  uint64_t High = uint64_t(1) << HighShift;
2530  uint64_t Low = uint64_t(1) << countTrailingZeros(Mask);
2531 
2532  // Signed ordered comparisons are effectively unsigned if the sign
2533  // bit is dropped.
2534  bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
2535 
2536  // Check for equality comparisons with 0, or the equivalent.
2537  if (CmpVal == 0) {
2538  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2539  return SystemZ::CCMASK_TM_ALL_0;
2540  if (CCMask == SystemZ::CCMASK_CMP_NE)
2541  return SystemZ::CCMASK_TM_SOME_1;
2542  }
2543  if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
2544  if (CCMask == SystemZ::CCMASK_CMP_LT)
2545  return SystemZ::CCMASK_TM_ALL_0;
2546  if (CCMask == SystemZ::CCMASK_CMP_GE)
2547  return SystemZ::CCMASK_TM_SOME_1;
2548  }
2549  if (EffectivelyUnsigned && CmpVal < Low) {
2550  if (CCMask == SystemZ::CCMASK_CMP_LE)
2551  return SystemZ::CCMASK_TM_ALL_0;
2552  if (CCMask == SystemZ::CCMASK_CMP_GT)
2553  return SystemZ::CCMASK_TM_SOME_1;
2554  }
2555 
2556  // Check for equality comparisons with the mask, or the equivalent.
2557  if (CmpVal == Mask) {
2558  if (CCMask == SystemZ::CCMASK_CMP_EQ)
2559  return SystemZ::CCMASK_TM_ALL_1;
2560  if (CCMask == SystemZ::CCMASK_CMP_NE)
2561  return SystemZ::CCMASK_TM_SOME_0;
2562  }
2563  if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
2564  if (CCMask == SystemZ::CCMASK_CMP_GT)
2565  return SystemZ::CCMASK_TM_ALL_1;
2566  if (CCMask == SystemZ::CCMASK_CMP_LE)
2567  return SystemZ::CCMASK_TM_SOME_0;
2568  }
2569  if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
2570  if (CCMask == SystemZ::CCMASK_CMP_GE)
2571  return SystemZ::CCMASK_TM_ALL_1;
2572  if (CCMask == SystemZ::CCMASK_CMP_LT)
2573  return SystemZ::CCMASK_TM_SOME_0;
2574  }
2575 
2576  // Check for ordered comparisons with the top bit.
2577  if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
2578  if (CCMask == SystemZ::CCMASK_CMP_LE)
2579  return SystemZ::CCMASK_TM_MSB_0;
2580  if (CCMask == SystemZ::CCMASK_CMP_GT)
2581  return SystemZ::CCMASK_TM_MSB_1;
2582  }
2583  if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
2584  if (CCMask == SystemZ::CCMASK_CMP_LT)
2585  return SystemZ::CCMASK_TM_MSB_0;
2586  if (CCMask == SystemZ::CCMASK_CMP_GE)
2587  return SystemZ::CCMASK_TM_MSB_1;
2588  }
2589 
2590  // If there are just two bits, we can do equality checks for Low and High
2591  // as well.
2592  if (Mask == Low + High) {
2593  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
2594  return SystemZ::CCMASK_TM_MIXED_MSB_0;
2595  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
2596  return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
2597  if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
2598  return SystemZ::CCMASK_TM_MIXED_MSB_1;
2599  if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
2600  return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
2601  }
2602 
2603  // Looks like we've exhausted our options.
2604  return 0;
2605 }
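// Worked example (illustrative): for Mask == 0x8000 both Low and High are
// 0x8000, so "(X & 0x8000) == 0" maps to CCMASK_TM_ALL_0 and
// "(X & 0x8000) != 0" to CCMASK_TM_SOME_1, letting a single TEST UNDER
// MASK replace the AND-plus-compare sequence.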
2606 
2607 // See whether C can be implemented as a TEST UNDER MASK instruction.
2608 // Update the arguments with the TM version if so.
2609 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
2610  Comparison &C) {
2611  // Check that we have a comparison with a constant.
2612  auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2613  if (!ConstOp1)
2614  return;
2615  uint64_t CmpVal = ConstOp1->getZExtValue();
2616 
2617  // Check whether the nonconstant input is an AND with a constant mask.
2618  Comparison NewC(C);
2619  uint64_t MaskVal;
2620  ConstantSDNode *Mask = nullptr;
2621  if (C.Op0.getOpcode() == ISD::AND) {
2622  NewC.Op0 = C.Op0.getOperand(0);
2623  NewC.Op1 = C.Op0.getOperand(1);
2624  Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
2625  if (!Mask)
2626  return;
2627  MaskVal = Mask->getZExtValue();
2628  } else {
2629  // There is no instruction to compare with a 64-bit immediate
2630  // so use TMHH instead if possible. We need an unsigned ordered
2631  // comparison with an i64 immediate.
2632  if (NewC.Op0.getValueType() != MVT::i64 ||
2633  NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
2634  NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
2635  NewC.ICmpType == SystemZICMP::SignedOnly)
2636  return;
2637  // Convert LE and GT comparisons into LT and GE.
2638  if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
2639  NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
2640  if (CmpVal == uint64_t(-1))
2641  return;
2642  CmpVal += 1;
2643  NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2644  }
2645  // If the low N bits of Op1 are zero, then the low N bits of Op0 can
2646  // be masked off without changing the result.
2647  MaskVal = -(CmpVal & -CmpVal);
2648  NewC.ICmpType = SystemZICMP::UnsignedOnly;
2649  }
2650  if (!MaskVal)
2651  return;
2652 
2653  // Check whether the combination of mask, comparison value and comparison
2654  // type are suitable.
2655  unsigned BitSize = NewC.Op0.getValueSizeInBits();
2656  unsigned NewCCMask, ShiftVal;
2657  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2658  NewC.Op0.getOpcode() == ISD::SHL &&
2659  isSimpleShift(NewC.Op0, ShiftVal) &&
2660  (MaskVal >> ShiftVal != 0) &&
2661  ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
2662  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2663  MaskVal >> ShiftVal,
2664  CmpVal >> ShiftVal,
2665  SystemZICMP::Any))) {
2666  NewC.Op0 = NewC.Op0.getOperand(0);
2667  MaskVal >>= ShiftVal;
2668  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
2669  NewC.Op0.getOpcode() == ISD::SRL &&
2670  isSimpleShift(NewC.Op0, ShiftVal) &&
2671  (MaskVal << ShiftVal != 0) &&
2672  ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
2673  (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
2674  MaskVal << ShiftVal,
2675  CmpVal << ShiftVal,
2676  SystemZICMP::UnsignedOnly))) {
2677  NewC.Op0 = NewC.Op0.getOperand(0);
2678  MaskVal <<= ShiftVal;
2679  } else {
2680  NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
2681  NewC.ICmpType);
2682  if (!NewCCMask)
2683  return;
2684  }
2685 
2686  // Go ahead and make the change.
2687  C.Opcode = SystemZISD::TM;
2688  C.Op0 = NewC.Op0;
2689  if (Mask && Mask->getZExtValue() == MaskVal)
2690  C.Op1 = SDValue(Mask, 0);
2691  else
2692  C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
2693  C.CCValid = SystemZ::CCMASK_TM;
2694  C.CCMask = NewCCMask;
2695 }
2696 
2697 // See whether the comparison argument contains a redundant AND
2698 // and remove it if so. This sometimes happens due to the generic
2699 // BRCOND expansion.
2700 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
2701  Comparison &C) {
2702  if (C.Op0.getOpcode() != ISD::AND)
2703  return;
2704  auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
2705  if (!Mask)
2706  return;
2707  KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
2708  if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
2709  return;
2710 
2711  C.Op0 = C.Op0.getOperand(0);
2712 }
2713 
2714 // Return a Comparison that tests the condition-code result of intrinsic
2715 // node Call against constant integer CC using comparison code Cond.
2716 // Opcode is the opcode of the SystemZISD operation for the intrinsic
2717 // and CCValid is the set of possible condition-code results.
2718 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
2719  SDValue Call, unsigned CCValid, uint64_t CC,
2720  ISD::CondCode Cond) {
2721  Comparison C(Call, SDValue(), SDValue());
2722  C.Opcode = Opcode;
2723  C.CCValid = CCValid;
2724  if (Cond == ISD::SETEQ)
2725  // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
2726  C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
2727  else if (Cond == ISD::SETNE)
2728  // ...and the inverse of that.
2729  C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
2730  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
2731  // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
2732  // always true for CC>3.
2733  C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
2734  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
2735  // ...and the inverse of that.
2736  C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
2737  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
2738  // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
2739  // always true for CC>3.
2740  C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
2741  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
2742  // ...and the inverse of that.
2743  C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
2744  else
2745  llvm_unreachable("Unexpected integer comparison type");
2746  C.CCMask &= CCValid;
2747  return C;
2748 }
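// Worked example of the mask arithmetic above (illustrative): CC values
// 0..3 map to mask bits 3..0.  For Cond == ISD::SETEQ with CC == 1 the
// mask is 1 << (3 - 1) == 0b0100, the single bit that tests "CC is 1".
// For Cond == ISD::SETULT with CC == 2 the mask is ~0U << (4 - 2), whose
// low four bits are 0b1100, selecting condition codes 0 and 1 ("CC < 2").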
2749 
2750  // Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
2751 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
2752  ISD::CondCode Cond, const SDLoc &DL,
2753  SDValue Chain = SDValue(),
2754  bool IsSignaling = false) {
2755  if (CmpOp1.getOpcode() == ISD::Constant) {
2756  assert(!Chain);
2757  uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
2758  unsigned Opcode, CCValid;
2759  if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
2760  CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
2761  isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
2762  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2763  if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
2764  CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
2765  isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
2766  return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
2767  }
2768  Comparison C(CmpOp0, CmpOp1, Chain);
2769  C.CCMask = CCMaskForCondCode(Cond);
2770  if (C.Op0.getValueType().isFloatingPoint()) {
2771  C.CCValid = SystemZ::CCMASK_FCMP;
2772  if (!C.Chain)
2773  C.Opcode = SystemZISD::FCMP;
2774  else if (!IsSignaling)
2775  C.Opcode = SystemZISD::STRICT_FCMP;
2776  else
2777  C.Opcode = SystemZISD::STRICT_FCMPS;
2778  adjustForFNeg(C);
2779  } else {
2780  assert(!C.Chain);
2781  C.CCValid = SystemZ::CCMASK_ICMP;
2782  C.Opcode = SystemZISD::ICMP;
2783  // Choose the type of comparison. Equality and inequality tests can
2784  // use either signed or unsigned comparisons. The choice also doesn't
2785  // matter if both sign bits are known to be clear. In those cases we
2786  // want to give the main isel code the freedom to choose whichever
2787  // form fits best.
2788  if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
2789  C.CCMask == SystemZ::CCMASK_CMP_NE ||
2790  (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
2791  C.ICmpType = SystemZICMP::Any;
2792  else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
2793  C.ICmpType = SystemZICMP::UnsignedOnly;
2794  else
2795  C.ICmpType = SystemZICMP::SignedOnly;
2796  C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
2797  adjustForRedundantAnd(DAG, DL, C);
2798  adjustZeroCmp(DAG, DL, C);
2799  adjustSubwordCmp(DAG, DL, C);
2800  adjustForSubtraction(DAG, DL, C);
2801  adjustForLTGFR(C);
2802  adjustICmpTruncate(DAG, DL, C);
2803  }
2804 
2805  if (shouldSwapCmpOperands(C)) {
2806  std::swap(C.Op0, C.Op1);
2807  C.CCMask = SystemZ::reverseCCMask(C.CCMask);
2808  }
2809 
2810  adjustForTestUnderMask(DAG, DL, C);
2811  return C;
2812 }
2813 
2814 // Emit the comparison instruction described by C.
2815 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2816  if (!C.Op1.getNode()) {
2817  SDNode *Node;
2818  switch (C.Op0.getOpcode()) {
2819  case ISD::INTRINSIC_W_CHAIN:
2820  Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
2821  return SDValue(Node, 0);
2822  case ISD::INTRINSIC_WO_CHAIN:
2823  Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
2824  return SDValue(Node, Node->getNumValues() - 1);
2825  default:
2826  llvm_unreachable("Invalid comparison operands");
2827  }
2828  }
2829  if (C.Opcode == SystemZISD::ICMP)
2830  return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
2831  DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
2832  if (C.Opcode == SystemZISD::TM) {
2833  bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
2834  bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
2835  return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
2836  DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
2837  }
2838  if (C.Chain) {
2839  SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
2840  return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
2841  }
2842  return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
2843 }
2844 
2845 // Implement a 32-bit *MUL_LOHI operation by extending both operands to
2846 // 64 bits. Extend is the extension type to use. Store the high part
2847 // in Hi and the low part in Lo.
2848 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
2849  SDValue Op0, SDValue Op1, SDValue &Hi,
2850  SDValue &Lo) {
2851  Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
2852  Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
2853  SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
2854  Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2855  DAG.getConstant(32, DL, MVT::i64));
2856  Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
2857  Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
2858 }
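// Scalar model of the expansion above (an illustrative sketch, not used by
// the lowering itself).  With Extend == ISD::ZERO_EXTEND the emitted nodes
// compute exactly this:
//
//   uint64_t Prod = uint64_t(A) * uint64_t(B);  // extend + MUL
//   uint32_t Hi = uint32_t(Prod >> 32);         // SRL 32 + TRUNCATE
//   uint32_t Lo = uint32_t(Prod);               // TRUNCATE
//
// With ISD::SIGN_EXTEND the extensions become signed but the same
// shift/truncate pair still splits the 64-bit product.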
2859 
2860 // Lower a binary operation that produces two VT results, one in each
2861 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
2862 // and Opcode performs the GR128 operation. Store the even register result
2863 // in Even and the odd register result in Odd.
2864 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
2865  unsigned Opcode, SDValue Op0, SDValue Op1,
2866  SDValue &Even, SDValue &Odd) {
2867  SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
2868  bool Is32Bit = is32Bit(VT);
2869  Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
2870  Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
2871 }
2872 
2873 // Return an i32 value that is 1 if the CC value produced by CCReg is
2874 // in the mask CCMask and 0 otherwise. CC is known to have a value
2875 // in CCValid, so other values can be ignored.
2876 static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
2877  unsigned CCValid, unsigned CCMask) {
2878  SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
2879  DAG.getConstant(0, DL, MVT::i32),
2880  DAG.getTargetConstant(CCValid, DL, MVT::i32),
2881  DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
2882  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
2883 }
2884 
2885  // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
2886 // be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
2887 // for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
2888 // floating-point comparisons, and CmpMode::SignalingFP for strict signaling
2889 // floating-point comparisons.
2890 enum class CmpMode { Int, FP, StrictFP, SignalingFP };
2891  static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
2892  switch (CC) {
2893  case ISD::SETOEQ:
2894  case ISD::SETEQ:
2895  switch (Mode) {
2896  case CmpMode::Int: return SystemZISD::VICMPE;
2897  case CmpMode::FP: return SystemZISD::VFCMPE;
2898  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
2899  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
2900  }
2901  llvm_unreachable("Bad mode");
2902 
2903  case ISD::SETOGE:
2904  case ISD::SETGE:
2905  switch (Mode) {
2906  case CmpMode::Int: return 0;
2907  case CmpMode::FP: return SystemZISD::VFCMPHE;
2908  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
2909  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
2910  }
2911  llvm_unreachable("Bad mode");
2912 
2913  case ISD::SETOGT:
2914  case ISD::SETGT:
2915  switch (Mode) {
2916  case CmpMode::Int: return SystemZISD::VICMPH;
2917  case CmpMode::FP: return SystemZISD::VFCMPH;
2918  case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
2919  case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
2920  }
2921  llvm_unreachable("Bad mode");
2922 
2923  case ISD::SETUGT:
2924  switch (Mode) {
2925  case CmpMode::Int: return SystemZISD::VICMPHL;
2926  case CmpMode::FP: return 0;
2927  case CmpMode::StrictFP: return 0;
2928  case CmpMode::SignalingFP: return 0;
2929  }
2930  llvm_unreachable("Bad mode");
2931 
2932  default:
2933  return 0;
2934  }
2935 }
2936 
2937 // Return the SystemZISD vector comparison operation for CC or its inverse,
2938 // or 0 if neither can be done directly. Indicate in Invert whether the
2939 // result is for the inverse of CC. Mode is as above.
2940  static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
2941  bool &Invert) {
2942  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2943  Invert = false;
2944  return Opcode;
2945  }
2946 
2947  CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
2948  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
2949  Invert = true;
2950  return Opcode;
2951  }
2952 
2953  return 0;
2954 }
2955 
2956 // Return a v2f64 that contains the extended form of elements Start and Start+1
2957 // of v4f32 value Op. If Chain is nonnull, return the strict form.
2958 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
2959  SDValue Op, SDValue Chain) {
2960  int Mask[] = { Start, -1, Start + 1, -1 };
2961  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
2962  if (Chain) {
2963  SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
2964  return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
2965  }
2966  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
2967 }
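// Illustrative lane arithmetic: for Start == 0 the shuffle mask
// { 0, -1, 1, -1 } places elements 0 and 1 of Op in the even lanes (the
// odd lanes are don't-care), which is the layout VEXTEND expects when
// widening two f32 elements into a v2f64.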
2968 
2969 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
2970 // producing a result of type VT. If Chain is nonnull, return the strict form.
2971 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
2972  const SDLoc &DL, EVT VT,
2973  SDValue CmpOp0,
2974  SDValue CmpOp1,
2975  SDValue Chain) const {
2976  // There is no hardware support for v4f32 (unless we have the vector
2977  // enhancements facility 1), so extend the vector into two v2f64s
2978  // and compare those.
2979  if (CmpOp0.getValueType() == MVT::v4f32 &&
2980  !Subtarget.hasVectorEnhancements1()) {
2981  SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
2982  SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
2983  SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
2984  SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
2985  if (Chain) {
2986  SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
2987  SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
2988  SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
2989  SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
2990  SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
2991  H1.getValue(1), L1.getValue(1),
2992  HRes.getValue(1), LRes.getValue(1) };
2993  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
2994  SDValue Ops[2] = { Res, NewChain };
2995  return DAG.getMergeValues(Ops, DL);
2996  }
2997  SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
2998  SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
2999  return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3000  }
3001  if (Chain) {
3002  SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3003  return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3004  }
3005  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3006 }
3007 
3008 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3009 // an integer mask of type VT. If Chain is nonnull, we have a strict
3010 // floating-point comparison. If in addition IsSignaling is true, we have
3011 // a strict signaling floating-point comparison.
3012 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3013  const SDLoc &DL, EVT VT,
3014  ISD::CondCode CC,
3015  SDValue CmpOp0,
3016  SDValue CmpOp1,
3017  SDValue Chain,
3018  bool IsSignaling) const {
3019  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3020  assert (!Chain || IsFP);
3021  assert (!IsSignaling || Chain);
3022  CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3023  Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3024  bool Invert = false;
3025  SDValue Cmp;
3026  switch (CC) {
3027  // Handle tests for order using (or (ogt y x) (oge x y)).
3028  case ISD::SETUO:
3029  Invert = true;
3030  LLVM_FALLTHROUGH;
3031  case ISD::SETO: {
3032  assert(IsFP && "Unexpected integer comparison");
3033  SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3034  DL, VT, CmpOp1, CmpOp0, Chain);
3035  SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3036  DL, VT, CmpOp0, CmpOp1, Chain);
3037  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3038  if (Chain)
3039  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3040  LT.getValue(1), GE.getValue(1));
3041  break;
3042  }
3043 
3044  // Handle <> tests using (or (ogt y x) (ogt x y)).
3045  case ISD::SETUEQ:
3046  Invert = true;
3047  LLVM_FALLTHROUGH;
3048  case ISD::SETONE: {
3049  assert(IsFP && "Unexpected integer comparison");
3050  SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3051  DL, VT, CmpOp1, CmpOp0, Chain);
3052  SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3053  DL, VT, CmpOp0, CmpOp1, Chain);
3054  Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3055  if (Chain)
3056  Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3057  LT.getValue(1), GT.getValue(1));
3058  break;
3059  }
3060 
3061  // Otherwise a single comparison is enough. It doesn't really
3062  // matter whether we try the inversion or the swap first, since
3063  // there are no cases where both work.
3064  default:
3065  if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3066  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3067  else {
3068  CC = ISD::getSetCCSwappedOperands(CC);
3069  if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3070  Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3071  else
3072  llvm_unreachable("Unhandled comparison");
3073  }
3074  if (Chain)
3075  Chain = Cmp.getValue(1);
3076  break;
3077  }
3078  if (Invert) {
3079  SDValue Mask =
3080  DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
3081  Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3082  }
3083  if (Chain && Chain.getNode() != Cmp.getNode()) {
3084  SDValue Ops[2] = { Cmp, Chain };
3085  Cmp = DAG.getMergeValues(Ops, DL);
3086  }
3087  return Cmp;
3088 }
3089 
3090 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3091  SelectionDAG &DAG) const {
3092  SDValue CmpOp0 = Op.getOperand(0);
3093  SDValue CmpOp1 = Op.getOperand(1);
3094  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3095  SDLoc DL(Op);
3096  EVT VT = Op.getValueType();
3097  if (VT.isVector())
3098  return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3099 
3100  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3101  SDValue CCReg = emitCmp(DAG, DL, C);
3102  return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3103 }
3104 
3105 SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3106  SelectionDAG &DAG,
3107  bool IsSignaling) const {
3108  SDValue Chain = Op.getOperand(0);
3109  SDValue CmpOp0 = Op.getOperand(1);
3110  SDValue CmpOp1 = Op.getOperand(2);
3111  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3112  SDLoc DL(Op);
3113  EVT VT = Op.getNode()->getValueType(0);
3114  if (VT.isVector()) {
3115  SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3116  Chain, IsSignaling);
3117  return Res.getValue(Op.getResNo());
3118  }
3119 
3120  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3121  SDValue CCReg = emitCmp(DAG, DL, C);
3122  CCReg->setFlags(Op->getFlags());
3123  SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3124  SDValue Ops[2] = { Result, CCReg.getValue(1) };
3125  return DAG.getMergeValues(Ops, DL);
3126 }
3127 
3128 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3129  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3130  SDValue CmpOp0 = Op.getOperand(2);
3131  SDValue CmpOp1 = Op.getOperand(3);
3132  SDValue Dest = Op.getOperand(4);
3133  SDLoc DL(Op);
3134 
3135  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3136  SDValue CCReg = emitCmp(DAG, DL, C);
3137  return DAG.getNode(
3138  SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3139  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3140  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3141 }
3142 
3143 // Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3144 // allowing Pos and Neg to be wider than CmpOp.
3145 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3146  return (Neg.getOpcode() == ISD::SUB &&
3147  Neg.getOperand(0).getOpcode() == ISD::Constant &&
3148  cast<ConstantSDNode>(Neg.getOperand(0))->getZExtValue() == 0 &&
3149  Neg.getOperand(1) == Pos &&
3150  (Pos == CmpOp ||
3151  (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3152  Pos.getOperand(0) == CmpOp)));
3153 }
3154 
3155 // Return the absolute or negative absolute of Op; IsNegative decides which.
3156  static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3157  bool IsNegative) {
3158  Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3159  if (IsNegative)
3160  Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3161  DAG.getConstant(0, DL, Op.getValueType()), Op);
3162  return Op;
3163 }
3164 
3165 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3166  SelectionDAG &DAG) const {
3167  SDValue CmpOp0 = Op.getOperand(0);
3168  SDValue CmpOp1 = Op.getOperand(1);
3169  SDValue TrueOp = Op.getOperand(2);
3170  SDValue FalseOp = Op.getOperand(3);
3171  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3172  SDLoc DL(Op);
3173 
3174  Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3175 
3176  // Check for absolute and negative-absolute selections, including those
3177  // where the comparison value is sign-extended (for LPGFR and LNGFR).
3178  // This check supplements the one in DAGCombiner.
3179  if (C.Opcode == SystemZISD::ICMP &&
3180  C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3181  C.CCMask != SystemZ::CCMASK_CMP_NE &&
3182  C.Op1.getOpcode() == ISD::Constant &&
3183  cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
3184  if (isAbsolute(C.Op0, TrueOp, FalseOp))
3185  return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3186  if (isAbsolute(C.Op0, FalseOp, TrueOp))
3187  return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3188  }
3189 
3190  SDValue CCReg = emitCmp(DAG, DL, C);
3191  SDValue Ops[] = {TrueOp, FalseOp,
3192  DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3193  DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
3194 
3195  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
3196 }
3197 
3198 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
3199  SelectionDAG &DAG) const {
3200  SDLoc DL(Node);
3201  const GlobalValue *GV = Node->getGlobal();
3202  int64_t Offset = Node->getOffset();
3203  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3204  CodeModel::Model CM = DAG.getTarget().getCodeModel();
3205 
3206  SDValue Result;
3207  if (Subtarget.isPC32DBLSymbol(GV, CM)) {
3208  if (isInt<32>(Offset)) {
3209  // Assign anchors at 1<<12 byte boundaries.
3210  uint64_t Anchor = Offset & ~uint64_t(0xfff);
3211  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
3212  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3213 
3214  // The offset can be folded into the address if it is aligned to a
3215  // halfword.
3216  Offset -= Anchor;
3217  if (Offset != 0 && (Offset & 1) == 0) {
3218  SDValue Full =
3219  DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
3220  Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
3221  Offset = 0;
3222  }
3223  } else {
3224  // Conservatively load a constant offset greater than 32 bits into a
3225  // register below.
3226  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
3227  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3228  }
3229  } else {
3230  Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
3231  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3232  Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3233  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3234  }
3235 
3236  // If there was a non-zero offset that we didn't fold, create an explicit
3237  // addition for it.
3238  if (Offset != 0)
3239  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3240  DAG.getConstant(Offset, DL, PtrVT));
3241 
3242  return Result;
3243 }
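// Anchor arithmetic example (illustrative): for Offset == 0x12346 the
// anchor is 0x12000 and the residue 0x346 is halfword-aligned, so it is
// folded into a PCREL_OFFSET node; an odd residue such as 0x345 would
// instead be materialized by the explicit ADD above.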
3244 
3245 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
3246  SelectionDAG &DAG,
3247  unsigned Opcode,
3248  SDValue GOTOffset) const {
3249  SDLoc DL(Node);
3250  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3251  SDValue Chain = DAG.getEntryNode();
3252  SDValue Glue;
3253 
3254  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3255  CallingConv::GHC)
3256  report_fatal_error("In GHC calling convention TLS is not supported");
3257 
3258  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
3259  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
3260  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
3261  Glue = Chain.getValue(1);
3262  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
3263  Glue = Chain.getValue(1);
3264 
3265  // The first call operand is the chain and the second is the TLS symbol.
3266  SmallVector<SDValue, 8> Ops;
3267  Ops.push_back(Chain);
3268  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
3269  Node->getValueType(0),
3270  0, 0));
3271 
3272  // Add argument registers to the end of the list so that they are
3273  // known live into the call.
3274  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
3275  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
3276 
3277  // Add a register mask operand representing the call-preserved registers.
3278  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3279  const uint32_t *Mask =
3280  TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
3281  assert(Mask && "Missing call preserved mask for calling convention");
3282  Ops.push_back(DAG.getRegisterMask(Mask));
3283 
3284  // Glue the call to the argument copies.
3285  Ops.push_back(Glue);
3286 
3287  // Emit the call.
3288  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3289  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
3290  Glue = Chain.getValue(1);
3291 
3292  // Copy the return value from %r2.
3293  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
3294 }
3295 
3296 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
3297  SelectionDAG &DAG) const {
3298  SDValue Chain = DAG.getEntryNode();
3299  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3300 
3301  // The high part of the thread pointer is in access register 0.
3302  SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
3303  TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
3304 
3305  // The low part of the thread pointer is in access register 1.
3306  SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
3307  TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
3308 
3309  // Merge them into a single 64-bit address.
3310  SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
3311  DAG.getConstant(32, DL, PtrVT));
3312  return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
3313 }
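// Scalar model of the merge above (illustrative): with A0 and A1 standing
// for the two access-register halves, the 64-bit thread pointer is
//
//   uint64_t TP = (uint64_t(A0) << 32) | uint32_t(A1);
//
// The low half must be zero-extended (not any-extended) so that the OR
// cannot corrupt the shifted high half.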
3314 
3315 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
3316  SelectionDAG &DAG) const {
3317  if (DAG.getTarget().useEmulatedTLS())
3318  return LowerToTLSEmulatedModel(Node, DAG);
3319  SDLoc DL(Node);
3320  const GlobalValue *GV = Node->getGlobal();
3321  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3322  TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
3323 
3324  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
3325  CallingConv::GHC)
3326  report_fatal_error("In GHC calling convention TLS is not supported");
3327 
3328  SDValue TP = lowerThreadPointer(DL, DAG);
3329 
3330  // Get the offset of GA from the thread pointer, based on the TLS model.
3331  SDValue Offset;
3332  switch (model) {
3333  case TLSModel::GeneralDynamic: {
3334  // Load the GOT offset of the tls_index (module ID / per-symbol offset).
3335  SystemZConstantPoolValue *CPV =
3336  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
3337 
3338  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3339  Offset = DAG.getLoad(
3340  PtrVT, DL, DAG.getEntryNode(), Offset,
3341  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3342 
3343  // Call __tls_get_offset to retrieve the offset.
3344  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
3345  break;
3346  }
3347 
3348  case TLSModel::LocalDynamic: {
3349  // Load the GOT offset of the module ID.
3350  SystemZConstantPoolValue *CPV =
3351  SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
3352 
3353  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3354  Offset = DAG.getLoad(
3355  PtrVT, DL, DAG.getEntryNode(), Offset,
3356  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3357 
3358  // Call __tls_get_offset to retrieve the module base offset.
3359  Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
3360 
3361  // Note: The SystemZLDCleanupPass will remove redundant computations
3362  // of the module base offset. Count total number of local-dynamic
3363  // accesses to trigger execution of that pass.
3364  SystemZMachineFunctionInfo* MFI =
3365  DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
3366  MFI->incNumLocalDynamicTLSAccesses();
3367 
3368  // Add the per-symbol offset.
3369  CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
3370 
3371  SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3372  DTPOffset = DAG.getLoad(
3373  PtrVT, DL, DAG.getEntryNode(), DTPOffset,
3374  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3375 
3376  Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
3377  break;
3378  }
3379 
3380  case TLSModel::InitialExec: {
3381  // Load the offset from the GOT.
3382  Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
3384  Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
3385  Offset =
3386  DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
3387  MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3388  break;
3389  }
3390 
3391  case TLSModel::LocalExec: {
3392  // Force the offset into the constant pool and load it from there.
3393  SystemZConstantPoolValue *CPV =
3394  SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
3395 
3396  Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
3397  Offset = DAG.getLoad(
3398  PtrVT, DL, DAG.getEntryNode(), Offset,
3399  MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3400  break;
3401  }
3402  }
3403 
3404  // Add the base and offset together.
3405  return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
3406 }
3407 
3408 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
3409  SelectionDAG &DAG) const {
3410  SDLoc DL(Node);
3411  const BlockAddress *BA = Node->getBlockAddress();
3412  int64_t Offset = Node->getOffset();
3413  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3414 
3415  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
3416  Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3417  return Result;
3418 }
3419 
3420 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
3421  SelectionDAG &DAG) const {
3422  SDLoc DL(JT);
3423  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3424  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
3425 
3426  // Use LARL to load the address of the table.
3427  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3428 }
3429 
3430 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
3431  SelectionDAG &DAG) const {
3432  SDLoc DL(CP);
3433  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3434 
3435  SDValue Result;
3436  if (CP->isMachineConstantPoolEntry())
3437  Result =
3438  DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
3439  else
3440  Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
3441  CP->getOffset());
3442 
3443  // Use LARL to load the address of the constant pool entry.
3444  return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
3445 }
3446 
3447 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
3448  SelectionDAG &DAG) const {
3449  auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
3450  MachineFunction &MF = DAG.getMachineFunction();
3451  MachineFrameInfo &MFI = MF.getFrameInfo();
3452  MFI.setFrameAddressIsTaken(true);
3453 
3454  SDLoc DL(Op);
3455  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3456  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3457 
3458  // By definition, the frame address is the address of the back chain. (In
3459  // the case of packed stack without backchain, return the address where the
3460  // backchain would have been stored. This will either be an unused space or
3461  // contain a saved register.)
3462  int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
3463  SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
3464 
3465  // FIXME The frontend should detect this case.
3466  if (Depth > 0) {
3467  report_fatal_error("Unsupported stack frame traversal count");
3468  }
3469 
3470  return BackChain;
3471 }
3472 
3473 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
3474  SelectionDAG &DAG) const {
3475  MachineFunction &MF = DAG.getMachineFunction();
3476  MachineFrameInfo &MFI = MF.getFrameInfo();
3477  MFI.setReturnAddressIsTaken(true);
3478 
3479  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
3480  return SDValue();
3481 
3482  SDLoc DL(Op);
3483  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3484  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3485 
3486  // FIXME The frontend should detect this case.
3487  if (Depth > 0) {
3488  report_fatal_error("Unsupported stack frame traversal count");
3489  }
3490 
3491  // Return R14D, which has the return address. Mark it an implicit live-in.
3492  Register LinkReg = MF.addLiveIn(SystemZ::R14D, &SystemZ::GR64BitRegClass);
3493  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
3494 }
3495 
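Both functions only handle a depth of zero, matching the zero-argument forms of the usual builtins; any deeper traversal hits the report_fatal_error paths above. Sketch:

    void *Slots[2];

    void capture() {
      Slots[0] = __builtin_frame_address(0);   // lowerFRAMEADDR: back chain slot
      Slots[1] = __builtin_return_address(0);  // lowerRETURNADDR: %r14
    }
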
3496 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
3497  SelectionDAG &DAG) const {
3498  SDLoc DL(Op);
3499  SDValue In = Op.getOperand(0);
3500  EVT InVT = In.getValueType();
3501  EVT ResVT = Op.getValueType();
3502 
3503  // Convert loads directly. This is normally done by DAGCombiner,
3504  // but we need this case for bitcasts that are created during lowering
3505  // and which are then lowered themselves.
3506  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
3507  if (ISD::isNormalLoad(LoadN)) {
3508  SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
3509  LoadN->getBasePtr(), LoadN->getMemOperand());
3510  // Update the chain uses.
3511  DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
3512  return NewLoad;
3513  }
3514 
3515  if (InVT == MVT::i32 && ResVT == MVT::f32) {
3516  SDValue In64;
3517  if (Subtarget.hasHighWord()) {
3518  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
3519  MVT::i64);
3520  In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3521  MVT::i64, SDValue(U64, 0), In);
3522  } else {
3523  In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
3524  In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
3525  DAG.getConstant(32, DL, MVT::i64));
3526  }
3527  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
3528  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
3529  DL, MVT::f32, Out64);
3530  }
3531  if (InVT == MVT::f32 && ResVT == MVT::i32) {
3532  SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
3533  SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
3534  MVT::f64, SDValue(U64, 0), In);
3535  SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
3536  if (Subtarget.hasHighWord())
3537  return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
3538  MVT::i32, Out64);
3539  SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
3540  DAG.getConstant(32, DL, MVT::i64));
3541  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
3542  }
3543  llvm_unreachable("Unexpected bitcast combination");
3544 }
3545 
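In source terms, lowerBITCAST serves type punning between float and integer of equal width; memcpy-based punning is the canonical way to create such an ISD::BITCAST node. A sketch:

    #include <cstdint>
    #include <cstring>

    // f32 -> i32: the value travels through the high half of a 64-bit pair,
    // matching the subreg_h32 extraction in the lowering above.
    uint32_t float_bits(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits));
      return Bits;
    }
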
3546 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
3547  SelectionDAG &DAG) const {
3548 
3549  if (Subtarget.isTargetXPLINK64())
3550  return lowerVASTART_XPLINK(Op, DAG);
3551  else
3552  return lowerVASTART_ELF(Op, DAG);
3553 }
3554 
3555 SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
3556  SelectionDAG &DAG) const {
3557  MachineFunction &MF = DAG.getMachineFunction();
3558  SystemZMachineFunctionInfo *FuncInfo =
3559  MF.getInfo<SystemZMachineFunctionInfo>();
3560 
3561  SDLoc DL(Op);
3562 
3563  // vastart just stores the address of the VarArgsFrameIndex slot into the
3564  // memory location argument.
3565  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3566  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3567  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3568  return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
3569  MachinePointerInfo(SV));
3570 }
3571 
3572 SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
3573  SelectionDAG &DAG) const {
3574  MachineFunction &MF = DAG.getMachineFunction();
3575  SystemZMachineFunctionInfo *FuncInfo =
3576  MF.getInfo<SystemZMachineFunctionInfo>();
3577  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3578 
3579  SDValue Chain = Op.getOperand(0);
3580  SDValue Addr = Op.getOperand(1);
3581  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3582  SDLoc DL(Op);
3583 
3584  // The initial values of each field.
3585  const unsigned NumFields = 4;
3586  SDValue Fields[NumFields] = {
3587  DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
3588  DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
3589  DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
3590  DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
3591  };
3592 
3593  // Store each field into its respective slot.
3594  SDValue MemOps[NumFields];
3595  unsigned Offset = 0;
3596  for (unsigned I = 0; I < NumFields; ++I) {
3597  SDValue FieldAddr = Addr;
3598  if (Offset != 0)
3599  FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
3600  DAG.getIntPtrConstant(Offset, DL));
3601  MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
3602  MachinePointerInfo(SV, Offset));
3603  Offset += 8;
3604  }
3605  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3606 }
3607 
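The four stores above populate the s390x ELF va_list structure: GPR index, FPR index, overflow argument area pointer, and register save area pointer, at offsets 0, 8, 16, and 24. A minimal variadic function exercising this lowering:

    #include <cstdarg>

    // va_start is lowered by lowerVASTART_ELF into the four field stores.
    int sum(int N, ...) {
      va_list AP;
      va_start(AP, N);
      int Total = 0;
      for (int I = 0; I < N; ++I)
        Total += va_arg(AP, int);
      va_end(AP);
      return Total;
    }
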
3608 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
3609  SelectionDAG &DAG) const {
3610  SDValue Chain = Op.getOperand(0);
3611  SDValue DstPtr = Op.getOperand(1);
3612  SDValue SrcPtr = Op.getOperand(2);
3613  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
3614  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
3615  SDLoc DL(Op);
3616 
3617  uint32_t Sz =
3618  Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
3619  return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
3620  Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
3621  /*isTailCall*/ false, MachinePointerInfo(DstSV),
3622  MachinePointerInfo(SrcSV));
3623 }
3624 
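Since the ELF va_list is a 32-byte structure rather than a single pointer, va_copy becomes the fixed-size memcpy above (32 bytes for ELF, pointer-sized for XPLINK). Sketch:

    #include <cstdarg>

    // va_copy lowers to the DAG.getMemcpy call in lowerVACOPY.
    void forward(va_list Args) {
      va_list Copy;
      va_copy(Copy, Args);
      // ... consume Copy with va_arg ...
      va_end(Copy);
    }
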
3625 SDValue
3626 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
3627  SelectionDAG &DAG) const {
3628  if (Subtarget.isTargetXPLINK64())
3629  return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
3630  else
3631  return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
3632 }
3633 
3634 SDValue
3635 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
3636  SelectionDAG &DAG) const {
3637  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3638  MachineFunction &MF = DAG.getMachineFunction();
3639  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3640  SDValue Chain = Op.getOperand(0);
3641  SDValue Size = Op.getOperand(1);
3642  SDValue Align = Op.getOperand(2);
3643  SDLoc DL(Op);
3644 
3645  // If the user has set the no-realign-stack function attribute, ignore
3646  // alloca alignments.
3647  uint64_t AlignVal =
3648  (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3649 
3650  uint64_t StackAlign = TFI->getStackAlignment();
3651  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3652  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3653 
3654  SDValue NeededSpace = Size;
3655 
3656  // Add extra space for alignment if needed.
3657  EVT PtrVT = getPointerTy(MF.getDataLayout());
3658  if (ExtraAlignSpace)
3659  NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
3660  DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3661 
3662  bool IsSigned = false;
3663  bool DoesNotReturn = false;
3664  bool IsReturnValueUsed = false;
3665  EVT VT = Op.getValueType();
3666  SDValue AllocaCall =
3667  makeExternalCall(Chain, DAG, "@@ALCAXP", VT, makeArrayRef(NeededSpace),
3668  CallingConv::C, IsSigned, DL, DoesNotReturn,
3669  IsReturnValueUsed)
3670  .first;
3671 
3672  // Perform a CopyFromReg from %GPR4 (the stack pointer register). Chain and
3673  // glue it to the end of the call so that it cannot be scheduled apart from
3674  // the call sequence.
3675  auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
3676  Register SPReg = Regs.getStackPointerRegister();
3677  Chain = AllocaCall.getValue(1);
3678  SDValue Glue = AllocaCall.getValue(2);
3679  SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
3680  Chain = NewSPRegNode.getValue(1);
3681 
3682  MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
3683  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
3684  SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
3685 
3686  // Dynamically realign if needed.
3687  if (ExtraAlignSpace) {
3688  Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
3689  DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
3690  Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
3691  DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
3692  }
3693 
3694  SDValue Ops[2] = {Result, Chain};
3695  return DAG.getMergeValues(Ops, DL);
3696 }
3697 
3698 SDValue
3699 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
3700  SelectionDAG &DAG) const {
3701  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
3702  MachineFunction &MF = DAG.getMachineFunction();
3703  bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
3704  bool StoreBackchain = MF.getFunction().hasFnAttribute("backchain");
3705 
3706  SDValue Chain = Op.getOperand(0);
3707  SDValue Size = Op.getOperand(1);
3708  SDValue Align = Op.getOperand(2);
3709  SDLoc DL(Op);
3710 
3711  // If the user has set the no-realign-stack function attribute, ignore
3712  // alloca alignments.
3713  uint64_t AlignVal =
3714  (RealignOpt ? cast<ConstantSDNode>(Align)->getZExtValue() : 0);
3715 
3716  uint64_t StackAlign = TFI->getStackAlignment();
3717  uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
3718  uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
3719 
3720  Register SPReg = getStackPointerRegisterToSaveRestore();
3721  SDValue NeededSpace = Size;
3722 
3723  // Get a reference to the stack pointer.
3724  SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
3725 
3726  // If we need a backchain, save it now.
3727  SDValue Backchain;
3728  if (StoreBackchain)
3729  Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
3730  MachinePointerInfo());
3731 
3732  // Add extra space for alignment if needed.
3733  if (ExtraAlignSpace)
3734  NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
3735  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3736 
3737  // Get the new stack pointer value.
3738  SDValue NewSP;
3739  if (hasInlineStackProbe(MF)) {
3740  NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
3741  DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
3742  Chain = NewSP.getValue(1);
3743  }
3744  else {
3745  NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
3746  // Copy the new stack pointer back.
3747  Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
3748  }
3749 
3750  // The allocated data lives above the 160 bytes allocated for the standard
3751  // frame, plus any outgoing stack arguments. We don't know how much that
3752  // amounts to yet, so emit a special ADJDYNALLOC placeholder.
3753  SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3754  SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
3755 
3756  // Dynamically realign if needed.
3757  if (RequiredAlign > StackAlign) {
3758  Result =
3759  DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
3760  DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
3761  Result =
3762  DAG.getNode(ISD::AND, DL, MVT::i64, Result,
3763  DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
3764  }
3765 
3766  if (StoreBackchain)
3767  Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
3768  MachinePointerInfo());
3769 
3770  SDValue Ops[2] = { Result, Chain };
3771  return DAG.getMergeValues(Ops, DL);
3772 }
3773 
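An allocation with alignment above the ABI stack alignment exercises the realignment arithmetic above: the size is padded by ExtraAlignSpace, then the result is rounded to the required boundary with the ADD/AND pair. A sketch using the GCC/Clang alignment builtin (alignment given in bits):

    void use(void *);

    // Requesting 16-byte alignment (above the 8-byte stack alignment) forces
    // the ExtraAlignSpace padding and the AND-mask realignment above.
    void demo(unsigned N) {
      void *Buf = __builtin_alloca_with_align(N, /*AlignInBits=*/128);
      use(Buf);
    }
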
3774 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
3775  SDValue Op, SelectionDAG &DAG) const {
3776  SDLoc DL(Op);
3777 
3778  return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
3779 }
3780 
3781 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
3782  SelectionDAG &DAG) const {
3783  EVT VT = Op.getValueType();
3784  SDLoc DL(Op);
3785  SDValue Ops[2];
3786  if (is32Bit(VT))
3787  // Just do a normal 64-bit multiplication and extract the results.
3788  // We define this so that it can be used for constant division.
3789  lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
3790  Op.getOperand(1), Ops[1], Ops[0]);
3791  else if (Subtarget.hasMiscellaneousExtensions2())
3792  // SystemZISD::SMUL_LOHI returns the low result in the odd register and
3793  // the high result in the even register. ISD::SMUL_LOHI is defined to
3794  // return the low half first, so the results are in reverse order.
3795  lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
3796  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3797  else {
3798  // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
3799  //
3800  // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
3801  //
3802  // but using the fact that the upper halves are either all zeros
3803  // or all ones:
3804  //
3805  // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
3806  //
3807  // and grouping the right terms together since they are quicker than the
3808  // multiplication:
3809  //
3810  // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
3811  SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
3812  SDValue LL = Op.getOperand(0);
3813  SDValue RL = Op.getOperand(1);
3814  SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
3815  SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
3816  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3817  // the high result in the even register. ISD::SMUL_LOHI is defined to
3818  // return the low half first, so the results are in reverse order.
3819  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3820  LL, RL, Ops[1], Ops[0]);
3821  SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
3822  SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
3823  SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
3824  Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
3825  }
3826  return DAG.getMergeValues(Ops, DL);
3827 }
3828 
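The identity used in the final case can be checked with 128-bit arithmetic; a sketch relying on the GCC/Clang __int128 extension and two's-complement wraparound:

    #include <cassert>
    #include <cstdint>

    // Checks (ll * rl) - (((lh & rl) + (ll & rh)) << 64) == the signed
    // 128-bit product, where lh/rh are the sign fills (all zeros or all ones).
    int main() {
      int64_t L = -123456789, R = 987654321;
      uint64_t LL = (uint64_t)L, RL = (uint64_t)R;
      uint64_t LH = (uint64_t)(L >> 63), RH = (uint64_t)(R >> 63);
      unsigned __int128 Prod = (unsigned __int128)LL * RL;
      Prod -= (unsigned __int128)((LH & RL) + (LL & RH)) << 64;
      assert((__int128)Prod == (__int128)L * R);
      return 0;
    }
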
3829 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
3830  SelectionDAG &DAG) const {
3831  EVT VT = Op.getValueType();
3832  SDLoc DL(Op);
3833  SDValue Ops[2];
3834  if (is32Bit(VT))
3835  // Just do a normal 64-bit multiplication and extract the results.
3836  // We define this so that it can be used for constant division.
3837  lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
3838  Op.getOperand(1), Ops[1], Ops[0]);
3839  else
3840  // SystemZISD::UMUL_LOHI returns the low result in the odd register and
3841  // the high result in the even register. ISD::UMUL_LOHI is defined to
3842  // return the low half first, so the results are in reverse order.
3843  lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
3844  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3845  return DAG.getMergeValues(Ops, DL);
3846 }
3847 
3848 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
3849  SelectionDAG &DAG) const {
3850  SDValue Op0 = Op.getOperand(0);
3851  SDValue Op1 = Op.getOperand(1);
3852  EVT VT = Op.getValueType();
3853  SDLoc DL(Op);
3854 
3855  // We use DSGF for 32-bit division. This means the first operand must
3856  // always be 64-bit, and the second operand should be 32-bit whenever
3857  // that is possible, to improve performance.
3858  if (is32Bit(VT))
3859  Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
3860  else if (DAG.ComputeNumSignBits(Op1) > 32)
3861  Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
3862 
3863  // DSG(F) returns the remainder in the even register and the
3864  // quotient in the odd register.
3865  SDValue Ops[2];
3866  lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
3867  return DAG.getMergeValues(Ops, DL);
3868 }
3869 
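Because DSG(F) delivers the remainder and quotient together in an even/odd register pair, a division and remainder of the same operands share a single SDIVREM node; a sketch:

    // The / and % below fold into one ISD::SDIVREM, i.e. a single DSGF.
    void divmod(int A, int B, int &Q, int &R) {
      Q = A / B;
      R = A % B;
    }
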
3870 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
3871  SelectionDAG &DAG) const {
3872  EVT VT = Op.getValueType();
3873  SDLoc DL(Op);
3874 
3875  // DL(G) returns the remainder in the even register and the
3876  // quotient in the odd register.
3877  SDValue Ops[2];
3878  lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
3879  Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
3880  return DAG.getMergeValues(Ops, DL);
3881 }
3882 
3883 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
3884  assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
3885 
3886  // Get the known-zero masks for each operand.
3887  SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
3888  KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
3889  DAG.computeKnownBits(Ops[1])};
3890 
3891  // See if the upper 32 bits of one operand and the lower 32 bits of the
3892  // other are known zero. They are the low and high operands respectively.
3893  uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
3894  Known[1].Zero.getZExtValue() };
3895  unsigned High, Low;
3896  if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
3897  High = 1, Low = 0;
3898  else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
3899  High = 0, Low = 1;
3900  else
3901  return Op;
3902 
3903  SDValue LowOp = Ops[Low];
3904  SDValue HighOp = Ops[High];
3905 
3906  // If the high part is a constant, we're better off using IILH.
3907  if (HighOp.getOpcode() == ISD::Constant)
3908  return Op;
3909 
3910  // If the low part is a constant that is outside the range of LHI,
3911  // then we're better off using IILF.
3912  if (LowOp.getOpcode() == ISD::Constant) {
3913  int64_t Value = int32_t(cast<ConstantSDNode>(LowOp)->getZExtValue());
3914  if (!isInt<16>(Value))
3915  return Op;
3916  }
3917 
3918  // Check whether the high part is an AND that doesn't change the
3919  // high 32 bits and just masks out low bits. We can skip it if so.
3920  if (HighOp.getOpcode() == ISD::AND &&
3921  HighOp.getOperand(1).getOpcode() == ISD::Constant) {
3922  SDValue HighOp0 = HighOp.getOperand(0);
3923  uint64_t Mask = cast<ConstantSDNode>(HighOp.getOperand(1))->getZExtValue();
3924  if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
3925  HighOp = HighOp0;
3926  }
3927 
3928  // Take advantage of the fact that all GR32 operations only change the
3929  // low 32 bits by truncating Low to an i32 and inserting it directly
3930  // using a subreg. The interesting cases are those where the truncation
3931  // can be folded.
3932  SDLoc DL(Op);
3933  SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
3934  return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
3935  MVT::i64, HighOp, Low32);
3936 }
3937 
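The pattern lowerOR targets is an OR whose operands occupy disjoint halves of the register: the low 32 bits are then written with a subregister insert instead of a full 64-bit OR. A sketch of source producing it:

    #include <cstdint>

    // Known-zero analysis proves Hi's low word and Lo's high word are zero,
    // so the OR becomes an insert into the low subregister of Hi.
    uint64_t combine(uint64_t Hi, uint32_t Lo) {
      return (Hi & 0xffffffff00000000ULL) | Lo;
    }
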
3938 // Lower SADDO/SSUBO/UADDO/USUBO nodes.
3939 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
3940  SelectionDAG &DAG) const {
3941  SDNode *N = Op.getNode();
3942  SDValue LHS = N->getOperand(0);
3943  SDValue RHS = N->getOperand(1);
3944  SDLoc DL(N);
3945  unsigned BaseOp = 0;
3946  unsigned CCValid = 0;
3947  unsigned CCMask = 0;
3948 
3949  switch (Op.getOpcode()) {
3950  default: llvm_unreachable("Unknown instruction!");
3951  case ISD::SADDO:
3952  BaseOp = SystemZISD::SADDO;
3953  CCValid = SystemZ::CCMASK_ARITH;
3954  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3955  break;
3956  case ISD::SSUBO:
3957  BaseOp = SystemZISD::SSUBO;
3958  CCValid = SystemZ::CCMASK_ARITH;
3959  CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
3960  break;
3961  case ISD::UADDO:
3962  BaseOp = SystemZISD::UADDO;
3963  CCValid = SystemZ::CCMASK_LOGICAL;
3964  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
3965  break;
3966  case ISD::USUBO:
3967  BaseOp = SystemZISD::USUBO;
3968  CCValid = SystemZ::CCMASK_LOGICAL;
3969  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
3970  break;
3971  }
3972 
3973  SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
3974  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
3975 
3976  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
3977  if (N->getValueType(1) == MVT::i1)
3978  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
3979 
3980  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
3981 }
3982 
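The SADDO/SSUBO/UADDO/USUBO nodes usually originate from the overflow-checking builtins; the CCMask chosen above separates signed arithmetic overflow from unsigned carry/borrow. Sketch:

    #include <cstdint>

    // Signed: ISD::SADDO, tested via CCMASK_ARITH_OVERFLOW.
    bool add_checked(int64_t A, int64_t B, int64_t &Out) {
      return __builtin_add_overflow(A, B, &Out);
    }

    // Unsigned: ISD::UADDO, tested via CCMASK_LOGICAL_CARRY.
    bool add_carry(uint64_t A, uint64_t B, uint64_t &Out) {
      return __builtin_add_overflow(A, B, &Out);
    }
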
3983 static bool isAddCarryChain(SDValue Carry) {
3984  while (Carry.getOpcode() == ISD::ADDCARRY)
3985  Carry = Carry.getOperand(2);
3986  return Carry.getOpcode() == ISD::UADDO;
3987 }
3988 
3989 static bool isSubBorrowChain(SDValue Carry) {
3990  while (Carry.getOpcode() == ISD::SUBCARRY)
3991  Carry = Carry.getOperand(2);
3992  return Carry.getOpcode() == ISD::USUBO;
3993 }
3994 
3995 // Lower ADDCARRY/SUBCARRY nodes.
3996 SDValue SystemZTargetLowering::lowerADDSUBCARRY(SDValue Op,
3997  SelectionDAG &DAG) const {
3998 
3999  SDNode *N = Op.getNode();
4000  MVT VT = N->getSimpleValueType(0);
4001 
4002  // Let legalize expand this if it isn't a legal type yet.
4003  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4004  return SDValue();
4005 
4006  SDValue LHS = N->getOperand(0);
4007  SDValue RHS = N->getOperand(1);
4008  SDValue Carry = Op.getOperand(2);
4009  SDLoc DL(N);
4010  unsigned BaseOp = 0;
4011  unsigned CCValid = 0;
4012  unsigned CCMask = 0;
4013 
4014  switch (Op.getOpcode()) {
4015  default: llvm_unreachable("Unknown instruction!");
4016  case ISD::ADDCARRY:
4017  if (!isAddCarryChain(Carry))
4018  return SDValue();
4019 
4020  BaseOp = SystemZISD::ADDCARRY;
4021  CCValid = SystemZ::CCMASK_LOGICAL;
4022  CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4023  break;
4024  case ISD::SUBCARRY:
4025  if (!isSubBorrowChain(Carry))
4026  return SDValue();
4027 
4028  BaseOp = SystemZISD::SUBCARRY;
4029  CCValid = SystemZ::CCMASK_LOGICAL;
4030  CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4031  break;
4032  }
4033 
4034  // Set the condition code from the carry flag.
4035  Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4036  DAG.getConstant(CCValid, DL, MVT::i32),
4037  DAG.getConstant(CCMask, DL, MVT::i32));
4038 
4039  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4040  SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4041 
4042  SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4043  if (N->getValueType(1) == MVT::i1)
4044  SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4045 
4046  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4047 }
4048 
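ADDCARRY/SUBCARRY chains appear when wider-than-legal integer arithmetic is split: the low words use UADDO/USUBO and the high words consume the carry, which is exactly the shape isAddCarryChain/isSubBorrowChain accept. A sketch via the __int128 extension:

    // Legalization splits this into UADDO (low 64 bits) followed by an
    // ADDCARRY (high 64 bits) feeding on the UADDO carry.
    unsigned __int128 add128(unsigned __int128 A, unsigned __int128 B) {
      return A + B;
    }
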
4049 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4050  SelectionDAG &DAG) const {
4051  EVT VT = Op.getValueType();
4052  SDLoc DL(Op);
4053  Op = Op.getOperand(0);
4054 
4055  // Handle vector types via VPOPCT.
4056  if (VT.isVector()) {
4057  Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4058  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4059  switch (VT.getScalarSizeInBits()) {
4060  case 8: