SystemZISelLowering.cpp
1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
18#include "llvm/ADT/SmallSet.h"
24#include "llvm/IR/GlobalAlias.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/IntrinsicsS390.h"
32#include <cctype>
33#include <optional>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "systemz-lower"
38
39// Temporarily let this be disabled by default until all known problems
40// related to argument extensions are fixed.
42 "argext-abi-check", cl::init(false),
43 cl::desc("Verify that narrow int args are properly extended per the "
44 "SystemZ ABI."));
45
46namespace {
47// Represents information about a comparison.
48struct Comparison {
49 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
50 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
51 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
52
53 // The operands to the comparison.
54 SDValue Op0, Op1;
55
56 // Chain if this is a strict floating-point comparison.
57 SDValue Chain;
58
59 // The opcode that should be used to compare Op0 and Op1.
60 unsigned Opcode;
61
62 // A SystemZICMP value. Only used for integer comparisons.
63 unsigned ICmpType;
64
65 // The mask of CC values that Opcode can produce.
66 unsigned CCValid;
67
68 // The mask of CC values for which the original condition is true.
69 unsigned CCMask;
70};
71} // end anonymous namespace
72
73// Classify VT as either 32 or 64 bit.
74static bool is32Bit(EVT VT) {
75 switch (VT.getSimpleVT().SimpleTy) {
76 case MVT::i32:
77 return true;
78 case MVT::i64:
79 return false;
80 default:
81 llvm_unreachable("Unsupported type");
82 }
83}
84
85// Return a version of MachineOperand that can be safely used before the
86// final use.
88 if (Op.isReg())
89 Op.setIsKill(false);
90 return Op;
91}
92
94 const SystemZSubtarget &STI)
95 : TargetLowering(TM, STI), Subtarget(STI) {
96 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
97
98 auto *Regs = STI.getSpecialRegisters();
99
100 // Set up the register classes.
101 if (Subtarget.hasHighWord())
102 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
103 else
104 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
105 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
106 if (!useSoftFloat()) {
107 if (Subtarget.hasVector()) {
108 addRegisterClass(MVT::f16, &SystemZ::VR16BitRegClass);
109 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
110 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
111 } else {
112 addRegisterClass(MVT::f16, &SystemZ::FP16BitRegClass);
113 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
114 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
115 }
116 if (Subtarget.hasVectorEnhancements1())
117 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
118 else
119 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
120
121 if (Subtarget.hasVector()) {
122 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
123 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
124 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
125 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
126 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
127 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
128 }
129
130 if (Subtarget.hasVector())
131 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
132 }
133
134 // Compute derived properties from the register classes
135 computeRegisterProperties(Subtarget.getRegisterInfo());
136
137 // Set up special registers.
138 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
139
140 // TODO: It may be better to default to latency-oriented scheduling; however,
141 // LLVM's current latency-oriented scheduler can't handle physreg definitions
142 // such as SystemZ has with CC, so set this to the register-pressure
143 // scheduler, which can.
145
148
150
151 // Instructions are strings of 2-byte aligned 2-byte values.
153 // For performance reasons we prefer 16-byte alignment.
155
156 // Handle operations that are handled in a similar way for all types.
157 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
158 I <= MVT::LAST_FP_VALUETYPE;
159 ++I) {
161 if (isTypeLegal(VT)) {
162 // Lower SET_CC into an IPM-based sequence.
166
167 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
169
170 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
172 setOperationAction(ISD::BR_CC, VT, Custom);
173 }
174 }
175
176 // Expand jump table branches as address arithmetic followed by an
177 // indirect jump.
178 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
179
180 // Expand BRCOND into a BR_CC (see above).
181 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
182
183 // Handle integer types except i128.
184 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
185 I <= MVT::LAST_INTEGER_VALUETYPE;
186 ++I) {
188 if (isTypeLegal(VT) && VT != MVT::i128) {
190
191 // Expand individual DIV and REMs into DIVREMs.
198
199 // Support addition/subtraction with overflow.
202
203 // Support addition/subtraction with carry.
206
207 // Support carry in as value rather than glue.
210
211 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
212 // available, or if the operand is constant.
213 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
214
215 // Use POPCNT on z196 and above.
216 if (Subtarget.hasPopulationCount())
218 else
220
221 // No special instructions for these.
224
225 // Use *MUL_LOHI where possible instead of MULH*.
230
231 // The fp<=>i32/i64 conversions are all Legal except for f16 and for
232 // unsigned on z10 (only z196 and above have native support for
233 // unsigned conversions).
240 // Handle unsigned 32-bit input types as signed 64-bit types on z10.
241 auto OpAction =
242 (!Subtarget.hasFPExtension() && VT == MVT::i32) ? Promote : Custom;
243 setOperationAction(Op, VT, OpAction);
244 }
245 }
246 }
247
248 // Handle i128 if legal.
249 if (isTypeLegal(MVT::i128)) {
250 // No special instructions for these.
257
258 // We may be able to use VSLDB/VSLD/VSRD for these.
261
262 // No special instructions for these before z17.
263 if (!Subtarget.hasVectorEnhancements3()) {
273 } else {
274 // Even if we do have a legal 128-bit multiply, we do not
275 // want 64-bit multiply-high operations to use it.
278 }
279
280 // Support addition/subtraction with carry.
285
286 // Use VPOPCT and add up partial results.
288
289 // Additional instructions available with z17.
290 if (Subtarget.hasVectorEnhancements3()) {
291 setOperationAction(ISD::ABS, MVT::i128, Legal);
292
294 MVT::i128, Legal);
295 }
296 }
297
298 // These need custom handling to deal with the f16 conversions.
307
308 // Type legalization will convert 8- and 16-bit atomic operations into
309 // forms that operate on i32s (but still keeping the original memory VT).
310 // Lower them into full i32 operations.
311 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
312 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
313 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
314 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
315 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
316 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
317 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
318 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
319 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
320 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
321 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
322
323 // Whether or not i128 is a legal type, we need to custom lower
324 // the atomic operations in order to exploit SystemZ instructions.
325 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
326 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
327 setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom);
328 setOperationAction(ISD::ATOMIC_STORE, MVT::f128, Custom);
329
330 // Mark sign/zero extending atomic loads as legal, which will make
331 // DAGCombiner fold extensions into atomic loads if possible.
333 {MVT::i8, MVT::i16, MVT::i32}, Legal);
335 {MVT::i8, MVT::i16}, Legal);
337 MVT::i8, Legal);
338
339 // We can use the CC result of compare-and-swap to implement
340 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
341 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
342 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
343 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
344
345 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
346
347 // Traps are legal, as we will convert them to "j .+2".
348 setOperationAction(ISD::TRAP, MVT::Other, Legal);
349
350 // We have native support for a 64-bit CTLZ, via FLOGR.
354
355 // On z17 we have native support for a 64-bit CTTZ.
356 if (Subtarget.hasMiscellaneousExtensions4()) {
360 }
361
362 // On z15 we have native support for a 64-bit CTPOP.
363 if (Subtarget.hasMiscellaneousExtensions3()) {
366 }
367
368 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
370
371 // Expand 128 bit shifts without using a libcall.
375
376 // Also expand 256 bit shifts if i128 is a legal type.
377 if (isTypeLegal(MVT::i128)) {
381 }
382
383 // Handle bitcast from fp128 to i128.
384 if (!isTypeLegal(MVT::i128))
385 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
386
387 // We have native instructions for i8, i16 and i32 extensions, but not i1.
389 for (MVT VT : MVT::integer_valuetypes()) {
393 }
394
395 // Handle the various types of symbolic address.
401
402 // We need to handle dynamic allocations specially because of the
403 // 160-byte area at the bottom of the stack.
404 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
405 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
406
407 setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
408 setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
409
410 // Handle prefetches with PFD or PFDRL.
411 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
412
413 // Handle readcyclecounter with STCKF.
414 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
415
417 // Assume by default that all vector operations need to be expanded.
418 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
419 if (getOperationAction(Opcode, VT) == Legal)
420 setOperationAction(Opcode, VT, Expand);
421
422 // Likewise all truncating stores and extending loads.
423 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
424 setTruncStoreAction(VT, InnerVT, Expand);
427 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
428 }
429
430 if (isTypeLegal(VT)) {
431 // These operations are legal for anything that can be stored in a
432 // vector register, even if there is no native support for the format
433 // as such. In particular, we can do these for v4f32 even though there
434 // are no specific instructions for that format.
435 setOperationAction(ISD::LOAD, VT, Legal);
436 setOperationAction(ISD::STORE, VT, Legal);
438 setOperationAction(ISD::BITCAST, VT, Legal);
440
441 // Likewise, except that we need to replace the nodes with something
442 // more specific.
445 }
446 }
447
448 // Handle integer vector types.
450 if (isTypeLegal(VT)) {
451 // These operations have direct equivalents.
456 if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3()) {
460 }
461 if (Subtarget.hasVectorEnhancements3() &&
462 VT != MVT::v16i8 && VT != MVT::v8i16) {
467 }
472 if (Subtarget.hasVectorEnhancements1())
474 else
478
479 // Convert a GPR scalar to a vector by inserting it into element 0.
481
482 // Use a series of unpacks for extensions.
485
486 // Detect shifts/rotates by a scalar amount and convert them into
487 // V*_BY_SCALAR.
492
493 // Add ISD::VECREDUCE_ADD as custom in order to implement
494 // it with VZERO+VSUM
495 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
496
497 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
498 // and inverting the result as necessary.
500
502 Legal);
503 }
504 }
505
506 if (Subtarget.hasVector()) {
507 // There should be no need to check for float types other than v2f64
508 // since <2 x f32> isn't a legal type.
517
526 }
527
528 if (Subtarget.hasVectorEnhancements2()) {
537
546 }
547
548 // Handle floating-point types.
549 if (!useSoftFloat()) {
550 // Promote all f16 operations to float, with some exceptions below.
551 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
552 setOperationAction(Opc, MVT::f16, Promote);
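// With Promote, an f16 operation is carried out by extending its inputs to
// f32, performing the operation there, and rounding the result back to f16.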
554 for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
555 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
556 setTruncStoreAction(VT, MVT::f16, Expand);
557 }
558 for (auto Op : {ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE})
559 setOperationAction(Op, MVT::f16, Subtarget.hasVector() ? Legal : Custom);
562 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
564 for (auto Op : {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN})
565 setOperationAction(Op, MVT::f16, Legal);
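// These stay legal for f16 because they only operate on the sign bit and
// therefore need no round trip through f32.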
566 }
567
568 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
569 I <= MVT::LAST_FP_VALUETYPE;
570 ++I) {
572 if (isTypeLegal(VT) && VT != MVT::f16) {
573 // We can use FI for FRINT.
574 setOperationAction(ISD::FRINT, VT, Legal);
575
576 // We can use the extended form of FI for other rounding operations.
577 if (Subtarget.hasFPExtension()) {
578 setOperationAction(ISD::FNEARBYINT, VT, Legal);
579 setOperationAction(ISD::FFLOOR, VT, Legal);
580 setOperationAction(ISD::FCEIL, VT, Legal);
581 setOperationAction(ISD::FTRUNC, VT, Legal);
582 setOperationAction(ISD::FROUND, VT, Legal);
583 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
584 }
585
586 // No special instructions for these.
587 setOperationAction(ISD::FSIN, VT, Expand);
588 setOperationAction(ISD::FCOS, VT, Expand);
589 setOperationAction(ISD::FSINCOS, VT, Expand);
591 setOperationAction(ISD::FPOW, VT, Expand);
592
593 // Special treatment.
595
596 // Handle constrained floating-point operations.
605 if (Subtarget.hasFPExtension()) {
612 }
613
614 // Extension from f16 needs a libcall.
615 setOperationAction(ISD::FP_EXTEND, VT, Custom);
617 }
618 }
619
620 // Handle floating-point vector types.
621 if (Subtarget.hasVector()) {
622 // Scalar-to-vector conversion is just a subreg.
625
626 // Some insertions and extractions can be done directly but others
627 // need to go via integers.
632
633 // These operations have direct equivalents.
634 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
635 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
636 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
637 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
638 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
639 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
640 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
641 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
642 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
643 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
644 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
645 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
646 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
647 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
648 setOperationAction(ISD::FROUNDEVEN, MVT::v2f64, Legal);
649
650 // Handle constrained floating-point operations.
664
669 if (Subtarget.hasVectorEnhancements1()) {
672 }
673 }
674
675 // The vector enhancements facility 1 has instructions for these.
676 if (Subtarget.hasVectorEnhancements1()) {
677 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
678 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
679 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
680 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
681 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
682 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
683 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
684 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
685 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
686 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
687 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
688 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
689 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
690 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
691 setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Legal);
692
693 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
694 setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
695 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
696 setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
697
698 setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
699 setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
700 setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
701 setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
702
703 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
704 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
705 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
706 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
707
708 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
709 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
710 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
711 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
712
713 setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
714 setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
715 setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
716 setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
717
718 // Handle constrained floating-point operations.
732 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
733 MVT::v4f32, MVT::v2f64 }) {
738 }
739 }
740
741 // We only have fused f128 multiply-addition on vector registers.
742 if (!Subtarget.hasVectorEnhancements1()) {
745 }
746
747 // We don't have a copysign instruction on vector registers.
748 if (Subtarget.hasVectorEnhancements1())
750
751 // Needed so that we don't try to implement f128 constant loads using
752 // a load-and-extend of an f80 constant (in cases where the constant
753 // would fit in an f80).
754 for (MVT VT : MVT::fp_valuetypes())
755 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
756
757 // We don't have extending load instructions on vector registers.
758 if (Subtarget.hasVectorEnhancements1()) {
759 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
760 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
761 }
762
763 // Floating-point truncation and stores need to be done separately.
764 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
765 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
766 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
767
768 // We have 64-bit FPR<->GPR moves, but need special handling for
769 // 32-bit forms.
770 if (!Subtarget.hasVector()) {
771 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
772 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
773 }
774
775 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
776 // structure, but VAEND is a no-op.
777 setOperationAction(ISD::VASTART, MVT::Other, Custom);
778 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
779 setOperationAction(ISD::VAEND, MVT::Other, Expand);
780
781 if (Subtarget.isTargetzOS()) {
782 // Handle address space casts between mixed-sized pointers.
783 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
784 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
785 }
786
788
789 // Codes for which we want to perform some z-specific combinations.
793 ISD::LOAD,
794 ISD::STORE,
799 ISD::FP_EXTEND,
806 ISD::SRL,
807 ISD::SRA,
808 ISD::MUL,
809 ISD::SDIV,
810 ISD::UDIV,
811 ISD::SREM,
812 ISD::UREM,
815
816 // Handle intrinsics.
819
820 // We're not using SJLJ for exception handling, but the SJLJ nodes are
821 // implemented solely to support use of __builtin_setjmp / __builtin_longjmp.
824
825 // We want to use MVC in preference to even a single load/store pair.
826 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
828
829 // The main memset sequence is a byte store followed by an MVC.
830 // Two STC or MV..I stores win over that, but the kind of fused stores
831 // generated by target-independent code don't when the byte value is
832 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
833 // than "STC;MVC". Handle the choice in target-specific code instead.
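// E.g. a memset of a variable byte value is emitted as an STC of the first
// byte followed by an overlapping MVC that copies each byte one position
// forward, replicating the value across the rest of the destination.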
834 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
836
837 // Default to having -disable-strictnode-mutation on
838 IsStrictFPEnabled = true;
839}
840
842 return Subtarget.hasSoftFloat();
843}
844
846 LLVMContext &, EVT VT) const {
847 if (!VT.isVector())
848 return MVT::i32;
850}
851
853 const MachineFunction &MF, EVT VT) const {
854 if (useSoftFloat())
855 return false;
856
857 VT = VT.getScalarType();
858
859 if (!VT.isSimple())
860 return false;
861
862 switch (VT.getSimpleVT().SimpleTy) {
863 case MVT::f32:
864 case MVT::f64:
865 return true;
866 case MVT::f128:
867 return Subtarget.hasVectorEnhancements1();
868 default:
869 break;
870 }
871
872 return false;
873}
874
875// Return true if the constant can be generated with a vector instruction,
876// such as VGM, VGMB or VREPI.
878 const SystemZSubtarget &Subtarget) {
879 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
880 if (!Subtarget.hasVector() ||
881 (isFP128 && !Subtarget.hasVectorEnhancements1()))
882 return false;
883
884 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
885 // preferred way of creating all-zero and all-one vectors so give it
886 // priority over other methods below.
887 unsigned Mask = 0;
888 unsigned I = 0;
889 for (; I < SystemZ::VectorBytes; ++I) {
890 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
891 if (Byte == 0xff)
892 Mask |= 1ULL << I;
893 else if (Byte != 0)
894 break;
895 }
896 if (I == SystemZ::VectorBytes) {
897 Opcode = SystemZISD::BYTE_MASK;
898 OpVals.push_back(Mask);
900 return true;
901 }
902
903 if (SplatBitSize > 64)
904 return false;
905
906 auto TryValue = [&](uint64_t Value) -> bool {
907 // Try VECTOR REPLICATE IMMEDIATE
908 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
909 if (isInt<16>(SignedValue)) {
910 OpVals.push_back(((unsigned) SignedValue));
911 Opcode = SystemZISD::REPLICATE;
913 SystemZ::VectorBits / SplatBitSize);
914 return true;
915 }
916 // Try VECTOR GENERATE MASK
917 unsigned Start, End;
918 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
919 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
920 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
921 // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
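// For example, with SplatBitSize == 32 a full-width bit number of 40 becomes
// bit number 8 within the 32-bit element.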
922 OpVals.push_back(Start - (64 - SplatBitSize));
923 OpVals.push_back(End - (64 - SplatBitSize));
924 Opcode = SystemZISD::ROTATE_MASK;
926 SystemZ::VectorBits / SplatBitSize);
927 return true;
928 }
929 return false;
930 };
931
932 // First try assuming that any undefined bits above the highest set bit
933 // and below the lowest set bit are 1s. This increases the likelihood of
934 // being able to use a sign-extended element value in VECTOR REPLICATE
935 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
936 uint64_t SplatBitsZ = SplatBits.getZExtValue();
937 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
938 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
939 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
940 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
941 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
942 if (TryValue(SplatBitsZ | Upper | Lower))
943 return true;
944
945 // Now try assuming that any undefined bits between the first and
946 // last defined set bits are set. This increases the chances of
947 // using a non-wraparound mask.
948 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
949 return TryValue(SplatBitsZ | Middle);
950}
951
953 if (IntImm.isSingleWord()) {
954 IntBits = APInt(128, IntImm.getZExtValue());
955 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
956 } else
957 IntBits = IntImm;
958 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
959
960 // Find the smallest splat.
961 SplatBits = IntImm;
962 unsigned Width = SplatBits.getBitWidth();
963 while (Width > 8) {
964 unsigned HalfSize = Width / 2;
965 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
966 APInt LowValue = SplatBits.trunc(HalfSize);
967
968 // If the two halves do not match, stop here.
969 if (HighValue != LowValue || 8 > HalfSize)
970 break;
971
972 SplatBits = HighValue;
973 Width = HalfSize;
974 }
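// E.g. a 128-bit immediate made of sixteen 0x01 bytes reduces down to an
// 8-bit splat of 0x01 with SplatBitSize == 8.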
975 SplatUndef = 0;
976 SplatBitSize = Width;
977}
978
980 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
981 bool HasAnyUndefs;
982
983 // Get IntBits by finding the 128 bit splat.
984 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
985 true);
986
987 // Get SplatBits by finding the 8 bit or greater splat.
988 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
989 true);
990}
991
993 bool ForCodeSize) const {
994 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
995 if (Imm.isZero() || Imm.isNegZero())
996 return true;
997
999}
1000
1003 MachineBasicBlock *MBB) const {
1004 DebugLoc DL = MI.getDebugLoc();
1005 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1006 const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo();
1007
1008 MachineFunction *MF = MBB->getParent();
1010
1011 const BasicBlock *BB = MBB->getBasicBlock();
1012 MachineFunction::iterator I = ++MBB->getIterator();
1013
1014 Register DstReg = MI.getOperand(0).getReg();
1015 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
1016 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
1017 (void)TRI;
1018 Register MainDstReg = MRI.createVirtualRegister(RC);
1019 Register RestoreDstReg = MRI.createVirtualRegister(RC);
1020
1021 MVT PVT = getPointerTy(MF->getDataLayout());
1022 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1023 // For v = setjmp(buf), we generate the following.
1024 // Algorithm:
1025 //
1026 // ---------
1027 // | thisMBB |
1028 // ---------
1029 // |
1030 // ------------------------
1031 // | |
1032 // ---------- ---------------
1033 // | mainMBB | | restoreMBB |
1034 // | v = 0 | | v = 1 |
1035 // ---------- ---------------
1036 // | |
1037 // -------------------------
1038 // |
1039 // -----------------------------
1040 // | sinkMBB |
1041 // | phi(v_mainMBB,v_restoreMBB) |
1042 // -----------------------------
1043 // thisMBB:
1044 // buf[FPOffset] = Frame Pointer if hasFP.
1045 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB.
1046 // buf[BCOffset] = Backchain value if building with -mbackchain.
1047 // buf[SPOffset] = Stack Pointer.
1048 // buf[LPOffset] = never written by us; gcc always stores R13 here.
1049 // SjLjSetup restoreMBB
1050 // mainMBB:
1051 // v_main = 0
1052 // sinkMBB:
1053 // v = phi(v_main, v_restore)
1054 // restoreMBB:
1055 // v_restore = 1
1056
1057 MachineBasicBlock *ThisMBB = MBB;
1058 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
1059 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
1060 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
1061
1062 MF->insert(I, MainMBB);
1063 MF->insert(I, SinkMBB);
1064 MF->push_back(RestoreMBB);
1065 RestoreMBB->setMachineBlockAddressTaken();
1066
1068
1069 // Transfer the remainder of BB and its successor edges to sinkMBB.
1070 SinkMBB->splice(SinkMBB->begin(), MBB,
1071 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
1073
1074 // thisMBB:
1075 const int64_t FPOffset = 0; // Slot 1.
1076 const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2.
1077 const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3.
1078 const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4.
1079
1080 // Buf address.
1081 Register BufReg = MI.getOperand(1).getReg();
1082
1083 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
1084 Register LabelReg = MRI.createVirtualRegister(PtrRC);
1085
1086 // Prepare IP for longjmp.
1087 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LARL), LabelReg)
1088 .addMBB(RestoreMBB);
1089 // Store IP for return from jmp, slot 2, offset = 1.
1090 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1091 .addReg(LabelReg)
1092 .addReg(BufReg)
1093 .addImm(LabelOffset)
1094 .addReg(0);
1095
1096 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1097 bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF);
1098 if (HasFP) {
1099 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1100 .addReg(SpecialRegs->getFramePointerRegister())
1101 .addReg(BufReg)
1102 .addImm(FPOffset)
1103 .addReg(0);
1104 }
1105
1106 // Store SP.
1107 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1108 .addReg(SpecialRegs->getStackPointerRegister())
1109 .addReg(BufReg)
1110 .addImm(SPOffset)
1111 .addReg(0);
1112
1113 // Slot 3 (Offset = 2): Backchain value (if building with -mbackchain).
1114 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1115 if (BackChain) {
1116 Register BCReg = MRI.createVirtualRegister(PtrRC);
1117 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1118 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1119 .addReg(SpecialRegs->getStackPointerRegister())
1120 .addImm(TFL->getBackchainOffset(*MF))
1121 .addReg(0);
1122
1123 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1124 .addReg(BCReg)
1125 .addReg(BufReg)
1126 .addImm(BCOffset)
1127 .addReg(0);
1128 }
1129
1130 // Setup.
1131 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::EH_SjLj_Setup))
1132 .addMBB(RestoreMBB);
1133
1134 const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1135 MIB.addRegMask(RegInfo->getNoPreservedMask());
1136
1137 ThisMBB->addSuccessor(MainMBB);
1138 ThisMBB->addSuccessor(RestoreMBB);
1139
1140 // mainMBB:
1141 BuildMI(MainMBB, DL, TII->get(SystemZ::LHI), MainDstReg).addImm(0);
1142 MainMBB->addSuccessor(SinkMBB);
1143
1144 // sinkMBB:
1145 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(SystemZ::PHI), DstReg)
1146 .addReg(MainDstReg)
1147 .addMBB(MainMBB)
1148 .addReg(RestoreDstReg)
1149 .addMBB(RestoreMBB);
1150
1151 // restoreMBB.
1152 BuildMI(RestoreMBB, DL, TII->get(SystemZ::LHI), RestoreDstReg).addImm(1);
1153 BuildMI(RestoreMBB, DL, TII->get(SystemZ::J)).addMBB(SinkMBB);
1154 RestoreMBB->addSuccessor(SinkMBB);
1155
1156 MI.eraseFromParent();
1157
1158 return SinkMBB;
1159}
1160
1163 MachineBasicBlock *MBB) const {
1164
1165 DebugLoc DL = MI.getDebugLoc();
1166 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1167
1168 MachineFunction *MF = MBB->getParent();
1170
1171 MVT PVT = getPointerTy(MF->getDataLayout());
1172 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1173 Register BufReg = MI.getOperand(0).getReg();
1174 const TargetRegisterClass *RC = MRI.getRegClass(BufReg);
1175 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1176
1177 Register Tmp = MRI.createVirtualRegister(RC);
1178 Register BCReg = MRI.createVirtualRegister(RC);
1179
1181
1182 const int64_t FPOffset = 0;
1183 const int64_t LabelOffset = 1 * PVT.getStoreSize();
1184 const int64_t BCOffset = 2 * PVT.getStoreSize();
1185 const int64_t SPOffset = 3 * PVT.getStoreSize();
1186 const int64_t LPOffset = 4 * PVT.getStoreSize();
1187
1188 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), Tmp)
1189 .addReg(BufReg)
1190 .addImm(LabelOffset)
1191 .addReg(0);
1192
1193 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1194 SpecialRegs->getFramePointerRegister())
1195 .addReg(BufReg)
1196 .addImm(FPOffset)
1197 .addReg(0);
1198
1199 // We restore R13 even though we never stored it in setjmp from llvm,
1200 // as gcc always stores R13 in builtin_setjmp. We may be dealing with
1201 // mixed code: gcc setjmp and llvm longjmp.
1202 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), SystemZ::R13D)
1203 .addReg(BufReg)
1204 .addImm(LPOffset)
1205 .addReg(0);
1206
1207 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1208 if (BackChain) {
1209 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1210 .addReg(BufReg)
1211 .addImm(BCOffset)
1212 .addReg(0);
1213 }
1214
1215 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1216 SpecialRegs->getStackPointerRegister())
1217 .addReg(BufReg)
1218 .addImm(SPOffset)
1219 .addReg(0);
1220
1221 if (BackChain) {
1222 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1223 BuildMI(*MBB, MI, DL, TII->get(SystemZ::STG))
1224 .addReg(BCReg)
1225 .addReg(SpecialRegs->getStackPointerRegister())
1226 .addImm(TFL->getBackchainOffset(*MF))
1227 .addReg(0);
1228 }
1229
1230 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BR)).addReg(Tmp);
1231
1232 MI.eraseFromParent();
1233 return MBB;
1234}
1235
1236/// Returns true if stack probing through inline assembly is requested.
1238 // If the function specifically requests inline stack probes, emit them.
1239 if (MF.getFunction().hasFnAttribute("probe-stack"))
1240 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
1241 "inline-asm";
1242 return false;
1243}
1244
1249
1254
1257 // Don't expand subword operations as they require special treatment.
1258 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
1260
1261 // Don't expand if there is a target instruction available.
1262 if (Subtarget.hasInterlockedAccess1() &&
1263 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
1270
1272}
1273
1275 // We can use CGFI or CLGFI.
1276 return isInt<32>(Imm) || isUInt<32>(Imm);
1277}
1278
1280 // We can use ALGFI or SLGFI.
1281 return isUInt<32>(Imm) || isUInt<32>(-Imm);
1282}
1283
1285 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
1286 // Unaligned accesses should never be slower than the expanded version.
1287 // We check specifically for aligned accesses in the few cases where
1288 // they are required.
1289 if (Fast)
1290 *Fast = 1;
1291 return true;
1292}
1293
1295 EVT VT = Y.getValueType();
1296
1297 // We can use NC(G)RK for types in GPRs ...
1298 if (VT == MVT::i32 || VT == MVT::i64)
1299 return Subtarget.hasMiscellaneousExtensions3();
1300
1301 // ... or VNC for types in VRs.
1302 if (VT.isVector() || VT == MVT::i128)
1303 return Subtarget.hasVector();
1304
1305 return false;
1306}
1307
1308// Information about the addressing mode for a memory access.
1310 // True if a long displacement is supported.
1312
1313 // True if use of index register is supported.
1315
1316 AddressingMode(bool LongDispl, bool IdxReg) :
1317 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1318};
1319
1320 // Return the desired addressing mode for a Load whose sole use (in the
1321 // same block) is a Store.
1323 Type *Ty) {
1324 // With vector support, a Load->Store combination may be combined into either
1325 // an MVC or vector operations, and it seems to work best to allow the
1326 // vector addressing mode.
1327 if (HasVector)
1328 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1329
1330 // Otherwise only the MVC case is special.
1331 bool MVC = Ty->isIntegerTy(8);
1332 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1333}
1334
1335// Return the addressing mode which seems most desirable given an LLVM
1336// Instruction pointer.
1337static AddressingMode
1340 switch (II->getIntrinsicID()) {
1341 default: break;
1342 case Intrinsic::memset:
1343 case Intrinsic::memmove:
1344 case Intrinsic::memcpy:
1345 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1346 }
1347 }
1348
1349 if (isa<LoadInst>(I) && I->hasOneUse()) {
1350 auto *SingleUser = cast<Instruction>(*I->user_begin());
1351 if (SingleUser->getParent() == I->getParent()) {
1352 if (isa<ICmpInst>(SingleUser)) {
1353 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1354 if (C->getBitWidth() <= 64 &&
1355 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1356 // Comparison of memory with 16 bit signed / unsigned immediate
1357 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1358 } else if (isa<StoreInst>(SingleUser))
1359 // Load->Store
1360 return getLoadStoreAddrMode(HasVector, I->getType());
1361 }
1362 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1363 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1364 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1365 // Load->Store
1366 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1367 }
1368
1369 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1370
1371 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1372 // dependencies (LDE only supports small offsets).
1373 // * Utilize the vector registers to hold floating point
1374 // values (vector load / store instructions only support small
1375 // offsets).
1376
1377 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1378 I->getOperand(0)->getType());
1379 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1380 bool IsVectorAccess = MemAccessTy->isVectorTy();
1381
1382 // A store of an extracted vector element will be combined into a VSTE type
1383 // instruction.
1384 if (!IsVectorAccess && isa<StoreInst>(I)) {
1385 Value *DataOp = I->getOperand(0);
1386 if (isa<ExtractElementInst>(DataOp))
1387 IsVectorAccess = true;
1388 }
1389
1390 // A load which gets inserted into a vector element will be combined into a
1391 // VLE type instruction.
1392 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1393 User *LoadUser = *I->user_begin();
1394 if (isa<InsertElementInst>(LoadUser))
1395 IsVectorAccess = true;
1396 }
1397
1398 if (IsFPAccess || IsVectorAccess)
1399 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1400 }
1401
1402 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1403}
1404
1406 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1407 // Punt on globals for now, although they can be used in limited
1408 // RELATIVE LONG cases.
1409 if (AM.BaseGV)
1410 return false;
1411
1412 // Require a 20-bit signed offset.
1413 if (!isInt<20>(AM.BaseOffs))
1414 return false;
1415
1416 bool RequireD12 =
1417 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1418 AddressingMode SupportedAM(!RequireD12, true);
1419 if (I != nullptr)
1420 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1421
1422 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1423 return false;
1424
1425 if (!SupportedAM.IndexReg)
1426 // No indexing allowed.
1427 return AM.Scale == 0;
1428 else
1429 // Indexing is OK but no scale factor can be applied.
1430 return AM.Scale == 0 || AM.Scale == 1;
1431}
1432
1434 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
1435 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
1436 const AttributeList &FuncAttributes) const {
1437 const int MVCFastLen = 16;
1438
1439 if (Limit != ~unsigned(0)) {
1440 // Don't expand Op into scalar loads/stores in these cases:
1441 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1442 return false; // Small memcpy: Use MVC
1443 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1444 return false; // Small memset (first byte with STC/MVI): Use MVC
1445 if (Op.isZeroMemset())
1446 return false; // Memset zero: Use XC
1447 }
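// Returning false above leaves e.g. a 16-byte memcpy to be emitted as a
// single MVC instead of a sequence of scalar loads and stores.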
1448
1449 return TargetLowering::findOptimalMemOpLowering(Context, MemOps, Limit, Op,
1450 DstAS, SrcAS, FuncAttributes);
1451}
1452
1454 LLVMContext &Context, const MemOp &Op,
1455 const AttributeList &FuncAttributes) const {
1456 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1457}
1458
1459bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
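// Truncation between integer types is free: the narrower value simply uses
// the low bits of the same GPR (e.g. i64 -> i32 needs no instruction).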
1460 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1461 return false;
1462 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1463 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1464 return FromBits > ToBits;
1465}
1466
1468 if (!FromVT.isInteger() || !ToVT.isInteger())
1469 return false;
1470 unsigned FromBits = FromVT.getFixedSizeInBits();
1471 unsigned ToBits = ToVT.getFixedSizeInBits();
1472 return FromBits > ToBits;
1473}
1474
1475//===----------------------------------------------------------------------===//
1476// Inline asm support
1477//===----------------------------------------------------------------------===//
1478
1481 if (Constraint.size() == 1) {
1482 switch (Constraint[0]) {
1483 case 'a': // Address register
1484 case 'd': // Data register (equivalent to 'r')
1485 case 'f': // Floating-point register
1486 case 'h': // High-part register
1487 case 'r': // General-purpose register
1488 case 'v': // Vector register
1489 return C_RegisterClass;
1490
1491 case 'Q': // Memory with base and unsigned 12-bit displacement
1492 case 'R': // Likewise, plus an index
1493 case 'S': // Memory with base and signed 20-bit displacement
1494 case 'T': // Likewise, plus an index
1495 case 'm': // Equivalent to 'T'.
1496 return C_Memory;
1497
1498 case 'I': // Unsigned 8-bit constant
1499 case 'J': // Unsigned 12-bit constant
1500 case 'K': // Signed 16-bit constant
1501 case 'L': // Signed 20-bit displacement (on all targets we support)
1502 case 'M': // 0x7fffffff
1503 return C_Immediate;
1504
1505 default:
1506 break;
1507 }
1508 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1509 switch (Constraint[1]) {
1510 case 'Q': // Address with base and unsigned 12-bit displacement
1511 case 'R': // Likewise, plus an index
1512 case 'S': // Address with base and signed 20-bit displacement
1513 case 'T': // Likewise, plus an index
1514 return C_Address;
1515
1516 default:
1517 break;
1518 }
1519 } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
1520 if (StringRef("{@cc}").compare(Constraint) == 0)
1521 return C_Other;
1522 }
1523 return TargetLowering::getConstraintType(Constraint);
1524}
1525
1528 AsmOperandInfo &Info, const char *Constraint) const {
1530 Value *CallOperandVal = Info.CallOperandVal;
1531 // If we don't have a value, we can't do a match,
1532 // but allow it at the lowest weight.
1533 if (!CallOperandVal)
1534 return CW_Default;
1535 Type *type = CallOperandVal->getType();
1536 // Look at the constraint type.
1537 switch (*Constraint) {
1538 default:
1539 Weight = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint);
1540 break;
1541
1542 case 'a': // Address register
1543 case 'd': // Data register (equivalent to 'r')
1544 case 'h': // High-part register
1545 case 'r': // General-purpose register
1546 Weight =
1547 CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1548 break;
1549
1550 case 'f': // Floating-point register
1551 if (!useSoftFloat())
1552 Weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1553 break;
1554
1555 case 'v': // Vector register
1556 if (Subtarget.hasVector())
1557 Weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1558 : CW_Default;
1559 break;
1560
1561 case 'I': // Unsigned 8-bit constant
1562 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1563 if (isUInt<8>(C->getZExtValue()))
1564 Weight = CW_Constant;
1565 break;
1566
1567 case 'J': // Unsigned 12-bit constant
1568 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1569 if (isUInt<12>(C->getZExtValue()))
1570 Weight = CW_Constant;
1571 break;
1572
1573 case 'K': // Signed 16-bit constant
1574 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1575 if (isInt<16>(C->getSExtValue()))
1576 Weight = CW_Constant;
1577 break;
1578
1579 case 'L': // Signed 20-bit displacement (on all targets we support)
1580 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1581 if (isInt<20>(C->getSExtValue()))
1582 Weight = CW_Constant;
1583 break;
1584
1585 case 'M': // 0x7fffffff
1586 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1587 if (C->getZExtValue() == 0x7fffffff)
1588 Weight = CW_Constant;
1589 break;
1590 }
1591 return Weight;
1592}
1593
1594// Parse a "{tNNN}" register constraint for which the register type "t"
1595// has already been verified. MC is the class associated with "t" and
1596// Map maps 0-based register numbers to LLVM register numbers.
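// For example, the constraint "{r5}" parsed with the 64-bit GPR class and
// map resolves to SystemZ::R5D.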
1597static std::pair<unsigned, const TargetRegisterClass *>
1599 const unsigned *Map, unsigned Size) {
1600 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1601 if (isdigit(Constraint[2])) {
1602 unsigned Index;
1603 bool Failed =
1604 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1605 if (!Failed && Index < Size && Map[Index])
1606 return std::make_pair(Map[Index], RC);
1607 }
1608 return std::make_pair(0U, nullptr);
1609}
1610
1611std::pair<unsigned, const TargetRegisterClass *>
1613 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1614 if (Constraint.size() == 1) {
1615 // GCC Constraint Letters
1616 switch (Constraint[0]) {
1617 default: break;
1618 case 'd': // Data register (equivalent to 'r')
1619 case 'r': // General-purpose register
1620 if (VT.getSizeInBits() == 64)
1621 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1622 else if (VT.getSizeInBits() == 128)
1623 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1624 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1625
1626 case 'a': // Address register
1627 if (VT == MVT::i64)
1628 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1629 else if (VT == MVT::i128)
1630 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1631 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1632
1633 case 'h': // High-part register (an LLVM extension)
1634 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1635
1636 case 'f': // Floating-point register
1637 if (!useSoftFloat()) {
1638 if (VT.getSizeInBits() == 16)
1639 return std::make_pair(0U, &SystemZ::FP16BitRegClass);
1640 else if (VT.getSizeInBits() == 64)
1641 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1642 else if (VT.getSizeInBits() == 128)
1643 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1644 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1645 }
1646 break;
1647
1648 case 'v': // Vector register
1649 if (Subtarget.hasVector()) {
1650 if (VT.getSizeInBits() == 16)
1651 return std::make_pair(0U, &SystemZ::VR16BitRegClass);
1652 if (VT.getSizeInBits() == 32)
1653 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1654 if (VT.getSizeInBits() == 64)
1655 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1656 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1657 }
1658 break;
1659 }
1660 }
1661 if (Constraint.starts_with("{")) {
1662
1663 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1664 // to check the size on.
1665 auto getVTSizeInBits = [&VT]() {
1666 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1667 };
1668
1669 // We need to override the default register parsing for GPRs and FPRs
1670 // because the interpretation depends on VT. The internal names of
1671 // the registers are also different from the external names
1672 // (F0D and F0S instead of F0, etc.).
1673 if (Constraint[1] == 'r') {
1674 if (getVTSizeInBits() == 32)
1675 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1677 if (getVTSizeInBits() == 128)
1678 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1680 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1682 }
1683 if (Constraint[1] == 'f') {
1684 if (useSoftFloat())
1685 return std::make_pair(
1686 0u, static_cast<const TargetRegisterClass *>(nullptr));
1687 if (getVTSizeInBits() == 16)
1688 return parseRegisterNumber(Constraint, &SystemZ::FP16BitRegClass,
1690 if (getVTSizeInBits() == 32)
1691 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1693 if (getVTSizeInBits() == 128)
1694 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1696 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1698 }
1699 if (Constraint[1] == 'v') {
1700 if (!Subtarget.hasVector())
1701 return std::make_pair(
1702 0u, static_cast<const TargetRegisterClass *>(nullptr));
1703 if (getVTSizeInBits() == 16)
1704 return parseRegisterNumber(Constraint, &SystemZ::VR16BitRegClass,
1706 if (getVTSizeInBits() == 32)
1707 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1709 if (getVTSizeInBits() == 64)
1710 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1712 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1714 }
1715 if (Constraint[1] == '@') {
1716 if (StringRef("{@cc}").compare(Constraint) == 0)
1717 return std::make_pair(SystemZ::CC, &SystemZ::CCRRegClass);
1718 }
1719 }
1720 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1721}
1722
1723// FIXME? Maybe this could be a TableGen attribute on some registers and
1724// this table could be generated automatically from RegInfo.
1727 const MachineFunction &MF) const {
1728 Register Reg =
1730 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D
1731 : SystemZ::NoRegister)
1732 .Case("r15",
1733 Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
1734 .Default(Register());
1735
1736 return Reg;
1737}
1738
1740 const Constant *PersonalityFn) const {
1741 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1742}
1743
1745 const Constant *PersonalityFn) const {
1746 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1747}
1748
1749// Convert condition code in CCReg to an i32 value.
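// IPM places the CC in bits 28-29 of the result, so shifting right by
// SystemZ::IPM_CC leaves the raw CC value (0-3) in the low two bits.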
1750 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) {
1751 SDLoc DL(CCReg);
1752 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
1753 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
1754 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
1755}
1756
1757// Lower @cc targets via setcc.
1759 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
1760 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
1761 if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
1762 return SDValue();
1763
1764 // Check that return type is valid.
1765 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
1766 OpInfo.ConstraintVT.getSizeInBits() < 8)
1767 report_fatal_error("Glue output operand is of invalid type");
1768
1769 if (Glue.getNode()) {
1770 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
1771 Chain = Glue.getValue(1);
1772 } else
1773 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
1774 return getCCResult(DAG, Glue);
1775}
1776
1778 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1779 SelectionDAG &DAG) const {
1780 // Only support length 1 constraints for now.
1781 if (Constraint.size() == 1) {
1782 switch (Constraint[0]) {
1783 case 'I': // Unsigned 8-bit constant
1784 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1785 if (isUInt<8>(C->getZExtValue()))
1786 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1787 Op.getValueType()));
1788 return;
1789
1790 case 'J': // Unsigned 12-bit constant
1791 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1792 if (isUInt<12>(C->getZExtValue()))
1793 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1794 Op.getValueType()));
1795 return;
1796
1797 case 'K': // Signed 16-bit constant
1798 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1799 if (isInt<16>(C->getSExtValue()))
1800 Ops.push_back(DAG.getSignedTargetConstant(
1801 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1802 return;
1803
1804 case 'L': // Signed 20-bit displacement (on all targets we support)
1805 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1806 if (isInt<20>(C->getSExtValue()))
1807 Ops.push_back(DAG.getSignedTargetConstant(
1808 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1809 return;
1810
1811 case 'M': // 0x7fffffff
1812 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1813 if (C->getZExtValue() == 0x7fffffff)
1814 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1815 Op.getValueType()));
1816 return;
1817 }
1818 }
1820}
1821
1822//===----------------------------------------------------------------------===//
1823// Calling conventions
1824//===----------------------------------------------------------------------===//
1825
1826#include "SystemZGenCallingConv.inc"
1827
1829 CallingConv::ID) const {
1830 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1831 SystemZ::R14D, 0 };
1832 return ScratchRegs;
1833}
1834
1836 Type *ToType) const {
1837 return isTruncateFree(FromType, ToType);
1838}
1839
1841 return CI->isTailCall();
1842}
1843
1844// Value is a value that has been passed to us in the location described by VA
1845// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1846// any loads onto Chain.
1848 CCValAssign &VA, SDValue Chain,
1849 SDValue Value) {
1850 // If the argument has been promoted from a smaller type, insert an
1851 // assertion to capture this.
1852 if (VA.getLocInfo() == CCValAssign::SExt)
1854 DAG.getValueType(VA.getValVT()));
1855 else if (VA.getLocInfo() == CCValAssign::ZExt)
1857 DAG.getValueType(VA.getValVT()));
1858
1859 if (VA.isExtInLoc())
1860 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1861 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1862 // If this is a short vector argument loaded from the stack,
1863 // extend from i64 to full vector size and then bitcast.
1864 assert(VA.getLocVT() == MVT::i64);
1865 assert(VA.getValVT().isVector());
1866 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1867 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1868 } else
1869 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1870 return Value;
1871}
1872
1873// Value is a value of type VA.getValVT() that we need to copy into
1874// the location described by VA. Return a copy of Value converted to
1875 // VA.getLocVT(). The caller is responsible for handling indirect values.
1877 CCValAssign &VA, SDValue Value) {
1878 switch (VA.getLocInfo()) {
1879 case CCValAssign::SExt:
1880 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1881 case CCValAssign::ZExt:
1882 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1883 case CCValAssign::AExt:
1884 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1885 case CCValAssign::BCvt: {
1886 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1887 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1888 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1889 // For an f32 vararg we need to first promote it to an f64 and then
1890 // bitcast it to an i64.
1891 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1892 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1893 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1894 ? MVT::v2i64
1895 : VA.getLocVT();
1896 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1897 // For ELF, this is a short vector argument to be stored to the stack,
1898 // bitcast to v2i64 and then extract first element.
1899 if (BitCastToType == MVT::v2i64)
1900 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1901 DAG.getConstant(0, DL, MVT::i32));
1902 return Value;
1903 }
1904 case CCValAssign::Full:
1905 return Value;
1906 default:
1907 llvm_unreachable("Unhandled getLocInfo()");
1908 }
1909}
1910
1912 SDLoc DL(In);
1913 SDValue Lo, Hi;
1914 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1915 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1916 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1917 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1918 DAG.getConstant(64, DL, MVT::i32)));
1919 } else {
1920 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1921 }
1922
1923 // FIXME: If v2i64 were a legal type, we could use it instead of
1924 // Untyped here. This might enable improved folding.
1925 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1926 MVT::Untyped, Hi, Lo);
1927 return SDValue(Pair, 0);
1928}
1929
1931 SDLoc DL(In);
1932 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1933 DL, MVT::i64, In);
1934 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1935 DL, MVT::i64, In);
1936
1937 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1938 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1939 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1940 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1941 DAG.getConstant(64, DL, MVT::i32));
1942 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1943 } else {
1944 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1945 }
1946}
1947
1948bool SystemZTargetLowering::splitValueIntoRegisterParts(
1949 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1950 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1951 EVT ValueVT = Val.getValueType();
1952 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1953 // Inline assembly operand.
1954 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1955 return true;
1956 }
1957
1958 return false;
1959}
1960
1961SDValue SystemZTargetLowering::joinRegisterPartsIntoValue(
1962 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1963 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1964 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1965 // Inline assembly operand.
1966 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1967 return DAG.getBitcast(ValueVT, Res);
1968 }
1969
1970 return SDValue();
1971}
1972
1973// The first part of a split stack argument is at index I in Args (and
1974// ArgLocs). Return the type of a part and the number of them by reference.
1975template <class ArgTy>
1976static bool analyzeArgSplit(const SmallVectorImpl<ArgTy> &Args,
1977 SmallVector<CCValAssign, 16> &ArgLocs, unsigned I,
1978 MVT &PartVT, unsigned &NumParts) {
1979 if (!Args[I].Flags.isSplit())
1980 return false;
1981 assert(I < ArgLocs.size() && ArgLocs.size() == Args.size() &&
1982 "ArgLocs havoc.");
1983 PartVT = ArgLocs[I].getValVT();
1984 NumParts = 1;
1985 for (unsigned PartIdx = I + 1;; ++PartIdx) {
1986 assert(PartIdx != ArgLocs.size() && "SplitEnd not found.");
1987 assert(ArgLocs[PartIdx].getValVT() == PartVT && "Unsupported split.");
1988 ++NumParts;
1989 if (Args[PartIdx].Flags.isSplitEnd())
1990 break;
1991 }
1992 return true;
1993}
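// Example (analyzeArgSplit): an i256 argument split into four i64 pieces
// yields PartVT == MVT::i64 and NumParts == 4.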
1994
1995SDValue SystemZTargetLowering::LowerFormalArguments(
1996 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1997 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1998 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1999 MachineFunction &MF = DAG.getMachineFunction();
2000 MachineFrameInfo &MFI = MF.getFrameInfo();
2001 MachineRegisterInfo &MRI = MF.getRegInfo();
2002 SystemZMachineFunctionInfo *FuncInfo =
2003 MF.getInfo<SystemZMachineFunctionInfo>();
2004 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
2005 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2006
2007 // Assign locations to all of the incoming arguments.
2008 SmallVector<CCValAssign, 16> ArgLocs;
2009 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
2010 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
2011 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
2012
2013 unsigned NumFixedGPRs = 0;
2014 unsigned NumFixedFPRs = 0;
2015 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2016 SDValue ArgValue;
2017 CCValAssign &VA = ArgLocs[I];
2018 EVT LocVT = VA.getLocVT();
2019 if (VA.isRegLoc()) {
2020 // Arguments passed in registers
2021 const TargetRegisterClass *RC;
2022 switch (LocVT.getSimpleVT().SimpleTy) {
2023 default:
2024 // Integers smaller than i64 should be promoted to i64.
2025 llvm_unreachable("Unexpected argument type");
2026 case MVT::i32:
2027 NumFixedGPRs += 1;
2028 RC = &SystemZ::GR32BitRegClass;
2029 break;
2030 case MVT::i64:
2031 NumFixedGPRs += 1;
2032 RC = &SystemZ::GR64BitRegClass;
2033 break;
2034 case MVT::f16:
2035 NumFixedFPRs += 1;
2036 RC = &SystemZ::FP16BitRegClass;
2037 break;
2038 case MVT::f32:
2039 NumFixedFPRs += 1;
2040 RC = &SystemZ::FP32BitRegClass;
2041 break;
2042 case MVT::f64:
2043 NumFixedFPRs += 1;
2044 RC = &SystemZ::FP64BitRegClass;
2045 break;
2046 case MVT::f128:
2047 NumFixedFPRs += 2;
2048 RC = &SystemZ::FP128BitRegClass;
2049 break;
2050 case MVT::v16i8:
2051 case MVT::v8i16:
2052 case MVT::v4i32:
2053 case MVT::v2i64:
2054 case MVT::v4f32:
2055 case MVT::v2f64:
2056 RC = &SystemZ::VR128BitRegClass;
2057 break;
2058 }
2059
2060 Register VReg = MRI.createVirtualRegister(RC);
2061 MRI.addLiveIn(VA.getLocReg(), VReg);
2062 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2063 } else {
2064 assert(VA.isMemLoc() && "Argument not register or memory");
2065
2066 // Create the frame index object for this incoming parameter.
2067 // FIXME: Pre-include call frame size in the offset, should not
2068 // need to manually add it here.
2069 int64_t ArgSPOffset = VA.getLocMemOffset();
2070 if (Subtarget.isTargetXPLINK64()) {
2071 auto &XPRegs =
2072 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
2073 ArgSPOffset += XPRegs.getCallFrameSize();
2074 }
2075 int FI =
2076 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
2077
2078 // Create the SelectionDAG nodes corresponding to a load
2079 // from this parameter. Unpromoted ints and floats are
2080 // passed as right-justified 8-byte values.
2081 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2082 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32 ||
2083 VA.getLocVT() == MVT::f16) {
2084 unsigned SlotOffs = VA.getLocVT() == MVT::f16 ? 6 : 4;
2085 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
2086 DAG.getIntPtrConstant(SlotOffs, DL));
2087 }
2088 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
2089 MachinePointerInfo::getFixedStack(MF, FI));
2090 }
2091
2092 // Convert the value of the argument register into the value that's
2093 // being passed.
2094 if (VA.getLocInfo() == CCValAssign::Indirect) {
2095 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2096 MachinePointerInfo()));
2097 // If the original argument was split (e.g. i128), we need
2098 // to load all parts of it here (using the same address).
2099 MVT PartVT;
2100 unsigned NumParts;
2101 if (analyzeArgSplit(Ins, ArgLocs, I, PartVT, NumParts)) {
2102 // TODO: It is strange that while LowerCallTo() sets the PartOffset
2103 // relative to the first split part LowerArguments() sets the offset
2104 // from the beginning of the struct. So with {i32, i256}, the
2105 // PartOffset for the i256 parts are differently handled. Try to
2106 // remove that difference and use PartOffset directly here (instead
2107 // of SplitBaseOffs).
2108 unsigned SplitBaseOffs = Ins[I].PartOffset;
2109 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
2110 ++I;
2111 CCValAssign &PartVA = ArgLocs[I];
2112 unsigned PartOffset = Ins[I].PartOffset - SplitBaseOffs;
2113 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2114 DAG.getIntPtrConstant(PartOffset, DL));
2115 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2116 MachinePointerInfo()));
2117 assert(PartOffset && "Offset should be non-zero.");
2118 }
2119 }
2120 } else
2121 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
2122 }
2123
2124 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
2125 // Save the number of non-varargs registers for later use by va_start, etc.
2126 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2127 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2128
2129 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2130 Subtarget.getSpecialRegisters());
2131
2132 // Likewise the address (in the form of a frame index) of where the
2133 // first stack vararg would be. The 1-byte size here is arbitrary.
2134 // FIXME: Pre-include call frame size in the offset, should not
2135 // need to manually add it here.
2136 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
2137 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
2138 FuncInfo->setVarArgsFrameIndex(FI);
2139 }
2140
2141 if (IsVarArg && Subtarget.isTargetELF()) {
2142 // Save the number of non-varargs registers for later use by va_start, etc.
2143 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2144 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2145
2146 // Likewise the address (in the form of a frame index) of where the
2147 // first stack vararg would be. The 1-byte size here is arbitrary.
2148 int64_t VarArgsOffset = CCInfo.getStackSize();
2149 FuncInfo->setVarArgsFrameIndex(
2150 MFI.CreateFixedObject(1, VarArgsOffset, true));
2151
2152 // ...and a similar frame index for the caller-allocated save area
2153 // that will be used to store the incoming registers.
2154 int64_t RegSaveOffset =
2155 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
2156 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
2157 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
2158
2159 // Store the FPR varargs in the reserved frame slots. (We store the
2160 // GPRs as part of the prologue.)
2161 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
2162 SDValue MemOps[SystemZ::ELFNumArgFPRs];
2163 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
2164 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
2165 int FI =
2166 MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
2167 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2168 Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
2169 &SystemZ::FP64BitRegClass);
2170 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
2171 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
2172 MachinePointerInfo::getFixedStack(MF, FI));
2173 }
2174 // Join the stores, which are independent of one another.
2175 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2176 ArrayRef(&MemOps[NumFixedFPRs],
2177 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
2178 }
2179 }
2180
2181 if (Subtarget.isTargetXPLINK64()) {
2182 // Create virtual register for handling incoming "ADA" special register (R5)
2183 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
2184 Register ADAvReg = MRI.createVirtualRegister(RC);
2185 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2186 Subtarget.getSpecialRegisters());
2187 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
2188 FuncInfo->setADAVirtualRegister(ADAvReg);
2189 }
2190 return Chain;
2191}
2192
2193static bool canUseSiblingCall(const CCState &ArgCCInfo,
2194 SmallVectorImpl<CCValAssign> &ArgLocs,
2195 SmallVectorImpl<ISD::OutputArg> &Outs) {
2196 // Punt if there are any indirect or stack arguments, or if the call
2197 // needs the callee-saved argument register R6, or if the call uses
2198 // the callee-saved register arguments SwiftSelf and SwiftError.
2199 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2200 CCValAssign &VA = ArgLocs[I];
2201 if (VA.getLocInfo() == CCValAssign::Indirect)
2202 return false;
2203 if (!VA.isRegLoc())
2204 return false;
2205 Register Reg = VA.getLocReg();
2206 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
2207 return false;
2208 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
2209 return false;
2210 }
2211 return true;
2212}
2213
2214static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL,
2215 unsigned Offset, bool LoadAdr = false) {
2216 MachineFunction &MF = DAG.getMachineFunction();
2217 SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
2218 Register ADAvReg = MFI->getADAVirtualRegister();
2219 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2220
2221 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
2222 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
2223
2224 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
2225 if (!LoadAdr)
2226 Result = DAG.getLoad(
2227 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
2228 MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant);
2229
2230 return Result;
2231}
2232
2233// ADA access using Global value
2234// Note: for functions, address of descriptor is returned
2235static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL,
2236 EVT PtrVT) {
2237 unsigned ADAtype;
2238 bool LoadAddr = false;
2239 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
2240 bool IsFunction =
2241 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
2242 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
2243
2244 if (IsFunction) {
2245 if (IsInternal) {
2246 ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC;
2247 LoadAddr = true;
2248 } else
2249 ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC;
2250 } else {
2251 ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR;
2252 }
2253 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
2254
2255 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
2256}
2257
2258static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
2259 SDLoc &DL, SDValue &Chain) {
2260 unsigned ADADelta = 0; // ADA offset in desc.
2261 unsigned EPADelta = 8; // EPA offset in desc.
2262 MachineFunction &MF = DAG.getMachineFunction();
2263 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2264
2265 // XPLink calling convention.
2266 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2267 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
2268 G->getGlobal()->hasPrivateLinkage());
2269 if (IsInternal) {
2270 SystemZMachineFunctionInfo *MFI =
2271 MF.getInfo<SystemZMachineFunctionInfo>();
2272 Register ADAvReg = MFI->getADAVirtualRegister();
2273 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
2274 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2275 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2276 return true;
2277 } else {
2278 SDValue GA = DAG.getTargetGlobalAddress(
2279 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2280 ADA = getADAEntry(DAG, GA, DL, ADADelta);
2281 Callee = getADAEntry(DAG, GA, DL, EPADelta);
2282 }
2283 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2284 SDValue ES = DAG.getTargetExternalSymbol(
2285 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2286 ADA = getADAEntry(DAG, ES, DL, ADADelta);
2287 Callee = getADAEntry(DAG, ES, DL, EPADelta);
2288 } else {
2289 // Function pointer case
2290 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2291 DAG.getConstant(ADADelta, DL, PtrVT));
2292 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
2294 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2295 DAG.getConstant(EPADelta, DL, PtrVT));
2296 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
2298 }
2299 return false;
2300}
2301
2302SDValue
2303SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
2304 SmallVectorImpl<SDValue> &InVals) const {
2305 SelectionDAG &DAG = CLI.DAG;
2306 SDLoc &DL = CLI.DL;
2307 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2308 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2309 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2310 SDValue Chain = CLI.Chain;
2311 SDValue Callee = CLI.Callee;
2312 bool &IsTailCall = CLI.IsTailCall;
2313 CallingConv::ID CallConv = CLI.CallConv;
2314 bool IsVarArg = CLI.IsVarArg;
2315 MachineFunction &MF = DAG.getMachineFunction();
2316 EVT PtrVT = getPointerTy(MF.getDataLayout());
2317 LLVMContext &Ctx = *DAG.getContext();
2318 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
2319
2320 // FIXME: z/OS support to be added in later.
2321 if (Subtarget.isTargetXPLINK64())
2322 IsTailCall = false;
2323
2324 // Integer args <=32 bits should have an extension attribute.
2325 verifyNarrowIntegerArgs_Call(Outs, &MF.getFunction(), Callee);
2326
2327 // Analyze the operands of the call, assigning locations to each operand.
2328 SmallVector<CCValAssign, 16> ArgLocs;
2329 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
2330 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
2331
2332 // We don't support GuaranteedTailCallOpt, only automatically-detected
2333 // sibling calls.
2334 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
2335 IsTailCall = false;
2336
2337 // Get a count of how many bytes are to be pushed on the stack.
2338 unsigned NumBytes = ArgCCInfo.getStackSize();
2339
2340 // Mark the start of the call.
2341 if (!IsTailCall)
2342 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
2343
2344 // Copy argument values to their designated locations.
2345 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass;
2346 SmallVector<SDValue, 8> MemOpChains;
2347 SDValue StackPtr;
2348 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2349 CCValAssign &VA = ArgLocs[I];
2350 SDValue ArgValue = OutVals[I];
2351
2352 if (VA.getLocInfo() == CCValAssign::Indirect) {
2353 // Store the argument in a stack slot and pass its address.
2354 EVT SlotVT;
2355 MVT PartVT;
2356 unsigned NumParts = 1;
2357 if (analyzeArgSplit(Outs, ArgLocs, I, PartVT, NumParts))
2358 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * NumParts);
2359 else
2360 SlotVT = Outs[I].VT;
2361 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
2362 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2363 MemOpChains.push_back(
2364 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2365 MachinePointerInfo::getFixedStack(MF, FI)));
2366 // If the original argument was split (e.g. i128), we need
2367 // to store all parts of it here (and pass just one address).
2368 assert(Outs[I].PartOffset == 0);
2369 for (unsigned PartIdx = 1; PartIdx < NumParts; ++PartIdx) {
2370 ++I;
2371 SDValue PartValue = OutVals[I];
2372 unsigned PartOffset = Outs[I].PartOffset;
2373 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2374 DAG.getIntPtrConstant(PartOffset, DL));
2375 MemOpChains.push_back(
2376 DAG.getStore(Chain, DL, PartValue, Address,
2377 MachinePointerInfo::getFixedStack(MF, FI, PartOffset)));
2378 assert(PartOffset && "Offset should be non-zero.");
2379 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
2380 SlotVT.getStoreSize()) && "Not enough space for argument part!");
2381 }
2382 ArgValue = SpillSlot;
2383 } else
2384 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
2385
2386 if (VA.isRegLoc()) {
2387 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
2388 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
2389 // and low values.
2390 if (VA.getLocVT() == MVT::i128)
2391 ArgValue = lowerI128ToGR128(DAG, ArgValue);
2392 // Queue up the argument copies and emit them at the end.
2393 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2394 } else {
2395 assert(VA.isMemLoc() && "Argument not register or memory");
2396
2397 // Work out the address of the stack slot. Unpromoted ints and
2398 // floats are passed as right-justified 8-byte values.
2399 if (!StackPtr.getNode())
2400 StackPtr = DAG.getCopyFromReg(Chain, DL,
2401 Regs->getStackPointerRegister(), PtrVT);
2402 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2403 VA.getLocMemOffset();
2404 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2405 Offset += 4;
2406 else if (VA.getLocVT() == MVT::f16)
2407 Offset += 6;
2408 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2409 DAG.getIntPtrConstant(Offset, DL));
2410
2411 // Emit the store.
2412 MemOpChains.push_back(
2413 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2414
2415 // Although long doubles or vectors are passed through the stack when
2416 // they are vararg (non-fixed arguments), if a long double or vector
2417 // occupies the third and fourth slot of the argument list, GPR3 should
2418 // still shadow the third slot of the argument list.
2419 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2420 SDValue ShadowArgValue =
2421 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2422 DAG.getIntPtrConstant(1, DL));
2423 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2424 }
2425 }
2426 }
2427
2428 // Join the stores, which are independent of one another.
2429 if (!MemOpChains.empty())
2430 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2431
2432 // Accept direct calls by converting symbolic call addresses to the
2433 // associated Target* opcodes. Force %r1 to be used for indirect
2434 // tail calls.
2435 SDValue Glue;
2436
2437 if (Subtarget.isTargetXPLINK64()) {
2438 SDValue ADA;
2439 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
2440 if (!IsBRASL) {
2441 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2442 ->getAddressOfCalleeRegister();
2443 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2444 Glue = Chain.getValue(1);
2445 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2446 }
2447 RegsToPass.push_back(std::make_pair(
2448 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2449 } else {
2450 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2451 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2452 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2453 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2454 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2455 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2456 } else if (IsTailCall) {
2457 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2458 Glue = Chain.getValue(1);
2459 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2460 }
2461 }
2462
2463 // Build a sequence of copy-to-reg nodes, chained and glued together.
2464 for (const auto &[Reg, N] : RegsToPass) {
2465 Chain = DAG.getCopyToReg(Chain, DL, Reg, N, Glue);
2466 Glue = Chain.getValue(1);
2467 }
2468
2469 // The first call operand is the chain and the second is the target address.
2470 SmallVector<SDValue, 8> Ops;
2471 Ops.push_back(Chain);
2472 Ops.push_back(Callee);
2473
2474 // Add argument registers to the end of the list so that they are
2475 // known live into the call.
2476 for (const auto &[Reg, N] : RegsToPass)
2477 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2478
2479 // Add a register mask operand representing the call-preserved registers.
2480 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2481 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2482 assert(Mask && "Missing call preserved mask for calling convention");
2483 Ops.push_back(DAG.getRegisterMask(Mask));
2484
2485 // Glue the call to the argument copies, if any.
2486 if (Glue.getNode())
2487 Ops.push_back(Glue);
2488
2489 // Emit the call.
2490 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2491 if (IsTailCall) {
2492 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2493 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2494 return Ret;
2495 }
2496 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2497 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2498 Glue = Chain.getValue(1);
2499
2500 // Mark the end of the call, which is glued to the call itself.
2501 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2502 Glue = Chain.getValue(1);
2503
2504 // Assign locations to each value returned by this call.
2505 SmallVector<CCValAssign, 16> RetLocs;
2506 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2507 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2508
2509 // Copy all of the result registers out of their specified physreg.
2510 for (CCValAssign &VA : RetLocs) {
2511 // Copy the value out, gluing the copy to the end of the call sequence.
2512 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2513 VA.getLocVT(), Glue);
2514 Chain = RetValue.getValue(1);
2515 Glue = RetValue.getValue(2);
2516
2517 // Convert the value of the return register into the value that's
2518 // being returned.
2519 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2520 }
2521
2522 return Chain;
2523}
2524
2525// Generate a call taking the given operands as arguments and returning a
2526// result of type RetVT.
2527SDValue SystemZTargetLowering::makeExternalCall(
2528 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2529 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2530 bool DoesNotReturn, bool IsReturnValueUsed) const {
2531 TargetLowering::ArgListTy Args;
2532 Args.reserve(Ops.size());
2533
2534 for (SDValue Op : Ops) {
2535 ArgListEntry Entry(
2536 Op, Op.getValueType().getTypeForEVT(*DAG.getContext()));
2537 Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned);
2538 Entry.IsZExt = !Entry.IsSExt;
2539 Args.push_back(Entry);
2540 }
2541
2542 SDValue Callee =
2543 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2544
2545 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2546 CallLoweringInfo CLI(DAG);
2547 bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned);
2548 CLI.setDebugLoc(DL)
2549 .setChain(Chain)
2550 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2551 .setNoReturn(DoesNotReturn)
2552 .setDiscardResult(!IsReturnValueUsed)
2553 .setSExtResult(SignExtend)
2554 .setZExtResult(!SignExtend);
2555 return LowerCallTo(CLI);
2556}
2557
2558bool SystemZTargetLowering::CanLowerReturn(
2559 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2560 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
2561 const Type *RetTy) const {
2562 // Special case that we cannot easily detect in RetCC_SystemZ since
2563 // i128 may not be a legal type.
2564 for (auto &Out : Outs)
2565 if (Out.ArgVT.isScalarInteger() && Out.ArgVT.getSizeInBits() > 64)
2566 return false;
2567
2568 SmallVector<CCValAssign, 16> RetLocs;
2569 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Context);
2570 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2571}
2572
2573SDValue
2574SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2575 bool IsVarArg,
2576 const SmallVectorImpl<ISD::OutputArg> &Outs,
2577 const SmallVectorImpl<SDValue> &OutVals,
2578 const SDLoc &DL, SelectionDAG &DAG) const {
2579 MachineFunction &MF = DAG.getMachineFunction();
2580
2581 // Integer args <=32 bits should have an extension attribute.
2582 verifyNarrowIntegerArgs_Ret(Outs, &MF.getFunction());
2583
2584 // Assign locations to each returned value.
2585 SmallVector<CCValAssign, 16> RetLocs;
2586 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2587 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2588
2589 // Quick exit for void returns
2590 if (RetLocs.empty())
2591 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2592
2593 if (CallConv == CallingConv::GHC)
2594 report_fatal_error("GHC functions return void only");
2595
2596 // Copy the result values into the output registers.
2597 SDValue Glue;
2598 SmallVector<SDValue, 4> RetOps;
2599 RetOps.push_back(Chain);
2600 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2601 CCValAssign &VA = RetLocs[I];
2602 SDValue RetValue = OutVals[I];
2603
2604 // Make the return register live on exit.
2605 assert(VA.isRegLoc() && "Can only return in registers!");
2606
2607 // Promote the value as required.
2608 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2609
2610 // Chain and glue the copies together.
2611 Register Reg = VA.getLocReg();
2612 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2613 Glue = Chain.getValue(1);
2614 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2615 }
2616
2617 // Update chain and glue.
2618 RetOps[0] = Chain;
2619 if (Glue.getNode())
2620 RetOps.push_back(Glue);
2621
2622 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2623}
2624
2625// Return true if Op is an intrinsic node with chain that returns the CC value
2626// as its only (other) argument. Provide the associated SystemZISD opcode and
2627// the mask of valid CC values if so.
2628static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2629 unsigned &CCValid) {
2630 unsigned Id = Op.getConstantOperandVal(1);
2631 switch (Id) {
2632 case Intrinsic::s390_tbegin:
2633 Opcode = SystemZISD::TBEGIN;
2634 CCValid = SystemZ::CCMASK_TBEGIN;
2635 return true;
2636
2637 case Intrinsic::s390_tbegin_nofloat:
2638 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2639 CCValid = SystemZ::CCMASK_TBEGIN;
2640 return true;
2641
2642 case Intrinsic::s390_tend:
2643 Opcode = SystemZISD::TEND;
2644 CCValid = SystemZ::CCMASK_TEND;
2645 return true;
2646
2647 default:
2648 return false;
2649 }
2650}
2651
2652// Return true if Op is an intrinsic node without chain that returns the
2653// CC value as its final argument. Provide the associated SystemZISD
2654// opcode and the mask of valid CC values if so.
2655static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2656 unsigned Id = Op.getConstantOperandVal(0);
2657 switch (Id) {
2658 case Intrinsic::s390_vpkshs:
2659 case Intrinsic::s390_vpksfs:
2660 case Intrinsic::s390_vpksgs:
2661 Opcode = SystemZISD::PACKS_CC;
2662 CCValid = SystemZ::CCMASK_VCMP;
2663 return true;
2664
2665 case Intrinsic::s390_vpklshs:
2666 case Intrinsic::s390_vpklsfs:
2667 case Intrinsic::s390_vpklsgs:
2668 Opcode = SystemZISD::PACKLS_CC;
2669 CCValid = SystemZ::CCMASK_VCMP;
2670 return true;
2671
2672 case Intrinsic::s390_vceqbs:
2673 case Intrinsic::s390_vceqhs:
2674 case Intrinsic::s390_vceqfs:
2675 case Intrinsic::s390_vceqgs:
2676 case Intrinsic::s390_vceqqs:
2677 Opcode = SystemZISD::VICMPES;
2678 CCValid = SystemZ::CCMASK_VCMP;
2679 return true;
2680
2681 case Intrinsic::s390_vchbs:
2682 case Intrinsic::s390_vchhs:
2683 case Intrinsic::s390_vchfs:
2684 case Intrinsic::s390_vchgs:
2685 case Intrinsic::s390_vchqs:
2686 Opcode = SystemZISD::VICMPHS;
2687 CCValid = SystemZ::CCMASK_VCMP;
2688 return true;
2689
2690 case Intrinsic::s390_vchlbs:
2691 case Intrinsic::s390_vchlhs:
2692 case Intrinsic::s390_vchlfs:
2693 case Intrinsic::s390_vchlgs:
2694 case Intrinsic::s390_vchlqs:
2695 Opcode = SystemZISD::VICMPHLS;
2696 CCValid = SystemZ::CCMASK_VCMP;
2697 return true;
2698
2699 case Intrinsic::s390_vtm:
2700 Opcode = SystemZISD::VTM;
2701 CCValid = SystemZ::CCMASK_VCMP;
2702 return true;
2703
2704 case Intrinsic::s390_vfaebs:
2705 case Intrinsic::s390_vfaehs:
2706 case Intrinsic::s390_vfaefs:
2707 Opcode = SystemZISD::VFAE_CC;
2708 CCValid = SystemZ::CCMASK_ANY;
2709 return true;
2710
2711 case Intrinsic::s390_vfaezbs:
2712 case Intrinsic::s390_vfaezhs:
2713 case Intrinsic::s390_vfaezfs:
2714 Opcode = SystemZISD::VFAEZ_CC;
2715 CCValid = SystemZ::CCMASK_ANY;
2716 return true;
2717
2718 case Intrinsic::s390_vfeebs:
2719 case Intrinsic::s390_vfeehs:
2720 case Intrinsic::s390_vfeefs:
2721 Opcode = SystemZISD::VFEE_CC;
2722 CCValid = SystemZ::CCMASK_ANY;
2723 return true;
2724
2725 case Intrinsic::s390_vfeezbs:
2726 case Intrinsic::s390_vfeezhs:
2727 case Intrinsic::s390_vfeezfs:
2728 Opcode = SystemZISD::VFEEZ_CC;
2729 CCValid = SystemZ::CCMASK_ANY;
2730 return true;
2731
2732 case Intrinsic::s390_vfenebs:
2733 case Intrinsic::s390_vfenehs:
2734 case Intrinsic::s390_vfenefs:
2735 Opcode = SystemZISD::VFENE_CC;
2736 CCValid = SystemZ::CCMASK_ANY;
2737 return true;
2738
2739 case Intrinsic::s390_vfenezbs:
2740 case Intrinsic::s390_vfenezhs:
2741 case Intrinsic::s390_vfenezfs:
2742 Opcode = SystemZISD::VFENEZ_CC;
2743 CCValid = SystemZ::CCMASK_ANY;
2744 return true;
2745
2746 case Intrinsic::s390_vistrbs:
2747 case Intrinsic::s390_vistrhs:
2748 case Intrinsic::s390_vistrfs:
2749 Opcode = SystemZISD::VISTR_CC;
2750 CCValid = SystemZ::CCMASK_ANY;
2751 return true;
2752
2753 case Intrinsic::s390_vstrcbs:
2754 case Intrinsic::s390_vstrchs:
2755 case Intrinsic::s390_vstrcfs:
2756 Opcode = SystemZISD::VSTRC_CC;
2757 CCValid = SystemZ::CCMASK_ANY;
2758 return true;
2759
2760 case Intrinsic::s390_vstrczbs:
2761 case Intrinsic::s390_vstrczhs:
2762 case Intrinsic::s390_vstrczfs:
2763 Opcode = SystemZISD::VSTRCZ_CC;
2764 CCValid = SystemZ::CCMASK_ANY;
2765 return true;
2766
2767 case Intrinsic::s390_vstrsb:
2768 case Intrinsic::s390_vstrsh:
2769 case Intrinsic::s390_vstrsf:
2770 Opcode = SystemZISD::VSTRS_CC;
2771 CCValid = SystemZ::CCMASK_ANY;
2772 return true;
2773
2774 case Intrinsic::s390_vstrszb:
2775 case Intrinsic::s390_vstrszh:
2776 case Intrinsic::s390_vstrszf:
2777 Opcode = SystemZISD::VSTRSZ_CC;
2778 CCValid = SystemZ::CCMASK_ANY;
2779 return true;
2780
2781 case Intrinsic::s390_vfcedbs:
2782 case Intrinsic::s390_vfcesbs:
2783 Opcode = SystemZISD::VFCMPES;
2784 CCValid = SystemZ::CCMASK_VCMP;
2785 return true;
2786
2787 case Intrinsic::s390_vfchdbs:
2788 case Intrinsic::s390_vfchsbs:
2789 Opcode = SystemZISD::VFCMPHS;
2790 CCValid = SystemZ::CCMASK_VCMP;
2791 return true;
2792
2793 case Intrinsic::s390_vfchedbs:
2794 case Intrinsic::s390_vfchesbs:
2795 Opcode = SystemZISD::VFCMPHES;
2796 CCValid = SystemZ::CCMASK_VCMP;
2797 return true;
2798
2799 case Intrinsic::s390_vftcidb:
2800 case Intrinsic::s390_vftcisb:
2801 Opcode = SystemZISD::VFTCI;
2802 CCValid = SystemZ::CCMASK_VCMP;
2803 return true;
2804
2805 case Intrinsic::s390_tdc:
2806 Opcode = SystemZISD::TDC;
2807 CCValid = SystemZ::CCMASK_TDC;
2808 return true;
2809
2810 default:
2811 return false;
2812 }
2813}
2814
2815// Emit an intrinsic with chain and an explicit CC register result.
2816static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op,
2817 unsigned Opcode) {
2818 // Copy all operands except the intrinsic ID.
2819 unsigned NumOps = Op.getNumOperands();
2820 SmallVector<SDValue, 6> Ops;
2821 Ops.reserve(NumOps - 1);
2822 Ops.push_back(Op.getOperand(0));
2823 for (unsigned I = 2; I < NumOps; ++I)
2824 Ops.push_back(Op.getOperand(I));
2825
2826 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2827 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2828 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
2829 SDValue OldChain = SDValue(Op.getNode(), 1);
2830 SDValue NewChain = SDValue(Intr.getNode(), 1);
2831 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2832 return Intr.getNode();
2833}
2834
2835// Emit an intrinsic with an explicit CC register result.
2836static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op,
2837 unsigned Opcode) {
2838 // Copy all operands except the intrinsic ID.
2839 SDLoc DL(Op);
2840 unsigned NumOps = Op.getNumOperands();
2841 SmallVector<SDValue, 6> Ops;
2842 Ops.reserve(NumOps - 1);
2843 for (unsigned I = 1; I < NumOps; ++I) {
2844 SDValue CurrOper = Op.getOperand(I);
2845 if (CurrOper.getValueType() == MVT::f16) {
2846 assert((Op.getConstantOperandVal(0) == Intrinsic::s390_tdc && I == 1) &&
2847 "Unhandled intrinsic with f16 operand.");
2848 CurrOper = DAG.getFPExtendOrRound(CurrOper, DL, MVT::f32);
2849 }
2850 Ops.push_back(CurrOper);
2851 }
2852
2853 SDValue Intr = DAG.getNode(Opcode, DL, Op->getVTList(), Ops);
2854 return Intr.getNode();
2855}
2856
2857// CC is a comparison that will be implemented using an integer or
2858// floating-point comparison. Return the condition code mask for
2859// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2860// unsigned comparisons and clear for signed ones. In the floating-point
2861// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
2862static unsigned CCMaskForCondCode(ISD::CondCode CC) {
2863#define CONV(X) \
2864 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2865 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2866 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2867
2868 switch (CC) {
2869 default:
2870 llvm_unreachable("Invalid integer condition!");
2871
2872 CONV(EQ);
2873 CONV(NE);
2874 CONV(GT);
2875 CONV(GE);
2876 CONV(LT);
2877 CONV(LE);
2878
2879 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2880 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO;
2881 }
2882#undef CONV
2883}
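// Example (CCMaskForCondCode): CONV(GT) covers SETGT, SETOGT and SETUGT;
// the first two map to CCMASK_CMP_GT, while SETUGT also includes
// CCMASK_CMP_UO.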
2884
2885// If C can be converted to a comparison against zero, adjust the operands
2886// as necessary.
2887static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2888 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2889 return;
2890
2891 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2892 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2893 return;
2894
2895 int64_t Value = ConstOp1->getSExtValue();
2896 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2897 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2898 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2899 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2900 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2901 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2902 }
2903}
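// Example (adjustZeroCmp): a signed "X > -1" becomes "X >= 0", since XOR-ing
// CCMASK_CMP_EQ into CCMASK_CMP_GT gives CCMASK_CMP_GE.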
2904
2905// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2906// adjust the operands as necessary.
2907static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2908 Comparison &C) {
2909 // For us to make any changes, it must be a comparison between a single-use
2910 // load and a constant.
2911 if (!C.Op0.hasOneUse() ||
2912 C.Op0.getOpcode() != ISD::LOAD ||
2913 C.Op1.getOpcode() != ISD::Constant)
2914 return;
2915
2916 // We must have an 8- or 16-bit load.
2917 auto *Load = cast<LoadSDNode>(C.Op0);
2918 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2919 if ((NumBits != 8 && NumBits != 16) ||
2920 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2921 return;
2922
2923 // The load must be an extending one and the constant must be within the
2924 // range of the unextended value.
2925 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2926 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2927 return;
2928 uint64_t Value = ConstOp1->getZExtValue();
2929 uint64_t Mask = (1 << NumBits) - 1;
2930 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2931 // Make sure that ConstOp1 is in range of C.Op0.
2932 int64_t SignedValue = ConstOp1->getSExtValue();
2933 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2934 return;
2935 if (C.ICmpType != SystemZICMP::SignedOnly) {
2936 // Unsigned comparison between two sign-extended values is equivalent
2937 // to unsigned comparison between two zero-extended values.
2938 Value &= Mask;
2939 } else if (NumBits == 8) {
2940 // Try to treat the comparison as unsigned, so that we can use CLI.
2941 // Adjust CCMask and Value as necessary.
2942 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2943 // Test whether the high bit of the byte is set.
2944 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2945 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2946 // Test whether the high bit of the byte is clear.
2947 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2948 else
2949 // No instruction exists for this combination.
2950 return;
2951 C.ICmpType = SystemZICMP::UnsignedOnly;
2952 }
2953 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2954 if (Value > Mask)
2955 return;
2956 // If the constant is in range, we can use any comparison.
2957 C.ICmpType = SystemZICMP::Any;
2958 } else
2959 return;
2960
2961 // Make sure that the first operand is an i32 of the right extension type.
2962 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2963 ISD::SEXTLOAD :
2964 ISD::ZEXTLOAD);
2965 if (C.Op0.getValueType() != MVT::i32 ||
2966 Load->getExtensionType() != ExtType) {
2967 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2968 Load->getBasePtr(), Load->getPointerInfo(),
2969 Load->getMemoryVT(), Load->getAlign(),
2970 Load->getMemOperand()->getFlags());
2971 // Update the chain uses.
2972 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2973 }
2974
2975 // Make sure that the second operand is an i32 with the right value.
2976 if (C.Op1.getValueType() != MVT::i32 ||
2977 Value != ConstOp1->getZExtValue())
2978 C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32);
2979}
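// Example (adjustSubwordCmp): an equality test of an 8-bit zero-extending
// load against 200 keeps the constant in the unextended range, so it can
// later be matched as CLI (compare logical immediate) on the memory byte.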
2980
2981// Return true if Op is either an unextended load, or a load suitable
2982// for integer register-memory comparisons of type ICmpType.
2983static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2984 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2985 if (Load) {
2986 // There are no instructions to compare a register with a memory byte.
2987 if (Load->getMemoryVT() == MVT::i8)
2988 return false;
2989 // Otherwise decide on extension type.
2990 switch (Load->getExtensionType()) {
2991 case ISD::NON_EXTLOAD:
2992 return true;
2993 case ISD::SEXTLOAD:
2994 return ICmpType != SystemZICMP::UnsignedOnly;
2995 case ISD::ZEXTLOAD:
2996 return ICmpType != SystemZICMP::SignedOnly;
2997 default:
2998 break;
2999 }
3000 }
3001 return false;
3002}
3003
3004// Return true if it is better to swap the operands of C.
3005static bool shouldSwapCmpOperands(const Comparison &C) {
3006 // Leave i128 and f128 comparisons alone, since they have no memory forms.
3007 if (C.Op0.getValueType() == MVT::i128)
3008 return false;
3009 if (C.Op0.getValueType() == MVT::f128)
3010 return false;
3011
3012 // Always keep a floating-point constant second, since comparisons with
3013 // zero can use LOAD TEST and comparisons with other constants make a
3014 // natural memory operand.
3015 if (isa<ConstantFPSDNode>(C.Op1))
3016 return false;
3017
3018 // Never swap comparisons with zero since there are many ways to optimize
3019 // those later.
3020 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3021 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
3022 return false;
3023
3024 // Also keep natural memory operands second if the loaded value is
3025 // only used here. Several comparisons have memory forms.
3026 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
3027 return false;
3028
3029 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
3030 // In that case we generally prefer the memory to be second.
3031 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
3032 // The only exceptions are when the second operand is a constant and
3033 // we can use things like CHHSI.
3034 if (!ConstOp1)
3035 return true;
3036 // The unsigned memory-immediate instructions can handle 16-bit
3037 // unsigned integers.
3038 if (C.ICmpType != SystemZICMP::SignedOnly &&
3039 isUInt<16>(ConstOp1->getZExtValue()))
3040 return false;
3041 // The signed memory-immediate instructions can handle 16-bit
3042 // signed integers.
3043 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
3044 isInt<16>(ConstOp1->getSExtValue()))
3045 return false;
3046 return true;
3047 }
3048
3049 // Try to promote the use of CGFR and CLGFR.
3050 unsigned Opcode0 = C.Op0.getOpcode();
3051 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
3052 return true;
3053 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
3054 return true;
3055 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
3056 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
3057 C.Op0.getConstantOperandVal(1) == 0xffffffff)
3058 return true;
3059
3060 return false;
3061}
3062
3063// Check whether C tests for equality between X and Y and whether X - Y
3064// or Y - X is also computed. In that case it's better to compare the
3065// result of the subtraction against zero.
3066static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL,
3067 Comparison &C) {
3068 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3069 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3070 for (SDNode *N : C.Op0->users()) {
3071 if (N->getOpcode() == ISD::SUB &&
3072 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
3073 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
3074 // Disable the nsw and nuw flags: the backend needs to handle
3075 // overflow as well during comparison elimination.
3076 N->dropFlags(SDNodeFlags::NoWrap);
3077 C.Op0 = SDValue(N, 0);
3078 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
3079 return;
3080 }
3081 }
3082 }
3083}
3084
3085// Check whether C compares a floating-point value with zero and if that
3086// floating-point value is also negated. In this case we can use the
3087// negation to set CC, so avoiding separate LOAD AND TEST and
3088// LOAD (NEGATIVE/COMPLEMENT) instructions.
3089static void adjustForFNeg(Comparison &C) {
3090 // This optimization is invalid for strict comparisons, since FNEG
3091 // does not raise any exceptions.
3092 if (C.Chain)
3093 return;
3094 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
3095 if (C1 && C1->isZero()) {
3096 for (SDNode *N : C.Op0->users()) {
3097 if (N->getOpcode() == ISD::FNEG) {
3098 C.Op0 = SDValue(N, 0);
3099 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3100 return;
3101 }
3102 }
3103 }
3104}
3105
3106// Check whether C compares (shl X, 32) with 0 and whether X is
3107// also sign-extended. In that case it is better to test the result
3108// of the sign extension using LTGFR.
3109//
3110// This case is important because InstCombine transforms a comparison
3111// with (sext (trunc X)) into a comparison with (shl X, 32).
3112static void adjustForLTGFR(Comparison &C) {
3113 // Check for a comparison between (shl X, 32) and 0.
3114 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
3115 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
3116 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3117 if (C1 && C1->getZExtValue() == 32) {
3118 SDValue ShlOp0 = C.Op0.getOperand(0);
3119 // See whether X has any SIGN_EXTEND_INREG uses.
3120 for (SDNode *N : ShlOp0->users()) {
3121 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
3122 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
3123 C.Op0 = SDValue(N, 0);
3124 return;
3125 }
3126 }
3127 }
3128 }
3129}
3130
3131// If C compares the truncation of an extending load, try to compare
3132// the untruncated value instead. This exposes more opportunities to
3133// reuse CC.
3134static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
3135 Comparison &C) {
3136 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
3137 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
3138 C.Op1.getOpcode() == ISD::Constant &&
3139 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3140 C.Op1->getAsZExtVal() == 0) {
3141 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
3142 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
3143 C.Op0.getValueSizeInBits().getFixedValue()) {
3144 unsigned Type = L->getExtensionType();
3145 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
3146 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
3147 C.Op0 = C.Op0.getOperand(0);
3148 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
3149 }
3150 }
3151 }
3152}
3153
3154// Return true if shift operation N has an in-range constant shift value.
3155// Store it in ShiftVal if so.
3156static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
3157 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
3158 if (!Shift)
3159 return false;
3160
3161 uint64_t Amount = Shift->getZExtValue();
3162 if (Amount >= N.getValueSizeInBits())
3163 return false;
3164
3165 ShiftVal = Amount;
3166 return true;
3167}
3168
3169// Check whether an AND with Mask is suitable for a TEST UNDER MASK
3170// instruction and whether the CC value is descriptive enough to handle
3171// a comparison of type Opcode between the AND result and CmpVal.
3172// CCMask says which comparison result is being tested and BitSize is
3173// the number of bits in the operands. If TEST UNDER MASK can be used,
3174// return the corresponding CC mask, otherwise return 0.
3175static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
3176 uint64_t Mask, uint64_t CmpVal,
3177 unsigned ICmpType) {
3178 assert(Mask != 0 && "ANDs with zero should have been removed by now");
3179
3180 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
3181 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
3182 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
3183 return 0;
3184
3185 // Work out the masks for the lowest and highest bits.
3186 uint64_t High = llvm::bit_floor(Mask);
3187 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
3188
3189 // Signed ordered comparisons are effectively unsigned if the sign
3190 // bit is dropped.
3191 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
3192
3193 // Check for equality comparisons with 0, or the equivalent.
3194 if (CmpVal == 0) {
3195 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3196 return SystemZ::CCMASK_TM_ALL_0;
3197 if (CCMask == SystemZ::CCMASK_CMP_NE)
3198 return SystemZ::CCMASK_TM_SOME_1;
3199 }
3200 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
3201 if (CCMask == SystemZ::CCMASK_CMP_LT)
3202 return SystemZ::CCMASK_TM_ALL_0;
3203 if (CCMask == SystemZ::CCMASK_CMP_GE)
3204 return SystemZ::CCMASK_TM_SOME_1;
3205 }
3206 if (EffectivelyUnsigned && CmpVal < Low) {
3207 if (CCMask == SystemZ::CCMASK_CMP_LE)
3208 return SystemZ::CCMASK_TM_ALL_0;
3209 if (CCMask == SystemZ::CCMASK_CMP_GT)
3210 return SystemZ::CCMASK_TM_SOME_1;
3211 }
3212
3213 // Check for equality comparisons with the mask, or the equivalent.
3214 if (CmpVal == Mask) {
3215 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3216 return SystemZ::CCMASK_TM_ALL_1;
3217 if (CCMask == SystemZ::CCMASK_CMP_NE)
3218 return SystemZ::CCMASK_TM_SOME_0;
3219 }
3220 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
3221 if (CCMask == SystemZ::CCMASK_CMP_GT)
3222 return SystemZ::CCMASK_TM_ALL_1;
3223 if (CCMask == SystemZ::CCMASK_CMP_LE)
3224 return SystemZ::CCMASK_TM_SOME_0;
3225 }
3226 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
3227 if (CCMask == SystemZ::CCMASK_CMP_GE)
3228 return SystemZ::CCMASK_TM_ALL_1;
3229 if (CCMask == SystemZ::CCMASK_CMP_LT)
3230 return SystemZ::CCMASK_TM_SOME_0;
3231 }
3232
3233 // Check for ordered comparisons with the top bit.
3234 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
3235 if (CCMask == SystemZ::CCMASK_CMP_LE)
3236 return SystemZ::CCMASK_TM_MSB_0;
3237 if (CCMask == SystemZ::CCMASK_CMP_GT)
3238 return SystemZ::CCMASK_TM_MSB_1;
3239 }
3240 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
3241 if (CCMask == SystemZ::CCMASK_CMP_LT)
3242 return SystemZ::CCMASK_TM_MSB_0;
3243 if (CCMask == SystemZ::CCMASK_CMP_GE)
3244 return SystemZ::CCMASK_TM_MSB_1;
3245 }
3246
3247 // If there are just two bits, we can do equality checks for Low and High
3248 // as well.
3249 if (Mask == Low + High) {
3250 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
3251 return SystemZ::CCMASK_TM_MIXED_MSB_0;
3252 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
3253 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY;
3254 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
3255 return SystemZ::CCMASK_TM_MIXED_MSB_1;
3256 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
3257 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY;
3258 }
3259
3260 // Looks like we've exhausted our options.
3261 return 0;
3262}
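// Example (getTestUnderMaskCond): testing "(X & 0xFF00) == 0" maps to
// CCMASK_TM_ALL_0, since TEST UNDER MASK sets CC 0 when all selected bits
// are zero.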
3263
3264// See whether C can be implemented as a TEST UNDER MASK instruction.
3265// Update the arguments with the TM version if so.
3266static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
3267 Comparison &C) {
3268 // Use VECTOR TEST UNDER MASK for i128 operations.
3269 if (C.Op0.getValueType() == MVT::i128) {
3270 // We can use VTM for EQ/NE comparisons of x & y against 0.
3271 if (C.Op0.getOpcode() == ISD::AND &&
3272 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3273 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
3274 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
3275 if (Mask && Mask->getAPIntValue() == 0) {
3276 C.Opcode = SystemZISD::VTM;
3277 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
3278 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
3279 C.CCValid = SystemZ::CCMASK_VCMP;
3280 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3281 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3282 else
3283 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3284 }
3285 }
3286 return;
3287 }
3288
3289 // Check that we have a comparison with a constant.
3290 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3291 if (!ConstOp1)
3292 return;
3293 uint64_t CmpVal = ConstOp1->getZExtValue();
3294
3295 // Check whether the nonconstant input is an AND with a constant mask.
3296 Comparison NewC(C);
3297 uint64_t MaskVal;
3298 ConstantSDNode *Mask = nullptr;
3299 if (C.Op0.getOpcode() == ISD::AND) {
3300 NewC.Op0 = C.Op0.getOperand(0);
3301 NewC.Op1 = C.Op0.getOperand(1);
3302 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
3303 if (!Mask)
3304 return;
3305 MaskVal = Mask->getZExtValue();
3306 } else {
3307 // There is no instruction to compare with a 64-bit immediate
3308 // so use TMHH instead if possible. We need an unsigned ordered
3309 // comparison with an i64 immediate.
3310 if (NewC.Op0.getValueType() != MVT::i64 ||
3311 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
3312 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
3313 NewC.ICmpType == SystemZICMP::SignedOnly)
3314 return;
3315 // Convert LE and GT comparisons into LT and GE.
3316 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
3317 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
3318 if (CmpVal == uint64_t(-1))
3319 return;
3320 CmpVal += 1;
3321 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
3322 }
3323 // If the low N bits of Op1 are zero then the low N bits of Op0 can
3324 // be masked off without changing the result.
3325 MaskVal = -(CmpVal & -CmpVal);
3326 NewC.ICmpType = SystemZICMP::UnsignedOnly;
3327 }
3328 if (!MaskVal)
3329 return;
3330
3331 // Check whether the combination of mask, comparison value and comparison
3332 // type are suitable.
3333 unsigned BitSize = NewC.Op0.getValueSizeInBits();
3334 unsigned NewCCMask, ShiftVal;
3335 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3336 NewC.Op0.getOpcode() == ISD::SHL &&
3337 isSimpleShift(NewC.Op0, ShiftVal) &&
3338 (MaskVal >> ShiftVal != 0) &&
3339 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
3340 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3341 MaskVal >> ShiftVal,
3342 CmpVal >> ShiftVal,
3343 SystemZICMP::Any))) {
3344 NewC.Op0 = NewC.Op0.getOperand(0);
3345 MaskVal >>= ShiftVal;
3346 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3347 NewC.Op0.getOpcode() == ISD::SRL &&
3348 isSimpleShift(NewC.Op0, ShiftVal) &&
3349 (MaskVal << ShiftVal != 0) &&
3350 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
3351 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3352 MaskVal << ShiftVal,
3353 CmpVal << ShiftVal,
3354 SystemZICMP::UnsignedOnly))) {
3355 NewC.Op0 = NewC.Op0.getOperand(0);
3356 MaskVal <<= ShiftVal;
3357 } else {
3358 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
3359 NewC.ICmpType);
3360 if (!NewCCMask)
3361 return;
3362 }
3363
3364 // Go ahead and make the change.
3365 C.Opcode = SystemZISD::TM;
3366 C.Op0 = NewC.Op0;
3367 if (Mask && Mask->getZExtValue() == MaskVal)
3368 C.Op1 = SDValue(Mask, 0);
3369 else
3370 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
3371 C.CCValid = SystemZ::CCMASK_TM;
3372 C.CCMask = NewCCMask;
3373}
3374
3375// Implement i128 comparison in vector registers.
3376static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
3377 Comparison &C) {
3378 if (C.Opcode != SystemZISD::ICMP)
3379 return;
3380 if (C.Op0.getValueType() != MVT::i128)
3381 return;
3382
3383 // Recognize vector comparison reductions.
3384 if ((C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3385 C.CCMask == SystemZ::CCMASK_CMP_NE) &&
3386 (isNullConstant(C.Op1) || isAllOnesConstant(C.Op1))) {
3387 bool CmpEq = C.CCMask == SystemZ::CCMASK_CMP_EQ;
3388 bool CmpNull = isNullConstant(C.Op1);
3389 SDValue Src = peekThroughBitcasts(C.Op0);
3390 if (Src.hasOneUse() && isBitwiseNot(Src)) {
3391 Src = Src.getOperand(0);
3392 CmpNull = !CmpNull;
3393 }
3394 unsigned Opcode = 0;
3395 if (Src.hasOneUse()) {
3396 switch (Src.getOpcode()) {
3397 case SystemZISD::VICMPE: Opcode = SystemZISD::VICMPES; break;
3398 case SystemZISD::VICMPH: Opcode = SystemZISD::VICMPHS; break;
3399 case SystemZISD::VICMPHL: Opcode = SystemZISD::VICMPHLS; break;
3400 case SystemZISD::VFCMPE: Opcode = SystemZISD::VFCMPES; break;
3401 case SystemZISD::VFCMPH: Opcode = SystemZISD::VFCMPHS; break;
3402 case SystemZISD::VFCMPHE: Opcode = SystemZISD::VFCMPHES; break;
3403 default: break;
3404 }
3405 }
3406 if (Opcode) {
3407 C.Opcode = Opcode;
3408 C.Op0 = Src->getOperand(0);
3409 C.Op1 = Src->getOperand(1);
3410 C.CCValid = SystemZ::CCMASK_VCMP;
3411 C.CCMask = CmpNull ? SystemZ::CCMASK_VCMP_NONE : SystemZ::CCMASK_VCMP_ALL;
3412 if (!CmpEq)
3413 C.CCMask ^= C.CCValid;
3414 return;
3415 }
3416 }
3417
3418 // Everything below here is not useful if we have native i128 compares.
3419 if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
3420 return;
3421
3422 // (In-)Equality comparisons can be implemented via VCEQGS.
3423 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3424 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3425 C.Opcode = SystemZISD::VICMPES;
3426 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
3427 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
3428 C.CCValid = SystemZ::CCMASK_VCMP;
3429 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3430 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3431 else
3432 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3433 return;
3434 }
3435
3436 // Normalize other comparisons to GT.
3437 bool Swap = false, Invert = false;
3438 switch (C.CCMask) {
3439 case SystemZ::CCMASK_CMP_GT: break;
3440 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
3441 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
3442 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
3443 default: llvm_unreachable("Invalid integer condition!");
3444 }
3445 if (Swap)
3446 std::swap(C.Op0, C.Op1);
3447
3448 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3449 C.Opcode = SystemZISD::UCMP128HI;
3450 else
3451 C.Opcode = SystemZISD::SCMP128HI;
3452 C.CCValid = SystemZ::CCMASK_ANY;
3453 C.CCMask = SystemZ::CCMASK_1;
3454
3455 if (Invert)
3456 C.CCMask ^= C.CCValid;
3457}
3458
3459// See whether the comparison argument contains a redundant AND
3460// and remove it if so. This sometimes happens due to the generic
3461// BRCOND expansion.
3462static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL,
3463 Comparison &C) {
3464 if (C.Op0.getOpcode() != ISD::AND)
3465 return;
3466 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3467 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3468 return;
3469 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3470 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3471 return;
3472
3473 C.Op0 = C.Op0.getOperand(0);
3474}
3475
3476// Return a Comparison that tests the condition-code result of intrinsic
3477// node Call against constant integer CC using comparison code Cond.
3478// Opcode is the opcode of the SystemZISD operation for the intrinsic
3479// and CCValid is the set of possible condition-code results.
3480static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3481 SDValue Call, unsigned CCValid, uint64_t CC,
3482 ISD::CondCode Cond) {
3483 Comparison C(Call, SDValue(), SDValue());
3484 C.Opcode = Opcode;
3485 C.CCValid = CCValid;
3486 if (Cond == ISD::SETEQ)
3487 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3488 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3489 else if (Cond == ISD::SETNE)
3490 // ...and the inverse of that.
3491 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3492 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3493 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3494 // always true for CC>3.
3495 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3496 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3497 // ...and the inverse of that.
3498 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3499 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3500 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3501 // always true for CC>3.
3502 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3503 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3504 // ...and the inverse of that.
3505 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3506 else
3507 llvm_unreachable("Unexpected integer comparison type");
3508 C.CCMask &= CCValid;
3509 return C;
3510}
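// Example (getIntrinsicCmp): SETEQ against CC == 0 sets CCMask to bit 3
// (value 8), and SETEQ against CC == 3 sets it to bit 0 (value 1).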
3511
3512// Decide how to implement a comparison of type Cond between CmpOp0 and CmpOp1.
3513static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3514 ISD::CondCode Cond, const SDLoc &DL,
3515 SDValue Chain = SDValue(),
3516 bool IsSignaling = false) {
3517 if (CmpOp1.getOpcode() == ISD::Constant) {
3518 assert(!Chain);
3519 unsigned Opcode, CCValid;
3520 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3521 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3522 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3523 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3524 CmpOp1->getAsZExtVal(), Cond);
3525 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3526 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3527 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3528 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3529 CmpOp1->getAsZExtVal(), Cond);
3530 }
3531 Comparison C(CmpOp0, CmpOp1, Chain);
3532 C.CCMask = CCMaskForCondCode(Cond);
3533 if (C.Op0.getValueType().isFloatingPoint()) {
3534 C.CCValid = SystemZ::CCMASK_FCMP;
3535 if (!C.Chain)
3536 C.Opcode = SystemZISD::FCMP;
3537 else if (!IsSignaling)
3538 C.Opcode = SystemZISD::STRICT_FCMP;
3539 else
3540 C.Opcode = SystemZISD::STRICT_FCMPS;
3541 adjustForFNeg(DAG, DL, C);
3542 } else {
3543 assert(!C.Chain);
3544 C.CCValid = SystemZ::CCMASK_ICMP;
3545 C.Opcode = SystemZISD::ICMP;
3546 // Choose the type of comparison. Equality and inequality tests can
3547 // use either signed or unsigned comparisons. The choice also doesn't
3548 // matter if both sign bits are known to be clear. In those cases we
3549 // want to give the main isel code the freedom to choose whichever
3550 // form fits best.
3551 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3552 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3553 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3554 C.ICmpType = SystemZICMP::Any;
3555 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3556 C.ICmpType = SystemZICMP::UnsignedOnly;
3557 else
3558 C.ICmpType = SystemZICMP::SignedOnly;
3559 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3560 adjustForRedundantAnd(DAG, DL, C);
3561 adjustZeroCmp(DAG, DL, C);
3562 adjustSubwordCmp(DAG, DL, C);
3563 adjustForSubtraction(DAG, DL, C);
3564 adjustForLTGFR(C);
3565 adjustICmpTruncate(DAG, DL, C);
3566 }
3567
3568 if (shouldSwapCmpOperands(C)) {
3569 std::swap(C.Op0, C.Op1);
3570 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3571 }
3572
3573 adjustForTestUnderMask(DAG, DL, C);
3574 adjustICmp128(DAG, DL, C);
3575 return C;
3576}
3577
3578// Emit the comparison instruction described by C.
3579static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
3580 if (!C.Op1.getNode()) {
3581 SDNode *Node;
3582 switch (C.Op0.getOpcode()) {
3583 case ISD::INTRINSIC_W_CHAIN:
3584 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3585 return SDValue(Node, 0);
3586 case ISD::INTRINSIC_WO_CHAIN:
3587 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3588 return SDValue(Node, Node->getNumValues() - 1);
3589 default:
3590 llvm_unreachable("Invalid comparison operands");
3591 }
3592 }
3593 if (C.Opcode == SystemZISD::ICMP)
3594 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3595 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3596 if (C.Opcode == SystemZISD::TM) {
3597 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3598 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1));
3599 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3600 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3601 }
3602 if (C.Opcode == SystemZISD::VICMPES ||
3603 C.Opcode == SystemZISD::VICMPHS ||
3604 C.Opcode == SystemZISD::VICMPHLS ||
3605 C.Opcode == SystemZISD::VFCMPES ||
3606 C.Opcode == SystemZISD::VFCMPHS ||
3607 C.Opcode == SystemZISD::VFCMPHES) {
3608 EVT IntVT = C.Op0.getValueType().changeVectorElementTypeToInteger();
3609 SDVTList VTs = DAG.getVTList(IntVT, MVT::i32);
3610 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3611 return SDValue(Val.getNode(), 1);
3612 }
3613 if (C.Chain) {
3614 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3615 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3616 }
3617 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3618}
3619
3620// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3621// 64 bits. Extend is the extension type to use. Store the high part
3622// in Hi and the low part in Lo.
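// For example, an unsigned (ISD::ZERO_EXTEND) multiply of 0x80000000 by 2
// becomes a 64-bit multiply producing 0x100000000, so Hi is 1 and Lo is 0.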
3623static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3624 SDValue Op0, SDValue Op1, SDValue &Hi,
3625 SDValue &Lo) {
3626 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3627 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3628 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3629 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3630 DAG.getConstant(32, DL, MVT::i64));
3631 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3632 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3633}
3634
3635// Lower a binary operation that produces two VT results, one in each
3636// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3637// and Opcode performs the GR128 operation. Store the even register result
3638// in Even and the odd register result in Odd.
3639static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3640 unsigned Opcode, SDValue Op0, SDValue Op1,
3641 SDValue &Even, SDValue &Odd) {
3642 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3643 bool Is32Bit = is32Bit(VT);
3644 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3645 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3646}
3647
3648// Return an i32 value that is 1 if the CC value produced by CCReg is
3649// in the mask CCMask and 0 otherwise. CC is known to have a value
3650// in CCValid, so other values can be ignored.
3651static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3652 unsigned CCValid, unsigned CCMask) {
3653 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3654 DAG.getConstant(0, DL, MVT::i32),
3655 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3656 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3657 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3658}
3659
3660 // Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3661// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3662// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3663// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3664// floating-point comparisons.
3666 static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
3667 switch (CC) {
3668 case ISD::SETOEQ:
3669 case ISD::SETEQ:
3670 switch (Mode) {
3671 case CmpMode::Int: return SystemZISD::VICMPE;
3672 case CmpMode::FP: return SystemZISD::VFCMPE;
3673 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3674 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3675 }
3676 llvm_unreachable("Bad mode");
3677
3678 case ISD::SETOGE:
3679 case ISD::SETGE:
3680 switch (Mode) {
3681 case CmpMode::Int: return 0;
3682 case CmpMode::FP: return SystemZISD::VFCMPHE;
3683 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3684 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3685 }
3686 llvm_unreachable("Bad mode");
3687
3688 case ISD::SETOGT:
3689 case ISD::SETGT:
3690 switch (Mode) {
3691 case CmpMode::Int: return SystemZISD::VICMPH;
3692 case CmpMode::FP: return SystemZISD::VFCMPH;
3693 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3694 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3695 }
3696 llvm_unreachable("Bad mode");
3697
3698 case ISD::SETUGT:
3699 switch (Mode) {
3700 case CmpMode::Int: return SystemZISD::VICMPHL;
3701 case CmpMode::FP: return 0;
3702 case CmpMode::StrictFP: return 0;
3703 case CmpMode::SignalingFP: return 0;
3704 }
3705 llvm_unreachable("Bad mode");
3706
3707 default:
3708 return 0;
3709 }
3710}
3711
3712// Return the SystemZISD vector comparison operation for CC or its inverse,
3713// or 0 if neither can be done directly. Indicate in Invert whether the
3714// result is for the inverse of CC. Mode is as above.
3715 static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
3716 bool &Invert) {
3717 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3718 Invert = false;
3719 return Opcode;
3720 }
3721
3722 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3723 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3724 Invert = true;
3725 return Opcode;
3726 }
3727
3728 return 0;
3729}
3730
3731// Return a v2f64 that contains the extended form of elements Start and Start+1
3732// of v4f32 value Op. If Chain is nonnull, return the strict form.
3733static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3734 SDValue Op, SDValue Chain) {
3735 int Mask[] = { Start, -1, Start + 1, -1 };
3736 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3737 if (Chain) {
3738 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3739 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3740 }
3741 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3742}
3743
3744// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3745// producing a result of type VT. If Chain is nonnull, return the strict form.
3746SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3747 const SDLoc &DL, EVT VT,
3748 SDValue CmpOp0,
3749 SDValue CmpOp1,
3750 SDValue Chain) const {
3751 // There is no hardware support for v4f32 (unless we have the vector
3752 // enhancements facility 1), so extend the vector into two v2f64s
3753 // and compare those.
3754 if (CmpOp0.getValueType() == MVT::v4f32 &&
3755 !Subtarget.hasVectorEnhancements1()) {
3756 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3757 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3758 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3759 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3760 if (Chain) {
3761 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3762 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3763 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3764 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3765 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3766 H1.getValue(1), L1.getValue(1),
3767 HRes.getValue(1), LRes.getValue(1) };
3768 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3769 SDValue Ops[2] = { Res, NewChain };
3770 return DAG.getMergeValues(Ops, DL);
3771 }
3772 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3773 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3774 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3775 }
3776 if (Chain) {
3777 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3778 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3779 }
3780 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3781}
3782
3783// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3784// an integer mask of type VT. If Chain is nonnull, we have a strict
3785// floating-point comparison. If in addition IsSignaling is true, we have
3786// a strict signaling floating-point comparison.
3787SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3788 const SDLoc &DL, EVT VT,
3789 ISD::CondCode CC,
3790 SDValue CmpOp0,
3791 SDValue CmpOp1,
3792 SDValue Chain,
3793 bool IsSignaling) const {
3794 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3795 assert (!Chain || IsFP);
3796 assert (!IsSignaling || Chain);
3797 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3798 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
3799 bool Invert = false;
3800 SDValue Cmp;
3801 switch (CC) {
3802 // Handle tests for order using (or (ogt y x) (oge x y)).
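// (For non-NaN operands exactly one of y > x and x >= y holds, while both
// are false if either operand is a NaN, so the OR is true iff the operands
// are ordered; SETUO simply inverts that result.)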
3803 case ISD::SETUO:
3804 Invert = true;
3805 [[fallthrough]];
3806 case ISD::SETO: {
3807 assert(IsFP && "Unexpected integer comparison");
3808 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3809 DL, VT, CmpOp1, CmpOp0, Chain);
3810 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3811 DL, VT, CmpOp0, CmpOp1, Chain);
3812 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
3813 if (Chain)
3814 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3815 LT.getValue(1), GE.getValue(1));
3816 break;
3817 }
3818
3819 // Handle <> tests using (or (ogt y x) (ogt x y)).
3820 case ISD::SETUEQ:
3821 Invert = true;
3822 [[fallthrough]];
3823 case ISD::SETONE: {
3824 assert(IsFP && "Unexpected integer comparison");
3825 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3826 DL, VT, CmpOp1, CmpOp0, Chain);
3827 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3828 DL, VT, CmpOp0, CmpOp1, Chain);
3829 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3830 if (Chain)
3831 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3832 LT.getValue(1), GT.getValue(1));
3833 break;
3834 }
3835
3836 // Otherwise a single comparison is enough. It doesn't really
3837 // matter whether we try the inversion or the swap first, since
3838 // there are no cases where both work.
3839 default:
3840 // Optimize sign-bit comparisons to signed compares.
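// ((x & sign-mask) == 0) is equivalent to the signed test (x >= 0), and
// ((x & sign-mask) != 0) to (x < 0), so the AND can be dropped when the
// other operand is an all-zeros splat.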
3841 if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
3842 ISD::isConstantSplatVectorAllZeros(CmpOp1.getNode())) {
3843 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3844 APInt Mask;
3845 if (CmpOp0.getOpcode() == ISD::AND
3846 && ISD::isConstantSplatVector(CmpOp0.getOperand(1).getNode(), Mask)
3847 && Mask == APInt::getSignMask(EltSize)) {
3848 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
3849 CmpOp0 = CmpOp0.getOperand(0);
3850 }
3851 }
3852 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3853 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3854 else {
3855 CC = ISD::getSetCCSwappedOperands(CC);
3856 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3857 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3858 else
3859 llvm_unreachable("Unhandled comparison");
3860 }
3861 if (Chain)
3862 Chain = Cmp.getValue(1);
3863 break;
3864 }
3865 if (Invert) {
3866 SDValue Mask =
3867 DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64));
3868 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3869 }
3870 if (Chain && Chain.getNode() != Cmp.getNode()) {
3871 SDValue Ops[2] = { Cmp, Chain };
3872 Cmp = DAG.getMergeValues(Ops, DL);
3873 }
3874 return Cmp;
3875}
3876
3877SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3878 SelectionDAG &DAG) const {
3879 SDValue CmpOp0 = Op.getOperand(0);
3880 SDValue CmpOp1 = Op.getOperand(1);
3881 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3882 SDLoc DL(Op);
3883 EVT VT = Op.getValueType();
3884 if (VT.isVector())
3885 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3886
3887 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3888 SDValue CCReg = emitCmp(DAG, DL, C);
3889 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3890}
3891
3892SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3893 SelectionDAG &DAG,
3894 bool IsSignaling) const {
3895 SDValue Chain = Op.getOperand(0);
3896 SDValue CmpOp0 = Op.getOperand(1);
3897 SDValue CmpOp1 = Op.getOperand(2);
3898 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3899 SDLoc DL(Op);
3900 EVT VT = Op.getNode()->getValueType(0);
3901 if (VT.isVector()) {
3902 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3903 Chain, IsSignaling);
3904 return Res.getValue(Op.getResNo());
3905 }
3906
3907 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3908 SDValue CCReg = emitCmp(DAG, DL, C);
3909 CCReg->setFlags(Op->getFlags());
3910 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3911 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3912 return DAG.getMergeValues(Ops, DL);
3913}
3914
3915SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3916 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3917 SDValue CmpOp0 = Op.getOperand(2);
3918 SDValue CmpOp1 = Op.getOperand(3);
3919 SDValue Dest = Op.getOperand(4);
3920 SDLoc DL(Op);
3921
3922 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3923 SDValue CCReg = emitCmp(DAG, DL, C);
3924 return DAG.getNode(
3925 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3926 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3927 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3928}
3929
3930// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3931// allowing Pos and Neg to be wider than CmpOp.
3932static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3933 return (Neg.getOpcode() == ISD::SUB &&
3934 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3935 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3936 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3937 Pos.getOperand(0) == CmpOp)));
3938}
3939
3940// Return the absolute or negative absolute of Op; IsNegative decides which.
3941 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
3942 bool IsNegative) {
3943 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
3944 if (IsNegative)
3945 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3946 DAG.getConstant(0, DL, Op.getValueType()), Op);
3947 return Op;
3948}
3949
3950 static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL,
3951 Comparison C, SDValue TrueOp, SDValue FalseOp) {
3952 EVT VT = MVT::i128;
3953 unsigned Op;
3954
3955 if (C.CCMask == SystemZ::CCMASK_CMP_NE ||
3956 C.CCMask == SystemZ::CCMASK_CMP_GE ||
3957 C.CCMask == SystemZ::CCMASK_CMP_LE) {
3958 std::swap(TrueOp, FalseOp);
3959 C.CCMask ^= C.CCValid;
3960 }
3961 if (C.CCMask == SystemZ::CCMASK_CMP_LT) {
3962 std::swap(C.Op0, C.Op1);
3963 C.CCMask = SystemZ::CCMASK_CMP_GT;
3964 }
3965 switch (C.CCMask) {
3966 case SystemZ::CCMASK_CMP_EQ:
3967 Op = SystemZISD::VICMPE;
3968 break;
3969 case SystemZ::CCMASK_CMP_GT:
3970 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3971 Op = SystemZISD::VICMPHL;
3972 else
3973 Op = SystemZISD::VICMPH;
3974 break;
3975 default:
3976 llvm_unreachable("Unhandled comparison");
3977 break;
3978 }
3979
3980 SDValue Mask = DAG.getNode(Op, DL, VT, C.Op0, C.Op1);
3981 TrueOp = DAG.getNode(ISD::AND, DL, VT, TrueOp, Mask);
3982 FalseOp = DAG.getNode(ISD::AND, DL, VT, FalseOp, DAG.getNOT(DL, Mask, VT));
3983 return DAG.getNode(ISD::OR, DL, VT, TrueOp, FalseOp);
3984}
3985
3986SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3987 SelectionDAG &DAG) const {
3988 SDValue CmpOp0 = Op.getOperand(0);
3989 SDValue CmpOp1 = Op.getOperand(1);
3990 SDValue TrueOp = Op.getOperand(2);
3991 SDValue FalseOp = Op.getOperand(3);
3992 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3993 SDLoc DL(Op);
3994
3995 // SELECT_CC involving f16 will not have the cmp-ops promoted by the
3996 // legalizer, as it will be handled according to the type of the resulting
3997 // value. Extend them here if needed.
3998 if (CmpOp0.getSimpleValueType() == MVT::f16) {
3999 CmpOp0 = DAG.getFPExtendOrRound(CmpOp0, SDLoc(CmpOp0), MVT::f32);
4000 CmpOp1 = DAG.getFPExtendOrRound(CmpOp1, SDLoc(CmpOp1), MVT::f32);
4001 }
4002
4003 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
4004
4005 // Check for absolute and negative-absolute selections, including those
4006 // where the comparison value is sign-extended (for LPGFR and LNGFR).
4007 // This check supplements the one in DAGCombiner.
4008 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
4009 C.CCMask != SystemZ::CCMASK_CMP_NE &&
4010 C.Op1.getOpcode() == ISD::Constant &&
4011 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
4012 C.Op1->getAsZExtVal() == 0) {
4013 if (isAbsolute(C.Op0, TrueOp, FalseOp))
4014 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
4015 if (isAbsolute(C.Op0, FalseOp, TrueOp))
4016 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
4017 }
4018
4019 if (Subtarget.hasVectorEnhancements3() &&
4020 C.Opcode == SystemZISD::ICMP &&
4021 C.Op0.getValueType() == MVT::i128 &&
4022 TrueOp.getValueType() == MVT::i128) {
4023 return getI128Select(DAG, DL, C, TrueOp, FalseOp);
4024 }
4025
4026 SDValue CCReg = emitCmp(DAG, DL, C);
4027 SDValue Ops[] = {TrueOp, FalseOp,
4028 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
4029 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
4030
4031 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
4032}
4033
4034SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
4035 SelectionDAG &DAG) const {
4036 SDLoc DL(Node);
4037 const GlobalValue *GV = Node->getGlobal();
4038 int64_t Offset = Node->getOffset();
4039 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4040 CodeModel::Model CM = DAG.getTarget().getCodeModel();
4041
4042 SDValue Result;
4043 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
4044 if (isInt<32>(Offset)) {
4045 // Assign anchors at 1<<12 byte boundaries.
4046 uint64_t Anchor = Offset & ~uint64_t(0xfff);
4047 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
4048 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4049
4050 // The offset can be folded into the address if it is aligned to a
4051 // halfword.
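// For example, an offset of 0x1234 gets the anchor 0x1000; the remaining
// 0x234 is even (halfword-aligned), so it is folded into the PCREL_OFFSET
// node instead of being emitted as a separate addition.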
4052 Offset -= Anchor;
4053 if (Offset != 0 && (Offset & 1) == 0) {
4054 SDValue Full =
4055 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
4056 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
4057 Offset = 0;
4058 }
4059 } else {
4060 // Conservatively load a constant offset greater than 32 bits into a
4061 // register below.
4062 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
4063 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4064 }
4065 } else if (Subtarget.isTargetELF()) {
4066 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
4067 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4068 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4069 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4070 } else if (Subtarget.isTargetzOS()) {
4071 Result = getADAEntry(DAG, GV, DL, PtrVT);
4072 } else
4073 llvm_unreachable("Unexpected Subtarget");
4074
4075 // If there was a non-zero offset that we didn't fold, create an explicit
4076 // addition for it.
4077 if (Offset != 0)
4078 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4079 DAG.getSignedConstant(Offset, DL, PtrVT));
4080
4081 return Result;
4082}
4083
4084SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
4085 SelectionDAG &DAG,
4086 unsigned Opcode,
4087 SDValue GOTOffset) const {
4088 SDLoc DL(Node);
4089 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4090 SDValue Chain = DAG.getEntryNode();
4091 SDValue Glue;
4092
4093 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4094 CallingConv::GHC)
4095 report_fatal_error("In GHC calling convention TLS is not supported");
4096
4097 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
4098 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
4099 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
4100 Glue = Chain.getValue(1);
4101 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
4102 Glue = Chain.getValue(1);
4103
4104 // The first call operand is the chain and the second is the TLS symbol.
4105 SmallVector<SDValue, 8> Ops;
4106 Ops.push_back(Chain);
4107 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
4108 Node->getValueType(0),
4109 0, 0));
4110
4111 // Add argument registers to the end of the list so that they are
4112 // known live into the call.
4113 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
4114 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
4115
4116 // Add a register mask operand representing the call-preserved registers.
4117 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4118 const uint32_t *Mask =
4119 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4120 assert(Mask && "Missing call preserved mask for calling convention");
4121 Ops.push_back(DAG.getRegisterMask(Mask));
4122
4123 // Glue the call to the argument copies.
4124 Ops.push_back(Glue);
4125
4126 // Emit the call.
4127 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4128 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
4129 Glue = Chain.getValue(1);
4130
4131 // Copy the return value from %r2.
4132 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
4133}
4134
4135SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
4136 SelectionDAG &DAG) const {
4137 SDValue Chain = DAG.getEntryNode();
4138 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4139
4140 // The high part of the thread pointer is in access register 0.
4141 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
4142 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
4143
4144 // The low part of the thread pointer is in access register 1.
4145 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
4146 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
4147
4148 // Merge them into a single 64-bit address.
4149 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
4150 DAG.getConstant(32, DL, PtrVT));
4151 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
4152}
4153
4154SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
4155 SelectionDAG &DAG) const {
4156 if (DAG.getTarget().useEmulatedTLS())
4157 return LowerToTLSEmulatedModel(Node, DAG);
4158 SDLoc DL(Node);
4159 const GlobalValue *GV = Node->getGlobal();
4160 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4161 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
4162
4163 if (DAG.getMachineFunction().getFunction().getCallingConv() ==
4164 CallingConv::GHC)
4165 report_fatal_error("In GHC calling convention TLS is not supported");
4166
4167 SDValue TP = lowerThreadPointer(DL, DAG);
4168
4169 // Get the offset of GA from the thread pointer, based on the TLS model.
4170 SDValue Offset;
4171 switch (model) {
4172 case TLSModel::GeneralDynamic: {
4173 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
4174 SystemZConstantPoolValue *CPV =
4175 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
4176
4177 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4178 Offset = DAG.getLoad(
4179 PtrVT, DL, DAG.getEntryNode(), Offset,
4180 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4181
4182 // Call __tls_get_offset to retrieve the offset.
4183 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
4184 break;
4185 }
4186
4187 case TLSModel::LocalDynamic: {
4188 // Load the GOT offset of the module ID.
4189 SystemZConstantPoolValue *CPV =
4190 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
4191
4192 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4193 Offset = DAG.getLoad(
4194 PtrVT, DL, DAG.getEntryNode(), Offset,
4195 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4196
4197 // Call __tls_get_offset to retrieve the module base offset.
4198 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
4199
4200 // Note: The SystemZLDCleanupPass will remove redundant computations
4201 // of the module base offset. Count total number of local-dynamic
4202 // accesses to trigger execution of that pass.
4203 SystemZMachineFunctionInfo* MFI =
4204 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
4205 MFI->incNumLocalDynamicTLSAccesses();
4206
4207 // Add the per-symbol offset.
4208 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
4209
4210 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4211 DTPOffset = DAG.getLoad(
4212 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
4213 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4214
4215 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
4216 break;
4217 }
4218
4219 case TLSModel::InitialExec: {
4220 // Load the offset from the GOT.
4221 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
4222 SystemZII::MO_INDNTPOFF);
4223 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
4224 Offset =
4225 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
4226 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4227 break;
4228 }
4229
4230 case TLSModel::LocalExec: {
4231 // Force the offset into the constant pool and load it from there.
4232 SystemZConstantPoolValue *CPV =
4233 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
4234
4235 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4236 Offset = DAG.getLoad(
4237 PtrVT, DL, DAG.getEntryNode(), Offset,
4238 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
4239 break;
4240 }
4241 }
4242
4243 // Add the base and offset together.
4244 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
4245}
4246
4247SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
4248 SelectionDAG &DAG) const {
4249 SDLoc DL(Node);
4250 const BlockAddress *BA = Node->getBlockAddress();
4251 int64_t Offset = Node->getOffset();
4252 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4253
4254 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
4255 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4256 return Result;
4257}
4258
4259SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
4260 SelectionDAG &DAG) const {
4261 SDLoc DL(JT);
4262 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4263 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
4264
4265 // Use LARL to load the address of the table.
4266 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4267}
4268
4269SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
4270 SelectionDAG &DAG) const {
4271 SDLoc DL(CP);
4272 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4273
4274 SDValue Result;
4275 if (CP->isMachineConstantPoolEntry())
4276 Result =
4277 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
4278 else
4279 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
4280 CP->getOffset());
4281
4282 // Use LARL to load the address of the constant pool entry.
4283 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4284}
4285
4286SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
4287 SelectionDAG &DAG) const {
4288 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4289 MachineFunction &MF = DAG.getMachineFunction();
4290 MachineFrameInfo &MFI = MF.getFrameInfo();
4291 MFI.setFrameAddressIsTaken(true);
4292
4293 SDLoc DL(Op);
4294 unsigned Depth = Op.getConstantOperandVal(0);
4295 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4296
4297 // By definition, the frame address is the address of the back chain. (In
4298 // the case of packed stack without backchain, return the address where the
4299 // backchain would have been stored. This will either be an unused space or
4300 // contain a saved register).
4301 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
4302 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
4303
4304 if (Depth > 0) {
4305 // FIXME The frontend should detect this case.
4306 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4307 report_fatal_error("Unsupported stack frame traversal count");
4308
4309 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
4310 while (Depth--) {
4311 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
4312 MachinePointerInfo());
4313 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
4314 }
4315 }
4316
4317 return BackChain;
4318}
4319
4320SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
4321 SelectionDAG &DAG) const {
4322 MachineFunction &MF = DAG.getMachineFunction();
4323 MachineFrameInfo &MFI = MF.getFrameInfo();
4324 MFI.setReturnAddressIsTaken(true);
4325
4326 SDLoc DL(Op);
4327 unsigned Depth = Op.getConstantOperandVal(0);
4328 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4329
4330 if (Depth > 0) {
4331 // FIXME The frontend should detect this case.
4332 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4333 report_fatal_error("Unsupported stack frame traversal count");
4334
4335 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
4336 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4337 int Offset = TFL->getReturnAddressOffset(MF);
4338 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
4339 DAG.getSignedConstant(Offset, DL, PtrVT));
4340 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
4341 MachinePointerInfo());
4342 }
4343
4344 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
4345 // implicit live-in.
4346 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
4347 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(),
4348 &SystemZ::GR64BitRegClass);
4349 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
4350}
4351
4352SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
4353 SelectionDAG &DAG) const {
4354 SDLoc DL(Op);
4355 SDValue In = Op.getOperand(0);
4356 EVT InVT = In.getValueType();
4357 EVT ResVT = Op.getValueType();
4358
4359 // Convert loads directly. This is normally done by DAGCombiner,
4360 // but we need this case for bitcasts that are created during lowering
4361 // and which are then lowered themselves.
4362 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
4363 if (ISD::isNormalLoad(LoadN)) {
4364 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
4365 LoadN->getBasePtr(), LoadN->getMemOperand());
4366 // Update the chain uses.
4367 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
4368 return NewLoad;
4369 }
4370
4371 if (InVT == MVT::i32 && ResVT == MVT::f32) {
4372 SDValue In64;
4373 if (Subtarget.hasHighWord()) {
4374 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
4375 MVT::i64);
4376 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4377 MVT::i64, SDValue(U64, 0), In);
4378 } else {
4379 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
4380 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
4381 DAG.getConstant(32, DL, MVT::i64));
4382 }
4383 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
4384 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
4385 DL, MVT::f32, Out64);
4386 }
4387 if (InVT == MVT::f32 && ResVT == MVT::i32) {
4388 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
4389 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4390 MVT::f64, SDValue(U64, 0), In);
4391 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
4392 if (Subtarget.hasHighWord())
4393 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
4394 MVT::i32, Out64);
4395 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
4396 DAG.getConstant(32, DL, MVT::i64));
4397 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
4398 }
4399 llvm_unreachable("Unexpected bitcast combination");
4400}
4401
4402SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
4403 SelectionDAG &DAG) const {
4404
4405 if (Subtarget.isTargetXPLINK64())
4406 return lowerVASTART_XPLINK(Op, DAG);
4407 else
4408 return lowerVASTART_ELF(Op, DAG);
4409}
4410
4411SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
4412 SelectionDAG &DAG) const {
4413 MachineFunction &MF = DAG.getMachineFunction();
4414 SystemZMachineFunctionInfo *FuncInfo =
4415 MF.getInfo<SystemZMachineFunctionInfo>();
4416
4417 SDLoc DL(Op);
4418
4419 // vastart just stores the address of the VarArgsFrameIndex slot into the
4420 // memory location argument.
4421 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4422 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4423 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4424 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4425 MachinePointerInfo(SV));
4426}
4427
4428SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
4429 SelectionDAG &DAG) const {
4430 MachineFunction &MF = DAG.getMachineFunction();
4431 SystemZMachineFunctionInfo *FuncInfo =
4432 MF.getInfo<SystemZMachineFunctionInfo>();
4433 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4434
4435 SDValue Chain = Op.getOperand(0);
4436 SDValue Addr = Op.getOperand(1);
4437 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4438 SDLoc DL(Op);
4439
4440 // The initial values of each field.
4441 const unsigned NumFields = 4;
4442 SDValue Fields[NumFields] = {
4443 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
4444 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
4445 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
4446 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
4447 };
4448
4449 // Store each field into its respective slot.
4450 SDValue MemOps[NumFields];
4451 unsigned Offset = 0;
4452 for (unsigned I = 0; I < NumFields; ++I) {
4453 SDValue FieldAddr = Addr;
4454 if (Offset != 0)
4455 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
4456 DAG.getIntPtrConstant(Offset, DL));
4457 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
4458 MachinePointerInfo(SV, Offset));
4459 Offset += 8;
4460 }
4461 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4462}
4463
4464SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
4465 SelectionDAG &DAG) const {
4466 SDValue Chain = Op.getOperand(0);
4467 SDValue DstPtr = Op.getOperand(1);
4468 SDValue SrcPtr = Op.getOperand(2);
4469 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4470 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4471 SDLoc DL(Op);
4472
4473 uint32_t Sz =
4474 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
4475 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
4476 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
4477 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
4478 MachinePointerInfo(SrcSV));
4479}
4480
4481SDValue
4482SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
4483 SelectionDAG &DAG) const {
4484 if (Subtarget.isTargetXPLINK64())
4485 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
4486 else
4487 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
4488}
4489
4490SDValue
4491SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
4492 SelectionDAG &DAG) const {
4493 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4494 MachineFunction &MF = DAG.getMachineFunction();
4495 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4496 SDValue Chain = Op.getOperand(0);
4497 SDValue Size = Op.getOperand(1);
4498 SDValue Align = Op.getOperand(2);
4499 SDLoc DL(Op);
4500
4501 // If the user has set the "no-realign-stack" function attribute, ignore
4502 // alloca alignments.
4503 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4504
4505 uint64_t StackAlign = TFI->getStackAlignment();
4506 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4507 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4508
4509 SDValue NeededSpace = Size;
4510
4511 // Add extra space for alignment if needed.
4512 EVT PtrVT = getPointerTy(MF.getDataLayout());
4513 if (ExtraAlignSpace)
4514 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
4515 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4516
4517 bool IsSigned = false;
4518 bool DoesNotReturn = false;
4519 bool IsReturnValueUsed = false;
4520 EVT VT = Op.getValueType();
4521 SDValue AllocaCall =
4522 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4523 CallingConv::C, IsSigned, DL, DoesNotReturn,
4524 IsReturnValueUsed)
4525 .first;
4526
4527 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4528 // to end of call in order to ensure it isn't broken up from the call
4529 // sequence.
4530 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4531 Register SPReg = Regs.getStackPointerRegister();
4532 Chain = AllocaCall.getValue(1);
4533 SDValue Glue = AllocaCall.getValue(2);
4534 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4535 Chain = NewSPRegNode.getValue(1);
4536
4537 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4538 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4539 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4540
4541 // Dynamically realign if needed.
4542 if (ExtraAlignSpace) {
4543 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4544 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4545 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4546 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4547 }
4548
4549 SDValue Ops[2] = {Result, Chain};
4550 return DAG.getMergeValues(Ops, DL);
4551}
4552
4553SDValue
4554SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4555 SelectionDAG &DAG) const {
4556 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4557 MachineFunction &MF = DAG.getMachineFunction();
4558 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4559 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4560
4561 SDValue Chain = Op.getOperand(0);
4562 SDValue Size = Op.getOperand(1);
4563 SDValue Align = Op.getOperand(2);
4564 SDLoc DL(Op);
4565
4566 // If the user has set the "no-realign-stack" function attribute, ignore
4567 // alloca alignments.
4568 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4569
4570 uint64_t StackAlign = TFI->getStackAlignment();
4571 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4572 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4573
4574 Register SPReg = getStackPointerRegisterToSaveRestore();
4575 SDValue NeededSpace = Size;
4576
4577 // Get a reference to the stack pointer.
4578 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4579
4580 // If we need a backchain, save it now.
4581 SDValue Backchain;
4582 if (StoreBackchain)
4583 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4584 MachinePointerInfo());
4585
4586 // Add extra space for alignment if needed.
4587 if (ExtraAlignSpace)
4588 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4589 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4590
4591 // Get the new stack pointer value.
4592 SDValue NewSP;
4593 if (hasInlineStackProbe(MF)) {
4594 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4595 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4596 Chain = NewSP.getValue(1);
4597 }
4598 else {
4599 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4600 // Copy the new stack pointer back.
4601 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4602 }
4603
4604 // The allocated data lives above the 160 bytes allocated for the standard
4605 // frame, plus any outgoing stack arguments. We don't know how much that
4606 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4607 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4608 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4609
4610 // Dynamically realign if needed.
4611 if (RequiredAlign > StackAlign) {
4612 Result =
4613 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4614 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4615 Result =
4616 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4617 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4618 }
4619
4620 if (StoreBackchain)
4621 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4622 MachinePointerInfo());
4623
4624 SDValue Ops[2] = { Result, Chain };
4625 return DAG.getMergeValues(Ops, DL);
4626}
4627
4628SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4629 SDValue Op, SelectionDAG &DAG) const {
4630 SDLoc DL(Op);
4631
4632 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4633}
4634
4635SDValue SystemZTargetLowering::lowerMULH(SDValue Op,
4636 SelectionDAG &DAG,
4637 unsigned Opcode) const {
4638 EVT VT = Op.getValueType();
4639 SDLoc DL(Op);
4640 SDValue Even, Odd;
4641
4642 // This custom expander is only used on z17 and later for 64-bit types.
4643 assert(!is32Bit(VT));
4644 assert(Subtarget.hasMiscellaneousExtensions2());
4645
4646 // SystemZISD::xMUL_LOHI returns the low result in the odd register and
4647 // the high result in the even register. Return the latter.
4648 lowerGR128Binary(DAG, DL, VT, Opcode,
4649 Op.getOperand(0), Op.getOperand(1), Even, Odd);
4650 return Even;
4651}
4652
4653SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4654 SelectionDAG &DAG) const {
4655 EVT VT = Op.getValueType();
4656 SDLoc DL(Op);
4657 SDValue Ops[2];
4658 if (is32Bit(VT))
4659 // Just do a normal 64-bit multiplication and extract the results.
4660 // We define this so that it can be used for constant division.
4661 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4662 Op.getOperand(1), Ops[1], Ops[0]);
4663 else if (Subtarget.hasMiscellaneousExtensions2())
4664 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4665 // the high result in the even register. ISD::SMUL_LOHI is defined to
4666 // return the low half first, so the results are in reverse order.
4667 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4668 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4669 else {
4670 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4671 //
4672 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4673 //
4674 // but using the fact that the upper halves are either all zeros
4675 // or all ones:
4676 //
4677 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4678 //
4679 // and grouping the right terms together since they are quicker than the
4680 // multiplication:
4681 //
4682 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
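// (lh and rh are the sign bits of ll and rl broadcast to 64 bits, so each
// is either 0 or all ones; multiplying by such a value is the same as
// negating the AND with it, which is why the shifted products above become
// subtractions of ANDs.)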
4683 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4684 SDValue LL = Op.getOperand(0);
4685 SDValue RL = Op.getOperand(1);
4686 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4687 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4688 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4689 // the high result in the even register. ISD::SMUL_LOHI is defined to
4690 // return the low half first, so the results are in reverse order.
4691 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4692 LL, RL, Ops[1], Ops[0]);
4693 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4694 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4695 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4696 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4697 }
4698 return DAG.getMergeValues(Ops, DL);
4699}
4700
4701SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4702 SelectionDAG &DAG) const {
4703 EVT VT = Op.getValueType();
4704 SDLoc DL(Op);
4705 SDValue Ops[2];
4706 if (is32Bit(VT))
4707 // Just do a normal 64-bit multiplication and extract the results.
4708 // We define this so that it can be used for constant division.
4709 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4710 Op.getOperand(1), Ops[1], Ops[0]);
4711 else
4712 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4713 // the high result in the even register. ISD::UMUL_LOHI is defined to
4714 // return the low half first, so the results are in reverse order.
4715 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4716 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4717 return DAG.getMergeValues(Ops, DL);
4718}
4719
4720SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4721 SelectionDAG &DAG) const {
4722 SDValue Op0 = Op.getOperand(0);
4723 SDValue Op1 = Op.getOperand(1);
4724 EVT VT = Op.getValueType();
4725 SDLoc DL(Op);
4726
4727 // We use DSGF for 32-bit division. This means the first operand must
4728 // always be 64-bit, and the second operand should be 32-bit whenever
4729 // that is possible, to improve performance.
4730 if (is32Bit(VT))
4731 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4732 else if (DAG.ComputeNumSignBits(Op1) > 32)
4733 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4734
4735 // DSG(F) returns the remainder in the even register and the
4736 // quotient in the odd register.
4737 SDValue Ops[2];
4738 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4739 return DAG.getMergeValues(Ops, DL);
4740}
4741
4742SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4743 SelectionDAG &DAG) const {
4744 EVT VT = Op.getValueType();
4745 SDLoc DL(Op);
4746
4747 // DL(G) returns the remainder in the even register and the
4748 // quotient in the odd register.
4749 SDValue Ops[2];
4750 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4751 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4752 return DAG.getMergeValues(Ops, DL);
4753}
4754
4755SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4756 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4757
4758 // Get the known-zero masks for each operand.
4759 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4760 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4761 DAG.computeKnownBits(Ops[1])};
4762
4763 // See if the upper 32 bits of one operand and the lower 32 bits of the
4764 // other are known zero. They are the low and high operands respectively.
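// For example, in (or (shl x, 32), (zext i32 y)) the first operand has its
// low 32 bits known zero and the second its high 32 bits, so x supplies the
// high word (HighOp) and y the low word (LowOp).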
4765 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4766 Known[1].Zero.getZExtValue() };
4767 unsigned High, Low;
4768 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4769 High = 1, Low = 0;
4770 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4771 High = 0, Low = 1;
4772 else
4773 return Op;
4774
4775 SDValue LowOp = Ops[Low];
4776 SDValue HighOp = Ops[High];
4777
4778 // If the high part is a constant, we're better off using IILH.
4779 if (HighOp.getOpcode() == ISD::Constant)
4780 return Op;
4781
4782 // If the low part is a constant that is outside the range of LHI,
4783 // then we're better off using IILF.
4784 if (LowOp.getOpcode() == ISD::Constant) {
4785 int64_t Value = int32_t(LowOp->getAsZExtVal());
4786 if (!isInt<16>(Value))
4787 return Op;
4788 }
4789
4790 // Check whether the high part is an AND that doesn't change the
4791 // high 32 bits and just masks out low bits. We can skip it if so.
4792 if (HighOp.getOpcode() == ISD::AND &&
4793 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4794 SDValue HighOp0 = HighOp.getOperand(0);
4795 uint64_t Mask = HighOp.getConstantOperandVal(1);
4796 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4797 HighOp = HighOp0;
4798 }
4799
4800 // Take advantage of the fact that all GR32 operations only change the
4801 // low 32 bits by truncating Low to an i32 and inserting it directly
4802 // using a subreg. The interesting cases are those where the truncation
4803 // can be folded.
4804 SDLoc DL(Op);
4805 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4806 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4807 MVT::i64, HighOp, Low32);
4808}
4809
4810// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4811SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4812 SelectionDAG &DAG) const {
4813 SDNode *N = Op.getNode();
4814 SDValue LHS = N->getOperand(0);
4815 SDValue RHS = N->getOperand(1);
4816 SDLoc DL(N);
4817
4818 if (N->getValueType(0) == MVT::i128) {
4819 unsigned BaseOp = 0;
4820 unsigned FlagOp = 0;
4821 bool IsBorrow = false;
4822 switch (Op.getOpcode()) {
4823 default: llvm_unreachable("Unknown instruction!");
4824 case ISD::UADDO:
4825 BaseOp = ISD::ADD;
4826 FlagOp = SystemZISD::VACC;
4827 break;
4828 case ISD::USUBO:
4829 BaseOp = ISD::SUB;
4830 FlagOp = SystemZISD::VSCBI;
4831 IsBorrow = true;
4832 break;
4833 }
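// The SystemZ borrow indication is a carry-style flag (1 means that no
// borrow occurred), which is the inverse of the flag ISD::USUBO produces,
// hence the XOR with 1 applied below when IsBorrow is set.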
4834 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4835 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4836 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4837 DAG.getValueType(MVT::i1));
4838 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4839 if (IsBorrow)
4840 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4841 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4842 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4843 }
4844
4845 unsigned BaseOp = 0;
4846 unsigned CCValid = 0;
4847 unsigned CCMask = 0;
4848
4849 switch (Op.getOpcode()) {
4850 default: llvm_unreachable("Unknown instruction!");
4851 case ISD::SADDO:
4852 BaseOp = SystemZISD::SADDO;
4853 CCValid = SystemZ::CCMASK_ARITH;
4854 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4855 break;
4856 case ISD::SSUBO:
4857 BaseOp = SystemZISD::SSUBO;
4858 CCValid = SystemZ::CCMASK_ARITH;
4859 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW;
4860 break;
4861 case ISD::UADDO:
4862 BaseOp = SystemZISD::UADDO;
4863 CCValid = SystemZ::CCMASK_LOGICAL;
4864 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4865 break;
4866 case ISD::USUBO:
4867 BaseOp = SystemZISD::USUBO;
4868 CCValid = SystemZ::CCMASK_LOGICAL;
4869 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4870 break;
4871 }
4872
4873 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4874 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4875
4876 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4877 if (N->getValueType(1) == MVT::i1)
4878 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4879
4880 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4881}
4882
4883static bool isAddCarryChain(SDValue Carry) {
4884 while (Carry.getOpcode() == ISD::UADDO_CARRY &&
4885 Carry->getValueType(0) != MVT::i128)
4886 Carry = Carry.getOperand(2);
4887 return Carry.getOpcode() == ISD::UADDO &&
4888 Carry->getValueType(0) != MVT::i128;
4889}
4890
4891static bool isSubBorrowChain(SDValue Carry) {
4892 while (Carry.getOpcode() == ISD::USUBO_CARRY &&
4893 Carry->getValueType(0) != MVT::i128)
4894 Carry = Carry.getOperand(2);
4895 return Carry.getOpcode() == ISD::USUBO &&
4896 Carry->getValueType(0) != MVT::i128;
4897}
4898
4899// Lower UADDO_CARRY/USUBO_CARRY nodes.
4900SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4901 SelectionDAG &DAG) const {
4902
4903 SDNode *N = Op.getNode();
4904 MVT VT = N->getSimpleValueType(0);
4905
4906 // Let legalize expand this if it isn't a legal type yet.
4907 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4908 return SDValue();
4909
4910 SDValue LHS = N->getOperand(0);
4911 SDValue RHS = N->getOperand(1);
4912 SDValue Carry = Op.getOperand(2);
4913 SDLoc DL(N);
4914
4915 if (VT == MVT::i128) {
4916 unsigned BaseOp = 0;
4917 unsigned FlagOp = 0;
4918 bool IsBorrow = false;
4919 switch (Op.getOpcode()) {
4920 default: llvm_unreachable("Unknown instruction!");
4921 case ISD::UADDO_CARRY:
4922 BaseOp = SystemZISD::VAC;
4923 FlagOp = SystemZISD::VACCC;
4924 break;
4925 case ISD::USUBO_CARRY:
4926 BaseOp = SystemZISD::VSBI;
4927 FlagOp = SystemZISD::VSBCBI;
4928 IsBorrow = true;
4929 break;
4930 }
4931 if (IsBorrow)
4932 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4933 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4934 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4935 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4936 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4937 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4938 DAG.getValueType(MVT::i1));
4939 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4940 if (IsBorrow)
4941 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4942 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4943 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4944 }
4945
4946 unsigned BaseOp = 0;
4947 unsigned CCValid = 0;
4948 unsigned CCMask = 0;
4949
4950 switch (Op.getOpcode()) {
4951 default: llvm_unreachable("Unknown instruction!");
4952 case ISD::UADDO_CARRY:
4953 if (!isAddCarryChain(Carry))
4954 return SDValue();
4955
4956 BaseOp = SystemZISD::ADDCARRY;
4957 CCValid = SystemZ::CCMASK_LOGICAL;
4958 CCMask = SystemZ::CCMASK_LOGICAL_CARRY;
4959 break;
4960 case ISD::USUBO_CARRY:
4961 if (!isSubBorrowChain(Carry))
4962 return SDValue();
4963
4964 BaseOp = SystemZISD::SUBCARRY;
4965 CCValid = SystemZ::CCMASK_LOGICAL;
4966 CCMask = SystemZ::CCMASK_LOGICAL_BORROW;
4967 break;
4968 }
4969
4970 // Set the condition code from the carry flag.
4971 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4972 DAG.getConstant(CCValid, DL, MVT::i32),
4973 DAG.getConstant(CCMask, DL, MVT::i32));
4974
4975 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4976 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4977
4978 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4979 if (N->getValueType(1) == MVT::i1)
4980 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4981
4982 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4983}
4984
4985SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4986 SelectionDAG &DAG) const {
4987 EVT VT = Op.getValueType();
4988 SDLoc DL(Op);
4989 Op = Op.getOperand(0);
4990
4991 if (VT.getScalarSizeInBits() == 128) {
4992 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4993 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4994 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4995 DAG.getConstant(0, DL, MVT::i64));
4996 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4997 return Op;
4998 }
4999
5000 // Handle vector types via VPOPCT.
5001 if (VT.isVector()) {
5002 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
5003 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
5004 switch (VT.getScalarSizeInBits()) {
5005 case 8:
5006 break;
5007 case 16: {
5008 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
5009 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
5010 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
5011 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
5012 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
5013 break;
5014 }
5015 case 32: {
5016 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
5017 DAG.getConstant(0, DL, MVT::i32));
5018 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5019 break;
5020 }
5021 case 64: {
5022 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
5023 DAG.getConstant(0, DL, MVT::i32));
5024 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
5025 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
5026 break;
5027 }
5028 default:
5029 llvm_unreachable("Unexpected type");
5030 }
5031 return Op;
5032 }
5033
5034 // Get the known-zero mask for the operand.
5035 KnownBits Known = DAG.computeKnownBits(Op);
5036 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
5037 if (NumSignificantBits == 0)
5038 return DAG.getConstant(0, DL, VT);
5039
5040 // Skip known-zero high parts of the operand.
5041 int64_t OrigBitSize = VT.getSizeInBits();
5042 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
5043 BitSize = std::min(BitSize, OrigBitSize);
5044
5045 // The POPCNT instruction counts the number of set bits in each byte.
5046 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
5047 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
5048 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
5049
5050 // Add up per-byte counts in a binary tree. All bits of Op at
5051 // position larger than BitSize remain zero throughout.
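// For example, with BitSize == 32 the loop adds in the value shifted left
// by 16 and then by 8, after which the most significant byte holds the sum
// of all four byte counts; the final shift right by 24 extracts it.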
5052 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
5053 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
5054 if (BitSize != OrigBitSize)
5055 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
5056 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
5057 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
5058 }
5059
5060 // Extract overall result from high byte.
5061 if (BitSize > 8)
5062 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5063 DAG.getConstant(BitSize - 8, DL, VT));
5064
5065 return Op;
5066}
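// [Editor's note, not part of the LLVM source file] The scalar path above
// relies on POPCNT leaving a per-byte population count in each byte and then
// folding those bytes into the top byte. A minimal sketch of the same
// reduction on a plain 32-bit value; the helper name is hypothetical:
//
//   uint32_t sumPopcntBytes(uint32_t PerByteCounts) {
//     // Each byte holds the popcount of the corresponding input byte (0..8),
//     // so the additions below can never carry across byte boundaries.
//     PerByteCounts += PerByteCounts << 16; // byte3 += byte1, byte2 += byte0
//     PerByteCounts += PerByteCounts << 8;  // byte3 now holds the full sum
//     return PerByteCounts >> 24;           // extract the total (0..32)
//   }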
5067
5068SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
5069 SelectionDAG &DAG) const {
5070 SDLoc DL(Op);
5071 AtomicOrdering FenceOrdering =
5072 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5073 SyncScope::ID FenceSSID =
5074 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5075
5076 // The only fence that needs an instruction is a sequentially-consistent
5077 // cross-thread fence.
5078 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5079 FenceSSID == SyncScope::System) {
5080 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
5081 Op.getOperand(0)),
5082 0);
5083 }
5084
5085 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5086 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
5087}
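// [Editor's note, not part of the LLVM source file] In source terms, only a
// sequentially-consistent, system-scope fence such as
//   std::atomic_thread_fence(std::memory_order_seq_cst);
// reaches the SystemZ::Serialize path above; weaker orderings and
// single-thread fences are compiler-only barriers and lower to the no-op
// MEMBARRIER node.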
5088
5089SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
5090 SelectionDAG &DAG) const {
5091 EVT RegVT = Op.getValueType();
5092 if (RegVT.getSizeInBits() == 128)
5093 return lowerATOMIC_LDST_I128(Op, DAG);
5094 return lowerLoadF16(Op, DAG);
5095}
5096
5097SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
5098 SelectionDAG &DAG) const {
5099 auto *Node = cast<AtomicSDNode>(Op.getNode());
5100 if (Node->getMemoryVT().getSizeInBits() == 128)
5101 return lowerATOMIC_LDST_I128(Op, DAG);
5102 return lowerStoreF16(Op, DAG);
5103}
5104
5105SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
5106 SelectionDAG &DAG) const {
5107 auto *Node = cast<AtomicSDNode>(Op.getNode());
5108 assert(
5109 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
5110 "Only custom lowering i128 or f128.");
5111 // Use same code to handle both legal and non-legal i128 types.
5112 SmallVector<SDValue, 2> Results;
5113 LowerOperationWrapper(Node, Results, DAG);
5114 return DAG.getMergeValues(Results, SDLoc(Op));
5115}
5116
5117// Prepare for a Compare And Swap for a subword operation. This needs to be
5118// done in memory with 4 bytes at natural alignment.
5119 static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL,
5120 SDValue &AlignedAddr, SDValue &BitShift,
5121 SDValue &NegBitShift) {
5122 EVT PtrVT = Addr.getValueType();
5123 EVT WideVT = MVT::i32;
5124
5125 // Get the address of the containing word.
5126 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
5127 DAG.getSignedConstant(-4, DL, PtrVT));
5128
5129 // Get the number of bits that the word must be rotated left in order
5130 // to bring the field to the top bits of a GR32.
5131 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
5132 DAG.getConstant(3, DL, PtrVT));
5133 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
5134
5135 // Get the complementing shift amount, for rotating a field in the top
5136 // bits back to its proper position.
5137 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
5138 DAG.getConstant(0, DL, WideVT), BitShift);
5139
5140}
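// [Editor's note, not part of the LLVM source file] A worked example of the
// values computed above, assuming a halfword field at byte address
// Addr = 0x1002 (big-endian, so offset 2 is the low half of its word):
//   AlignedAddr = 0x1002 & -4 = 0x1000  (containing aligned word)
//   BitShift    = 0x1002 * 8            (only the low bits, here 16, matter
//                                        for a 32-bit rotate)
//   NegBitShift = 0 - BitShift          (equivalent to rotating by 16)
// Rotating the containing GR32 left by BitShift brings the halfword into the
// top 16 bits; rotating by NegBitShift puts it back at its memory position.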
5141
5142// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
5143// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
5144SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
5145 SelectionDAG &DAG,
5146 unsigned Opcode) const {
5147 auto *Node = cast<AtomicSDNode>(Op.getNode());
5148
5149 // 32-bit operations need no special handling.
5150 EVT NarrowVT = Node->getMemoryVT();
5151 EVT WideVT = MVT::i32;
5152 if (NarrowVT == WideVT)
5153 return Op;
5154
5155 int64_t BitSize = NarrowVT.getSizeInBits();
5156 SDValue ChainIn = Node->getChain();
5157 SDValue Addr = Node->getBasePtr();
5158 SDValue Src2 = Node->getVal();
5159 MachineMemOperand *MMO = Node->getMemOperand();
5160 SDLoc DL(Node);
5161
5162 // Convert atomic subtracts of constants into additions.
5163 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
5164 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
5165 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
5166 Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL,
5167 Src2.getValueType());
5168 }
5169
5170 SDValue AlignedAddr, BitShift, NegBitShift;
5171 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5172
5173 // Extend the source operand to 32 bits and prepare it for the inner loop.
5174 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
5175 // operations require the source to be shifted in advance. (This shift
5176 // can be folded if the source is constant.) For AND and NAND, the lower
5177 // bits must be set, while for other opcodes they should be left clear.
5178 if (Opcode != SystemZISD::ATOMIC_SWAPW)
5179 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
5180 DAG.getConstant(32 - BitSize, DL, WideVT));
5181 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
5182 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
5183 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
5184 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
5185
5186 // Construct the ATOMIC_LOADW_* node.
5187 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
5188 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
5189 DAG.getConstant(BitSize, DL, WideVT) };
5190 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
5191 NarrowVT, MMO);
5192
5193 // Rotate the result of the final CS so that the field is in the lower
5194 // bits of a GR32, then truncate it.
5195 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
5196 DAG.getConstant(BitSize, DL, WideVT));
5197 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
5198
5199 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
5200 return DAG.getMergeValues(RetOps, DL);
5201}
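// [Editor's note, not part of the LLVM source file] Worked values for an
// 8-bit operation (BitSize = 8): Src2 is shifted left by 32 - 8 = 24 so the
// operand occupies the top byte of the GR32, and for AND/NAND the low bits
// are filled with ones, uint32_t(-1) >> 8 == 0x00FFFFFF. The result of the
// ATOMIC_LOADW_* node is then rotated left by BitShift + 8 to bring the
// 8-bit field into the low byte before the TRUNCATE.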
5202
5203// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
5204// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
5205SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
5206 SelectionDAG &DAG) const {
5207 auto *Node = cast<AtomicSDNode>(Op.getNode());
5208 EVT MemVT = Node->getMemoryVT();
5209 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
5210 // A full-width operation: negate and use LAA(G).
5211 assert(Op.getValueType() == MemVT && "Mismatched VTs");
5212 assert(Subtarget.hasInterlockedAccess1() &&
5213 "Should have been expanded by AtomicExpand pass.");
5214 SDValue Src2 = Node->getVal();
5215 SDLoc DL(Src2);
5216 SDValue NegSrc2 =
5217 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
5218 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
5219 Node->getChain(), Node->getBasePtr(), NegSrc2,
5220 Node->getMemOperand());
5221 }
5222
5223 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
5224}
5225
5226// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
5227SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
5228 SelectionDAG &DAG) const {
5229 auto *Node = cast<AtomicSDNode>(Op.getNode());
5230 SDValue ChainIn = Node->getOperand(0);
5231 SDValue Addr = Node->getOperand(1);
5232 SDValue CmpVal = Node->getOperand(2);
5233 SDValue SwapVal = Node->getOperand(3);
5234 MachineMemOperand *MMO = Node->getMemOperand();
5235 SDLoc DL(Node);
5236
5237 if (Node->getMemoryVT() == MVT::i128) {
5238 // Use same code to handle both legal and non-legal i128 types.
5239 SmallVector<SDValue, 2> Results;
5240 LowerOperationWrapper(Node, Results, DAG);
5241 return DAG.getMergeValues(Results, DL);
5242 }
5243
5244 // We have native support for 32-bit and 64-bit compare and swap, but we
5245 // still need to expand extracting the "success" result from the CC.
5246 EVT NarrowVT = Node->getMemoryVT();
5247 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
5248 if (NarrowVT == WideVT) {
5249 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5250 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
5251 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
5252 DL, Tys, Ops, NarrowVT, MMO);
5253 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5254 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
5255
5256 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
5257 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5258 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5259 return SDValue();
5260 }
5261
5262 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
5263 // via a fullword ATOMIC_CMP_SWAPW operation.
5264 int64_t BitSize = NarrowVT.getSizeInBits();
5265
5266 SDValue AlignedAddr, BitShift, NegBitShift;
5267 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5268
5269 // Construct the ATOMIC_CMP_SWAPW node.
5270 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5271 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
5272 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
5273 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
5274 VTList, Ops, NarrowVT, MMO);
5275 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5276 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ);
5277
5278 // emitAtomicCmpSwapW() will zero extend the result (original value).
5279 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
5280 DAG.getValueType(NarrowVT));
5281 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
5282 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5283 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5284 return SDValue();
5285}
5286
5287 MachineMemOperand::Flags
5288 SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
5289 // Because of how we convert atomic_load and atomic_store to normal loads and
5290 // stores in the DAG, we need to ensure that the MMOs are marked volatile
5291 // since DAGCombine hasn't been updated to account for atomic but
5292 // non-volatile loads. (See D57601)
5293 if (auto *SI = dyn_cast<StoreInst>(&I))
5294 if (SI->isAtomic())
5295 return MachineMemOperand::MOVolatile;
5296 if (auto *LI = dyn_cast<LoadInst>(&I))
5297 if (LI->isAtomic())
5298 return MachineMemOperand::MOVolatile;
5299 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
5300 if (AI->isAtomic())
5301 return MachineMemOperand::MOVolatile;
5302 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
5303 if (AI->isAtomic())
5304 return MachineMemOperand::MOVolatile;
5305 return MachineMemOperand::MONone;
5306}
5307
5308SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
5309 SelectionDAG &DAG) const {
5310 MachineFunction &MF = DAG.getMachineFunction();
5311 auto *Regs = Subtarget.getSpecialRegisters();
5312 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5313 report_fatal_error("Variable-sized stack allocations are not supported "
5314 "in GHC calling convention");
5315 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
5316 Regs->getStackPointerRegister(), Op.getValueType());
5317}
5318
5319SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
5320 SelectionDAG &DAG) const {
5321 MachineFunction &MF = DAG.getMachineFunction();
5322 auto *Regs = Subtarget.getSpecialRegisters();
5323 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
5324
5325 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
5326 report_fatal_error("Variable-sized stack allocations are not supported "
5327 "in GHC calling convention");
5328
5329 SDValue Chain = Op.getOperand(0);
5330 SDValue NewSP = Op.getOperand(1);
5331 SDValue Backchain;
5332 SDLoc DL(Op);
5333
5334 if (StoreBackchain) {
5335 SDValue OldSP = DAG.getCopyFromReg(
5336 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
5337 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
5338 MachinePointerInfo());
5339 }
5340
5341 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
5342
5343 if (StoreBackchain)
5344 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
5345 MachinePointerInfo());
5346
5347 return Chain;
5348}
5349
5350SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
5351 SelectionDAG &DAG) const {
5352 bool IsData = Op.getConstantOperandVal(4);
5353 if (!IsData)
5354 // Just preserve the chain.
5355 return Op.getOperand(0);
5356
5357 SDLoc DL(Op);
5358 bool IsWrite = Op.getConstantOperandVal(2);
5359 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
5360 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
5361 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
5362 Op.getOperand(1)};
5363 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
5364 Node->getVTList(), Ops,
5365 Node->getMemoryVT(), Node->getMemOperand());
5366}
5367
5368SDValue
5369SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
5370 SelectionDAG &DAG) const {
5371 unsigned Opcode, CCValid;
5372 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
5373 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
5374 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
5375 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
5376 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
5377 return SDValue();
5378 }
5379
5380 return SDValue();
5381}
5382
5383SDValue
5384SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
5385 SelectionDAG &DAG) const {
5386 unsigned Opcode, CCValid;
5387 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
5388 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
5389 if (Op->getNumValues() == 1)
5390 return getCCResult(DAG, SDValue(Node, 0));
5391 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
5392 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
5393 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
5394 }
5395
5396 unsigned Id = Op.getConstantOperandVal(0);
5397 switch (Id) {
5398 case Intrinsic::thread_pointer:
5399 return lowerThreadPointer(SDLoc(Op), DAG);
5400
5401 case Intrinsic::s390_vpdi:
5402 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
5403 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5404
5405 case Intrinsic::s390_vperm:
5406 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
5407 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5408
5409 case Intrinsic::s390_vuphb:
5410 case Intrinsic::s390_vuphh:
5411 case Intrinsic::s390_vuphf:
5412 case Intrinsic::s390_vuphg:
5413 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
5414 Op.getOperand(1));
5415
5416 case Intrinsic::s390_vuplhb:
5417 case Intrinsic::s390_vuplhh:
5418 case Intrinsic::s390_vuplhf:
5419 case Intrinsic::s390_vuplhg:
5420 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
5421 Op.getOperand(1));
5422
5423 case Intrinsic::s390_vuplb:
5424 case Intrinsic::s390_vuplhw:
5425 case Intrinsic::s390_vuplf:
5426 case Intrinsic::s390_vuplg:
5427 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
5428 Op.getOperand(1));
5429
5430 case Intrinsic::s390_vupllb:
5431 case Intrinsic::s390_vupllh:
5432 case Intrinsic::s390_vupllf:
5433 case Intrinsic::s390_vupllg:
5434 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
5435 Op.getOperand(1));
5436
5437 case Intrinsic::s390_vsumb:
5438 case Intrinsic::s390_vsumh:
5439 case Intrinsic::s390_vsumgh:
5440 case Intrinsic::s390_vsumgf:
5441 case Intrinsic::s390_vsumqf:
5442 case Intrinsic::s390_vsumqg:
5443 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
5444 Op.getOperand(1), Op.getOperand(2));
5445
5446 case Intrinsic::s390_vaq:
5447 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5448 Op.getOperand(1), Op.getOperand(2));
5449 case Intrinsic::s390_vaccb:
5450 case Intrinsic::s390_vacch:
5451 case Intrinsic::s390_vaccf:
5452 case Intrinsic::s390_vaccg:
5453 case Intrinsic::s390_vaccq:
5454 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
5455 Op.getOperand(1), Op.getOperand(2));
5456 case Intrinsic::s390_vacq:
5457 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
5458 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5459 case Intrinsic::s390_vacccq:
5460 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
5461 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5462
5463 case Intrinsic::s390_vsq:
5464 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
5465 Op.getOperand(1), Op.getOperand(2));
5466 case Intrinsic::s390_vscbib:
5467 case Intrinsic::s390_vscbih:
5468 case Intrinsic::s390_vscbif:
5469 case Intrinsic::s390_vscbig:
5470 case Intrinsic::s390_vscbiq:
5471 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
5472 Op.getOperand(1), Op.getOperand(2));
5473 case Intrinsic::s390_vsbiq:
5474 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
5475 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5476 case Intrinsic::s390_vsbcbiq:
5477 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
5478 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5479
5480 case Intrinsic::s390_vmhb:
5481 case Intrinsic::s390_vmhh:
5482 case Intrinsic::s390_vmhf:
5483 case Intrinsic::s390_vmhg:
5484 case Intrinsic::s390_vmhq:
5485 return DAG.getNode(ISD::MULHS, SDLoc(Op), Op.getValueType(),
5486 Op.getOperand(1), Op.getOperand(2));
5487 case Intrinsic::s390_vmlhb:
5488 case Intrinsic::s390_vmlhh:
5489 case Intrinsic::s390_vmlhf:
5490 case Intrinsic::s390_vmlhg:
5491 case Intrinsic::s390_vmlhq:
5492 return DAG.getNode(ISD::MULHU, SDLoc(Op), Op.getValueType(),
5493 Op.getOperand(1), Op.getOperand(2));
5494
5495 case Intrinsic::s390_vmahb:
5496 case Intrinsic::s390_vmahh:
5497 case Intrinsic::s390_vmahf:
5498 case Intrinsic::s390_vmahg:
5499 case Intrinsic::s390_vmahq:
5500 return DAG.getNode(SystemZISD::VMAH, SDLoc(Op), Op.getValueType(),
5501 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5502 case Intrinsic::s390_vmalhb:
5503 case Intrinsic::s390_vmalhh:
5504 case Intrinsic::s390_vmalhf:
5505 case Intrinsic::s390_vmalhg:
5506 case Intrinsic::s390_vmalhq:
5507 return DAG.getNode(SystemZISD::VMALH, SDLoc(Op), Op.getValueType(),
5508 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5509
5510 case Intrinsic::s390_vmeb:
5511 case Intrinsic::s390_vmeh:
5512 case Intrinsic::s390_vmef:
5513 case Intrinsic::s390_vmeg:
5514 return DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5515 Op.getOperand(1), Op.getOperand(2));
5516 case Intrinsic::s390_vmleb:
5517 case Intrinsic::s390_vmleh:
5518 case Intrinsic::s390_vmlef:
5519 case Intrinsic::s390_vmleg:
5520 return DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5521 Op.getOperand(1), Op.getOperand(2));
5522 case Intrinsic::s390_vmob:
5523 case Intrinsic::s390_vmoh:
5524 case Intrinsic::s390_vmof:
5525 case Intrinsic::s390_vmog:
5526 return DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5527 Op.getOperand(1), Op.getOperand(2));
5528 case Intrinsic::s390_vmlob:
5529 case Intrinsic::s390_vmloh:
5530 case Intrinsic::s390_vmlof:
5531 case Intrinsic::s390_vmlog:
5532 return DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5533 Op.getOperand(1), Op.getOperand(2));
5534
5535 case Intrinsic::s390_vmaeb:
5536 case Intrinsic::s390_vmaeh:
5537 case Intrinsic::s390_vmaef:
5538 case Intrinsic::s390_vmaeg:
5539 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5540 DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5541 Op.getOperand(1), Op.getOperand(2)),
5542 Op.getOperand(3));
5543 case Intrinsic::s390_vmaleb:
5544 case Intrinsic::s390_vmaleh:
5545 case Intrinsic::s390_vmalef:
5546 case Intrinsic::s390_vmaleg:
5547 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5548 DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5549 Op.getOperand(1), Op.getOperand(2)),
5550 Op.getOperand(3));
5551 case Intrinsic::s390_vmaob:
5552 case Intrinsic::s390_vmaoh:
5553 case Intrinsic::s390_vmaof:
5554 case Intrinsic::s390_vmaog:
5555 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5556 DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5557 Op.getOperand(1), Op.getOperand(2)),
5558 Op.getOperand(3));
5559 case Intrinsic::s390_vmalob:
5560 case Intrinsic::s390_vmaloh:
5561 case Intrinsic::s390_vmalof:
5562 case Intrinsic::s390_vmalog:
5563 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5564 DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5565 Op.getOperand(1), Op.getOperand(2)),
5566 Op.getOperand(3));
5567 }
5568
5569 return SDValue();
5570}
5571
5572namespace {
5573// Says that SystemZISD operation Opcode can be used to perform the equivalent
5574// of a VPERM with permute vector Bytes. If Opcode takes three operands,
5575// Operand is the constant third operand, otherwise it is the number of
5576// bytes in each element of the result.
5577struct Permute {
5578 unsigned Opcode;
5579 unsigned Operand;
5580 unsigned char Bytes[SystemZ::VectorBytes];
5581};
5582}
5583
5584static const Permute PermuteForms[] = {
5585 // VMRHG
5586 { SystemZISD::MERGE_HIGH, 8,
5587 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
5588 // VMRHF
5589 { SystemZISD::MERGE_HIGH, 4,
5590 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
5591 // VMRHH
5592 { SystemZISD::MERGE_HIGH, 2,
5593 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
5594 // VMRHB
5595 { SystemZISD::MERGE_HIGH, 1,
5596 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
5597 // VMRLG
5598 { SystemZISD::MERGE_LOW, 8,
5599 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
5600 // VMRLF
5601 { SystemZISD::MERGE_LOW, 4,
5602 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
5603 // VMRLH
5604 { SystemZISD::MERGE_LOW, 2,
5605 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
5606 // VMRLB
5607 { SystemZISD::MERGE_LOW, 1,
5608 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
5609 // VPKG
5610 { SystemZISD::PACK, 4,
5611 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
5612 // VPKF
5613 { SystemZISD::PACK, 2,
5614 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
5615 // VPKH
5616 { SystemZISD::PACK, 1,
5617 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
5618 // VPDI V1, V2, 4 (low half of V1, high half of V2)
5619 { SystemZISD::PERMUTE_DWORDS, 4,
5620 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
5621 // VPDI V1, V2, 1 (high half of V1, low half of V2)
5622 { SystemZISD::PERMUTE_DWORDS, 1,
5623 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
5624};
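// [Editor's note, not part of the LLVM source file] Byte selectors 0-15 name
// bytes of the first VPERM operand and 16-31 bytes of the second. For
// example, a v4i32 shuffle with mask <0, 4, 1, 5> (result elements A0, B0,
// A1, B1) expands to the byte vector
//   { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 }
// which is exactly the VMRHF row above, so it can be emitted as a single
// fullword MERGE_HIGH instead of a general VPERM.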
5625
5626// Called after matching a vector shuffle against a particular pattern.
5627// Both the original shuffle and the pattern have two vector operands.
5628// OpNos[0] is the operand of the original shuffle that should be used for
5629// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
5630// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and
5631// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
5632// for operands 0 and 1 of the pattern.
5633static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
5634 if (OpNos[0] < 0) {
5635 if (OpNos[1] < 0)
5636 return false;
5637 OpNo0 = OpNo1 = OpNos[1];
5638 } else if (OpNos[1] < 0) {
5639 OpNo0 = OpNo1 = OpNos[0];
5640 } else {
5641 OpNo0 = OpNos[0];
5642 OpNo1 = OpNos[1];
5643 }
5644 return true;
5645}
5646
5647// Bytes is a VPERM-like permute vector, except that -1 is used for
5648// undefined bytes. Return true if the VPERM can be implemented using P.
5649// When returning true set OpNo0 to the VPERM operand that should be
5650// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5651//
5652// For example, if swapping the VPERM operands allows P to match, OpNo0
5653// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5654// operand, but rewriting it to use two duplicated operands allows it to
5655// match P, then OpNo0 and OpNo1 will be the same.
5656static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5657 unsigned &OpNo0, unsigned &OpNo1) {
5658 int OpNos[] = { -1, -1 };
5659 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5660 int Elt = Bytes[I];
5661 if (Elt >= 0) {
5662 // Make sure that the two permute vectors use the same suboperand
5663 // byte number. Only the operand numbers (the high bits) are
5664 // allowed to differ.
5665 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5666 return false;
5667 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5668 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5669 // Make sure that the operand mappings are consistent with previous
5670 // elements.
5671 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5672 return false;
5673 OpNos[ModelOpNo] = RealOpNo;
5674 }
5675 }
5676 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5677}
5678
5679// As above, but search for a matching permute.
5680static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5681 unsigned &OpNo0, unsigned &OpNo1) {
5682 for (auto &P : PermuteForms)
5683 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5684 return &P;
5685 return nullptr;
5686}
5687
5688// Bytes is a VPERM-like permute vector, except that -1 is used for
5689// undefined bytes. This permute is an operand of an outer permute.
5690// See whether redistributing the -1 bytes gives a shuffle that can be
5691// implemented using P. If so, set Transform to a VPERM-like permute vector
5692// that, when applied to the result of P, gives the original permute in Bytes.
5693 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5694 const Permute &P,
5695 SmallVectorImpl<int> &Transform) {
5696 unsigned To = 0;
5697 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5698 int Elt = Bytes[From];
5699 if (Elt < 0)
5700 // Byte number From of the result is undefined.
5701 Transform[From] = -1;
5702 else {
5703 while (P.Bytes[To] != Elt) {
5704 To += 1;
5705 if (To == SystemZ::VectorBytes)
5706 return false;
5707 }
5708 Transform[From] = To;
5709 }
5710 }
5711 return true;
5712}
5713
5714// As above, but search for a matching permute.
5715static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5716 SmallVectorImpl<int> &Transform) {
5717 for (auto &P : PermuteForms)
5718 if (matchDoublePermute(Bytes, P, Transform))
5719 return &P;
5720 return nullptr;
5721}
5722
5723// Convert the mask of the given shuffle op into a byte-level mask,
5724// as if it had type vNi8.
5725static bool getVPermMask(SDValue ShuffleOp,
5726 SmallVectorImpl<int> &Bytes) {
5727 EVT VT = ShuffleOp.getValueType();
5728 unsigned NumElements = VT.getVectorNumElements();
5729 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5730
5731 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5732 Bytes.resize(NumElements * BytesPerElement, -1);
5733 for (unsigned I = 0; I < NumElements; ++I) {
5734 int Index = VSN->getMaskElt(I);
5735 if (Index >= 0)
5736 for (unsigned J = 0; J < BytesPerElement; ++J)
5737 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5738 }
5739 return true;
5740 }
5741 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5742 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5743 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5744 Bytes.resize(NumElements * BytesPerElement, -1);
5745 for (unsigned I = 0; I < NumElements; ++I)
5746 for (unsigned J = 0; J < BytesPerElement; ++J)
5747 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5748 return true;
5749 }
5750 return false;
5751}
5752
5753// Bytes is a VPERM-like permute vector, except that -1 is used for
5754// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5755// the result come from a contiguous sequence of bytes from one input.
5756// Set Base to the selector for the first byte if so.
5757static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5758 unsigned BytesPerElement, int &Base) {
5759 Base = -1;
5760 for (unsigned I = 0; I < BytesPerElement; ++I) {
5761 if (Bytes[Start + I] >= 0) {
5762 unsigned Elem = Bytes[Start + I];
5763 if (Base < 0) {
5764 Base = Elem - I;
5765 // Make sure the bytes would come from one input operand.
5766 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5767 return false;
5768 } else if (unsigned(Base) != Elem - I)
5769 return false;
5770 }
5771 }
5772 return true;
5773}
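// [Editor's note, not part of the LLVM source file] For example, with
// BytesPerElement = 4 and Start = 8, the slice { 20, 21, 22, 23 } is a
// contiguous run within one input, so Base is set to 20. A slice such as
// { 20, 21, 22, 24 } (not contiguous) or { 14, 15, 16, 17 } (straddles the
// boundary between the two inputs) is rejected.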
5774
5775// Bytes is a VPERM-like permute vector, except that -1 is used for
5776// undefined bytes. Return true if it can be performed using VSLDB.
5777// When returning true, set StartIndex to the shift amount and OpNo0
5778// and OpNo1 to the VPERM operands that should be used as the first
5779// and second shift operand respectively.
5780 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
5781 unsigned &StartIndex, unsigned &OpNo0,
5782 unsigned &OpNo1) {
5783 int OpNos[] = { -1, -1 };
5784 int Shift = -1;
5785 for (unsigned I = 0; I < 16; ++I) {
5786 int Index = Bytes[I];
5787 if (Index >= 0) {
5788 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5789 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5790 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5791 if (Shift < 0)
5792 Shift = ExpectedShift;
5793 else if (Shift != ExpectedShift)
5794 return false;
5795 // Make sure that the operand mappings are consistent with previous
5796 // elements.
5797 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5798 return false;
5799 OpNos[ModelOpNo] = RealOpNo;
5800 }
5801 }
5802 StartIndex = Shift;
5803 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5804}
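// [Editor's note, not part of the LLVM source file] For example, the byte
// vector { 4, 5, ..., 15, 16, 17, 18, 19 } selects 16 consecutive bytes of
// the concatenated operands starting at index 4, so every defined element
// yields the same ExpectedShift of 4. The routine returns StartIndex = 4
// with OpNo0 = 0 and OpNo1 = 1, which maps onto a single SHL_DOUBLE (VSLDB)
// with shift amount 4.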
5805
5806// Create a node that performs P on operands Op0 and Op1, casting the
5807// operands to the appropriate type. The type of the result is determined by P.
5808 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5809 const Permute &P, SDValue Op0, SDValue Op1) {
5810 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5811 // elements of a PACK are twice as wide as the outputs.
5812 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5813 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5814 P.Operand);
5815 // Cast both operands to the appropriate type.
5816 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5817 SystemZ::VectorBytes / InBytes);
5818 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5819 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5820 SDValue Op;
5821 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5822 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5823 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5824 } else if (P.Opcode == SystemZISD::PACK) {
5825 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5826 SystemZ::VectorBytes / P.Operand);
5827 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5828 } else {
5829 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5830 }
5831 return Op;
5832}
5833
5834static bool isZeroVector(SDValue N) {
5835 if (N->getOpcode() == ISD::BITCAST)
5836 N = N->getOperand(0);
5837 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5838 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5839 return Op->getZExtValue() == 0;
5840 return ISD::isBuildVectorAllZeros(N.getNode());
5841}
5842
5843// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5844static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5845 for (unsigned I = 0; I < Num ; I++)
5846 if (isZeroVector(Ops[I]))
5847 return I;
5848 return UINT32_MAX;
5849}
5850
5851// Bytes is a VPERM-like permute vector, except that -1 is used for
5852// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5853// VSLDB or VPERM.
5854 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL,
5855 SDValue *Ops,
5856 const SmallVectorImpl<int> &Bytes) {
5857 for (unsigned I = 0; I < 2; ++I)
5858 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5859
5860 // First see whether VSLDB can be used.
5861 unsigned StartIndex, OpNo0, OpNo1;
5862 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5863 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5864 Ops[OpNo1],
5865 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5866
5867 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5868 // eliminate a zero vector by reusing any zero index in the permute vector.
5869 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5870 if (ZeroVecIdx != UINT32_MAX) {
5871 bool MaskFirst = true;
5872 int ZeroIdx = -1;
5873 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5874 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5875 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5876 if (OpNo == ZeroVecIdx && I == 0) {
5877 // If the first byte is zero, use mask as first operand.
5878 ZeroIdx = 0;
5879 break;
5880 }
5881 if (OpNo != ZeroVecIdx && Byte == 0) {
5882 // If mask contains a zero, use it by placing that vector first.
5883 ZeroIdx = I + SystemZ::VectorBytes;
5884 MaskFirst = false;
5885 break;
5886 }
5887 }
5888 if (ZeroIdx != -1) {
5889 SDValue IndexNodes[SystemZ::VectorBytes];
5890 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5891 if (Bytes[I] >= 0) {
5892 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5893 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5894 if (OpNo == ZeroVecIdx)
5895 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5896 else {
5897 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5898 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5899 }
5900 } else
5901 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5902 }
5903 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5904 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5905 if (MaskFirst)
5906 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5907 Mask);
5908 else
5909 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5910 Mask);
5911 }
5912 }
5913
5914 SDValue IndexNodes[SystemZ::VectorBytes];
5915 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5916 if (Bytes[I] >= 0)
5917 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5918 else
5919 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5920 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5921 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5922 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5923}
5924
5925namespace {
5926// Describes a general N-operand vector shuffle.
5927struct GeneralShuffle {
5928 GeneralShuffle(EVT vt)
5929 : VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {}
5930 void addUndef();
5931 bool add(SDValue, unsigned);
5932 SDValue getNode(SelectionDAG &, const SDLoc &);
5933 void tryPrepareForUnpack();
5934 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5935 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5936
5937 // The operands of the shuffle.
5938 SmallVector<SDValue, SystemZ::VectorBytes> Ops;
5939
5940 // Index I is -1 if byte I of the result is undefined. Otherwise the
5941 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5942 // Bytes[I] / SystemZ::VectorBytes.
5943 SmallVector<int, SystemZ::VectorBytes> Bytes;
5944
5945 // The type of the shuffle result.
5946 EVT VT;
5947
5948 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5949 unsigned UnpackFromEltSize;
5950 // True if the final unpack uses the low half.
5951 bool UnpackLow;
5952};
5953} // namespace
5954
5955// Add an extra undefined element to the shuffle.
5956void GeneralShuffle::addUndef() {
5957 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5958 for (unsigned I = 0; I < BytesPerElement; ++I)
5959 Bytes.push_back(-1);
5960}
5961
5962// Add an extra element to the shuffle, taking it from element Elem of Op.
5963// A null Op indicates a vector input whose value will be calculated later;
5964// there is at most one such input per shuffle and it always has the same
5965// type as the result. Aborts and returns false if the source vector elements
5966// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5967// LLVM they become implicitly extended, but this is rare and not optimized.
5968bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5969 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5970
5971 // The source vector can have wider elements than the result,
5972 // either through an explicit TRUNCATE or because of type legalization.
5973 // We want the least significant part.
5974 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5975 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5976
5977 // Return false if the source elements are smaller than their destination
5978 // elements.
5979 if (FromBytesPerElement < BytesPerElement)
5980 return false;
5981
5982 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5983 (FromBytesPerElement - BytesPerElement));
5984
5985 // Look through things like shuffles and bitcasts.
5986 while (Op.getNode()) {
5987 if (Op.getOpcode() == ISD::BITCAST)
5988 Op = Op.getOperand(0);
5989 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5990 // See whether the bytes we need come from a contiguous part of one
5991 // operand.
5992 SmallVector<int, SystemZ::VectorBytes> OpBytes;
5993 if (!getVPermMask(Op, OpBytes))
5994 break;
5995 int NewByte;
5996 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5997 break;
5998 if (NewByte < 0) {
5999 addUndef();
6000 return true;
6001 }
6002 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
6003 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
6004 } else if (Op.isUndef()) {
6005 addUndef();
6006 return true;
6007 } else
6008 break;
6009 }
6010
6011 // Make sure that the source of the extraction is in Ops.
6012 unsigned OpNo = 0;
6013 for (; OpNo < Ops.size(); ++OpNo)
6014 if (Ops[OpNo] == Op)
6015 break;
6016 if (OpNo == Ops.size())
6017 Ops.push_back(Op);
6018
6019 // Add the element to Bytes.
6020 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
6021 for (unsigned I = 0; I < BytesPerElement; ++I)
6022 Bytes.push_back(Base + I);
6023
6024 return true;
6025}
6026
6027// Return SDNodes for the completed shuffle.
6028SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
6029 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
6030
6031 if (Ops.size() == 0)
6032 return DAG.getUNDEF(VT);
6033
6034 // Use a single unpack if possible as the last operation.
6035 tryPrepareForUnpack();
6036
6037 // Make sure that there are at least two shuffle operands.
6038 if (Ops.size() == 1)
6039 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
6040
6041 // Create a tree of shuffles, deferring root node until after the loop.
6042 // Try to redistribute the undefined elements of non-root nodes so that
6043 // the non-root shuffles match something like a pack or merge, then adjust
6044 // the parent node's permute vector to compensate for the new order.
6045 // Among other things, this copes with vectors like <2 x i16> that were
6046 // padded with undefined elements during type legalization.
6047 //
6048 // In the best case this redistribution will lead to the whole tree
6049 // using packs and merges. It should rarely be a loss in other cases.
6050 unsigned Stride = 1;
6051 for (; Stride * 2 < Ops.size(); Stride *= 2) {
6052 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
6053 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
6054
6055 // Create a mask for just these two operands.
6056 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
6057 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6058 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
6059 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
6060 if (OpNo == I)
6061 NewBytes[J] = Byte;
6062 else if (OpNo == I + Stride)
6063 NewBytes[J] = SystemZ::VectorBytes + Byte;
6064 else
6065 NewBytes[J] = -1;
6066 }
6067 // See if it would be better to reorganize NewMask to avoid using VPERM.
6068 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
6069 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
6070 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
6071 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
6072 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6073 if (NewBytes[J] >= 0) {
6074 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
6075 "Invalid double permute");
6076 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
6077 } else
6078 assert(NewBytesMap[J] < 0 && "Invalid double permute");
6079 }
6080 } else {
6081 // Just use NewBytes on the operands.
6082 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
6083 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
6084 if (NewBytes[J] >= 0)
6085 Bytes[J] = I * SystemZ::VectorBytes + J;
6086 }
6087 }
6088 }
6089
6090 // Now we just have 2 inputs. Put the second operand in Ops[1].
6091 if (Stride > 1) {
6092 Ops[1] = Ops[Stride];
6093 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6094 if (Bytes[I] >= int(SystemZ::VectorBytes))
6095 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
6096 }
6097
6098 // Look for an instruction that can do the permute without resorting
6099 // to VPERM.
6100 unsigned OpNo0, OpNo1;
6101 SDValue Op;
6102 if (unpackWasPrepared() && Ops[1].isUndef())
6103 Op = Ops[0];
6104 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
6105 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
6106 else
6107 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
6108
6109 Op = insertUnpackIfPrepared(DAG, DL, Op);
6110
6111 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6112}
6113
6114#ifndef NDEBUG
6115static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
6116 dbgs() << Msg.c_str() << " { ";
6117 for (unsigned I = 0; I < Bytes.size(); I++)
6118 dbgs() << Bytes[I] << " ";
6119 dbgs() << "}\n";
6120}
6121#endif
6122
6123// If the Bytes vector matches an unpack operation, prepare to do the unpack
6124// after all else by removing the zero vector and the effect of the unpack on
6125// Bytes.
6126void GeneralShuffle::tryPrepareForUnpack() {
6127 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
6128 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
6129 return;
6130
6131 // Only do this if removing the zero vector reduces the depth, otherwise
6132 // the critical path will increase with the final unpack.
6133 if (Ops.size() > 2 &&
6134 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
6135 return;
6136
6137 // Find an unpack that would allow removing the zero vector from Ops.
6138 UnpackFromEltSize = 1;
6139 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
6140 bool MatchUnpack = true;
6141 SmallVector<int, SystemZ::VectorBytes> SrcBytes;
6142 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
6143 unsigned ToEltSize = UnpackFromEltSize * 2;
6144 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
6145 if (!IsZextByte)
6146 SrcBytes.push_back(Bytes[Elt]);
6147 if (Bytes[Elt] != -1) {
6148 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
6149 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
6150 MatchUnpack = false;
6151 break;
6152 }
6153 }
6154 }
6155 if (MatchUnpack) {
6156 if (Ops.size() == 2) {
6157 // Don't use unpack if a single source operand needs rearrangement.
6158 bool CanUseUnpackLow = true, CanUseUnpackHigh = true;
6159 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) {
6160 if (SrcBytes[i] == -1)
6161 continue;
6162 if (SrcBytes[i] % 16 != int(i))
6163 CanUseUnpackHigh = false;
6164 if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2))
6165 CanUseUnpackLow = false;
6166 if (!CanUseUnpackLow && !CanUseUnpackHigh) {
6167 UnpackFromEltSize = UINT_MAX;
6168 return;
6169 }
6170 }
6171 if (!CanUseUnpackHigh)
6172 UnpackLow = true;
6173 }
6174 break;
6175 }
6176 }
6177 if (UnpackFromEltSize > 4)
6178 return;
6179
6180 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
6181 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
6182 << ".\n";
6183 dumpBytes(Bytes, "Original Bytes vector:"););
6184
6185 // Apply the unpack in reverse to the Bytes array.
6186 unsigned B = 0;
6187 if (UnpackLow) {
6188 while (B < SystemZ::VectorBytes / 2)
6189 Bytes[B++] = -1;
6190 }
6191 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
6192 Elt += UnpackFromEltSize;
6193 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
6194 Bytes[B] = Bytes[Elt];
6195 }
6196 if (!UnpackLow) {
6197 while (B < SystemZ::VectorBytes)
6198 Bytes[B++] = -1;
6199 }
6200
6201 // Remove the zero vector from Ops
6202 Ops.erase(&Ops[ZeroVecOpNo]);
6203 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6204 if (Bytes[I] >= 0) {
6205 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
6206 if (OpNo > ZeroVecOpNo)
6207 Bytes[I] -= SystemZ::VectorBytes;
6208 }
6209
6210 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
6211 dbgs() << "\n";);
6212}
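// [Editor's note, not part of the LLVM source file] Suppose the byte-level
// shuffle zero-extends the low eight bytes of Ops[0] to halfwords and Ops[1]
// is a zero vector, so Bytes is
//   { 16, 0, 16, 1, 16, 2, 16, 3, 16, 4, 16, 5, 16, 6, 16, 7 }
// (every even byte is taken from the zero vector). The loop above matches
// this with UnpackFromEltSize = 1, drops the zero vector, and rewrites Bytes
// to { 0, 1, 2, 3, 4, 5, 6, 7, -1, ..., -1 }, so getNode() can use the
// remaining operand directly and insertUnpackIfPrepared() supplies the final
// zero-extending UNPACKL_HIGH.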
6213
6214SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
6215 const SDLoc &DL,
6216 SDValue Op) {
6217 if (!unpackWasPrepared())
6218 return Op;
6219 unsigned InBits = UnpackFromEltSize * 8;
6220 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
6221 SystemZ::VectorBits / InBits);
6222 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
6223 unsigned OutBits = InBits * 2;
6224 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
6225 SystemZ::VectorBits / OutBits);
6226 return DAG.getNode(UnpackLow ? SystemZISD::UNPACKL_LOW
6227 : SystemZISD::UNPACKL_HIGH,
6228 DL, OutVT, PackedOp);
6229}
6230
6231// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
6232 static bool isScalarToVector(SDValue Op) {
6233 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
6234 if (!Op.getOperand(I).isUndef())
6235 return false;
6236 return true;
6237}
6238
6239// Return a vector of type VT that contains Value in the first element.
6240// The other elements don't matter.
6241 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6242 SDValue Value) {
6243 // If we have a constant, replicate it to all elements and let the
6244 // BUILD_VECTOR lowering take care of it.
6245 if (Value.getOpcode() == ISD::Constant ||
6246 Value.getOpcode() == ISD::ConstantFP) {
6247 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
6248 return DAG.getBuildVector(VT, DL, Ops);
6249 }
6250 if (Value.isUndef())
6251 return DAG.getUNDEF(VT);
6252 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
6253}
6254
6255// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
6256// element 1. Used for cases in which replication is cheap.
6257 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6258 SDValue Op0, SDValue Op1) {
6259 if (Op0.isUndef()) {
6260 if (Op1.isUndef())
6261 return DAG.getUNDEF(VT);
6262 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
6263 }
6264 if (Op1.isUndef())
6265 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
6266 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
6267 buildScalarToVector(DAG, DL, VT, Op0),
6268 buildScalarToVector(DAG, DL, VT, Op1));
6269}
6270
6271// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
6272// vector for them.
6273 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0,
6274 SDValue Op1) {
6275 if (Op0.isUndef() && Op1.isUndef())
6276 return DAG.getUNDEF(MVT::v2i64);
6277 // If one of the two inputs is undefined then replicate the other one,
6278 // in order to avoid using another register unnecessarily.
6279 if (Op0.isUndef())
6280 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6281 else if (Op1.isUndef())
6282 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6283 else {
6284 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6285 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6286 }
6287 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
6288}
6289
6290// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
6291// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
6292// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
6293// would benefit from this representation and return it if so.
6294 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
6295 BuildVectorSDNode *BVN) {
6296 EVT VT = BVN->getValueType(0);
6297 unsigned NumElements = VT.getVectorNumElements();
6298
6299 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
6300 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
6301 // need a BUILD_VECTOR, add an additional placeholder operand for that
6302 // BUILD_VECTOR and store its operands in ResidueOps.
6303 GeneralShuffle GS(VT);
6304 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
6305 bool FoundOne = false;
6306 for (unsigned I = 0; I < NumElements; ++I) {
6307 SDValue Op = BVN->getOperand(I);
6308 if (Op.getOpcode() == ISD::TRUNCATE)
6309 Op = Op.getOperand(0);
6310 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6311 Op.getOperand(1).getOpcode() == ISD::Constant) {
6312 unsigned Elem = Op.getConstantOperandVal(1);
6313 if (!GS.add(Op.getOperand(0), Elem))
6314 return SDValue();
6315 FoundOne = true;
6316 } else if (Op.isUndef()) {
6317 GS.addUndef();
6318 } else {
6319 if (!GS.add(SDValue(), ResidueOps.size()))
6320 return SDValue();
6321 ResidueOps.push_back(BVN->getOperand(I));
6322 }
6323 }
6324
6325 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
6326 if (!FoundOne)
6327 return SDValue();
6328
6329 // Create the BUILD_VECTOR for the remaining elements, if any.
6330 if (!ResidueOps.empty()) {
6331 while (ResidueOps.size() < NumElements)
6332 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
6333 for (auto &Op : GS.Ops) {
6334 if (!Op.getNode()) {
6335 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
6336 break;
6337 }
6338 }
6339 }
6340 return GS.getNode(DAG, SDLoc(BVN));
6341}
6342
6343bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
6344 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
6345 return true;
6346 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
6347 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
6348 return true;
6349 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
6350 return true;
6351 return false;
6352}
6353
6354// Combine GPR scalar values Elems into a vector of type VT.
6355SDValue
6356SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6357 SmallVectorImpl<SDValue> &Elems) const {
6358 // See whether there is a single replicated value.
6359 SDValue Single;
6360 unsigned int NumElements = Elems.size();
6361 unsigned int Count = 0;
6362 for (auto Elem : Elems) {
6363 if (!Elem.isUndef()) {
6364 if (!Single.getNode())
6365 Single = Elem;
6366 else if (Elem != Single) {
6367 Single = SDValue();
6368 break;
6369 }
6370 Count += 1;
6371 }
6372 }
6373 // There are three cases here:
6374 //
6375 // - if the only defined element is a loaded one, the best sequence
6376 // is a replicating load.
6377 //
6378 // - otherwise, if the only defined element is an i64 value, we will
6379 // end up with the same VLVGP sequence regardless of whether we short-cut
6380 // for replication or fall through to the later code.
6381 //
6382 // - otherwise, if the only defined element is an i32 or smaller value,
6383 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
6384 // This is only a win if the single defined element is used more than once.
6385 // In other cases we're better off using a single VLVGx.
6386 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
6387 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
6388
6389 // If all elements are loads, use VLREP/VLEs (below).
6390 bool AllLoads = true;
6391 for (auto Elem : Elems)
6392 if (!isVectorElementLoad(Elem)) {
6393 AllLoads = false;
6394 break;
6395 }
6396
6397 // The best way of building a v2i64 from two i64s is to use VLVGP.
6398 if (VT == MVT::v2i64 && !AllLoads)
6399 return joinDwords(DAG, DL, Elems[0], Elems[1]);
6400
6401 // Use a 64-bit merge high to combine two doubles.
6402 if (VT == MVT::v2f64 && !AllLoads)
6403 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6404
6405 // Build v4f32 values directly from the FPRs:
6406 //
6407 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
6408 // V V VMRHF
6409 // <ABxx> <CDxx>
6410 // V VMRHG
6411 // <ABCD>
6412 if (VT == MVT::v4f32 && !AllLoads) {
6413 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6414 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
6415 // Avoid unnecessary undefs by reusing the other operand.
6416 if (Op01.isUndef())
6417 Op01 = Op23;
6418 else if (Op23.isUndef())
6419 Op23 = Op01;
6420 // Merging identical replications is a no-op.
6421 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
6422 return Op01;
6423 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
6424 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
6425 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
6426 DL, MVT::v2i64, Op01, Op23);
6427 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6428 }
6429
6430 // Collect the constant terms.
6431 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
6432 SmallVector<bool, 16> Done(NumElements, false);
6433
6434 unsigned NumConstants = 0;
6435 for (unsigned I = 0; I < NumElements; ++I) {
6436 SDValue Elem = Elems[I];
6437 if (Elem.getOpcode() == ISD::Constant ||
6438 Elem.getOpcode() == ISD::ConstantFP) {
6439 NumConstants += 1;
6440 Constants[I] = Elem;
6441 Done[I] = true;
6442 }
6443 }
6444 // If there was at least one constant, fill in the other elements of
6445 // Constants with undefs to get a full vector constant and use that
6446 // as the starting point.
6447 SDValue Result;
6448 SDValue ReplicatedVal;
6449 if (NumConstants > 0) {
6450 for (unsigned I = 0; I < NumElements; ++I)
6451 if (!Constants[I].getNode())
6452 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
6453 Result = DAG.getBuildVector(VT, DL, Constants);
6454 } else {
6455 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
6456 // avoid a false dependency on any previous contents of the vector
6457 // register.
6458
6459 // Use a VLREP if at least one element is a load. Make sure to replicate
6460 // the load with the most elements having its value.
6461 std::map<const SDNode*, unsigned> UseCounts;
6462 SDNode *LoadMaxUses = nullptr;
6463 for (unsigned I = 0; I < NumElements; ++I)
6464 if (isVectorElementLoad(Elems[I])) {
6465 SDNode *Ld = Elems[I].getNode();
6466 unsigned Count = ++UseCounts[Ld];
6467 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < Count)
6468 LoadMaxUses = Ld;
6469 }
6470 if (LoadMaxUses != nullptr) {
6471 ReplicatedVal = SDValue(LoadMaxUses, 0);
6472 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
6473 } else {
6474 // Try to use VLVGP.
6475 unsigned I1 = NumElements / 2 - 1;
6476 unsigned I2 = NumElements - 1;
6477 bool Def1 = !Elems[I1].isUndef();
6478 bool Def2 = !Elems[I2].isUndef();
6479 if (Def1 || Def2) {
6480 SDValue Elem1 = Elems[Def1 ? I1 : I2];
6481 SDValue Elem2 = Elems[Def2 ? I2 : I1];
6482 Result = DAG.getNode(ISD::BITCAST, DL, VT,
6483 joinDwords(DAG, DL, Elem1, Elem2));
6484 Done[I1] = true;
6485 Done[I2] = true;
6486 } else
6487 Result = DAG.getUNDEF(VT);
6488 }
6489 }
6490
6491 // Use VLVGx to insert the other elements.
6492 for (unsigned I = 0; I < NumElements; ++I)
6493 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
6494 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
6495 DAG.getConstant(I, DL, MVT::i32));
6496 return Result;
6497}
6498
6499SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
6500 SelectionDAG &DAG) const {
6501 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
6502 SDLoc DL(Op);
6503 EVT VT = Op.getValueType();
6504
6505 if (BVN->isConstant()) {
6506 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
6507 return Op;
6508
6509 // Fall back to loading it from memory.
6510 return SDValue();
6511 }
6512
6513 // See if we should use shuffles to construct the vector from other vectors.
6514 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
6515 return Res;
6516
6517 // Detect SCALAR_TO_VECTOR conversions.
6518 if (isScalarToVector(Op))
6519 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
6520
6521 // Otherwise use buildVector to build the vector up from GPRs.
6522 unsigned NumElements = Op.getNumOperands();
6523 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
6524 for (unsigned I = 0; I < NumElements; ++I)
6525 Ops[I] = Op.getOperand(I);
6526 return buildVector(DAG, DL, VT, Ops);
6527}
6528
6529SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
6530 SelectionDAG &DAG) const {
6531 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
6532 SDLoc DL(Op);
6533 EVT VT = Op.getValueType();
6534 unsigned NumElements = VT.getVectorNumElements();
6535
6536 if (VSN->isSplat()) {
6537 SDValue Op0 = Op.getOperand(0);
6538 unsigned Index = VSN->getSplatIndex();
6539 assert(Index < VT.getVectorNumElements() &&
6540 "Splat index should be defined and in first operand");
6541 // See whether the value we're splatting is directly available as a scalar.
6542 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6543 Op0.getOpcode() == ISD::BUILD_VECTOR)
6544 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
6545 // Otherwise keep it as a vector-to-vector operation.
6546 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
6547 DAG.getTargetConstant(Index, DL, MVT::i32));
6548 }
6549
6550 GeneralShuffle GS(VT);
6551 for (unsigned I = 0; I < NumElements; ++I) {
6552 int Elt = VSN->getMaskElt(I);
6553 if (Elt < 0)
6554 GS.addUndef();
6555 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
6556 unsigned(Elt) % NumElements))
6557 return SDValue();
6558 }
6559 return GS.getNode(DAG, SDLoc(VSN));
6560}
6561
6562SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
6563 SelectionDAG &DAG) const {
6564 SDLoc DL(Op);
6565 // Just insert the scalar into element 0 of an undefined vector.
6566 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
6567 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
6568 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
6569}
6570
6571SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6572 SelectionDAG &DAG) const {
6573 // Handle insertions of floating-point values.
6574 SDLoc DL(Op);
6575 SDValue Op0 = Op.getOperand(0);
6576 SDValue Op1 = Op.getOperand(1);
6577 SDValue Op2 = Op.getOperand(2);
6578 EVT VT = Op.getValueType();
6579
6580 // Insertions into constant indices of a v2f64 can be done using VPDI.
6581 // However, if the inserted value is a bitcast or a constant then it's
6582 // better to use GPRs, as below.
6583 if (VT == MVT::v2f64 &&
6584 Op1.getOpcode() != ISD::BITCAST &&
6585 Op1.getOpcode() != ISD::ConstantFP &&
6586 Op2.getOpcode() == ISD::Constant) {
6587 uint64_t Index = Op2->getAsZExtVal();
6588 unsigned Mask = VT.getVectorNumElements() - 1;
6589 if (Index <= Mask)
6590 return Op;
6591 }
6592
6593 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
6594 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
6595 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
6596 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
6597 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
6598 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
6599 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6600}
6601
6602SDValue
6603SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6604 SelectionDAG &DAG) const {
6605 // Handle extractions of floating-point values.
6606 SDLoc DL(Op);
6607 SDValue Op0 = Op.getOperand(0);
6608 SDValue Op1 = Op.getOperand(1);
6609 EVT VT = Op.getValueType();
6610 EVT VecVT = Op0.getValueType();
6611
6612 // Extractions of constant indices can be done directly.
6613 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
6614 uint64_t Index = CIndexN->getZExtValue();
6615 unsigned Mask = VecVT.getVectorNumElements() - 1;
6616 if (Index <= Mask)
6617 return Op;
6618 }
6619
6620 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
6621 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
6622 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
6623 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
6624 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
6625 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6626}
6627
6628SDValue SystemZTargetLowering::
6629lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6630 SDValue PackedOp = Op.getOperand(0);
6631 EVT OutVT = Op.getValueType();
6632 EVT InVT = PackedOp.getValueType();
6633 unsigned ToBits = OutVT.getScalarSizeInBits();
6634 unsigned FromBits = InVT.getScalarSizeInBits();
6635 unsigned StartOffset = 0;
6636
6637 // If the input is a VECTOR_SHUFFLE, there are a number of important
6638 // cases where we can directly implement the sign-extension of the
6639 // original input lanes of the shuffle.
6640 if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
6641 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(PackedOp.getNode());
6642 ArrayRef<int> ShuffleMask = SVN->getMask();
6643 int OutNumElts = OutVT.getVectorNumElements();
6644
6645 // Recognize the special case where the sign-extension can be done
6646 // by the VSEG instruction. Handled via the default expander.
6647 if (ToBits == 64 && OutNumElts == 2) {
6648 int NumElem = ToBits / FromBits;
6649 if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1)
6650 return SDValue();
6651 }
6652
6653 // Recognize the special case where we can fold the shuffle by
6654 // replacing some of the UNPACK_HIGH with UNPACK_LOW.
6655 int StartOffsetCandidate = -1;
6656 for (int Elt = 0; Elt < OutNumElts; Elt++) {
6657 if (ShuffleMask[Elt] == -1)
6658 continue;
6659 if (ShuffleMask[Elt] % OutNumElts == Elt) {
6660 if (StartOffsetCandidate == -1)
6661 StartOffsetCandidate = ShuffleMask[Elt] - Elt;
6662 if (StartOffsetCandidate == ShuffleMask[Elt] - Elt)
6663 continue;
6664 }
6665 StartOffsetCandidate = -1;
6666 break;
6667 }
6668 if (StartOffsetCandidate != -1) {
6669 StartOffset = StartOffsetCandidate;
6670 PackedOp = PackedOp.getOperand(0);
6671 }
6672 }
6673
6674 do {
6675 FromBits *= 2;
6676 unsigned OutNumElts = SystemZ::VectorBits / FromBits;
6677 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), OutNumElts);
6678 unsigned Opcode = SystemZISD::UNPACK_HIGH;
6679 if (StartOffset >= OutNumElts) {
6680 Opcode = SystemZISD::UNPACK_LOW;
6681 StartOffset -= OutNumElts;
6682 }
6683 PackedOp = DAG.getNode(Opcode, SDLoc(PackedOp), OutVT, PackedOp);
6684 } while (FromBits != ToBits);
6685 return PackedOp;
6686}
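// For example, (sign_extend_vector_inreg v16i8 to v4i32) is lowered by the
// loop above as two unpacks, v16i8 -> v8i16 -> v4i32, each using UNPACK_HIGH;
// when the folded shuffle leaves a StartOffset in the low half of a step,
// that step uses UNPACK_LOW instead.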
6687
6688// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
6689SDValue SystemZTargetLowering::
6690lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6691 SDValue PackedOp = Op.getOperand(0);
6692 SDLoc DL(Op);
6693 EVT OutVT = Op.getValueType();
6694 EVT InVT = PackedOp.getValueType();
6695 unsigned InNumElts = InVT.getVectorNumElements();
6696 unsigned OutNumElts = OutVT.getVectorNumElements();
6697 unsigned NumInPerOut = InNumElts / OutNumElts;
6698
6699 SDValue ZeroVec =
6700 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
6701
6702 SmallVector<int, 16> Mask(InNumElts);
6703 unsigned ZeroVecElt = InNumElts;
6704 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6705 unsigned MaskElt = PackedElt * NumInPerOut;
6706 unsigned End = MaskElt + NumInPerOut - 1;
6707 for (; MaskElt < End; MaskElt++)
6708 Mask[MaskElt] = ZeroVecElt++;
6709 Mask[MaskElt] = PackedElt;
6710 }
6711 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6712 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6713}
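// For example, zero-extending v4i32 to v2i64 builds the shuffle mask
// { 4, 0, 5, 1 }: each output doubleword becomes a zero word from ZeroVec
// followed by one input word, which is the zero-extended value once the
// final bitcast reinterprets the bytes.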
6714
6715SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6716 unsigned ByScalar) const {
6717 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6718 SDValue Op0 = Op.getOperand(0);
6719 SDValue Op1 = Op.getOperand(1);
6720 SDLoc DL(Op);
6721 EVT VT = Op.getValueType();
6722 unsigned ElemBitSize = VT.getScalarSizeInBits();
6723
6724 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6725 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6726 APInt SplatBits, SplatUndef;
6727 unsigned SplatBitSize;
6728 bool HasAnyUndefs;
6729 // Check for constant splats. Use ElemBitSize as the minimum element
6730 // width and reject splats that need wider elements.
6731 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6732 ElemBitSize, true) &&
6733 SplatBitSize == ElemBitSize) {
6734 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6735 DL, MVT::i32);
6736 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6737 }
6738 // Check for variable splats.
6739 BitVector UndefElements;
6740 SDValue Splat = BVN->getSplatValue(&UndefElements);
6741 if (Splat) {
6742 // Since i32 is the smallest legal type, we either need a no-op
6743 // or a truncation.
6744 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6745 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6746 }
6747 }
6748
6749 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6750 // and the shift amount is directly available in a GPR.
6751 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6752 if (VSN->isSplat()) {
6753 SDValue VSNOp0 = VSN->getOperand(0);
6754 unsigned Index = VSN->getSplatIndex();
6755 assert(Index < VT.getVectorNumElements() &&
6756 "Splat index should be defined and in first operand");
6757 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6758 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6759 // Since i32 is the smallest legal type, we either need a no-op
6760 // or a truncation.
6761 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6762 VSNOp0.getOperand(Index));
6763 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6764 }
6765 }
6766 }
6767
6768 // Otherwise just treat the current form as legal.
6769 return Op;
6770}
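// For example, (shl v4i32 X, (splat 5)) becomes VSHL_BY_SCALAR X, 5, and a
// non-constant splat amount is simply truncated to i32, so both forms can
// select the element-shift-by-scalar instructions (e.g. VESLF).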
6771
6772SDValue SystemZTargetLowering::lowerFSHL(SDValue Op, SelectionDAG &DAG) const {
6773 SDLoc DL(Op);
6774
6775 // i128 FSHL with a constant amount that is a multiple of 8 can be
6776 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6777 // facility, FSHL with a constant amount less than 8 can be implemented
6778 // via SHL_DOUBLE_BIT, and FSHL with other constant amounts by a
6779 // combination of the two.
6780 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6781 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6782 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6783 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6784 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6785 if (ShiftAmt > 120) {
 6786 // For N in 121..127, fshl by N equals fshr by (128 - N), and for
 6787 // amounts 1..7 the SHR_DOUBLE_BIT form emits fewer instructions.
6788 SDValue Val =
6789 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6790 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6791 return DAG.getBitcast(MVT::i128, Val);
6792 }
6793 SmallVector<int, 16> Mask(16);
6794 for (unsigned Elt = 0; Elt < 16; Elt++)
6795 Mask[Elt] = (ShiftAmt >> 3) + Elt;
6796 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6797 if ((ShiftAmt & 7) == 0)
6798 return DAG.getBitcast(MVT::i128, Shuf1);
6799 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op1, Op1, Mask);
6800 SDValue Val =
6801 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Shuf1, Shuf2,
6802 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6803 return DAG.getBitcast(MVT::i128, Val);
6804 }
6805 }
6806
6807 return SDValue();
6808}
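// Worked example for the constant-amount path above: an i128 fshl by 20
// shuffles bytes 2..17 of the Op0:Op1 concatenation and then uses
// SHL_DOUBLE_BIT to supply the remaining 4 bits from the second shuffle.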
6809
6810SDValue SystemZTargetLowering::lowerFSHR(SDValue Op, SelectionDAG &DAG) const {
6811 SDLoc DL(Op);
6812
6813 // i128 FSHR with a constant amount that is a multiple of 8 can be
6814 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6815 // facility, FSHR with a constant amount less than 8 can be implemented
6816 // via SHR_DOUBLE_BIT, and FSHR with other constant amounts by a
6817 // combination of the two.
6818 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6819 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6820 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6821 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6822 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6823 if (ShiftAmt > 120) {
 6824 // For N in 121..127, fshr by N equals fshl by (128 - N), and for
 6825 // amounts 1..7 the SHL_DOUBLE_BIT form emits fewer instructions.
6826 SDValue Val =
6827 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6828 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6829 return DAG.getBitcast(MVT::i128, Val);
6830 }
6831 SmallVector<int, 16> Mask(16);
6832 for (unsigned Elt = 0; Elt < 16; Elt++)
6833 Mask[Elt] = 16 - (ShiftAmt >> 3) + Elt;
6834 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6835 if ((ShiftAmt & 7) == 0)
6836 return DAG.getBitcast(MVT::i128, Shuf1);
6837 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op0, Mask);
6838 SDValue Val =
6839 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Shuf2, Shuf1,
6840 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6841 return DAG.getBitcast(MVT::i128, Val);
6842 }
6843 }
6844
6845 return SDValue();
6846}
6847
6849 SDLoc DL(Op);
6850 SDValue Src = Op.getOperand(0);
6851 MVT DstVT = Op.getSimpleValueType();
6852
 6853 AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op);
 6854 unsigned SrcAS = N->getSrcAddressSpace();
6855
6856 assert(SrcAS != N->getDestAddressSpace() &&
6857 "addrspacecast must be between different address spaces");
6858
 6859 // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
 6860 // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
6861 if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
6862 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Src,
6863 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6864 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6865 } else if (DstVT == MVT::i32) {
6866 Op = DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);
6867 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op,
6868 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6869 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6870 } else {
6871 report_fatal_error("Bad address space in addrspacecast");
6872 }
6873 return Op;
6874}
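// Note on the masking above: the 0x7fffffff mask keeps only the low 31 bits
// in both directions, matching the 31-bit addressing of ptr32 values.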
6875
6876SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op,
6877 SelectionDAG &DAG) const {
6878 SDValue In = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
6879 if (In.getSimpleValueType() != MVT::f16)
6880 return Op; // Legal
6881 return SDValue(); // Let legalizer emit the libcall.
6882}
6883
 6884 SDValue SystemZTargetLowering::useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
 6885 MVT VT, SDValue Arg, SDLoc DL,
6886 SDValue Chain, bool IsStrict) const {
6887 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
6888 MakeLibCallOptions CallOptions;
6889 SDValue Result;
6890 std::tie(Result, Chain) =
6891 makeLibCall(DAG, LC, VT, Arg, CallOptions, DL, Chain);
6892 return IsStrict ? DAG.getMergeValues({Result, Chain}, DL) : Result;
6893}
6894
6895SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op,
6896 SelectionDAG &DAG) const {
6897 bool IsSigned = (Op->getOpcode() == ISD::FP_TO_SINT ||
6898 Op->getOpcode() == ISD::STRICT_FP_TO_SINT);
6899 bool IsStrict = Op->isStrictFPOpcode();
6900 SDLoc DL(Op);
6901 MVT VT = Op.getSimpleValueType();
6902 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
6903 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
6904 EVT InVT = InOp.getValueType();
6905
6906 // FP to unsigned is not directly supported on z10. Promoting an i32
6907 // result to (signed) i64 doesn't generate an inexact condition (fp
6908 // exception) for values that are outside the i32 range but in the i64
6909 // range, so use the default expansion.
6910 if (!Subtarget.hasFPExtension() && !IsSigned)
6911 // Expand i32/i64. F16 values will be recognized to fit and extended.
6912 return SDValue();
6913
6914 // Conversion from f16 is done via f32.
6915 if (InOp.getSimpleValueType() == MVT::f16) {
 6916 SmallVector<SDValue, 2> Results;
 6917 LowerOperationWrapper(Op.getNode(), Results, DAG);
6918 return DAG.getMergeValues(Results, DL);
6919 }
6920
6921 if (VT == MVT::i128) {
6922 RTLIB::Libcall LC =
6923 IsSigned ? RTLIB::getFPTOSINT(InVT, VT) : RTLIB::getFPTOUINT(InVT, VT);
6924 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
6925 }
6926
6927 return Op; // Legal
6928}
6929
6930SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op,
6931 SelectionDAG &DAG) const {
6932 bool IsSigned = (Op->getOpcode() == ISD::SINT_TO_FP ||
6933 Op->getOpcode() == ISD::STRICT_SINT_TO_FP);
6934 bool IsStrict = Op->isStrictFPOpcode();
6935 SDLoc DL(Op);
6936 MVT VT = Op.getSimpleValueType();
6937 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
6938 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
6939 EVT InVT = InOp.getValueType();
6940
6941 // Conversion to f16 is done via f32.
6942 if (VT == MVT::f16) {
 6943 SmallVector<SDValue, 2> Results;
 6944 LowerOperationWrapper(Op.getNode(), Results, DAG);
6945 return DAG.getMergeValues(Results, DL);
6946 }
6947
6948 // Unsigned to fp is not directly supported on z10.
6949 if (!Subtarget.hasFPExtension() && !IsSigned)
6950 return SDValue(); // Expand i64.
6951
6952 if (InVT == MVT::i128) {
6953 RTLIB::Libcall LC =
6954 IsSigned ? RTLIB::getSINTTOFP(InVT, VT) : RTLIB::getUINTTOFP(InVT, VT);
6955 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
6956 }
6957
6958 return Op; // Legal
6959}
6960
6961// Shift the lower 2 bytes of Op to the left in order to insert into the
6962// upper 2 bytes of the FP register.
 6963 static SDValue convertToF16(SDValue Op, SelectionDAG &DAG) {
 6964 assert(Op.getSimpleValueType() == MVT::i64 &&
 6965 "Expected to convert i64 to f16.");
6966 SDLoc DL(Op);
6967 SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i64, Op,
6968 DAG.getConstant(48, DL, MVT::i64));
6969 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shft);
6970 SDValue F16Val =
6971 DAG.getTargetExtractSubreg(SystemZ::subreg_h16, DL, MVT::f16, BCast);
6972 return F16Val;
6973}
6974
6975// Extract Op into GPR and shift the 2 f16 bytes to the right.
 6976 static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG) {
 6977 assert(Op.getSimpleValueType() == MVT::f16 &&
6978 "Expected to convert f16 to i64.");
6979 SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
6980 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, MVT::f64,
6981 SDValue(U32, 0), Op);
6982 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
6983 SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i64, BCast,
6984 DAG.getConstant(48, DL, MVT::i32));
6985 return Shft;
6986}
6987
6988// Lower an f16 LOAD in case of no vector support.
6989SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
6990 SelectionDAG &DAG) const {
6991 EVT RegVT = Op.getValueType();
6992 assert(RegVT == MVT::f16 && "Expected to lower an f16 load.");
6993 (void)RegVT;
6994
6995 // Load as integer.
6996 SDLoc DL(Op);
6997 SDValue NewLd;
6998 if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) {
6999 assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load");
7000 NewLd = DAG.getAtomicLoad(ISD::EXTLOAD, DL, MVT::i16, MVT::i64,
7001 AtomicLd->getChain(), AtomicLd->getBasePtr(),
7002 AtomicLd->getMemOperand());
7003 } else {
7004 LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
7005 assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load");
7006 NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i64, Ld->getChain(),
7007 Ld->getBasePtr(), Ld->getPointerInfo(), MVT::i16,
7008 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
7009 }
7010 SDValue F16Val = convertToF16(NewLd, DAG);
7011 return DAG.getMergeValues({F16Val, NewLd.getValue(1)}, DL);
7012}
7013
7014// Lower an f16 STORE in case of no vector support.
7015SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op,
7016 SelectionDAG &DAG) const {
7017 SDLoc DL(Op);
7018 SDValue Shft = convertFromF16(Op->getOperand(1), DL, DAG);
7019
7020 if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Op.getNode()))
7021 return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MVT::i16, AtomicSt->getChain(),
7022 Shft, AtomicSt->getBasePtr(),
7023 AtomicSt->getMemOperand());
7024
7025 StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
7026 return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(), MVT::i16,
7027 St->getMemOperand());
7028}
7029
7030SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
7031 SelectionDAG &DAG) const {
7032 SDLoc DL(Op);
7033 MVT ResultVT = Op.getSimpleValueType();
7034 SDValue Arg = Op.getOperand(0);
7035 unsigned Check = Op.getConstantOperandVal(1);
7036
7037 unsigned TDCMask = 0;
 7038 if (Check & fcSNan)
 7039 TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
 7040 if (Check & fcQNan)
 7041 TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
 7042 if (Check & fcPosInf)
 7043 TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
 7044 if (Check & fcNegInf)
 7045 TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
 7046 if (Check & fcPosNormal)
 7047 TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
 7048 if (Check & fcNegNormal)
 7049 TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
 7050 if (Check & fcPosSubnormal)
 7051 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
 7052 if (Check & fcNegSubnormal)
 7053 TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
7054 if (Check & fcPosZero)
7055 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
7056 if (Check & fcNegZero)
7057 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
7058 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
7059
7060 if (Arg.getSimpleValueType() == MVT::f16)
7061 Arg = DAG.getFPExtendOrRound(Arg, SDLoc(Arg), MVT::f32);
7062 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
7063 return getCCResult(DAG, Intr);
7064}
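// For example, a test for fcZero (fcPosZero | fcNegZero) becomes a TDC node
// with TDCMASK_ZERO_PLUS | TDCMASK_ZERO_MINUS; f16 arguments are extended to
// f32 first because TDC is only available for the larger FP formats.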
7065
7066SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
7067 SelectionDAG &DAG) const {
7068 SDLoc DL(Op);
7069 SDValue Chain = Op.getOperand(0);
7070
7071 // STCKF only supports a memory operand, so we have to use a temporary.
7072 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
7073 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
7074 MachinePointerInfo MPI =
 7075 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
 7076
7077 // Use STCFK to store the TOD clock into the temporary.
7078 SDValue StoreOps[] = {Chain, StackPtr};
7079 Chain = DAG.getMemIntrinsicNode(
7080 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
7081 MPI, MaybeAlign(), MachineMemOperand::MOStore);
7082
7083 // And read it back from there.
7084 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
7085}
7086
 7087 SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
 7088 SelectionDAG &DAG) const {
7089 switch (Op.getOpcode()) {
7090 case ISD::FRAMEADDR:
7091 return lowerFRAMEADDR(Op, DAG);
7092 case ISD::RETURNADDR:
7093 return lowerRETURNADDR(Op, DAG);
7094 case ISD::BR_CC:
7095 return lowerBR_CC(Op, DAG);
7096 case ISD::SELECT_CC:
7097 return lowerSELECT_CC(Op, DAG);
7098 case ISD::SETCC:
7099 return lowerSETCC(Op, DAG);
7100 case ISD::STRICT_FSETCC:
7101 return lowerSTRICT_FSETCC(Op, DAG, false);
 7102 case ISD::STRICT_FSETCCS:
 7103 return lowerSTRICT_FSETCC(Op, DAG, true);
7104 case ISD::GlobalAddress:
7105 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
 7106 case ISD::GlobalTLSAddress:
 7107 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
7108 case ISD::BlockAddress:
7109 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
7110 case ISD::JumpTable:
7111 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
7112 case ISD::ConstantPool:
7113 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
7114 case ISD::BITCAST:
7115 return lowerBITCAST(Op, DAG);
7116 case ISD::VASTART:
7117 return lowerVASTART(Op, DAG);
7118 case ISD::VACOPY:
7119 return lowerVACOPY(Op, DAG);
7120 case ISD::DYNAMIC_STACKALLOC:
7121 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7122 case ISD::GET_DYNAMIC_AREA_OFFSET:
7123 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
7124 case ISD::MULHS:
7125 return lowerMULH(Op, DAG, SystemZISD::SMUL_LOHI);
7126 case ISD::MULHU:
7127 return lowerMULH(Op, DAG, SystemZISD::UMUL_LOHI);
7128 case ISD::SMUL_LOHI:
7129 return lowerSMUL_LOHI(Op, DAG);
7130 case ISD::UMUL_LOHI:
7131 return lowerUMUL_LOHI(Op, DAG);
7132 case ISD::SDIVREM:
7133 return lowerSDIVREM(Op, DAG);
7134 case ISD::UDIVREM:
7135 return lowerUDIVREM(Op, DAG);
7136 case ISD::SADDO:
7137 case ISD::SSUBO:
7138 case ISD::UADDO:
7139 case ISD::USUBO:
7140 return lowerXALUO(Op, DAG);
7141 case ISD::UADDO_CARRY:
7142 case ISD::USUBO_CARRY:
7143 return lowerUADDSUBO_CARRY(Op, DAG);
7144 case ISD::OR:
7145 return lowerOR(Op, DAG);
7146 case ISD::CTPOP:
7147 return lowerCTPOP(Op, DAG);
7148 case ISD::VECREDUCE_ADD:
7149 return lowerVECREDUCE_ADD(Op, DAG);
7150 case ISD::ATOMIC_FENCE:
7151 return lowerATOMIC_FENCE(Op, DAG);
7152 case ISD::ATOMIC_SWAP:
7153 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
7154 case ISD::ATOMIC_STORE:
7155 return lowerATOMIC_STORE(Op, DAG);
7156 case ISD::ATOMIC_LOAD:
7157 return lowerATOMIC_LOAD(Op, DAG);
7158 case ISD::ATOMIC_LOAD_ADD:
7159 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
7160 case ISD::ATOMIC_LOAD_SUB:
7161 return lowerATOMIC_LOAD_SUB(Op, DAG);
7162 case ISD::ATOMIC_LOAD_AND:
7163 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
7164 case ISD::ATOMIC_LOAD_OR:
7165 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
7166 case ISD::ATOMIC_LOAD_XOR:
7167 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
7168 case ISD::ATOMIC_LOAD_NAND:
7169 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
7170 case ISD::ATOMIC_LOAD_MIN:
7171 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
7172 case ISD::ATOMIC_LOAD_MAX:
7173 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
7174 case ISD::ATOMIC_LOAD_UMIN:
7175 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
7176 case ISD::ATOMIC_LOAD_UMAX:
7177 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
7178 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
7179 return lowerATOMIC_CMP_SWAP(Op, DAG);
7180 case ISD::STACKSAVE:
7181 return lowerSTACKSAVE(Op, DAG);
7182 case ISD::STACKRESTORE:
7183 return lowerSTACKRESTORE(Op, DAG);
7184 case ISD::PREFETCH:
7185 return lowerPREFETCH(Op, DAG);
 7186 case ISD::INTRINSIC_W_CHAIN:
 7187 return lowerINTRINSIC_W_CHAIN(Op, DAG);
 7188 case ISD::INTRINSIC_WO_CHAIN:
 7189 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
7190 case ISD::BUILD_VECTOR:
7191 return lowerBUILD_VECTOR(Op, DAG);
 7192 case ISD::VECTOR_SHUFFLE:
 7193 return lowerVECTOR_SHUFFLE(Op, DAG);
 7194 case ISD::SCALAR_TO_VECTOR:
 7195 return lowerSCALAR_TO_VECTOR(Op, DAG);
 7196 case ISD::INSERT_VECTOR_ELT:
 7197 return lowerINSERT_VECTOR_ELT(Op, DAG);
 7198 case ISD::EXTRACT_VECTOR_ELT:
 7199 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
 7200 case ISD::SIGN_EXTEND_VECTOR_INREG:
 7201 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
 7202 case ISD::ZERO_EXTEND_VECTOR_INREG:
 7203 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
7204 case ISD::SHL:
7205 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
7206 case ISD::SRL:
7207 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
7208 case ISD::SRA:
7209 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
7210 case ISD::ADDRSPACECAST:
7211 return lowerAddrSpaceCast(Op, DAG);
7212 case ISD::ROTL:
7213 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
7214 case ISD::FSHL:
7215 return lowerFSHL(Op, DAG);
7216 case ISD::FSHR:
7217 return lowerFSHR(Op, DAG);
7218 case ISD::FP_EXTEND:
 7219 case ISD::STRICT_FP_EXTEND:
 7220 return lowerFP_EXTEND(Op, DAG);
7221 case ISD::FP_TO_UINT:
7222 case ISD::FP_TO_SINT:
 7223 case ISD::STRICT_FP_TO_UINT:
 7224 case ISD::STRICT_FP_TO_SINT:
 7225 return lower_FP_TO_INT(Op, DAG);
7226 case ISD::UINT_TO_FP:
7227 case ISD::SINT_TO_FP:
 7228 case ISD::STRICT_UINT_TO_FP:
 7229 case ISD::STRICT_SINT_TO_FP:
 7230 return lower_INT_TO_FP(Op, DAG);
7231 case ISD::LOAD:
7232 return lowerLoadF16(Op, DAG);
7233 case ISD::STORE:
7234 return lowerStoreF16(Op, DAG);
7235 case ISD::IS_FPCLASS:
7236 return lowerIS_FPCLASS(Op, DAG);
7237 case ISD::GET_ROUNDING:
7238 return lowerGET_ROUNDING(Op, DAG);
7239 case ISD::READCYCLECOUNTER:
7240 return lowerREADCYCLECOUNTER(Op, DAG);
 7241 case ISD::EH_SJLJ_SETJMP:
 7242 case ISD::EH_SJLJ_LONGJMP:
 7243 // These operations are legal on our platform, but we cannot actually
7244 // set the operation action to Legal as common code would treat this
7245 // as equivalent to Expand. Instead, we keep the operation action to
7246 // Custom and just leave them unchanged here.
7247 return Op;
7248
7249 default:
7250 llvm_unreachable("Unexpected node to lower");
7251 }
7252}
7253
 7254 static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src,
 7255 const SDLoc &SL) {
7256 // If i128 is legal, just use a normal bitcast.
7257 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7258 return DAG.getBitcast(MVT::f128, Src);
7259
7260 // Otherwise, f128 must live in FP128, so do a partwise move.
 7261 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
 7262 &SystemZ::FP128BitRegClass);
7263
7264 SDValue Hi, Lo;
7265 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
7266
7267 Hi = DAG.getBitcast(MVT::f64, Hi);
7268 Lo = DAG.getBitcast(MVT::f64, Lo);
7269
7270 SDNode *Pair = DAG.getMachineNode(
7271 SystemZ::REG_SEQUENCE, SL, MVT::f128,
7272 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
7273 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
7274 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
7275 return SDValue(Pair, 0);
7276}
7277
 7278 static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src,
 7279 const SDLoc &SL) {
7280 // If i128 is legal, just use a normal bitcast.
7281 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7282 return DAG.getBitcast(MVT::i128, Src);
7283
7284 // Otherwise, f128 must live in FP128, so do a partwise move.
 7285 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) ==
 7286 &SystemZ::FP128BitRegClass);
7287
7288 SDValue LoFP =
7289 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
7290 SDValue HiFP =
7291 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
7292 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
7293 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
7294
7295 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
7296}
7297
7298// Lower operations with invalid operand or result types.
7299void
 7300 SystemZTargetLowering::LowerOperationWrapper(SDNode *N,
 7301 SmallVectorImpl<SDValue> &Results,
 7302 SelectionDAG &DAG) const {
7303 switch (N->getOpcode()) {
7304 case ISD::ATOMIC_LOAD: {
7305 SDLoc DL(N);
7306 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
7307 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
7308 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7309 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
7310 DL, Tys, Ops, MVT::i128, MMO);
7311
7312 SDValue Lowered = lowerGR128ToI128(DAG, Res);
7313 if (N->getValueType(0) == MVT::f128)
7314 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
7315 Results.push_back(Lowered);
7316 Results.push_back(Res.getValue(1));
7317 break;
7318 }
7319 case ISD::ATOMIC_STORE: {
7320 SDLoc DL(N);
7321 SDVTList Tys = DAG.getVTList(MVT::Other);
7322 SDValue Val = N->getOperand(1);
7323 if (Val.getValueType() == MVT::f128)
7324 Val = expandBitCastF128ToI128(DAG, Val, DL);
7325 Val = lowerI128ToGR128(DAG, Val);
7326
7327 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
7328 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7329 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
7330 DL, Tys, Ops, MVT::i128, MMO);
7331 // We have to enforce sequential consistency by performing a
7332 // serialization operation after the store.
7333 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
 7334 AtomicOrdering::SequentiallyConsistent)
 7335 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
7336 MVT::Other, Res), 0);
7337 Results.push_back(Res);
7338 break;
7339 }
7340 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
7341 SDLoc DL(N);
7342 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
7343 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
7344 lowerI128ToGR128(DAG, N->getOperand(2)),
7345 lowerI128ToGR128(DAG, N->getOperand(3)) };
7346 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7347 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
7348 DL, Tys, Ops, MVT::i128, MMO);
7349 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
 7350 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ);
 7351 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
7352 Results.push_back(lowerGR128ToI128(DAG, Res));
7353 Results.push_back(Success);
7354 Results.push_back(Res.getValue(2));
7355 break;
7356 }
7357 case ISD::BITCAST: {
7358 if (useSoftFloat())
7359 return;
7360 SDLoc DL(N);
7361 SDValue Src = N->getOperand(0);
7362 EVT SrcVT = Src.getValueType();
7363 EVT ResVT = N->getValueType(0);
7364 if (ResVT == MVT::i128 && SrcVT == MVT::f128)
7365 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
7366 else if (SrcVT == MVT::i16 && ResVT == MVT::f16) {
7367 if (Subtarget.hasVector()) {
7368 SDValue In32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
7369 Results.push_back(SDValue(
7370 DAG.getMachineNode(SystemZ::LEFR_16, DL, MVT::f16, In32), 0));
7371 } else {
7372 SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Src);
7373 Results.push_back(convertToF16(In64, DAG));
7374 }
7375 } else if (SrcVT == MVT::f16 && ResVT == MVT::i16) {
7376 SDValue ExtractedVal =
7377 Subtarget.hasVector()
7378 ? SDValue(DAG.getMachineNode(SystemZ::LFER_16, DL, MVT::i32, Src),
7379 0)
7380 : convertFromF16(Src, DL, DAG);
7381 Results.push_back(DAG.getZExtOrTrunc(ExtractedVal, DL, ResVT));
7382 }
7383 break;
7384 }
7385 case ISD::UINT_TO_FP:
7386 case ISD::SINT_TO_FP:
 7387 case ISD::STRICT_UINT_TO_FP:
 7388 case ISD::STRICT_SINT_TO_FP: {
 7389 if (useSoftFloat())
7390 return;
7391 bool IsStrict = N->isStrictFPOpcode();
7392 SDLoc DL(N);
7393 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7394 EVT ResVT = N->getValueType(0);
7395 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7396 if (ResVT == MVT::f16) {
7397 if (!IsStrict) {
7398 SDValue OpF32 = DAG.getNode(N->getOpcode(), DL, MVT::f32, InOp);
7399 Results.push_back(DAG.getFPExtendOrRound(OpF32, DL, MVT::f16));
7400 } else {
7401 SDValue OpF32 =
7402 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
7403 {Chain, InOp});
7404 SDValue F16Res;
7405 std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound(
7406 OpF32, OpF32.getValue(1), DL, MVT::f16);
7407 Results.push_back(F16Res);
7408 Results.push_back(Chain);
7409 }
7410 }
7411 break;
7412 }
7413 case ISD::FP_TO_UINT:
7414 case ISD::FP_TO_SINT:
 7415 case ISD::STRICT_FP_TO_UINT:
 7416 case ISD::STRICT_FP_TO_SINT: {
 7417 if (useSoftFloat())
7418 return;
7419 bool IsStrict = N->isStrictFPOpcode();
7420 SDLoc DL(N);
7421 EVT ResVT = N->getValueType(0);
7422 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7423 EVT InVT = InOp->getValueType(0);
7424 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7425 if (InVT == MVT::f16) {
7426 if (!IsStrict) {
7427 SDValue InF32 = DAG.getFPExtendOrRound(InOp, DL, MVT::f32);
7428 Results.push_back(DAG.getNode(N->getOpcode(), DL, ResVT, InF32));
7429 } else {
7430 SDValue InF32;
7431 std::tie(InF32, Chain) =
7432 DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32);
7433 SDValue OpF32 =
7434 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other),
7435 {Chain, InF32});
7436 Results.push_back(OpF32);
7437 Results.push_back(OpF32.getValue(1));
7438 }
7439 }
7440 break;
7441 }
7442 default:
7443 llvm_unreachable("Unexpected node to lower");
7444 }
7445}
7446
7447void
 7448 SystemZTargetLowering::ReplaceNodeResults(SDNode *N,
 7449 SmallVectorImpl<SDValue> &Results,
 7450 SelectionDAG &DAG) const {
 7451 return LowerOperationWrapper(N, Results, DAG);
 7452 }
 7453
7454// Return true if VT is a vector whose elements are a whole number of bytes
7455// in width. Also check for presence of vector support.
7456bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
7457 if (!Subtarget.hasVector())
7458 return false;
7459
7460 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
7461}
7462
7463// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
7464// producing a result of type ResVT. Op is a possibly bitcast version
7465// of the input vector and Index is the index (based on type VecVT) that
7466// should be extracted. Return the new extraction if a simplification
7467// was possible or if Force is true.
7468SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
7469 EVT VecVT, SDValue Op,
7470 unsigned Index,
7471 DAGCombinerInfo &DCI,
7472 bool Force) const {
7473 SelectionDAG &DAG = DCI.DAG;
7474
7475 // The number of bytes being extracted.
7476 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7477
7478 for (;;) {
7479 unsigned Opcode = Op.getOpcode();
7480 if (Opcode == ISD::BITCAST)
7481 // Look through bitcasts.
7482 Op = Op.getOperand(0);
7483 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
7484 canTreatAsByteVector(Op.getValueType())) {
7485 // Get a VPERM-like permute mask and see whether the bytes covered
7486 // by the extracted element are a contiguous sequence from one
7487 // source operand.
 7488 SmallVector<int, SystemZ::VectorBytes> Bytes;
 7489 if (!getVPermMask(Op, Bytes))
7490 break;
7491 int First;
7492 if (!getShuffleInput(Bytes, Index * BytesPerElement,
7493 BytesPerElement, First))
7494 break;
7495 if (First < 0)
7496 return DAG.getUNDEF(ResVT);
7497 // Make sure the contiguous sequence starts at a multiple of the
7498 // original element size.
7499 unsigned Byte = unsigned(First) % Bytes.size();
7500 if (Byte % BytesPerElement != 0)
7501 break;
7502 // We can get the extracted value directly from an input.
7503 Index = Byte / BytesPerElement;
7504 Op = Op.getOperand(unsigned(First) / Bytes.size());
7505 Force = true;
7506 } else if (Opcode == ISD::BUILD_VECTOR &&
7507 canTreatAsByteVector(Op.getValueType())) {
7508 // We can only optimize this case if the BUILD_VECTOR elements are
7509 // at least as wide as the extracted value.
7510 EVT OpVT = Op.getValueType();
7511 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7512 if (OpBytesPerElement < BytesPerElement)
7513 break;
7514 // Make sure that the least-significant bit of the extracted value
7515 // is the least significant bit of an input.
7516 unsigned End = (Index + 1) * BytesPerElement;
7517 if (End % OpBytesPerElement != 0)
7518 break;
7519 // We're extracting the low part of one operand of the BUILD_VECTOR.
7520 Op = Op.getOperand(End / OpBytesPerElement - 1);
7521 if (!Op.getValueType().isInteger()) {
7522 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
7523 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
7524 DCI.AddToWorklist(Op.getNode());
7525 }
7526 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
7527 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7528 if (VT != ResVT) {
7529 DCI.AddToWorklist(Op.getNode());
7530 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
7531 }
7532 return Op;
7533 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
 7534 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
 7535 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
7536 canTreatAsByteVector(Op.getValueType()) &&
7537 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
7538 // Make sure that only the unextended bits are significant.
7539 EVT ExtVT = Op.getValueType();
7540 EVT OpVT = Op.getOperand(0).getValueType();
7541 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
7542 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7543 unsigned Byte = Index * BytesPerElement;
7544 unsigned SubByte = Byte % ExtBytesPerElement;
7545 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
7546 if (SubByte < MinSubByte ||
7547 SubByte + BytesPerElement > ExtBytesPerElement)
7548 break;
7549 // Get the byte offset of the unextended element
7550 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
7551 // ...then add the byte offset relative to that element.
7552 Byte += SubByte - MinSubByte;
7553 if (Byte % BytesPerElement != 0)
7554 break;
7555 Op = Op.getOperand(0);
7556 Index = Byte / BytesPerElement;
7557 Force = true;
7558 } else
7559 break;
7560 }
7561 if (Force) {
7562 if (Op.getValueType() != VecVT) {
7563 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
7564 DCI.AddToWorklist(Op.getNode());
7565 }
7566 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
7567 DAG.getConstant(Index, DL, MVT::i32));
7568 }
7569 return SDValue();
7570}
7571
7572// Optimize vector operations in scalar value Op on the basis that Op
7573// is truncated to TruncVT.
7574SDValue SystemZTargetLowering::combineTruncateExtract(
7575 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
7576 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
7577 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
7578 // of type TruncVT.
7579 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7580 TruncVT.getSizeInBits() % 8 == 0) {
7581 SDValue Vec = Op.getOperand(0);
7582 EVT VecVT = Vec.getValueType();
7583 if (canTreatAsByteVector(VecVT)) {
7584 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7585 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7586 unsigned TruncBytes = TruncVT.getStoreSize();
7587 if (BytesPerElement % TruncBytes == 0) {
7588 // Calculate the value of Y' in the above description. We are
7589 // splitting the original elements into Scale equal-sized pieces
7590 // and for truncation purposes want the last (least-significant)
7591 // of these pieces for IndexN. This is easiest to do by calculating
7592 // the start index of the following element and then subtracting 1.
7593 unsigned Scale = BytesPerElement / TruncBytes;
7594 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
7595
7596 // Defer the creation of the bitcast from X to combineExtract,
7597 // which might be able to optimize the extraction.
7598 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
7599 MVT::getIntegerVT(TruncBytes * 8),
7600 VecVT.getStoreSize() / TruncBytes);
7601 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
7602 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
7603 }
7604 }
7605 }
7606 }
7607 return SDValue();
7608}
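// For example, (truncstore i8 (extract_vector_elt v4i32 X, 1)) is rewritten
// so that the extraction uses a v16i8 view of X with index
// (1 + 1) * 4 - 1 = 7, i.e. the last byte of the original element, and the
// value is extracted as an i32 because TruncBytes < 4.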
7609
7610SDValue SystemZTargetLowering::combineZERO_EXTEND(
7611 SDNode *N, DAGCombinerInfo &DCI) const {
7612 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
7613 SelectionDAG &DAG = DCI.DAG;
7614 SDValue N0 = N->getOperand(0);
7615 EVT VT = N->getValueType(0);
7616 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
7617 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
7618 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7619 if (TrueOp && FalseOp) {
7620 SDLoc DL(N0);
7621 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
7622 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
7623 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
7624 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
7625 // If N0 has multiple uses, change other uses as well.
7626 if (!N0.hasOneUse()) {
7627 SDValue TruncSelect =
7628 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
7629 DCI.CombineTo(N0.getNode(), TruncSelect);
7630 }
7631 return NewSelect;
7632 }
7633 }
7634 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
7635 // of the result is smaller than the size of X and all the truncated bits
7636 // of X are already zero.
7637 if (N0.getOpcode() == ISD::XOR &&
7638 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
7639 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7640 N0.getOperand(1).getOpcode() == ISD::Constant) {
7641 SDValue X = N0.getOperand(0).getOperand(0);
7642 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
7643 KnownBits Known = DAG.computeKnownBits(X);
7644 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
7645 N0.getValueSizeInBits(),
7646 VT.getSizeInBits());
7647 if (TruncatedBits.isSubsetOf(Known.Zero)) {
7648 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7649 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
7650 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
7651 X, DAG.getConstant(Mask, SDLoc(N0), VT));
7652 }
7653 }
7654 }
7655 // Recognize patterns for VECTOR SUBTRACT COMPUTE BORROW INDICATION
7656 // and VECTOR ADD COMPUTE CARRY for i128:
7657 // (zext (setcc_uge X Y)) --> (VSCBI X Y)
7658 // (zext (setcc_ule Y X)) --> (VSCBI X Y)
7659 // (zext (setcc_ult (add X Y) X/Y) -> (VACC X Y)
7660 // (zext (setcc_ugt X/Y (add X Y)) -> (VACC X Y)
7661 // For vector types, these patterns are recognized in the .td file.
7662 if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT == MVT::i128 &&
7663 N0.getOperand(0).getValueType() == VT) {
7664 SDValue Op0 = N0.getOperand(0);
7665 SDValue Op1 = N0.getOperand(1);
7666 const ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7667 switch (CC) {
7668 case ISD::SETULE:
7669 std::swap(Op0, Op1);
7670 [[fallthrough]];
7671 case ISD::SETUGE:
7672 return DAG.getNode(SystemZISD::VSCBI, SDLoc(N0), VT, Op0, Op1);
7673 case ISD::SETUGT:
7674 std::swap(Op0, Op1);
7675 [[fallthrough]];
7676 case ISD::SETULT:
7677 if (Op0->hasOneUse() && Op0->getOpcode() == ISD::ADD &&
7678 (Op0->getOperand(0) == Op1 || Op0->getOperand(1) == Op1))
7679 return DAG.getNode(SystemZISD::VACC, SDLoc(N0), VT, Op0->getOperand(0),
7680 Op0->getOperand(1));
7681 break;
7682 default:
7683 break;
7684 }
7685 }
7686
7687 return SDValue();
7688}
7689
7690SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
7691 SDNode *N, DAGCombinerInfo &DCI) const {
7692 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
7693 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
7694 // into (select_cc LHS, RHS, -1, 0, COND)
7695 SelectionDAG &DAG = DCI.DAG;
7696 SDValue N0 = N->getOperand(0);
7697 EVT VT = N->getValueType(0);
7698 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7699 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
7700 N0 = N0.getOperand(0);
7701 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
7702 SDLoc DL(N0);
7703 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
7704 DAG.getAllOnesConstant(DL, VT),
7705 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
7706 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
7707 }
7708 return SDValue();
7709}
7710
7711SDValue SystemZTargetLowering::combineSIGN_EXTEND(
7712 SDNode *N, DAGCombinerInfo &DCI) const {
7713 // Convert (sext (ashr (shl X, C1), C2)) to
7714 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
7715 // cheap as narrower ones.
7716 SelectionDAG &DAG = DCI.DAG;
7717 SDValue N0 = N->getOperand(0);
7718 EVT VT = N->getValueType(0);
7719 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
7720 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7721 SDValue Inner = N0.getOperand(0);
7722 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
7723 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
7724 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7725 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
7726 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
7727 EVT ShiftVT = N0.getOperand(1).getValueType();
7728 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
7729 Inner.getOperand(0));
7730 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
7731 DAG.getConstant(NewShlAmt, SDLoc(Inner),
7732 ShiftVT));
7733 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
7734 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
7735 }
7736 }
7737 }
7738
7739 return SDValue();
7740}
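// For example, (sext i32 -> i64 (sra (shl X, 24), 24)) becomes
// (sra (shl (anyext X to i64), 56), 56): Extra is 32 here, so both shift
// amounts grow by the width difference and the result is equivalent.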
7741
7742SDValue SystemZTargetLowering::combineMERGE(
7743 SDNode *N, DAGCombinerInfo &DCI) const {
7744 SelectionDAG &DAG = DCI.DAG;
7745 unsigned Opcode = N->getOpcode();
7746 SDValue Op0 = N->getOperand(0);
7747 SDValue Op1 = N->getOperand(1);
7748 if (Op0.getOpcode() == ISD::BITCAST)
7749 Op0 = Op0.getOperand(0);
 7750 if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
 7751 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
7752 // for v4f32.
7753 if (Op1 == N->getOperand(0))
7754 return Op1;
7755 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
7756 EVT VT = Op1.getValueType();
7757 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
7758 if (ElemBytes <= 4) {
7759 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
7760 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
7761 EVT InVT = VT.changeVectorElementTypeToInteger();
7762 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
7763 SystemZ::VectorBytes / ElemBytes / 2);
7764 if (VT != InVT) {
7765 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
7766 DCI.AddToWorklist(Op1.getNode());
7767 }
7768 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
7769 DCI.AddToWorklist(Op.getNode());
7770 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
7771 }
7772 }
7773 return SDValue();
7774}
7775
7776static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7777 SDNode *&HiPart) {
7778 LoPart = HiPart = nullptr;
7779
7780 // Scan through all users.
7781 for (SDUse &Use : LD->uses()) {
7782 // Skip the uses of the chain.
7783 if (Use.getResNo() != 0)
7784 continue;
7785
7786 // Verify every user is a TRUNCATE to i64 of the low or high half.
7787 SDNode *User = Use.getUser();
7788 bool IsLoPart = true;
7789 if (User->getOpcode() == ISD::SRL &&
7790 User->getOperand(1).getOpcode() == ISD::Constant &&
7791 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
7792 User = *User->user_begin();
7793 IsLoPart = false;
7794 }
7795 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
7796 return false;
7797
7798 if (IsLoPart) {
7799 if (LoPart)
7800 return false;
7801 LoPart = User;
7802 } else {
7803 if (HiPart)
7804 return false;
7805 HiPart = User;
7806 }
7807 }
7808 return true;
7809}
7810
7811static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7812 SDNode *&HiPart) {
7813 LoPart = HiPart = nullptr;
7814
7815 // Scan through all users.
7816 for (SDUse &Use : LD->uses()) {
7817 // Skip the uses of the chain.
7818 if (Use.getResNo() != 0)
7819 continue;
7820
7821 // Verify every user is an EXTRACT_SUBREG of the low or high half.
7822 SDNode *User = Use.getUser();
7823 if (!User->hasOneUse() || !User->isMachineOpcode() ||
7824 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
7825 return false;
7826
7827 switch (User->getConstantOperandVal(1)) {
7828 case SystemZ::subreg_l64:
7829 if (LoPart)
7830 return false;
7831 LoPart = User;
7832 break;
7833 case SystemZ::subreg_h64:
7834 if (HiPart)
7835 return false;
7836 HiPart = User;
7837 break;
7838 default:
7839 return false;
7840 }
7841 }
7842 return true;
7843}
7844
7845SDValue SystemZTargetLowering::combineLOAD(
7846 SDNode *N, DAGCombinerInfo &DCI) const {
7847 SelectionDAG &DAG = DCI.DAG;
7848 EVT LdVT = N->getValueType(0);
7849 if (auto *LN = dyn_cast<LoadSDNode>(N)) {
7850 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
7851 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7852 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
7853 if (PtrVT != LoadNodeVT) {
7854 SDLoc DL(LN);
7855 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
7856 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0);
7857 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0),
7858 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(),
7859 LN->getMemOperand());
7860 }
7861 }
7862 }
7863 SDLoc DL(N);
7864
7865 // Replace a 128-bit load that is used solely to move its value into GPRs
7866 // by separate loads of both halves.
7867 LoadSDNode *LD = cast<LoadSDNode>(N);
7868 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
7869 SDNode *LoPart, *HiPart;
7870 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
7871 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
7872 // Rewrite each extraction as an independent load.
7873 SmallVector<SDValue, 2> ArgChains;
7874 if (HiPart) {
7875 SDValue EltLoad = DAG.getLoad(
7876 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
7877 LD->getPointerInfo(), LD->getBaseAlign(),
7878 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7879
7880 DCI.CombineTo(HiPart, EltLoad, true);
7881 ArgChains.push_back(EltLoad.getValue(1));
7882 }
7883 if (LoPart) {
7884 SDValue EltLoad = DAG.getLoad(
7885 LoPart->getValueType(0), DL, LD->getChain(),
7886 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
7887 LD->getPointerInfo().getWithOffset(8), LD->getBaseAlign(),
7888 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7889
7890 DCI.CombineTo(LoPart, EltLoad, true);
7891 ArgChains.push_back(EltLoad.getValue(1));
7892 }
7893
7894 // Collect all chains via TokenFactor.
7895 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
7896 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
7897 DCI.AddToWorklist(Chain.getNode());
7898 return SDValue(N, 0);
7899 }
7900 }
7901
7902 if (LdVT.isVector() || LdVT.isInteger())
7903 return SDValue();
7904 // Transform a scalar load that is REPLICATEd as well as having other
7905 // use(s) to the form where the other use(s) use the first element of the
7906 // REPLICATE instead of the load. Otherwise instruction selection will not
7907 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
7908 // point loads.
7909
7910 SDValue Replicate;
7911 SmallVector<SDNode*, 8> OtherUses;
7912 for (SDUse &Use : N->uses()) {
7913 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) {
7914 if (Replicate)
7915 return SDValue(); // Should never happen
7916 Replicate = SDValue(Use.getUser(), 0);
7917 } else if (Use.getResNo() == 0)
7918 OtherUses.push_back(Use.getUser());
7919 }
7920 if (!Replicate || OtherUses.empty())
7921 return SDValue();
7922
7923 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
7924 Replicate, DAG.getConstant(0, DL, MVT::i32));
7925 // Update uses of the loaded Value while preserving old chains.
7926 for (SDNode *U : OtherUses) {
 7927 SmallVector<SDValue, 8> Ops;
 7928 for (SDValue Op : U->ops())
7929 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
7930 DAG.UpdateNodeOperands(U, Ops);
7931 }
7932 return SDValue(N, 0);
7933}
7934
7935bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
7936 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
7937 return true;
7938 if (Subtarget.hasVectorEnhancements2())
7939 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
7940 return true;
7941 return false;
7942}
7943
 7944 static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
 7945 if (!VT.isVector() || !VT.isSimple() ||
7946 VT.getSizeInBits() != 128 ||
7947 VT.getScalarSizeInBits() % 8 != 0)
7948 return false;
7949
7950 unsigned NumElts = VT.getVectorNumElements();
7951 for (unsigned i = 0; i < NumElts; ++i) {
7952 if (M[i] < 0) continue; // ignore UNDEF indices
7953 if ((unsigned) M[i] != NumElts - 1 - i)
7954 return false;
7955 }
7956
7957 return true;
7958}
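// In other words, only a full element reversal of a 128-bit vector is
// accepted, e.g. the mask <3, 2, 1, 0> for v4i32 (undef elements allowed),
// which is what the element-reversal instructions such as VSTER implement.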
7959
7960static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
7961 for (auto *U : StoredVal->users()) {
7962 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
7963 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
7964 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
7965 continue;
7966 } else if (isa<BuildVectorSDNode>(U)) {
7967 SDValue BuildVector = SDValue(U, 0);
7968 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
7969 isOnlyUsedByStores(BuildVector, DAG))
7970 continue;
7971 }
7972 return false;
7973 }
7974 return true;
7975}
7976
7977static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
7978 SDValue &HiPart) {
7979 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
7980 return false;
7981
7982 SDValue Op0 = Val.getOperand(0);
7983 SDValue Op1 = Val.getOperand(1);
7984
7985 if (Op0.getOpcode() == ISD::SHL)
7986 std::swap(Op0, Op1);
7987 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
7988 Op1.getOperand(1).getOpcode() != ISD::Constant ||
7989 Op1.getConstantOperandVal(1) != 64)
7990 return false;
7991 Op1 = Op1.getOperand(0);
7992
7993 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
7994 Op0.getOperand(0).getValueType() != MVT::i64)
7995 return false;
7996 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
7997 Op1.getOperand(0).getValueType() != MVT::i64)
7998 return false;
7999
8000 LoPart = Op0.getOperand(0);
8001 HiPart = Op1.getOperand(0);
8002 return true;
8003}
8004
8005static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
8006 SDValue &HiPart) {
8007 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
8008 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
8009 return false;
8010
8011 if (Val->getNumOperands() != 5 ||
8012 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
8013 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
8014 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
8015 return false;
8016
8017 LoPart = Val->getOperand(1);
8018 HiPart = Val->getOperand(3);
8019 return true;
8020}
8021
8022SDValue SystemZTargetLowering::combineSTORE(
8023 SDNode *N, DAGCombinerInfo &DCI) const {
8024 SelectionDAG &DAG = DCI.DAG;
8025 auto *SN = cast<StoreSDNode>(N);
8026 auto &Op1 = N->getOperand(1);
8027 EVT MemVT = SN->getMemoryVT();
8028
8029 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
8030 MVT PtrVT = getPointerTy(DAG.getDataLayout());
8031 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
8032 if (PtrVT != StoreNodeVT) {
8033 SDLoc DL(SN);
8034 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, SN->getBasePtr(),
8035 SYSTEMZAS::PTR32, 0);
8036 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast,
8037 SN->getPointerInfo(), SN->getBaseAlign(),
8038 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8039 }
8040 }
8041
8042 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
8043 // for the extraction to be done on a vMiN value, so that we can use VSTE.
8044 // If X has wider elements then convert it to:
8045 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
8046 if (MemVT.isInteger() && SN->isTruncatingStore()) {
8047 if (SDValue Value =
8048 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
8049 DCI.AddToWorklist(Value.getNode());
8050
8051 // Rewrite the store with the new form of stored value.
8052 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
8053 SN->getBasePtr(), SN->getMemoryVT(),
8054 SN->getMemOperand());
8055 }
8056 }
8057 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
8058 if (!SN->isTruncatingStore() &&
8059 Op1.getOpcode() == ISD::BSWAP &&
8060 Op1.getNode()->hasOneUse() &&
8061 canLoadStoreByteSwapped(Op1.getValueType())) {
8062
8063 SDValue BSwapOp = Op1.getOperand(0);
8064
8065 if (BSwapOp.getValueType() == MVT::i16)
8066 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
8067
8068 SDValue Ops[] = {
8069 N->getOperand(0), BSwapOp, N->getOperand(2)
8070 };
8071
8072 return
8073 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
8074 Ops, MemVT, SN->getMemOperand());
8075 }
8076 // Combine STORE (element-swap) into VSTER
8077 if (!SN->isTruncatingStore() &&
8078 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
8079 Op1.getNode()->hasOneUse() &&
8080 Subtarget.hasVectorEnhancements2()) {
8081 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
8082 ArrayRef<int> ShuffleMask = SVN->getMask();
8083 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
8084 SDValue Ops[] = {
8085 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
8086 };
8087
8088 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
8089 DAG.getVTList(MVT::Other),
8090 Ops, MemVT, SN->getMemOperand());
8091 }
8092 }
8093
8094 // Combine STORE (READCYCLECOUNTER) into STCKF.
8095 if (!SN->isTruncatingStore() &&
8096 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
8097 Op1.hasOneUse() &&
8098 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
8099 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
8100 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
8101 DAG.getVTList(MVT::Other),
8102 Ops, MemVT, SN->getMemOperand());
8103 }
8104
8105 // Transform a store of a 128-bit value moved from parts into two stores.
8106 if (SN->isSimple() && ISD::isNormalStore(SN)) {
8107 SDValue LoPart, HiPart;
8108 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
8109 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
8110 SDLoc DL(SN);
8111 SDValue Chain0 = DAG.getStore(
8112 SN->getChain(), DL, HiPart, SN->getBasePtr(), SN->getPointerInfo(),
8113 SN->getBaseAlign(), SN->getMemOperand()->getFlags(), SN->getAAInfo());
8114 SDValue Chain1 = DAG.getStore(
8115 SN->getChain(), DL, LoPart,
8116 DAG.getObjectPtrOffset(DL, SN->getBasePtr(), TypeSize::getFixed(8)),
8117 SN->getPointerInfo().getWithOffset(8), SN->getBaseAlign(),
8118 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8119
8120 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
8121 }
8122 }
8123
8124 // Replicate a reg or immediate with VREP instead of scalar multiply or
8125 // immediate load. It seems best to do this during the first DAGCombine as
8126 // it is straightforward to handle the zero-extend node in the initial
8127 // DAG, and also not worry about keeping the new MemVT legal (e.g. when
8128 // extracting an i16 element from a v16i8 vector).
8129 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
8130 isOnlyUsedByStores(Op1, DAG)) {
8131 SDValue Word = SDValue();
8132 EVT WordVT;
8133
8134 // Find a replicated immediate; if one is found, return it in Word and its
8135 // type in WordVT.
8136 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
8137 // Some constants are better handled with a scalar store.
8138 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
8139 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
8140 return;
8141
8142 APInt Val = C->getAPIntValue();
8143 // Truncate Val in case of a truncating store.
8144 if (!llvm::isUIntN(TotBytes * 8, Val.getZExtValue())) {
8145 assert(SN->isTruncatingStore() &&
8146 "Non-truncating store and immediate value does not fit?");
8147 Val = Val.trunc(TotBytes * 8);
8148 }
8149
8150 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
8151 if (VCI.isVectorConstantLegal(Subtarget) &&
8152 VCI.Opcode == SystemZISD::REPLICATE) {
8153 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
8154 WordVT = VCI.VecVT.getScalarType();
8155 }
8156 };
8157
8158 // Find a replicated register; if one is found, return it in Word and its
8159 // type in WordVT.
8160 auto FindReplicatedReg = [&](SDValue MulOp) {
8161 EVT MulVT = MulOp.getValueType();
8162 if (MulOp->getOpcode() == ISD::MUL &&
8163 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
8164 // Find a zero extended value and its type.
8165 SDValue LHS = MulOp->getOperand(0);
8166 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
8167 WordVT = LHS->getOperand(0).getValueType();
8168 else if (LHS->getOpcode() == ISD::AssertZext)
8169 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
8170 else
8171 return;
8172 // Find a replicating constant, e.g. 0x00010001.
8173 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
8174 SystemZVectorConstantInfo VCI(
8175 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
8176 if (VCI.isVectorConstantLegal(Subtarget) &&
8177 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
8178 WordVT == VCI.VecVT.getScalarType())
8179 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
8180 }
8181 }
8182 };
8183
8184 if (isa<BuildVectorSDNode>(Op1) &&
8185 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
8186 SDValue SplatVal = Op1->getOperand(0);
8187 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
8188 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
8189 else
8190 FindReplicatedReg(SplatVal);
8191 } else {
8192 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
8193 FindReplicatedImm(C, MemVT.getStoreSize());
8194 else
8195 FindReplicatedReg(Op1);
8196 }
8197
8198 if (Word != SDValue()) {
8199 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
8200 "Bad type handling");
8201 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
8202 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
8203 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
8204 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
8205 SN->getBasePtr(), SN->getMemOperand());
8206 }
8207 }
8208
8209 return SDValue();
8210}
8211
8212SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
8213 SDNode *N, DAGCombinerInfo &DCI) const {
8214 SelectionDAG &DAG = DCI.DAG;
8215 // Combine element-swap (LOAD) into VLER
8216 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8217 N->getOperand(0).hasOneUse() &&
8218 Subtarget.hasVectorEnhancements2()) {
8219 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
8220 ArrayRef<int> ShuffleMask = SVN->getMask();
8221 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
8222 SDValue Load = N->getOperand(0);
8223 LoadSDNode *LD = cast<LoadSDNode>(Load);
8224
8225 // Create the element-swapping load.
8226 SDValue Ops[] = {
8227 LD->getChain(), // Chain
8228 LD->getBasePtr() // Ptr
8229 };
8230 SDValue ESLoad =
8231 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
8232 DAG.getVTList(LD->getValueType(0), MVT::Other),
8233 Ops, LD->getMemoryVT(), LD->getMemOperand());
8234
8235 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
8236 // by the load dead.
8237 DCI.CombineTo(N, ESLoad);
8238
8239 // Next, combine the load away; we give it a bogus result value but a real
8240 // chain result. The result value is dead because the shuffle is dead.
8241 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
8242
8243 // Return N so it doesn't get rechecked!
8244 return SDValue(N, 0);
8245 }
8246 }
8247
8248 return SDValue();
8249}
8250
8251SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
8252 SDNode *N, DAGCombinerInfo &DCI) const {
8253 SelectionDAG &DAG = DCI.DAG;
8254
8255 if (!Subtarget.hasVector())
8256 return SDValue();
8257
8258 // Look through bitcasts that retain the number of vector elements.
8259 SDValue Op = N->getOperand(0);
8260 if (Op.getOpcode() == ISD::BITCAST &&
8261 Op.getValueType().isVector() &&
8262 Op.getOperand(0).getValueType().isVector() &&
8263 Op.getValueType().getVectorNumElements() ==
8264 Op.getOperand(0).getValueType().getVectorNumElements())
8265 Op = Op.getOperand(0);
8266
8267 // Pull BSWAP out of a vector extraction.
8268 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
8269 EVT VecVT = Op.getValueType();
8270 EVT EltVT = VecVT.getVectorElementType();
8271 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
8272 Op.getOperand(0), N->getOperand(1));
8273 DCI.AddToWorklist(Op.getNode());
8274 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
8275 if (EltVT != N->getValueType(0)) {
8276 DCI.AddToWorklist(Op.getNode());
8277 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
8278 }
8279 return Op;
8280 }
8281
8282 // Try to simplify a vector extraction.
8283 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
8284 SDValue Op0 = N->getOperand(0);
8285 EVT VecVT = Op0.getValueType();
8286 if (canTreatAsByteVector(VecVT))
8287 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
8288 IndexN->getZExtValue(), DCI, false);
8289 }
8290 return SDValue();
8291}
8292
8293SDValue SystemZTargetLowering::combineJOIN_DWORDS(
8294 SDNode *N, DAGCombinerInfo &DCI) const {
8295 SelectionDAG &DAG = DCI.DAG;
8296 // (join_dwords X, X) == (replicate X)
8297 if (N->getOperand(0) == N->getOperand(1))
8298 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
8299 N->getOperand(0));
8300 return SDValue();
8301}
8302
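// Try to merge the input chains of two strict FP nodes so that they can be
// combined into a single operation; currently this only handles the case
// where both nodes use the same input chain.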
8303static SDValue MergeInputChains(SDNode *N1, SDNode *N2) {
8304 SDValue Chain1 = N1->getOperand(0);
8305 SDValue Chain2 = N2->getOperand(0);
8306
8307 // Trivial case: both nodes take the same chain.
8308 if (Chain1 == Chain2)
8309 return Chain1;
8310
8311 // FIXME - we could handle more complex cases via TokenFactor,
8312 // assuming we can verify that this would not create a cycle.
8313 return SDValue();
8314}
8315
8316SDValue SystemZTargetLowering::combineFP_ROUND(
8317 SDNode *N, DAGCombinerInfo &DCI) const {
8318
8319 if (!Subtarget.hasVector())
8320 return SDValue();
8321
8322 // (fpround (extract_vector_elt X 0))
8323 // (fpround (extract_vector_elt X 1)) ->
8324 // (extract_vector_elt (VROUND X) 0)
8325 // (extract_vector_elt (VROUND X) 2)
8326 //
8327 // This is a special case since the target doesn't really support v2f32s.
8328 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8329 SelectionDAG &DAG = DCI.DAG;
8330 SDValue Op0 = N->getOperand(OpNo);
8331 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
8332 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8333 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
8334 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8335 Op0.getConstantOperandVal(1) == 0) {
8336 SDValue Vec = Op0.getOperand(0);
8337 for (auto *U : Vec->users()) {
8338 if (U != Op0.getNode() && U->hasOneUse() &&
8339 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8340 U->getOperand(0) == Vec &&
8341 U->getOperand(1).getOpcode() == ISD::Constant &&
8342 U->getConstantOperandVal(1) == 1) {
8343 SDValue OtherRound = SDValue(*U->user_begin(), 0);
8344 if (OtherRound.getOpcode() == N->getOpcode() &&
8345 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
8346 OtherRound.getValueType() == MVT::f32) {
8347 SDValue VRound, Chain;
8348 if (N->isStrictFPOpcode()) {
8349 Chain = MergeInputChains(N, OtherRound.getNode());
8350 if (!Chain)
8351 continue;
8352 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
8353 {MVT::v4f32, MVT::Other}, {Chain, Vec});
8354 Chain = VRound.getValue(1);
8355 } else
8356 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
8357 MVT::v4f32, Vec);
8358 DCI.AddToWorklist(VRound.getNode());
8359 SDValue Extract1 =
8360 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
8361 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
8362 DCI.AddToWorklist(Extract1.getNode());
8363 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
8364 if (Chain)
8365 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
8366 SDValue Extract0 =
8367 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
8368 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8369 if (Chain)
8370 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8371 N->getVTList(), Extract0, Chain);
8372 return Extract0;
8373 }
8374 }
8375 }
8376 }
8377 return SDValue();
8378}
8379
8380SDValue SystemZTargetLowering::combineFP_EXTEND(
8381 SDNode *N, DAGCombinerInfo &DCI) const {
8382
8383 if (!Subtarget.hasVector())
8384 return SDValue();
8385
8386 // (fpextend (extract_vector_elt X 0))
8387 // (fpextend (extract_vector_elt X 2)) ->
8388 // (extract_vector_elt (VEXTEND X) 0)
8389 // (extract_vector_elt (VEXTEND X) 1)
8390 //
8391 // This is a special case since the target doesn't really support v2f32s.
8392 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8393 SelectionDAG &DAG = DCI.DAG;
8394 SDValue Op0 = N->getOperand(OpNo);
8395 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
8396 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8397 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
8398 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8399 Op0.getConstantOperandVal(1) == 0) {
8400 SDValue Vec = Op0.getOperand(0);
8401 for (auto *U : Vec->users()) {
8402 if (U != Op0.getNode() && U->hasOneUse() &&
8403 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8404 U->getOperand(0) == Vec &&
8405 U->getOperand(1).getOpcode() == ISD::Constant &&
8406 U->getConstantOperandVal(1) == 2) {
8407 SDValue OtherExtend = SDValue(*U->user_begin(), 0);
8408 if (OtherExtend.getOpcode() == N->getOpcode() &&
8409 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
8410 OtherExtend.getValueType() == MVT::f64) {
8411 SDValue VExtend, Chain;
8412 if (N->isStrictFPOpcode()) {
8413 Chain = MergeInputChains(N, OtherExtend.getNode());
8414 if (!Chain)
8415 continue;
8416 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
8417 {MVT::v2f64, MVT::Other}, {Chain, Vec});
8418 Chain = VExtend.getValue(1);
8419 } else
8420 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
8421 MVT::v2f64, Vec);
8422 DCI.AddToWorklist(VExtend.getNode());
8423 SDValue Extract1 =
8424 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
8425 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
8426 DCI.AddToWorklist(Extract1.getNode());
8427 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
8428 if (Chain)
8429 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
8430 SDValue Extract0 =
8431 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
8432 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8433 if (Chain)
8434 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8435 N->getVTList(), Extract0, Chain);
8436 return Extract0;
8437 }
8438 }
8439 }
8440 }
8441 return SDValue();
8442}
8443
8444SDValue SystemZTargetLowering::combineINT_TO_FP(
8445 SDNode *N, DAGCombinerInfo &DCI) const {
8446 if (DCI.Level != BeforeLegalizeTypes)
8447 return SDValue();
8448 SelectionDAG &DAG = DCI.DAG;
8449 LLVMContext &Ctx = *DAG.getContext();
8450 unsigned Opcode = N->getOpcode();
8451 EVT OutVT = N->getValueType(0);
8452 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
8453 SDValue Op = N->getOperand(0);
8454 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
8455 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
8456
8457 // Insert an extension before type-legalization to avoid scalarization, e.g.:
8458 // v2f64 = uint_to_fp v2i16
8459 // =>
8460 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
8461 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
8462 OutScalarBits <= 64) {
8463 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
8464 EVT ExtVT = EVT::getVectorVT(
8465 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
8466 unsigned ExtOpcode =
8467 (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
8468 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
8469 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
8470 }
8471 return SDValue();
8472}
8473
8474SDValue SystemZTargetLowering::combineFCOPYSIGN(
8475 SDNode *N, DAGCombinerInfo &DCI) const {
8476 SelectionDAG &DAG = DCI.DAG;
8477 EVT VT = N->getValueType(0);
8478 SDValue ValOp = N->getOperand(0);
8479 SDValue SignOp = N->getOperand(1);
8480
8481 // FCOPYSIGN only reads the sign bit of SignOp, so the rounding is not needed.
8482 if (SignOp.getOpcode() == ISD::FP_ROUND) {
8483 SDValue WideOp = SignOp.getOperand(0);
8484 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, ValOp, WideOp);
8485 }
8486
8487 return SDValue();
8488}
8489
8490SDValue SystemZTargetLowering::combineBSWAP(
8491 SDNode *N, DAGCombinerInfo &DCI) const {
8492 SelectionDAG &DAG = DCI.DAG;
8493 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
8494 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8495 N->getOperand(0).hasOneUse() &&
8496 canLoadStoreByteSwapped(N->getValueType(0))) {
8497 SDValue Load = N->getOperand(0);
8498 LoadSDNode *LD = cast<LoadSDNode>(Load);
8499
8500 // Create the byte-swapping load.
8501 SDValue Ops[] = {
8502 LD->getChain(), // Chain
8503 LD->getBasePtr() // Ptr
8504 };
8505 EVT LoadVT = N->getValueType(0);
8506 if (LoadVT == MVT::i16)
8507 LoadVT = MVT::i32;
8508 SDValue BSLoad =
8509 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
8510 DAG.getVTList(LoadVT, MVT::Other),
8511 Ops, LD->getMemoryVT(), LD->getMemOperand());
8512
8513 // If this is an i16 load, insert the truncate.
8514 SDValue ResVal = BSLoad;
8515 if (N->getValueType(0) == MVT::i16)
8516 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
8517
8518 // First, combine the bswap away. This makes the value produced by the
8519 // load dead.
8520 DCI.CombineTo(N, ResVal);
8521
8522 // Next, combine the load away; we give it a bogus result value but a real
8523 // chain result. The result value is dead because the bswap is dead.
8524 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
8525
8526 // Return N so it doesn't get rechecked!
8527 return SDValue(N, 0);
8528 }
8529
8530 // Look through bitcasts that retain the number of vector elements.
8531 SDValue Op = N->getOperand(0);
8532 if (Op.getOpcode() == ISD::BITCAST &&
8533 Op.getValueType().isVector() &&
8534 Op.getOperand(0).getValueType().isVector() &&
8535 Op.getValueType().getVectorNumElements() ==
8536 Op.getOperand(0).getValueType().getVectorNumElements())
8537 Op = Op.getOperand(0);
8538
8539 // Push BSWAP into a vector insertion if at least one side then simplifies.
8540 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
8541 SDValue Vec = Op.getOperand(0);
8542 SDValue Elt = Op.getOperand(1);
8543 SDValue Idx = Op.getOperand(2);
8544
8545 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
8546 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
8547 DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
8548 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
8549 (canLoadStoreByteSwapped(N->getValueType(0)) &&
8550 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
8551 EVT VecVT = N->getValueType(0);
8552 EVT EltVT = N->getValueType(0).getVectorElementType();
8553 if (VecVT != Vec.getValueType()) {
8554 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
8555 DCI.AddToWorklist(Vec.getNode());
8556 }
8557 if (EltVT != Elt.getValueType()) {
8558 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
8559 DCI.AddToWorklist(Elt.getNode());
8560 }
8561 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
8562 DCI.AddToWorklist(Vec.getNode());
8563 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
8564 DCI.AddToWorklist(Elt.getNode());
8565 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
8566 Vec, Elt, Idx);
8567 }
8568 }
8569
8570 // Push BSWAP into a vector shuffle if at least one side then simplifies.
8571 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
8572 if (SV && Op.hasOneUse()) {
8573 SDValue Op0 = Op.getOperand(0);
8574 SDValue Op1 = Op.getOperand(1);
8575
8576 if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
8577 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
8578 DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
8579 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
8580 EVT VecVT = N->getValueType(0);
8581 if (VecVT != Op0.getValueType()) {
8582 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
8583 DCI.AddToWorklist(Op0.getNode());
8584 }
8585 if (VecVT != Op1.getValueType()) {
8586 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
8587 DCI.AddToWorklist(Op1.getNode());
8588 }
8589 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
8590 DCI.AddToWorklist(Op0.getNode());
8591 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
8592 DCI.AddToWorklist(Op1.getNode());
8593 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
8594 }
8595 }
8596
8597 return SDValue();
8598}
8599
8600SDValue SystemZTargetLowering::combineSETCC(
8601 SDNode *N, DAGCombinerInfo &DCI) const {
8602 SelectionDAG &DAG = DCI.DAG;
8603 const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
8604 const SDValue LHS = N->getOperand(0);
8605 const SDValue RHS = N->getOperand(1);
8606 bool CmpNull = isNullConstant(RHS);
8607 bool CmpAllOnes = isAllOnesConstant(RHS);
8608 EVT VT = N->getValueType(0);
8609 SDLoc DL(N);
8610
8611 // Match icmp_eq/ne(bitcast(icmp(X,Y)),0/-1) reduction patterns, and
8612 // change the outer compare to a i128 compare. This will normally
8613 // allow the reduction to be recognized in adjustICmp128, and even if
8614 // not, the i128 compare will still generate better code.
8615 if ((CC == ISD::SETNE || CC == ISD::SETEQ) && (CmpNull || CmpAllOnes)) {
8616 SDValue Src = peekThroughBitcasts(LHS);
8617 if (Src.getOpcode() == ISD::SETCC &&
8618 Src.getValueType().isFixedLengthVector() &&
8619 Src.getValueType().getScalarType() == MVT::i1) {
8620 EVT CmpVT = Src.getOperand(0).getValueType();
8621 if (CmpVT.getSizeInBits() == 128) {
8622 EVT IntVT = CmpVT.changeVectorElementTypeToInteger();
8623 SDValue LHS =
8624 DAG.getBitcast(MVT::i128, DAG.getSExtOrTrunc(Src, DL, IntVT));
8625 SDValue RHS = CmpNull ? DAG.getConstant(0, DL, MVT::i128)
8626 : DAG.getAllOnesConstant(DL, MVT::i128);
8627 return DAG.getNode(ISD::SETCC, DL, VT, LHS, RHS, N->getOperand(2),
8628 N->getFlags());
8629 }
8630 }
8631 }
8632
8633 return SDValue();
8634}
8635
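// Walk up Val to the instruction that defines the condition code it depends
// on (an IPM result or a SELECT_CCMASK), looking through simple arithmetic
// and logical operations. Returns the CC-producing value together with the
// mask of CC values it can produce, or (SDValue(), CCMASK_NONE) if none.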
8636static std::pair<SDValue, int> findCCUse(const SDValue &Val) {
8637 switch (Val.getOpcode()) {
8638 default:
8639 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8640 case SystemZISD::IPM:
8641 if (Val.getOperand(0).getOpcode() == SystemZISD::CLC ||
8642 Val.getOperand(0).getOpcode() == SystemZISD::STRCMP)
8643 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP);
8644 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY);
8645 case SystemZISD::SELECT_CCMASK: {
8646 SDValue Op4CCReg = Val.getOperand(4);
8647 if (Op4CCReg.getOpcode() == SystemZISD::ICMP ||
8648 Op4CCReg.getOpcode() == SystemZISD::TM) {
8649 auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0));
8650 if (OpCC != SDValue())
8651 return std::make_pair(OpCC, OpCCValid);
8652 }
8653 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8654 if (!CCValid)
8655 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8656 int CCValidVal = CCValid->getZExtValue();
8657 return std::make_pair(Op4CCReg, CCValidVal);
8658 }
8659 case ISD::ADD:
8660 case ISD::AND:
8661 case ISD::OR:
8662 case ISD::XOR:
8663 case ISD::SHL:
8664 case ISD::SRA:
8665 case ISD::SRL:
8666 auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0));
8667 if (Op0CC != SDValue())
8668 return std::make_pair(Op0CC, Op0CCValid);
8669 return findCCUse(Val.getOperand(1));
8670 }
8671}
8672
8673static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8674 SelectionDAG &DAG);
8675
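// For each of the four possible condition-code values, compute the value Val
// would have if CC were known to hold that value. Returns an empty vector if
// Val cannot be simplified in this way.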
8676static SmallVector<SDValue, 4> simplifyAssumingCCVal(SDValue Val, SDValue CC,
8677 SelectionDAG &DAG) {
8678 SDLoc DL(Val);
8679 auto Opcode = Val.getOpcode();
8680 switch (Opcode) {
8681 default:
8682 return {};
8683 case ISD::Constant:
8684 return {Val, Val, Val, Val};
8685 case SystemZISD::IPM: {
8686 SDValue IPMOp0 = Val.getOperand(0);
8687 if (IPMOp0 != CC)
8688 return {};
8689 SmallVector<SDValue, 4> ShiftedCCVals;
8690 for (auto CC : {0, 1, 2, 3})
8691 ShiftedCCVals.emplace_back(
8692 DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32));
8693 return ShiftedCCVals;
8694 }
8695 case SystemZISD::SELECT_CCMASK: {
8696 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
8697 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8698 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
8699 if (!CCValid || !CCMask)
8700 return {};
8701
8702 int CCValidVal = CCValid->getZExtValue();
8703 int CCMaskVal = CCMask->getZExtValue();
8704 // Prune the search tree early by moving the CC test and the combineCCMask
8705 // call ahead of the recursive calls to simplifyAssumingCCVal.
8706 SDValue Op4CCReg = Val.getOperand(4);
8707 if (Op4CCReg != CC)
8708 combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG);
8709 if (Op4CCReg != CC)
8710 return {};
8711 const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG);
8712 const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG);
8713 if (TrueSDVals.empty() || FalseSDVals.empty())
8714 return {};
8715 SmallVector<SDValue, 4> MergedSDVals;
8716 for (auto &CCVal : {0, 1, 2, 3})
8717 MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0)
8718 ? TrueSDVals[CCVal]
8719 : FalseSDVals[CCVal]);
8720 return MergedSDVals;
8721 }
8722 case ISD::ADD:
8723 case ISD::AND:
8724 case ISD::OR:
8725 case ISD::XOR:
8726 case ISD::SRA:
8727 // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA
8728 // would clobber CC).
8729 if (!Val.hasOneUse())
8730 return {};
8731 [[fallthrough]];
8732 case ISD::SHL:
8733 case ISD::SRL:
8734 SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1);
8735 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG);
8736 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG);
8737 if (Op0SDVals.empty() || Op1SDVals.empty())
8738 return {};
8739 SmallVector<SDValue, 4> BinaryOpSDVals;
8740 for (auto CCVal : {0, 1, 2, 3})
8741 BinaryOpSDVals.emplace_back(DAG.getNode(
8742 Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal]));
8743 return BinaryOpSDVals;
8744 }
8745}
8746
8747static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8748 SelectionDAG &DAG) {
8749 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
8750 // set by the CCReg instruction using the CCValid / CCMask masks.
8751 // If the CCReg instruction is itself an ICMP / TM testing the condition
8752 // code set by some other instruction, see whether we can directly
8753 // use that condition code.
8754 auto *CCNode = CCReg.getNode();
8755 if (!CCNode)
8756 return false;
8757
8758 if (CCNode->getOpcode() == SystemZISD::TM) {
8759 if (CCValid != SystemZ::CCMASK_TM)
8760 return false;
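// Emulate TEST UNDER MASK on two constants: return the CC value it would
// produce (0 if all selected bits are zero, 3 if they are all ones, 2 if
// they are mixed and the leftmost bit is set, 1 otherwise), or -1 if either
// operand is not a constant.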
8761 auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) {
8762 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8763 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8764 if (!Op0Node || !Op1Node)
8765 return -1;
8766 auto Op0APVal = Op0Node->getAPIntValue();
8767 auto Op1APVal = Op1Node->getAPIntValue();
8768 auto Result = Op0APVal & Op1APVal;
8769 bool AllOnes = Result == Op1APVal;
8770 bool AllZeros = Result == 0;
8771 bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits()] != 0;
8772 return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1;
8773 };
8774 SDValue Op0 = CCNode->getOperand(0);
8775 SDValue Op1 = CCNode->getOperand(1);
8776 auto [Op0CC, Op0CCValid] = findCCUse(Op0);
8777 if (Op0CC == SDValue())
8778 return false;
8779 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG);
8780 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG);
8781 if (Op0SDVals.empty() || Op1SDVals.empty())
8782 return false;
8783 int NewCCMask = 0;
8784 for (auto CC : {0, 1, 2, 3}) {
8785 auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]);
8786 if (CCVal < 0)
8787 return false;
8788 NewCCMask <<= 1;
8789 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8790 }
8791 NewCCMask &= Op0CCValid;
8792 CCReg = Op0CC;
8793 CCMask = NewCCMask;
8794 CCValid = Op0CCValid;
8795 return true;
8796 }
8797 if (CCNode->getOpcode() != SystemZISD::ICMP ||
8798 CCValid != SystemZ::CCMASK_ICMP)
8799 return false;
8800
8801 SDValue CmpOp0 = CCNode->getOperand(0);
8802 SDValue CmpOp1 = CCNode->getOperand(1);
8803 SDValue CmpOp2 = CCNode->getOperand(2);
8804 auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0);
8805 if (Op0CC != SDValue()) {
8806 const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG);
8807 const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG);
8808 if (Op0SDVals.empty() || Op1SDVals.empty())
8809 return false;
8810
8811 auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2);
8812 auto CmpTypeVal = CmpType->getZExtValue();
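// Given two constant comparison operands, compute the CC value an integer
// compare would produce: 0 for equal, 1 for less than, 2 for greater than,
// using signed or unsigned order according to the compare type; return -1
// if either operand is not a constant.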
8813 const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
8814 const SDValue &Op1Val) {
8815 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8816 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8817 if (!Op0Node || !Op1Node)
8818 return -1;
8819 auto Op0APVal = Op0Node->getAPIntValue();
8820 auto Op1APVal = Op1Node->getAPIntValue();
8821 if (CmpTypeVal == SystemZICMP::SignedOnly)
8822 return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2;
8823 return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2;
8824 };
8825 int NewCCMask = 0;
8826 for (auto CC : {0, 1, 2, 3}) {
8827 auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
8828 if (CCVal < 0)
8829 return false;
8830 NewCCMask <<= 1;
8831 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8832 }
8833 NewCCMask &= Op0CCValid;
8834 CCMask = NewCCMask;
8835 CCReg = Op0CC;
8836 CCValid = Op0CCValid;
8837 return true;
8838 }
8839
8840 return false;
8841}
8842
8843// Cost model deciding whether conditionals should be merged or split into multiple branches.
8844TargetLoweringBase::CondMergingParams
8845SystemZTargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
8846 const Value *Lhs,
8847 const Value *Rhs) const {
8848 const auto isFlagOutOpCC = [](const Value *V) {
8849 using namespace llvm::PatternMatch;
8850 const Value *RHSVal;
8851 const APInt *RHSC;
8852 if (const auto *I = dyn_cast<Instruction>(V)) {
8853 // PatternMatch.h provides concise tree-based pattern match of llvm IR.
8854 if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) ||
8855 match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) {
8856 if (const auto *CB = dyn_cast<CallBase>(RHSVal)) {
8857 if (CB->isInlineAsm()) {
8858 const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
8859 return IA && IA->getConstraintString().contains("{@cc}");
8860 }
8861 }
8862 }
8863 }
8864 return false;
8865 };
8866 // Pattern (ICmp %asm) or (ICmp (And %asm)).
8867 // The cost of the longest dependency chain (ICmp, And) is 2, so CostThreshold
8868 // or BaseCost can be set >= 2. If the cost of the instruction is <=
8869 // CostThreshold, the conditionals will be merged; otherwise they will be split.
8870 if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
8871 return {3, 0, -1};
8872 // Default.
8873 return {-1, -1, -1};
8874}
8875
8876SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
8877 DAGCombinerInfo &DCI) const {
8878 SelectionDAG &DAG = DCI.DAG;
8879
8880 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
8881 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8882 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8883 if (!CCValid || !CCMask)
8884 return SDValue();
8885
8886 int CCValidVal = CCValid->getZExtValue();
8887 int CCMaskVal = CCMask->getZExtValue();
8888 SDValue Chain = N->getOperand(0);
8889 SDValue CCReg = N->getOperand(4);
8890 // If combineCCMask was able to merge or simplify CCValid or CCMask, re-emit
8891 // the modified BR_CCMASK with the new values.
8892 // In order to avoid conditional branches with full or empty CC masks, do not
8893 // do this if CCMask is 0 or equal to CCValid.
8894 if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG) && CCMaskVal != 0 &&
8895 CCMaskVal != CCValidVal)
8896 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
8897 Chain,
8898 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8899 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
8900 N->getOperand(3), CCReg);
8901 return SDValue();
8902}
8903
8904SDValue SystemZTargetLowering::combineSELECT_CCMASK(
8905 SDNode *N, DAGCombinerInfo &DCI) const {
8906 SelectionDAG &DAG = DCI.DAG;
8907
8908 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
8909 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
8910 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
8911 if (!CCValid || !CCMask)
8912 return SDValue();
8913
8914 int CCValidVal = CCValid->getZExtValue();
8915 int CCMaskVal = CCMask->getZExtValue();
8916 SDValue CCReg = N->getOperand(4);
8917
8918 bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
8919
8920 // Populate an SDVals vector with the value of Val for each condition code;
8921 // Val may itself be another nested select_ccmask on the same CC.
8922 const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
8923 if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
8924 SmallVector<SDValue, 4> Res;
8925 if (Val.getOperand(4) != CCReg)
8926 return SmallVector<SDValue, 4>{};
8927 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
8928 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
8929 if (!CCMask)
8930 return SmallVector<SDValue, 4>{};
8931
8932 int CCMaskVal = CCMask->getZExtValue();
8933 for (auto &CC : {0, 1, 2, 3})
8934 Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
8935 : FalseVal);
8936 return Res;
8937 }
8938 return SmallVector<SDValue, 4>{Val, Val, Val, Val};
8939 };
8940 // Attempt to optimize TrueVal/FalseVal of the outermost select_ccmask, using
8941 // either the CCReg found by combineCCMask or the original CCReg.
8942 SDValue TrueVal = N->getOperand(0);
8943 SDValue FalseVal = N->getOperand(1);
8944 auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
8945 auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
8946 // TrueSDVals/FalseSDVals might be empty in case of non-constant
8947 // TrueVal/FalseVal for select_ccmask, which cannot be optimized further.
8948 if (TrueSDVals.empty())
8949 TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
8950 if (FalseSDVals.empty())
8951 FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
8952 if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
8953 SmallSet<SDValue, 4> MergedSDValsSet;
8954 // Ignore CC values outside CCValid.
8955 for (auto CC : {0, 1, 2, 3}) {
8956 if ((CCValidVal & ((1 << (3 - CC)))) != 0)
8957 MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
8958 ? TrueSDVals[CC]
8959 : FalseSDVals[CC]);
8960 }
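// If every reachable CC value selects the same SDValue, the SELECT_CCMASK
// collapses to that single value. If exactly two distinct values remain,
// rebuild the CC mask so the node selects between just those two.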
8961 if (MergedSDValsSet.size() == 1)
8962 return *MergedSDValsSet.begin();
8963 if (MergedSDValsSet.size() == 2) {
8964 auto BeginIt = MergedSDValsSet.begin();
8965 SDValue NewTrueVal = *BeginIt, NewFalseVal = *std::next(BeginIt);
8966 if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
8967 std::swap(NewTrueVal, NewFalseVal);
8968 int NewCCMask = 0;
8969 for (auto CC : {0, 1, 2, 3}) {
8970 NewCCMask <<= 1;
8971 NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
8972 ? (TrueSDVals[CC] == NewTrueVal)
8973 : (FalseSDVals[CC] == NewTrueVal);
8974 }
8975 CCMaskVal = NewCCMask;
8976 CCMaskVal &= CCValidVal;
8977 TrueVal = NewTrueVal;
8978 FalseVal = NewFalseVal;
8979 IsCombinedCCReg = true;
8980 }
8981 }
8982 // If the condition is trivially false or trivially true after
8983 // combineCCMask, just collapse this SELECT_CCMASK to the indicated value
8984 // (possibly modified by constructCCSDValsFromSELECT).
8985 if (CCMaskVal == 0)
8986 return FalseVal;
8987 if (CCMaskVal == CCValidVal)
8988 return TrueVal;
8989
8990 if (IsCombinedCCReg)
8991 return DAG.getNode(
8992 SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
8993 FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8994 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
8995
8996 return SDValue();
8997}
8998
8999SDValue SystemZTargetLowering::combineGET_CCMASK(
9000 SDNode *N, DAGCombinerInfo &DCI) const {
9001
9002 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
9003 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
9004 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
9005 if (!CCValid || !CCMask)
9006 return SDValue();
9007 int CCValidVal = CCValid->getZExtValue();
9008 int CCMaskVal = CCMask->getZExtValue();
9009
9010 SDValue Select = N->getOperand(0);
9011 if (Select->getOpcode() == ISD::TRUNCATE)
9012 Select = Select->getOperand(0);
9013 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
9014 return SDValue();
9015
9016 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
9017 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
9018 if (!SelectCCValid || !SelectCCMask)
9019 return SDValue();
9020 int SelectCCValidVal = SelectCCValid->getZExtValue();
9021 int SelectCCMaskVal = SelectCCMask->getZExtValue();
9022
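// Only handle selects that produce the constants 0 or 1. If the select
// yields 0 for true and 1 for false, invert its CC mask so that it matches
// the sense expected by GET_CCMASK.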
9023 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
9024 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
9025 if (!TrueVal || !FalseVal)
9026 return SDValue();
9027 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
9028 ;
9029 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
9030 SelectCCMaskVal ^= SelectCCValidVal;
9031 else
9032 return SDValue();
9033
9034 if (SelectCCValidVal & ~CCValidVal)
9035 return SDValue();
9036 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
9037 return SDValue();
9038
9039 return Select->getOperand(4);
9040}
9041
9042SDValue SystemZTargetLowering::combineIntDIVREM(
9043 SDNode *N, DAGCombinerInfo &DCI) const {
9044 SelectionDAG &DAG = DCI.DAG;
9045 EVT VT = N->getValueType(0);
9046 // In the case where the divisor is a vector of constants a cheaper
9047 // sequence of instructions can replace the divide. BuildSDIV is called to
9048 // do this during DAG combining, but it only succeeds when it can build a
9049 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
9050 // since it is not Legal but Custom it can only happen before
9051 // legalization. Therefore we must scalarize this early, before Combine 1.
9052 // For widened vectors, this is already the result of type legalization.
9053 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
9054 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
9055 return DAG.UnrollVectorOp(N);
9056 return SDValue();
9057}
9058
9059
9060// Transform a right shift of a multiply-and-add into a multiply-and-add-high.
9061// This is closely modeled after the common-code combineShiftToMULH.
9062SDValue SystemZTargetLowering::combineShiftToMulAddHigh(
9063 SDNode *N, DAGCombinerInfo &DCI) const {
9064 SelectionDAG &DAG = DCI.DAG;
9065 SDLoc DL(N);
9066
9067 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
9068 "SRL or SRA node is required here!");
9069
9070 if (!Subtarget.hasVector())
9071 return SDValue();
9072
9073 // Check the shift amount. Proceed with the transformation if the shift
9074 // amount is constant.
9075 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
9076 if (!ShiftAmtSrc)
9077 return SDValue();
9078
9079 // The operation feeding into the shift must be an add.
9080 SDValue ShiftOperand = N->getOperand(0);
9081 if (ShiftOperand.getOpcode() != ISD::ADD)
9082 return SDValue();
9083
9084 // One operand of the add must be a multiply.
9085 SDValue MulOp = ShiftOperand.getOperand(0);
9086 SDValue AddOp = ShiftOperand.getOperand(1);
9087 if (MulOp.getOpcode() != ISD::MUL) {
9088 if (AddOp.getOpcode() != ISD::MUL)
9089 return SDValue();
9090 std::swap(MulOp, AddOp);
9091 }
9092
9093 // All operands must be equivalent extend nodes.
9094 SDValue LeftOp = MulOp.getOperand(0);
9095 SDValue RightOp = MulOp.getOperand(1);
9096
9097 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9098 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9099
9100 if (!IsSignExt && !IsZeroExt)
9101 return SDValue();
9102
9103 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
9104 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
9105
9106 SDValue MulhRightOp;
9107 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
9108 unsigned ActiveBits = IsSignExt
9109 ? Constant->getAPIntValue().getSignificantBits()
9110 : Constant->getAPIntValue().getActiveBits();
9111 if (ActiveBits > NarrowVTSize)
9112 return SDValue();
9113 MulhRightOp = DAG.getConstant(
9114 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9115 NarrowVT);
9116 } else {
9117 if (LeftOp.getOpcode() != RightOp.getOpcode())
9118 return SDValue();
9119 // Check that the two extend nodes are the same type.
9120 if (NarrowVT != RightOp.getOperand(0).getValueType())
9121 return SDValue();
9122 MulhRightOp = RightOp.getOperand(0);
9123 }
9124
9125 SDValue MulhAddOp;
9126 if (ConstantSDNode *Constant = isConstOrConstSplat(AddOp)) {
9127 unsigned ActiveBits = IsSignExt
9128 ? Constant->getAPIntValue().getSignificantBits()
9129 : Constant->getAPIntValue().getActiveBits();
9130 if (ActiveBits > NarrowVTSize)
9131 return SDValue();
9132 MulhAddOp = DAG.getConstant(
9133 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9134 NarrowVT);
9135 } else {
9136 if (LeftOp.getOpcode() != AddOp.getOpcode())
9137 return SDValue();
9138 // Check that the two extend nodes are the same type.
9139 if (NarrowVT != AddOp.getOperand(0).getValueType())
9140 return SDValue();
9141 MulhAddOp = AddOp.getOperand(0);
9142 }
9143
9144 EVT WideVT = LeftOp.getValueType();
9145 // Proceed with the transformation if the wide types match.
9146 assert((WideVT == RightOp.getValueType()) &&
9147 "Cannot have a multiply node with two different operand types.");
9148 assert((WideVT == AddOp.getValueType()) &&
9149 "Cannot have an add node with two different operand types.");
9150
9151 // Proceed with the transformation if the wide type is twice as large
9152 // as the narrow type.
9153 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9154 return SDValue();
9155
9156 // Check the shift amount with the narrow type size.
9157 // Proceed with the transformation if the shift amount is the width
9158 // of the narrow type.
9159 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9160 if (ShiftAmt != NarrowVTSize)
9161 return SDValue();
9162
9163 // Proceed if we support the multiply-and-add-high operation.
9164 if (!(NarrowVT == MVT::v16i8 || NarrowVT == MVT::v8i16 ||
9165 NarrowVT == MVT::v4i32 ||
9166 (Subtarget.hasVectorEnhancements3() &&
9167 (NarrowVT == MVT::v2i64 || NarrowVT == MVT::i128))))
9168 return SDValue();
9169
9170 // Emit the VMAH (signed) or VMALH (unsigned) operation.
9171 SDValue Result = DAG.getNode(IsSignExt ? SystemZISD::VMAH : SystemZISD::VMALH,
9172 DL, NarrowVT, LeftOp.getOperand(0),
9173 MulhRightOp, MulhAddOp);
9174 bool IsSigned = N->getOpcode() == ISD::SRA;
9175 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
9176}
9177
9178// Op is an operand of a multiplication. Check whether this can be folded
9179// into an even/odd widening operation; if so, return the opcode to be used
9180// and update Op to the appropriate sub-operand. Note that the caller must
9181// verify that *both* operands of the multiplication support the operation.
9182static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG,
9183 const SystemZSubtarget &Subtarget,
9184 SDValue &Op) {
9185 EVT VT = Op.getValueType();
9186
9187 // Check for (sign/zero_extend_vector_inreg (vector_shuffle)) corresponding
9188 // to selecting the even or odd vector elements.
9189 if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
9190 (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9191 Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)) {
9192 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
9193 unsigned NumElts = VT.getVectorNumElements();
9194 Op = Op.getOperand(0);
9195 if (Op.getValueType().getVectorNumElements() == 2 * NumElts &&
9196 Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
9197 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
9198 ArrayRef<int> ShuffleMask = SVN->getMask();
9199 bool CanUseEven = true, CanUseOdd = true;
9200 for (unsigned Elt = 0; Elt < NumElts; Elt++) {
9201 if (ShuffleMask[Elt] == -1)
9202 continue;
9203 if (unsigned(ShuffleMask[Elt]) != 2 * Elt)
9204 CanUseEven = false;
9205 if (unsigned(ShuffleMask[Elt]) != 2 * Elt + 1)
9206 CanUseOdd = false;
9207 }
9208 Op = Op.getOperand(0);
9209 if (CanUseEven)
9210 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9211 if (CanUseOdd)
9212 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9213 }
9214 }
9215
9216 // For z17, we can also support the v2i64->i128 case, which looks like
9217 // (sign/zero_extend (extract_vector_elt X 0/1))
9218 if (VT == MVT::i128 && Subtarget.hasVectorEnhancements3() &&
9219 (Op.getOpcode() == ISD::SIGN_EXTEND ||
9220 Op.getOpcode() == ISD::ZERO_EXTEND)) {
9221 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND;
9222 Op = Op.getOperand(0);
9223 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9224 Op.getOperand(0).getValueType() == MVT::v2i64 &&
9225 Op.getOperand(1).getOpcode() == ISD::Constant) {
9226 unsigned Elem = Op.getConstantOperandVal(1);
9227 Op = Op.getOperand(0);
9228 if (Elem == 0)
9229 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9230 if (Elem == 1)
9231 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9232 }
9233 }
9234
9235 return 0;
9236}
9237
9238SDValue SystemZTargetLowering::combineMUL(
9239 SDNode *N, DAGCombinerInfo &DCI) const {
9240 SelectionDAG &DAG = DCI.DAG;
9241
9242 // Detect even/odd widening multiplication.
9243 SDValue Op0 = N->getOperand(0);
9244 SDValue Op1 = N->getOperand(1);
9245 unsigned OpcodeCand0 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op0);
9246 unsigned OpcodeCand1 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op1);
9247 if (OpcodeCand0 && OpcodeCand0 == OpcodeCand1)
9248 return DAG.getNode(OpcodeCand0, SDLoc(N), N->getValueType(0), Op0, Op1);
9249
9250 return SDValue();
9251}
9252
9253SDValue SystemZTargetLowering::combineINTRINSIC(
9254 SDNode *N, DAGCombinerInfo &DCI) const {
9255 SelectionDAG &DAG = DCI.DAG;
9256
9257 unsigned Id = N->getConstantOperandVal(1);
9258 switch (Id) {
9259 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
9260 // or larger is simply a vector load.
9261 case Intrinsic::s390_vll:
9262 case Intrinsic::s390_vlrl:
9263 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
9264 if (C->getZExtValue() >= 15)
9265 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
9266 N->getOperand(3), MachinePointerInfo());
9267 break;
9268 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
9269 case Intrinsic::s390_vstl:
9270 case Intrinsic::s390_vstrl:
9271 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
9272 if (C->getZExtValue() >= 15)
9273 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
9274 N->getOperand(4), MachinePointerInfo());
9275 break;
9276 }
9277
9278 return SDValue();
9279}
9280
9281SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
9282 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
9283 return N->getOperand(0);
9284 return N;
9285}
9286
9287SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
9288 DAGCombinerInfo &DCI) const {
9289 switch(N->getOpcode()) {
9290 default: break;
9291 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
9292 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
9293 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
9294 case SystemZISD::MERGE_HIGH:
9295 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
9296 case ISD::LOAD: return combineLOAD(N, DCI);
9297 case ISD::STORE: return combineSTORE(N, DCI);
9298 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
9299 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
9300 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
9301 case ISD::STRICT_FP_ROUND:
9302 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
9303 case ISD::STRICT_FP_EXTEND:
9304 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
9305 case ISD::SINT_TO_FP:
9306 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
9307 case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
9308 case ISD::BSWAP: return combineBSWAP(N, DCI);
9309 case ISD::SETCC: return combineSETCC(N, DCI);
9310 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
9311 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
9312 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
9313 case ISD::SRL:
9314 case ISD::SRA: return combineShiftToMulAddHigh(N, DCI);
9315 case ISD::MUL: return combineMUL(N, DCI);
9316 case ISD::SDIV:
9317 case ISD::UDIV:
9318 case ISD::SREM:
9319 case ISD::UREM: return combineIntDIVREM(N, DCI);
9320 case ISD::INTRINSIC_W_CHAIN:
9321 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
9322 }
9323
9324 return SDValue();
9325}
9326
9327// Return the demanded elements for the OpNo source operand of Op. DemandedElts
9328// are for Op.
9329static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
9330 unsigned OpNo) {
9331 EVT VT = Op.getValueType();
9332 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
9333 APInt SrcDemE;
9334 unsigned Opcode = Op.getOpcode();
9335 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9336 unsigned Id = Op.getConstantOperandVal(0);
9337 switch (Id) {
9338 case Intrinsic::s390_vpksh: // PACKS
9339 case Intrinsic::s390_vpksf:
9340 case Intrinsic::s390_vpksg:
9341 case Intrinsic::s390_vpkshs: // PACKS_CC
9342 case Intrinsic::s390_vpksfs:
9343 case Intrinsic::s390_vpksgs:
9344 case Intrinsic::s390_vpklsh: // PACKLS
9345 case Intrinsic::s390_vpklsf:
9346 case Intrinsic::s390_vpklsg:
9347 case Intrinsic::s390_vpklshs: // PACKLS_CC
9348 case Intrinsic::s390_vpklsfs:
9349 case Intrinsic::s390_vpklsgs:
9350 // VECTOR PACK truncates the elements of two source vectors into one.
9351 SrcDemE = DemandedElts;
9352 if (OpNo == 2)
9353 SrcDemE.lshrInPlace(NumElts / 2);
9354 SrcDemE = SrcDemE.trunc(NumElts / 2);
9355 break;
9356 // VECTOR UNPACK extends half the elements of the source vector.
9357 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9358 case Intrinsic::s390_vuphh:
9359 case Intrinsic::s390_vuphf:
9360 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9361 case Intrinsic::s390_vuplhh:
9362 case Intrinsic::s390_vuplhf:
9363 SrcDemE = APInt(NumElts * 2, 0);
9364 SrcDemE.insertBits(DemandedElts, 0);
9365 break;
9366 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9367 case Intrinsic::s390_vuplhw:
9368 case Intrinsic::s390_vuplf:
9369 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9370 case Intrinsic::s390_vupllh:
9371 case Intrinsic::s390_vupllf:
9372 SrcDemE = APInt(NumElts * 2, 0);
9373 SrcDemE.insertBits(DemandedElts, NumElts);
9374 break;
9375 case Intrinsic::s390_vpdi: {
9376 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
9377 SrcDemE = APInt(NumElts, 0);
9378 if (!DemandedElts[OpNo - 1])
9379 break;
9380 unsigned Mask = Op.getConstantOperandVal(3);
9381 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
9382 // Demand input element 0 or 1, given by the mask bit value.
9383 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
9384 break;
9385 }
9386 case Intrinsic::s390_vsldb: {
9387 // VECTOR SHIFT LEFT DOUBLE BY BYTE
9388 assert(VT == MVT::v16i8 && "Unexpected type.");
9389 unsigned FirstIdx = Op.getConstantOperandVal(3);
9390 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
9391 unsigned NumSrc0Els = 16 - FirstIdx;
9392 SrcDemE = APInt(NumElts, 0);
9393 if (OpNo == 1) {
9394 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
9395 SrcDemE.insertBits(DemEls, FirstIdx);
9396 } else {
9397 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
9398 SrcDemE.insertBits(DemEls, 0);
9399 }
9400 break;
9401 }
9402 case Intrinsic::s390_vperm:
9403 SrcDemE = APInt::getAllOnes(NumElts);
9404 break;
9405 default:
9406 llvm_unreachable("Unhandled intrinsic.");
9407 break;
9408 }
9409 } else {
9410 switch (Opcode) {
9411 case SystemZISD::JOIN_DWORDS:
9412 // Scalar operand.
9413 SrcDemE = APInt(1, 1);
9414 break;
9415 case SystemZISD::SELECT_CCMASK:
9416 SrcDemE = DemandedElts;
9417 break;
9418 default:
9419 llvm_unreachable("Unhandled opcode.");
9420 break;
9421 }
9422 }
9423 return SrcDemE;
9424}
9425
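// Compute the known bits of a two-operand node by intersecting the known
// bits of the two source operands selected by OpNo and OpNo + 1, using the
// demanded elements of each source.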
9426static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
9427 const APInt &DemandedElts,
9428 const SelectionDAG &DAG, unsigned Depth,
9429 unsigned OpNo) {
9430 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9431 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9432 KnownBits LHSKnown =
9433 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9434 KnownBits RHSKnown =
9435 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9436 Known = LHSKnown.intersectWith(RHSKnown);
9437}
9438
9439void
9440SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
9441 KnownBits &Known,
9442 const APInt &DemandedElts,
9443 const SelectionDAG &DAG,
9444 unsigned Depth) const {
9445 Known.resetAll();
9446
9447 // Intrinsic CC result is returned in the two low bits.
9448 unsigned Tmp0, Tmp1; // not used
9449 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, Tmp0, Tmp1)) {
9450 Known.Zero.setBitsFrom(2);
9451 return;
9452 }
9453 EVT VT = Op.getValueType();
9454 if (Op.getResNo() != 0 || VT == MVT::Untyped)
9455 return;
9456 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
9457 "KnownBits does not match VT in bitwidth");
9458 assert ((!VT.isVector() ||
9459 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
9460 "DemandedElts does not match VT number of elements");
9461 unsigned BitWidth = Known.getBitWidth();
9462 unsigned Opcode = Op.getOpcode();
9463 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9464 bool IsLogical = false;
9465 unsigned Id = Op.getConstantOperandVal(0);
9466 switch (Id) {
9467 case Intrinsic::s390_vpksh: // PACKS
9468 case Intrinsic::s390_vpksf:
9469 case Intrinsic::s390_vpksg:
9470 case Intrinsic::s390_vpkshs: // PACKS_CC
9471 case Intrinsic::s390_vpksfs:
9472 case Intrinsic::s390_vpksgs:
9473 case Intrinsic::s390_vpklsh: // PACKLS
9474 case Intrinsic::s390_vpklsf:
9475 case Intrinsic::s390_vpklsg:
9476 case Intrinsic::s390_vpklshs: // PACKLS_CC
9477 case Intrinsic::s390_vpklsfs:
9478 case Intrinsic::s390_vpklsgs:
9479 case Intrinsic::s390_vpdi:
9480 case Intrinsic::s390_vsldb:
9481 case Intrinsic::s390_vperm:
9482 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
9483 break;
9484 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9485 case Intrinsic::s390_vuplhh:
9486 case Intrinsic::s390_vuplhf:
9487 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9488 case Intrinsic::s390_vupllh:
9489 case Intrinsic::s390_vupllf:
9490 IsLogical = true;
9491 [[fallthrough]];
9492 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9493 case Intrinsic::s390_vuphh:
9494 case Intrinsic::s390_vuphf:
9495 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9496 case Intrinsic::s390_vuplhw:
9497 case Intrinsic::s390_vuplf: {
9498 SDValue SrcOp = Op.getOperand(1);
9499 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
9500 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
9501 if (IsLogical) {
9502 Known = Known.zext(BitWidth);
9503 } else
9504 Known = Known.sext(BitWidth);
9505 break;
9506 }
9507 default:
9508 break;
9509 }
9510 } else {
9511 switch (Opcode) {
9512 case SystemZISD::JOIN_DWORDS:
9513 case SystemZISD::SELECT_CCMASK:
9514 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
9515 break;
9516 case SystemZISD::REPLICATE: {
9517 SDValue SrcOp = Op.getOperand(0);
9518 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
9519 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp))
9520 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
9521 break;
9522 }
9523 default:
9524 break;
9525 }
9526 }
9527
9528 // Known has the width of the source operand(s). Adjust if needed to match
9529 // the passed bitwidth.
9530 if (Known.getBitWidth() != BitWidth)
9531 Known = Known.anyextOrTrunc(BitWidth);
9532}
9533
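// Compute the minimum number of sign bits across the two source operands of
// a binary node, adjusting for PACK-style operations whose source elements
// are wider than the result elements.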
9534static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
9535 const SelectionDAG &DAG, unsigned Depth,
9536 unsigned OpNo) {
9537 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9538 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9539 if (LHS == 1) return 1; // Early out.
9540 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9541 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9542 if (RHS == 1) return 1; // Early out.
9543 unsigned Common = std::min(LHS, RHS);
9544 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
9545 EVT VT = Op.getValueType();
9546 unsigned VTBits = VT.getScalarSizeInBits();
9547 if (SrcBitWidth > VTBits) { // PACK
9548 unsigned SrcExtraBits = SrcBitWidth - VTBits;
9549 if (Common > SrcExtraBits)
9550 return (Common - SrcExtraBits);
9551 return 1;
9552 }
9553 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
9554 return Common;
9555}
9556
9557unsigned
9558SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
9559 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
9560 unsigned Depth) const {
9561 if (Op.getResNo() != 0)
9562 return 1;
9563 unsigned Opcode = Op.getOpcode();
9564 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9565 unsigned Id = Op.getConstantOperandVal(0);
9566 switch (Id) {
9567 case Intrinsic::s390_vpksh: // PACKS
9568 case Intrinsic::s390_vpksf:
9569 case Intrinsic::s390_vpksg:
9570 case Intrinsic::s390_vpkshs: // PACKS_CC
9571 case Intrinsic::s390_vpksfs:
9572 case Intrinsic::s390_vpksgs:
9573 case Intrinsic::s390_vpklsh: // PACKLS
9574 case Intrinsic::s390_vpklsf:
9575 case Intrinsic::s390_vpklsg:
9576 case Intrinsic::s390_vpklshs: // PACKLS_CC
9577 case Intrinsic::s390_vpklsfs:
9578 case Intrinsic::s390_vpklsgs:
9579 case Intrinsic::s390_vpdi:
9580 case Intrinsic::s390_vsldb:
9581 case Intrinsic::s390_vperm:
9582 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
9583 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9584 case Intrinsic::s390_vuphh:
9585 case Intrinsic::s390_vuphf:
9586 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9587 case Intrinsic::s390_vuplhw:
9588 case Intrinsic::s390_vuplf: {
9589 SDValue PackedOp = Op.getOperand(1);
9590 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
9591 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
9592 EVT VT = Op.getValueType();
9593 unsigned VTBits = VT.getScalarSizeInBits();
9594 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
9595 return Tmp;
9596 }
9597 default:
9598 break;
9599 }
9600 } else {
9601 switch (Opcode) {
9602 case SystemZISD::SELECT_CCMASK:
9603 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
9604 default:
9605 break;
9606 }
9607 }
9608
9609 return 1;
9610}
9611
9612bool SystemZTargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
9613 SDValue Op,
9614 const APInt &DemandedElts, const SelectionDAG &DAG,
9615 bool PoisonOnly, unsigned Depth) const {
9616 switch (Op->getOpcode()) {
9617 case SystemZISD::PCREL_WRAPPER:
9618 case SystemZISD::PCREL_OFFSET:
9619 return true;
9620 }
9621 return false;
9622}
9623
9624unsigned
9625SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const {
9626 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9627 unsigned StackAlign = TFI->getStackAlignment();
9628 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
9629 "Unexpected stack alignment");
9630 // The default stack probe size is 4096 if the function has no
9631 // stack-probe-size attribute.
9632 unsigned StackProbeSize =
9633 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
9634 // Round down to the stack alignment.
9635 StackProbeSize &= ~(StackAlign - 1);
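// If the requested probe size rounds down to zero, probe once per
// StackAlign bytes instead.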
9636 return StackProbeSize ? StackProbeSize : StackAlign;
9637}
9638
9639//===----------------------------------------------------------------------===//
9640// Custom insertion
9641//===----------------------------------------------------------------------===//
9642
9643// Force base value Base into a register before MI. Return the register.
9644 static Register forceReg(MachineInstr &MI, MachineOperand &Base,
9645 const SystemZInstrInfo *TII) {
9646 MachineBasicBlock *MBB = MI.getParent();
9647 MachineFunction &MF = *MBB->getParent();
9648 MachineRegisterInfo &MRI = MF.getRegInfo();
9649
9650 if (Base.isReg()) {
9651 // Copy Base into a new virtual register to help register coalescing in
9652 // cases with multiple uses.
9653 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9654 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
9655 .add(Base);
9656 return Reg;
9657 }
9658
9659 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9660 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
9661 .add(Base)
9662 .addImm(0)
9663 .addReg(0);
9664 return Reg;
9665}
9666
9667// The CC operand of MI might be missing a kill marker because there
9668// were multiple uses of CC, and ISel didn't know which to mark.
9669// Figure out whether MI should have had a kill marker.
9670 static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) {
9671 // Scan forward through BB for a use/def of CC.
9672 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI)));
9673 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
9674 const MachineInstr &MI = *miI;
9675 if (MI.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
9676 return false;
9677 if (MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
9678 break; // Should have kill-flag - update below.
9679 }
9680
9681 // If we hit the end of the block, check whether CC is live into a
9682 // successor.
9683 if (miI == MBB->end()) {
9684 for (const MachineBasicBlock *Succ : MBB->successors())
9685 if (Succ->isLiveIn(SystemZ::CC))
9686 return false;
9687 }
9688
9689 return true;
9690}
9691
9692// Return true if it is OK for this Select pseudo-opcode to be cascaded
9693// together with other Select pseudo-opcodes into a single basic-block with
9694// a conditional jump around it.
9695 static bool isSelectPseudo(MachineInstr &MI) {
9696 switch (MI.getOpcode()) {
9697 case SystemZ::Select32:
9698 case SystemZ::Select64:
9699 case SystemZ::Select128:
9700 case SystemZ::SelectF32:
9701 case SystemZ::SelectF64:
9702 case SystemZ::SelectF128:
9703 case SystemZ::SelectVR32:
9704 case SystemZ::SelectVR64:
9705 case SystemZ::SelectVR128:
9706 return true;
9707
9708 default:
9709 return false;
9710 }
9711}
9712
9713// Helper function, which inserts PHI functions into SinkMBB:
9714// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
9715// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
9716 static void createPHIsForSelects(SmallVectorImpl<MachineInstr *> &Selects,
9717 MachineBasicBlock *TrueMBB,
9718 MachineBasicBlock *FalseMBB,
9719 MachineBasicBlock *SinkMBB) {
9720 MachineFunction *MF = TrueMBB->getParent();
9721 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
9722
9723 MachineInstr *FirstMI = Selects.front();
9724 unsigned CCValid = FirstMI->getOperand(3).getImm();
9725 unsigned CCMask = FirstMI->getOperand(4).getImm();
9726
9727 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
9728
9729 // As we are creating the PHIs, we have to be careful if there is more than
9730 // one. Later Selects may reference the results of earlier Selects, but later
9731 // PHIs have to reference the individual true/false inputs from earlier PHIs.
9732 // That also means that PHI construction must work forward from earlier to
9733 // later, and that the code must maintain a mapping from each earlier PHI's
9734 // destination register to the registers that went into that PHI.
9735 DenseMap<Register, std::pair<Register, Register>> RegRewriteTable;
9736
9737 for (auto *MI : Selects) {
9738 Register DestReg = MI->getOperand(0).getReg();
9739 Register TrueReg = MI->getOperand(1).getReg();
9740 Register FalseReg = MI->getOperand(2).getReg();
9741
9742 // If this Select we are generating is the opposite condition from
9743 // the jump we generated, then we have to swap the operands for the
9744 // PHI that is going to be generated.
9745 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
9746 std::swap(TrueReg, FalseReg);
9747
9748 if (auto It = RegRewriteTable.find(TrueReg); It != RegRewriteTable.end())
9749 TrueReg = It->second.first;
9750
9751 if (auto It = RegRewriteTable.find(FalseReg); It != RegRewriteTable.end())
9752 FalseReg = It->second.second;
9753
9754 DebugLoc DL = MI->getDebugLoc();
9755 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
9756 .addReg(TrueReg).addMBB(TrueMBB)
9757 .addReg(FalseReg).addMBB(FalseMBB);
9758
9759 // Add this PHI to the rewrite table.
9760 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
9761 }
9762
9763 MF->getProperties().resetNoPHIs();
9764}
9765
9766 MachineBasicBlock *
9767 SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
9768 MachineBasicBlock *BB) const {
9769 MachineFunction &MF = *BB->getParent();
9770 MachineFrameInfo &MFI = MF.getFrameInfo();
9771 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
9772 assert(TFL->hasReservedCallFrame(MF) &&
9773 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
9774 (void)TFL;
9775 // Get the MaxCallFrameSize value and erase MI since it serves no further
9776 // purpose as the call frame is statically reserved in the prolog. Set
9777 // AdjustsStack as MI is *not* mapped as a frame instruction.
9778 uint32_t NumBytes = MI.getOperand(0).getImm();
9779 if (NumBytes > MFI.getMaxCallFrameSize())
9780 MFI.setMaxCallFrameSize(NumBytes);
9781 MFI.setAdjustsStack(true);
9782
9783 MI.eraseFromParent();
9784 return BB;
9785}
9786
9787// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
9788 MachineBasicBlock *
9789 SystemZTargetLowering::emitSelect(MachineInstr &MI,
9790 MachineBasicBlock *MBB) const {
9791 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
9792 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9793
9794 unsigned CCValid = MI.getOperand(3).getImm();
9795 unsigned CCMask = MI.getOperand(4).getImm();
9796
9797 // If we have a sequence of Select* pseudo instructions using the
9798 // same condition code value, we want to expand all of them into
9799 // a single pair of basic blocks using the same condition.
9800 SmallVector<MachineInstr*, 8> Selects;
9801 SmallVector<MachineInstr*, 8> DbgValues;
9802 Selects.push_back(&MI);
9803 unsigned Count = 0;
9804 for (MachineInstr &NextMI : llvm::make_range(
9805 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
9806 if (isSelectPseudo(NextMI)) {
9807 assert(NextMI.getOperand(3).getImm() == CCValid &&
9808 "Bad CCValid operands since CC was not redefined.");
9809 if (NextMI.getOperand(4).getImm() == CCMask ||
9810 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
9811 Selects.push_back(&NextMI);
9812 continue;
9813 }
9814 break;
9815 }
9816 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9817 NextMI.usesCustomInsertionHook())
9818 break;
9819 bool User = false;
9820 for (auto *SelMI : Selects)
9821 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
9822 User = true;
9823 break;
9824 }
9825 if (NextMI.isDebugInstr()) {
9826 if (User) {
9827 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
9828 DbgValues.push_back(&NextMI);
9829 }
9830 } else if (User || ++Count > 20)
9831 break;
9832 }
9833
9834 MachineInstr *LastMI = Selects.back();
9835 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9836 checkCCKill(*LastMI, MBB));
9837 MachineBasicBlock *StartMBB = MBB;
9838 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
9839 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9840
9841 // Unless CC was killed in the last Select instruction, mark it as
9842 // live-in to both FalseMBB and JoinMBB.
9843 if (!CCKilled) {
9844 FalseMBB->addLiveIn(SystemZ::CC);
9845 JoinMBB->addLiveIn(SystemZ::CC);
9846 }
9847
9848 // StartMBB:
9849 // BRC CCMask, JoinMBB
9850 // # fallthrough to FalseMBB
9851 MBB = StartMBB;
9852 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
9853 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9854 MBB->addSuccessor(JoinMBB);
9855 MBB->addSuccessor(FalseMBB);
9856
9857 // FalseMBB:
9858 // # fallthrough to JoinMBB
9859 MBB = FalseMBB;
9860 MBB->addSuccessor(JoinMBB);
9861
9862 // JoinMBB:
9863 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
9864 // ...
9865 MBB = JoinMBB;
9866 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
9867 for (auto *SelMI : Selects)
9868 SelMI->eraseFromParent();
9869
9870 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
9871 for (auto *DbgMI : DbgValues)
9872 MBB->splice(InsertPos, StartMBB, DbgMI);
9873
9874 return JoinMBB;
9875}
9876
9877// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
9878// StoreOpcode is the store to use and Invert says whether the store should
9879// happen when the condition is false rather than true. If a STORE ON
9880// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
9881MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
9882 MachineBasicBlock *MBB,
9883 unsigned StoreOpcode,
9884 unsigned STOCOpcode,
9885 bool Invert) const {
9886 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9887
9888 Register SrcReg = MI.getOperand(0).getReg();
9889 MachineOperand Base = MI.getOperand(1);
9890 int64_t Disp = MI.getOperand(2).getImm();
9891 Register IndexReg = MI.getOperand(3).getReg();
9892 unsigned CCValid = MI.getOperand(4).getImm();
9893 unsigned CCMask = MI.getOperand(5).getImm();
9894 DebugLoc DL = MI.getDebugLoc();
9895
9896 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
9897
9898 // ISel pattern matching also adds a load memory operand of the same
9899 // address, so take special care to find the storing memory operand.
9900 MachineMemOperand *MMO = nullptr;
9901 for (auto *I : MI.memoperands())
9902 if (I->isStore()) {
9903 MMO = I;
9904 break;
9905 }
9906
9907 // Use STOCOpcode if possible. We could use different store patterns in
9908 // order to avoid matching the index register, but the performance trade-offs
9909 // might be more complicated in that case.
9910 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
9911 if (Invert)
9912 CCMask ^= CCValid;
9913
9914 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
9915 .addReg(SrcReg)
9916 .add(Base)
9917 .addImm(Disp)
9918 .addImm(CCValid)
9919 .addImm(CCMask)
9920 .addMemOperand(MMO);
9921
9922 MI.eraseFromParent();
9923 return MBB;
9924 }
9925
9926 // Get the condition needed to branch around the store.
9927 if (!Invert)
9928 CCMask ^= CCValid;
9929
9930 MachineBasicBlock *StartMBB = MBB;
9931 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
9932 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9933
9934 // Unless CC was killed in the CondStore instruction, mark it as
9935 // live-in to both FalseMBB and JoinMBB.
9936 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
9937 !checkCCKill(MI, JoinMBB)) {
9938 FalseMBB->addLiveIn(SystemZ::CC);
9939 JoinMBB->addLiveIn(SystemZ::CC);
9940 }
9941
9942 // StartMBB:
9943 // BRC CCMask, JoinMBB
9944 // # fallthrough to FalseMBB
9945 MBB = StartMBB;
9946 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9947 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9948 MBB->addSuccessor(JoinMBB);
9949 MBB->addSuccessor(FalseMBB);
9950
9951 // FalseMBB:
9952 // store %SrcReg, %Disp(%Index,%Base)
9953 // # fallthrough to JoinMBB
9954 MBB = FalseMBB;
9955 BuildMI(MBB, DL, TII->get(StoreOpcode))
9956 .addReg(SrcReg)
9957 .add(Base)
9958 .addImm(Disp)
9959 .addReg(IndexReg)
9960 .addMemOperand(MMO);
9961 MBB->addSuccessor(JoinMBB);
9962
9963 MI.eraseFromParent();
9964 return JoinMBB;
9965}
9966
9967// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
9968 MachineBasicBlock *
9969 SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
9970 MachineBasicBlock *MBB,
9971 bool Unsigned) const {
9972 MachineFunction &MF = *MBB->getParent();
9973 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9974 MachineRegisterInfo &MRI = MF.getRegInfo();
9975
9976 // Synthetic instruction to compare 128-bit values.
9977 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
9978 Register Op0 = MI.getOperand(0).getReg();
9979 Register Op1 = MI.getOperand(1).getReg();
9980
9981 MachineBasicBlock *StartMBB = MBB;
9982 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
9983 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
9984
9985 // StartMBB:
9986 //
9987 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
9988 // Swap the inputs to get:
9989 // CC 1 if high(Op0) > high(Op1)
9990 // CC 2 if high(Op0) < high(Op1)
9991 // CC 0 if high(Op0) == high(Op1)
9992 //
9993 // If CC != 0, we're done, so jump over the next instruction.
9994 //
9995 // VEC[L]G Op1, Op0
9996 // JNE JoinMBB
9997 // # fallthrough to HiEqMBB
9998 MBB = StartMBB;
9999 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
10000 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
10001 .addReg(Op1).addReg(Op0);
10002 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
10003 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
10004 MBB->addSuccessor(JoinMBB);
10005 MBB->addSuccessor(HiEqMBB);
10006
10007 // HiEqMBB:
10008 //
10009 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
10010 // Since we already know the high parts are equal, the CC
10011 // result will only depend on the low parts:
10012 // CC 1 if low(Op0) > low(Op1)
10013 // CC 3 if low(Op0) <= low(Op1)
10014 //
10015 // VCHLGS Tmp, Op0, Op1
10016 // # fallthrough to JoinMBB
10017 MBB = HiEqMBB;
10018 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
10019 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
10020 .addReg(Op0).addReg(Op1);
10021 MBB->addSuccessor(JoinMBB);
10022
10023 // Mark CC as live-in to JoinMBB.
10024 JoinMBB->addLiveIn(SystemZ::CC);
10025
10026 MI.eraseFromParent();
10027 return JoinMBB;
10028}
10029
10030// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
10031// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
10032// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
10033// whether the field should be inverted after performing BinOpcode (e.g. for
10034// NAND).
10035MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
10036 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
10037 bool Invert) const {
10038 MachineFunction &MF = *MBB->getParent();
10039 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10040 MachineRegisterInfo &MRI = MF.getRegInfo();
10041
10042 // Extract the operands. Base can be a register or a frame index.
10043 // Src2 can be a register or immediate.
10044 Register Dest = MI.getOperand(0).getReg();
10045 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10046 int64_t Disp = MI.getOperand(2).getImm();
10047 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
10048 Register BitShift = MI.getOperand(4).getReg();
10049 Register NegBitShift = MI.getOperand(5).getReg();
10050 unsigned BitSize = MI.getOperand(6).getImm();
10051 DebugLoc DL = MI.getDebugLoc();
10052
10053 // Get the right opcodes for the displacement.
10054 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10055 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10056 assert(LOpcode && CSOpcode && "Displacement out of range");
10057
10058 // Create virtual registers for temporary results.
10059 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10060 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10061 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10062 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10063 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10064
10065 // Insert a basic block for the main loop.
10066 MachineBasicBlock *StartMBB = MBB;
10067 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10068 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10069
10070 // StartMBB:
10071 // ...
10072 // %OrigVal = L Disp(%Base)
10073 // # fall through to LoopMBB
10074 MBB = StartMBB;
10075 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10076 MBB->addSuccessor(LoopMBB);
10077
10078 // LoopMBB:
10079 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
10080 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10081 // %RotatedNewVal = OP %RotatedOldVal, %Src2
10082 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10083 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10084 // JNE LoopMBB
10085 // # fall through to DoneMBB
10086 MBB = LoopMBB;
10087 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10088 .addReg(OrigVal).addMBB(StartMBB)
10089 .addReg(Dest).addMBB(LoopMBB);
10090 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10091 .addReg(OldVal).addReg(BitShift).addImm(0);
10092 if (Invert) {
10093 // Perform the operation normally and then invert every bit of the field.
10094 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10095 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
10096 // XILF with the upper BitSize bits set.
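// After the RLL above, the field sits in the most-significant BitSize bits
// of the rotated value, so this mask inverts exactly the field.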
10097 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
10098 .addReg(Tmp).addImm(-1U << (32 - BitSize));
10099 } else if (BinOpcode)
10100 // A simple binary operation.
10101 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
10102 .addReg(RotatedOldVal)
10103 .add(Src2);
10104 else
10105 // Use RISBG to rotate Src2 into position and use it to replace the
10106 // field in RotatedOldVal.
10107 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
10108 .addReg(RotatedOldVal).addReg(Src2.getReg())
10109 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
10110 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10111 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10112 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10113 .addReg(OldVal)
10114 .addReg(NewVal)
10115 .add(Base)
10116 .addImm(Disp);
10117 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10118 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
10119 MBB->addSuccessor(LoopMBB);
10120 MBB->addSuccessor(DoneMBB);
10121
10122 MI.eraseFromParent();
10123 return DoneMBB;
10124}
10125
10126// Implement EmitInstrWithCustomInserter for subword pseudo
10127// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
10128// instruction that should be used to compare the current field with the
10129// minimum or maximum value. KeepOldMask is the BRC condition-code mask
10130// for when the current field should be kept.
10131MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
10132 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
10133 unsigned KeepOldMask) const {
10134 MachineFunction &MF = *MBB->getParent();
10135 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10136 MachineRegisterInfo &MRI = MF.getRegInfo();
10137
10138 // Extract the operands. Base can be a register or a frame index.
10139 Register Dest = MI.getOperand(0).getReg();
10140 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10141 int64_t Disp = MI.getOperand(2).getImm();
10142 Register Src2 = MI.getOperand(3).getReg();
10143 Register BitShift = MI.getOperand(4).getReg();
10144 Register NegBitShift = MI.getOperand(5).getReg();
10145 unsigned BitSize = MI.getOperand(6).getImm();
10146 DebugLoc DL = MI.getDebugLoc();
10147
10148 // Get the right opcodes for the displacement.
10149 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10150 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10151 assert(LOpcode && CSOpcode && "Displacement out of range");
10152
10153 // Create virtual registers for temporary results.
10154 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10155 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10156 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10157 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10158 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10159 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10160
10161 // Insert 3 basic blocks for the loop.
10162 MachineBasicBlock *StartMBB = MBB;
10163 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10164 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10165 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
10166 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
10167
10168 // StartMBB:
10169 // ...
10170 // %OrigVal = L Disp(%Base)
10171 // # fall through to LoopMBB
10172 MBB = StartMBB;
10173 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10174 MBB->addSuccessor(LoopMBB);
10175
10176 // LoopMBB:
10177 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
10178 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10179 // CompareOpcode %RotatedOldVal, %Src2
10180 // BRC KeepOldMask, UpdateMBB
10181 MBB = LoopMBB;
10182 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10183 .addReg(OrigVal).addMBB(StartMBB)
10184 .addReg(Dest).addMBB(UpdateMBB);
10185 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10186 .addReg(OldVal).addReg(BitShift).addImm(0);
10187 BuildMI(MBB, DL, TII->get(CompareOpcode))
10188 .addReg(RotatedOldVal).addReg(Src2);
10189 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10190 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
10191 MBB->addSuccessor(UpdateMBB);
10192 MBB->addSuccessor(UseAltMBB);
10193
10194 // UseAltMBB:
10195 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
10196 // # fall through to UpdateMBB
10197 MBB = UseAltMBB;
10198 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
10199 .addReg(RotatedOldVal).addReg(Src2)
10200 .addImm(32).addImm(31 + BitSize).addImm(0);
10201 MBB->addSuccessor(UpdateMBB);
10202
10203 // UpdateMBB:
10204 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
10205 // [ %RotatedAltVal, UseAltMBB ]
10206 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10207 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10208 // JNE LoopMBB
10209 // # fall through to DoneMBB
10210 MBB = UpdateMBB;
10211 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
10212 .addReg(RotatedOldVal).addMBB(LoopMBB)
10213 .addReg(RotatedAltVal).addMBB(UseAltMBB);
10214 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10215 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10216 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10217 .addReg(OldVal)
10218 .addReg(NewVal)
10219 .add(Base)
10220 .addImm(Disp);
10221 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10222 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
10223 MBB->addSuccessor(LoopMBB);
10224 MBB->addSuccessor(DoneMBB);
10225
10226 MI.eraseFromParent();
10227 return DoneMBB;
10228}
10229
10230// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
10231// instruction MI.
10232 MachineBasicBlock *
10233 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
10234 MachineBasicBlock *MBB) const {
10235 MachineFunction &MF = *MBB->getParent();
10236 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10237 MachineRegisterInfo &MRI = MF.getRegInfo();
10238
10239 // Extract the operands. Base can be a register or a frame index.
10240 Register Dest = MI.getOperand(0).getReg();
10241 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10242 int64_t Disp = MI.getOperand(2).getImm();
10243 Register CmpVal = MI.getOperand(3).getReg();
10244 Register OrigSwapVal = MI.getOperand(4).getReg();
10245 Register BitShift = MI.getOperand(5).getReg();
10246 Register NegBitShift = MI.getOperand(6).getReg();
10247 int64_t BitSize = MI.getOperand(7).getImm();
10248 DebugLoc DL = MI.getDebugLoc();
10249
10250 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
10251
10252 // Get the right opcodes for the displacement and zero-extension.
10253 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10254 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10255 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
10256 assert(LOpcode && CSOpcode && "Displacement out of range");
10257
10258 // Create virtual registers for temporary results.
10259 Register OrigOldVal = MRI.createVirtualRegister(RC);
10260 Register OldVal = MRI.createVirtualRegister(RC);
10261 Register SwapVal = MRI.createVirtualRegister(RC);
10262 Register StoreVal = MRI.createVirtualRegister(RC);
10263 Register OldValRot = MRI.createVirtualRegister(RC);
10264 Register RetryOldVal = MRI.createVirtualRegister(RC);
10265 Register RetrySwapVal = MRI.createVirtualRegister(RC);
10266
10267 // Insert 2 basic blocks for the loop.
10268 MachineBasicBlock *StartMBB = MBB;
10269 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10270 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10271 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
10272
10273 // StartMBB:
10274 // ...
10275 // %OrigOldVal = L Disp(%Base)
10276 // # fall through to LoopMBB
10277 MBB = StartMBB;
10278 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
10279 .add(Base)
10280 .addImm(Disp)
10281 .addReg(0);
10282 MBB->addSuccessor(LoopMBB);
10283
10284 // LoopMBB:
10285 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
10286 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
10287 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
10288 // ^^ The low BitSize bits contain the field
10289 // of interest.
10290 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
10291 // ^^ Replace the upper 32-BitSize bits of the
10292 // swap value with those that we loaded and rotated.
10293 // %Dest = LL[CH] %OldValRot
10294 // CR %Dest, %CmpVal
10295 // JNE DoneMBB
10296 // # Fall through to SetMBB
10297 MBB = LoopMBB;
10298 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10299 .addReg(OrigOldVal).addMBB(StartMBB)
10300 .addReg(RetryOldVal).addMBB(SetMBB);
10301 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
10302 .addReg(OrigSwapVal).addMBB(StartMBB)
10303 .addReg(RetrySwapVal).addMBB(SetMBB);
10304 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
10305 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
10306 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
10307 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
10308 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
10309 .addReg(OldValRot);
10310 BuildMI(MBB, DL, TII->get(SystemZ::CR))
10311 .addReg(Dest).addReg(CmpVal);
10312 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10313 .addImm(SystemZ::CCMASK_ICMP)
10314 .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB);
10315 MBB->addSuccessor(DoneMBB);
10316 MBB->addSuccessor(SetMBB);
10317
10318 // SetMBB:
10319 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
10320 // ^^ Rotate the new field to its proper position.
10321 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
10322 // JNE LoopMBB
10323 // # fall through to ExitMBB
10324 MBB = SetMBB;
10325 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
10326 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
10327 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
10328 .addReg(OldVal)
10329 .addReg(StoreVal)
10330 .add(Base)
10331 .addImm(Disp);
10332 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10333 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
10334 MBB->addSuccessor(LoopMBB);
10335 MBB->addSuccessor(DoneMBB);
10336
10337 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
10338 // to the block after the loop. At this point, CC may have been defined
10339 // either by the CR in LoopMBB or by the CS in SetMBB.
10340 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
10341 DoneMBB->addLiveIn(SystemZ::CC);
10342
10343 MI.eraseFromParent();
10344 return DoneMBB;
10345}
10346
10347// Emit a move from two GR64s to a GR128.
10348 MachineBasicBlock *
10349 SystemZTargetLowering::emitPair128(MachineInstr &MI,
10350 MachineBasicBlock *MBB) const {
10351 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10352 const DebugLoc &DL = MI.getDebugLoc();
10353
10354 Register Dest = MI.getOperand(0).getReg();
10355 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
10356 .add(MI.getOperand(1))
10357 .addImm(SystemZ::subreg_h64)
10358 .add(MI.getOperand(2))
10359 .addImm(SystemZ::subreg_l64);
10360 MI.eraseFromParent();
10361 return MBB;
10362}
10363
10364// Emit an extension from a GR64 to a GR128. ClearEven is true
10365// if the high register of the GR128 value must be cleared or false if
10366// it's "don't care".
10367MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
10368 MachineBasicBlock *MBB,
10369 bool ClearEven) const {
10370 MachineFunction &MF = *MBB->getParent();
10371 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10372 MachineRegisterInfo &MRI = MF.getRegInfo();
10373 DebugLoc DL = MI.getDebugLoc();
10374
10375 Register Dest = MI.getOperand(0).getReg();
10376 Register Src = MI.getOperand(1).getReg();
10377 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10378
10379 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
10380 if (ClearEven) {
10381 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10382 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10383
10384 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
10385 .addImm(0);
10386 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
10387 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
10388 In128 = NewIn128;
10389 }
10390 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
10391 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
10392
10393 MI.eraseFromParent();
10394 return MBB;
10395}
10396
10397 MachineBasicBlock *
10398 SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
10399 MachineBasicBlock *MBB,
10400 unsigned Opcode, bool IsMemset) const {
10401 MachineFunction &MF = *MBB->getParent();
10402 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10403 MachineRegisterInfo &MRI = MF.getRegInfo();
10404 DebugLoc DL = MI.getDebugLoc();
10405
10406 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
10407 uint64_t DestDisp = MI.getOperand(1).getImm();
10408 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
10409 uint64_t SrcDisp;
10410
10411 // Fold the displacement Disp if it is out of range.
10412 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
10413 if (!isUInt<12>(Disp)) {
10414 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10415 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
10416 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
10417 .add(Base).addImm(Disp).addReg(0);
10418 Base = MachineOperand::CreateReg(Reg, false);
10419 Disp = 0;
10420 }
10421 };
10422
10423 if (!IsMemset) {
10424 SrcBase = earlyUseOperand(MI.getOperand(2));
10425 SrcDisp = MI.getOperand(3).getImm();
10426 } else {
10427 SrcBase = DestBase;
10428 SrcDisp = DestDisp++;
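// For memset the "source" is the byte just stored at the destination:
// the overlapping MVC below propagates it forward one byte at a time.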
10429 foldDisplIfNeeded(DestBase, DestDisp);
10430 }
10431
10432 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
10433 bool IsImmForm = LengthMO.isImm();
10434 bool IsRegForm = !IsImmForm;
10435
10436 // Build and insert one Opcode of Length, with special treatment for memset.
10437 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
10438 MachineBasicBlock::iterator InsPos,
10439 MachineOperand DBase, uint64_t DDisp,
10440 MachineOperand SBase, uint64_t SDisp,
10441 unsigned Length) -> void {
10442 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
10443 if (IsMemset) {
10444 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
10445 if (ByteMO.isImm())
10446 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
10447 .add(SBase).addImm(SDisp).add(ByteMO);
10448 else
10449 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
10450 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
10451 if (--Length == 0)
10452 return;
10453 }
10454 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
10455 .add(DBase).addImm(DDisp).addImm(Length)
10456 .add(SBase).addImm(SDisp)
10457 .setMemRefs(MI.memoperands());
10458 };
10459
10460 bool NeedsLoop = false;
10461 uint64_t ImmLength = 0;
10462 Register LenAdjReg = SystemZ::NoRegister;
10463 if (IsImmForm) {
10464 ImmLength = LengthMO.getImm();
10465 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
10466 if (ImmLength == 0) {
10467 MI.eraseFromParent();
10468 return MBB;
10469 }
10470 if (Opcode == SystemZ::CLC) {
10471 if (ImmLength > 3 * 256)
10472 // A two-CLC sequence is a clear win over a loop, not least because
10473 // it needs only one branch. A three-CLC sequence needs the same
10474 // number of branches as a loop (i.e. 2), but is shorter. That
10475 // brings us to lengths greater than 768 bytes. It seems relatively
10476 // likely that a difference will be found within the first 768 bytes,
10477 // so we just optimize for the smallest number of branch
10478 // instructions, in order to avoid polluting the prediction buffer
10479 // too much.
10480 NeedsLoop = true;
10481 } else if (ImmLength > 6 * 256)
10482 // The heuristic we use is to prefer loops for anything that would
10483 // require 7 or more MVCs. With these kinds of sizes there isn't much
10484 // to choose between straight-line code and looping code, since the
10485 // time will be dominated by the MVCs themselves.
10486 NeedsLoop = true;
10487 } else {
10488 NeedsLoop = true;
10489 LenAdjReg = LengthMO.getReg();
10490 }
10491
10492 // When generating more than one CLC, all but the last will need to
10493 // branch to the end when a difference is found.
10494 MachineBasicBlock *EndMBB =
10495 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
10496 ? SystemZ::splitBlockAfter(MI, MBB)
10497 : nullptr);
10498
10499 if (NeedsLoop) {
10500 Register StartCountReg =
10501 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10502 if (IsImmForm) {
10503 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
10504 ImmLength &= 255;
10505 } else {
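// Register form: the number of full 256-byte blocks is the adjusted
// length shifted right by 8; the leftover bytes are handled by EXRL below.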
10506 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
10507 .addReg(LenAdjReg)
10508 .addReg(0)
10509 .addImm(8);
10510 }
10511
10512 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
10513 auto loadZeroAddress = [&]() -> MachineOperand {
10514 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10515 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
10516 return MachineOperand::CreateReg(Reg, false);
10517 };
10518 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
10519 DestBase = loadZeroAddress();
10520 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
10521 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
10522
10523 MachineBasicBlock *StartMBB = nullptr;
10524 MachineBasicBlock *LoopMBB = nullptr;
10525 MachineBasicBlock *NextMBB = nullptr;
10526 MachineBasicBlock *DoneMBB = nullptr;
10527 MachineBasicBlock *AllDoneMBB = nullptr;
10528
10529 Register StartSrcReg = forceReg(MI, SrcBase, TII);
10530 Register StartDestReg =
10531 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
10532
10533 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
10534 Register ThisSrcReg = MRI.createVirtualRegister(RC);
10535 Register ThisDestReg =
10536 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
10537 Register NextSrcReg = MRI.createVirtualRegister(RC);
10538 Register NextDestReg =
10539 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
10540 RC = &SystemZ::GR64BitRegClass;
10541 Register ThisCountReg = MRI.createVirtualRegister(RC);
10542 Register NextCountReg = MRI.createVirtualRegister(RC);
10543
10544 if (IsRegForm) {
10545 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10546 StartMBB = SystemZ::emitBlockAfter(MBB);
10547 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10548 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10549 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
10550
10551 // MBB:
10552 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
10553 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10554 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
10555 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10556 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10557 .addMBB(AllDoneMBB);
10558 MBB->addSuccessor(AllDoneMBB);
10559 if (!IsMemset)
10560 MBB->addSuccessor(StartMBB);
10561 else {
10562 // MemsetOneCheckMBB:
10563 // # Jump to MemsetOneMBB for a memset of length 1, or
10564 // # fall thru to StartMBB.
10565 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
10566 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
10567 MBB->addSuccessor(MemsetOneCheckMBB);
10568 MBB = MemsetOneCheckMBB;
10569 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10570 .addReg(LenAdjReg).addImm(-1);
10571 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10572 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10573 .addMBB(MemsetOneMBB);
10574 MBB->addSuccessor(MemsetOneMBB, {10, 100});
10575 MBB->addSuccessor(StartMBB, {90, 100});
10576
10577 // MemsetOneMBB:
10578 // # Jump back to AllDoneMBB after a single MVI or STC.
10579 MBB = MemsetOneMBB;
10580 insertMemMemOp(MBB, MBB->end(),
10581 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
10582 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
10583 1);
10584 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
10585 MBB->addSuccessor(AllDoneMBB);
10586 }
10587
10588 // StartMBB:
10589 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
10590 MBB = StartMBB;
10591 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10592 .addReg(StartCountReg).addImm(0);
10593 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10594 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10595 .addMBB(DoneMBB);
10596 MBB->addSuccessor(DoneMBB);
10597 MBB->addSuccessor(LoopMBB);
10598 }
10599 else {
10600 StartMBB = MBB;
10601 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10602 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10603 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10604
10605 // StartMBB:
10606 // # fall through to LoopMBB
10607 MBB->addSuccessor(LoopMBB);
10608
10609 DestBase = MachineOperand::CreateReg(NextDestReg, false);
10610 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
10611 if (EndMBB && !ImmLength)
10612 // If the loop handled the whole CLC range, DoneMBB will be empty with
10613 // CC live-through into EndMBB, so add it as live-in.
10614 DoneMBB->addLiveIn(SystemZ::CC);
10615 }
10616
10617 // LoopMBB:
10618 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
10619 // [ %NextDestReg, NextMBB ]
10620 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
10621 // [ %NextSrcReg, NextMBB ]
10622 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
10623 // [ %NextCountReg, NextMBB ]
10624 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
10625 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
10626 // ( JLH EndMBB )
10627 //
10628 // The prefetch is used only for MVC. The JLH is used only for CLC.
10629 MBB = LoopMBB;
10630 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
10631 .addReg(StartDestReg).addMBB(StartMBB)
10632 .addReg(NextDestReg).addMBB(NextMBB);
10633 if (!HaveSingleBase)
10634 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
10635 .addReg(StartSrcReg).addMBB(StartMBB)
10636 .addReg(NextSrcReg).addMBB(NextMBB);
10637 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
10638 .addReg(StartCountReg).addMBB(StartMBB)
10639 .addReg(NextCountReg).addMBB(NextMBB);
10640 if (Opcode == SystemZ::MVC)
10641 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
10642 .addImm(SystemZ::PFD_WRITE)
10643 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
10644 insertMemMemOp(MBB, MBB->end(),
10645 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
10646 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
10647 if (EndMBB) {
10648 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10649 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10650 .addMBB(EndMBB);
10651 MBB->addSuccessor(EndMBB);
10652 MBB->addSuccessor(NextMBB);
10653 }
10654
10655 // NextMBB:
10656 // %NextDestReg = LA 256(%ThisDestReg)
10657 // %NextSrcReg = LA 256(%ThisSrcReg)
10658 // %NextCountReg = AGHI %ThisCountReg, -1
10659 // CGHI %NextCountReg, 0
10660 // JLH LoopMBB
10661 // # fall through to DoneMBB
10662 //
10663 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
10664 MBB = NextMBB;
10665 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
10666 .addReg(ThisDestReg).addImm(256).addReg(0);
10667 if (!HaveSingleBase)
10668 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
10669 .addReg(ThisSrcReg).addImm(256).addReg(0);
10670 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
10671 .addReg(ThisCountReg).addImm(-1);
10672 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10673 .addReg(NextCountReg).addImm(0);
10674 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10675 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10676 .addMBB(LoopMBB);
10677 MBB->addSuccessor(LoopMBB);
10678 MBB->addSuccessor(DoneMBB);
10679
10680 MBB = DoneMBB;
10681 if (IsRegForm) {
10682 // DoneMBB:
10683 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
10684 // # Use EXecute Relative Long for the remainder of the bytes. The target
10685 // instruction of the EXRL will have a length field of 1 since 0 is an
10686 // illegal value. The number of bytes processed becomes (%LenAdjReg &
10687 // 0xff) + 1.
10688 // # Fall through to AllDoneMBB.
10689 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10690 Register RemDestReg = HaveSingleBase ? RemSrcReg
10691 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10692 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
10693 .addReg(StartDestReg).addMBB(StartMBB)
10694 .addReg(NextDestReg).addMBB(NextMBB);
10695 if (!HaveSingleBase)
10696 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
10697 .addReg(StartSrcReg).addMBB(StartMBB)
10698 .addReg(NextSrcReg).addMBB(NextMBB);
10699 if (IsMemset)
10700 insertMemMemOp(MBB, MBB->end(),
10701 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
10702 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
10703 MachineInstrBuilder EXRL_MIB =
10704 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
10705 .addImm(Opcode)
10706 .addReg(LenAdjReg)
10707 .addReg(RemDestReg).addImm(DestDisp)
10708 .addReg(RemSrcReg).addImm(SrcDisp);
10709 MBB->addSuccessor(AllDoneMBB);
10710 MBB = AllDoneMBB;
10711 if (Opcode != SystemZ::MVC) {
10712 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
10713 if (EndMBB)
10714 MBB->addLiveIn(SystemZ::CC);
10715 }
10716 }
10717 MF.getProperties().resetNoPHIs();
10718 }
10719
10720 // Handle any remaining bytes with straight-line code.
10721 while (ImmLength > 0) {
10722 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
10723 // The previous iteration might have created out-of-range displacements.
10724 // Apply them using LA/LAY if so.
10725 foldDisplIfNeeded(DestBase, DestDisp);
10726 foldDisplIfNeeded(SrcBase, SrcDisp);
10727 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
10728 DestDisp += ThisLength;
10729 SrcDisp += ThisLength;
10730 ImmLength -= ThisLength;
10731 // If there's another CLC to go, branch to the end if a difference
10732 // was found.
10733 if (EndMBB && ImmLength > 0) {
10734 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
10735 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10736 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE)
10737 .addMBB(EndMBB);
10738 MBB->addSuccessor(EndMBB);
10739 MBB->addSuccessor(NextMBB);
10740 MBB = NextMBB;
10741 }
10742 }
10743 if (EndMBB) {
10744 MBB->addSuccessor(EndMBB);
10745 MBB = EndMBB;
10746 MBB->addLiveIn(SystemZ::CC);
10747 }
10748
10749 MI.eraseFromParent();
10750 return MBB;
10751}
10752
10753// Decompose string pseudo-instruction MI into a loop that continually performs
10754// Opcode until CC != 3.
10755MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
10756 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10757 MachineFunction &MF = *MBB->getParent();
10758 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10759 MachineRegisterInfo &MRI = MF.getRegInfo();
10760 DebugLoc DL = MI.getDebugLoc();
10761
10762 uint64_t End1Reg = MI.getOperand(0).getReg();
10763 uint64_t Start1Reg = MI.getOperand(1).getReg();
10764 uint64_t Start2Reg = MI.getOperand(2).getReg();
10765 uint64_t CharReg = MI.getOperand(3).getReg();
10766
10767 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
10768 uint64_t This1Reg = MRI.createVirtualRegister(RC);
10769 uint64_t This2Reg = MRI.createVirtualRegister(RC);
10770 uint64_t End2Reg = MRI.createVirtualRegister(RC);
10771
10772 MachineBasicBlock *StartMBB = MBB;
10773 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10774 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10775
10776 // StartMBB:
10777 // # fall through to LoopMBB
10778 MBB->addSuccessor(LoopMBB);
10779
10780 // LoopMBB:
10781 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
10782 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
10783 // R0L = %CharReg
10784 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
10785 // JO LoopMBB
10786 // # fall through to DoneMBB
10787 //
10788 // The load of R0L can be hoisted by post-RA LICM.
10789 MBB = LoopMBB;
10790
10791 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
10792 .addReg(Start1Reg).addMBB(StartMBB)
10793 .addReg(End1Reg).addMBB(LoopMBB);
10794 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
10795 .addReg(Start2Reg).addMBB(StartMBB)
10796 .addReg(End2Reg).addMBB(LoopMBB);
10797 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
10798 BuildMI(MBB, DL, TII->get(Opcode))
10799 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
10800 .addReg(This1Reg).addReg(This2Reg);
10801 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10802 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB);
10803 MBB->addSuccessor(LoopMBB);
10804 MBB->addSuccessor(DoneMBB);
10805
10806 DoneMBB->addLiveIn(SystemZ::CC);
10807
10808 MI.eraseFromParent();
10809 return DoneMBB;
10810}
10811
10812// Update TBEGIN instruction with final opcode and register clobbers.
10813MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
10814 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
10815 bool NoFloat) const {
10816 MachineFunction &MF = *MBB->getParent();
10817 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
10818 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10819
10820 // Update opcode.
10821 MI.setDesc(TII->get(Opcode));
10822
10823 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
10824 // Make sure to add the corresponding GRSM bits if they are missing.
10825 uint64_t Control = MI.getOperand(2).getImm();
10826 static const unsigned GPRControlBit[16] = {
10827 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
10828 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
10829 };
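// Adjacent even/odd GPRs share a save bit in the GRSM; r15 is always
// preserved, and r11 as well when it is used as the frame pointer.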
10830 Control |= GPRControlBit[15];
10831 if (TFI->hasFP(MF))
10832 Control |= GPRControlBit[11];
10833 MI.getOperand(2).setImm(Control);
10834
10835 // Add GPR clobbers.
10836 for (int I = 0; I < 16; I++) {
10837 if ((Control & GPRControlBit[I]) == 0) {
10838 unsigned Reg = SystemZMC::GR64Regs[I];
10839 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10840 }
10841 }
10842
10843 // Add FPR/VR clobbers.
10844 if (!NoFloat && (Control & 4) != 0) {
10845 if (Subtarget.hasVector()) {
10846 for (unsigned Reg : SystemZMC::VR128Regs) {
10847 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10848 }
10849 } else {
10850 for (unsigned Reg : SystemZMC::FP64Regs) {
10851 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10852 }
10853 }
10854 }
10855
10856 return MBB;
10857}
10858
10859MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
10860 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10861 MachineFunction &MF = *MBB->getParent();
10862 MachineRegisterInfo *MRI = &MF.getRegInfo();
10863 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10864 DebugLoc DL = MI.getDebugLoc();
10865
10866 Register SrcReg = MI.getOperand(0).getReg();
10867
10868 // Create new virtual register of the same class as source.
10869 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
10870 Register DstReg = MRI->createVirtualRegister(RC);
10871
10872 // Replace pseudo with a normal load-and-test that models the def as
10873 // well.
10874 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
10875 .addReg(SrcReg)
10876 .setMIFlags(MI.getFlags());
10877 MI.eraseFromParent();
10878
10879 return MBB;
10880}
10881
10882MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
10883 MachineInstr &MI, MachineBasicBlock *MBB) const {
10884 MachineFunction &MF = *MBB->getParent();
10885 MachineRegisterInfo *MRI = &MF.getRegInfo();
10886 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10887 DebugLoc DL = MI.getDebugLoc();
10888 const unsigned ProbeSize = getStackProbeSize(MF);
10889 Register DstReg = MI.getOperand(0).getReg();
10890 Register SizeReg = MI.getOperand(2).getReg();
10891
10892 MachineBasicBlock *StartMBB = MBB;
10893 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
10894 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
10895 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
10896 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
10897 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
10898
10899 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
10900 MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1));
10901
10902 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10903 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10904
10905 // LoopTestMBB
10906 // BRC TailTestMBB
10907 // # fallthrough to LoopBodyMBB
10908 StartMBB->addSuccessor(LoopTestMBB);
10909 MBB = LoopTestMBB;
10910 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
10911 .addReg(SizeReg)
10912 .addMBB(StartMBB)
10913 .addReg(IncReg)
10914 .addMBB(LoopBodyMBB);
10915 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
10916 .addReg(PHIReg)
10917 .addImm(ProbeSize);
10918 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10919 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT)
10920 .addMBB(TailTestMBB);
10921 MBB->addSuccessor(LoopBodyMBB);
10922 MBB->addSuccessor(TailTestMBB);
10923
10924 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
10925 // J LoopTestMBB
10926 MBB = LoopBodyMBB;
10927 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
10928 .addReg(PHIReg)
10929 .addImm(ProbeSize);
10930 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
10931 .addReg(SystemZ::R15D)
10932 .addImm(ProbeSize);
10933 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
10934 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
10935 .setMemRefs(VolLdMMO);
10936 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
10937 MBB->addSuccessor(LoopTestMBB);
10938
10939 // TailTestMBB
10940 // BRC DoneMBB
10941 // # fallthrough to TailMBB
10942 MBB = TailTestMBB;
10943 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10944 .addReg(PHIReg)
10945 .addImm(0);
10946 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10947 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ)
10948 .addMBB(DoneMBB);
10949 MBB->addSuccessor(TailMBB);
10950 MBB->addSuccessor(DoneMBB);
10951
10952 // TailMBB
10953 // # fallthrough to DoneMBB
10954 MBB = TailMBB;
10955 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
10956 .addReg(SystemZ::R15D)
10957 .addReg(PHIReg);
10958 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
10959 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
10960 .setMemRefs(VolLdMMO);
10961 MBB->addSuccessor(DoneMBB);
10962
10963 // DoneMBB
10964 MBB = DoneMBB;
10965 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
10966 .addReg(SystemZ::R15D);
10967
10968 MI.eraseFromParent();
10969 return DoneMBB;
10970}
10971
10972SDValue SystemZTargetLowering::
10973getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
10974 MachineFunction &MF = DAG.getMachineFunction();
10975 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
10976 SDLoc DL(SP);
10977 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
10978 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
10979}
10980
10981 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
10982 MachineInstr &MI, MachineBasicBlock *MBB) const {
10983 switch (MI.getOpcode()) {
10984 case SystemZ::ADJCALLSTACKDOWN:
10985 case SystemZ::ADJCALLSTACKUP:
10986 return emitAdjCallStack(MI, MBB);
10987
10988 case SystemZ::Select32:
10989 case SystemZ::Select64:
10990 case SystemZ::Select128:
10991 case SystemZ::SelectF32:
10992 case SystemZ::SelectF64:
10993 case SystemZ::SelectF128:
10994 case SystemZ::SelectVR32:
10995 case SystemZ::SelectVR64:
10996 case SystemZ::SelectVR128:
10997 return emitSelect(MI, MBB);
10998
10999 case SystemZ::CondStore8Mux:
11000 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
11001 case SystemZ::CondStore8MuxInv:
11002 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
11003 case SystemZ::CondStore16Mux:
11004 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
11005 case SystemZ::CondStore16MuxInv:
11006 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
11007 case SystemZ::CondStore32Mux:
11008 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
11009 case SystemZ::CondStore32MuxInv:
11010 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
11011 case SystemZ::CondStore8:
11012 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
11013 case SystemZ::CondStore8Inv:
11014 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
11015 case SystemZ::CondStore16:
11016 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
11017 case SystemZ::CondStore16Inv:
11018 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
11019 case SystemZ::CondStore32:
11020 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
11021 case SystemZ::CondStore32Inv:
11022 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
11023 case SystemZ::CondStore64:
11024 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
11025 case SystemZ::CondStore64Inv:
11026 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
11027 case SystemZ::CondStoreF32:
11028 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
11029 case SystemZ::CondStoreF32Inv:
11030 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
11031 case SystemZ::CondStoreF64:
11032 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
11033 case SystemZ::CondStoreF64Inv:
11034 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
11035
11036 case SystemZ::SCmp128Hi:
11037 return emitICmp128Hi(MI, MBB, false);
11038 case SystemZ::UCmp128Hi:
11039 return emitICmp128Hi(MI, MBB, true);
11040
11041 case SystemZ::PAIR128:
11042 return emitPair128(MI, MBB);
11043 case SystemZ::AEXT128:
11044 return emitExt128(MI, MBB, false);
11045 case SystemZ::ZEXT128:
11046 return emitExt128(MI, MBB, true);
11047
11048 case SystemZ::ATOMIC_SWAPW:
11049 return emitAtomicLoadBinary(MI, MBB, 0);
11050
11051 case SystemZ::ATOMIC_LOADW_AR:
11052 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
11053 case SystemZ::ATOMIC_LOADW_AFI:
11054 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
11055
11056 case SystemZ::ATOMIC_LOADW_SR:
11057 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
11058
11059 case SystemZ::ATOMIC_LOADW_NR:
11060 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
11061 case SystemZ::ATOMIC_LOADW_NILH:
11062 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
11063
11064 case SystemZ::ATOMIC_LOADW_OR:
11065 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
11066 case SystemZ::ATOMIC_LOADW_OILH:
11067 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
11068
11069 case SystemZ::ATOMIC_LOADW_XR:
11070 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
11071 case SystemZ::ATOMIC_LOADW_XILF:
11072 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
11073
11074 case SystemZ::ATOMIC_LOADW_NRi:
11075 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
11076 case SystemZ::ATOMIC_LOADW_NILHi:
11077 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
11078
11079 case SystemZ::ATOMIC_LOADW_MIN:
11080 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
11081 case SystemZ::ATOMIC_LOADW_MAX:
11082 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
11083 case SystemZ::ATOMIC_LOADW_UMIN:
11084 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
11085 case SystemZ::ATOMIC_LOADW_UMAX:
11086 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
11087
11088 case SystemZ::ATOMIC_CMP_SWAPW:
11089 return emitAtomicCmpSwapW(MI, MBB);
11090 case SystemZ::MVCImm:
11091 case SystemZ::MVCReg:
11092 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
11093 case SystemZ::NCImm:
11094 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
11095 case SystemZ::OCImm:
11096 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
11097 case SystemZ::XCImm:
11098 case SystemZ::XCReg:
11099 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
11100 case SystemZ::CLCImm:
11101 case SystemZ::CLCReg:
11102 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
11103 case SystemZ::MemsetImmImm:
11104 case SystemZ::MemsetImmReg:
11105 case SystemZ::MemsetRegImm:
11106 case SystemZ::MemsetRegReg:
11107 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
11108 case SystemZ::CLSTLoop:
11109 return emitStringWrapper(MI, MBB, SystemZ::CLST);
11110 case SystemZ::MVSTLoop:
11111 return emitStringWrapper(MI, MBB, SystemZ::MVST);
11112 case SystemZ::SRSTLoop:
11113 return emitStringWrapper(MI, MBB, SystemZ::SRST);
11114 case SystemZ::TBEGIN:
11115 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
11116 case SystemZ::TBEGIN_nofloat:
11117 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
11118 case SystemZ::TBEGINC:
11119 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
11120 case SystemZ::LTEBRCompare_Pseudo:
11121 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
11122 case SystemZ::LTDBRCompare_Pseudo:
11123 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
11124 case SystemZ::LTXBRCompare_Pseudo:
11125 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
11126
11127 case SystemZ::PROBED_ALLOCA:
11128 return emitProbedAlloca(MI, MBB);
11129 case SystemZ::EH_SjLj_SetJmp:
11130 return emitEHSjLjSetJmp(MI, MBB);
11131 case SystemZ::EH_SjLj_LongJmp:
11132 return emitEHSjLjLongJmp(MI, MBB);
11133
11134 case TargetOpcode::STACKMAP:
11135 case TargetOpcode::PATCHPOINT:
11136 return emitPatchPoint(MI, MBB);
11137
11138 default:
11139 llvm_unreachable("Unexpected instr type to insert");
11140 }
11141}
11142
11143// This is only used by the isel schedulers, and is needed only to prevent
11144// the compiler from crashing when list-ilp is used.
11145const TargetRegisterClass *
11146SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
11147 if (VT == MVT::Untyped)
11148 return &SystemZ::ADDR128BitRegClass;
11149  return TargetLowering::getRepRegClassFor(VT);
11150}
11151
11152SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
11153 SelectionDAG &DAG) const {
11154 SDLoc dl(Op);
11155 /*
11156 The rounding method is in FPC Byte 3 bits 6-7, and has the following
11157 settings:
11158 00 Round to nearest
11159 01 Round to 0
11160 10 Round to +inf
11161 11 Round to -inf
11162
11163 FLT_ROUNDS, on the other hand, expects the following:
11164 -1 Undefined
11165 0 Round to 0
11166 1 Round to nearest
11167 2 Round to +inf
11168 3 Round to -inf
11169 */
11170
11171 // Save FPC to register.
11172 SDValue Chain = Op.getOperand(0);
11173 SDValue EFPC(
11174 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
11175 Chain = EFPC.getValue(1);
11176
11177 // Transform as necessary
11178 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
11179 DAG.getConstant(3, dl, MVT::i32));
11180 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
11181 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
11182 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
11183 DAG.getConstant(1, dl, MVT::i32)));
11184
11185 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
11186 DAG.getConstant(1, dl, MVT::i32));
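  // Worked check of the transform (plain arithmetic on the table above):
  //   FPC 00: (0 ^ 0) ^ 1 = 1  (round to nearest)
  //   FPC 01: (1 ^ 0) ^ 1 = 0  (round to 0)
  //   FPC 10: (2 ^ 1) ^ 1 = 2  (round to +inf)
  //   FPC 11: (3 ^ 1) ^ 1 = 3  (round to -inf)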
11187 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
11188
11189 return DAG.getMergeValues({RetVal, Chain}, dl);
11190}
11191
11192SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
11193 SelectionDAG &DAG) const {
11194 EVT VT = Op.getValueType();
11195 Op = Op.getOperand(0);
11196 EVT OpVT = Op.getValueType();
11197
11198 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
11199
11200 SDLoc DL(Op);
11201
11202 // load a 0 vector for the third operand of VSUM.
11203 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
11204
11205 // execute VSUM.
11206 switch (OpVT.getScalarSizeInBits()) {
11207 case 8:
11208 case 16:
11209 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
11210 [[fallthrough]];
11211 case 32:
11212 case 64:
11213 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
11214 DAG.getBitcast(Op.getValueType(), Zero));
11215 break;
11216 case 128:
11217 break; // VSUM over v1i128 should not happen and would be a noop
11218 default:
11219 llvm_unreachable("Unexpected scalar size.");
11220 }
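  // Illustrative walk-through: a v4i32 input takes the 32/64-bit path above,
  // so a single quadword VSUM leaves the whole sum in an i128 value; the code
  // below bitcasts that back to v4i32 and reads element 3, the low-order word
  // on this big-endian target.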
11221 // Cast to original vector type, retrieve last element.
11222 return DAG.getNode(
11223 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
11224 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
11225}
11226
11227static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS) {
11228  FunctionType *FT = F->getFunctionType();
11229 const AttributeList &Attrs = F->getAttributes();
11230 if (Attrs.hasRetAttrs())
11231 OS << Attrs.getAsString(AttributeList::ReturnIndex) << " ";
11232 OS << *F->getReturnType() << " @" << F->getName() << "(";
11233 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
11234 if (I)
11235 OS << ", ";
11236 OS << *FT->getParamType(I);
11237 AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
11238 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt})
11239 if (ArgAttrs.hasAttribute(A))
11240 OS << " " << Attribute::getNameFromAttrKind(A);
11241 }
11242 OS << ")\n";
11243}
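// Illustrative example for the helper above (hypothetical function @bar): an
// IR declaration such as
//   declare signext i32 @bar(i32 signext, i16 zeroext)
// would be printed roughly as
//   signext i32 @bar(i32 signext, i16 zeroext)
// i.e. the return and parameter types followed by any signext/zeroext/noext
// attributes that are present.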
11244
11245bool SystemZTargetLowering::isInternal(const Function *Fn) const {
11246 std::map<const Function *, bool>::iterator Itr = IsInternalCache.find(Fn);
11247 if (Itr == IsInternalCache.end())
11248 Itr = IsInternalCache
11249 .insert(std::pair<const Function *, bool>(
11250 Fn, (Fn->hasLocalLinkage() && !Fn->hasAddressTaken())))
11251 .first;
11252 return Itr->second;
11253}
11254
11255void SystemZTargetLowering::
11256verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
11257 const Function *F, SDValue Callee) const {
11258 // Temporarily only do the check when explicitly requested, until it can be
11259 // enabled by default.
11261 return;
11262
11263 bool IsInternal = false;
11264 const Function *CalleeFn = nullptr;
11265 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
11266 if ((CalleeFn = dyn_cast<Function>(G->getGlobal())))
11267 IsInternal = isInternal(CalleeFn);
11268 if (!IsInternal && !verifyNarrowIntegerArgs(Outs)) {
11269 errs() << "ERROR: Missing extension attribute of passed "
11270 << "value in call to function:\n" << "Callee: ";
11271 if (CalleeFn != nullptr)
11272 printFunctionArgExts(CalleeFn, errs());
11273 else
11274 errs() << "-\n";
11275 errs() << "Caller: ";
11276    printFunctionArgExts(F, errs());
11277    llvm_unreachable("");
11278 }
11279}
11280
11281void SystemZTargetLowering::
11282verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
11283 const Function *F) const {
11284 // Temporarily only do the check when explicitly requested, until it can be
11285 // enabled by default.
11287 return;
11288
11289 if (!isInternal(F) && !verifyNarrowIntegerArgs(Outs)) {
11290 errs() << "ERROR: Missing extension attribute of returned "
11291 << "value from function:\n";
11292    printFunctionArgExts(F, errs());
11293    llvm_unreachable("");
11294 }
11295}
11296
11297// Verify that narrow integer arguments are extended as required by the ABI.
11298// Return false if an error is found.
11299bool SystemZTargetLowering::verifyNarrowIntegerArgs(
11300 const SmallVectorImpl<ISD::OutputArg> &Outs) const {
11301 if (!Subtarget.isTargetELF())
11302 return true;
11303
11304  if (EnableIntArgExtCheck.getNumOccurrences()) {
11305    if (!EnableIntArgExtCheck)
11306      return true;
11307 } else if (!getTargetMachine().Options.VerifyArgABICompliance)
11308 return true;
11309
11310 for (unsigned i = 0; i < Outs.size(); ++i) {
11311 MVT VT = Outs[i].VT;
11312 ISD::ArgFlagsTy Flags = Outs[i].Flags;
11313 if (VT.isInteger()) {
11314 assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
11315 "Unexpected integer argument VT.");
11316 if (VT == MVT::i32 &&
11317 !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
11318 return false;
11319 }
11320 }
11321
11322 return true;
11323}
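// Illustrative note (hypothetical IR, not from the source): with the check
// enabled, an outgoing i32 argument that carries none of the signext, zeroext
// or noext attributes, e.g.
//   declare void @callee(i32)
// fails the i32 test in verifyNarrowIntegerArgs above, whereas
//   declare void @callee(i32 signext)
// passes, reflecting the SystemZ ELF ABI requirement that narrow integer
// arguments be extended by the caller.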
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
#define Check(C,...)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static bool isSelectPseudo(MachineInstr &MI)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t High
uint64_t IntrinsicInst * II
#define P(N)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file defines the SmallSet class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL, Comparison C, SDValue TrueOp, SDValue FalseOp)
static SmallVector< SDValue, 4 > simplifyAssumingCCVal(SDValue &Val, SDValue &CC, SelectionDAG &DAG)
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static cl::opt< bool > EnableIntArgExtCheck("argext-abi-check", cl::init(false), cl::desc("Verify that narrow int args are properly extended per the " "SystemZ ABI."))
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static SDValue convertToF16(SDValue Op, SelectionDAG &DAG)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, SelectionDAG &DAG)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static std::pair< SDValue, int > findCCUse(const SDValue &Val)
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool analyzeArgSplit(const SmallVectorImpl< ArgTy > &Args, SmallVector< CCValAssign, 16 > &ArgLocs, unsigned I, MVT &PartVT, unsigned &NumParts)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG, const SystemZSubtarget &Subtarget, SDValue &Op)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1386
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1513
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition APInt.h:323
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:361
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind)
LLVM Basic Block Representation.
Definition BasicBlock.h:62
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
LLVM_ABI bool isConstant() const
CCState - This class holds information needed while lowering arguments and return values.
LLVM_ABI void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
uint64_t getZExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
A debug info location.
Definition DebugLoc.h:123
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
bool hasAddressTaken(const User **=nullptr, bool IgnoreCallbackUses=false, bool IgnoreAssumeLikeCalls=true, bool IngoreLLVMUsed=false, bool IgnoreARCAttachedCall=false, bool IgnoreCastedDirectCall=false) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition Function.cpp:954
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:777
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:270
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:636
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
bool hasInternalLinkage() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setMaxCallFrameSize(uint64_t S)
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
const_iterator begin() const
Definition SmallSet.h:215
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
size_type size() const
Definition SmallSet.h:170
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:472
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:686
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
iterator end() const
Definition StringRef.h:114
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
A SystemZ-specific class detailing special use registers particular for calling conventions.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, const SDLoc &DL, const AsmOperandInfo &Constraint, SelectionDAG &DAG) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CondMergingParams getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const override
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
SDValue useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, MVT VT, SDValue Arg, SDLoc DL, SDValue Chain, bool IsStrict) const
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a targte-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::LibcallImpl LibcallImpl, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
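A minimal sketch using the Type predicates above; the helper name and the 64-bit threshold are assumptions made for the example:
#include "llvm/IR/Type.h"

// Hypothetical helper: true for scalar integer types wider than 64 bits.
static bool isWideScalarInt(const llvm::Type *Ty) {
  return Ty->isIntegerTy() && !Ty->isVectorTy() &&
         Ty->getPrimitiveSizeInBits().getFixedValue() > 64;
}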
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:402
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
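A small, self-contained sketch of the Use/Value queries listed above (the helper itself is hypothetical):
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"

// Hypothetical helper: true if V has exactly one use and that user
// produces an integer-typed value.
static bool hasSingleIntegerUser(const llvm::Value *V) {
  if (!V->hasOneUse())
    return false;
  const llvm::User *U = *V->user_begin();
  return U->getType()->isIntegerTy();
}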
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
A raw_ostream that writes to a file descriptor.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:780
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:163
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ STRICT_FMINIMUM
Definition ISDOpcodes.h:464
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:898
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:779
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined; 0 Round to 0; 1 Round to nearest, ties to even; 2 Round to ...
Definition ISDOpcodes.h:958
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ STRICT_FMAXIMUM
Definition ISDOpcodes.h:463
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:887
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:457
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:909
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
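A minimal sketch of the IR PatternMatch helpers listed above, recognizing an (X & C) expression with a constant mask; the helper name is an assumption made for the example:
#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"

// Hypothetical helper: if V is (X & C) for some constant C, bind MaskC to C.
static bool matchAndWithConstant(llvm::Value *V, const llvm::APInt *&MaskC) {
  using namespace llvm::PatternMatch;
  llvm::Value *X;
  return match(V, m_And(m_Value(X), m_APInt(MaskC)));
}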
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned VR16Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned FP16Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition SystemZ.h:41
static bool isImmHH(uint64_t Val)
Definition SystemZ.h:177
const unsigned CCMASK_TEND
Definition SystemZ.h:98
const unsigned CCMASK_CS_EQ
Definition SystemZ.h:68
const unsigned CCMASK_TBEGIN
Definition SystemZ.h:93
const unsigned CCMASK_0
Definition SystemZ.h:28
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition SystemZ.h:83
const unsigned CCMASK_LOGICAL_CARRY
Definition SystemZ.h:61
const unsigned TDCMASK_NORMAL_MINUS
Definition SystemZ.h:123
const unsigned CCMASK_TDC
Definition SystemZ.h:110
const unsigned CCMASK_FCMP
Definition SystemZ.h:49
const unsigned CCMASK_TM_SOME_0
Definition SystemZ.h:82
static bool isImmHL(uint64_t Val)
Definition SystemZ.h:172
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition SystemZ.h:125
const unsigned PFD_READ
Definition SystemZ.h:116
const unsigned CCMASK_1
Definition SystemZ.h:29
const unsigned TDCMASK_NORMAL_PLUS
Definition SystemZ.h:122
const unsigned PFD_WRITE
Definition SystemZ.h:117
const unsigned CCMASK_CMP_GT
Definition SystemZ.h:38
const unsigned TDCMASK_QNAN_MINUS
Definition SystemZ.h:129
const unsigned CCMASK_CS
Definition SystemZ.h:70
const unsigned CCMASK_ANY
Definition SystemZ.h:32
const unsigned CCMASK_ARITH
Definition SystemZ.h:56
const unsigned CCMASK_TM_MIXED_MSB_0
Definition SystemZ.h:79
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition SystemZ.h:124
static bool isImmLL(uint64_t Val)
Definition SystemZ.h:162
const unsigned VectorBits
Definition SystemZ.h:155
static bool isImmLH(uint64_t Val)
Definition SystemZ.h:167
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition SystemZ.h:126
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition SystemZ.h:78
const unsigned IPM_CC
Definition SystemZ.h:113
const unsigned CCMASK_CMP_LE
Definition SystemZ.h:40
const unsigned CCMASK_CMP_O
Definition SystemZ.h:45
const unsigned CCMASK_CMP_EQ
Definition SystemZ.h:36
const unsigned VectorBytes
Definition SystemZ.h:159
const unsigned TDCMASK_INFINITY_MINUS
Definition SystemZ.h:127
const unsigned CCMASK_ICMP
Definition SystemZ.h:48
const unsigned CCMASK_VCMP_ALL
Definition SystemZ.h:102
const unsigned CCMASK_VCMP_NONE
Definition SystemZ.h:104
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition SystemZ.h:105
const unsigned CCMASK_TM_MIXED_MSB_1
Definition SystemZ.h:80
const unsigned CCMASK_TM_MSB_0
Definition SystemZ.h:84
const unsigned CCMASK_ARITH_OVERFLOW
Definition SystemZ.h:55
const unsigned CCMASK_CS_NE
Definition SystemZ.h:69
const unsigned TDCMASK_SNAN_PLUS
Definition SystemZ.h:130
const unsigned CCMASK_TM
Definition SystemZ.h:86
const unsigned CCMASK_3
Definition SystemZ.h:31
const unsigned CCMASK_NONE
Definition SystemZ.h:27
const unsigned CCMASK_CMP_LT
Definition SystemZ.h:37
const unsigned CCMASK_CMP_NE
Definition SystemZ.h:39
const unsigned TDCMASK_ZERO_PLUS
Definition SystemZ.h:120
const unsigned TDCMASK_QNAN_PLUS
Definition SystemZ.h:128
const unsigned TDCMASK_ZERO_MINUS
Definition SystemZ.h:121
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition SystemZ.h:81
const unsigned CCMASK_LOGICAL_BORROW
Definition SystemZ.h:63
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition SystemZ.h:44
const unsigned CCMASK_LOGICAL
Definition SystemZ.h:65
const unsigned CCMASK_TM_MSB_1
Definition SystemZ.h:85
const unsigned TDCMASK_SNAN_MINUS
Definition SystemZ.h:131
initializer< Ty > init(const Ty &Val)
support::ulittle32_t Word
Definition IRSymtab.h:53
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
@ Length
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
@ Done
Definition Threading.h:60
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr T maskLeadingOnes(unsigned N)
Create a bitmask with the N left-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:88
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert ‘Bytes’ to a hex string and output to ‘OS’.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
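A worked example of a few of the integer helpers above, of the kind this file relies on for immediate-range checks; all concrete values are chosen purely for illustration:
#include "llvm/Support/MathExtras.h"

static_assert(llvm::isInt<16>(-32768), "fits a signed 16-bit field");
static_assert(!llvm::isUInt<12>(4096), "one past the unsigned 12-bit range");
static_assert(llvm::isPowerOf2_32(64), "64 is a power of two");
static_assert(llvm::maskTrailingOnes<uint64_t>(12) == 0xFFF, "low 12 bits set");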
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
#define EQ(a, b)
Definition regexec.c:65
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
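A minimal sketch of the EVT queries above; the v4i32 type is an arbitrary choice for the example:
#include <cassert>
#include "llvm/CodeGen/ValueTypes.h"

static void evtSketch(llvm::LLVMContext &Ctx) {
  llvm::EVT VT = llvm::EVT::getVectorVT(Ctx, llvm::MVT::i32, 4); // v4i32
  assert(VT.isVector() && VT.isInteger() && !VT.isFloatingPoint());
  assert(VT.getVectorNumElements() == 4);
  assert(VT.getVectorElementType() == llvm::EVT(llvm::MVT::i32));
  assert(VT.getFixedSizeInBits() == 128);
  (void)VT;
}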
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:186
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:172
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
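A minimal, self-contained sketch of the KnownBits operations above; the concrete bit patterns are assumptions made for the example:
#include <cassert>
#include "llvm/Support/KnownBits.h"

static void knownBitsSketch() {
  llvm::KnownBits Known(8);
  Known.Zero = llvm::APInt(8, 0xF8);      // bits 7..3 known to be zero
  Known.One  = llvm::APInt(8, 0x04);      // bit 2 known to be one
  llvm::KnownBits Wide = Known.zext(16);  // new high bits become known zero
  assert(Wide.getBitWidth() == 16);
  assert(Wide.getMaxValue().getZExtValue() == 0x07);
  (void)Wide;
}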
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
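A hedged sketch of the usual pattern for these chained setters together with LowerCallTo; DAG, DL, Chain, RetTy, Callee, and Args stand for values the surrounding lowering code would already have built:
// Sketch only: the names below are assumed to exist in the enclosing routine.
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL)
    .setChain(Chain)
    .setCallee(CallingConv::C, RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
// CallResult.first is the call's return value, CallResult.second the new chain.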
This structure is used to pass arguments to makeLibCall function.