LLVM 22.0.0git
SystemZISelLowering.cpp
Go to the documentation of this file.
1//===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SystemZTargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "SystemZISelLowering.h"
14#include "SystemZCallingConv.h"
18#include "llvm/ADT/SmallSet.h"
24#include "llvm/IR/GlobalAlias.h"
26#include "llvm/IR/Intrinsics.h"
27#include "llvm/IR/IntrinsicsS390.h"
32#include <cctype>
33#include <optional>
34
35using namespace llvm;
36
37#define DEBUG_TYPE "systemz-lower"
38
39// Temporarily let this be disabled by default until all known problems
40// related to argument extensions are fixed.
42 "argext-abi-check", cl::init(false),
43 cl::desc("Verify that narrow int args are properly extended per the "
44 "SystemZ ABI."));
45
46namespace {
47// Represents information about a comparison.
48struct Comparison {
49 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn)
50 : Op0(Op0In), Op1(Op1In), Chain(ChainIn),
51 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {}
52
53 // The operands to the comparison.
54 SDValue Op0, Op1;
55
56 // Chain if this is a strict floating-point comparison.
57 SDValue Chain;
58
59 // The opcode that should be used to compare Op0 and Op1.
60 unsigned Opcode;
61
62 // A SystemZICMP value. Only used for integer comparisons.
63 unsigned ICmpType;
64
65 // The mask of CC values that Opcode can produce.
66 unsigned CCValid;
67
68 // The mask of CC values for which the original condition is true.
69 unsigned CCMask;
70};
71} // end anonymous namespace
72
73// Classify VT as either 32 or 64 bit.
74static bool is32Bit(EVT VT) {
75 switch (VT.getSimpleVT().SimpleTy) {
76 case MVT::i32:
77 return true;
78 case MVT::i64:
79 return false;
80 default:
81 llvm_unreachable("Unsupported type");
82 }
83}
84
85// Return a version of MachineOperand that can be safely used before the
86// final use.
88 if (Op.isReg())
89 Op.setIsKill(false);
90 return Op;
91}
92
94 const SystemZSubtarget &STI)
95 : TargetLowering(TM, STI), Subtarget(STI) {
96 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
97
98 auto *Regs = STI.getSpecialRegisters();
99
100 // Set up the register classes.
101 if (Subtarget.hasHighWord())
102 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
103 else
104 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
105 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
106 if (!useSoftFloat()) {
107 if (Subtarget.hasVector()) {
108 addRegisterClass(MVT::f16, &SystemZ::VR16BitRegClass);
109 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
110 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
111 } else {
112 addRegisterClass(MVT::f16, &SystemZ::FP16BitRegClass);
113 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
114 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
115 }
116 if (Subtarget.hasVectorEnhancements1())
117 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass);
118 else
119 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
120
121 if (Subtarget.hasVector()) {
122 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
123 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
124 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
125 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
126 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
127 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
128 }
129
130 if (Subtarget.hasVector())
131 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass);
132 }
133
134 // Compute derived properties from the register classes
135 computeRegisterProperties(Subtarget.getRegisterInfo());
136
137 // Set up special registers.
138 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister());
139
140 // TODO: It may be better to default to latency-oriented scheduling, however
141 // LLVM's current latency-oriented scheduler can't handle physreg definitions
142 // such as SystemZ has with CC, so set this to the register-pressure
143 // scheduler, because it can.
145
148
150
151 // Instructions are strings of 2-byte aligned 2-byte values.
153 // For performance reasons we prefer 16-byte alignment.
155
156 // Handle operations that are handled in a similar way for all types.
157 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
158 I <= MVT::LAST_FP_VALUETYPE;
159 ++I) {
161 if (isTypeLegal(VT)) {
162 // Lower SET_CC into an IPM-based sequence.
166
167 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE).
169
170 // Lower SELECT_CC and BR_CC into separate comparisons and branches.
172 setOperationAction(ISD::BR_CC, VT, Custom);
173 }
174 }
175
176 // Expand jump table branches as address arithmetic followed by an
177 // indirect jump.
178 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
179
180 // Expand BRCOND into a BR_CC (see above).
181 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
182
183 // Handle integer types except i128.
184 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE;
185 I <= MVT::LAST_INTEGER_VALUETYPE;
186 ++I) {
188 if (isTypeLegal(VT) && VT != MVT::i128) {
190
191 // Expand individual DIV and REMs into DIVREMs.
198
199 // Support addition/subtraction with overflow.
202
203 // Support addition/subtraction with carry.
206
207 // Support carry in as value rather than glue.
210
211 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are
212 // available, or if the operand is constant.
213 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
214
215 // Use POPCNT on z196 and above.
216 if (Subtarget.hasPopulationCount())
218 else
220
221 // No special instructions for these.
224
225 // Use *MUL_LOHI where possible instead of MULH*.
230
231 // The fp<=>i32/i64 conversions are all Legal except for f16 and for
232 // unsigned on z10 (only z196 and above have native support for
233 // unsigned conversions).
240 // Handle unsigned 32-bit input types as signed 64-bit types on z10.
241 auto OpAction =
242 (!Subtarget.hasFPExtension() && VT == MVT::i32) ? Promote : Custom;
243 setOperationAction(Op, VT, OpAction);
244 }
245 }
246 }
247
248 // Handle i128 if legal.
249 if (isTypeLegal(MVT::i128)) {
250 // No special instructions for these.
257
258 // We may be able to use VSLDB/VSLD/VSRD for these.
261
262 // No special instructions for these before z17.
263 if (!Subtarget.hasVectorEnhancements3()) {
273 } else {
274 // Even if we do have a legal 128-bit multiply, we do not
275 // want 64-bit multiply-high operations to use it.
278 }
279
280 // Support addition/subtraction with carry.
285
286 // Use VPOPCT and add up partial results.
288
289 // Additional instructions available with z17.
290 if (Subtarget.hasVectorEnhancements3()) {
291 setOperationAction(ISD::ABS, MVT::i128, Legal);
292
294 MVT::i128, Legal);
295 }
296 }
297
298 // These need custom handling in order to handle the f16 conversions.
307
308 // Type legalization will convert 8- and 16-bit atomic operations into
309 // forms that operate on i32s (but still keeping the original memory VT).
310 // Lower them into full i32 operations.
311 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom);
312 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
313 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
314 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
315 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom);
316 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom);
317 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom);
318 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom);
319 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom);
320 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom);
321 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom);
322
323 // Whether or not i128 is a legal type, we need to custom lower
324 // the atomic operations in order to exploit SystemZ instructions.
325 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom);
326 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom);
327 setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom);
328 setOperationAction(ISD::ATOMIC_STORE, MVT::f128, Custom);
329
330 // Mark sign/zero extending atomic loads as legal, which will make
331 // DAGCombiner fold extensions into atomic loads if possible.
333 {MVT::i8, MVT::i16, MVT::i32}, Legal);
335 {MVT::i8, MVT::i16}, Legal);
337 MVT::i8, Legal);
338
339 // We can use the CC result of compare-and-swap to implement
340 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS.
341 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom);
342 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom);
343 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
344
345 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
346
347 // Traps are legal, as we will convert them to "j .+2".
348 setOperationAction(ISD::TRAP, MVT::Other, Legal);
349
350 // We have native support for a 64-bit CTLZ, via FLOGR.
354
355 // On z17 we have native support for a 64-bit CTTZ.
356 if (Subtarget.hasMiscellaneousExtensions4()) {
360 }
361
362 // On z15 we have native support for a 64-bit CTPOP.
363 if (Subtarget.hasMiscellaneousExtensions3()) {
366 }
367
368 // Give LowerOperation the chance to replace 64-bit ORs with subregs.
370
371 // Expand 128 bit shifts without using a libcall.
375
376 // Also expand 256 bit shifts if i128 is a legal type.
377 if (isTypeLegal(MVT::i128)) {
381 }
382
383 // Handle bitcast from fp128 to i128.
384 if (!isTypeLegal(MVT::i128))
385 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
386
387 // We have native instructions for i8, i16 and i32 extensions, but not i1.
389 for (MVT VT : MVT::integer_valuetypes()) {
393 }
394
395 // Handle the various types of symbolic address.
401
402 // We need to handle dynamic allocations specially because of the
403 // 160-byte area at the bottom of the stack.
404 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
405 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom);
406
407 setOperationAction(ISD::STACKSAVE, MVT::Other, Custom);
408 setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
409
410 // Handle prefetches with PFD or PFDRL.
411 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
412
413 // Handle readcyclecounter with STCKF.
414 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
415
417 // Assume by default that all vector operations need to be expanded.
418 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
419 if (getOperationAction(Opcode, VT) == Legal)
420 setOperationAction(Opcode, VT, Expand);
421
422 // Likewise all truncating stores and extending loads.
423 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
424 setTruncStoreAction(VT, InnerVT, Expand);
427 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
428 }
429
430 if (isTypeLegal(VT)) {
431 // These operations are legal for anything that can be stored in a
432 // vector register, even if there is no native support for the format
433 // as such. In particular, we can do these for v4f32 even though there
434 // are no specific instructions for that format.
435 setOperationAction(ISD::LOAD, VT, Legal);
436 setOperationAction(ISD::STORE, VT, Legal);
438 setOperationAction(ISD::BITCAST, VT, Legal);
440
441 // Likewise, except that we need to replace the nodes with something
442 // more specific.
445 }
446 }
447
448 // Handle integer vector types.
450 if (isTypeLegal(VT)) {
451 // These operations have direct equivalents.
456 if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3()) {
460 }
461 if (Subtarget.hasVectorEnhancements3() &&
462 VT != MVT::v16i8 && VT != MVT::v8i16) {
467 }
472 if (Subtarget.hasVectorEnhancements1())
474 else
478
479 // Convert a GPR scalar to a vector by inserting it into element 0.
481
482 // Use a series of unpacks for extensions.
485
486 // Detect shifts/rotates by a scalar amount and convert them into
487 // V*_BY_SCALAR.
492
493 // Add ISD::VECREDUCE_ADD as custom in order to implement
494 // it with VZERO+VSUM
495 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
496
497 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
498 // and inverting the result as necessary.
500
502 Legal);
503 }
504 }
505
506 if (Subtarget.hasVector()) {
507 // There should be no need to check for float types other than v2f64
508 // since <2 x f32> isn't a legal type.
517
526 }
527
528 if (Subtarget.hasVectorEnhancements2()) {
537
546 }
547
548 // Handle floating-point types.
549 if (!useSoftFloat()) {
550 // Promote all f16 operations to float, with some exceptions below.
551 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
552 setOperationAction(Opc, MVT::f16, Promote);
554 for (MVT VT : {MVT::f32, MVT::f64, MVT::f128}) {
555 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
556 setTruncStoreAction(VT, MVT::f16, Expand);
557 }
558 for (auto Op : {ISD::LOAD, ISD::ATOMIC_LOAD, ISD::STORE, ISD::ATOMIC_STORE})
559 setOperationAction(Op, MVT::f16, Subtarget.hasVector() ? Legal : Custom);
562 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
564 for (auto Op : {ISD::FNEG, ISD::FABS, ISD::FCOPYSIGN})
565 setOperationAction(Op, MVT::f16, Legal);
566 }
567
568 for (unsigned I = MVT::FIRST_FP_VALUETYPE;
569 I <= MVT::LAST_FP_VALUETYPE;
570 ++I) {
572 if (isTypeLegal(VT) && VT != MVT::f16) {
573 // We can use FI for FRINT.
574 setOperationAction(ISD::FRINT, VT, Legal);
575
576 // We can use the extended form of FI for other rounding operations.
577 if (Subtarget.hasFPExtension()) {
578 setOperationAction(ISD::FNEARBYINT, VT, Legal);
579 setOperationAction(ISD::FFLOOR, VT, Legal);
580 setOperationAction(ISD::FCEIL, VT, Legal);
581 setOperationAction(ISD::FTRUNC, VT, Legal);
582 setOperationAction(ISD::FROUND, VT, Legal);
583 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
584 }
585
586 // No special instructions for these.
587 setOperationAction(ISD::FSIN, VT, Expand);
588 setOperationAction(ISD::FCOS, VT, Expand);
589 setOperationAction(ISD::FSINCOS, VT, Expand);
591 setOperationAction(ISD::FPOW, VT, Expand);
592
593 // Special treatment.
595
596 // Handle constrained floating-point operations.
605 if (Subtarget.hasFPExtension()) {
612 }
613
614 // Extension from f16 needs libcall.
615 setOperationAction(ISD::FP_EXTEND, VT, Custom);
617 }
618 }
619
620 // Handle floating-point vector types.
621 if (Subtarget.hasVector()) {
622 // Scalar-to-vector conversion is just a subreg.
625
626 // Some insertions and extractions can be done directly but others
627 // need to go via integers.
632
633 // These operations have direct equivalents.
634 setOperationAction(ISD::FADD, MVT::v2f64, Legal);
635 setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
636 setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
637 setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
638 setOperationAction(ISD::FMA, MVT::v2f64, Legal);
639 setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
640 setOperationAction(ISD::FABS, MVT::v2f64, Legal);
641 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
642 setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
643 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
644 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
645 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
646 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
647 setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
648 setOperationAction(ISD::FROUNDEVEN, MVT::v2f64, Legal);
649
650 // Handle constrained floating-point operations.
664
669 if (Subtarget.hasVectorEnhancements1()) {
672 }
673 }
674
675 // The vector enhancements facility 1 has instructions for these.
676 if (Subtarget.hasVectorEnhancements1()) {
677 setOperationAction(ISD::FADD, MVT::v4f32, Legal);
678 setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
679 setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
680 setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
681 setOperationAction(ISD::FMA, MVT::v4f32, Legal);
682 setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
683 setOperationAction(ISD::FABS, MVT::v4f32, Legal);
684 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
685 setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
686 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
687 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
688 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
689 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
690 setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
691 setOperationAction(ISD::FROUNDEVEN, MVT::v4f32, Legal);
692
693 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
694 setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal);
695 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
696 setOperationAction(ISD::FMINIMUM, MVT::f64, Legal);
697
698 setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal);
699 setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal);
700 setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal);
701 setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal);
702
703 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
704 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal);
705 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
706 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal);
707
708 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
709 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal);
710 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
711 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal);
712
713 setOperationAction(ISD::FMAXNUM, MVT::f128, Legal);
714 setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
715 setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
716 setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
717
718 // Handle constrained floating-point operations.
732 for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
733 MVT::v4f32, MVT::v2f64 }) {
738 }
739 }
740
741 // We only have fused f128 multiply-addition on vector registers.
742 if (!Subtarget.hasVectorEnhancements1()) {
745 }
746
747 // We don't have a copysign instruction on vector registers.
748 if (Subtarget.hasVectorEnhancements1())
750
751 // Needed so that we don't try to implement f128 constant loads using
752 // a load-and-extend of a f80 constant (in cases where the constant
753 // would fit in an f80).
754 for (MVT VT : MVT::fp_valuetypes())
755 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
756
757 // We don't have extending load instructions on vector registers.
758 if (Subtarget.hasVectorEnhancements1()) {
759 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
760 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
761 }
762
763 // Floating-point truncation and stores need to be done separately.
764 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
765 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
766 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
767
768 // We have 64-bit FPR<->GPR moves, but need special handling for
769 // 32-bit forms.
770 if (!Subtarget.hasVector()) {
771 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
772 setOperationAction(ISD::BITCAST, MVT::f32, Custom);
773 }
774
775 // VASTART and VACOPY need to deal with the SystemZ-specific varargs
776 // structure, but VAEND is a no-op.
777 setOperationAction(ISD::VASTART, MVT::Other, Custom);
778 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
779 setOperationAction(ISD::VAEND, MVT::Other, Expand);
780
781 if (Subtarget.isTargetzOS()) {
782 // Handle address space casts between mixed sized pointers.
783 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
784 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
785 }
786
788
789 // Codes for which we want to perform some z-specific combinations.
793 ISD::LOAD,
794 ISD::STORE,
799 ISD::FP_EXTEND,
806 ISD::SRL,
807 ISD::SRA,
808 ISD::MUL,
809 ISD::SDIV,
810 ISD::UDIV,
811 ISD::SREM,
812 ISD::UREM,
815
816 // Handle intrinsics.
819
820 // We're not using SJLJ for exception handling, but they're implemented
821 // solely to support use of __builtin_setjmp / __builtin_longjmp.
824
825 // We want to use MVC in preference to even a single load/store pair.
826 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0;
828
829 // The main memset sequence is a byte store followed by an MVC.
830 // Two STC or MV..I stores win over that, but the kind of fused stores
831 // generated by target-independent code don't when the byte value is
832 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better
833 // than "STC;MVC". Handle the choice in target-specific code instead.
834 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0;
836
837 // Default to having -disable-strictnode-mutation on
838 IsStrictFPEnabled = true;
839}
840
842 return Subtarget.hasSoftFloat();
843}
844
846 LLVMContext &, EVT VT) const {
847 if (!VT.isVector())
848 return MVT::i32;
850}
851
853 const MachineFunction &MF, EVT VT) const {
854 if (useSoftFloat())
855 return false;
856
857 VT = VT.getScalarType();
858
859 if (!VT.isSimple())
860 return false;
861
862 switch (VT.getSimpleVT().SimpleTy) {
863 case MVT::f32:
864 case MVT::f64:
865 return true;
866 case MVT::f128:
867 return Subtarget.hasVectorEnhancements1();
868 default:
869 break;
870 }
871
872 return false;
873}
874
875// Return true if the constant can be generated with a vector instruction,
876// such as VGM, VGMB or VREPI.
878 const SystemZSubtarget &Subtarget) {
879 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
880 if (!Subtarget.hasVector() ||
881 (isFP128 && !Subtarget.hasVectorEnhancements1()))
882 return false;
883
884 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
885 // preferred way of creating all-zero and all-one vectors so give it
886 // priority over other methods below.
887 unsigned Mask = 0;
888 unsigned I = 0;
889 for (; I < SystemZ::VectorBytes; ++I) {
890 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue();
891 if (Byte == 0xff)
892 Mask |= 1ULL << I;
893 else if (Byte != 0)
894 break;
895 }
896 if (I == SystemZ::VectorBytes) {
897 Opcode = SystemZISD::BYTE_MASK;
898 OpVals.push_back(Mask);
900 return true;
901 }
902
903 if (SplatBitSize > 64)
904 return false;
905
906 auto TryValue = [&](uint64_t Value) -> bool {
907 // Try VECTOR REPLICATE IMMEDIATE
908 int64_t SignedValue = SignExtend64(Value, SplatBitSize);
909 if (isInt<16>(SignedValue)) {
910 OpVals.push_back(((unsigned) SignedValue));
911 Opcode = SystemZISD::REPLICATE;
913 SystemZ::VectorBits / SplatBitSize);
914 return true;
915 }
916 // Try VECTOR GENERATE MASK
917 unsigned Start, End;
918 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) {
919 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0
920 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for
921 // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1).
922 OpVals.push_back(Start - (64 - SplatBitSize));
923 OpVals.push_back(End - (64 - SplatBitSize));
924 Opcode = SystemZISD::ROTATE_MASK;
926 SystemZ::VectorBits / SplatBitSize);
927 return true;
928 }
929 return false;
930 };
931
932 // First try assuming that any undefined bits above the highest set bit
933 // and below the lowest set bit are 1s. This increases the likelihood of
934 // being able to use a sign-extended element value in VECTOR REPLICATE
935 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
936 uint64_t SplatBitsZ = SplatBits.getZExtValue();
937 uint64_t SplatUndefZ = SplatUndef.getZExtValue();
938 unsigned LowerBits = llvm::countr_zero(SplatBitsZ);
939 unsigned UpperBits = llvm::countl_zero(SplatBitsZ);
940 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits);
941 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits);
942 if (TryValue(SplatBitsZ | Upper | Lower))
943 return true;
944
945 // Now try assuming that any undefined bits between the first and
946 // last defined set bits are set. This increases the chances of
947 // using a non-wraparound mask.
948 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
949 return TryValue(SplatBitsZ | Middle);
950}
951
953 if (IntImm.isSingleWord()) {
954 IntBits = APInt(128, IntImm.getZExtValue());
955 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth());
956 } else
957 IntBits = IntImm;
958 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt.");
959
960 // Find the smallest splat.
961 SplatBits = IntImm;
962 unsigned Width = SplatBits.getBitWidth();
963 while (Width > 8) {
964 unsigned HalfSize = Width / 2;
965 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize);
966 APInt LowValue = SplatBits.trunc(HalfSize);
967
968 // If the two halves do not match, stop here.
969 if (HighValue != LowValue || 8 > HalfSize)
970 break;
971
972 SplatBits = HighValue;
973 Width = HalfSize;
974 }
975 SplatUndef = 0;
976 SplatBitSize = Width;
977}
978
980 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR");
981 bool HasAnyUndefs;
982
983 // Get IntBits by finding the 128 bit splat.
984 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128,
985 true);
986
987 // Get SplatBits by finding the 8 bit or greater splat.
988 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8,
989 true);
990}
991
993 bool ForCodeSize) const {
994 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR.
995 if (Imm.isZero() || Imm.isNegZero())
996 return true;
997
999}
1000
1003 MachineBasicBlock *MBB) const {
1004 DebugLoc DL = MI.getDebugLoc();
1005 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1006 const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo();
1007
1008 MachineFunction *MF = MBB->getParent();
1010
1011 const BasicBlock *BB = MBB->getBasicBlock();
1012 MachineFunction::iterator I = ++MBB->getIterator();
1013
1014 Register DstReg = MI.getOperand(0).getReg();
1015 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
1016 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
1017 (void)TRI;
1018 Register MainDstReg = MRI.createVirtualRegister(RC);
1019 Register RestoreDstReg = MRI.createVirtualRegister(RC);
1020
1021 MVT PVT = getPointerTy(MF->getDataLayout());
1022 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1023 // For v = setjmp(buf), we generate.
1024 // Algorithm:
1025 //
1026 // ---------
1027 // | thisMBB |
1028 // ---------
1029 // |
1030 // ------------------------
1031 // | |
1032 // ---------- ---------------
1033 // | mainMBB | | restoreMBB |
1034 // | v = 0 | | v = 1 |
1035 // ---------- ---------------
1036 // | |
1037 // -------------------------
1038 // |
1039 // -----------------------------
1040 // | sinkMBB |
1041 // | phi(v_mainMBB,v_restoreMBB) |
1042 // -----------------------------
1043 // thisMBB:
1044 // buf[FPOffset] = Frame Pointer if hasFP.
1045 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB.
1046 // buf[BCOffset] = Backchain value if building with -mbackchain.
1047 // buf[SPOffset] = Stack Pointer.
1048 // buf[LPOffset] = We never write this slot with R13, gcc stores R13 always.
1049 // SjLjSetup restoreMBB
1050 // mainMBB:
1051 // v_main = 0
1052 // sinkMBB:
1053 // v = phi(v_main, v_restore)
1054 // restoreMBB:
1055 // v_restore = 1
1056
1057 MachineBasicBlock *ThisMBB = MBB;
1058 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
1059 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
1060 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
1061
1062 MF->insert(I, MainMBB);
1063 MF->insert(I, SinkMBB);
1064 MF->push_back(RestoreMBB);
1065 RestoreMBB->setMachineBlockAddressTaken();
1066
1068
1069 // Transfer the remainder of BB and its successor edges to sinkMBB.
1070 SinkMBB->splice(SinkMBB->begin(), MBB,
1071 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
1073
1074 // thisMBB:
1075 const int64_t FPOffset = 0; // Slot 1.
1076 const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2.
1077 const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3.
1078 const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4.
1079
1080 // Buf address.
1081 Register BufReg = MI.getOperand(1).getReg();
1082
1083 const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
1084 Register LabelReg = MRI.createVirtualRegister(PtrRC);
1085
1086 // Prepare IP for longjmp.
1087 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LARL), LabelReg)
1088 .addMBB(RestoreMBB);
1089 // Store IP for return from jmp, slot 2, offset = 1.
1090 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1091 .addReg(LabelReg)
1092 .addReg(BufReg)
1093 .addImm(LabelOffset)
1094 .addReg(0);
1095
1096 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1097 bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF);
1098 if (HasFP) {
1099 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1100 .addReg(SpecialRegs->getFramePointerRegister())
1101 .addReg(BufReg)
1102 .addImm(FPOffset)
1103 .addReg(0);
1104 }
1105
1106 // Store SP.
1107 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1108 .addReg(SpecialRegs->getStackPointerRegister())
1109 .addReg(BufReg)
1110 .addImm(SPOffset)
1111 .addReg(0);
1112
1113 // Slot 3(Offset = 2) Backchain value (if building with -mbackchain).
1114 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1115 if (BackChain) {
1116 Register BCReg = MRI.createVirtualRegister(PtrRC);
1117 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1118 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1119 .addReg(SpecialRegs->getStackPointerRegister())
1120 .addImm(TFL->getBackchainOffset(*MF))
1121 .addReg(0);
1122
1123 BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::STG))
1124 .addReg(BCReg)
1125 .addReg(BufReg)
1126 .addImm(BCOffset)
1127 .addReg(0);
1128 }
1129
1130 // Setup.
1131 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(SystemZ::EH_SjLj_Setup))
1132 .addMBB(RestoreMBB);
1133
1134 const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1135 MIB.addRegMask(RegInfo->getNoPreservedMask());
1136
1137 ThisMBB->addSuccessor(MainMBB);
1138 ThisMBB->addSuccessor(RestoreMBB);
1139
1140 // mainMBB:
1141 BuildMI(MainMBB, DL, TII->get(SystemZ::LHI), MainDstReg).addImm(0);
1142 MainMBB->addSuccessor(SinkMBB);
1143
1144 // sinkMBB:
1145 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(SystemZ::PHI), DstReg)
1146 .addReg(MainDstReg)
1147 .addMBB(MainMBB)
1148 .addReg(RestoreDstReg)
1149 .addMBB(RestoreMBB);
1150
1151 // restoreMBB.
1152 BuildMI(RestoreMBB, DL, TII->get(SystemZ::LHI), RestoreDstReg).addImm(1);
1153 BuildMI(RestoreMBB, DL, TII->get(SystemZ::J)).addMBB(SinkMBB);
1154 RestoreMBB->addSuccessor(SinkMBB);
1155
1156 MI.eraseFromParent();
1157
1158 return SinkMBB;
1159}
1160
1163 MachineBasicBlock *MBB) const {
1164
1165 DebugLoc DL = MI.getDebugLoc();
1166 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1167
1168 MachineFunction *MF = MBB->getParent();
1170
1171 MVT PVT = getPointerTy(MF->getDataLayout());
1172 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");
1173 Register BufReg = MI.getOperand(0).getReg();
1174 const TargetRegisterClass *RC = MRI.getRegClass(BufReg);
1175 auto *SpecialRegs = Subtarget.getSpecialRegisters();
1176
1177 Register Tmp = MRI.createVirtualRegister(RC);
1178 Register BCReg = MRI.createVirtualRegister(RC);
1179
1181
1182 const int64_t FPOffset = 0;
1183 const int64_t LabelOffset = 1 * PVT.getStoreSize();
1184 const int64_t BCOffset = 2 * PVT.getStoreSize();
1185 const int64_t SPOffset = 3 * PVT.getStoreSize();
1186 const int64_t LPOffset = 4 * PVT.getStoreSize();
1187
1188 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), Tmp)
1189 .addReg(BufReg)
1190 .addImm(LabelOffset)
1191 .addReg(0);
1192
1193 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1194 SpecialRegs->getFramePointerRegister())
1195 .addReg(BufReg)
1196 .addImm(FPOffset)
1197 .addReg(0);
1198
  // Restore R13 even though LLVM's setjmp lowering never stores it: gcc
  // always stores R13 in builtin_setjmp, and a gcc-compiled setjmp may be
  // paired with an llvm-compiled longjmp.
1202 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), SystemZ::R13D)
1203 .addReg(BufReg)
1204 .addImm(LPOffset)
1205 .addReg(0);
1206
1207 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain();
1208 if (BackChain) {
1209 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), BCReg)
1210 .addReg(BufReg)
1211 .addImm(BCOffset)
1212 .addReg(0);
1213 }
1214
1215 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG),
1216 SpecialRegs->getStackPointerRegister())
1217 .addReg(BufReg)
1218 .addImm(SPOffset)
1219 .addReg(0);
1220
1221 if (BackChain) {
1222 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
1223 BuildMI(*MBB, MI, DL, TII->get(SystemZ::STG))
1224 .addReg(BCReg)
1225 .addReg(SpecialRegs->getStackPointerRegister())
1226 .addImm(TFL->getBackchainOffset(*MF))
1227 .addReg(0);
1228 }
1229
1230 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BR)).addReg(Tmp);
1231
1232 MI.eraseFromParent();
1233 return MBB;
1234}
1235
1236/// Returns true if stack probing through inline assembly is requested.
1238 // If the function specifically requests inline stack probes, emit them.
1239 if (MF.getFunction().hasFnAttribute("probe-stack"))
1240 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
1241 "inline-asm";
1242 return false;
1243}
1244
1249
1254
1257 // Don't expand subword operations as they require special treatment.
1258 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16))
1260
1261 // Don't expand if there is a target instruction available.
1262 if (Subtarget.hasInterlockedAccess1() &&
1263 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) &&
1270
1272}
1273
1275 // We can use CGFI or CLGFI.
1276 return isInt<32>(Imm) || isUInt<32>(Imm);
1277}
1278
1280 // We can use ALGFI or SLGFI.
1281 return isUInt<32>(Imm) || isUInt<32>(-Imm);
1282}
1283
1285 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const {
1286 // Unaligned accesses should never be slower than the expanded version.
1287 // We check specifically for aligned accesses in the few cases where
1288 // they are required.
1289 if (Fast)
1290 *Fast = 1;
1291 return true;
1292}
1293
1295 EVT VT = Y.getValueType();
1296
1297 // We can use NC(G)RK for types in GPRs ...
1298 if (VT == MVT::i32 || VT == MVT::i64)
1299 return Subtarget.hasMiscellaneousExtensions3();
1300
1301 // ... or VNC for types in VRs.
1302 if (VT.isVector() || VT == MVT::i128)
1303 return Subtarget.hasVector();
1304
1305 return false;
1306}
1307
1308// Information about the addressing mode for a memory access.
1310 // True if a long displacement is supported.
1312
1313 // True if use of index register is supported.
1315
1316 AddressingMode(bool LongDispl, bool IdxReg) :
1317 LongDisplacement(LongDispl), IndexReg(IdxReg) {}
1318};
1319
1320// Return the desired addressing mode for a Load which has only one use (in
1321// the same block) which is a Store.
1323 Type *Ty) {
1324 // With vector support a Load->Store combination may be combined to either
1325 // an MVC or vector operations and it seems to work best to allow the
1326 // vector addressing mode.
1327 if (HasVector)
1328 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1329
1330 // Otherwise only the MVC case is special.
1331 bool MVC = Ty->isIntegerTy(8);
1332 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
1333}
1334
1335// Return the addressing mode which seems most desirable given an LLVM
1336// Instruction pointer.
1337static AddressingMode
1340 switch (II->getIntrinsicID()) {
1341 default: break;
1342 case Intrinsic::memset:
1343 case Intrinsic::memmove:
1344 case Intrinsic::memcpy:
1345 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1346 }
1347 }
1348
1349 if (isa<LoadInst>(I) && I->hasOneUse()) {
1350 auto *SingleUser = cast<Instruction>(*I->user_begin());
1351 if (SingleUser->getParent() == I->getParent()) {
1352 if (isa<ICmpInst>(SingleUser)) {
1353 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
1354 if (C->getBitWidth() <= 64 &&
1355 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue())))
1356 // Comparison of memory with 16 bit signed / unsigned immediate
1357 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
1358 } else if (isa<StoreInst>(SingleUser))
1359 // Load->Store
1360 return getLoadStoreAddrMode(HasVector, I->getType());
1361 }
1362 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
1363 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
1364 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
1365 // Load->Store
1366 return getLoadStoreAddrMode(HasVector, LoadI->getType());
1367 }
1368
1369 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
1370
1371 // * Use LDE instead of LE/LEY for z13 to avoid partial register
1372 // dependencies (LDE only supports small offsets).
1373 // * Utilize the vector registers to hold floating point
1374 // values (vector load / store instructions only support small
1375 // offsets).
1376
1377 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
1378 I->getOperand(0)->getType());
1379 bool IsFPAccess = MemAccessTy->isFloatingPointTy();
1380 bool IsVectorAccess = MemAccessTy->isVectorTy();
1381
1382 // A store of an extracted vector element will be combined into a VSTE type
1383 // instruction.
1384 if (!IsVectorAccess && isa<StoreInst>(I)) {
1385 Value *DataOp = I->getOperand(0);
1386 if (isa<ExtractElementInst>(DataOp))
1387 IsVectorAccess = true;
1388 }
1389
1390 // A load which gets inserted into a vector element will be combined into a
1391 // VLE type instruction.
1392 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
1393 User *LoadUser = *I->user_begin();
1394 if (isa<InsertElementInst>(LoadUser))
1395 IsVectorAccess = true;
1396 }
1397
1398 if (IsFPAccess || IsVectorAccess)
1399 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
1400 }
1401
1402 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
1403}
1404
1406 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
1407 // Punt on globals for now, although they can be used in limited
1408 // RELATIVE LONG cases.
1409 if (AM.BaseGV)
1410 return false;
1411
1412 // Require a 20-bit signed offset.
1413 if (!isInt<20>(AM.BaseOffs))
1414 return false;
1415
1416 bool RequireD12 =
1417 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128));
1418 AddressingMode SupportedAM(!RequireD12, true);
1419 if (I != nullptr)
1420 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector());
1421
1422 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs))
1423 return false;
1424
1425 if (!SupportedAM.IndexReg)
1426 // No indexing allowed.
1427 return AM.Scale == 0;
1428 else
1429 // Indexing is OK but no scale factor can be applied.
1430 return AM.Scale == 0 || AM.Scale == 1;
1431}
1432
1434 LLVMContext &Context, std::vector<EVT> &MemOps, unsigned Limit,
1435 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
1436 const AttributeList &FuncAttributes) const {
1437 const int MVCFastLen = 16;
1438
1439 if (Limit != ~unsigned(0)) {
1440 // Don't expand Op into scalar loads/stores in these cases:
1441 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen)
1442 return false; // Small memcpy: Use MVC
1443 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen)
1444 return false; // Small memset (first byte with STC/MVI): Use MVC
1445 if (Op.isZeroMemset())
1446 return false; // Memset zero: Use XC
1447 }
1448
1449 return TargetLowering::findOptimalMemOpLowering(Context, MemOps, Limit, Op,
1450 DstAS, SrcAS, FuncAttributes);
1451}
1452
1454 LLVMContext &Context, const MemOp &Op,
1455 const AttributeList &FuncAttributes) const {
1456 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other;
1457}
1458
1459bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
1460 if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
1461 return false;
1462 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue();
1463 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue();
1464 return FromBits > ToBits;
1465}
1466
1468 if (!FromVT.isInteger() || !ToVT.isInteger())
1469 return false;
1470 unsigned FromBits = FromVT.getFixedSizeInBits();
1471 unsigned ToBits = ToVT.getFixedSizeInBits();
1472 return FromBits > ToBits;
1473}
1474
1475//===----------------------------------------------------------------------===//
1476// Inline asm support
1477//===----------------------------------------------------------------------===//
1478
1481 if (Constraint.size() == 1) {
1482 switch (Constraint[0]) {
1483 case 'a': // Address register
1484 case 'd': // Data register (equivalent to 'r')
1485 case 'f': // Floating-point register
1486 case 'h': // High-part register
1487 case 'r': // General-purpose register
1488 case 'v': // Vector register
1489 return C_RegisterClass;
1490
1491 case 'Q': // Memory with base and unsigned 12-bit displacement
1492 case 'R': // Likewise, plus an index
1493 case 'S': // Memory with base and signed 20-bit displacement
1494 case 'T': // Likewise, plus an index
1495 case 'm': // Equivalent to 'T'.
1496 return C_Memory;
1497
1498 case 'I': // Unsigned 8-bit constant
1499 case 'J': // Unsigned 12-bit constant
1500 case 'K': // Signed 16-bit constant
1501 case 'L': // Signed 20-bit displacement (on all targets we support)
1502 case 'M': // 0x7fffffff
1503 return C_Immediate;
1504
1505 default:
1506 break;
1507 }
1508 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') {
1509 switch (Constraint[1]) {
1510 case 'Q': // Address with base and unsigned 12-bit displacement
1511 case 'R': // Likewise, plus an index
1512 case 'S': // Address with base and signed 20-bit displacement
1513 case 'T': // Likewise, plus an index
1514 return C_Address;
1515
1516 default:
1517 break;
1518 }
1519 } else if (Constraint.size() == 5 && Constraint.starts_with("{")) {
1520 if (StringRef("{@cc}").compare(Constraint) == 0)
1521 return C_Other;
1522 }
1523 return TargetLowering::getConstraintType(Constraint);
1524}
1525
1528 AsmOperandInfo &Info, const char *Constraint) const {
1530 Value *CallOperandVal = Info.CallOperandVal;
1531 // If we don't have a value, we can't do a match,
1532 // but allow it at the lowest weight.
1533 if (!CallOperandVal)
1534 return CW_Default;
1535 Type *type = CallOperandVal->getType();
1536 // Look at the constraint type.
1537 switch (*Constraint) {
1538 default:
1539 Weight = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint);
1540 break;
1541
1542 case 'a': // Address register
1543 case 'd': // Data register (equivalent to 'r')
1544 case 'h': // High-part register
1545 case 'r': // General-purpose register
1546 Weight =
1547 CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default;
1548 break;
1549
1550 case 'f': // Floating-point register
1551 if (!useSoftFloat())
1552 Weight = type->isFloatingPointTy() ? CW_Register : CW_Default;
1553 break;
1554
1555 case 'v': // Vector register
1556 if (Subtarget.hasVector())
1557 Weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register
1558 : CW_Default;
1559 break;
1560
1561 case 'I': // Unsigned 8-bit constant
1562 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1563 if (isUInt<8>(C->getZExtValue()))
1564 Weight = CW_Constant;
1565 break;
1566
1567 case 'J': // Unsigned 12-bit constant
1568 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1569 if (isUInt<12>(C->getZExtValue()))
1570 Weight = CW_Constant;
1571 break;
1572
1573 case 'K': // Signed 16-bit constant
1574 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1575 if (isInt<16>(C->getSExtValue()))
1576 Weight = CW_Constant;
1577 break;
1578
1579 case 'L': // Signed 20-bit displacement (on all targets we support)
1580 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1581 if (isInt<20>(C->getSExtValue()))
1582 Weight = CW_Constant;
1583 break;
1584
1585 case 'M': // 0x7fffffff
1586 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal))
1587 if (C->getZExtValue() == 0x7fffffff)
1588 Weight = CW_Constant;
1589 break;
1590 }
1591 return Weight;
1592}
1593
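// Parse a "{tNNN}" register constraint for which the register type "t"
// has already been verified.  RC is the register class associated with "t",
// Map maps 0-based register numbers to LLVM register numbers, and Size
// bounds the register numbers that may be looked up in Map.  Returns
// {0, nullptr} if NNN is not a number, is out of range, or has no entry
// in Map.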
1597static std::pair<unsigned, const TargetRegisterClass *>
1599 const unsigned *Map, unsigned Size) {
1600 assert(*(Constraint.end()-1) == '}' && "Missing '}'");
1601 if (isdigit(Constraint[2])) {
1602 unsigned Index;
1603 bool Failed =
1604 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index);
1605 if (!Failed && Index < Size && Map[Index])
1606 return std::make_pair(Map[Index], RC);
1607 }
1608 return std::make_pair(0U, nullptr);
1609}
1610
1611std::pair<unsigned, const TargetRegisterClass *>
1613 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
1614 if (Constraint.size() == 1) {
1615 // GCC Constraint Letters
1616 switch (Constraint[0]) {
1617 default: break;
1618 case 'd': // Data register (equivalent to 'r')
1619 case 'r': // General-purpose register
1620 if (VT.getSizeInBits() == 64)
1621 return std::make_pair(0U, &SystemZ::GR64BitRegClass);
1622 else if (VT.getSizeInBits() == 128)
1623 return std::make_pair(0U, &SystemZ::GR128BitRegClass);
1624 return std::make_pair(0U, &SystemZ::GR32BitRegClass);
1625
1626 case 'a': // Address register
1627 if (VT == MVT::i64)
1628 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass);
1629 else if (VT == MVT::i128)
1630 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass);
1631 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass);
1632
1633 case 'h': // High-part register (an LLVM extension)
1634 return std::make_pair(0U, &SystemZ::GRH32BitRegClass);
1635
1636 case 'f': // Floating-point register
1637 if (!useSoftFloat()) {
1638 if (VT.getSizeInBits() == 16)
1639 return std::make_pair(0U, &SystemZ::FP16BitRegClass);
1640 else if (VT.getSizeInBits() == 64)
1641 return std::make_pair(0U, &SystemZ::FP64BitRegClass);
1642 else if (VT.getSizeInBits() == 128)
1643 return std::make_pair(0U, &SystemZ::FP128BitRegClass);
1644 return std::make_pair(0U, &SystemZ::FP32BitRegClass);
1645 }
1646 break;
1647
1648 case 'v': // Vector register
1649 if (Subtarget.hasVector()) {
1650 if (VT.getSizeInBits() == 16)
1651 return std::make_pair(0U, &SystemZ::VR16BitRegClass);
1652 if (VT.getSizeInBits() == 32)
1653 return std::make_pair(0U, &SystemZ::VR32BitRegClass);
1654 if (VT.getSizeInBits() == 64)
1655 return std::make_pair(0U, &SystemZ::VR64BitRegClass);
1656 return std::make_pair(0U, &SystemZ::VR128BitRegClass);
1657 }
1658 break;
1659 }
1660 }
1661 if (Constraint.starts_with("{")) {
1662
1663 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal
1664 // to check the size on.
1665 auto getVTSizeInBits = [&VT]() {
1666 return VT == MVT::Other ? 0 : VT.getSizeInBits();
1667 };
1668
1669 // We need to override the default register parsing for GPRs and FPRs
1670 // because the interpretation depends on VT. The internal names of
1671 // the registers are also different from the external names
1672 // (F0D and F0S instead of F0, etc.).
1673 if (Constraint[1] == 'r') {
1674 if (getVTSizeInBits() == 32)
1675 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass,
1677 if (getVTSizeInBits() == 128)
1678 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass,
1680 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass,
1682 }
1683 if (Constraint[1] == 'f') {
1684 if (useSoftFloat())
1685 return std::make_pair(
1686 0u, static_cast<const TargetRegisterClass *>(nullptr));
1687 if (getVTSizeInBits() == 16)
1688 return parseRegisterNumber(Constraint, &SystemZ::FP16BitRegClass,
1690 if (getVTSizeInBits() == 32)
1691 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass,
1693 if (getVTSizeInBits() == 128)
1694 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass,
1696 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass,
1698 }
1699 if (Constraint[1] == 'v') {
1700 if (!Subtarget.hasVector())
1701 return std::make_pair(
1702 0u, static_cast<const TargetRegisterClass *>(nullptr));
1703 if (getVTSizeInBits() == 16)
1704 return parseRegisterNumber(Constraint, &SystemZ::VR16BitRegClass,
1706 if (getVTSizeInBits() == 32)
1707 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass,
1709 if (getVTSizeInBits() == 64)
1710 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass,
1712 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass,
1714 }
1715 if (Constraint[1] == '@') {
1716 if (StringRef("{@cc}").compare(Constraint) == 0)
1717 return std::make_pair(SystemZ::CC, &SystemZ::CCRRegClass);
1718 }
1719 }
1720 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
1721}
1722
1723// FIXME? Maybe this could be a TableGen attribute on some registers and
1724// this table could be generated automatically from RegInfo.
1727 const MachineFunction &MF) const {
1728 Register Reg =
1730 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D
1731 : SystemZ::NoRegister)
1732 .Case("r15",
1733 Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister)
1734 .Default(Register());
1735
1736 return Reg;
1737}
1738
1740 const Constant *PersonalityFn) const {
1741 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D;
1742}
1743
1745 const Constant *PersonalityFn) const {
1746 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D;
1747}
1748
1749// Convert condition code in CCReg to an i32 value.
1751 SDLoc DL(CCReg);
1752 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg);
1753 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM,
1754 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32));
1755}
1756
1757// Lower @cc targets via setcc.
1759 SDValue &Chain, SDValue &Glue, const SDLoc &DL,
1760 const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
1761 if (StringRef("{@cc}").compare(OpInfo.ConstraintCode) != 0)
1762 return SDValue();
1763
1764 // Check that return type is valid.
1765 if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
1766 OpInfo.ConstraintVT.getSizeInBits() < 8)
1767 report_fatal_error("Glue output operand is of invalid type");
1768
1769 if (Glue.getNode()) {
1770 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32, Glue);
1771 Chain = Glue.getValue(1);
1772 } else
1773 Glue = DAG.getCopyFromReg(Chain, DL, SystemZ::CC, MVT::i32);
1774 return getCCResult(DAG, Glue);
1775}
1776
1778 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
1779 SelectionDAG &DAG) const {
1780 // Only support length 1 constraints for now.
1781 if (Constraint.size() == 1) {
1782 switch (Constraint[0]) {
1783 case 'I': // Unsigned 8-bit constant
1784 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1785 if (isUInt<8>(C->getZExtValue()))
1786 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1787 Op.getValueType()));
1788 return;
1789
1790 case 'J': // Unsigned 12-bit constant
1791 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1792 if (isUInt<12>(C->getZExtValue()))
1793 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1794 Op.getValueType()));
1795 return;
1796
1797 case 'K': // Signed 16-bit constant
1798 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1799 if (isInt<16>(C->getSExtValue()))
1800 Ops.push_back(DAG.getSignedTargetConstant(
1801 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1802 return;
1803
1804 case 'L': // Signed 20-bit displacement (on all targets we support)
1805 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1806 if (isInt<20>(C->getSExtValue()))
1807 Ops.push_back(DAG.getSignedTargetConstant(
1808 C->getSExtValue(), SDLoc(Op), Op.getValueType()));
1809 return;
1810
1811 case 'M': // 0x7fffffff
1812 if (auto *C = dyn_cast<ConstantSDNode>(Op))
1813 if (C->getZExtValue() == 0x7fffffff)
1814 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
1815 Op.getValueType()));
1816 return;
1817 }
1818 }
1820}
1821
1822//===----------------------------------------------------------------------===//
1823// Calling conventions
1824//===----------------------------------------------------------------------===//
1825
1826#include "SystemZGenCallingConv.inc"
1827
1829 CallingConv::ID) const {
1830 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D,
1831 SystemZ::R14D, 0 };
1832 return ScratchRegs;
1833}
1834
1836 Type *ToType) const {
1837 return isTruncateFree(FromType, ToType);
1838}
1839
1841 return CI->isTailCall();
1842}
1843
1844// Value is a value that has been passed to us in the location described by VA
1845// (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining
1846// any loads onto Chain.
1848 CCValAssign &VA, SDValue Chain,
1849 SDValue Value) {
1850 // If the argument has been promoted from a smaller type, insert an
1851 // assertion to capture this.
1852 if (VA.getLocInfo() == CCValAssign::SExt)
1854 DAG.getValueType(VA.getValVT()));
1855 else if (VA.getLocInfo() == CCValAssign::ZExt)
1857 DAG.getValueType(VA.getValVT()));
1858
1859 if (VA.isExtInLoc())
1860 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value);
1861 else if (VA.getLocInfo() == CCValAssign::BCvt) {
1862 // If this is a short vector argument loaded from the stack,
1863 // extend from i64 to full vector size and then bitcast.
1864 assert(VA.getLocVT() == MVT::i64);
1865 assert(VA.getValVT().isVector());
1866 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1867 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1868 } else
1869 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1870 return Value;
1871}
1872
// Value is a value of type VA.getValVT() that we need to copy into
// the location described by VA.  Return a copy of Value converted to
// VA.getLocVT().  The caller is responsible for handling indirect values.
1877 CCValAssign &VA, SDValue Value) {
1878 switch (VA.getLocInfo()) {
1879 case CCValAssign::SExt:
1880 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1881 case CCValAssign::ZExt:
1882 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1883 case CCValAssign::AExt:
1884 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1885 case CCValAssign::BCvt: {
1886 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1887 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1888 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1889 // For an f32 vararg we need to first promote it to an f64 and then
1890 // bitcast it to an i64.
1891 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1892 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1893 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1894 ? MVT::v2i64
1895 : VA.getLocVT();
1896 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1897 // For ELF, this is a short vector argument to be stored to the stack,
1898 // bitcast to v2i64 and then extract first element.
1899 if (BitCastToType == MVT::v2i64)
1900 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1901 DAG.getConstant(0, DL, MVT::i32));
1902 return Value;
1903 }
1904 case CCValAssign::Full:
1905 return Value;
1906 default:
1907 llvm_unreachable("Unhandled getLocInfo()");
1908 }
1909}
1910
1912 SDLoc DL(In);
1913 SDValue Lo, Hi;
1914 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1915 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1916 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1917 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1918 DAG.getConstant(64, DL, MVT::i32)));
1919 } else {
1920 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1921 }
1922
1923 // FIXME: If v2i64 were a legal type, we could use it instead of
1924 // Untyped here. This might enable improved folding.
1925 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL,
1926 MVT::Untyped, Hi, Lo);
1927 return SDValue(Pair, 0);
1928}
1929
1931 SDLoc DL(In);
1932 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64,
1933 DL, MVT::i64, In);
1934 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64,
1935 DL, MVT::i64, In);
1936
1937 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1938 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo);
1939 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi);
1940 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi,
1941 DAG.getConstant(64, DL, MVT::i32));
1942 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi);
1943 } else {
1944 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi);
1945 }
1946}
1947
1949 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1950 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const {
1951 EVT ValueVT = Val.getValueType();
1952 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1953 // Inline assembly operand.
1954 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val));
1955 return true;
1956 }
1957
1958 return false;
1959}
1960
1962 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
1963 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const {
1964 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) {
1965 // Inline assembly operand.
1966 SDValue Res = lowerGR128ToI128(DAG, Parts[0]);
1967 return DAG.getBitcast(ValueVT, Res);
1968 }
1969
1970 return SDValue();
1971}
1972
1974 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1975 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1976 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1978 MachineFrameInfo &MFI = MF.getFrameInfo();
1980 SystemZMachineFunctionInfo *FuncInfo =
1982 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
1983 EVT PtrVT = getPointerTy(DAG.getDataLayout());
1984
1985 // Assign locations to all of the incoming arguments.
1987 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1988 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
1989 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize());
1990
1991 unsigned NumFixedGPRs = 0;
1992 unsigned NumFixedFPRs = 0;
1993 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
1994 SDValue ArgValue;
1995 CCValAssign &VA = ArgLocs[I];
1996 EVT LocVT = VA.getLocVT();
1997 if (VA.isRegLoc()) {
1998 // Arguments passed in registers
1999 const TargetRegisterClass *RC;
2000 switch (LocVT.getSimpleVT().SimpleTy) {
2001 default:
2002 // Integers smaller than i64 should be promoted to i64.
2003 llvm_unreachable("Unexpected argument type");
2004 case MVT::i32:
2005 NumFixedGPRs += 1;
2006 RC = &SystemZ::GR32BitRegClass;
2007 break;
2008 case MVT::i64:
2009 NumFixedGPRs += 1;
2010 RC = &SystemZ::GR64BitRegClass;
2011 break;
2012 case MVT::f16:
2013 NumFixedFPRs += 1;
2014 RC = &SystemZ::FP16BitRegClass;
2015 break;
2016 case MVT::f32:
2017 NumFixedFPRs += 1;
2018 RC = &SystemZ::FP32BitRegClass;
2019 break;
2020 case MVT::f64:
2021 NumFixedFPRs += 1;
2022 RC = &SystemZ::FP64BitRegClass;
2023 break;
2024 case MVT::f128:
2025 NumFixedFPRs += 2;
2026 RC = &SystemZ::FP128BitRegClass;
2027 break;
2028 case MVT::v16i8:
2029 case MVT::v8i16:
2030 case MVT::v4i32:
2031 case MVT::v2i64:
2032 case MVT::v4f32:
2033 case MVT::v2f64:
2034 RC = &SystemZ::VR128BitRegClass;
2035 break;
2036 }
2037
2038 Register VReg = MRI.createVirtualRegister(RC);
2039 MRI.addLiveIn(VA.getLocReg(), VReg);
2040 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
2041 } else {
2042 assert(VA.isMemLoc() && "Argument not register or memory");
2043
2044 // Create the frame index object for this incoming parameter.
2045 // FIXME: Pre-include call frame size in the offset, should not
2046 // need to manually add it here.
2047 int64_t ArgSPOffset = VA.getLocMemOffset();
2048 if (Subtarget.isTargetXPLINK64()) {
2049 auto &XPRegs =
2050 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
2051 ArgSPOffset += XPRegs.getCallFrameSize();
2052 }
2053 int FI =
2054 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);
2055
2056 // Create the SelectionDAG nodes corresponding to a load
2057 // from this parameter. Unpromoted ints and floats are
2058 // passed as right-justified 8-byte values.
2059 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2060 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32 ||
2061 VA.getLocVT() == MVT::f16) {
2062 unsigned SlotOffs = VA.getLocVT() == MVT::f16 ? 6 : 4;
2063 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
2064 DAG.getIntPtrConstant(SlotOffs, DL));
2065 }
2066 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN,
2068 }
2069
2070 // Convert the value of the argument register into the value that's
2071 // being passed.
2072 if (VA.getLocInfo() == CCValAssign::Indirect) {
2073 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
2075 // If the original argument was split (e.g. i128), we need
2076 // to load all parts of it here (using the same address).
2077 unsigned ArgIndex = Ins[I].OrigArgIndex;
2078 assert (Ins[I].PartOffset == 0);
2079 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) {
2080 CCValAssign &PartVA = ArgLocs[I + 1];
2081 unsigned PartOffset = Ins[I + 1].PartOffset;
2082 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
2083 DAG.getIntPtrConstant(PartOffset, DL));
2084 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
2086 ++I;
2087 }
2088 } else
2089 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
2090 }
2091
2092 if (IsVarArg && Subtarget.isTargetXPLINK64()) {
2093 // Save the number of non-varargs registers for later use by va_start, etc.
2094 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2095 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2096
2097 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2098 Subtarget.getSpecialRegisters());
2099
2100 // Likewise the address (in the form of a frame index) of where the
2101 // first stack vararg would be. The 1-byte size here is arbitrary.
2102 // FIXME: Pre-include call frame size in the offset, should not
2103 // need to manually add it here.
2104 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
2105 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
2106 FuncInfo->setVarArgsFrameIndex(FI);
2107 }
2108
2109 if (IsVarArg && Subtarget.isTargetELF()) {
2110 // Save the number of non-varargs registers for later use by va_start, etc.
2111 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2112 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2113
2114 // Likewise the address (in the form of a frame index) of where the
2115 // first stack vararg would be. The 1-byte size here is arbitrary.
2116 int64_t VarArgsOffset = CCInfo.getStackSize();
2117 FuncInfo->setVarArgsFrameIndex(
2118 MFI.CreateFixedObject(1, VarArgsOffset, true));
2119
2120 // ...and a similar frame index for the caller-allocated save area
2121 // that will be used to store the incoming registers.
2122 int64_t RegSaveOffset =
2123 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
2124 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
2125 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
2126
2127 // Store the FPR varargs in the reserved frame slots. (We store the
2128 // GPRs as part of the prologue.)
2129 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
2131 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
2132 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
2133 int FI =
2135 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2137 &SystemZ::FP64BitRegClass);
2138 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
2139 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
2141 }
2142 // Join the stores, which are independent of one another.
2143 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2144 ArrayRef(&MemOps[NumFixedFPRs],
2145 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
2146 }
2147 }
2148
2149 if (Subtarget.isTargetXPLINK64()) {
2150 // Create virtual register for handling incoming "ADA" special register (R5)
2151 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
2152 Register ADAvReg = MRI.createVirtualRegister(RC);
2153 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2154 Subtarget.getSpecialRegisters());
2155 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
2156 FuncInfo->setADAVirtualRegister(ADAvReg);
2157 }
2158 return Chain;
2159}
2160
// Decide whether a call may be emitted as a sibling call, judging only by
// where its outgoing arguments were assigned. Returns false as soon as one
// argument disqualifies the call and true when none does.
// NOTE(review): the embedded listing numbers jump from 2161 to 2164 and from
// 2168 to 2170, so the rest of the parameter list and the condition guarding
// the first `return false` are not visible here.
2161static bool canUseSiblingCall(const CCState &ArgCCInfo,
2164 // Punt if there are any indirect or stack arguments, or if the call
2165 // needs the callee-saved argument register R6, or if the call uses
2166 // the callee-saved register arguments SwiftSelf and SwiftError.
2167 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2168 CCValAssign &VA = ArgLocs[I];
2170 return false;
// Anything not assigned to a register (i.e. passed on the stack) blocks the
// sibling call.
2171 if (!VA.isRegLoc())
2172 return false;
2173 Register Reg = VA.getLocReg();
// All three views of R6 (high half, low half, full 64-bit) are rejected.
2174 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D)
2175 return false;
2176 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError())
2177 return false;
2178 }
2179 return true;
2180}
2181
// Build a SystemZISD::ADA_ENTRY node for Val at the given Offset, relative to
// the function's ADA virtual register. Unless LoadAdr is true, the node is
// then used as the address of a pointer-sized load (chained to the entry
// node, 8-byte aligned), and that load is returned instead.
// NOTE(review): the first line of the signature and the lines defining MFI
// and PtrVT, plus the tail of the getLoad call, are absent from this listing.
2183 unsigned Offset, bool LoadAdr = false) {
2186 Register ADAvReg = MFI->getADAVirtualRegister();
2188
2189 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT);
// The offset is emitted as a target constant operand of the ADA_ENTRY node.
2190 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT);
2191
2192 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs);
// LoadAdr == false: return the value stored in the entry rather than the
// entry's address.
2193 if (!LoadAdr)
2194 Result = DAG.getLoad(
2195 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8),
2197
2198 return Result;
2199}
2200
2201// ADA access using Global value
2202// Note: for functions, address of descriptor is returned
// The global is classified as function / non-function (looking through a
// GlobalAlias to its aliasee) and as internal-or-private / other linkage.
// That classification selects the ADAtype target flag placed on the target
// global address, and only the internal-function case sets LoadAddr.
// NOTE(review): the signature's first line and the three ADAtype assignments
// are absent from this listing, so the concrete flag values cannot be stated.
2204 EVT PtrVT) {
2205 unsigned ADAtype;
2206 bool LoadAddr = false;
2207 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV);
2208 bool IsFunction =
2209 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject()));
2210 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage());
2211
2212 if (IsFunction) {
2213 if (IsInternal) {
2215 LoadAddr = true;
2216 } else
2218 } else {
2220 }
2221 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype);
2222
// Delegate to the offset-based overload with offset 0.
2223 return getADAEntry(DAG, Val, DL, 0, LoadAddr);
2224}
2225
// Compute the call target and the ADA value for an XPLINK call.
// Callee and ADA are in/out: on return Callee holds the node to call through
// and ADA holds the value to pass in the ADA register.
// Returns true only for a global with internal or private linkage, where
// Callee becomes a PCREL_WRAPPER around the target global address and ADA is
// copied from this function's own ADA virtual register. All other cases load
// both values (from ADA entries or through the function pointer) and return
// false.
// NOTE(review): the lines defining PtrVT, MFI, GA and ES and the tails of the
// two getLoad calls are absent from this listing.
2226static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA,
2227 SDLoc &DL, SDValue &Chain) {
2228 unsigned ADADelta = 0; // ADA offset in desc.
2229 unsigned EPADelta = 8; // EPA offset in desc.
2232
2233 // XPLink calling convention.
2234 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2235 bool IsInternal = (G->getGlobal()->hasInternalLinkage() ||
2236 G->getGlobal()->hasPrivateLinkage());
2237 if (IsInternal) {
2240 Register ADAvReg = MFI->getADAVirtualRegister();
2241 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT);
2242 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2243 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2244 return true;
2245 } else {
2247 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
// Both descriptor fields are fetched through ADA entries for the same symbol.
2248 ADA = getADAEntry(DAG, GA, DL, ADADelta);
2249 Callee = getADAEntry(DAG, GA, DL, EPADelta);
2250 }
2251 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2253 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC);
2254 ADA = getADAEntry(DAG, ES, DL, ADADelta);
2255 Callee = getADAEntry(DAG, ES, DL, EPADelta);
2256 } else {
2257 // Function pointer case
// Callee is treated as the address of a descriptor: ADA is loaded from
// offset ADADelta and the entry point from offset EPADelta. ADA must be
// computed first because Callee is overwritten afterwards.
2258 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2259 DAG.getConstant(ADADelta, DL, PtrVT));
2260 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA,
2262 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee,
2263 DAG.getConstant(EPADelta, DL, PtrVT));
2264 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee,
2266 }
2267 return false;
2268}
2269
// Lower the outgoing call described by CLI into DAG nodes.
// Steps, in order: assign argument locations with CC_SystemZ; decide whether
// a requested tail call can be a sibling call; store stack/indirect
// arguments and queue register arguments; resolve the callee (and, for
// XPLINK64, the ADA); emit SystemZISD::SIBCALL or SystemZISD::CALL; and, for
// normal calls, copy the results located by RetCC_SystemZ into InVals.
// Returns the SIBCALL node for tail calls, otherwise the chain after the
// result copies. CLI.IsTailCall is updated in place when a tail call is
// rejected.
// NOTE(review): the line naming the function and the declarations of Outs,
// Ins, MF, ArgLocs, RegsToPass, Ops and RetLocs (among others) are absent
// from this listing.
2270SDValue
2272 SmallVectorImpl<SDValue> &InVals) const {
2273 SelectionDAG &DAG = CLI.DAG;
2274 SDLoc &DL = CLI.DL;
2276 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2278 SDValue Chain = CLI.Chain;
2279 SDValue Callee = CLI.Callee;
2280 bool &IsTailCall = CLI.IsTailCall;
2281 CallingConv::ID CallConv = CLI.CallConv;
2282 bool IsVarArg = CLI.IsVarArg;
2284 EVT PtrVT = getPointerTy(MF.getDataLayout());
2285 LLVMContext &Ctx = *DAG.getContext();
2286 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
2287
2288 // FIXME: z/OS support to be added in later.
2289 if (Subtarget.isTargetXPLINK64())
2290 IsTailCall = false;
2291
2292 // Integer args <=32 bits should have an extension attribute.
2293 verifyNarrowIntegerArgs_Call(Outs, &MF.getFunction(), Callee);
2294
2295 // Analyze the operands of the call, assigning locations to each operand.
2297 CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
2298 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
2299
2300 // We don't support GuaranteedTailCallOpt, only automatically-detected
2301 // sibling calls.
2302 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs))
2303 IsTailCall = false;
2304
2305 // Get a count of how many bytes are to be pushed on the stack.
2306 unsigned NumBytes = ArgCCInfo.getStackSize();
2307
2308 // Mark the start of the call.
2309 if (!IsTailCall)
2310 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
2311
2312 // Copy argument values to their designated locations.
2314 SmallVector<SDValue, 8> MemOpChains;
2315 SDValue StackPtr;
2316 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2317 CCValAssign &VA = ArgLocs[I];
2318 SDValue ArgValue = OutVals[I];
2319
2320 if (VA.getLocInfo() == CCValAssign::Indirect) {
2321 // Store the argument in a stack slot and pass its address.
2322 unsigned ArgIndex = Outs[I].OrigArgIndex;
2323 EVT SlotVT;
2324 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
2325 // Allocate the full stack space for a promoted (and split) argument.
2326 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
2327 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
2328 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
2329 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
2330 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
2331 } else {
2332 SlotVT = Outs[I].VT;
2333 }
2334 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
2335 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2336 MemOpChains.push_back(
2337 DAG.getStore(Chain, DL, ArgValue, SpillSlot,
2339 // If the original argument was split (e.g. i128), we need
2340 // to store all parts of it here (and pass just one address).
2341 assert (Outs[I].PartOffset == 0);
// This inner loop advances I, so the remaining parts of the same original
// argument are consumed here and not revisited by the outer loop.
2342 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
2343 SDValue PartValue = OutVals[I + 1];
2344 unsigned PartOffset = Outs[I + 1].PartOffset;
2345 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
2346 DAG.getIntPtrConstant(PartOffset, DL));
2347 MemOpChains.push_back(
2348 DAG.getStore(Chain, DL, PartValue, Address,
2350 assert((PartOffset + PartValue.getValueType().getStoreSize() <=
2351 SlotVT.getStoreSize()) && "Not enough space for argument part!");
2352 ++I;
2353 }
2354 ArgValue = SpillSlot;
2355 } else
2356 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
2357
2358 if (VA.isRegLoc()) {
2359 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a
2360 // MVT::i128 type. We decompose the 128-bit type to a pair of its high
2361 // and low values.
2362 if (VA.getLocVT() == MVT::i128)
2363 ArgValue = lowerI128ToGR128(DAG, ArgValue);
2364 // Queue up the argument copies and emit them at the end.
2365 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
2366 } else {
2367 assert(VA.isMemLoc() && "Argument not register or memory");
2368
2369 // Work out the address of the stack slot. Unpromoted ints and
2370 // floats are passed as right-justified 8-byte values.
// StackPtr is created lazily, once, on the first stack-passed argument.
2371 if (!StackPtr.getNode())
2372 StackPtr = DAG.getCopyFromReg(Chain, DL,
2373 Regs->getStackPointerRegister(), PtrVT);
2374 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
2375 VA.getLocMemOffset();
// Right-justify within the 8-byte slot: 4-byte types are placed at +4 and
// the 2-byte f16 at +6.
2376 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
2377 Offset += 4;
2378 else if (VA.getLocVT() == MVT::f16)
2379 Offset += 6;
2380 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
2382
2383 // Emit the store.
2384 MemOpChains.push_back(
2385 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
2386
2387 // Although long doubles or vectors are passed through the stack when
2388 // they are vararg (non-fixed arguments), if a long double or vector
2389 // occupies the third and fourth slot of the argument list GPR3 should
2390 // still shadow the third slot of the argument list.
2391 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
2392 SDValue ShadowArgValue =
2393 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
2394 DAG.getIntPtrConstant(1, DL));
2395 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
2396 }
2397 }
2398 }
2399
2400 // Join the stores, which are independent of one another.
2401 if (!MemOpChains.empty())
2402 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
2403
2404 // Accept direct calls by converting symbolic call addresses to the
2405 // associated Target* opcodes. Force %r1 to be used for indirect
2406 // tail calls.
2407 SDValue Glue;
2408
2409 if (Subtarget.isTargetXPLINK64()) {
2410 SDValue ADA;
2411 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain);
// When the helper returned false, the callee address is moved into the
// dedicated address-of-callee register and the call goes through that
// register.
2412 if (!IsBRASL) {
2413 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs)
2414 ->getAddressOfCalleeRegister();
2415 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue);
2416 Glue = Chain.getValue(1);
2417 Callee = DAG.getRegister(CalleeReg, Callee.getValueType());
2418 }
// The ADA is passed like any other register argument.
2419 RegsToPass.push_back(std::make_pair(
2420 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA));
2421 } else {
2422 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2423 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
2424 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2425 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2426 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT);
2427 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
2428 } else if (IsTailCall) {
2429 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue);
2430 Glue = Chain.getValue(1);
2431 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType());
2432 }
2433 }
2434
2435 // Build a sequence of copy-to-reg nodes, chained and glued together.
2436 for (const auto &[Reg, N] : RegsToPass) {
2437 Chain = DAG.getCopyToReg(Chain, DL, Reg, N, Glue);
2438 Glue = Chain.getValue(1);
2439 }
2440
2441 // The first call operand is the chain and the second is the target address.
2443 Ops.push_back(Chain);
2444 Ops.push_back(Callee);
2445
2446 // Add argument registers to the end of the list so that they are
2447 // known live into the call.
2448 for (const auto &[Reg, N] : RegsToPass)
2449 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2450
2451 // Add a register mask operand representing the call-preserved registers.
2452 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2453 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
2454 assert(Mask && "Missing call preserved mask for calling convention");
2455 Ops.push_back(DAG.getRegisterMask(Mask));
2456
2457 // Glue the call to the argument copies, if any.
2458 if (Glue.getNode())
2459 Ops.push_back(Glue);
2460
2461 // Emit the call.
2462 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
// Sibling calls return here: no CALLSEQ_END and no result copies are emitted.
2463 if (IsTailCall) {
2464 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops);
2465 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2466 return Ret;
2467 }
2468 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops);
2469 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2470 Glue = Chain.getValue(1);
2471
2472 // Mark the end of the call, which is glued to the call itself.
2473 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL);
2474 Glue = Chain.getValue(1);
2475
2476 // Assign locations to each value returned by this call.
2478 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
2479 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
2480
2481 // Copy all of the result registers out of their specified physreg.
2482 for (CCValAssign &VA : RetLocs) {
2483 // Copy the value out, gluing the copy to the end of the call sequence.
2484 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(),
2485 VA.getLocVT(), Glue);
2486 Chain = RetValue.getValue(1);
2487 Glue = RetValue.getValue(2);
2488
2489 // Convert the value of the return register into the value that's
2490 // being returned.
2491 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue));
2492 }
2493
2494 return Chain;
2495}
2496
2497// Generate a call taking the given operands as arguments and returning a
2498// result of type RetVT.
// The callee is the external symbol CalleeName. IsSigned feeds
// shouldSignExtendTypeInLibCall to choose sign or zero extension for each
// argument and for the result; exactly one of the two flags is set in each
// case. DoesNotReturn and IsReturnValueUsed are forwarded to the call
// lowering info (the latter inverted, as "discard result"). Returns whatever
// LowerCallTo produces for the configured call.
// NOTE(review): the signature's first line and the declarations of Args,
// Entry and CLI are absent from this listing.
2500 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT,
2501 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL,
2502 bool DoesNotReturn, bool IsReturnValueUsed) const {
2504 Args.reserve(Ops.size());
2505
2506 for (SDValue Op : Ops) {
2508 Op, Op.getValueType().getTypeForEVT(*DAG.getContext()));
2509 Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned);
2510 Entry.IsZExt = !Entry.IsSExt;
2511 Args.push_back(Entry);
2512 }
2513
2514 SDValue Callee =
2515 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout()));
2516
2517 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
2519 bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned);
2520 CLI.setDebugLoc(DL)
2521 .setChain(Chain)
2522 .setCallee(CallConv, RetTy, Callee, std::move(Args))
2523 .setNoReturn(DoesNotReturn)
2524 .setDiscardResult(!IsReturnValueUsed)
2525 .setSExtResult(SignExtend)
2526 .setZExtResult(!SignExtend);
2527 return LowerCallTo(CLI);
2528}
2529
// Report whether the return values described by Outs can be returned under
// RetCC_SystemZ. Any value whose ArgVT is i128 makes the answer false before
// the calling-convention check is consulted.
// NOTE(review): the line naming the function and the RetLocs declaration are
// absent from this listing.
2531 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
2532 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
2533 const Type *RetTy) const {
2534 // Special case that we cannot easily detect in RetCC_SystemZ since
2535 // i128 may not be a legal type.
2536 for (auto &Out : Outs)
2537 if (Out.ArgVT == MVT::i128)
2538 return false;
2539
2541 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Context);
2542 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
2543}
2544
// Lower a function return: assign each returned value a register with
// RetCC_SystemZ, copy the (promoted) values into those registers with a
// chained and glued sequence of CopyToReg nodes, and finish with a
// SystemZISD::RET_GLUE node whose operands are the chain, the return
// registers and the final glue.
// NOTE(review): the line naming the function, part of the parameter list and
// the declarations of MF, RetLocs and RetOps are absent from this listing.
2545SDValue
2547 bool IsVarArg,
2549 const SmallVectorImpl<SDValue> &OutVals,
2550 const SDLoc &DL, SelectionDAG &DAG) const {
2552
2553 // Integer args <=32 bits should have an extension attribute.
2554 verifyNarrowIntegerArgs_Ret(Outs, &MF.getFunction());
2555
2556 // Assign locations to each returned value.
2558 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
2559 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ);
2560
2561 // Quick exit for void returns
2562 if (RetLocs.empty())
2563 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain);
2564
// Reaching this point means there is at least one return value, which the
// GHC calling convention does not allow.
2565 if (CallConv == CallingConv::GHC)
2566 report_fatal_error("GHC functions return void only");
2567
2568 // Copy the result values into the output registers.
2569 SDValue Glue;
// Slot 0 is a placeholder for the chain; it is overwritten after the loop
// once the final chain is known.
2571 RetOps.push_back(Chain);
2572 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) {
2573 CCValAssign &VA = RetLocs[I];
2574 SDValue RetValue = OutVals[I];
2575
2576 // Make the return register live on exit.
2577 assert(VA.isRegLoc() && "Can only return in registers!");
2578
2579 // Promote the value as required.
2580 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue);
2581
2582 // Chain and glue the copies together.
2583 Register Reg = VA.getLocReg();
2584 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue);
2585 Glue = Chain.getValue(1);
2586 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT()));
2587 }
2588
2589 // Update chain and glue.
2590 RetOps[0] = Chain;
2591 if (Glue.getNode())
2592 RetOps.push_back(Glue);
2593
2594 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps);
2595}
2596
2597// Return true if Op is an intrinsic node with chain that returns the CC value
2598// as its only (other) argument. Provide the associated SystemZISD opcode and
2599// the mask of valid CC values if so.
2600static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
2601 unsigned &CCValid) {
2602 unsigned Id = Op.getConstantOperandVal(1);
2603 switch (Id) {
2604 case Intrinsic::s390_tbegin:
2605 Opcode = SystemZISD::TBEGIN;
2606 CCValid = SystemZ::CCMASK_TBEGIN;
2607 return true;
2608
2609 case Intrinsic::s390_tbegin_nofloat:
2610 Opcode = SystemZISD::TBEGIN_NOFLOAT;
2611 CCValid = SystemZ::CCMASK_TBEGIN;
2612 return true;
2613
2614 case Intrinsic::s390_tend:
2615 Opcode = SystemZISD::TEND;
2616 CCValid = SystemZ::CCMASK_TEND;
2617 return true;
2618
2619 default:
2620 return false;
2621 }
2622}
2623
2624// Return true if Op is an intrinsic node without chain that returns the
2625// CC value as its final argument. Provide the associated SystemZISD
2626// opcode and the mask of valid CC values if so.
2627static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
2628 unsigned Id = Op.getConstantOperandVal(0);
2629 switch (Id) {
2630 case Intrinsic::s390_vpkshs:
2631 case Intrinsic::s390_vpksfs:
2632 case Intrinsic::s390_vpksgs:
2633 Opcode = SystemZISD::PACKS_CC;
2634 CCValid = SystemZ::CCMASK_VCMP;
2635 return true;
2636
2637 case Intrinsic::s390_vpklshs:
2638 case Intrinsic::s390_vpklsfs:
2639 case Intrinsic::s390_vpklsgs:
2640 Opcode = SystemZISD::PACKLS_CC;
2641 CCValid = SystemZ::CCMASK_VCMP;
2642 return true;
2643
2644 case Intrinsic::s390_vceqbs:
2645 case Intrinsic::s390_vceqhs:
2646 case Intrinsic::s390_vceqfs:
2647 case Intrinsic::s390_vceqgs:
2648 case Intrinsic::s390_vceqqs:
2649 Opcode = SystemZISD::VICMPES;
2650 CCValid = SystemZ::CCMASK_VCMP;
2651 return true;
2652
2653 case Intrinsic::s390_vchbs:
2654 case Intrinsic::s390_vchhs:
2655 case Intrinsic::s390_vchfs:
2656 case Intrinsic::s390_vchgs:
2657 case Intrinsic::s390_vchqs:
2658 Opcode = SystemZISD::VICMPHS;
2659 CCValid = SystemZ::CCMASK_VCMP;
2660 return true;
2661
2662 case Intrinsic::s390_vchlbs:
2663 case Intrinsic::s390_vchlhs:
2664 case Intrinsic::s390_vchlfs:
2665 case Intrinsic::s390_vchlgs:
2666 case Intrinsic::s390_vchlqs:
2667 Opcode = SystemZISD::VICMPHLS;
2668 CCValid = SystemZ::CCMASK_VCMP;
2669 return true;
2670
2671 case Intrinsic::s390_vtm:
2672 Opcode = SystemZISD::VTM;
2673 CCValid = SystemZ::CCMASK_VCMP;
2674 return true;
2675
2676 case Intrinsic::s390_vfaebs:
2677 case Intrinsic::s390_vfaehs:
2678 case Intrinsic::s390_vfaefs:
2679 Opcode = SystemZISD::VFAE_CC;
2680 CCValid = SystemZ::CCMASK_ANY;
2681 return true;
2682
2683 case Intrinsic::s390_vfaezbs:
2684 case Intrinsic::s390_vfaezhs:
2685 case Intrinsic::s390_vfaezfs:
2686 Opcode = SystemZISD::VFAEZ_CC;
2687 CCValid = SystemZ::CCMASK_ANY;
2688 return true;
2689
2690 case Intrinsic::s390_vfeebs:
2691 case Intrinsic::s390_vfeehs:
2692 case Intrinsic::s390_vfeefs:
2693 Opcode = SystemZISD::VFEE_CC;
2694 CCValid = SystemZ::CCMASK_ANY;
2695 return true;
2696
2697 case Intrinsic::s390_vfeezbs:
2698 case Intrinsic::s390_vfeezhs:
2699 case Intrinsic::s390_vfeezfs:
2700 Opcode = SystemZISD::VFEEZ_CC;
2701 CCValid = SystemZ::CCMASK_ANY;
2702 return true;
2703
2704 case Intrinsic::s390_vfenebs:
2705 case Intrinsic::s390_vfenehs:
2706 case Intrinsic::s390_vfenefs:
2707 Opcode = SystemZISD::VFENE_CC;
2708 CCValid = SystemZ::CCMASK_ANY;
2709 return true;
2710
2711 case Intrinsic::s390_vfenezbs:
2712 case Intrinsic::s390_vfenezhs:
2713 case Intrinsic::s390_vfenezfs:
2714 Opcode = SystemZISD::VFENEZ_CC;
2715 CCValid = SystemZ::CCMASK_ANY;
2716 return true;
2717
2718 case Intrinsic::s390_vistrbs:
2719 case Intrinsic::s390_vistrhs:
2720 case Intrinsic::s390_vistrfs:
2721 Opcode = SystemZISD::VISTR_CC;
2723 return true;
2724
2725 case Intrinsic::s390_vstrcbs:
2726 case Intrinsic::s390_vstrchs:
2727 case Intrinsic::s390_vstrcfs:
2728 Opcode = SystemZISD::VSTRC_CC;
2729 CCValid = SystemZ::CCMASK_ANY;
2730 return true;
2731
2732 case Intrinsic::s390_vstrczbs:
2733 case Intrinsic::s390_vstrczhs:
2734 case Intrinsic::s390_vstrczfs:
2735 Opcode = SystemZISD::VSTRCZ_CC;
2736 CCValid = SystemZ::CCMASK_ANY;
2737 return true;
2738
2739 case Intrinsic::s390_vstrsb:
2740 case Intrinsic::s390_vstrsh:
2741 case Intrinsic::s390_vstrsf:
2742 Opcode = SystemZISD::VSTRS_CC;
2743 CCValid = SystemZ::CCMASK_ANY;
2744 return true;
2745
2746 case Intrinsic::s390_vstrszb:
2747 case Intrinsic::s390_vstrszh:
2748 case Intrinsic::s390_vstrszf:
2749 Opcode = SystemZISD::VSTRSZ_CC;
2750 CCValid = SystemZ::CCMASK_ANY;
2751 return true;
2752
2753 case Intrinsic::s390_vfcedbs:
2754 case Intrinsic::s390_vfcesbs:
2755 Opcode = SystemZISD::VFCMPES;
2756 CCValid = SystemZ::CCMASK_VCMP;
2757 return true;
2758
2759 case Intrinsic::s390_vfchdbs:
2760 case Intrinsic::s390_vfchsbs:
2761 Opcode = SystemZISD::VFCMPHS;
2762 CCValid = SystemZ::CCMASK_VCMP;
2763 return true;
2764
2765 case Intrinsic::s390_vfchedbs:
2766 case Intrinsic::s390_vfchesbs:
2767 Opcode = SystemZISD::VFCMPHES;
2768 CCValid = SystemZ::CCMASK_VCMP;
2769 return true;
2770
2771 case Intrinsic::s390_vftcidb:
2772 case Intrinsic::s390_vftcisb:
2773 Opcode = SystemZISD::VFTCI;
2774 CCValid = SystemZ::CCMASK_VCMP;
2775 return true;
2776
2777 case Intrinsic::s390_tdc:
2778 Opcode = SystemZISD::TDC;
2779 CCValid = SystemZ::CCMASK_TDC;
2780 return true;
2781
2782 default:
2783 return false;
2784 }
2785}
2786
2787// Emit an intrinsic with chain and an explicit CC register result.
// A new node with the given Opcode is built from Op's operands minus the
// intrinsic ID (operand 1). It produces an i32 value plus a chain. All users
// of Op's old chain result are redirected to the new node's chain, and the
// new node is returned.
// NOTE(review): the signature's first line and the declaration of Ops are
// absent from this listing.
2789 unsigned Opcode) {
2790 // Copy all operands except the intrinsic ID.
2791 unsigned NumOps = Op.getNumOperands();
2793 Ops.reserve(NumOps - 1);
// Operand 0 is kept, operand 1 is skipped, and the rest follow in order.
2794 Ops.push_back(Op.getOperand(0));
2795 for (unsigned I = 2; I < NumOps; ++I)
2796 Ops.push_back(Op.getOperand(I));
2797
2798 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
2799 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other);
2800 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
// Result 1 of both the old and the new node is the chain.
2801 SDValue OldChain = SDValue(Op.getNode(), 1);
2802 SDValue NewChain = SDValue(Intr.getNode(), 1);
2803 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
2804 return Intr.getNode();
2805}
2806
2807// Emit an intrinsic with an explicit CC register result.
// A new node with the given Opcode and Op's own value-type list is built
// from Op's operands minus the intrinsic ID (operand 0), and that node is
// returned. An f16 operand is extended to f32 first; the assert restricts
// this to operand 1 of the s390_tdc intrinsic.
// NOTE(review): the signature's first line and the declaration of Ops are
// absent from this listing.
2809 unsigned Opcode) {
2810 // Copy all operands except the intrinsic ID.
2811 SDLoc DL(Op);
2812 unsigned NumOps = Op.getNumOperands();
2814 Ops.reserve(NumOps - 1);
2815 for (unsigned I = 1; I < NumOps; ++I) {
2816 SDValue CurrOper = Op.getOperand(I);
2817 if (CurrOper.getValueType() == MVT::f16) {
2818 assert((Op.getConstantOperandVal(0) == Intrinsic::s390_tdc && I == 1) &&
2819 "Unhandled intrinsic with f16 operand.");
2820 CurrOper = DAG.getFPExtendOrRound(CurrOper, DL, MVT::f32);
2821 }
2822 Ops.push_back(CurrOper);
2823 }
2824
2825 SDValue Intr = DAG.getNode(Opcode, DL, Op->getVTList(), Ops);
2826 return Intr.getNode();
2827}
2828
2829// CC is a comparison that will be implemented using an integer or
2830// floating-point comparison. Return the condition code mask for
2831// a branch on true. In the integer case, CCMASK_CMP_UO is set for
2832// unsigned comparisons and clear for signed ones. In the floating-point
2833// case, CCMASK_CMP_UO has its normal mask meaning (unordered).
// NOTE(review): the signature line and one case following ISD::SETO are
// absent from this listing.
// CONV(X) expands to three case labels: SET<X> and SETO<X> map to
// CCMASK_CMP_<X>, while SETU<X> additionally ORs in CCMASK_CMP_UO. The last
// expanded `return` has no semicolon, so each use supplies its own.
2835#define CONV(X) \
2836 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \
2837 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \
2838 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X
2839
2840 switch (CC) {
2841 default:
2842 llvm_unreachable("Invalid integer condition!");
2843
2844 CONV(EQ);
2845 CONV(NE);
2846 CONV(GT);
2847 CONV(GE);
2848 CONV(LT);
2849 CONV(LE);
2850
2851 case ISD::SETO: return SystemZ::CCMASK_CMP_O;
2853 }
2854#undef CONV
2855}
2856
2857// If C can be converted to a comparison against zero, adjust the operands
2858// as necessary.
2859static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2860 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2861 return;
2862
2863 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2864 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2865 return;
2866
2867 int64_t Value = ConstOp1->getSExtValue();
2868 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2869 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2870 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2871 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2872 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2873 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2874 }
2875}
2876
2877// If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2878// adjust the operands as necessary.
// On success C.Op0 becomes an i32 extending load of the original 8- or
// 16-bit memory value, C.Op1 becomes a matching i32 constant, and
// C.ICmpType / C.CCMask may be updated. On every early return C is left
// untouched.
// NOTE(review): two lines of the ExtType initializer are absent from this
// listing.
2879static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2880 Comparison &C) {
2881 // For us to make any changes, it must be a comparison between a single-use
2882 // load and a constant.
2883 if (!C.Op0.hasOneUse() ||
2884 C.Op0.getOpcode() != ISD::LOAD ||
2885 C.Op1.getOpcode() != ISD::Constant)
2886 return;
2887
2888 // We must have an 8- or 16-bit load.
2889 auto *Load = cast<LoadSDNode>(C.Op0);
2890 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2891 if ((NumBits != 8 && NumBits != 16) ||
2892 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2893 return;
2894
2895 // The load must be an extending one and the constant must be within the
2896 // range of the unextended value.
2897 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2898 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2899 return;
2900 uint64_t Value = ConstOp1->getZExtValue();
// NumBits is 8 or 16 here, so the int shift cannot overflow.
2901 uint64_t Mask = (1 << NumBits) - 1;
2902 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2903 // Make sure that ConstOp1 is in range of C.Op0.
2904 int64_t SignedValue = ConstOp1->getSExtValue();
// Biasing by 2^(NumBits-1) maps the signed range onto [0, Mask], so a single
// unsigned comparison checks both bounds.
2905 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2906 return;
2907 if (C.ICmpType != SystemZICMP::SignedOnly) {
2908 // Unsigned comparison between two sign-extended values is equivalent
2909 // to unsigned comparison between two zero-extended values.
2910 Value &= Mask;
2911 } else if (NumBits == 8) {
2912 // Try to treat the comparison as unsigned, so that we can use CLI.
2913 // Adjust CCMask and Value as necessary.
2914 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2915 // Test whether the high bit of the byte is set.
2916 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2917 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2918 // Test whether the high bit of the byte is clear.
2919 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2920 else
2921 // No instruction exists for this combination.
2922 return;
2923 C.ICmpType = SystemZICMP::UnsignedOnly;
2924 }
2925 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2926 if (Value > Mask)
2927 return;
2928 // If the constant is in range, we can use any comparison.
2929 C.ICmpType = SystemZICMP::Any;
2930 } else
2931 return;
2932
2933 // Make sure that the first operand is an i32 of the right extension type.
2934 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2937 if (C.Op0.getValueType() != MVT::i32 ||
2938 Load->getExtensionType() != ExtType) {
2939 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(),
2940 Load->getBasePtr(), Load->getPointerInfo(),
2941 Load->getMemoryVT(), Load->getAlign(),
2942 Load->getMemOperand()->getFlags());
2943 // Update the chain uses.
2944 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1));
2945 }
2946
2947 // Make sure that the second operand is an i32 with the right value.
2948 if (C.Op1.getValueType() != MVT::i32 ||
2949 Value != ConstOp1->getZExtValue())
2950 C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32);
2951}
2952
2953// Return true if Op is either an unextended load, or a load suitable
2954// for integer register-memory comparisons of type ICmpType.
2955static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) {
2956 auto *Load = dyn_cast<LoadSDNode>(Op.getNode());
2957 if (Load) {
2958 // There are no instructions to compare a register with a memory byte.
2959 if (Load->getMemoryVT() == MVT::i8)
2960 return false;
2961 // Otherwise decide on extension type.
2962 switch (Load->getExtensionType()) {
2963 case ISD::NON_EXTLOAD:
2964 return true;
2965 case ISD::SEXTLOAD:
2966 return ICmpType != SystemZICMP::UnsignedOnly;
2967 case ISD::ZEXTLOAD:
2968 return ICmpType != SystemZICMP::SignedOnly;
2969 default:
2970 break;
2971 }
2972 }
2973 return false;
2974}
2975
2976// Return true if it is better to swap the operands of C.
2977static bool shouldSwapCmpOperands(const Comparison &C) {
2978 // Leave i128 and f128 comparisons alone, since they have no memory forms.
2979 if (C.Op0.getValueType() == MVT::i128)
2980 return false;
2981 if (C.Op0.getValueType() == MVT::f128)
2982 return false;
2983
2984 // Always keep a floating-point constant second, since comparisons with
2985 // zero can use LOAD TEST and comparisons with other constants make a
2986 // natural memory operand.
2987 if (isa<ConstantFPSDNode>(C.Op1))
2988 return false;
2989
2990 // Never swap comparisons with zero since there are many ways to optimize
2991 // those later.
2992 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
2993 if (ConstOp1 && ConstOp1->getZExtValue() == 0)
2994 return false;
2995
2996 // Also keep natural memory operands second if the loaded value is
2997 // only used here. Several comparisons have memory forms.
2998 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse())
2999 return false;
3000
3001 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't.
3002 // In that case we generally prefer the memory to be second.
3003 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) {
3004 // The only exceptions are when the second operand is a constant and
3005 // we can use things like CHHSI.
3006 if (!ConstOp1)
3007 return true;
3008 // The unsigned memory-immediate instructions can handle 16-bit
3009 // unsigned integers.
3010 if (C.ICmpType != SystemZICMP::SignedOnly &&
3011 isUInt<16>(ConstOp1->getZExtValue()))
3012 return false;
3013 // The signed memory-immediate instructions can handle 16-bit
3014 // signed integers.
3015 if (C.ICmpType != SystemZICMP::UnsignedOnly &&
3016 isInt<16>(ConstOp1->getSExtValue()))
3017 return false;
3018 return true;
3019 }
3020
3021 // Try to promote the use of CGFR and CLGFR.
3022 unsigned Opcode0 = C.Op0.getOpcode();
3023 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND)
3024 return true;
3025 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND)
3026 return true;
3027 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND &&
3028 C.Op0.getOperand(1).getOpcode() == ISD::Constant &&
3029 C.Op0.getConstantOperandVal(1) == 0xffffffff)
3030 return true;
3031
3032 return false;
3033}
3034
3035// Check whether C tests for equality between X and Y and whether X - Y
3036// or Y - X is also computed. In that case it's better to compare the
3037// result of the subtraction against zero.
// Only EQ and NE masks are considered. The first matching SUB among the
// users of C.Op0 is used: C.Op0 becomes that SUB's result and C.Op1 a zero
// constant of the SUB's type. Note that the matched SUB node itself is
// modified (its no-wrap flags are dropped).
// NOTE(review): the first line of the signature is absent from this listing.
3039 Comparison &C) {
3040 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3041 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3042 for (SDNode *N : C.Op0->users()) {
// Accept the operands in either order (X - Y or Y - X).
3043 if (N->getOpcode() == ISD::SUB &&
3044 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) ||
3045 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) {
3046 // Disable the nsw and nuw flags: the backend needs to handle
3047 // overflow as well during comparison elimination.
3048 N->dropFlags(SDNodeFlags::NoWrap);
3049 C.Op0 = SDValue(N, 0);
3050 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0));
3051 return;
3052 }
3053 }
3054 }
3055}
3056
3057// Check whether C compares a floating-point value with zero and if that
3058// floating-point value is also negated. In this case we can use the
3059// negation to set CC, so avoiding separate LOAD AND TEST and
3060// LOAD (NEGATIVE/COMPLEMENT) instructions.
3061static void adjustForFNeg(Comparison &C) {
3062 // This optimization is invalid for strict comparisons, since FNEG
3063 // does not raise any exceptions.
3064 if (C.Chain)
3065 return;
3066 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1);
3067 if (C1 && C1->isZero()) {
3068 for (SDNode *N : C.Op0->users()) {
3069 if (N->getOpcode() == ISD::FNEG) {
3070 C.Op0 = SDValue(N, 0);
3071 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3072 return;
3073 }
3074 }
3075 }
3076}
3077
3078// Check whether C compares (shl X, 32) with 0 and whether X is
3079// also sign-extended. In that case it is better to test the result
3080// of the sign extension using LTGFR.
3081//
3082// This case is important because InstCombine transforms a comparison
3083// with (sext (trunc X)) into a comparison with (shl X, 32).
3084static void adjustForLTGFR(Comparison &C) {
3085 // Check for a comparison between (shl X, 32) and 0.
3086 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 &&
3087 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) {
3088 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3089 if (C1 && C1->getZExtValue() == 32) {
3090 SDValue ShlOp0 = C.Op0.getOperand(0);
3091 // See whether X has any SIGN_EXTEND_INREG uses.
3092 for (SDNode *N : ShlOp0->users()) {
3093 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG &&
3094 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) {
3095 C.Op0 = SDValue(N, 0);
3096 return;
3097 }
3098 }
3099 }
3100 }
3101}
3102
3103// If C compares the truncation of an extending load, try to compare
3104// the untruncated value instead. This exposes more opportunities to
3105// reuse CC.
3106static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
3107 Comparison &C) {
3108 if (C.Op0.getOpcode() == ISD::TRUNCATE &&
3109 C.Op0.getOperand(0).getOpcode() == ISD::LOAD &&
3110 C.Op1.getOpcode() == ISD::Constant &&
3111 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3112 C.Op1->getAsZExtVal() == 0) {
3113 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
3114 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <=
3115 C.Op0.getValueSizeInBits().getFixedValue()) {
3116 unsigned Type = L->getExtensionType();
3117 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
3118 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
3119 C.Op0 = C.Op0.getOperand(0);
3120 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType());
3121 }
3122 }
3123 }
3124}
3125
3126// Return true if shift operation N has an in-range constant shift value.
3127// Store it in ShiftVal if so.
3128static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
3129 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1));
3130 if (!Shift)
3131 return false;
3132
3133 uint64_t Amount = Shift->getZExtValue();
3134 if (Amount >= N.getValueSizeInBits())
3135 return false;
3136
3137 ShiftVal = Amount;
3138 return true;
3139}
3140
3141// Check whether an AND with Mask is suitable for a TEST UNDER MASK
3142// instruction and whether the CC value is descriptive enough to handle
3143// a comparison of type Opcode between the AND result and CmpVal.
3144// CCMask says which comparison result is being tested and BitSize is
3145// the number of bits in the operands. If TEST UNDER MASK can be used,
3146// return the corresponding CC mask, otherwise return 0.
// NOTE(review): this listing is incomplete. The embedded numbering skips
// line 3158 (presumably the definition of `High`, which is used below but
// never declared here) and skips the line after every `if (CCMask == ...)`
// test (presumably the `return` of the matching CC mask). Restore those
// lines from the upstream file before relying on this text.
3147static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask,
3148 uint64_t Mask, uint64_t CmpVal,
3149 unsigned ICmpType) {
3150 assert(Mask != 0 && "ANDs with zero should have been removed by now");
3151
3152 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL.
3153 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) &&
3154 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask))
3155 return 0;
3156
3157 // Work out the masks for the lowest and highest bits.
// Low is the single lowest set bit of Mask.
3159 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask);
3160
3161 // Signed ordered comparisons are effectively unsigned if the sign
3162 // bit is dropped.
3163 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly);
3164
3165 // Check for equality comparisons with 0, or the equivalent.
3166 if (CmpVal == 0) {
3167 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3169 if (CCMask == SystemZ::CCMASK_CMP_NE)
3171 }
3172 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) {
3173 if (CCMask == SystemZ::CCMASK_CMP_LT)
3175 if (CCMask == SystemZ::CCMASK_CMP_GE)
3177 }
3178 if (EffectivelyUnsigned && CmpVal < Low) {
3179 if (CCMask == SystemZ::CCMASK_CMP_LE)
3181 if (CCMask == SystemZ::CCMASK_CMP_GT)
3183 }
3184
3185 // Check for equality comparisons with the mask, or the equivalent.
3186 if (CmpVal == Mask) {
3187 if (CCMask == SystemZ::CCMASK_CMP_EQ)
3189 if (CCMask == SystemZ::CCMASK_CMP_NE)
3191 }
3192 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) {
3193 if (CCMask == SystemZ::CCMASK_CMP_GT)
3195 if (CCMask == SystemZ::CCMASK_CMP_LE)
3197 }
3198 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) {
3199 if (CCMask == SystemZ::CCMASK_CMP_GE)
3201 if (CCMask == SystemZ::CCMASK_CMP_LT)
3203 }
3204
3205 // Check for ordered comparisons with the top bit.
3206 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) {
3207 if (CCMask == SystemZ::CCMASK_CMP_LE)
3209 if (CCMask == SystemZ::CCMASK_CMP_GT)
3211 }
3212 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) {
3213 if (CCMask == SystemZ::CCMASK_CMP_LT)
3215 if (CCMask == SystemZ::CCMASK_CMP_GE)
3217 }
3218
3219 // If there are just two bits, we can do equality checks for Low and High
3220 // as well.
3221 if (Mask == Low + High) {
3222 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low)
3224 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low)
3226 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High)
3228 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High)
3230 }
3231
3232 // Looks like we've exhausted our options.
3233 return 0;
3234}
3235
3236// See whether C can be implemented as a TEST UNDER MASK instruction.
3237// Update the arguments with the TM version if so.
// NOTE(review): the embedded numbering skips line 3238 (the first line of
// the signature; `DAG` and `DL` are used below and presumably declared
// there) and line 3326 (presumably the final argument and closing of the
// second getTestUnderMaskCond call). Confirm against the upstream file.
3239 Comparison &C) {
3240 // Use VECTOR TEST UNDER MASK for i128 operations.
3241 if (C.Op0.getValueType() == MVT::i128) {
3242 // We can use VTM for EQ/NE comparisons of x & y against 0.
3243 if (C.Op0.getOpcode() == ISD::AND &&
3244 (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3245 C.CCMask == SystemZ::CCMASK_CMP_NE)) {
3246 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1);
3247 if (Mask && Mask->getAPIntValue() == 0) {
3248 C.Opcode = SystemZISD::VTM;
// Op1 is assigned first because it reads C.Op0, which the next line
// overwrites.
3249 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1));
3250 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0));
3251 C.CCValid = SystemZ::CCMASK_VCMP;
3252 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3253 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3254 else
3255 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3256 }
3257 }
// i128 operands never reach the scalar TM handling below.
3258 return;
3259 }
3260
3261 // Check that we have a comparison with a constant.
3262 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1);
3263 if (!ConstOp1)
3264 return;
3265 uint64_t CmpVal = ConstOp1->getZExtValue();
3266
3267 // Check whether the nonconstant input is an AND with a constant mask.
// All tentative changes are made on the copy NewC; C itself is only
// updated once the transformation is known to apply.
3268 Comparison NewC(C);
3269 uint64_t MaskVal;
3270 ConstantSDNode *Mask = nullptr;
3271 if (C.Op0.getOpcode() == ISD::AND) {
3272 NewC.Op0 = C.Op0.getOperand(0);
3273 NewC.Op1 = C.Op0.getOperand(1);
3274 Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
3275 if (!Mask)
3276 return;
3277 MaskVal = Mask->getZExtValue();
3278 } else {
3279 // There is no instruction to compare with a 64-bit immediate
3280 // so use TMHH instead if possible. We need an unsigned ordered
3281 // comparison with an i64 immediate.
3282 if (NewC.Op0.getValueType() != MVT::i64 ||
3283 NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
3284 NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
3285 NewC.ICmpType == SystemZICMP::SignedOnly)
3286 return;
3287 // Convert LE and GT comparisons into LT and GE.
3288 if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
3289 NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
// CmpVal + 1 would wrap around to 0 here, so give up.
3290 if (CmpVal == uint64_t(-1))
3291 return;
3292 CmpVal += 1;
3293 NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
3294 }
3295 // If the low N bits of Op1 are zero then the low N bits of Op0 can
3296 // be masked off without changing the result.
// (CmpVal & -CmpVal) isolates the lowest set bit of CmpVal; negating it
// yields a mask of that bit and every bit above it.
3297 MaskVal = -(CmpVal & -CmpVal);
3298 NewC.ICmpType = SystemZICMP::UnsignedOnly;
3299 }
3300 if (!MaskVal)
3301 return;
3302
3303 // Check whether the combination of mask, comparison value and comparison
3304 // type are suitable.
3305 unsigned BitSize = NewC.Op0.getValueSizeInBits();
3306 unsigned NewCCMask, ShiftVal;
// First try to fold a constant SHL of the tested value into the mask and
// comparison value: both are shifted right, provided the mask stays
// nonzero and no set bits of CmpVal are lost.
3307 if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3308 NewC.Op0.getOpcode() == ISD::SHL &&
3309 isSimpleShift(NewC.Op0, ShiftVal) &&
3310 (MaskVal >> ShiftVal != 0) &&
3311 ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
3312 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3313 MaskVal >> ShiftVal,
3314 CmpVal >> ShiftVal,
3315 SystemZICMP::Any))) {
3316 NewC.Op0 = NewC.Op0.getOperand(0);
3317 MaskVal >>= ShiftVal;
// Likewise for a constant SRL, shifting mask and comparison value left.
3318 } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
3319 NewC.Op0.getOpcode() == ISD::SRL &&
3320 isSimpleShift(NewC.Op0, ShiftVal) &&
3321 (MaskVal << ShiftVal != 0) &&
3322 ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
3323 (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
3324 MaskVal << ShiftVal,
3325 CmpVal << ShiftVal,
3327 NewC.Op0 = NewC.Op0.getOperand(0);
3328 MaskVal <<= ShiftVal;
3329 } else {
// No shift could be folded: test the value with the mask as it stands.
3330 NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
3331 NewC.ICmpType);
3332 if (!NewCCMask)
3333 return;
3334 }
3335
3336 // Go ahead and make the change.
3337 C.Opcode = SystemZISD::TM;
3338 C.Op0 = NewC.Op0;
// Reuse the original mask node when its value is unchanged; otherwise
// materialize a new constant.
3339 if (Mask && Mask->getZExtValue() == MaskVal)
3340 C.Op1 = SDValue(Mask, 0);
3341 else
3342 C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
3343 C.CCValid = SystemZ::CCMASK_TM;
3344 C.CCMask = NewCCMask;
3345}
3346
3347// Implement i128 comparison in vector registers.
// Does nothing unless C is still a SystemZISD::ICMP on i128 operands.
// NOTE(review): the embedded numbering skips line 3383, which presumably
// assigns C.CCMask (from CmpNull, which is otherwise unused) before the
// `if (!CmpEq)` inversion below. Confirm against the upstream file.
3348static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
3349 Comparison &C) {
3350 if (C.Opcode != SystemZISD::ICMP)
3351 return;
3352 if (C.Op0.getValueType() != MVT::i128)
3353 return;
3354
3355 // Recognize vector comparison reductions.
3356 if ((C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3357 C.CCMask == SystemZ::CCMASK_CMP_NE) &&
3358 (isNullConstant(C.Op1) || isAllOnesConstant(C.Op1))) {
3359 bool CmpEq = C.CCMask == SystemZ::CCMASK_CMP_EQ;
3360 bool CmpNull = isNullConstant(C.Op1);
3361 SDValue Src = peekThroughBitcasts(C.Op0);
// Look through a single-use bitwise NOT; comparing the inverted value
// against zero is the same as comparing the original against all-ones,
// and vice versa.
3362 if (Src.hasOneUse() && isBitwiseNot(Src)) {
3363 Src = Src.getOperand(0);
3364 CmpNull = !CmpNull;
3365 }
// Map a single-use vector compare onto the opcode with the same name plus
// an "S" suffix (presumably the CC-setting form -- confirm).
3366 unsigned Opcode = 0;
3367 if (Src.hasOneUse()) {
3368 switch (Src.getOpcode()) {
3369 case SystemZISD::VICMPE: Opcode = SystemZISD::VICMPES; break;
3370 case SystemZISD::VICMPH: Opcode = SystemZISD::VICMPHS; break;
3371 case SystemZISD::VICMPHL: Opcode = SystemZISD::VICMPHLS; break;
3372 case SystemZISD::VFCMPE: Opcode = SystemZISD::VFCMPES; break;
3373 case SystemZISD::VFCMPH: Opcode = SystemZISD::VFCMPHS; break;
3374 case SystemZISD::VFCMPHE: Opcode = SystemZISD::VFCMPHES; break;
3375 default: break;
3376 }
3377 }
3378 if (Opcode) {
3379 C.Opcode = Opcode;
3380 C.Op0 = Src->getOperand(0);
3381 C.Op1 = Src->getOperand(1);
3382 C.CCValid = SystemZ::CCMASK_VCMP;
3384 if (!CmpEq)
3385 C.CCMask ^= C.CCValid;
3386 return;
3387 }
3388 }
3389
3390 // Everything below here is not useful if we have native i128 compares.
3391 if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
3392 return;
3393
3394 // (In-)Equality comparisons can be implemented via VCEQGS.
3395 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3396 C.CCMask == SystemZ::CCMASK_CMP_NE) {
3397 C.Opcode = SystemZISD::VICMPES;
3398 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0);
3399 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1);
3400 C.CCValid = SystemZ::CCMASK_VCMP;
3401 if (C.CCMask == SystemZ::CCMASK_CMP_EQ)
3402 C.CCMask = SystemZ::CCMASK_VCMP_ALL;
3403 else
3404 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid;
3405 return;
3406 }
3407
3408 // Normalize other comparisons to GT.
// Swap exchanges the operands; Invert flips the resulting CC mask within
// CCValid at the end.
3409 bool Swap = false, Invert = false;
3410 switch (C.CCMask) {
3411 case SystemZ::CCMASK_CMP_GT: break;
3412 case SystemZ::CCMASK_CMP_LT: Swap = true; break;
3413 case SystemZ::CCMASK_CMP_LE: Invert = true; break;
3414 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break;
3415 default: llvm_unreachable("Invalid integer condition!");
3416 }
3417 if (Swap)
3418 std::swap(C.Op0, C.Op1);
3419
3420 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3421 C.Opcode = SystemZISD::UCMP128HI;
3422 else
3423 C.Opcode = SystemZISD::SCMP128HI;
3424 C.CCValid = SystemZ::CCMASK_ANY;
3425 C.CCMask = SystemZ::CCMASK_1;
3426
3427 if (Invert)
3428 C.CCMask ^= C.CCValid;
3429}
3430
3431// See whether the comparison argument contains a redundant AND
3432// and remove it if so. This sometimes happens due to the generic
3433// BRCOND expansion.
// NOTE(review): the embedded numbering skips line 3434, the first line of
// the signature. `DAG` is used below and presumably declared there --
// confirm against the upstream file.
3435 Comparison &C) {
3436 if (C.Op0.getOpcode() != ISD::AND)
3437 return;
// Only constant masks of at most 64 bits are handled, since the check below
// works on uint64_t values.
3438 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1));
3439 if (!Mask || Mask->getValueSizeInBits(0) > 64)
3440 return;
// The AND is redundant only if every bit that might be set in its first
// operand is also set in the mask.
3441 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0));
3442 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue())
3443 return;
3444
// Compare the AND's first operand directly.
3445 C.Op0 = C.Op0.getOperand(0);
3446}
3447
3448// Return a Comparison that tests the condition-code result of intrinsic
3449// node Call against constant integer CC using comparison code Cond.
3450// Opcode is the opcode of the SystemZISD operation for the intrinsic
3451// and CCValid is the set of possible condition-code results.
// NOTE(review): the embedded numbering skips line 3454, which presumably
// declares the final parameter `Cond` and opens the function body. Confirm
// against the upstream file.
3452static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
3453 SDValue Call, unsigned CCValid, uint64_t CC,
// Op1 and Chain are left as null SDValues; only Op0 (the call) is set.
3455 Comparison C(Call, SDValue(), SDValue());
3456 C.Opcode = Opcode;
3457 C.CCValid = CCValid;
3458 if (Cond == ISD::SETEQ)
3459 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
3460 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
3461 else if (Cond == ISD::SETNE)
3462 // ...and the inverse of that.
3463 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
3464 else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
3465 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
3466 // always true for CC>3.
3467 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1;
3468 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
3469 // ...and the inverse of that.
3470 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0;
3471 else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
3472 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
3473 // always true for CC>3.
3474 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1;
3475 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
3476 // ...and the inverse of that.
3477 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0;
3478 else
3479 llvm_unreachable("Unexpected integer comparison type");
// Discard any bits that fall outside the set of possible CC values.
3480 C.CCMask &= CCValid;
3481 return C;
3482}
3483
3484// Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
// Chain is non-null for strict floating-point comparisons; IsSignaling then
// selects STRICT_FCMPS over STRICT_FCMP.
// NOTE(review): the embedded numbering skips lines 3513, 3536 and 3545, so
// up to three statements are absent from this listing (presumably further
// adjust* calls). Confirm against the upstream file.
3485static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
3486 ISD::CondCode Cond, const SDLoc &DL,
3487 SDValue Chain = SDValue(),
3488 bool IsSignaling = false) {
// A comparison of an intrinsic's CC result against a constant is turned
// into a direct test of the condition code.
3489 if (CmpOp1.getOpcode() == ISD::Constant) {
3490 assert(!Chain);
3491 unsigned Opcode, CCValid;
3492 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
3493 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
3494 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
3495 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3496 CmpOp1->getAsZExtVal(), Cond);
3497 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
3498 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
3499 isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
3500 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid,
3501 CmpOp1->getAsZExtVal(), Cond);
3502 }
3503 Comparison C(CmpOp0, CmpOp1, Chain);
3504 C.CCMask = CCMaskForCondCode(Cond);
3505 if (C.Op0.getValueType().isFloatingPoint()) {
3506 C.CCValid = SystemZ::CCMASK_FCMP;
3507 if (!C.Chain)
3508 C.Opcode = SystemZISD::FCMP;
3509 else if (!IsSignaling)
3510 C.Opcode = SystemZISD::STRICT_FCMP;
3511 else
3512 C.Opcode = SystemZISD::STRICT_FCMPS;
3514 } else {
3515 assert(!C.Chain);
3516 C.CCValid = SystemZ::CCMASK_ICMP;
3517 C.Opcode = SystemZISD::ICMP;
3518 // Choose the type of comparison. Equality and inequality tests can
3519 // use either signed or unsigned comparisons. The choice also doesn't
3520 // matter if both sign bits are known to be clear. In those cases we
3521 // want to give the main isel code the freedom to choose whichever
3522 // form fits best.
3523 if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
3524 C.CCMask == SystemZ::CCMASK_CMP_NE ||
3525 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1)))
3526 C.ICmpType = SystemZICMP::Any;
// For integer comparisons the UO bit of the mask is used here to
// distinguish unsigned from signed conditions; it is cleared again below.
3527 else if (C.CCMask & SystemZ::CCMASK_CMP_UO)
3528 C.ICmpType = SystemZICMP::UnsignedOnly;
3529 else
3530 C.ICmpType = SystemZICMP::SignedOnly;
3531 C.CCMask &= ~SystemZ::CCMASK_CMP_UO;
3532 adjustForRedundantAnd(DAG, DL, C);
3533 adjustZeroCmp(DAG, DL, C);
3534 adjustSubwordCmp(DAG, DL, C);
3535 adjustForSubtraction(DAG, DL, C);
3537 adjustICmpTruncate(DAG, DL, C);
3538 }
3539
// Swapping the operands requires reversing the condition mask as well.
3540 if (shouldSwapCmpOperands(C)) {
3541 std::swap(C.Op0, C.Op1);
3542 C.CCMask = SystemZ::reverseCCMask(C.CCMask);
3543 }
3544
3546 adjustICmp128(DAG, DL, C);
3547 return C;
3548}
3549
3550// Emit the comparison instruction described by C.
// NOTE(review): the embedded numbering skips lines 3555 and 3558
// (presumably the two `case` labels of the switch below) and line 3570
// (presumably the right-hand side of the RegisterOnly expression). Confirm
// against the upstream file.
3551static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
// A null Op1 marks a comparison built by getIntrinsicCmp: Op0 is the
// intrinsic call whose CC result is used directly.
3552 if (!C.Op1.getNode()) {
3553 SDNode *Node;
3554 switch (C.Op0.getOpcode()) {
3556 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode);
3557 return SDValue(Node, 0);
3559 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode);
3560 return SDValue(Node, Node->getNumValues() - 1);
3561 default:
3562 llvm_unreachable("Invalid comparison operands");
3563 }
3564 }
// ICMP carries the comparison type as an extra target-constant operand.
3565 if (C.Opcode == SystemZISD::ICMP)
3566 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1,
3567 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32));
3568 if (C.Opcode == SystemZISD::TM) {
3569 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) !=
3571 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1,
3572 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32));
3573 }
// These vector compares produce two results: the integer vector and, as
// result 1, the i32 CC value that is returned here.
3574 if (C.Opcode == SystemZISD::VICMPES ||
3575 C.Opcode == SystemZISD::VICMPHS ||
3576 C.Opcode == SystemZISD::VICMPHLS ||
3577 C.Opcode == SystemZISD::VFCMPES ||
3578 C.Opcode == SystemZISD::VFCMPHS ||
3579 C.Opcode == SystemZISD::VFCMPHES) {
3580 EVT IntVT = C.Op0.getValueType().changeVectorElementTypeToInteger();
3581 SDVTList VTs = DAG.getVTList(IntVT, MVT::i32);
3582 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1);
3583 return SDValue(Val.getNode(), 1);
3584 }
// Chained comparisons take the chain as first operand and also produce an
// output chain.
3585 if (C.Chain) {
3586 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
3587 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1);
3588 }
3589 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1);
3590}
3591
3592// Implement a 32-bit *MUL_LOHI operation by extending both operands to
3593// 64 bits. Extend is the extension type to use. Store the high part
3594// in Hi and the low part in Lo.
3595static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend,
3596 SDValue Op0, SDValue Op1, SDValue &Hi,
3597 SDValue &Lo) {
3598 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0);
3599 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1);
3600 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1);
3601 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
3602 DAG.getConstant(32, DL, MVT::i64));
3603 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi);
3604 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul);
3605}
3606
3607// Lower a binary operation that produces two VT results, one in each
3608// half of a GR128 pair. Op0 and Op1 are the VT operands to the operation,
3609// and Opcode performs the GR128 operation. Store the even register result
3610// in Even and the odd register result in Odd.
3611static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
3612 unsigned Opcode, SDValue Op0, SDValue Op1,
3613 SDValue &Even, SDValue &Odd) {
3614 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1);
3615 bool Is32Bit = is32Bit(VT);
3616 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result);
3617 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result);
3618}
3619
3620// Return an i32 value that is 1 if the CC value produced by CCReg is
3621// in the mask CCMask and 0 otherwise. CC is known to have a value
3622// in CCValid, so other values can be ignored.
3623static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
3624 unsigned CCValid, unsigned CCMask) {
3625 SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
3626 DAG.getConstant(0, DL, MVT::i32),
3627 DAG.getTargetConstant(CCValid, DL, MVT::i32),
3628 DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
3629 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
3630}
3631
3632// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
3633// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
3634// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
3635// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
3636// floating-point comparisons.
// NOTE(review): the embedded numbering skips lines 3637-3638, so the
// function signature (and presumably the CmpMode declaration) is absent
// from this listing. Confirm against the upstream file.
3639 switch (CC) {
// Equality: available in every mode.
3640 case ISD::SETOEQ:
3641 case ISD::SETEQ:
3642 switch (Mode) {
3643 case CmpMode::Int: return SystemZISD::VICMPE;
3644 case CmpMode::FP: return SystemZISD::VFCMPE;
3645 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPE;
3646 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
3647 }
3648 llvm_unreachable("Bad mode");
3649
// Greater-or-equal: floating-point modes only; 0 for integers.
3650 case ISD::SETOGE:
3651 case ISD::SETGE:
3652 switch (Mode) {
3653 case CmpMode::Int: return 0;
3654 case CmpMode::FP: return SystemZISD::VFCMPHE;
3655 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPHE;
3656 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
3657 }
3658 llvm_unreachable("Bad mode");
3659
// Greater-than: available in every mode.
3660 case ISD::SETOGT:
3661 case ISD::SETGT:
3662 switch (Mode) {
3663 case CmpMode::Int: return SystemZISD::VICMPH;
3664 case CmpMode::FP: return SystemZISD::VFCMPH;
3665 case CmpMode::StrictFP: return SystemZISD::STRICT_VFCMPH;
3666 case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
3667 }
3668 llvm_unreachable("Bad mode");
3669
// Unsigned greater-than: integer mode only.
3670 case ISD::SETUGT:
3671 switch (Mode) {
3672 case CmpMode::Int: return SystemZISD::VICMPHL;
3673 case CmpMode::FP: return 0;
3674 case CmpMode::StrictFP: return 0;
3675 case CmpMode::SignalingFP: return 0;
3676 }
3677 llvm_unreachable("Bad mode");
3678
// Every other condition code has no direct vector comparison.
3679 default:
3680 return 0;
3681 }
3682}
3683
3684// Return the SystemZISD vector comparison operation for CC or its inverse,
3685// or 0 if neither can be done directly. Indicate in Invert whether the
3686// result is for the inverse of CC. Mode is as above.
// NOTE(review): the embedded numbering skips line 3687, the first line of
// the signature (presumably declaring `CC` and `Mode`). Confirm against the
// upstream file.
3688 bool &Invert) {
// Try the condition as given first.
3689 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3690 Invert = false;
3691 return Opcode;
3692 }
3693
// Otherwise try the inverse condition. The type passed to getSetCCInverse
// only distinguishes integer from floating-point conditions here.
3694 CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
3695 if (unsigned Opcode = getVectorComparison(CC, Mode)) {
3696 Invert = true;
3697 return Opcode;
3698 }
3699
// Invert is left unmodified when neither form is available.
3700 return 0;
3701}
3702
3703// Return a v2f64 that contains the extended form of elements Start and Start+1
3704// of v4f32 value Op. If Chain is nonnull, return the strict form.
3705static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL,
3706 SDValue Op, SDValue Chain) {
3707 int Mask[] = { Start, -1, Start + 1, -1 };
3708 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
3709 if (Chain) {
3710 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other);
3711 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op);
3712 }
3713 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
3714}
3715
3716// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
3717// producing a result of type VT. If Chain is nonnull, return the strict form.
3718SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode,
3719 const SDLoc &DL, EVT VT,
3720 SDValue CmpOp0,
3721 SDValue CmpOp1,
3722 SDValue Chain) const {
3723 // There is no hardware support for v4f32 (unless we have the vector
3724 // enhancements facility 1), so extend the vector into two v2f64s
3725 // and compare those.
3726 if (CmpOp0.getValueType() == MVT::v4f32 &&
3727 !Subtarget.hasVectorEnhancements1()) {
3728 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain);
3729 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain);
3730 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain);
3731 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain);
3732 if (Chain) {
3733 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other);
3734 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1);
3735 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1);
3736 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3737 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1),
3738 H1.getValue(1), L1.getValue(1),
3739 HRes.getValue(1), LRes.getValue(1) };
3740 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
3741 SDValue Ops[2] = { Res, NewChain };
3742 return DAG.getMergeValues(Ops, DL);
3743 }
3744 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
3745 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
3746 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
3747 }
3748 if (Chain) {
3749 SDVTList VTs = DAG.getVTList(VT, MVT::Other);
3750 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1);
3751 }
3752 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
3753}
3754
3755// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
3756// an integer mask of type VT. If Chain is nonnull, we have a strict
3757// floating-point comparison. If in addition IsSignaling is true, we have
3758// a strict signaling floating-point comparison.
// NOTE(review): the embedded numbering skips line 3814 (presumably the end
// of the sign-bit `if` condition and its opening brace) and line 3827
// (presumably the adjustment of CC before the operand-swapped retry).
// Confirm against the upstream file.
3759SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
3760 const SDLoc &DL, EVT VT,
3761 ISD::CondCode CC,
3762 SDValue CmpOp0,
3763 SDValue CmpOp1,
3764 SDValue Chain,
3765 bool IsSignaling) const {
3766 bool IsFP = CmpOp0.getValueType().isFloatingPoint();
3767 assert (!Chain || IsFP);
3768 assert (!IsSignaling || Chain);
3769 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP :
3770 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int;
// Invert requests a final bitwise NOT of the comparison result.
3771 bool Invert = false;
3772 SDValue Cmp;
3773 switch (CC) {
3774 // Handle tests for order using (or (ogt y x) (oge x y)).
3775 case ISD::SETUO:
3776 Invert = true;
3777 [[fallthrough]];
3778 case ISD::SETO: {
3779 assert(IsFP && "Unexpected integer comparison");
3780 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3781 DL, VT, CmpOp1, CmpOp0, Chain);
3782 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode),
3783 DL, VT, CmpOp0, CmpOp1, Chain);
3784 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
// In strict mode, join the output chains of both comparisons.
3785 if (Chain)
3786 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3787 LT.getValue(1), GE.getValue(1));
3788 break;
3789 }
3790
3791 // Handle <> tests using (or (ogt y x) (ogt x y)).
3792 case ISD::SETUEQ:
3793 Invert = true;
3794 [[fallthrough]];
3795 case ISD::SETONE: {
3796 assert(IsFP && "Unexpected integer comparison");
3797 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3798 DL, VT, CmpOp1, CmpOp0, Chain);
3799 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode),
3800 DL, VT, CmpOp0, CmpOp1, Chain);
3801 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
3802 if (Chain)
3803 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
3804 LT.getValue(1), GT.getValue(1));
3805 break;
3806 }
3807
3808 // Otherwise a single comparison is enough. It doesn't really
3809 // matter whether we try the inversion or the swap first, since
3810 // there are no cases where both work.
3811 default:
3812 // Optimize sign-bit comparisons to signed compares.
3813 if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
3815 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
3816 APInt Mask;
// An EQ/NE test of (and X, splat(sign mask)) becomes a SETGE/SETLT test
// of X itself.
3817 if (CmpOp0.getOpcode() == ISD::AND
3818 && ISD::isConstantSplatVector(CmpOp0.getOperand(1).getNode(), Mask)
3819 && Mask == APInt::getSignMask(EltSize)) {
3820 CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
3821 CmpOp0 = CmpOp0.getOperand(0);
3822 }
3823 }
3824 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3825 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
3826 else {
// Retry with the operands exchanged (note CmpOp1 before CmpOp0 below).
3828 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
3829 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain);
3830 else
3831 llvm_unreachable("Unhandled comparison");
3832 }
3833 if (Chain)
3834 Chain = Cmp.getValue(1);
3835 break;
3836 }
// Apply the pending inversion by XORing with an all-ones vector.
3837 if (Invert) {
3838 SDValue Mask =
3839 DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64));
3840 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
3841 }
// If the chain is not already produced by the Cmp node itself, return the
// value and the chain together as merged values.
3842 if (Chain && Chain.getNode() != Cmp.getNode()) {
3843 SDValue Ops[2] = { Cmp, Chain };
3844 Cmp = DAG.getMergeValues(Ops, DL);
3845 }
3846 return Cmp;
3847}
3848
3849SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
3850 SelectionDAG &DAG) const {
3851 SDValue CmpOp0 = Op.getOperand(0);
3852 SDValue CmpOp1 = Op.getOperand(1);
3853 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
3854 SDLoc DL(Op);
3855 EVT VT = Op.getValueType();
3856 if (VT.isVector())
3857 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
3858
3859 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3860 SDValue CCReg = emitCmp(DAG, DL, C);
3861 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3862}
3863
3864SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op,
3865 SelectionDAG &DAG,
3866 bool IsSignaling) const {
3867 SDValue Chain = Op.getOperand(0);
3868 SDValue CmpOp0 = Op.getOperand(1);
3869 SDValue CmpOp1 = Op.getOperand(2);
3870 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get();
3871 SDLoc DL(Op);
3872 EVT VT = Op.getNode()->getValueType(0);
3873 if (VT.isVector()) {
3874 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1,
3875 Chain, IsSignaling);
3876 return Res.getValue(Op.getResNo());
3877 }
3878
3879 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling));
3880 SDValue CCReg = emitCmp(DAG, DL, C);
3881 CCReg->setFlags(Op->getFlags());
3882 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask);
3883 SDValue Ops[2] = { Result, CCReg.getValue(1) };
3884 return DAG.getMergeValues(Ops, DL);
3885}
3886
3887SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3888 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3889 SDValue CmpOp0 = Op.getOperand(2);
3890 SDValue CmpOp1 = Op.getOperand(3);
3891 SDValue Dest = Op.getOperand(4);
3892 SDLoc DL(Op);
3893
3894 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3895 SDValue CCReg = emitCmp(DAG, DL, C);
3896 return DAG.getNode(
3897 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0),
3898 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
3899 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg);
3900}
3901
3902// Return true if Pos is CmpOp and Neg is the negative of CmpOp,
3903// allowing Pos and Neg to be wider than CmpOp.
3904static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) {
3905 return (Neg.getOpcode() == ISD::SUB &&
3906 Neg.getOperand(0).getOpcode() == ISD::Constant &&
3907 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos &&
3908 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND &&
3909 Pos.getOperand(0) == CmpOp)));
3910}
3911
3912// Return the absolute or negative absolute of Op; IsNegative decides which.
// The result has the same value type as Op.
// NOTE(review): listing line 3913 (the start of this definition, with the
// return type, name and leading parameters) is absent from this extract;
// restore it from the upstream file before building.
3914 bool IsNegative) {
// Start from |Op|.
3915 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op);
// For the negative form, compute 0 - |Op|.
3916 if (IsNegative)
3917 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(),
3918 DAG.getConstant(0, DL, Op.getValueType()), Op);
3919 return Op;
3920}
3921
// Build an i128 select out of a vector-style integer compare mask:
// the result is (TrueOp & Mask) | (FalseOp & ~Mask), where Mask is produced by
// VICMPE, VICMPH or VICMPHL on the comparison operands.
// C is taken by value, so the normalisation below does not affect the caller.
// NOTE(review): listing line 3922 (the start of this definition) is absent
// from this extract; restore it from the upstream file before building.
3923 Comparison C, SDValue TrueOp, SDValue FalseOp) {
3924 EVT VT = MVT::i128;
3925 unsigned Op;
3926
// NE, GE and LE are handled as their inverses: swap the selected values and
// flip the mask within the set of valid CC values.
3927 if (C.CCMask == SystemZ::CCMASK_CMP_NE ||
3928 C.CCMask == SystemZ::CCMASK_CMP_GE ||
3929 C.CCMask == SystemZ::CCMASK_CMP_LE) {
3930 std::swap(TrueOp, FalseOp);
3931 C.CCMask ^= C.CCValid;
3932 }
// LT is handled as GT with the comparison operands exchanged.
3933 if (C.CCMask == SystemZ::CCMASK_CMP_LT) {
3934 std::swap(C.Op0, C.Op1);
3935 C.CCMask = SystemZ::CCMASK_CMP_GT;
3936 }
3937 switch (C.CCMask) {
// NOTE(review): the case label on listing line 3938 is absent from this
// extract; after the normalisation above it is presumably the EQ mask —
// confirm against upstream.
3939 Op = SystemZISD::VICMPE;
3940 break;
// NOTE(review): the case label on listing line 3941 is absent from this
// extract; presumably the GT mask — confirm against upstream.
// Unsigned-only comparisons use the logical "high" compare.
3942 if (C.ICmpType == SystemZICMP::UnsignedOnly)
3943 Op = SystemZISD::VICMPHL;
3944 else
3945 Op = SystemZISD::VICMPH;
3946 break;
3947 default:
3948 llvm_unreachable("Unhandled comparison");
3949 break;
3950 }
3951
// Blend the two values through the compare mask.
3952 SDValue Mask = DAG.getNode(Op, DL, VT, C.Op0, C.Op1);
3953 TrueOp = DAG.getNode(ISD::AND, DL, VT, TrueOp, Mask);
3954 FalseOp = DAG.getNode(ISD::AND, DL, VT, FalseOp, DAG.getNOT(DL, Mask, VT));
3955 return DAG.getNode(ISD::OR, DL, VT, TrueOp, FalseOp);
3956}
3957
3958SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
3959 SelectionDAG &DAG) const {
3960 SDValue CmpOp0 = Op.getOperand(0);
3961 SDValue CmpOp1 = Op.getOperand(1);
3962 SDValue TrueOp = Op.getOperand(2);
3963 SDValue FalseOp = Op.getOperand(3);
3964 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
3965 SDLoc DL(Op);
3966
3967 // SELECT_CC involving f16 will not have the cmp-ops promoted by the
3968 // legalizer, as it will be handled according to the type of the resulting
3969 // value. Extend them here if needed.
3970 if (CmpOp0.getSimpleValueType() == MVT::f16) {
3971 CmpOp0 = DAG.getFPExtendOrRound(CmpOp0, SDLoc(CmpOp0), MVT::f32);
3972 CmpOp1 = DAG.getFPExtendOrRound(CmpOp1, SDLoc(CmpOp1), MVT::f32);
3973 }
3974
3975 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL));
3976
3977 // Check for absolute and negative-absolute selections, including those
3978 // where the comparison value is sign-extended (for LPGFR and LNGFR).
3979 // This check supplements the one in DAGCombiner.
3980 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ &&
3981 C.CCMask != SystemZ::CCMASK_CMP_NE &&
3982 C.Op1.getOpcode() == ISD::Constant &&
3983 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 &&
3984 C.Op1->getAsZExtVal() == 0) {
3985 if (isAbsolute(C.Op0, TrueOp, FalseOp))
3986 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT);
3987 if (isAbsolute(C.Op0, FalseOp, TrueOp))
3988 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
3989 }
3990
3991 if (Subtarget.hasVectorEnhancements3() &&
3992 C.Opcode == SystemZISD::ICMP &&
3993 C.Op0.getValueType() == MVT::i128 &&
3994 TrueOp.getValueType() == MVT::i128) {
3995 return getI128Select(DAG, DL, C, TrueOp, FalseOp);
3996 }
3997
3998 SDValue CCReg = emitCmp(DAG, DL, C);
3999 SDValue Ops[] = {TrueOp, FalseOp,
4000 DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
4001 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg};
4002
4003 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops);
4004}
4005
// Lower a global address node. Depending on the symbol and target this is a
// PC-relative address (optionally with a folded offset), a load from the GOT
// on ELF, or an ADA entry obtained through getADAEntry on z/OS. Any offset
// that could not be folded is added explicitly at the end.
4006SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node,
4007 SelectionDAG &DAG) const {
4008 SDLoc DL(Node);
4009 const GlobalValue *GV = Node->getGlobal();
4010 int64_t Offset = Node->getOffset();
4011 EVT PtrVT = getPointerTy(DAG.getDataLayout());
// NOTE(review): listing lines 4012 and 4014 are absent from this extract. The
// code below uses `CM` and `Result`, so their declarations are presumably on
// those lines — confirm against the upstream file.
4013
4015 if (Subtarget.isPC32DBLSymbol(GV, CM)) {
4016 if (isInt<32>(Offset)) {
4017 // Assign anchors at 1<<12 byte boundaries.
4018 uint64_t Anchor = Offset & ~uint64_t(0xfff);
4019 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor);
4020 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4021
4022 // The offset can be folded into the address if it is aligned to a
4023 // halfword.
// After this subtraction Offset is the distance from the anchor.
4024 Offset -= Anchor;
4025 if (Offset != 0 && (Offset & 1) == 0) {
4026 SDValue Full =
4027 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset);
4028 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result);
// Fully folded: nothing is left for the explicit ADD below.
4029 Offset = 0;
4030 }
4031 } else {
4032 // Conservatively load a constant offset greater than 32 bits into a
4033 // register below.
4034 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
4035 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4036 }
4037 } else if (Subtarget.isTargetELF()) {
// Not directly addressable: form the address of the GOT slot and load the
// symbol address from it.
4038 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT);
4039 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4040 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
// NOTE(review): listing line 4041 (the remaining argument(s) and closing of
// the getLoad call) is absent from this extract — restore from upstream.
4042 } else if (Subtarget.isTargetzOS()) {
4043 Result = getADAEntry(DAG, GV, DL, PtrVT);
4044 } else
4045 llvm_unreachable("Unexpected Subtarget");
4046
4047 // If there was a non-zero offset that we didn't fold, create an explicit
4048 // addition for it.
4049 if (Offset != 0)
4050 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4051 DAG.getSignedConstant(Offset, DL, PtrVT));
4052
4053 return Result;
4054}
4055
// Emit a call to __tls_get_offset using the call node opcode given by Opcode.
// GOTOffset is passed in %r2 and the GOT address in %r12; the value returned
// in %r2 is the result of this function.
4056SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
4057 SelectionDAG &DAG,
4058 unsigned Opcode,
4059 SDValue GOTOffset) const {
4060 SDLoc DL(Node);
4061 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4062 SDValue Chain = DAG.getEntryNode();
4063 SDValue Glue;
4064
// NOTE(review): listing lines 4065-4066 are absent from this extract. They
// presumably hold the condition guarding the fatal error below (a check for
// the GHC calling convention, judging by the message) — confirm upstream.
4067 report_fatal_error("In GHC calling convention TLS is not supported");
4068
4069 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
4070 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
// The two register copies are glued together so they stay adjacent.
4071 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
4072 Glue = Chain.getValue(1);
4073 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
4074 Glue = Chain.getValue(1);
4075
4076 // The first call operand is the chain and the second is the TLS symbol.
// NOTE(review): listing line 4077 is absent from this extract; the operand
// list `Ops` used below is presumably declared there — confirm upstream.
4078 Ops.push_back(Chain);
4079 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
4080 Node->getValueType(0),
4081 0, 0));
4082
4083 // Add argument registers to the end of the list so that they are
4084 // known live into the call.
4085 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
4086 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
4087
4088 // Add a register mask operand representing the call-preserved registers.
// The mask is always requested for CallingConv::C.
4089 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4090 const uint32_t *Mask =
4091 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C);
4092 assert(Mask && "Missing call preserved mask for calling convention");
4093 Ops.push_back(DAG.getRegisterMask(Mask));
4094
4095 // Glue the call to the argument copies.
4096 Ops.push_back(Glue);
4097
4098 // Emit the call.
// The call node produces a chain (result 0) and glue (result 1).
4099 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4100 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
4101 Glue = Chain.getValue(1);
4102
4103 // Copy the return value from %r2.
4104 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
4105}
4106
4107SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
4108 SelectionDAG &DAG) const {
4109 SDValue Chain = DAG.getEntryNode();
4110 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4111
4112 // The high part of the thread pointer is in access register 0.
4113 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
4114 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
4115
4116 // The low part of the thread pointer is in access register 1.
4117 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
4118 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
4119
4120 // Merge them into a single 64-bit address.
4121 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi,
4122 DAG.getConstant(32, DL, PtrVT));
4123 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
4124}
4125
// Lower a thread-local global address. With emulated TLS the generic
// LowerToTLSEmulatedModel is used; otherwise the address is computed as the
// thread pointer plus an offset whose derivation depends on the TLS model.
// NOTE(review): many lines of this definition are absent from this extract
// (listing lines 4135-4136, 4142, 4144, 4147, 4152, 4159, 4162, 4167, 4177,
// 4180, 4185, 4194, 4198, 4205 and 4210). The notes below mark each gap;
// restore the lines from the upstream file before building.
4126SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
4127 SelectionDAG &DAG) const {
4128 if (DAG.getTarget().useEmulatedTLS())
4129 return LowerToTLSEmulatedModel(Node, DAG);
4130 SDLoc DL(Node);
4131 const GlobalValue *GV = Node->getGlobal();
4132 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4133 TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
4134
// NOTE(review): the condition guarding this fatal error (lines 4135-4136) is
// missing; by the message it presumably tests for the GHC calling convention.
4137 report_fatal_error("In GHC calling convention TLS is not supported");
4138
4139 SDValue TP = lowerThreadPointer(DL, DAG);
4140
4141 // Get the offset of GA from the thread pointer, based on the TLS model.
// NOTE(review): line 4142 is missing; `Offset` is presumably declared there.
4143 switch (model) {
// NOTE(review): case label on line 4144 is missing; the TLS_GDCALL below
// suggests this is the general-dynamic model — confirm upstream.
4145 // Load the GOT offset of the tls_index (module ID / per-symbol offset).
4146 SystemZConstantPoolValue *CPV =
// NOTE(review): the initialiser of CPV (line 4147) is missing.
4148
4149 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4150 Offset = DAG.getLoad(
4151 PtrVT, DL, DAG.getEntryNode(), Offset,
// NOTE(review): the final argument of this getLoad (line 4152) is missing.
4153
4154 // Call __tls_get_offset to retrieve the offset.
4155 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
4156 break;
4157 }
4158
// NOTE(review): case label on line 4159 is missing; the TLS_LDCALL below
// suggests this is the local-dynamic model — confirm upstream.
4160 // Load the GOT offset of the module ID.
4161 SystemZConstantPoolValue *CPV =
// NOTE(review): the initialiser of CPV (line 4162) is missing.
4163
4164 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4165 Offset = DAG.getLoad(
4166 PtrVT, DL, DAG.getEntryNode(), Offset,
// NOTE(review): the final argument of this getLoad (line 4167) is missing.
4168
4169 // Call __tls_get_offset to retrieve the module base offset.
4170 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
4171
4172 // Note: The SystemZLDCleanupPass will remove redundant computations
4173 // of the module base offset. Count total number of local-dynamic
4174 // accesses to trigger execution of that pass.
4175 SystemZMachineFunctionInfo* MFI =
4176 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
// NOTE(review): line 4177 is missing; presumably the counter update on MFI
// that the comment above describes.
4178
4179 // Add the per-symbol offset.
// NOTE(review): line 4180 is missing; CPV is presumably reassigned there to
// the constant-pool value loaded below.
4181
4182 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4183 DTPOffset = DAG.getLoad(
4184 PtrVT, DL, DAG.getEntryNode(), DTPOffset,
// NOTE(review): the final argument of this getLoad (line 4185) is missing.
4186
// Module base offset plus per-symbol offset.
4187 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
4188 break;
4189 }
4190
4191 case TLSModel::InitialExec: {
4192 // Load the offset from the GOT.
4193 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
// NOTE(review): the target-flags argument (line 4194) is missing.
4195 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
4196 Offset =
4197 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset,
// NOTE(review): the final argument of this getLoad (line 4198) is missing.
4199 break;
4200 }
4201
4202 case TLSModel::LocalExec: {
4203 // Force the offset into the constant pool and load it from there.
4204 SystemZConstantPoolValue *CPV =
// NOTE(review): the initialiser of CPV (line 4205) is missing.
4206
4207 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8));
4208 Offset = DAG.getLoad(
4209 PtrVT, DL, DAG.getEntryNode(), Offset,
// NOTE(review): the final argument of this getLoad (line 4210) is missing.
4211 break;
4212 }
4213 }
4214
4215 // Add the base and offset together.
4216 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
4217}
4218
4219SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node,
4220 SelectionDAG &DAG) const {
4221 SDLoc DL(Node);
4222 const BlockAddress *BA = Node->getBlockAddress();
4223 int64_t Offset = Node->getOffset();
4224 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4225
4226 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset);
4227 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4228 return Result;
4229}
4230
4231SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT,
4232 SelectionDAG &DAG) const {
4233 SDLoc DL(JT);
4234 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4235 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
4236
4237 // Use LARL to load the address of the table.
4238 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4239}
4240
// Lower a constant pool node to a PC-relative reference to the pool entry.
// Machine constant pool entries and ordinary IR constants use different
// getTargetConstantPool overloads; only the latter carries an offset.
4241SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
4242 SelectionDAG &DAG) const {
4243 SDLoc DL(CP);
4244 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4245
// NOTE(review): listing line 4246 is absent from this extract; `Result`,
// assigned below, is presumably declared there — confirm upstream.
4247 if (CP->isMachineConstantPoolEntry())
4248 Result =
4249 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign());
4250 else
4251 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(),
4252 CP->getOffset());
4253
4254 // Use LARL to load the address of the constant pool entry.
4255 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result);
4256}
4257
4258SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
4259 SelectionDAG &DAG) const {
4260 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4261 MachineFunction &MF = DAG.getMachineFunction();
4262 MachineFrameInfo &MFI = MF.getFrameInfo();
4263 MFI.setFrameAddressIsTaken(true);
4264
4265 SDLoc DL(Op);
4266 unsigned Depth = Op.getConstantOperandVal(0);
4267 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4268
4269 // By definition, the frame address is the address of the back chain. (In
4270 // the case of packed stack without backchain, return the address where the
4271 // backchain would have been stored. This will either be an unused space or
4272 // contain a saved register).
4273 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF);
4274 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
4275
4276 if (Depth > 0) {
4277 // FIXME The frontend should detect this case.
4278 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4279 report_fatal_error("Unsupported stack frame traversal count");
4280
4281 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT);
4282 while (Depth--) {
4283 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain,
4284 MachinePointerInfo());
4285 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset);
4286 }
4287 }
4288
4289 return BackChain;
4290}
4291
// Lower RETURNADDR. Operand 0 of Op is the constant frame depth. For depth 0
// the return address is read from the link register; for larger depths it is
// loaded from the frame found by lowerFRAMEADDR, which requires the backchain
// feature.
4292SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
4293 SelectionDAG &DAG) const {
4294 MachineFunction &MF = DAG.getMachineFunction();
4295 MachineFrameInfo &MFI = MF.getFrameInfo();
4296 MFI.setReturnAddressIsTaken(true);
4297
4298 SDLoc DL(Op);
4299 unsigned Depth = Op.getConstantOperandVal(0);
4300 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4301
4302 if (Depth > 0) {
4303 // FIXME The frontend should detect this case.
4304 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain())
4305 report_fatal_error("Unsupported stack frame traversal count");
4306
// Op is passed through unchanged, so lowerFRAMEADDR walks the same depth.
4307 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
4308 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
4309 int Offset = TFL->getReturnAddressOffset(MF);
// The return address lives at a fixed offset from that frame address.
4310 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr,
4311 DAG.getSignedConstant(Offset, DL, PtrVT));
4312 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr,
4313 MachinePointerInfo());
4314 }
4315
4316 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an
4317 // implicit live-in.
4318 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters();
// NOTE(review): listing line 4319 is absent from this extract. It presumably
// begins the statement that defines `LinkReg` (used below) from CCR and ends
// with the register-class argument on the next line — confirm upstream.
4320 &SystemZ::GR64BitRegClass);
4321 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT);
4322}
4323
4324SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op,
4325 SelectionDAG &DAG) const {
4326 SDLoc DL(Op);
4327 SDValue In = Op.getOperand(0);
4328 EVT InVT = In.getValueType();
4329 EVT ResVT = Op.getValueType();
4330
4331 // Convert loads directly. This is normally done by DAGCombiner,
4332 // but we need this case for bitcasts that are created during lowering
4333 // and which are then lowered themselves.
4334 if (auto *LoadN = dyn_cast<LoadSDNode>(In))
4335 if (ISD::isNormalLoad(LoadN)) {
4336 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(),
4337 LoadN->getBasePtr(), LoadN->getMemOperand());
4338 // Update the chain uses.
4339 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1));
4340 return NewLoad;
4341 }
4342
4343 if (InVT == MVT::i32 && ResVT == MVT::f32) {
4344 SDValue In64;
4345 if (Subtarget.hasHighWord()) {
4346 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL,
4347 MVT::i64);
4348 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4349 MVT::i64, SDValue(U64, 0), In);
4350 } else {
4351 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In);
4352 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64,
4353 DAG.getConstant(32, DL, MVT::i64));
4354 }
4355 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
4356 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
4357 DL, MVT::f32, Out64);
4358 }
4359 if (InVT == MVT::f32 && ResVT == MVT::i32) {
4360 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
4361 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
4362 MVT::f64, SDValue(U64, 0), In);
4363 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
4364 if (Subtarget.hasHighWord())
4365 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL,
4366 MVT::i32, Out64);
4367 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64,
4368 DAG.getConstant(32, DL, MVT::i64));
4369 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift);
4370 }
4371 llvm_unreachable("Unexpected bitcast combination");
4372}
4373
4374SDValue SystemZTargetLowering::lowerVASTART(SDValue Op,
4375 SelectionDAG &DAG) const {
4376
4377 if (Subtarget.isTargetXPLINK64())
4378 return lowerVASTART_XPLINK(Op, DAG);
4379 else
4380 return lowerVASTART_ELF(Op, DAG);
4381}
4382
4383SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op,
4384 SelectionDAG &DAG) const {
4385 MachineFunction &MF = DAG.getMachineFunction();
4386 SystemZMachineFunctionInfo *FuncInfo =
4387 MF.getInfo<SystemZMachineFunctionInfo>();
4388
4389 SDLoc DL(Op);
4390
4391 // vastart just stores the address of the VarArgsFrameIndex slot into the
4392 // memory location argument.
4393 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4394 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4395 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4396 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4397 MachinePointerInfo(SV));
4398}
4399
// Lower VASTART for ELF. Operand 0 of Op is the chain, operand 1 the address
// of the va_list object and operand 2 the source value describing that
// memory. Four pointer-typed fields are stored at consecutive 8-byte offsets:
// first GPR, first FPR, varargs frame index and register-save frame index.
// The individual stores are joined with a TokenFactor.
4400SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op,
4401 SelectionDAG &DAG) const {
4402 MachineFunction &MF = DAG.getMachineFunction();
4403 SystemZMachineFunctionInfo *FuncInfo =
4404 MF.getInfo<SystemZMachineFunctionInfo>();
4405 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4406
4407 SDValue Chain = Op.getOperand(0);
4408 SDValue Addr = Op.getOperand(1);
4409 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4410 SDLoc DL(Op);
4411
4412 // The initial values of each field.
4413 const unsigned NumFields = 4;
4414 SDValue Fields[NumFields] = {
4415 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT),
4416 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT),
4417 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT),
4418 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT)
4419 };
4420
4421 // Store each field into its respective slot.
// All stores hang off the incoming Chain rather than each other.
4422 SDValue MemOps[NumFields];
4423 unsigned Offset = 0;
4424 for (unsigned I = 0; I < NumFields; ++I) {
4425 SDValue FieldAddr = Addr;
4426 if (Offset != 0)
4427 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr,
// NOTE(review): listing line 4428 (the second ADD operand and the end of the
// statement) is absent from this extract; it is presumably a constant built
// from Offset — confirm upstream.
4429 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr,
4430 MachinePointerInfo(SV, Offset));
4431 Offset += 8;
4432 }
4433 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4434}
4435
4436SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op,
4437 SelectionDAG &DAG) const {
4438 SDValue Chain = Op.getOperand(0);
4439 SDValue DstPtr = Op.getOperand(1);
4440 SDValue SrcPtr = Op.getOperand(2);
4441 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4442 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4443 SDLoc DL(Op);
4444
4445 uint32_t Sz =
4446 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32;
4447 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL),
4448 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false,
4449 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV),
4450 MachinePointerInfo(SrcSV));
4451}
4452
4453SDValue
4454SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
4455 SelectionDAG &DAG) const {
4456 if (Subtarget.isTargetXPLINK64())
4457 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG);
4458 else
4459 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG);
4460}
4461
4462SDValue
4463SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op,
4464 SelectionDAG &DAG) const {
4465 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4466 MachineFunction &MF = DAG.getMachineFunction();
4467 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4468 SDValue Chain = Op.getOperand(0);
4469 SDValue Size = Op.getOperand(1);
4470 SDValue Align = Op.getOperand(2);
4471 SDLoc DL(Op);
4472
4473 // If user has set the no alignment function attribute, ignore
4474 // alloca alignments.
4475 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4476
4477 uint64_t StackAlign = TFI->getStackAlignment();
4478 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4479 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4480
4481 SDValue NeededSpace = Size;
4482
4483 // Add extra space for alignment if needed.
4484 EVT PtrVT = getPointerTy(MF.getDataLayout());
4485 if (ExtraAlignSpace)
4486 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace,
4487 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4488
4489 bool IsSigned = false;
4490 bool DoesNotReturn = false;
4491 bool IsReturnValueUsed = false;
4492 EVT VT = Op.getValueType();
4493 SDValue AllocaCall =
4494 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace),
4495 CallingConv::C, IsSigned, DL, DoesNotReturn,
4496 IsReturnValueUsed)
4497 .first;
4498
4499 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue
4500 // to end of call in order to ensure it isn't broken up from the call
4501 // sequence.
4502 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
4503 Register SPReg = Regs.getStackPointerRegister();
4504 Chain = AllocaCall.getValue(1);
4505 SDValue Glue = AllocaCall.getValue(2);
4506 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue);
4507 Chain = NewSPRegNode.getValue(1);
4508
4509 MVT PtrMVT = getPointerMemTy(MF.getDataLayout());
4510 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT);
4511 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust);
4512
4513 // Dynamically realign if needed.
4514 if (ExtraAlignSpace) {
4515 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result,
4516 DAG.getConstant(ExtraAlignSpace, DL, PtrVT));
4517 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result,
4518 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT));
4519 }
4520
4521 SDValue Ops[2] = {Result, Chain};
4522 return DAG.getMergeValues(Ops, DL);
4523}
4524
// Lower DYNAMIC_STACKALLOC for ELF. Operand 0 of Op is the chain, operand 1
// the requested size and operand 2 the requested alignment. The stack pointer
// is lowered by the (possibly padded) size, either directly or through
// PROBED_ALLOCA when inline stack probing is enabled. The back chain is
// preserved if the subtarget uses one. Returns the allocated address merged
// with the outgoing chain.
4525SDValue
4526SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op,
4527 SelectionDAG &DAG) const {
4528 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
4529 MachineFunction &MF = DAG.getMachineFunction();
4530 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack");
4531 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
4532
4533 SDValue Chain = Op.getOperand(0);
4534 SDValue Size = Op.getOperand(1);
4535 SDValue Align = Op.getOperand(2);
4536 SDLoc DL(Op);
4537
4538 // If user has set the no alignment function attribute, ignore
4539 // alloca alignments.
4540 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0);
4541
// ExtraAlignSpace is non-zero exactly when the requested alignment exceeds
// the stack alignment.
4542 uint64_t StackAlign = TFI->getStackAlignment();
4543 uint64_t RequiredAlign = std::max(AlignVal, StackAlign);
4544 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign;
4545
// NOTE(review): listing line 4546 is absent from this extract; `SPReg`, used
// below, is presumably defined there — confirm upstream.
4547 SDValue NeededSpace = Size;
4548
4549 // Get a reference to the stack pointer.
4550 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64);
4551
4552 // If we need a backchain, save it now.
4553 SDValue Backchain;
4554 if (StoreBackchain)
4555 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
4556 MachinePointerInfo());
4557
4558 // Add extra space for alignment if needed.
4559 if (ExtraAlignSpace)
4560 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace,
4561 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4562
4563 // Get the new stack pointer value.
4564 SDValue NewSP;
4565 if (hasInlineStackProbe(MF)) {
// PROBED_ALLOCA yields the new stack pointer and a chain; no explicit copy
// back to SPReg is emitted on this path.
4566 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL,
4567 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace);
4568 Chain = NewSP.getValue(1);
4569 }
4570 else {
4571 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace);
4572 // Copy the new stack pointer back.
4573 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP);
4574 }
4575
4576 // The allocated data lives above the 160 bytes allocated for the standard
4577 // frame, plus any outgoing stack arguments. We don't know how much that
4578 // amounts to yet, so emit a special ADJDYNALLOC placeholder.
4579 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4580 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust);
4581
4582 // Dynamically realign if needed.
// Round up: add the padding, then clear the low bits below RequiredAlign.
4583 if (RequiredAlign > StackAlign) {
4584 Result =
4585 DAG.getNode(ISD::ADD, DL, MVT::i64, Result,
4586 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64));
4587 Result =
4588 DAG.getNode(ISD::AND, DL, MVT::i64, Result,
4589 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64));
4590 }
4591
// Re-store the saved back chain at the slot belonging to the new stack
// pointer.
4592 if (StoreBackchain)
4593 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
4594 MachinePointerInfo());
4595
4596 SDValue Ops[2] = { Result, Chain };
4597 return DAG.getMergeValues(Ops, DL);
4598}
4599
4600SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
4601 SDValue Op, SelectionDAG &DAG) const {
4602 SDLoc DL(Op);
4603
4604 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
4605}
4606
4607SDValue SystemZTargetLowering::lowerMULH(SDValue Op,
4608 SelectionDAG &DAG,
4609 unsigned Opcode) const {
4610 EVT VT = Op.getValueType();
4611 SDLoc DL(Op);
4612 SDValue Even, Odd;
4613
4614 // This custom expander is only used on z17 and later for 64-bit types.
4615 assert(!is32Bit(VT));
4616 assert(Subtarget.hasMiscellaneousExtensions2());
4617
4618 // SystemZISD::xMUL_LOHI returns the low result in the odd register and
4619 // the high result in the even register. Return the latter.
4620 lowerGR128Binary(DAG, DL, VT, Opcode,
4621 Op.getOperand(0), Op.getOperand(1), Even, Odd);
4622 return Even;
4623}
4624
4625SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
4626 SelectionDAG &DAG) const {
4627 EVT VT = Op.getValueType();
4628 SDLoc DL(Op);
4629 SDValue Ops[2];
4630 if (is32Bit(VT))
4631 // Just do a normal 64-bit multiplication and extract the results.
4632 // We define this so that it can be used for constant division.
4633 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0),
4634 Op.getOperand(1), Ops[1], Ops[0]);
4635 else if (Subtarget.hasMiscellaneousExtensions2())
4636 // SystemZISD::SMUL_LOHI returns the low result in the odd register and
4637 // the high result in the even register. ISD::SMUL_LOHI is defined to
4638 // return the low half first, so the results are in reverse order.
4639 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI,
4640 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4641 else {
4642 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI:
4643 //
4644 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64)
4645 //
4646 // but using the fact that the upper halves are either all zeros
4647 // or all ones:
4648 //
4649 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64)
4650 //
4651 // and grouping the right terms together since they are quicker than the
4652 // multiplication:
4653 //
4654 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64)
4655 SDValue C63 = DAG.getConstant(63, DL, MVT::i64);
4656 SDValue LL = Op.getOperand(0);
4657 SDValue RL = Op.getOperand(1);
4658 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63);
4659 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63);
4660 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4661 // the high result in the even register. ISD::SMUL_LOHI is defined to
4662 // return the low half first, so the results are in reverse order.
4663 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4664 LL, RL, Ops[1], Ops[0]);
4665 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH);
4666 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL);
4667 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL);
4668 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum);
4669 }
4670 return DAG.getMergeValues(Ops, DL);
4671}
4672
4673SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op,
4674 SelectionDAG &DAG) const {
4675 EVT VT = Op.getValueType();
4676 SDLoc DL(Op);
4677 SDValue Ops[2];
4678 if (is32Bit(VT))
4679 // Just do a normal 64-bit multiplication and extract the results.
4680 // We define this so that it can be used for constant division.
4681 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0),
4682 Op.getOperand(1), Ops[1], Ops[0]);
4683 else
4684 // SystemZISD::UMUL_LOHI returns the low result in the odd register and
4685 // the high result in the even register. ISD::UMUL_LOHI is defined to
4686 // return the low half first, so the results are in reverse order.
4687 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI,
4688 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4689 return DAG.getMergeValues(Ops, DL);
4690}
4691
4692SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op,
4693 SelectionDAG &DAG) const {
4694 SDValue Op0 = Op.getOperand(0);
4695 SDValue Op1 = Op.getOperand(1);
4696 EVT VT = Op.getValueType();
4697 SDLoc DL(Op);
4698
4699 // We use DSGF for 32-bit division. This means the first operand must
4700 // always be 64-bit, and the second operand should be 32-bit whenever
4701 // that is possible, to improve performance.
4702 if (is32Bit(VT))
4703 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0);
4704 else if (DAG.ComputeNumSignBits(Op1) > 32)
4705 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1);
4706
4707 // DSG(F) returns the remainder in the even register and the
4708 // quotient in the odd register.
4709 SDValue Ops[2];
4710 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]);
4711 return DAG.getMergeValues(Ops, DL);
4712}
4713
4714SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op,
4715 SelectionDAG &DAG) const {
4716 EVT VT = Op.getValueType();
4717 SDLoc DL(Op);
4718
4719 // DL(G) returns the remainder in the even register and the
4720 // quotient in the odd register.
4721 SDValue Ops[2];
4722 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM,
4723 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]);
4724 return DAG.getMergeValues(Ops, DL);
4725}
4726
4727SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const {
4728 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation");
4729
4730 // Get the known-zero masks for each operand.
4731 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)};
4732 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]),
4733 DAG.computeKnownBits(Ops[1])};
4734
4735 // See if the upper 32 bits of one operand and the lower 32 bits of the
4736 // other are known zero. They are the low and high operands respectively.
4737 uint64_t Masks[] = { Known[0].Zero.getZExtValue(),
4738 Known[1].Zero.getZExtValue() };
4739 unsigned High, Low;
4740 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff)
4741 High = 1, Low = 0;
4742 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff)
4743 High = 0, Low = 1;
4744 else
4745 return Op;
4746
4747 SDValue LowOp = Ops[Low];
4748 SDValue HighOp = Ops[High];
4749
4750 // If the high part is a constant, we're better off using IILH.
4751 if (HighOp.getOpcode() == ISD::Constant)
4752 return Op;
4753
4754 // If the low part is a constant that is outside the range of LHI,
4755 // then we're better off using IILF.
4756 if (LowOp.getOpcode() == ISD::Constant) {
4757 int64_t Value = int32_t(LowOp->getAsZExtVal());
4758 if (!isInt<16>(Value))
4759 return Op;
4760 }
4761
4762 // Check whether the high part is an AND that doesn't change the
4763 // high 32 bits and just masks out low bits. We can skip it if so.
4764 if (HighOp.getOpcode() == ISD::AND &&
4765 HighOp.getOperand(1).getOpcode() == ISD::Constant) {
4766 SDValue HighOp0 = HighOp.getOperand(0);
4767 uint64_t Mask = HighOp.getConstantOperandVal(1);
4768 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff))))
4769 HighOp = HighOp0;
4770 }
4771
4772 // Take advantage of the fact that all GR32 operations only change the
4773 // low 32 bits by truncating Low to an i32 and inserting it directly
4774 // using a subreg. The interesting cases are those where the truncation
4775 // can be folded.
4776 SDLoc DL(Op);
4777 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp);
4778 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL,
4779 MVT::i64, HighOp, Low32);
4780}
4781
4782// Lower SADDO/SSUBO/UADDO/USUBO nodes.
4783SDValue SystemZTargetLowering::lowerXALUO(SDValue Op,
4784 SelectionDAG &DAG) const {
4785 SDNode *N = Op.getNode();
4786 SDValue LHS = N->getOperand(0);
4787 SDValue RHS = N->getOperand(1);
4788 SDLoc DL(N);
4789
4790 if (N->getValueType(0) == MVT::i128) {
4791 unsigned BaseOp = 0;
4792 unsigned FlagOp = 0;
4793 bool IsBorrow = false;
4794 switch (Op.getOpcode()) {
4795 default: llvm_unreachable("Unknown instruction!");
4796 case ISD::UADDO:
4797 BaseOp = ISD::ADD;
4798 FlagOp = SystemZISD::VACC;
4799 break;
4800 case ISD::USUBO:
4801 BaseOp = ISD::SUB;
4802 FlagOp = SystemZISD::VSCBI;
4803 IsBorrow = true;
4804 break;
4805 }
4806 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS);
4807 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS);
4808 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4809 DAG.getValueType(MVT::i1));
4810 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4811 if (IsBorrow)
4812 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4813 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4814 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4815 }
4816
4817 unsigned BaseOp = 0;
4818 unsigned CCValid = 0;
4819 unsigned CCMask = 0;
4820
4821 switch (Op.getOpcode()) {
4822 default: llvm_unreachable("Unknown instruction!");
4823 case ISD::SADDO:
4824 BaseOp = SystemZISD::SADDO;
4825 CCValid = SystemZ::CCMASK_ARITH;
4827 break;
4828 case ISD::SSUBO:
4829 BaseOp = SystemZISD::SSUBO;
4830 CCValid = SystemZ::CCMASK_ARITH;
4832 break;
4833 case ISD::UADDO:
4834 BaseOp = SystemZISD::UADDO;
4835 CCValid = SystemZ::CCMASK_LOGICAL;
4837 break;
4838 case ISD::USUBO:
4839 BaseOp = SystemZISD::USUBO;
4840 CCValid = SystemZ::CCMASK_LOGICAL;
4842 break;
4843 }
4844
4845 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
4846 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
4847
4848 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4849 if (N->getValueType(1) == MVT::i1)
4850 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4851
4852 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4853}
4854
4855static bool isAddCarryChain(SDValue Carry) {
4856 while (Carry.getOpcode() == ISD::UADDO_CARRY &&
4857 Carry->getValueType(0) != MVT::i128)
4858 Carry = Carry.getOperand(2);
4859 return Carry.getOpcode() == ISD::UADDO &&
4860 Carry->getValueType(0) != MVT::i128;
4861}
4862
4863static bool isSubBorrowChain(SDValue Carry) {
4864 while (Carry.getOpcode() == ISD::USUBO_CARRY &&
4865 Carry->getValueType(0) != MVT::i128)
4866 Carry = Carry.getOperand(2);
4867 return Carry.getOpcode() == ISD::USUBO &&
4868 Carry->getValueType(0) != MVT::i128;
4869}
4870
4871// Lower UADDO_CARRY/USUBO_CARRY nodes.
4872SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op,
4873 SelectionDAG &DAG) const {
4874
4875 SDNode *N = Op.getNode();
4876 MVT VT = N->getSimpleValueType(0);
4877
4878 // Let legalize expand this if it isn't a legal type yet.
4879 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
4880 return SDValue();
4881
4882 SDValue LHS = N->getOperand(0);
4883 SDValue RHS = N->getOperand(1);
4884 SDValue Carry = Op.getOperand(2);
4885 SDLoc DL(N);
4886
4887 if (VT == MVT::i128) {
4888 unsigned BaseOp = 0;
4889 unsigned FlagOp = 0;
4890 bool IsBorrow = false;
4891 switch (Op.getOpcode()) {
4892 default: llvm_unreachable("Unknown instruction!");
4893 case ISD::UADDO_CARRY:
4894 BaseOp = SystemZISD::VAC;
4895 FlagOp = SystemZISD::VACCC;
4896 break;
4897 case ISD::USUBO_CARRY:
4898 BaseOp = SystemZISD::VSBI;
4899 FlagOp = SystemZISD::VSBCBI;
4900 IsBorrow = true;
4901 break;
4902 }
4903 if (IsBorrow)
4904 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(),
4905 Carry, DAG.getConstant(1, DL, Carry.getValueType()));
4906 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128);
4907 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry);
4908 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry);
4909 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag,
4910 DAG.getValueType(MVT::i1));
4911 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1));
4912 if (IsBorrow)
4913 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(),
4914 Flag, DAG.getConstant(1, DL, Flag.getValueType()));
4915 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag);
4916 }
4917
4918 unsigned BaseOp = 0;
4919 unsigned CCValid = 0;
4920 unsigned CCMask = 0;
4921
4922 switch (Op.getOpcode()) {
4923 default: llvm_unreachable("Unknown instruction!");
4924 case ISD::UADDO_CARRY:
4925 if (!isAddCarryChain(Carry))
4926 return SDValue();
4927
4928 BaseOp = SystemZISD::ADDCARRY;
4929 CCValid = SystemZ::CCMASK_LOGICAL;
4931 break;
4932 case ISD::USUBO_CARRY:
4933 if (!isSubBorrowChain(Carry))
4934 return SDValue();
4935
4936 BaseOp = SystemZISD::SUBCARRY;
4937 CCValid = SystemZ::CCMASK_LOGICAL;
4939 break;
4940 }
4941
4942 // Set the condition code from the carry flag.
4943 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry,
4944 DAG.getConstant(CCValid, DL, MVT::i32),
4945 DAG.getConstant(CCMask, DL, MVT::i32));
4946
4947 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4948 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry);
4949
4950 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask);
4951 if (N->getValueType(1) == MVT::i1)
4952 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
4953
4954 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC);
4955}
4956
// Lower an ISD::CTPOP node. Three paths are visible below:
//  - a 128-bit scalar size: bitcast to v2i64, CTPOP, then VSUM;
//  - other vector types: SystemZISD::POPCNT on v16i8 followed by a
//    per-element-size reduction of the byte counts;
//  - scalar integers: SystemZISD::POPCNT on i64 followed by a shift/add
//    tree that accumulates the per-byte counts into the top byte.
4957SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
4958 SelectionDAG &DAG) const {
4959 EVT VT = Op.getValueType();
4960 SDLoc DL(Op);
// From here on Op refers to the value being counted, not the CTPOP node.
4961 Op = Op.getOperand(0);
4962
// 128-bit case: count bits in each 64-bit half, then sum the two counts
// with VSUM against an all-zero vector to form the VT result.
4963 if (VT.getScalarSizeInBits() == 128) {
4964 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op);
4965 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op);
4966 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL,
4967 DAG.getConstant(0, DL, MVT::i64));
4968 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4969 return Op;
4970 }
4971
4972 // Handle vector types via VPOPCT.
4973 if (VT.isVector()) {
4974 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
4975 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
4976 switch (VT.getScalarSizeInBits()) {
4977 case 8:
// Byte elements: the per-byte counts are already the final result.
4978 break;
4979 case 16: {
// Halfword elements: add the element to a copy of itself shifted left
// by 8, then shift the sum right by 8.
4980 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
4981 SDValue Shift = DAG.getConstant(8, DL, MVT::i32);
4982 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
4983 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
4984 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
4985 break;
4986 }
4987 case 32: {
// Word elements: a single VSUM against a zero vector.
4988 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4989 DAG.getConstant(0, DL, MVT::i32));
4990 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4991 break;
4992 }
4993 case 64: {
// Doubleword elements: two VSUM steps, first to v4i32 and then to VT.
4994 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
4995 DAG.getConstant(0, DL, MVT::i32));
4996 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
4997 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
4998 break;
4999 }
5000 default:
5001 llvm_unreachable("Unexpected type");
5002 }
5003 return Op;
5004 }
5005
5006 // Get the known-zero mask for the operand.
5007 KnownBits Known = DAG.computeKnownBits(Op);
5008 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits();
// Every bit is known to be zero, so the population count is zero.
5009 if (NumSignificantBits == 0)
5010 return DAG.getConstant(0, DL, VT);
5011
5012 // Skip known-zero high parts of the operand.
// BitSize is the number of possibly-set bits rounded up to a power of two,
// capped at the width of VT.
5013 int64_t OrigBitSize = VT.getSizeInBits();
5014 int64_t BitSize = llvm::bit_ceil(NumSignificantBits);
5015 BitSize = std::min(BitSize, OrigBitSize);
5016
5017 // The POPCNT instruction counts the number of bits in each byte.
5018 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
5019 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
5020 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
5021
5022 // Add up per-byte counts in a binary tree. All bits of Op at
5023 // position larger than BitSize remain zero throughout.
5024 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
5025 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT));
// When only part of VT is in use, mask the shifted copy back to BitSize
// bits so nothing spills above the field.
5026 if (BitSize != OrigBitSize)
5027 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
5028 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT));
5029 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
5030 }
5031
5032 // Extract overall result from high byte.
5033 if (BitSize > 8)
5034 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
5035 DAG.getConstant(BitSize - 8, DL, VT));
5036
5037 return Op;
5038}
5039
5040SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op,
5041 SelectionDAG &DAG) const {
5042 SDLoc DL(Op);
5043 AtomicOrdering FenceOrdering =
5044 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
5045 SyncScope::ID FenceSSID =
5046 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
5047
5048 // The only fence that needs an instruction is a sequentially-consistent
5049 // cross-thread fence.
5050 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent &&
5051 FenceSSID == SyncScope::System) {
5052 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other,
5053 Op.getOperand(0)),
5054 0);
5055 }
5056
5057 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
5058 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
5059}
5060
5061SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
5062 SelectionDAG &DAG) const {
5063 EVT RegVT = Op.getValueType();
5064 if (RegVT.getSizeInBits() == 128)
5065 return lowerATOMIC_LDST_I128(Op, DAG);
5066 return lowerLoadF16(Op, DAG);
5067}
5068
5069SDValue SystemZTargetLowering::lowerATOMIC_STORE(SDValue Op,
5070 SelectionDAG &DAG) const {
5071 auto *Node = cast<AtomicSDNode>(Op.getNode());
5072 if (Node->getMemoryVT().getSizeInBits() == 128)
5073 return lowerATOMIC_LDST_I128(Op, DAG);
5074 return lowerStoreF16(Op, DAG);
5075}
5076
5077SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op,
5078 SelectionDAG &DAG) const {
5079 auto *Node = cast<AtomicSDNode>(Op.getNode());
5080 assert(
5081 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) &&
5082 "Only custom lowering i128 or f128.");
5083 // Use same code to handle both legal and non-legal i128 types.
5085 LowerOperationWrapper(Node, Results, DAG);
5086 return DAG.getMergeValues(Results, SDLoc(Op));
5087}
5088
5089// Prepare for a Compare And Swap for a subword operation. This needs to be
5090// done in memory with 4 bytes at natural alignment.
5092 SDValue &AlignedAddr, SDValue &BitShift,
5093 SDValue &NegBitShift) {
5094 EVT PtrVT = Addr.getValueType();
5095 EVT WideVT = MVT::i32;
5096
5097 // Get the address of the containing word.
5098 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr,
5099 DAG.getSignedConstant(-4, DL, PtrVT));
5100
5101 // Get the number of bits that the word must be rotated left in order
5102 // to bring the field to the top bits of a GR32.
5103 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr,
5104 DAG.getConstant(3, DL, PtrVT));
5105 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift);
5106
5107 // Get the complementing shift amount, for rotating a field in the top
5108 // bits back to its proper position.
5109 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT,
5110 DAG.getConstant(0, DL, WideVT), BitShift);
5111
5112}
5113
5114// Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first
5115// two into the fullword ATOMIC_LOADW_* operation given by Opcode.
// 32-bit operations are returned unchanged. For narrower operations the
// result is a merge of the extracted field value and the output chain.
5116SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op,
5117 SelectionDAG &DAG,
5118 unsigned Opcode) const {
5119 auto *Node = cast<AtomicSDNode>(Op.getNode());
5120
5121 // 32-bit operations need no special handling.
5122 EVT NarrowVT = Node->getMemoryVT();
5123 EVT WideVT = MVT::i32;
5124 if (NarrowVT == WideVT)
5125 return Op;
5126
5127 int64_t BitSize = NarrowVT.getSizeInBits();
5128 SDValue ChainIn = Node->getChain();
5129 SDValue Addr = Node->getBasePtr();
5130 SDValue Src2 = Node->getVal();
5131 MachineMemOperand *MMO = Node->getMemOperand();
5132 SDLoc DL(Node);
5133
5134 // Convert atomic subtracts of constants into additions.
5135 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB)
5136 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) {
5137 Opcode = SystemZISD::ATOMIC_LOADW_ADD;
5138 Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL,
5139 Src2.getValueType());
5140 }
5141
// Compute the address of the containing aligned word and the rotate
// amounts that move the field to and from the top bits of that word.
5142 SDValue AlignedAddr, BitShift, NegBitShift;
5143 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5144
5145 // Extend the source operand to 32 bits and prepare it for the inner loop.
5146 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other
5147 // operations require the source to be shifted in advance. (This shift
5148 // can be folded if the source is constant.) For AND and NAND, the lower
5149 // bits must be set, while for other opcodes they should be left clear.
5150 if (Opcode != SystemZISD::ATOMIC_SWAPW)
5151 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2,
5152 DAG.getConstant(32 - BitSize, DL, WideVT));
5153 if (Opcode == SystemZISD::ATOMIC_LOADW_AND ||
5154 Opcode == SystemZISD::ATOMIC_LOADW_NAND)
5155 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2,
5156 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT));
5157
5158 // Construct the ATOMIC_LOADW_* node.
// Its operands are the chain, the aligned address, the prepared source,
// both shift amounts and the field width in bits.
5159 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other);
5160 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift,
5161 DAG.getConstant(BitSize, DL, WideVT) };
5162 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops,
5163 NarrowVT, MMO);
5164
5165 // Rotate the result of the final CS so that the field is in the lower
5166 // bits of a GR32, then truncate it.
5167 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift,
5168 DAG.getConstant(BitSize, DL, WideVT));
5169 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift);
5170
// Return the rotated value together with the chain result of the atomic op.
5171 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) };
5172 return DAG.getMergeValues(RetOps, DL);
5173}
5174
5175// Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into
5176// ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions.
5177SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
5178 SelectionDAG &DAG) const {
5179 auto *Node = cast<AtomicSDNode>(Op.getNode());
5180 EVT MemVT = Node->getMemoryVT();
5181 if (MemVT == MVT::i32 || MemVT == MVT::i64) {
5182 // A full-width operation: negate and use LAA(G).
5183 assert(Op.getValueType() == MemVT && "Mismatched VTs");
5184 assert(Subtarget.hasInterlockedAccess1() &&
5185 "Should have been expanded by AtomicExpand pass.");
5186 SDValue Src2 = Node->getVal();
5187 SDLoc DL(Src2);
5188 SDValue NegSrc2 =
5189 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2);
5190 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
5191 Node->getChain(), Node->getBasePtr(), NegSrc2,
5192 Node->getMemOperand());
5193 }
5194
5195 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB);
5196}
5197
5198// Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node.
5199SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op,
5200 SelectionDAG &DAG) const {
5201 auto *Node = cast<AtomicSDNode>(Op.getNode());
5202 SDValue ChainIn = Node->getOperand(0);
5203 SDValue Addr = Node->getOperand(1);
5204 SDValue CmpVal = Node->getOperand(2);
5205 SDValue SwapVal = Node->getOperand(3);
5206 MachineMemOperand *MMO = Node->getMemOperand();
5207 SDLoc DL(Node);
5208
5209 if (Node->getMemoryVT() == MVT::i128) {
5210 // Use same code to handle both legal and non-legal i128 types.
5212 LowerOperationWrapper(Node, Results, DAG);
5213 return DAG.getMergeValues(Results, DL);
5214 }
5215
5216 // We have native support for 32-bit and 64-bit compare and swap, but we
5217 // still need to expand extracting the "success" result from the CC.
5218 EVT NarrowVT = Node->getMemoryVT();
5219 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32;
5220 if (NarrowVT == WideVT) {
5221 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5222 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal };
5223 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP,
5224 DL, Tys, Ops, NarrowVT, MMO);
5225 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5227
5228 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0));
5229 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5230 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5231 return SDValue();
5232 }
5233
5234 // Convert 8-bit and 16-bit compare and swap to a loop, implemented
5235 // via a fullword ATOMIC_CMP_SWAPW operation.
5236 int64_t BitSize = NarrowVT.getSizeInBits();
5237
5238 SDValue AlignedAddr, BitShift, NegBitShift;
5239 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift);
5240
5241 // Construct the ATOMIC_CMP_SWAPW node.
5242 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other);
5243 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift,
5244 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) };
5245 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL,
5246 VTList, Ops, NarrowVT, MMO);
5247 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1),
5249
5250 // emitAtomicCmpSwapW() will zero extend the result (original value).
5251 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0),
5252 DAG.getValueType(NarrowVT));
5253 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal);
5254 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
5255 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2));
5256 return SDValue();
5257}
5258
5260SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const {
5261 // Because of how we convert atomic_load and atomic_store to normal loads and
5262 // stores in the DAG, we need to ensure that the MMOs are marked volatile
5263 // since DAGCombine hasn't been updated to account for atomic, but non
5264 // volatile loads. (See D57601)
5265 if (auto *SI = dyn_cast<StoreInst>(&I))
5266 if (SI->isAtomic())
5268 if (auto *LI = dyn_cast<LoadInst>(&I))
5269 if (LI->isAtomic())
5271 if (auto *AI = dyn_cast<AtomicRMWInst>(&I))
5272 if (AI->isAtomic())
5274 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I))
5275 if (AI->isAtomic())
5278}
5279
5280SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op,
5281 SelectionDAG &DAG) const {
5282 MachineFunction &MF = DAG.getMachineFunction();
5283 auto *Regs = Subtarget.getSpecialRegisters();
5285 report_fatal_error("Variable-sized stack allocations are not supported "
5286 "in GHC calling convention");
5287 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op),
5288 Regs->getStackPointerRegister(), Op.getValueType());
5289}
5290
5291SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op,
5292 SelectionDAG &DAG) const {
5293 MachineFunction &MF = DAG.getMachineFunction();
5294 auto *Regs = Subtarget.getSpecialRegisters();
5295 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain();
5296
5298 report_fatal_error("Variable-sized stack allocations are not supported "
5299 "in GHC calling convention");
5300
5301 SDValue Chain = Op.getOperand(0);
5302 SDValue NewSP = Op.getOperand(1);
5303 SDValue Backchain;
5304 SDLoc DL(Op);
5305
5306 if (StoreBackchain) {
5307 SDValue OldSP = DAG.getCopyFromReg(
5308 Chain, DL, Regs->getStackPointerRegister(), MVT::i64);
5309 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG),
5310 MachinePointerInfo());
5311 }
5312
5313 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP);
5314
5315 if (StoreBackchain)
5316 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG),
5317 MachinePointerInfo());
5318
5319 return Chain;
5320}
5321
5322SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op,
5323 SelectionDAG &DAG) const {
5324 bool IsData = Op.getConstantOperandVal(4);
5325 if (!IsData)
5326 // Just preserve the chain.
5327 return Op.getOperand(0);
5328
5329 SDLoc DL(Op);
5330 bool IsWrite = Op.getConstantOperandVal(2);
5331 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ;
5332 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode());
5333 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32),
5334 Op.getOperand(1)};
5335 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL,
5336 Node->getVTList(), Ops,
5337 Node->getMemoryVT(), Node->getMemOperand());
5338}
5339
5340SDValue
5341SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
5342 SelectionDAG &DAG) const {
5343 unsigned Opcode, CCValid;
5344 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
5345 assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
5346 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode);
5347 SDValue CC = getCCResult(DAG, SDValue(Node, 0));
5348 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
5349 return SDValue();
5350 }
5351
5352 return SDValue();
5353}
5354
5355SDValue
5356SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
5357 SelectionDAG &DAG) const {
5358 unsigned Opcode, CCValid;
5359 if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
5360 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode);
5361 if (Op->getNumValues() == 1)
5362 return getCCResult(DAG, SDValue(Node, 0));
5363 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
5364 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
5365 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1)));
5366 }
5367
5368 unsigned Id = Op.getConstantOperandVal(0);
5369 switch (Id) {
5370 case Intrinsic::thread_pointer:
5371 return lowerThreadPointer(SDLoc(Op), DAG);
5372
5373 case Intrinsic::s390_vpdi:
5374 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
5375 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5376
5377 case Intrinsic::s390_vperm:
5378 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
5379 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5380
5381 case Intrinsic::s390_vuphb:
5382 case Intrinsic::s390_vuphh:
5383 case Intrinsic::s390_vuphf:
5384 case Intrinsic::s390_vuphg:
5385 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
5386 Op.getOperand(1));
5387
5388 case Intrinsic::s390_vuplhb:
5389 case Intrinsic::s390_vuplhh:
5390 case Intrinsic::s390_vuplhf:
5391 case Intrinsic::s390_vuplhg:
5392 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
5393 Op.getOperand(1));
5394
5395 case Intrinsic::s390_vuplb:
5396 case Intrinsic::s390_vuplhw:
5397 case Intrinsic::s390_vuplf:
5398 case Intrinsic::s390_vuplg:
5399 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
5400 Op.getOperand(1));
5401
5402 case Intrinsic::s390_vupllb:
5403 case Intrinsic::s390_vupllh:
5404 case Intrinsic::s390_vupllf:
5405 case Intrinsic::s390_vupllg:
5406 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
5407 Op.getOperand(1));
5408
5409 case Intrinsic::s390_vsumb:
5410 case Intrinsic::s390_vsumh:
5411 case Intrinsic::s390_vsumgh:
5412 case Intrinsic::s390_vsumgf:
5413 case Intrinsic::s390_vsumqf:
5414 case Intrinsic::s390_vsumqg:
5415 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
5416 Op.getOperand(1), Op.getOperand(2));
5417
5418 case Intrinsic::s390_vaq:
5419 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5420 Op.getOperand(1), Op.getOperand(2));
5421 case Intrinsic::s390_vaccb:
5422 case Intrinsic::s390_vacch:
5423 case Intrinsic::s390_vaccf:
5424 case Intrinsic::s390_vaccg:
5425 case Intrinsic::s390_vaccq:
5426 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(),
5427 Op.getOperand(1), Op.getOperand(2));
5428 case Intrinsic::s390_vacq:
5429 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(),
5430 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5431 case Intrinsic::s390_vacccq:
5432 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(),
5433 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5434
5435 case Intrinsic::s390_vsq:
5436 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(),
5437 Op.getOperand(1), Op.getOperand(2));
5438 case Intrinsic::s390_vscbib:
5439 case Intrinsic::s390_vscbih:
5440 case Intrinsic::s390_vscbif:
5441 case Intrinsic::s390_vscbig:
5442 case Intrinsic::s390_vscbiq:
5443 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(),
5444 Op.getOperand(1), Op.getOperand(2));
5445 case Intrinsic::s390_vsbiq:
5446 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(),
5447 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5448 case Intrinsic::s390_vsbcbiq:
5449 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(),
5450 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5451
5452 case Intrinsic::s390_vmhb:
5453 case Intrinsic::s390_vmhh:
5454 case Intrinsic::s390_vmhf:
5455 case Intrinsic::s390_vmhg:
5456 case Intrinsic::s390_vmhq:
5457 return DAG.getNode(ISD::MULHS, SDLoc(Op), Op.getValueType(),
5458 Op.getOperand(1), Op.getOperand(2));
5459 case Intrinsic::s390_vmlhb:
5460 case Intrinsic::s390_vmlhh:
5461 case Intrinsic::s390_vmlhf:
5462 case Intrinsic::s390_vmlhg:
5463 case Intrinsic::s390_vmlhq:
5464 return DAG.getNode(ISD::MULHU, SDLoc(Op), Op.getValueType(),
5465 Op.getOperand(1), Op.getOperand(2));
5466
5467 case Intrinsic::s390_vmahb:
5468 case Intrinsic::s390_vmahh:
5469 case Intrinsic::s390_vmahf:
5470 case Intrinsic::s390_vmahg:
5471 case Intrinsic::s390_vmahq:
5472 return DAG.getNode(SystemZISD::VMAH, SDLoc(Op), Op.getValueType(),
5473 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5474 case Intrinsic::s390_vmalhb:
5475 case Intrinsic::s390_vmalhh:
5476 case Intrinsic::s390_vmalhf:
5477 case Intrinsic::s390_vmalhg:
5478 case Intrinsic::s390_vmalhq:
5479 return DAG.getNode(SystemZISD::VMALH, SDLoc(Op), Op.getValueType(),
5480 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
5481
5482 case Intrinsic::s390_vmeb:
5483 case Intrinsic::s390_vmeh:
5484 case Intrinsic::s390_vmef:
5485 case Intrinsic::s390_vmeg:
5486 return DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5487 Op.getOperand(1), Op.getOperand(2));
5488 case Intrinsic::s390_vmleb:
5489 case Intrinsic::s390_vmleh:
5490 case Intrinsic::s390_vmlef:
5491 case Intrinsic::s390_vmleg:
5492 return DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5493 Op.getOperand(1), Op.getOperand(2));
5494 case Intrinsic::s390_vmob:
5495 case Intrinsic::s390_vmoh:
5496 case Intrinsic::s390_vmof:
5497 case Intrinsic::s390_vmog:
5498 return DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5499 Op.getOperand(1), Op.getOperand(2));
5500 case Intrinsic::s390_vmlob:
5501 case Intrinsic::s390_vmloh:
5502 case Intrinsic::s390_vmlof:
5503 case Intrinsic::s390_vmlog:
5504 return DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5505 Op.getOperand(1), Op.getOperand(2));
5506
5507 case Intrinsic::s390_vmaeb:
5508 case Intrinsic::s390_vmaeh:
5509 case Intrinsic::s390_vmaef:
5510 case Intrinsic::s390_vmaeg:
5511 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5512 DAG.getNode(SystemZISD::VME, SDLoc(Op), Op.getValueType(),
5513 Op.getOperand(1), Op.getOperand(2)),
5514 Op.getOperand(3));
5515 case Intrinsic::s390_vmaleb:
5516 case Intrinsic::s390_vmaleh:
5517 case Intrinsic::s390_vmalef:
5518 case Intrinsic::s390_vmaleg:
5519 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5520 DAG.getNode(SystemZISD::VMLE, SDLoc(Op), Op.getValueType(),
5521 Op.getOperand(1), Op.getOperand(2)),
5522 Op.getOperand(3));
5523 case Intrinsic::s390_vmaob:
5524 case Intrinsic::s390_vmaoh:
5525 case Intrinsic::s390_vmaof:
5526 case Intrinsic::s390_vmaog:
5527 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5528 DAG.getNode(SystemZISD::VMO, SDLoc(Op), Op.getValueType(),
5529 Op.getOperand(1), Op.getOperand(2)),
5530 Op.getOperand(3));
5531 case Intrinsic::s390_vmalob:
5532 case Intrinsic::s390_vmaloh:
5533 case Intrinsic::s390_vmalof:
5534 case Intrinsic::s390_vmalog:
5535 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(),
5536 DAG.getNode(SystemZISD::VMLO, SDLoc(Op), Op.getValueType(),
5537 Op.getOperand(1), Op.getOperand(2)),
5538 Op.getOperand(3));
5539 }
5540
5541 return SDValue();
5542}
5543
5544namespace {
5545// Says that SystemZISD operation Opcode can be used to perform the equivalent
5546// of a VPERM with permute vector Bytes. If Opcode takes three operands,
5547// Operand is the constant third operand, otherwise it is the number of
5548// bytes in each element of the result.
5549struct Permute {
5550 unsigned Opcode;
5551 unsigned Operand;
5552 unsigned char Bytes[SystemZ::VectorBytes];
5553};
5554}
5555
// Table of the fixed byte permutations that can be performed by a single
// merge, pack or doubleword-permute operation. Each entry is
// { Opcode, Operand, Bytes }; see struct Permute for the meaning of the
// fields. Each selector in Bytes encodes the source operand number as
// selector / SystemZ::VectorBytes and the byte within that operand as the
// remaining low bits (this is how matchPermute decodes them).
5556static const Permute PermuteForms[] = {
5557 // VMRHG
5558 { SystemZISD::MERGE_HIGH, 8,
5559 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
5560 // VMRHF
5561 { SystemZISD::MERGE_HIGH, 4,
5562 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
5563 // VMRHH
5564 { SystemZISD::MERGE_HIGH, 2,
5565 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
5566 // VMRHB
5567 { SystemZISD::MERGE_HIGH, 1,
5568 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
5569 // VMRLG
5570 { SystemZISD::MERGE_LOW, 8,
5571 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
5572 // VMRLF
5573 { SystemZISD::MERGE_LOW, 4,
5574 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
5575 // VMRLH
5576 { SystemZISD::MERGE_LOW, 2,
5577 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
5578 // VMRLB
5579 { SystemZISD::MERGE_LOW, 1,
5580 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
5581 // VPKG
5582 { SystemZISD::PACK, 4,
5583 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
5584 // VPKF
5585 { SystemZISD::PACK, 2,
5586 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
5587 // VPKH
5588 { SystemZISD::PACK, 1,
5589 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
5590 // VPDI V1, V2, 4 (low half of V1, high half of V2)
5591 { SystemZISD::PERMUTE_DWORDS, 4,
5592 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
5593 // VPDI V1, V2, 1 (high half of V1, low half of V2)
5594 { SystemZISD::PERMUTE_DWORDS, 1,
5595 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
5596};
5597
// Called after a vector shuffle has been matched against a two-operand
// pattern. OpNos[I] names the operand of the original shuffle that must
// feed operand I of the pattern, or is -1 if operand I of the pattern is
// unconstrained. An unconstrained operand is resolved by reusing the other
// one. On success OpNo0 and OpNo1 receive the shuffle operands to use and
// true is returned. If both entries are -1, false is returned and the
// outputs are left untouched.
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
  const bool HaveFirst = OpNos[0] >= 0;
  const bool HaveSecond = OpNos[1] >= 0;
  if (!HaveFirst && !HaveSecond)
    return false;

  OpNo0 = HaveFirst ? OpNos[0] : OpNos[1];
  OpNo1 = HaveSecond ? OpNos[1] : OpNos[0];
  return true;
}
5618
5619// Bytes is a VPERM-like permute vector, except that -1 is used for
5620// undefined bytes. Return true if the VPERM can be implemented using P.
5621// When returning true set OpNo0 to the VPERM operand that should be
5622// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
5623//
5624// For example, if swapping the VPERM operands allows P to match, OpNo0
5625// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one
5626// operand, but rewriting it to use two duplicated operands allows it to
5627// match P, then OpNo0 and OpNo1 will be the same.
5628static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
5629 unsigned &OpNo0, unsigned &OpNo1) {
5630 int OpNos[] = { -1, -1 };
5631 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5632 int Elt = Bytes[I];
5633 if (Elt >= 0) {
5634 // Make sure that the two permute vectors use the same suboperand
5635 // byte number. Only the operand numbers (the high bits) are
5636 // allowed to differ.
5637 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
5638 return false;
5639 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
5640 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
5641 // Make sure that the operand mappings are consistent with previous
5642 // elements.
5643 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5644 return false;
5645 OpNos[ModelOpNo] = RealOpNo;
5646 }
5647 }
5648 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5649}
5650
5651// As above, but search for a matching permute.
5652static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
5653 unsigned &OpNo0, unsigned &OpNo1) {
5654 for (auto &P : PermuteForms)
5655 if (matchPermute(Bytes, P, OpNo0, OpNo1))
5656 return &P;
5657 return nullptr;
5658}
5659
5660// Bytes is a VPERM-like permute vector, except that -1 is used for
5661// undefined bytes. This permute is an operand of an outer permute.
5662// See whether redistributing the -1 bytes gives a shuffle that can be
5663// implemented using P. If so, set Transform to a VPERM-like permute vector
5664// that, when applied to the result of P, gives the original permute in Bytes.
5666 const Permute &P,
5667 SmallVectorImpl<int> &Transform) {
5668 unsigned To = 0;
5669 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
5670 int Elt = Bytes[From];
5671 if (Elt < 0)
5672 // Byte number From of the result is undefined.
5673 Transform[From] = -1;
5674 else {
5675 while (P.Bytes[To] != Elt) {
5676 To += 1;
5677 if (To == SystemZ::VectorBytes)
5678 return false;
5679 }
5680 Transform[From] = To;
5681 }
5682 }
5683 return true;
5684}
5685
5686// As above, but search for a matching permute.
5687static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
5688 SmallVectorImpl<int> &Transform) {
5689 for (auto &P : PermuteForms)
5690 if (matchDoublePermute(Bytes, P, Transform))
5691 return &P;
5692 return nullptr;
5693}
5694
5695// Convert the mask of the given shuffle op into a byte-level mask,
5696// as if it had type vNi8.
5697static bool getVPermMask(SDValue ShuffleOp,
5698 SmallVectorImpl<int> &Bytes) {
5699 EVT VT = ShuffleOp.getValueType();
5700 unsigned NumElements = VT.getVectorNumElements();
5701 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5702
5703 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) {
5704 Bytes.resize(NumElements * BytesPerElement, -1);
5705 for (unsigned I = 0; I < NumElements; ++I) {
5706 int Index = VSN->getMaskElt(I);
5707 if (Index >= 0)
5708 for (unsigned J = 0; J < BytesPerElement; ++J)
5709 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5710 }
5711 return true;
5712 }
5713 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() &&
5714 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) {
5715 unsigned Index = ShuffleOp.getConstantOperandVal(1);
5716 Bytes.resize(NumElements * BytesPerElement, -1);
5717 for (unsigned I = 0; I < NumElements; ++I)
5718 for (unsigned J = 0; J < BytesPerElement; ++J)
5719 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
5720 return true;
5721 }
5722 return false;
5723}
5724
5725// Bytes is a VPERM-like permute vector, except that -1 is used for
5726// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of
5727// the result come from a contiguous sequence of bytes from one input.
5728// Set Base to the selector for the first byte if so.
5729static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
5730 unsigned BytesPerElement, int &Base) {
5731 Base = -1;
5732 for (unsigned I = 0; I < BytesPerElement; ++I) {
5733 if (Bytes[Start + I] >= 0) {
5734 unsigned Elem = Bytes[Start + I];
5735 if (Base < 0) {
5736 Base = Elem - I;
5737 // Make sure the bytes would come from one input operand.
5738 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
5739 return false;
5740 } else if (unsigned(Base) != Elem - I)
5741 return false;
5742 }
5743 }
5744 return true;
5745}
5746
5747// Bytes is a VPERM-like permute vector, except that -1 is used for
5748// undefined bytes. Return true if it can be performed using VSLDB.
5749// When returning true, set StartIndex to the shift amount and OpNo0
5750// and OpNo1 to the VPERM operands that should be used as the first
5751// and second shift operand respectively.
5753 unsigned &StartIndex, unsigned &OpNo0,
5754 unsigned &OpNo1) {
5755 int OpNos[] = { -1, -1 };
5756 int Shift = -1;
5757 for (unsigned I = 0; I < 16; ++I) {
5758 int Index = Bytes[I];
5759 if (Index >= 0) {
5760 int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
5761 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
5762 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
5763 if (Shift < 0)
5764 Shift = ExpectedShift;
5765 else if (Shift != ExpectedShift)
5766 return false;
5767 // Make sure that the operand mappings are consistent with previous
5768 // elements.
5769 if (OpNos[ModelOpNo] == 1 - RealOpNo)
5770 return false;
5771 OpNos[ModelOpNo] = RealOpNo;
5772 }
5773 }
5774 StartIndex = Shift;
5775 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
5776}
5777
5778// Create a node that performs P on operands Op0 and Op1, casting the
5779// operands to the appropriate type. The type of the result is determined by P.
5781 const Permute &P, SDValue Op0, SDValue Op1) {
5782 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input
5783 // elements of a PACK are twice as wide as the outputs.
5784 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
5785 P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
5786 P.Operand);
5787 // Cast both operands to the appropriate type.
5788 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
5789 SystemZ::VectorBytes / InBytes);
5790 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
5791 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
5792 SDValue Op;
5793 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
5794 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32);
5795 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
5796 } else if (P.Opcode == SystemZISD::PACK) {
5797 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
5798 SystemZ::VectorBytes / P.Operand);
5799 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
5800 } else {
5801 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
5802 }
5803 return Op;
5804}
5805
5806static bool isZeroVector(SDValue N) {
5807 if (N->getOpcode() == ISD::BITCAST)
5808 N = N->getOperand(0);
5809 if (N->getOpcode() == ISD::SPLAT_VECTOR)
5810 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0)))
5811 return Op->getZExtValue() == 0;
5812 return ISD::isBuildVectorAllZeros(N.getNode());
5813}
5814
5815// Return the index of the zero/undef vector, or UINT32_MAX if not found.
5816static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) {
5817 for (unsigned I = 0; I < Num ; I++)
5818 if (isZeroVector(Ops[I]))
5819 return I;
5820 return UINT32_MAX;
5821}
5822
5823// Bytes is a VPERM-like permute vector, except that -1 is used for
5824// undefined bytes. Implement it on operands Ops[0] and Ops[1] using
5825// VSLDB or VPERM.
5827 SDValue *Ops,
5828 const SmallVectorImpl<int> &Bytes) {
5829 for (unsigned I = 0; I < 2; ++I)
5830 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
5831
5832 // First see whether VSLDB can be used.
5833 unsigned StartIndex, OpNo0, OpNo1;
5834 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
5835 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
5836 Ops[OpNo1],
5837 DAG.getTargetConstant(StartIndex, DL, MVT::i32));
5838
5839 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to
5840 // eliminate a zero vector by reusing any zero index in the permute vector.
5841 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2);
5842 if (ZeroVecIdx != UINT32_MAX) {
5843 bool MaskFirst = true;
5844 int ZeroIdx = -1;
5845 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5846 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5847 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5848 if (OpNo == ZeroVecIdx && I == 0) {
5849 // If the first byte is zero, use mask as first operand.
5850 ZeroIdx = 0;
5851 break;
5852 }
5853 if (OpNo != ZeroVecIdx && Byte == 0) {
5854 // If mask contains a zero, use it by placing that vector first.
5855 ZeroIdx = I + SystemZ::VectorBytes;
5856 MaskFirst = false;
5857 break;
5858 }
5859 }
5860 if (ZeroIdx != -1) {
5861 SDValue IndexNodes[SystemZ::VectorBytes];
5862 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
5863 if (Bytes[I] >= 0) {
5864 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
5865 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes;
5866 if (OpNo == ZeroVecIdx)
5867 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32);
5868 else {
5869 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte;
5870 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32);
5871 }
5872 } else
5873 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5874 }
5875 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5876 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0];
5877 if (MaskFirst)
5878 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src,
5879 Mask);
5880 else
5881 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask,
5882 Mask);
5883 }
5884 }
5885
5886 SDValue IndexNodes[SystemZ::VectorBytes];
5887 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
5888 if (Bytes[I] >= 0)
5889 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32);
5890 else
5891 IndexNodes[I] = DAG.getUNDEF(MVT::i32);
5892 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes);
5893 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0],
5894 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2);
5895}
5896
5897namespace {
5898// Describes a general N-operand vector shuffle.
5899struct GeneralShuffle {
5900 GeneralShuffle(EVT vt)
5901 : VT(vt), UnpackFromEltSize(UINT_MAX), UnpackLow(false) {}
5902 void addUndef();
5903 bool add(SDValue, unsigned);
5904 SDValue getNode(SelectionDAG &, const SDLoc &);
5905 void tryPrepareForUnpack();
5906 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; }
5907 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op);
5908
5909 // The operands of the shuffle.
5911
5912 // Index I is -1 if byte I of the result is undefined. Otherwise the
5913 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
5914 // Bytes[I] / SystemZ::VectorBytes.
5916
5917 // The type of the shuffle result.
5918 EVT VT;
5919
5920 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for.
5921 unsigned UnpackFromEltSize;
5922 // True if the final unpack uses the low half.
5923 bool UnpackLow;
5924};
5925} // namespace
5926
5927// Add an extra undefined element to the shuffle.
5928void GeneralShuffle::addUndef() {
5929 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5930 for (unsigned I = 0; I < BytesPerElement; ++I)
5931 Bytes.push_back(-1);
5932}
5933
5934// Add an extra element to the shuffle, taking it from element Elem of Op.
5935// A null Op indicates a vector input whose value will be calculated later;
5936// there is at most one such input per shuffle and it always has the same
5937// type as the result. Aborts and returns false if the source vector elements
5938// of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per
5939// LLVM they become implicitly extended, but this is rare and not optimized.
5940bool GeneralShuffle::add(SDValue Op, unsigned Elem) {
5941 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
5942
5943 // The source vector can have wider elements than the result,
5944 // either through an explicit TRUNCATE or because of type legalization.
5945 // We want the least significant part.
5946 EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
5947 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
5948
5949 // Return false if the source elements are smaller than their destination
5950 // elements.
5951 if (FromBytesPerElement < BytesPerElement)
5952 return false;
5953
5954 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
5955 (FromBytesPerElement - BytesPerElement));
5956
5957 // Look through things like shuffles and bitcasts.
5958 while (Op.getNode()) {
5959 if (Op.getOpcode() == ISD::BITCAST)
5960 Op = Op.getOperand(0);
5961 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
5962 // See whether the bytes we need come from a contiguous part of one
5963 // operand.
5965 if (!getVPermMask(Op, OpBytes))
5966 break;
5967 int NewByte;
5968 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
5969 break;
5970 if (NewByte < 0) {
5971 addUndef();
5972 return true;
5973 }
5974 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
5975 Byte = unsigned(NewByte) % SystemZ::VectorBytes;
5976 } else if (Op.isUndef()) {
5977 addUndef();
5978 return true;
5979 } else
5980 break;
5981 }
5982
5983 // Make sure that the source of the extraction is in Ops.
5984 unsigned OpNo = 0;
5985 for (; OpNo < Ops.size(); ++OpNo)
5986 if (Ops[OpNo] == Op)
5987 break;
5988 if (OpNo == Ops.size())
5989 Ops.push_back(Op);
5990
5991 // Add the element to Bytes.
5992 unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
5993 for (unsigned I = 0; I < BytesPerElement; ++I)
5994 Bytes.push_back(Base + I);
5995
5996 return true;
5997}
5998
5999// Return SDNodes for the completed shuffle.
6000SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) {
6001 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
6002
6003 if (Ops.size() == 0)
6004 return DAG.getUNDEF(VT);
6005
6006 // Use a single unpack if possible as the last operation.
6007 tryPrepareForUnpack();
6008
6009 // Make sure that there are at least two shuffle operands.
6010 if (Ops.size() == 1)
6011 Ops.push_back(DAG.getUNDEF(MVT::v16i8));
6012
6013 // Create a tree of shuffles, deferring root node until after the loop.
6014 // Try to redistribute the undefined elements of non-root nodes so that
6015 // the non-root shuffles match something like a pack or merge, then adjust
6016 // the parent node's permute vector to compensate for the new order.
6017 // Among other things, this copes with vectors like <2 x i16> that were
6018 // padded with undefined elements during type legalization.
6019 //
6020 // In the best case this redistribution will lead to the whole tree
6021 // using packs and merges. It should rarely be a loss in other cases.
6022 unsigned Stride = 1;
6023 for (; Stride * 2 < Ops.size(); Stride *= 2) {
6024 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
6025 SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
6026
6027 // Create a mask for just these two operands.
6029 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6030 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
6031 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
6032 if (OpNo == I)
6033 NewBytes[J] = Byte;
6034 else if (OpNo == I + Stride)
6035 NewBytes[J] = SystemZ::VectorBytes + Byte;
6036 else
6037 NewBytes[J] = -1;
6038 }
6039 // See if it would be better to reorganize NewMask to avoid using VPERM.
6041 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
6042 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
6043 // Applying NewBytesMap to Ops[I] gets back to NewBytes.
6044 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
6045 if (NewBytes[J] >= 0) {
6046 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
6047 "Invalid double permute");
6048 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
6049 } else
6050 assert(NewBytesMap[J] < 0 && "Invalid double permute");
6051 }
6052 } else {
6053 // Just use NewBytes on the operands.
6054 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
6055 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
6056 if (NewBytes[J] >= 0)
6057 Bytes[J] = I * SystemZ::VectorBytes + J;
6058 }
6059 }
6060 }
6061
6062 // Now we just have 2 inputs. Put the second operand in Ops[1].
6063 if (Stride > 1) {
6064 Ops[1] = Ops[Stride];
6065 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6066 if (Bytes[I] >= int(SystemZ::VectorBytes))
6067 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
6068 }
6069
6070 // Look for an instruction that can do the permute without resorting
6071 // to VPERM.
6072 unsigned OpNo0, OpNo1;
6073 SDValue Op;
6074 if (unpackWasPrepared() && Ops[1].isUndef())
6075 Op = Ops[0];
6076 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
6077 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
6078 else
6079 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
6080
6081 Op = insertUnpackIfPrepared(DAG, DL, Op);
6082
6083 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6084}
6085
6086#ifndef NDEBUG
6087static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) {
6088 dbgs() << Msg.c_str() << " { ";
6089 for (unsigned I = 0; I < Bytes.size(); I++)
6090 dbgs() << Bytes[I] << " ";
6091 dbgs() << "}\n";
6092}
6093#endif
6094
6095// If the Bytes vector matches an unpack operation, prepare to do the unpack
6096// after all else by removing the zero vector and the effect of the unpack on
6097// Bytes.
6098void GeneralShuffle::tryPrepareForUnpack() {
6099 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size());
6100 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1)
6101 return;
6102
6103 // Only do this if removing the zero vector reduces the depth, otherwise
6104 // the critical path will increase with the final unpack.
6105 if (Ops.size() > 2 &&
6106 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1))
6107 return;
6108
6109 // Find an unpack that would allow removing the zero vector from Ops.
6110 UnpackFromEltSize = 1;
6111 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) {
6112 bool MatchUnpack = true;
6114 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) {
6115 unsigned ToEltSize = UnpackFromEltSize * 2;
6116 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize;
6117 if (!IsZextByte)
6118 SrcBytes.push_back(Bytes[Elt]);
6119 if (Bytes[Elt] != -1) {
6120 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes;
6121 if (IsZextByte != (OpNo == ZeroVecOpNo)) {
6122 MatchUnpack = false;
6123 break;
6124 }
6125 }
6126 }
6127 if (MatchUnpack) {
6128 if (Ops.size() == 2) {
6129 // Don't use unpack if a single source operand needs rearrangement.
6130 bool CanUseUnpackLow = true, CanUseUnpackHigh = true;
6131 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) {
6132 if (SrcBytes[i] == -1)
6133 continue;
6134 if (SrcBytes[i] % 16 != int(i))
6135 CanUseUnpackHigh = false;
6136 if (SrcBytes[i] % 16 != int(i + SystemZ::VectorBytes / 2))
6137 CanUseUnpackLow = false;
6138 if (!CanUseUnpackLow && !CanUseUnpackHigh) {
6139 UnpackFromEltSize = UINT_MAX;
6140 return;
6141 }
6142 }
6143 if (!CanUseUnpackHigh)
6144 UnpackLow = true;
6145 }
6146 break;
6147 }
6148 }
6149 if (UnpackFromEltSize > 4)
6150 return;
6151
6152 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size "
6153 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo
6154 << ".\n";
6155 dumpBytes(Bytes, "Original Bytes vector:"););
6156
6157 // Apply the unpack in reverse to the Bytes array.
6158 unsigned B = 0;
6159 if (UnpackLow) {
6160 while (B < SystemZ::VectorBytes / 2)
6161 Bytes[B++] = -1;
6162 }
6163 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) {
6164 Elt += UnpackFromEltSize;
6165 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++)
6166 Bytes[B] = Bytes[Elt];
6167 }
6168 if (!UnpackLow) {
6169 while (B < SystemZ::VectorBytes)
6170 Bytes[B++] = -1;
6171 }
6172
6173 // Remove the zero vector from Ops
6174 Ops.erase(&Ops[ZeroVecOpNo]);
6175 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
6176 if (Bytes[I] >= 0) {
6177 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes;
6178 if (OpNo > ZeroVecOpNo)
6179 Bytes[I] -= SystemZ::VectorBytes;
6180 }
6181
6182 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:");
6183 dbgs() << "\n";);
6184}
6185
6186SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG,
6187 const SDLoc &DL,
6188 SDValue Op) {
6189 if (!unpackWasPrepared())
6190 return Op;
6191 unsigned InBits = UnpackFromEltSize * 8;
6192 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits),
6193 SystemZ::VectorBits / InBits);
6194 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op);
6195 unsigned OutBits = InBits * 2;
6196 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits),
6197 SystemZ::VectorBits / OutBits);
6198 return DAG.getNode(UnpackLow ? SystemZISD::UNPACKL_LOW
6199 : SystemZISD::UNPACKL_HIGH,
6200 DL, OutVT, PackedOp);
6201}
6202
6203// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
6205 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
6206 if (!Op.getOperand(I).isUndef())
6207 return false;
6208 return true;
6209}
6210
6211// Return a vector of type VT that contains Value in the first element.
6212// The other elements don't matter.
6214 SDValue Value) {
6215 // If we have a constant, replicate it to all elements and let the
6216 // BUILD_VECTOR lowering take care of it.
6217 if (Value.getOpcode() == ISD::Constant ||
6218 Value.getOpcode() == ISD::ConstantFP) {
6220 return DAG.getBuildVector(VT, DL, Ops);
6221 }
6222 if (Value.isUndef())
6223 return DAG.getUNDEF(VT);
6224 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
6225}
6226
6227// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
6228// element 1. Used for cases in which replication is cheap.
6230 SDValue Op0, SDValue Op1) {
6231 if (Op0.isUndef()) {
6232 if (Op1.isUndef())
6233 return DAG.getUNDEF(VT);
6234 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
6235 }
6236 if (Op1.isUndef())
6237 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
6238 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
6239 buildScalarToVector(DAG, DL, VT, Op0),
6240 buildScalarToVector(DAG, DL, VT, Op1));
6241}
6242
6243// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
6244// vector for them.
6246 SDValue Op1) {
6247 if (Op0.isUndef() && Op1.isUndef())
6248 return DAG.getUNDEF(MVT::v2i64);
6249 // If one of the two inputs is undefined then replicate the other one,
6250 // in order to avoid using another register unnecessarily.
6251 if (Op0.isUndef())
6252 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6253 else if (Op1.isUndef())
6254 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6255 else {
6256 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
6257 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
6258 }
6259 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
6260}
6261
6262// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
6263// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
6264// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR
6265// would benefit from this representation and return it if so.
6267 BuildVectorSDNode *BVN) {
6268 EVT VT = BVN->getValueType(0);
6269 unsigned NumElements = VT.getVectorNumElements();
6270
6271 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
6272 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still
6273 // need a BUILD_VECTOR, add an additional placeholder operand for that
6274 // BUILD_VECTOR and store its operands in ResidueOps.
6275 GeneralShuffle GS(VT);
6277 bool FoundOne = false;
6278 for (unsigned I = 0; I < NumElements; ++I) {
6279 SDValue Op = BVN->getOperand(I);
6280 if (Op.getOpcode() == ISD::TRUNCATE)
6281 Op = Op.getOperand(0);
6282 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6283 Op.getOperand(1).getOpcode() == ISD::Constant) {
6284 unsigned Elem = Op.getConstantOperandVal(1);
6285 if (!GS.add(Op.getOperand(0), Elem))
6286 return SDValue();
6287 FoundOne = true;
6288 } else if (Op.isUndef()) {
6289 GS.addUndef();
6290 } else {
6291 if (!GS.add(SDValue(), ResidueOps.size()))
6292 return SDValue();
6293 ResidueOps.push_back(BVN->getOperand(I));
6294 }
6295 }
6296
6297 // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
6298 if (!FoundOne)
6299 return SDValue();
6300
6301 // Create the BUILD_VECTOR for the remaining elements, if any.
6302 if (!ResidueOps.empty()) {
6303 while (ResidueOps.size() < NumElements)
6304 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType()));
6305 for (auto &Op : GS.Ops) {
6306 if (!Op.getNode()) {
6307 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps);
6308 break;
6309 }
6310 }
6311 }
6312 return GS.getNode(DAG, SDLoc(BVN));
6313}
6314
6315bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
6316 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
6317 return true;
6318 if (auto *AL = dyn_cast<AtomicSDNode>(Op))
6319 if (AL->getOpcode() == ISD::ATOMIC_LOAD)
6320 return true;
6321 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
6322 return true;
6323 return false;
6324}
6325
6326// Combine GPR scalar values Elems into a vector of type VT.
6327SDValue
6328SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
6329 SmallVectorImpl<SDValue> &Elems) const {
6330 // See whether there is a single replicated value.
6332 unsigned int NumElements = Elems.size();
6333 unsigned int Count = 0;
6334 for (auto Elem : Elems) {
6335 if (!Elem.isUndef()) {
6336 if (!Single.getNode())
6337 Single = Elem;
6338 else if (Elem != Single) {
6339 Single = SDValue();
6340 break;
6341 }
6342 Count += 1;
6343 }
6344 }
6345 // There are three cases here:
6346 //
6347 // - if the only defined element is a loaded one, the best sequence
6348 // is a replicating load.
6349 //
6350 // - otherwise, if the only defined element is an i64 value, we will
6351 // end up with the same VLVGP sequence regardless of whether we short-cut
6352 // for replication or fall through to the later code.
6353 //
6354 // - otherwise, if the only defined element is an i32 or smaller value,
6355 // we would need 2 instructions to replicate it: VLVGP followed by VREPx.
6356 // This is only a win if the single defined element is used more than once.
6357 // In other cases we're better off using a single VLVGx.
6358 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
6359 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
6360
6361 // If all elements are loads, use VLREP/VLEs (below).
6362 bool AllLoads = true;
6363 for (auto Elem : Elems)
6364 if (!isVectorElementLoad(Elem)) {
6365 AllLoads = false;
6366 break;
6367 }
6368
6369 // The best way of building a v2i64 from two i64s is to use VLVGP.
6370 if (VT == MVT::v2i64 && !AllLoads)
6371 return joinDwords(DAG, DL, Elems[0], Elems[1]);
6372
6373 // Use a 64-bit merge high to combine two doubles.
6374 if (VT == MVT::v2f64 && !AllLoads)
6375 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6376
6377 // Build v4f32 values directly from the FPRs:
6378 //
6379 // <Axxx> <Bxxx> <Cxxxx> <Dxxx>
6380 // V V VMRHF
6381 // <ABxx> <CDxx>
6382 // V VMRHG
6383 // <ABCD>
6384 if (VT == MVT::v4f32 && !AllLoads) {
6385 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
6386 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
6387 // Avoid unnecessary undefs by reusing the other operand.
6388 if (Op01.isUndef())
6389 Op01 = Op23;
6390 else if (Op23.isUndef())
6391 Op23 = Op01;
6392 // Merging identical replications is a no-op.
6393 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
6394 return Op01;
6395 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
6396 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
6397 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
6398 DL, MVT::v2i64, Op01, Op23);
6399 return DAG.getNode(ISD::BITCAST, DL, VT, Op);
6400 }
6401
6402 // Collect the constant terms.
6405
6406 unsigned NumConstants = 0;
6407 for (unsigned I = 0; I < NumElements; ++I) {
6408 SDValue Elem = Elems[I];
6409 if (Elem.getOpcode() == ISD::Constant ||
6410 Elem.getOpcode() == ISD::ConstantFP) {
6411 NumConstants += 1;
6412 Constants[I] = Elem;
6413 Done[I] = true;
6414 }
6415 }
6416 // If there was at least one constant, fill in the other elements of
6417 // Constants with undefs to get a full vector constant and use that
6418 // as the starting point.
6420 SDValue ReplicatedVal;
6421 if (NumConstants > 0) {
6422 for (unsigned I = 0; I < NumElements; ++I)
6423 if (!Constants[I].getNode())
6424 Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
6425 Result = DAG.getBuildVector(VT, DL, Constants);
6426 } else {
6427 // Otherwise try to use VLREP or VLVGP to start the sequence in order to
6428 // avoid a false dependency on any previous contents of the vector
6429 // register.
6430
6431 // Use a VLREP if at least one element is a load. Make sure to replicate
6432 // the load with the most elements having its value.
6433 std::map<const SDNode*, unsigned> UseCounts;
6434 SDNode *LoadMaxUses = nullptr;
6435 for (unsigned I = 0; I < NumElements; ++I)
6436 if (isVectorElementLoad(Elems[I])) {
6437 SDNode *Ld = Elems[I].getNode();
6438 unsigned Count = ++UseCounts[Ld];
6439 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < Count)
6440 LoadMaxUses = Ld;
6441 }
6442 if (LoadMaxUses != nullptr) {
6443 ReplicatedVal = SDValue(LoadMaxUses, 0);
6444 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal);
6445 } else {
6446 // Try to use VLVGP.
6447 unsigned I1 = NumElements / 2 - 1;
6448 unsigned I2 = NumElements - 1;
6449 bool Def1 = !Elems[I1].isUndef();
6450 bool Def2 = !Elems[I2].isUndef();
6451 if (Def1 || Def2) {
6452 SDValue Elem1 = Elems[Def1 ? I1 : I2];
6453 SDValue Elem2 = Elems[Def2 ? I2 : I1];
6454 Result = DAG.getNode(ISD::BITCAST, DL, VT,
6455 joinDwords(DAG, DL, Elem1, Elem2));
6456 Done[I1] = true;
6457 Done[I2] = true;
6458 } else
6459 Result = DAG.getUNDEF(VT);
6460 }
6461 }
6462
6463 // Use VLVGx to insert the other elements.
6464 for (unsigned I = 0; I < NumElements; ++I)
6465 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal)
6466 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
6467 DAG.getConstant(I, DL, MVT::i32));
6468 return Result;
6469}
6470
// Custom-lower a BUILD_VECTOR node. Constant vectors are either kept as-is
// (when SystemZVectorConstantInfo reports them legal for this subtarget) or
// rejected with an empty SDValue; non-constant vectors are tried as a
// shuffle of other vectors first and are otherwise assembled element by
// element through buildVector.
6471SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
6472 SelectionDAG &DAG) const {
6473 auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
6474 SDLoc DL(Op);
6475 EVT VT = Op.getValueType();
6476
6477 if (BVN->isConstant()) {
6478 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget))
6479 return Op;
6480
6481 // Fall back to loading it from memory.
6482 return SDValue();
6483 }
6484
6485 // See if we should use shuffles to construct the vector from other vectors.
6486 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN))
6487 return Res;
6488
6489 // Detect SCALAR_TO_VECTOR conversions.
// NOTE(review): the condition guarding this return (listing line 6490) is
// not present in this view -- confirm against the upstream file.
6491 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
6492
6493 // Otherwise use buildVector to build the vector up from GPRs.
6494 unsigned NumElements = Op.getNumOperands();
// NOTE(review): the declaration of Ops (listing line 6495) is not present in
// this view; the loop below fills it with one entry per operand.
6496 for (unsigned I = 0; I < NumElements; ++I)
6497 Ops[I] = Op.getOperand(I);
6498 return buildVector(DAG, DL, VT, Ops);
6499}
6500
// Custom-lower a VECTOR_SHUFFLE node. Splats become either a REPLICATE of
// the scalar being splatted (when that scalar is directly available as an
// operand of the first input) or a SPLAT of the first input vector. All
// other masks are fed element by element into a GeneralShuffle; an empty
// SDValue is returned as soon as GeneralShuffle::add rejects an element.
6501SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
6502 SelectionDAG &DAG) const {
6503 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
6504 SDLoc DL(Op);
6505 EVT VT = Op.getValueType();
6506 unsigned NumElements = VT.getVectorNumElements();
6507
6508 if (VSN->isSplat()) {
6509 SDValue Op0 = Op.getOperand(0);
6510 unsigned Index = VSN->getSplatIndex();
6511 assert(Index < VT.getVectorNumElements() &&
6512 "Splat index should be defined and in first operand");
6513 // See whether the value we're splatting is directly available as a scalar.
// NOTE(review): the second half of this condition (listing line 6515) is not
// present in this view -- confirm against the upstream file.
6514 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6516 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
6517 // Otherwise keep it as a vector-to-vector operation.
6518 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
6519 DAG.getTargetConstant(Index, DL, MVT::i32));
6520 }
6521
// Mask entries index the concatenation of both operands: Elt / NumElements
// selects the operand and Elt % NumElements the lane within it. Negative
// entries are undefined lanes.
6522 GeneralShuffle GS(VT);
6523 for (unsigned I = 0; I < NumElements; ++I) {
6524 int Elt = VSN->getMaskElt(I);
6525 if (Elt < 0)
6526 GS.addUndef();
6527 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements),
6528 unsigned(Elt) % NumElements))
6529 return SDValue();
6530 }
6531 return GS.getNode(DAG, SDLoc(VSN));
6532}
6533
6534SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
6535 SelectionDAG &DAG) const {
6536 SDLoc DL(Op);
6537 // Just insert the scalar into element 0 of an undefined vector.
6538 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
6539 Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
6540 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32));
6541}
6542
6543SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
6544 SelectionDAG &DAG) const {
6545 // Handle insertions of floating-point values.
6546 SDLoc DL(Op);
6547 SDValue Op0 = Op.getOperand(0);
6548 SDValue Op1 = Op.getOperand(1);
6549 SDValue Op2 = Op.getOperand(2);
6550 EVT VT = Op.getValueType();
6551
6552 // Insertions into constant indices of a v2f64 can be done using VPDI.
6553 // However, if the inserted value is a bitcast or a constant then it's
6554 // better to use GPRs, as below.
6555 if (VT == MVT::v2f64 &&
6556 Op1.getOpcode() != ISD::BITCAST &&
6557 Op1.getOpcode() != ISD::ConstantFP &&
6558 Op2.getOpcode() == ISD::Constant) {
6559 uint64_t Index = Op2->getAsZExtVal();
6560 unsigned Mask = VT.getVectorNumElements() - 1;
6561 if (Index <= Mask)
6562 return Op;
6563 }
6564
6565 // Otherwise bitcast to the equivalent integer form and insert via a GPR.
6566 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
6567 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
6568 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
6569 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
6570 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
6571 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6572}
6573
6574SDValue
6575SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
6576 SelectionDAG &DAG) const {
6577 // Handle extractions of floating-point values.
6578 SDLoc DL(Op);
6579 SDValue Op0 = Op.getOperand(0);
6580 SDValue Op1 = Op.getOperand(1);
6581 EVT VT = Op.getValueType();
6582 EVT VecVT = Op0.getValueType();
6583
6584 // Extractions of constant indices can be done directly.
6585 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
6586 uint64_t Index = CIndexN->getZExtValue();
6587 unsigned Mask = VecVT.getVectorNumElements() - 1;
6588 if (Index <= Mask)
6589 return Op;
6590 }
6591
6592 // Otherwise bitcast to the equivalent integer form and extract via a GPR.
6593 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
6594 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
6595 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
6596 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
6597 return DAG.getNode(ISD::BITCAST, DL, VT, Res);
6598}
6599
6600SDValue SystemZTargetLowering::
6601lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6602 SDValue PackedOp = Op.getOperand(0);
6603 EVT OutVT = Op.getValueType();
6604 EVT InVT = PackedOp.getValueType();
6605 unsigned ToBits = OutVT.getScalarSizeInBits();
6606 unsigned FromBits = InVT.getScalarSizeInBits();
6607 unsigned StartOffset = 0;
6608
6609 // If the input is a VECTOR_SHUFFLE, there are a number of important
6610 // cases where we can directly implement the sign-extension of the
6611 // original input lanes of the shuffle.
6612 if (PackedOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
6613 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(PackedOp.getNode());
6614 ArrayRef<int> ShuffleMask = SVN->getMask();
6615 int OutNumElts = OutVT.getVectorNumElements();
6616
6617 // Recognize the special case where the sign-extension can be done
6618 // by the VSEG instruction. Handled via the default expander.
6619 if (ToBits == 64 && OutNumElts == 2) {
6620 int NumElem = ToBits / FromBits;
6621 if (ShuffleMask[0] == NumElem - 1 && ShuffleMask[1] == 2 * NumElem - 1)
6622 return SDValue();
6623 }
6624
6625 // Recognize the special case where we can fold the shuffle by
6626 // replacing some of the UNPACK_HIGH with UNPACK_LOW.
6627 int StartOffsetCandidate = -1;
6628 for (int Elt = 0; Elt < OutNumElts; Elt++) {
6629 if (ShuffleMask[Elt] == -1)
6630 continue;
6631 if (ShuffleMask[Elt] % OutNumElts == Elt) {
6632 if (StartOffsetCandidate == -1)
6633 StartOffsetCandidate = ShuffleMask[Elt] - Elt;
6634 if (StartOffsetCandidate == ShuffleMask[Elt] - Elt)
6635 continue;
6636 }
6637 StartOffsetCandidate = -1;
6638 break;
6639 }
6640 if (StartOffsetCandidate != -1) {
6641 StartOffset = StartOffsetCandidate;
6642 PackedOp = PackedOp.getOperand(0);
6643 }
6644 }
6645
6646 do {
6647 FromBits *= 2;
6648 unsigned OutNumElts = SystemZ::VectorBits / FromBits;
6649 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), OutNumElts);
6650 unsigned Opcode = SystemZISD::UNPACK_HIGH;
6651 if (StartOffset >= OutNumElts) {
6652 Opcode = SystemZISD::UNPACK_LOW;
6653 StartOffset -= OutNumElts;
6654 }
6655 PackedOp = DAG.getNode(Opcode, SDLoc(PackedOp), OutVT, PackedOp);
6656 } while (FromBits != ToBits);
6657 return PackedOp;
6658}
6659
6660// Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector.
6661SDValue SystemZTargetLowering::
6662lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const {
6663 SDValue PackedOp = Op.getOperand(0);
6664 SDLoc DL(Op);
6665 EVT OutVT = Op.getValueType();
6666 EVT InVT = PackedOp.getValueType();
6667 unsigned InNumElts = InVT.getVectorNumElements();
6668 unsigned OutNumElts = OutVT.getVectorNumElements();
6669 unsigned NumInPerOut = InNumElts / OutNumElts;
6670
6671 SDValue ZeroVec =
6672 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType()));
6673
6674 SmallVector<int, 16> Mask(InNumElts);
6675 unsigned ZeroVecElt = InNumElts;
6676 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) {
6677 unsigned MaskElt = PackedElt * NumInPerOut;
6678 unsigned End = MaskElt + NumInPerOut - 1;
6679 for (; MaskElt < End; MaskElt++)
6680 Mask[MaskElt] = ZeroVecElt++;
6681 Mask[MaskElt] = PackedElt;
6682 }
6683 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask);
6684 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf);
6685}
6686
6687SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
6688 unsigned ByScalar) const {
6689 // Look for cases where a vector shift can use the *_BY_SCALAR form.
6690 SDValue Op0 = Op.getOperand(0);
6691 SDValue Op1 = Op.getOperand(1);
6692 SDLoc DL(Op);
6693 EVT VT = Op.getValueType();
6694 unsigned ElemBitSize = VT.getScalarSizeInBits();
6695
6696 // See whether the shift vector is a splat represented as BUILD_VECTOR.
6697 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
6698 APInt SplatBits, SplatUndef;
6699 unsigned SplatBitSize;
6700 bool HasAnyUndefs;
6701 // Check for constant splats. Use ElemBitSize as the minimum element
6702 // width and reject splats that need wider elements.
6703 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6704 ElemBitSize, true) &&
6705 SplatBitSize == ElemBitSize) {
6706 SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6707 DL, MVT::i32);
6708 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6709 }
6710 // Check for variable splats.
6711 BitVector UndefElements;
6712 SDValue Splat = BVN->getSplatValue(&UndefElements);
6713 if (Splat) {
6714 // Since i32 is the smallest legal type, we either need a no-op
6715 // or a truncation.
6716 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6717 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6718 }
6719 }
6720
6721 // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6722 // and the shift amount is directly available in a GPR.
6723 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6724 if (VSN->isSplat()) {
6725 SDValue VSNOp0 = VSN->getOperand(0);
6726 unsigned Index = VSN->getSplatIndex();
6727 assert(Index < VT.getVectorNumElements() &&
6728 "Splat index should be defined and in first operand");
6729 if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6730 VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6731 // Since i32 is the smallest legal type, we either need a no-op
6732 // or a truncation.
6733 SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6734 VSNOp0.getOperand(Index));
6735 return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6736 }
6737 }
6738 }
6739
6740 // Otherwise just treat the current form as legal.
6741 return Op;
6742}
6743
6744SDValue SystemZTargetLowering::lowerFSHL(SDValue Op, SelectionDAG &DAG) const {
6745 SDLoc DL(Op);
6746
6747 // i128 FSHL with a constant amount that is a multiple of 8 can be
6748 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6749 // facility, FSHL with a constant amount less than 8 can be implemented
6750 // via SHL_DOUBLE_BIT, and FSHL with other constant amounts by a
6751 // combination of the two.
6752 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6753 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6754 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6755 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6756 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6757 if (ShiftAmt > 120) {
6758 // For N in 121..128, fshl N == fshr (128 - N), and for 1 <= N < 8
6759 // SHR_DOUBLE_BIT emits fewer instructions.
6760 SDValue Val =
6761 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6762 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6763 return DAG.getBitcast(MVT::i128, Val);
6764 }
6765 SmallVector<int, 16> Mask(16);
6766 for (unsigned Elt = 0; Elt < 16; Elt++)
6767 Mask[Elt] = (ShiftAmt >> 3) + Elt;
6768 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6769 if ((ShiftAmt & 7) == 0)
6770 return DAG.getBitcast(MVT::i128, Shuf1);
6771 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op1, Op1, Mask);
6772 SDValue Val =
6773 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Shuf1, Shuf2,
6774 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6775 return DAG.getBitcast(MVT::i128, Val);
6776 }
6777 }
6778
6779 return SDValue();
6780}
6781
6782SDValue SystemZTargetLowering::lowerFSHR(SDValue Op, SelectionDAG &DAG) const {
6783 SDLoc DL(Op);
6784
6785 // i128 FSHR with a constant amount that is a multiple of 8 can be
6786 // implemented via VECTOR_SHUFFLE. If we have the vector-enhancements-2
6787 // facility, FSHR with a constant amount less than 8 can be implemented
6788 // via SHR_DOUBLE_BIT, and FSHR with other constant amounts by a
6789 // combination of the two.
6790 if (auto *ShiftAmtNode = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
6791 uint64_t ShiftAmt = ShiftAmtNode->getZExtValue() & 127;
6792 if ((ShiftAmt & 7) == 0 || Subtarget.hasVectorEnhancements2()) {
6793 SDValue Op0 = DAG.getBitcast(MVT::v16i8, Op.getOperand(0));
6794 SDValue Op1 = DAG.getBitcast(MVT::v16i8, Op.getOperand(1));
6795 if (ShiftAmt > 120) {
6796 // For N in 121..128, fshr N == fshl (128 - N), and for 1 <= N < 8
6797 // SHL_DOUBLE_BIT emits fewer instructions.
6798 SDValue Val =
6799 DAG.getNode(SystemZISD::SHL_DOUBLE_BIT, DL, MVT::v16i8, Op0, Op1,
6800 DAG.getTargetConstant(128 - ShiftAmt, DL, MVT::i32));
6801 return DAG.getBitcast(MVT::i128, Val);
6802 }
6803 SmallVector<int, 16> Mask(16);
6804 for (unsigned Elt = 0; Elt < 16; Elt++)
6805 Mask[Elt] = 16 - (ShiftAmt >> 3) + Elt;
6806 SDValue Shuf1 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op1, Mask);
6807 if ((ShiftAmt & 7) == 0)
6808 return DAG.getBitcast(MVT::i128, Shuf1);
6809 SDValue Shuf2 = DAG.getVectorShuffle(MVT::v16i8, DL, Op0, Op0, Mask);
6810 SDValue Val =
6811 DAG.getNode(SystemZISD::SHR_DOUBLE_BIT, DL, MVT::v16i8, Shuf2, Shuf1,
6812 DAG.getTargetConstant(ShiftAmt & 7, DL, MVT::i32));
6813 return DAG.getBitcast(MVT::i128, Val);
6814 }
6815 }
6816
6817 return SDValue();
6818}
6819
// Body of the ADDRSPACECAST lowering (the dispatcher calls it as
// lowerAddrSpaceCast(Op, DAG)). Converts between a 32-bit (ptr32) and a
// 64-bit pointer value; in both directions bit 31 is cleared by masking
// with 0x7fffffff. Any other combination is a fatal error.
// NOTE(review): the function signature (listing line 6820) is not present in
// this view -- confirm against the upstream file.
6821 SDLoc DL(Op);
6822 SDValue Src = Op.getOperand(0);
6823 MVT DstVT = Op.getSimpleValueType();
6824
// NOTE(review): the declaration of N (listing line 6825) is not present in
// this view; N is used below to query source/destination address spaces.
6826 unsigned SrcAS = N->getSrcAddressSpace();
6827
6828 assert(SrcAS != N->getDestAddressSpace() &&
6829 "addrspacecast must be between different address spaces");
6830
6831 // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
6832 // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
6833 if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
// ptr32 -> 64-bit: mask the 32-bit source, then zero-extend to i64.
6834 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Src,
6835 DAG.getConstant(0x7fffffff, DL, MVT::i32));
6836 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6837 } else if (DstVT == MVT::i32) {
// 64-bit -> ptr32: truncate to i32, then mask.
6838 Op = DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);
6839 Op = DAG.getNode(ISD::AND, DL, MVT::i32, Op,
6840 DAG.getConstant(0x7fffffff, DL, MVT::i32));
// NOTE(review): Op is already i32 here and DstVT is i32 in this branch, so
// this ZERO_EXTEND looks redundant -- confirm it is intentional.
6841 Op = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Op);
6842 } else {
6843 report_fatal_error("Bad address space in addrspacecast");
6844 }
6845 return Op;
6846}
6847
6848SDValue SystemZTargetLowering::lowerFP_EXTEND(SDValue Op,
6849 SelectionDAG &DAG) const {
6850 SDValue In = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
6851 if (In.getSimpleValueType() != MVT::f16)
6852 return Op; // Legal
6853 return SDValue(); // Let legalizer emit the libcall.
6854}
6855
// Emit a call to runtime library function LC with the single argument Arg
// and result type VT (callers in this file invoke it as
// useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict)). For strict FP
// operations the result and the updated chain are returned as merged
// values; otherwise only the result is returned.
// NOTE(review): the first line of the signature (listing line 6856) is not
// present in this view -- confirm against the upstream file.
6857 MVT VT, SDValue Arg, SDLoc DL,
6858 SDValue Chain, bool IsStrict) const {
6859 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected request for libcall!");
6860 MakeLibCallOptions CallOptions;
6861 SDValue Result;
6862 std::tie(Result, Chain) =
6863 makeLibCall(DAG, LC, VT, Arg, CallOptions, DL, Chain);
6864 return IsStrict ? DAG.getMergeValues({Result, Chain}, DL) : Result;
6865}
6866
// Custom-lower FP_TO_SINT / FP_TO_UINT and their STRICT_ variants.
// Outcomes, in order of the checks below:
//  * unsigned conversion without the FP-extension facility: empty SDValue
//    (default expansion);
//  * f16 source: delegated to LowerOperationWrapper;
//  * i128 result: runtime library call;
//  * anything else: legal, Op is returned unchanged.
6867SDValue SystemZTargetLowering::lower_FP_TO_INT(SDValue Op,
6868 SelectionDAG &DAG) const {
6869 bool IsSigned = (Op->getOpcode() == ISD::FP_TO_SINT ||
6870 Op->getOpcode() == ISD::STRICT_FP_TO_SINT);
6871 bool IsStrict = Op->isStrictFPOpcode();
6872 SDLoc DL(Op);
6873 MVT VT = Op.getSimpleValueType();
// Strict nodes carry the chain as operand 0 and the value as operand 1.
6874 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
6875 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
6876 EVT InVT = InOp.getValueType();
6877
6878 // FP to unsigned is not directly supported on z10. Promoting an i32
6879 // result to (signed) i64 doesn't generate an inexact condition (fp
6880 // exception) for values that are outside the i32 range but in the i64
6881 // range, so use the default expansion.
6882 if (!Subtarget.hasFPExtension() && !IsSigned)
6883 // Expand i32/i64. F16 values will be recognized to fit and extended.
6884 return SDValue();
6885
6886 // Conversion from f16 is done via f32.
6887 if (InOp.getSimpleValueType() == MVT::f16) {
// NOTE(review): the declaration of Results (listing line 6888) is not
// present in this view -- confirm against the upstream file.
6889 LowerOperationWrapper(Op.getNode(), Results, DAG);
6890 return DAG.getMergeValues(Results, DL);
6891 }
6892
6893 if (VT == MVT::i128) {
6894 RTLIB::Libcall LC =
6895 IsSigned ? RTLIB::getFPTOSINT(InVT, VT) : RTLIB::getFPTOUINT(InVT, VT);
6896 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
6897 }
6898
6899 return Op; // Legal
6900}
6901
// Custom-lower SINT_TO_FP / UINT_TO_FP and their STRICT_ variants.
// Outcomes, in order of the checks below:
//  * f16 result: delegated to LowerOperationWrapper;
//  * unsigned conversion without the FP-extension facility: empty SDValue
//    (default expansion);
//  * i128 source: runtime library call;
//  * anything else: legal, Op is returned unchanged.
6902SDValue SystemZTargetLowering::lower_INT_TO_FP(SDValue Op,
6903 SelectionDAG &DAG) const {
6904 bool IsSigned = (Op->getOpcode() == ISD::SINT_TO_FP ||
6905 Op->getOpcode() == ISD::STRICT_SINT_TO_FP);
6906 bool IsStrict = Op->isStrictFPOpcode();
6907 SDLoc DL(Op);
6908 MVT VT = Op.getSimpleValueType();
// Strict nodes carry the chain as operand 0 and the value as operand 1.
6909 SDValue InOp = Op.getOperand(IsStrict ? 1 : 0);
6910 SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
6911 EVT InVT = InOp.getValueType();
6912
6913 // Conversion to f16 is done via f32.
6914 if (VT == MVT::f16) {
// NOTE(review): the declaration of Results (listing line 6915) is not
// present in this view -- confirm against the upstream file.
6916 LowerOperationWrapper(Op.getNode(), Results, DAG);
6917 return DAG.getMergeValues(Results, DL);
6918 }
6919
6920 // Unsigned to fp is not directly supported on z10.
6921 if (!Subtarget.hasFPExtension() && !IsSigned)
6922 return SDValue(); // Expand i64.
6923
6924 if (InVT == MVT::i128) {
6925 RTLIB::Libcall LC =
6926 IsSigned ? RTLIB::getSINTTOFP(InVT, VT) : RTLIB::getUINTTOFP(InVT, VT);
6927 return useLibCall(DAG, LC, VT, InOp, DL, Chain, IsStrict);
6928 }
6929
6930 return Op; // Legal
6931}
6932
6933// Shift the lower 2 bytes of Op to the left in order to insert into the
6934// upper 2 bytes of the FP register.
// Op must be an i64. The value is shifted left by 48 bits, bitcast to f64,
// and the f16 result is taken from the subreg_h16 subregister of that f64.
// NOTE(review): the function signature (listing line 6935) is not present in
// this view; callers in this file invoke it as convertToF16(Value, DAG).
// NOTE(review): the assert message below contains the typo "Expexted".
6936 assert(Op.getSimpleValueType() == MVT::i64 &&
6937 "Expexted to convert i64 to f16.");
6938 SDLoc DL(Op);
6939 SDValue Shft = DAG.getNode(ISD::SHL, DL, MVT::i64, Op,
6940 DAG.getConstant(48, DL, MVT::i64));
6941 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Shft);
6942 SDValue F16Val =
6943 DAG.getTargetExtractSubreg(SystemZ::subreg_h16, DL, MVT::f16, BCast);
6944 return F16Val;
6945}
6946
6947// Extract Op into GPR and shift the 2 f16 bytes to the right.
// Op must be an f16. It is inserted as the subreg_h16 subregister of an
// otherwise undefined (IMPLICIT_DEF) f64, which is bitcast to i64 and
// shifted right logically by 48 bits. The result is an i64.
// NOTE(review): the function signature (listing line 6948) is not present in
// this view; callers in this file invoke it as convertFromF16(Value, DL, DAG).
6949 assert(Op.getSimpleValueType() == MVT::f16 &&
6950 "Expected to convert f16 to i64.");
6951 SDNode *U32 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
6952 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h16, DL, MVT::f64,
6953 SDValue(U32, 0), Op);
6954 SDValue BCast = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
6955 SDValue Shft = DAG.getNode(ISD::SRL, DL, MVT::i64, BCast,
6956 DAG.getConstant(48, DL, MVT::i32));
6957 return Shft;
6958}
6959
6960// Lower an f16 LOAD in case of no vector support.
6961SDValue SystemZTargetLowering::lowerLoadF16(SDValue Op,
6962 SelectionDAG &DAG) const {
6963 EVT RegVT = Op.getValueType();
6964 assert(RegVT == MVT::f16 && "Expected to lower an f16 load.");
6965 (void)RegVT;
6966
6967 // Load as integer.
6968 SDLoc DL(Op);
6969 SDValue NewLd;
6970 if (auto *AtomicLd = dyn_cast<AtomicSDNode>(Op.getNode())) {
6971 assert(EVT(RegVT) == AtomicLd->getMemoryVT() && "Unhandled f16 load");
6972 NewLd = DAG.getAtomicLoad(ISD::EXTLOAD, DL, MVT::i16, MVT::i64,
6973 AtomicLd->getChain(), AtomicLd->getBasePtr(),
6974 AtomicLd->getMemOperand());
6975 } else {
6976 LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
6977 assert(EVT(RegVT) == Ld->getMemoryVT() && "Unhandled f16 load");
6978 NewLd = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i64, Ld->getChain(),
6979 Ld->getBasePtr(), Ld->getPointerInfo(), MVT::i16,
6980 Ld->getBaseAlign(), Ld->getMemOperand()->getFlags());
6981 }
6982 SDValue F16Val = convertToF16(NewLd, DAG);
6983 return DAG.getMergeValues({F16Val, NewLd.getValue(1)}, DL);
6984}
6985
6986// Lower an f16 STORE in case of no vector support.
6987SDValue SystemZTargetLowering::lowerStoreF16(SDValue Op,
6988 SelectionDAG &DAG) const {
6989 SDLoc DL(Op);
6990 SDValue Shft = convertFromF16(Op->getOperand(1), DL, DAG);
6991
6992 if (auto *AtomicSt = dyn_cast<AtomicSDNode>(Op.getNode()))
6993 return DAG.getAtomic(ISD::ATOMIC_STORE, DL, MVT::i16, AtomicSt->getChain(),
6994 Shft, AtomicSt->getBasePtr(),
6995 AtomicSt->getMemOperand());
6996
6997 StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
6998 return DAG.getTruncStore(St->getChain(), DL, Shft, St->getBasePtr(), MVT::i16,
6999 St->getMemOperand());
7000}
7001
// Custom-lower IS_FPCLASS: translate the requested fc* class bits (operand
// 1) into a TDCMask, emit a SystemZISD::TDC node testing operand 0 against
// that mask, and convert the result with getCCResult. f16 arguments are
// first extended to f32.
7002SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
7003 SelectionDAG &DAG) const {
7004 SDLoc DL(Op);
7005 MVT ResultVT = Op.getSimpleValueType();
7006 SDValue Arg = Op.getOperand(0);
7007 unsigned Check = Op.getConstantOperandVal(1);
7008
7009 unsigned TDCMask = 0;
// NOTE(review): the statements controlled by the next eight conditions
// (listing lines 7011, 7013, ..., 7025) are not present in this view;
// presumably each ORs the matching SystemZ::TDCMASK_* bits into TDCMask, as
// the two visible zero cases do -- confirm against the upstream file.
7010 if (Check & fcSNan)
7012 if (Check & fcQNan)
7014 if (Check & fcPosInf)
7016 if (Check & fcNegInf)
7018 if (Check & fcPosNormal)
7020 if (Check & fcNegNormal)
7022 if (Check & fcPosSubnormal)
7024 if (Check & fcNegSubnormal)
7026 if (Check & fcPosZero)
7027 TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
7028 if (Check & fcNegZero)
7029 TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
7030 SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
7031
7032 if (Arg.getSimpleValueType() == MVT::f16)
7033 Arg = DAG.getFPExtendOrRound(Arg, SDLoc(Arg), MVT::f32);
7034 SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
7035 return getCCResult(DAG, Intr);
7036}
7037
// Custom-lower READCYCLECOUNTER: store the TOD clock into an i64 stack
// temporary with a SystemZISD::STCKF memory-intrinsic node, then load the
// i64 back from that slot. The returned load is chained after the store.
7038SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
7039 SelectionDAG &DAG) const {
7040 SDLoc DL(Op);
7041 SDValue Chain = Op.getOperand(0);
7042
7043 // STCKF only supports a memory operand, so we have to use a temporary.
7044 SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
7045 int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
// NOTE(review): the initializer of MPI (listing line 7047) is not present in
// this view; presumably it describes the stack slot SPFI -- confirm against
// the upstream file.
7046 MachinePointerInfo MPI =
7048
7049 // Use STCKF to store the TOD clock into the temporary.
7050 SDValue StoreOps[] = {Chain, StackPtr};
7051 Chain = DAG.getMemIntrinsicNode(
7052 SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
7053 MPI, MaybeAlign(), MachineMemOperand::MOStore);
7054
7055 // And read it back from there.
7056 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI);
7057}
7058
// Dispatcher for custom-lowered operations: switches on the opcode of Op
// and forwards to the matching lower* helper. Any opcode without a case
// reaches llvm_unreachable.
// NOTE(review): the first line of the signature (listing line 7059) and a
// number of `case` labels are not present in this view (listing lines 7074,
// 7078, 7158, 7160, 7164, 7166, 7168, 7170, 7172, 7174, 7191, 7195-7196,
// 7200-7201 and 7213-7214). Wherever two `return` statements appear
// back-to-back below, a label belongs between them -- confirm against the
// upstream file.
7060 SelectionDAG &DAG) const {
7061 switch (Op.getOpcode()) {
7062 case ISD::FRAMEADDR:
7063 return lowerFRAMEADDR(Op, DAG);
7064 case ISD::RETURNADDR:
7065 return lowerRETURNADDR(Op, DAG);
7066 case ISD::BR_CC:
7067 return lowerBR_CC(Op, DAG);
7068 case ISD::SELECT_CC:
7069 return lowerSELECT_CC(Op, DAG);
7070 case ISD::SETCC:
7071 return lowerSETCC(Op, DAG);
7072 case ISD::STRICT_FSETCC:
7073 return lowerSTRICT_FSETCC(Op, DAG, false);
7075 return lowerSTRICT_FSETCC(Op, DAG, true);
7076 case ISD::GlobalAddress:
7077 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG);
7079 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG);
7080 case ISD::BlockAddress:
7081 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG);
7082 case ISD::JumpTable:
7083 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG);
7084 case ISD::ConstantPool:
7085 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG);
7086 case ISD::BITCAST:
7087 return lowerBITCAST(Op, DAG);
7088 case ISD::VASTART:
7089 return lowerVASTART(Op, DAG);
7090 case ISD::VACOPY:
7091 return lowerVACOPY(Op, DAG);
7092 case ISD::DYNAMIC_STACKALLOC:
7093 return lowerDYNAMIC_STACKALLOC(Op, DAG);
7094 case ISD::GET_DYNAMIC_AREA_OFFSET:
7095 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
7096 case ISD::MULHS:
7097 return lowerMULH(Op, DAG, SystemZISD::SMUL_LOHI);
7098 case ISD::MULHU:
7099 return lowerMULH(Op, DAG, SystemZISD::UMUL_LOHI);
7100 case ISD::SMUL_LOHI:
7101 return lowerSMUL_LOHI(Op, DAG);
7102 case ISD::UMUL_LOHI:
7103 return lowerUMUL_LOHI(Op, DAG);
7104 case ISD::SDIVREM:
7105 return lowerSDIVREM(Op, DAG);
7106 case ISD::UDIVREM:
7107 return lowerUDIVREM(Op, DAG);
7108 case ISD::SADDO:
7109 case ISD::SSUBO:
7110 case ISD::UADDO:
7111 case ISD::USUBO:
7112 return lowerXALUO(Op, DAG);
7113 case ISD::UADDO_CARRY:
7114 case ISD::USUBO_CARRY:
7115 return lowerUADDSUBO_CARRY(Op, DAG);
7116 case ISD::OR:
7117 return lowerOR(Op, DAG);
7118 case ISD::CTPOP:
7119 return lowerCTPOP(Op, DAG);
7120 case ISD::VECREDUCE_ADD:
7121 return lowerVECREDUCE_ADD(Op, DAG);
7122 case ISD::ATOMIC_FENCE:
7123 return lowerATOMIC_FENCE(Op, DAG);
7124 case ISD::ATOMIC_SWAP:
7125 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
7126 case ISD::ATOMIC_STORE:
7127 return lowerATOMIC_STORE(Op, DAG);
7128 case ISD::ATOMIC_LOAD:
7129 return lowerATOMIC_LOAD(Op, DAG);
7130 case ISD::ATOMIC_LOAD_ADD:
7131 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD);
7132 case ISD::ATOMIC_LOAD_SUB:
7133 return lowerATOMIC_LOAD_SUB(Op, DAG);
7134 case ISD::ATOMIC_LOAD_AND:
7135 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND);
7136 case ISD::ATOMIC_LOAD_OR:
7137 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR);
7138 case ISD::ATOMIC_LOAD_XOR:
7139 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR);
7140 case ISD::ATOMIC_LOAD_NAND:
7141 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND);
7142 case ISD::ATOMIC_LOAD_MIN:
7143 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN);
7144 case ISD::ATOMIC_LOAD_MAX:
7145 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX);
7146 case ISD::ATOMIC_LOAD_UMIN:
7147 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN);
7148 case ISD::ATOMIC_LOAD_UMAX:
7149 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX);
7150 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
7151 return lowerATOMIC_CMP_SWAP(Op, DAG);
7152 case ISD::STACKSAVE:
7153 return lowerSTACKSAVE(Op, DAG);
7154 case ISD::STACKRESTORE:
7155 return lowerSTACKRESTORE(Op, DAG);
7156 case ISD::PREFETCH:
7157 return lowerPREFETCH(Op, DAG);
7159 return lowerINTRINSIC_W_CHAIN(Op, DAG);
7161 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
7162 case ISD::BUILD_VECTOR:
7163 return lowerBUILD_VECTOR(Op, DAG);
7165 return lowerVECTOR_SHUFFLE(Op, DAG);
7167 return lowerSCALAR_TO_VECTOR(Op, DAG);
7169 return lowerINSERT_VECTOR_ELT(Op, DAG);
7171 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
7173 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG);
7175 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG);
// Vector shifts and rotates share lowerShift; the third argument selects
// the *_BY_SCALAR node to emit when the amount is a splat.
7176 case ISD::SHL:
7177 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
7178 case ISD::SRL:
7179 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
7180 case ISD::SRA:
7181 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
7182 case ISD::ADDRSPACECAST:
7183 return lowerAddrSpaceCast(Op, DAG);
7184 case ISD::ROTL:
7185 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
7186 case ISD::FSHL:
7187 return lowerFSHL(Op, DAG);
7188 case ISD::FSHR:
7189 return lowerFSHR(Op, DAG);
7190 case ISD::FP_EXTEND:
7192 return lowerFP_EXTEND(Op, DAG);
7193 case ISD::FP_TO_UINT:
7194 case ISD::FP_TO_SINT:
7197 return lower_FP_TO_INT(Op, DAG);
7198 case ISD::UINT_TO_FP:
7199 case ISD::SINT_TO_FP:
7202 return lower_INT_TO_FP(Op, DAG);
7203 case ISD::LOAD:
7204 return lowerLoadF16(Op, DAG);
7205 case ISD::STORE:
7206 return lowerStoreF16(Op, DAG);
7207 case ISD::IS_FPCLASS:
7208 return lowerIS_FPCLASS(Op, DAG);
7209 case ISD::GET_ROUNDING:
7210 return lowerGET_ROUNDING(Op, DAG);
7211 case ISD::READCYCLECOUNTER:
7212 return lowerREADCYCLECOUNTER(Op, DAG);
7215 // These operations are legal on our platform, but we cannot actually
7216 // set the operation action to Legal as common code would treat this
7217 // as equivalent to Expand. Instead, we keep the operation action to
7218 // Custom and just leave them unchanged here.
7219 return Op;
7220
7221 default:
7222 llvm_unreachable("Unexpected node to lower");
7223 }
7224}
7225
// Convert the i128 value Src to f128 (callers in this file invoke it as
// expandBitCastI128ToF128(DAG, Value, DL)). When i128 is a legal type a
// plain bitcast is used; otherwise the value is split into two i64 halves,
// each bitcast to f64, and the halves are combined into an f128 with a
// REG_SEQUENCE over the FP128Bit register class.
// NOTE(review): the first line of the signature (listing line 7226) is not
// present in this view -- confirm against the upstream file.
7227 const SDLoc &SL) {
7228 // If i128 is legal, just use a normal bitcast.
7229 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7230 return DAG.getBitcast(MVT::f128, Src);
7231
7232 // Otherwise, f128 must live in FP128, so do a partwise move.
// NOTE(review): the start of the statement ending on the next line (listing
// line 7233) is not present in this view -- confirm against upstream.
7234 &SystemZ::FP128BitRegClass);
7235
7236 SDValue Hi, Lo;
7237 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64);
7238
7239 Hi = DAG.getBitcast(MVT::f64, Hi);
7240 Lo = DAG.getBitcast(MVT::f64, Lo);
7241
// Lo is placed in subreg_l64 and Hi in subreg_h64 of the register pair.
7242 SDNode *Pair = DAG.getMachineNode(
7243 SystemZ::REG_SEQUENCE, SL, MVT::f128,
7244 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo,
7245 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi,
7246 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)});
7247 return SDValue(Pair, 0);
7248}
7249
// Convert the f128 value Src to i128 (callers in this file invoke it as
// expandBitCastF128ToI128(DAG, Value, DL)). When i128 is a legal type a
// plain bitcast is used; otherwise the subreg_l64 and subreg_h64 halves are
// extracted as f64, bitcast to i64, and joined with BUILD_PAIR.
// NOTE(review): the first line of the signature (listing line 7250) is not
// present in this view -- confirm against the upstream file.
7251 const SDLoc &SL) {
7252 // If i128 is legal, just use a normal bitcast.
7253 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128))
7254 return DAG.getBitcast(MVT::i128, Src);
7255
7256 // Otherwise, f128 must live in FP128, so do a partwise move.
// NOTE(review): the start of the statement ending on the next line (listing
// line 7257) is not present in this view -- confirm against upstream.
7258 &SystemZ::FP128BitRegClass);
7259
7260 SDValue LoFP =
7261 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src);
7262 SDValue HiFP =
7263 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src);
7264 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP);
7265 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP);
7266
7267 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi);
7268}
7269
7270// Lower operations with invalid operand or result types.
7271void
7274 SelectionDAG &DAG) const {
7275 switch (N->getOpcode()) {
7276 case ISD::ATOMIC_LOAD: {
7277 SDLoc DL(N);
7278 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other);
7279 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) };
7280 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7281 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128,
7282 DL, Tys, Ops, MVT::i128, MMO);
7283
7284 SDValue Lowered = lowerGR128ToI128(DAG, Res);
7285 if (N->getValueType(0) == MVT::f128)
7286 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL);
7287 Results.push_back(Lowered);
7288 Results.push_back(Res.getValue(1));
7289 break;
7290 }
7291 case ISD::ATOMIC_STORE: {
7292 SDLoc DL(N);
7293 SDVTList Tys = DAG.getVTList(MVT::Other);
7294 SDValue Val = N->getOperand(1);
7295 if (Val.getValueType() == MVT::f128)
7296 Val = expandBitCastF128ToI128(DAG, Val, DL);
7297 Val = lowerI128ToGR128(DAG, Val);
7298
7299 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)};
7300 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7301 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128,
7302 DL, Tys, Ops, MVT::i128, MMO);
7303 // We have to enforce sequential consistency by performing a
7304 // serialization operation after the store.
7305 if (cast<AtomicSDNode>(N)->getSuccessOrdering() ==
7307 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL,
7308 MVT::Other, Res), 0);
7309 Results.push_back(Res);
7310 break;
7311 }
7312 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: {
7313 SDLoc DL(N);
7314 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other);
7315 SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
7316 lowerI128ToGR128(DAG, N->getOperand(2)),
7317 lowerI128ToGR128(DAG, N->getOperand(3)) };
7318 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand();
7319 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128,
7320 DL, Tys, Ops, MVT::i128, MMO);
7321 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1),
7323 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1));
7324 Results.push_back(lowerGR128ToI128(DAG, Res));
7325 Results.push_back(Success);
7326 Results.push_back(Res.getValue(2));
7327 break;
7328 }
7329 case ISD::BITCAST: {
7330 if (useSoftFloat())
7331 return;
7332 SDLoc DL(N);
7333 SDValue Src = N->getOperand(0);
7334 EVT SrcVT = Src.getValueType();
7335 EVT ResVT = N->getValueType(0);
7336 if (ResVT == MVT::i128 && SrcVT == MVT::f128)
7337 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL));
7338 else if (SrcVT == MVT::i16 && ResVT == MVT::f16) {
7339 if (Subtarget.hasVector()) {
7340 SDValue In32 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Src);
7341 Results.push_back(SDValue(
7342 DAG.getMachineNode(SystemZ::LEFR_16, DL, MVT::f16, In32), 0));
7343 } else {
7344 SDValue In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Src);
7345 Results.push_back(convertToF16(In64, DAG));
7346 }
7347 } else if (SrcVT == MVT::f16 && ResVT == MVT::i16) {
7348 SDValue ExtractedVal =
7349 Subtarget.hasVector()
7350 ? SDValue(DAG.getMachineNode(SystemZ::LFER_16, DL, MVT::i32, Src),
7351 0)
7352 : convertFromF16(Src, DL, DAG);
7353 Results.push_back(DAG.getZExtOrTrunc(ExtractedVal, DL, ResVT));
7354 }
7355 break;
7356 }
7357 case ISD::UINT_TO_FP:
7358 case ISD::SINT_TO_FP:
7361 if (useSoftFloat())
7362 return;
7363 bool IsStrict = N->isStrictFPOpcode();
7364 SDLoc DL(N);
7365 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7366 EVT ResVT = N->getValueType(0);
7367 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7368 if (ResVT == MVT::f16) {
7369 if (!IsStrict) {
7370 SDValue OpF32 = DAG.getNode(N->getOpcode(), DL, MVT::f32, InOp);
7371 Results.push_back(DAG.getFPExtendOrRound(OpF32, DL, MVT::f16));
7372 } else {
7373 SDValue OpF32 =
7374 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::f32, MVT::Other),
7375 {Chain, InOp});
7376 SDValue F16Res;
7377 std::tie(F16Res, Chain) = DAG.getStrictFPExtendOrRound(
7378 OpF32, OpF32.getValue(1), DL, MVT::f16);
7379 Results.push_back(F16Res);
7380 Results.push_back(Chain);
7381 }
7382 }
7383 break;
7384 }
7385 case ISD::FP_TO_UINT:
7386 case ISD::FP_TO_SINT:
7389 if (useSoftFloat())
7390 return;
7391 bool IsStrict = N->isStrictFPOpcode();
7392 SDLoc DL(N);
7393 EVT ResVT = N->getValueType(0);
7394 SDValue InOp = N->getOperand(IsStrict ? 1 : 0);
7395 EVT InVT = InOp->getValueType(0);
7396 SDValue Chain = IsStrict ? N->getOperand(0) : DAG.getEntryNode();
7397 if (InVT == MVT::f16) {
7398 if (!IsStrict) {
7399 SDValue InF32 = DAG.getFPExtendOrRound(InOp, DL, MVT::f32);
7400 Results.push_back(DAG.getNode(N->getOpcode(), DL, ResVT, InF32));
7401 } else {
7402 SDValue InF32;
7403 std::tie(InF32, Chain) =
7404 DAG.getStrictFPExtendOrRound(InOp, Chain, DL, MVT::f32);
7405 SDValue OpF32 =
7406 DAG.getNode(N->getOpcode(), DL, DAG.getVTList(ResVT, MVT::Other),
7407 {Chain, InF32});
7408 Results.push_back(OpF32);
7409 Results.push_back(OpF32.getValue(1));
7410 }
7411 }
7412 break;
7413 }
7414 default:
7415 llvm_unreachable("Unexpected node to lower");
7416 }
7417}
7418
7419void
7425
7426// Return true if VT is a vector whose elements are a whole number of bytes
7427// in width. Also check for presence of vector support.
7428bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const {
7429 if (!Subtarget.hasVector())
7430 return false;
7431
7432 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple();
7433}
7434
7435// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
7436// producing a result of type ResVT. Op is a possibly bitcast version
7437// of the input vector and Index is the index (based on type VecVT) that
7438// should be extracted. Return the new extraction if a simplification
7439// was possible or if Force is true.
7440SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
7441 EVT VecVT, SDValue Op,
7442 unsigned Index,
7443 DAGCombinerInfo &DCI,
7444 bool Force) const {
7445 SelectionDAG &DAG = DCI.DAG;
7446
7447 // The number of bytes being extracted.
7448 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7449
7450 for (;;) {
7451 unsigned Opcode = Op.getOpcode();
7452 if (Opcode == ISD::BITCAST)
7453 // Look through bitcasts.
7454 Op = Op.getOperand(0);
7455 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) &&
7456 canTreatAsByteVector(Op.getValueType())) {
7457 // Get a VPERM-like permute mask and see whether the bytes covered
7458 // by the extracted element are a contiguous sequence from one
7459 // source operand.
7461 if (!getVPermMask(Op, Bytes))
7462 break;
7463 int First;
7464 if (!getShuffleInput(Bytes, Index * BytesPerElement,
7465 BytesPerElement, First))
7466 break;
7467 if (First < 0)
7468 return DAG.getUNDEF(ResVT);
7469 // Make sure the contiguous sequence starts at a multiple of the
7470 // original element size.
7471 unsigned Byte = unsigned(First) % Bytes.size();
7472 if (Byte % BytesPerElement != 0)
7473 break;
7474 // We can get the extracted value directly from an input.
7475 Index = Byte / BytesPerElement;
7476 Op = Op.getOperand(unsigned(First) / Bytes.size());
7477 Force = true;
7478 } else if (Opcode == ISD::BUILD_VECTOR &&
7479 canTreatAsByteVector(Op.getValueType())) {
7480 // We can only optimize this case if the BUILD_VECTOR elements are
7481 // at least as wide as the extracted value.
7482 EVT OpVT = Op.getValueType();
7483 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7484 if (OpBytesPerElement < BytesPerElement)
7485 break;
7486 // Make sure that the least-significant bit of the extracted value
7487 // is the least significant bit of an input.
7488 unsigned End = (Index + 1) * BytesPerElement;
7489 if (End % OpBytesPerElement != 0)
7490 break;
7491 // We're extracting the low part of one operand of the BUILD_VECTOR.
7492 Op = Op.getOperand(End / OpBytesPerElement - 1);
7493 if (!Op.getValueType().isInteger()) {
7494 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
7495 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
7496 DCI.AddToWorklist(Op.getNode());
7497 }
7498 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
7499 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
7500 if (VT != ResVT) {
7501 DCI.AddToWorklist(Op.getNode());
7502 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
7503 }
7504 return Op;
7505 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
7507 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
7508 canTreatAsByteVector(Op.getValueType()) &&
7509 canTreatAsByteVector(Op.getOperand(0).getValueType())) {
7510 // Make sure that only the unextended bits are significant.
7511 EVT ExtVT = Op.getValueType();
7512 EVT OpVT = Op.getOperand(0).getValueType();
7513 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
7514 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
7515 unsigned Byte = Index * BytesPerElement;
7516 unsigned SubByte = Byte % ExtBytesPerElement;
7517 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
7518 if (SubByte < MinSubByte ||
7519 SubByte + BytesPerElement > ExtBytesPerElement)
7520 break;
7521 // Get the byte offset of the unextended element
7522 Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
7523 // ...then add the byte offset relative to that element.
7524 Byte += SubByte - MinSubByte;
7525 if (Byte % BytesPerElement != 0)
7526 break;
7527 Op = Op.getOperand(0);
7528 Index = Byte / BytesPerElement;
7529 Force = true;
7530 } else
7531 break;
7532 }
7533 if (Force) {
7534 if (Op.getValueType() != VecVT) {
7535 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
7536 DCI.AddToWorklist(Op.getNode());
7537 }
7538 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
7539 DAG.getConstant(Index, DL, MVT::i32));
7540 }
7541 return SDValue();
7542}
7543
7544// Optimize vector operations in scalar value Op on the basis that Op
7545// is truncated to TruncVT.
7546SDValue SystemZTargetLowering::combineTruncateExtract(
7547 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const {
7548 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
7549 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
7550 // of type TruncVT.
7551 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7552 TruncVT.getSizeInBits() % 8 == 0) {
7553 SDValue Vec = Op.getOperand(0);
7554 EVT VecVT = Vec.getValueType();
7555 if (canTreatAsByteVector(VecVT)) {
7556 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
7557 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
7558 unsigned TruncBytes = TruncVT.getStoreSize();
7559 if (BytesPerElement % TruncBytes == 0) {
7560 // Calculate the value of Y' in the above description. We are
7561 // splitting the original elements into Scale equal-sized pieces
7562 // and for truncation purposes want the last (least-significant)
7563 // of these pieces for IndexN. This is easiest to do by calculating
7564 // the start index of the following element and then subtracting 1.
7565 unsigned Scale = BytesPerElement / TruncBytes;
7566 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
7567
7568 // Defer the creation of the bitcast from X to combineExtract,
7569 // which might be able to optimize the extraction.
7570 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(),
7571 MVT::getIntegerVT(TruncBytes * 8),
7572 VecVT.getStoreSize() / TruncBytes);
7573 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
7574 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
7575 }
7576 }
7577 }
7578 }
7579 return SDValue();
7580}
7581
7582SDValue SystemZTargetLowering::combineZERO_EXTEND(
7583 SDNode *N, DAGCombinerInfo &DCI) const {
7584 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2')
7585 SelectionDAG &DAG = DCI.DAG;
7586 SDValue N0 = N->getOperand(0);
7587 EVT VT = N->getValueType(0);
7588 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) {
7589 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0));
7590 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7591 if (TrueOp && FalseOp) {
7592 SDLoc DL(N0);
7593 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT),
7594 DAG.getConstant(FalseOp->getZExtValue(), DL, VT),
7595 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) };
7596 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops);
7597 // If N0 has multiple uses, change other uses as well.
7598 if (!N0.hasOneUse()) {
7599 SDValue TruncSelect =
7600 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect);
7601 DCI.CombineTo(N0.getNode(), TruncSelect);
7602 }
7603 return NewSelect;
7604 }
7605 }
7606 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size
7607 // of the result is smaller than the size of X and all the truncated bits
7608 // of X are already zero.
7609 if (N0.getOpcode() == ISD::XOR &&
7610 N0.hasOneUse() && N0.getOperand(0).hasOneUse() &&
7611 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
7612 N0.getOperand(1).getOpcode() == ISD::Constant) {
7613 SDValue X = N0.getOperand(0).getOperand(0);
7614 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) {
7615 KnownBits Known = DAG.computeKnownBits(X);
7616 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(),
7617 N0.getValueSizeInBits(),
7618 VT.getSizeInBits());
7619 if (TruncatedBits.isSubsetOf(Known.Zero)) {
7620 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
7621 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits());
7622 return DAG.getNode(ISD::XOR, SDLoc(N0), VT,
7623 X, DAG.getConstant(Mask, SDLoc(N0), VT));
7624 }
7625 }
7626 }
7627 // Recognize patterns for VECTOR SUBTRACT COMPUTE BORROW INDICATION
7628 // and VECTOR ADD COMPUTE CARRY for i128:
7629 // (zext (setcc_uge X Y)) --> (VSCBI X Y)
7630 // (zext (setcc_ule Y X)) --> (VSCBI X Y)
7631 // (zext (setcc_ult (add X Y) X/Y) -> (VACC X Y)
7632 // (zext (setcc_ugt X/Y (add X Y)) -> (VACC X Y)
7633 // For vector types, these patterns are recognized in the .td file.
7634 if (N0.getOpcode() == ISD::SETCC && isTypeLegal(VT) && VT == MVT::i128 &&
7635 N0.getOperand(0).getValueType() == VT) {
7636 SDValue Op0 = N0.getOperand(0);
7637 SDValue Op1 = N0.getOperand(1);
7638 const ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7639 switch (CC) {
7640 case ISD::SETULE:
7641 std::swap(Op0, Op1);
7642 [[fallthrough]];
7643 case ISD::SETUGE:
7644 return DAG.getNode(SystemZISD::VSCBI, SDLoc(N0), VT, Op0, Op1);
7645 case ISD::SETUGT:
7646 std::swap(Op0, Op1);
7647 [[fallthrough]];
7648 case ISD::SETULT:
7649 if (Op0->hasOneUse() && Op0->getOpcode() == ISD::ADD &&
7650 (Op0->getOperand(0) == Op1 || Op0->getOperand(1) == Op1))
7651 return DAG.getNode(SystemZISD::VACC, SDLoc(N0), VT, Op0->getOperand(0),
7652 Op0->getOperand(1));
7653 break;
7654 default:
7655 break;
7656 }
7657 }
7658
7659 return SDValue();
7660}
7661
7662SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG(
7663 SDNode *N, DAGCombinerInfo &DCI) const {
7664 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1)
7665 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1)
7666 // into (select_cc LHS, RHS, -1, 0, COND)
7667 SelectionDAG &DAG = DCI.DAG;
7668 SDValue N0 = N->getOperand(0);
7669 EVT VT = N->getValueType(0);
7670 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
7671 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND)
7672 N0 = N0.getOperand(0);
7673 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) {
7674 SDLoc DL(N0);
7675 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1),
7676 DAG.getAllOnesConstant(DL, VT),
7677 DAG.getConstant(0, DL, VT), N0.getOperand(2) };
7678 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
7679 }
7680 return SDValue();
7681}
7682
7683SDValue SystemZTargetLowering::combineSIGN_EXTEND(
7684 SDNode *N, DAGCombinerInfo &DCI) const {
7685 // Convert (sext (ashr (shl X, C1), C2)) to
7686 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as
7687 // cheap as narrower ones.
7688 SelectionDAG &DAG = DCI.DAG;
7689 SDValue N0 = N->getOperand(0);
7690 EVT VT = N->getValueType(0);
7691 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) {
7692 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1));
7693 SDValue Inner = N0.getOperand(0);
7694 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
7695 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
7696 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
7697 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
7698 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
7699 EVT ShiftVT = N0.getOperand(1).getValueType();
7700 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT,
7701 Inner.getOperand(0));
7702 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext,
7703 DAG.getConstant(NewShlAmt, SDLoc(Inner),
7704 ShiftVT));
7705 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl,
7706 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT));
7707 }
7708 }
7709 }
7710
7711 return SDValue();
7712}
7713
7714SDValue SystemZTargetLowering::combineMERGE(
7715 SDNode *N, DAGCombinerInfo &DCI) const {
7716 SelectionDAG &DAG = DCI.DAG;
7717 unsigned Opcode = N->getOpcode();
7718 SDValue Op0 = N->getOperand(0);
7719 SDValue Op1 = N->getOperand(1);
7720 if (Op0.getOpcode() == ISD::BITCAST)
7721 Op0 = Op0.getOperand(0);
7723 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
7724 // for v4f32.
7725 if (Op1 == N->getOperand(0))
7726 return Op1;
7727 // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
7728 EVT VT = Op1.getValueType();
7729 unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
7730 if (ElemBytes <= 4) {
7731 Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
7732 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
7733 EVT InVT = VT.changeVectorElementTypeToInteger();
7734 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
7735 SystemZ::VectorBytes / ElemBytes / 2);
7736 if (VT != InVT) {
7737 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
7738 DCI.AddToWorklist(Op1.getNode());
7739 }
7740 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
7741 DCI.AddToWorklist(Op.getNode());
7742 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
7743 }
7744 }
7745 return SDValue();
7746}
7747
7748static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7749 SDNode *&HiPart) {
7750 LoPart = HiPart = nullptr;
7751
7752 // Scan through all users.
7753 for (SDUse &Use : LD->uses()) {
7754 // Skip the uses of the chain.
7755 if (Use.getResNo() != 0)
7756 continue;
7757
7758 // Verify every user is a TRUNCATE to i64 of the low or high half.
7759 SDNode *User = Use.getUser();
7760 bool IsLoPart = true;
7761 if (User->getOpcode() == ISD::SRL &&
7762 User->getOperand(1).getOpcode() == ISD::Constant &&
7763 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) {
7764 User = *User->user_begin();
7765 IsLoPart = false;
7766 }
7767 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64)
7768 return false;
7769
7770 if (IsLoPart) {
7771 if (LoPart)
7772 return false;
7773 LoPart = User;
7774 } else {
7775 if (HiPart)
7776 return false;
7777 HiPart = User;
7778 }
7779 }
7780 return true;
7781}
7782
7783static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart,
7784 SDNode *&HiPart) {
7785 LoPart = HiPart = nullptr;
7786
7787 // Scan through all users.
7788 for (SDUse &Use : LD->uses()) {
7789 // Skip the uses of the chain.
7790 if (Use.getResNo() != 0)
7791 continue;
7792
7793 // Verify every user is an EXTRACT_SUBREG of the low or high half.
7794 SDNode *User = Use.getUser();
7795 if (!User->hasOneUse() || !User->isMachineOpcode() ||
7796 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
7797 return false;
7798
7799 switch (User->getConstantOperandVal(1)) {
7800 case SystemZ::subreg_l64:
7801 if (LoPart)
7802 return false;
7803 LoPart = User;
7804 break;
7805 case SystemZ::subreg_h64:
7806 if (HiPart)
7807 return false;
7808 HiPart = User;
7809 break;
7810 default:
7811 return false;
7812 }
7813 }
7814 return true;
7815}
7816
7817SDValue SystemZTargetLowering::combineLOAD(
7818 SDNode *N, DAGCombinerInfo &DCI) const {
7819 SelectionDAG &DAG = DCI.DAG;
7820 EVT LdVT = N->getValueType(0);
7821 if (auto *LN = dyn_cast<LoadSDNode>(N)) {
7822 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) {
7823 MVT PtrVT = getPointerTy(DAG.getDataLayout());
7824 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType();
7825 if (PtrVT != LoadNodeVT) {
7826 SDLoc DL(LN);
7827 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(
7828 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0);
7829 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0),
7830 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(),
7831 LN->getMemOperand());
7832 }
7833 }
7834 }
7835 SDLoc DL(N);
7836
7837 // Replace a 128-bit load that is used solely to move its value into GPRs
7838 // by separate loads of both halves.
7839 LoadSDNode *LD = cast<LoadSDNode>(N);
7840 if (LD->isSimple() && ISD::isNormalLoad(LD)) {
7841 SDNode *LoPart, *HiPart;
7842 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) ||
7843 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) {
7844 // Rewrite each extraction as an independent load.
7845 SmallVector<SDValue, 2> ArgChains;
7846 if (HiPart) {
7847 SDValue EltLoad = DAG.getLoad(
7848 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(),
7849 LD->getPointerInfo(), LD->getBaseAlign(),
7850 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7851
7852 DCI.CombineTo(HiPart, EltLoad, true);
7853 ArgChains.push_back(EltLoad.getValue(1));
7854 }
7855 if (LoPart) {
7856 SDValue EltLoad = DAG.getLoad(
7857 LoPart->getValueType(0), DL, LD->getChain(),
7858 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)),
7859 LD->getPointerInfo().getWithOffset(8), LD->getBaseAlign(),
7860 LD->getMemOperand()->getFlags(), LD->getAAInfo());
7861
7862 DCI.CombineTo(LoPart, EltLoad, true);
7863 ArgChains.push_back(EltLoad.getValue(1));
7864 }
7865
7866 // Collect all chains via TokenFactor.
7867 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains);
7868 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
7869 DCI.AddToWorklist(Chain.getNode());
7870 return SDValue(N, 0);
7871 }
7872 }
7873
7874 if (LdVT.isVector() || LdVT.isInteger())
7875 return SDValue();
7876 // Transform a scalar load that is REPLICATEd as well as having other
7877 // use(s) to the form where the other use(s) use the first element of the
7878 // REPLICATE instead of the load. Otherwise instruction selection will not
7879 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
7880 // point loads.
7881
7882 SDValue Replicate;
7883 SmallVector<SDNode*, 8> OtherUses;
7884 for (SDUse &Use : N->uses()) {
7885 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) {
7886 if (Replicate)
7887 return SDValue(); // Should never happen
7888 Replicate = SDValue(Use.getUser(), 0);
7889 } else if (Use.getResNo() == 0)
7890 OtherUses.push_back(Use.getUser());
7891 }
7892 if (!Replicate || OtherUses.empty())
7893 return SDValue();
7894
7895 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
7896 Replicate, DAG.getConstant(0, DL, MVT::i32));
7897 // Update uses of the loaded Value while preserving old chains.
7898 for (SDNode *U : OtherUses) {
7900 for (SDValue Op : U->ops())
7901 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
7902 DAG.UpdateNodeOperands(U, Ops);
7903 }
7904 return SDValue(N, 0);
7905}
7906
7907bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
7908 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
7909 return true;
7910 if (Subtarget.hasVectorEnhancements2())
7911 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128)
7912 return true;
7913 return false;
7914}
7915
7917 if (!VT.isVector() || !VT.isSimple() ||
7918 VT.getSizeInBits() != 128 ||
7919 VT.getScalarSizeInBits() % 8 != 0)
7920 return false;
7921
7922 unsigned NumElts = VT.getVectorNumElements();
7923 for (unsigned i = 0; i < NumElts; ++i) {
7924 if (M[i] < 0) continue; // ignore UNDEF indices
7925 if ((unsigned) M[i] != NumElts - 1 - i)
7926 return false;
7927 }
7928
7929 return true;
7930}
7931
7932static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) {
7933 for (auto *U : StoredVal->users()) {
7934 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) {
7935 EVT CurrMemVT = ST->getMemoryVT().getScalarType();
7936 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16)
7937 continue;
7938 } else if (isa<BuildVectorSDNode>(U)) {
7939 SDValue BuildVector = SDValue(U, 0);
7940 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) &&
7941 isOnlyUsedByStores(BuildVector, DAG))
7942 continue;
7943 }
7944 return false;
7945 }
7946 return true;
7947}
7948
7949static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart,
7950 SDValue &HiPart) {
7951 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse())
7952 return false;
7953
7954 SDValue Op0 = Val.getOperand(0);
7955 SDValue Op1 = Val.getOperand(1);
7956
7957 if (Op0.getOpcode() == ISD::SHL)
7958 std::swap(Op0, Op1);
7959 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() ||
7960 Op1.getOperand(1).getOpcode() != ISD::Constant ||
7961 Op1.getConstantOperandVal(1) != 64)
7962 return false;
7963 Op1 = Op1.getOperand(0);
7964
7965 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() ||
7966 Op0.getOperand(0).getValueType() != MVT::i64)
7967 return false;
7968 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() ||
7969 Op1.getOperand(0).getValueType() != MVT::i64)
7970 return false;
7971
7972 LoPart = Op0.getOperand(0);
7973 HiPart = Op1.getOperand(0);
7974 return true;
7975}
7976
7977static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart,
7978 SDValue &HiPart) {
7979 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() ||
7980 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE)
7981 return false;
7982
7983 if (Val->getNumOperands() != 5 ||
7984 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID ||
7985 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 ||
7986 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64)
7987 return false;
7988
7989 LoPart = Val->getOperand(1);
7990 HiPart = Val->getOperand(3);
7991 return true;
7992}
7993
7994SDValue SystemZTargetLowering::combineSTORE(
7995 SDNode *N, DAGCombinerInfo &DCI) const {
7996 SelectionDAG &DAG = DCI.DAG;
7997 auto *SN = cast<StoreSDNode>(N);
7998 auto &Op1 = N->getOperand(1);
7999 EVT MemVT = SN->getMemoryVT();
8000
8001 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) {
8002 MVT PtrVT = getPointerTy(DAG.getDataLayout());
8003 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType();
8004 if (PtrVT != StoreNodeVT) {
8005 SDLoc DL(SN);
8006 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, SN->getBasePtr(),
8007 SYSTEMZAS::PTR32, 0);
8008 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast,
8009 SN->getPointerInfo(), SN->getBaseAlign(),
8010 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8011 }
8012 }
8013
8014 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
8015 // for the extraction to be done on a vMiN value, so that we can use VSTE.
8016 // If X has wider elements then convert it to:
8017 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
8018 if (MemVT.isInteger() && SN->isTruncatingStore()) {
8019 if (SDValue Value =
8020 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) {
8021 DCI.AddToWorklist(Value.getNode());
8022
8023 // Rewrite the store with the new form of stored value.
8024 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
8025 SN->getBasePtr(), SN->getMemoryVT(),
8026 SN->getMemOperand());
8027 }
8028 }
8029 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
8030 if (!SN->isTruncatingStore() &&
8031 Op1.getOpcode() == ISD::BSWAP &&
8032 Op1.getNode()->hasOneUse() &&
8033 canLoadStoreByteSwapped(Op1.getValueType())) {
8034
8035 SDValue BSwapOp = Op1.getOperand(0);
8036
8037 if (BSwapOp.getValueType() == MVT::i16)
8038 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp);
8039
8040 SDValue Ops[] = {
8041 N->getOperand(0), BSwapOp, N->getOperand(2)
8042 };
8043
8044 return
8045 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
8046 Ops, MemVT, SN->getMemOperand());
8047 }
8048 // Combine STORE (element-swap) into VSTER
8049 if (!SN->isTruncatingStore() &&
8050 Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
8051 Op1.getNode()->hasOneUse() &&
8052 Subtarget.hasVectorEnhancements2()) {
8053 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
8054 ArrayRef<int> ShuffleMask = SVN->getMask();
8055 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
8056 SDValue Ops[] = {
8057 N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
8058 };
8059
8060 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
8061 DAG.getVTList(MVT::Other),
8062 Ops, MemVT, SN->getMemOperand());
8063 }
8064 }
8065
8066 // Combine STORE (READCYCLECOUNTER) into STCKF.
8067 if (!SN->isTruncatingStore() &&
8068 Op1.getOpcode() == ISD::READCYCLECOUNTER &&
8069 Op1.hasOneUse() &&
8070 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) {
8071 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) };
8072 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N),
8073 DAG.getVTList(MVT::Other),
8074 Ops, MemVT, SN->getMemOperand());
8075 }
8076
8077 // Transform a store of a 128-bit value moved from parts into two stores.
8078 if (SN->isSimple() && ISD::isNormalStore(SN)) {
8079 SDValue LoPart, HiPart;
8080 if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
8081 (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
8082 SDLoc DL(SN);
8083 SDValue Chain0 = DAG.getStore(
8084 SN->getChain(), DL, HiPart, SN->getBasePtr(), SN->getPointerInfo(),
8085 SN->getBaseAlign(), SN->getMemOperand()->getFlags(), SN->getAAInfo());
8086 SDValue Chain1 = DAG.getStore(
8087 SN->getChain(), DL, LoPart,
8088 DAG.getObjectPtrOffset(DL, SN->getBasePtr(), TypeSize::getFixed(8)),
8089 SN->getPointerInfo().getWithOffset(8), SN->getBaseAlign(),
8090 SN->getMemOperand()->getFlags(), SN->getAAInfo());
8091
8092 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
8093 }
8094 }
8095
8096 // Replicate a reg or immediate with VREP instead of scalar multiply or
8097 // immediate load. It seems best to do this during the first DAGCombine as
8098 // it is straight-forward to handle the zero-extend node in the initial
8099 // DAG, and also not worry about the keeping the new MemVT legal (e.g. when
8100 // extracting an i16 element from a v16i8 vector).
8101 if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
8102 isOnlyUsedByStores(Op1, DAG)) {
8103 SDValue Word = SDValue();
8104 EVT WordVT;
8105
8106 // Find a replicated immediate and return it if found in Word and its
8107 // type in WordVT.
8108 auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
8109 // Some constants are better handled with a scalar store.
8110 if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
8111 isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
8112 return;
8113
8114 APInt Val = C->getAPIntValue();
8115 // Truncate Val in case of a truncating store.
8116 if (!llvm::isUIntN(TotBytes * 8, Val.getZExtValue())) {
8117 assert(SN->isTruncatingStore() &&
8118 "Non-truncating store and immediate value does not fit?");
8119 Val = Val.trunc(TotBytes * 8);
8120 }
8121
8122 SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
8123 if (VCI.isVectorConstantLegal(Subtarget) &&
8124 VCI.Opcode == SystemZISD::REPLICATE) {
8125 Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
8126 WordVT = VCI.VecVT.getScalarType();
8127 }
8128 };
8129
8130 // Find a replicated register and return it if found in Word and its type
8131 // in WordVT.
8132 auto FindReplicatedReg = [&](SDValue MulOp) {
8133 EVT MulVT = MulOp.getValueType();
8134 if (MulOp->getOpcode() == ISD::MUL &&
8135 (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
8136 // Find a zero extended value and its type.
8137 SDValue LHS = MulOp->getOperand(0);
8138 if (LHS->getOpcode() == ISD::ZERO_EXTEND)
8139 WordVT = LHS->getOperand(0).getValueType();
8140 else if (LHS->getOpcode() == ISD::AssertZext)
8141 WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
8142 else
8143 return;
8144 // Find a replicating constant, e.g. 0x00010001.
8145 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) {
8146 SystemZVectorConstantInfo VCI(
8147 APInt(MulVT.getSizeInBits(), C->getZExtValue()));
8148 if (VCI.isVectorConstantLegal(Subtarget) &&
8149 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 &&
8150 WordVT == VCI.VecVT.getScalarType())
8151 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT);
8152 }
8153 }
8154 };
8155
8156 if (isa<BuildVectorSDNode>(Op1) &&
8157 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) {
8158 SDValue SplatVal = Op1->getOperand(0);
8159 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal))
8160 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize());
8161 else
8162 FindReplicatedReg(SplatVal);
8163 } else {
8164 if (auto *C = dyn_cast<ConstantSDNode>(Op1))
8165 FindReplicatedImm(C, MemVT.getStoreSize());
8166 else
8167 FindReplicatedReg(Op1);
8168 }
8169
8170 if (Word != SDValue()) {
8171 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 &&
8172 "Bad type handling");
8173 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits();
8174 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts);
8175 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word);
8176 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal,
8177 SN->getBasePtr(), SN->getMemOperand());
8178 }
8179 }
8180
8181 return SDValue();
8182}
8183
8184SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
8185 SDNode *N, DAGCombinerInfo &DCI) const {
8186 SelectionDAG &DAG = DCI.DAG;
8187 // Combine element-swap (LOAD) into VLER
8188 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8189 N->getOperand(0).hasOneUse() &&
8190 Subtarget.hasVectorEnhancements2()) {
8191 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
8192 ArrayRef<int> ShuffleMask = SVN->getMask();
8193 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
8194 SDValue Load = N->getOperand(0);
8195 LoadSDNode *LD = cast<LoadSDNode>(Load);
8196
8197 // Create the element-swapping load.
8198 SDValue Ops[] = {
8199 LD->getChain(), // Chain
8200 LD->getBasePtr() // Ptr
8201 };
8202 SDValue ESLoad =
8203 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
8204 DAG.getVTList(LD->getValueType(0), MVT::Other),
8205 Ops, LD->getMemoryVT(), LD->getMemOperand());
8206
8207 // First, combine the VECTOR_SHUFFLE away. This makes the value produced
8208 // by the load dead.
8209 DCI.CombineTo(N, ESLoad);
8210
8211 // Next, combine the load away, we give it a bogus result value but a real
8212 // chain result. The result value is dead because the shuffle is dead.
8213 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
8214
8215 // Return N so it doesn't get rechecked!
8216 return SDValue(N, 0);
8217 }
8218 }
8219
8220 return SDValue();
8221}
8222
8223SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
8224 SDNode *N, DAGCombinerInfo &DCI) const {
8225 SelectionDAG &DAG = DCI.DAG;
8226
8227 if (!Subtarget.hasVector())
8228 return SDValue();
8229
8230 // Look through bitcasts that retain the number of vector elements.
8231 SDValue Op = N->getOperand(0);
8232 if (Op.getOpcode() == ISD::BITCAST &&
8233 Op.getValueType().isVector() &&
8234 Op.getOperand(0).getValueType().isVector() &&
8235 Op.getValueType().getVectorNumElements() ==
8236 Op.getOperand(0).getValueType().getVectorNumElements())
8237 Op = Op.getOperand(0);
8238
8239 // Pull BSWAP out of a vector extraction.
8240 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
8241 EVT VecVT = Op.getValueType();
8242 EVT EltVT = VecVT.getVectorElementType();
8243 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
8244 Op.getOperand(0), N->getOperand(1));
8245 DCI.AddToWorklist(Op.getNode());
8246 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
8247 if (EltVT != N->getValueType(0)) {
8248 DCI.AddToWorklist(Op.getNode());
8249 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
8250 }
8251 return Op;
8252 }
8253
8254 // Try to simplify a vector extraction.
8255 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
8256 SDValue Op0 = N->getOperand(0);
8257 EVT VecVT = Op0.getValueType();
8258 if (canTreatAsByteVector(VecVT))
8259 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
8260 IndexN->getZExtValue(), DCI, false);
8261 }
8262 return SDValue();
8263}
8264
8265SDValue SystemZTargetLowering::combineJOIN_DWORDS(
8266 SDNode *N, DAGCombinerInfo &DCI) const {
8267 SelectionDAG &DAG = DCI.DAG;
8268 // (join_dwords X, X) == (replicate X)
8269 if (N->getOperand(0) == N->getOperand(1))
8270 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
8271 N->getOperand(0));
8272 return SDValue();
8273}
8274
8276 SDValue Chain1 = N1->getOperand(0);
8277 SDValue Chain2 = N2->getOperand(0);
8278
8279 // Trivial case: both nodes take the same chain.
8280 if (Chain1 == Chain2)
8281 return Chain1;
8282
8283 // FIXME - we could handle more complex cases via TokenFactor,
8284 // assuming we can verify that this would not create a cycle.
8285 return SDValue();
8286}
8287
8288SDValue SystemZTargetLowering::combineFP_ROUND(
8289 SDNode *N, DAGCombinerInfo &DCI) const {
8290
8291 if (!Subtarget.hasVector())
8292 return SDValue();
8293
8294 // (fpround (extract_vector_elt X 0))
8295 // (fpround (extract_vector_elt X 1)) ->
8296 // (extract_vector_elt (VROUND X) 0)
8297 // (extract_vector_elt (VROUND X) 2)
8298 //
8299 // This is a special case since the target doesn't really support v2f32s.
8300 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8301 SelectionDAG &DAG = DCI.DAG;
8302 SDValue Op0 = N->getOperand(OpNo);
8303 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() &&
8305 Op0.getOperand(0).getValueType() == MVT::v2f64 &&
8306 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8307 Op0.getConstantOperandVal(1) == 0) {
8308 SDValue Vec = Op0.getOperand(0);
8309 for (auto *U : Vec->users()) {
8310 if (U != Op0.getNode() && U->hasOneUse() &&
8311 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8312 U->getOperand(0) == Vec &&
8313 U->getOperand(1).getOpcode() == ISD::Constant &&
8314 U->getConstantOperandVal(1) == 1) {
8315 SDValue OtherRound = SDValue(*U->user_begin(), 0);
8316 if (OtherRound.getOpcode() == N->getOpcode() &&
8317 OtherRound.getOperand(OpNo) == SDValue(U, 0) &&
8318 OtherRound.getValueType() == MVT::f32) {
8319 SDValue VRound, Chain;
8320 if (N->isStrictFPOpcode()) {
8321 Chain = MergeInputChains(N, OtherRound.getNode());
8322 if (!Chain)
8323 continue;
8324 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N),
8325 {MVT::v4f32, MVT::Other}, {Chain, Vec});
8326 Chain = VRound.getValue(1);
8327 } else
8328 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
8329 MVT::v4f32, Vec);
8330 DCI.AddToWorklist(VRound.getNode());
8331 SDValue Extract1 =
8332 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
8333 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32));
8334 DCI.AddToWorklist(Extract1.getNode());
8335 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
8336 if (Chain)
8337 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain);
8338 SDValue Extract0 =
8339 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
8340 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8341 if (Chain)
8342 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8343 N->getVTList(), Extract0, Chain);
8344 return Extract0;
8345 }
8346 }
8347 }
8348 }
8349 return SDValue();
8350}
8351
8352SDValue SystemZTargetLowering::combineFP_EXTEND(
8353 SDNode *N, DAGCombinerInfo &DCI) const {
8354
8355 if (!Subtarget.hasVector())
8356 return SDValue();
8357
8358 // (fpextend (extract_vector_elt X 0))
8359 // (fpextend (extract_vector_elt X 2)) ->
8360 // (extract_vector_elt (VEXTEND X) 0)
8361 // (extract_vector_elt (VEXTEND X) 1)
8362 //
8363 // This is a special case since the target doesn't really support v2f32s.
8364 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
8365 SelectionDAG &DAG = DCI.DAG;
8366 SDValue Op0 = N->getOperand(OpNo);
8367 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() &&
8369 Op0.getOperand(0).getValueType() == MVT::v4f32 &&
8370 Op0.getOperand(1).getOpcode() == ISD::Constant &&
8371 Op0.getConstantOperandVal(1) == 0) {
8372 SDValue Vec = Op0.getOperand(0);
8373 for (auto *U : Vec->users()) {
8374 if (U != Op0.getNode() && U->hasOneUse() &&
8375 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
8376 U->getOperand(0) == Vec &&
8377 U->getOperand(1).getOpcode() == ISD::Constant &&
8378 U->getConstantOperandVal(1) == 2) {
8379 SDValue OtherExtend = SDValue(*U->user_begin(), 0);
8380 if (OtherExtend.getOpcode() == N->getOpcode() &&
8381 OtherExtend.getOperand(OpNo) == SDValue(U, 0) &&
8382 OtherExtend.getValueType() == MVT::f64) {
8383 SDValue VExtend, Chain;
8384 if (N->isStrictFPOpcode()) {
8385 Chain = MergeInputChains(N, OtherExtend.getNode());
8386 if (!Chain)
8387 continue;
8388 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N),
8389 {MVT::v2f64, MVT::Other}, {Chain, Vec});
8390 Chain = VExtend.getValue(1);
8391 } else
8392 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
8393 MVT::v2f64, Vec);
8394 DCI.AddToWorklist(VExtend.getNode());
8395 SDValue Extract1 =
8396 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
8397 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
8398 DCI.AddToWorklist(Extract1.getNode());
8399 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
8400 if (Chain)
8401 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain);
8402 SDValue Extract0 =
8403 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
8404 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
8405 if (Chain)
8406 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0),
8407 N->getVTList(), Extract0, Chain);
8408 return Extract0;
8409 }
8410 }
8411 }
8412 }
8413 return SDValue();
8414}
8415
8416SDValue SystemZTargetLowering::combineINT_TO_FP(
8417 SDNode *N, DAGCombinerInfo &DCI) const {
8418 if (DCI.Level != BeforeLegalizeTypes)
8419 return SDValue();
8420 SelectionDAG &DAG = DCI.DAG;
8421 LLVMContext &Ctx = *DAG.getContext();
8422 unsigned Opcode = N->getOpcode();
8423 EVT OutVT = N->getValueType(0);
8424 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx);
8425 SDValue Op = N->getOperand(0);
8426 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits();
8427 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
8428
8429 // Insert an extension before type-legalization to avoid scalarization, e.g.:
8430 // v2f64 = uint_to_fp v2i16
8431 // =>
8432 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
8433 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits &&
8434 OutScalarBits <= 64) {
8435 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements();
8436 EVT ExtVT = EVT::getVectorVT(
8437 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts);
8438 unsigned ExtOpcode =
8440 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op);
8441 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp);
8442 }
8443 return SDValue();
8444}
8445
8446SDValue SystemZTargetLowering::combineFCOPYSIGN(
8447 SDNode *N, DAGCombinerInfo &DCI) const {
8448 SelectionDAG &DAG = DCI.DAG;
8449 EVT VT = N->getValueType(0);
8450 SDValue ValOp = N->getOperand(0);
8451 SDValue SignOp = N->getOperand(1);
8452
8453 // Remove the rounding which is not needed.
8454 if (SignOp.getOpcode() == ISD::FP_ROUND) {
8455 SDValue WideOp = SignOp.getOperand(0);
8456 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, ValOp, WideOp);
8457 }
8458
8459 return SDValue();
8460}
8461
8462SDValue SystemZTargetLowering::combineBSWAP(
8463 SDNode *N, DAGCombinerInfo &DCI) const {
8464 SelectionDAG &DAG = DCI.DAG;
8465 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
8466 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
8467 N->getOperand(0).hasOneUse() &&
8468 canLoadStoreByteSwapped(N->getValueType(0))) {
8469 SDValue Load = N->getOperand(0);
8470 LoadSDNode *LD = cast<LoadSDNode>(Load);
8471
8472 // Create the byte-swapping load.
8473 SDValue Ops[] = {
8474 LD->getChain(), // Chain
8475 LD->getBasePtr() // Ptr
8476 };
8477 EVT LoadVT = N->getValueType(0);
8478 if (LoadVT == MVT::i16)
8479 LoadVT = MVT::i32;
8480 SDValue BSLoad =
8481 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N),
8482 DAG.getVTList(LoadVT, MVT::Other),
8483 Ops, LD->getMemoryVT(), LD->getMemOperand());
8484
8485 // If this is an i16 load, insert the truncate.
8486 SDValue ResVal = BSLoad;
8487 if (N->getValueType(0) == MVT::i16)
8488 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad);
8489
8490 // First, combine the bswap away. This makes the value produced by the
8491 // load dead.
8492 DCI.CombineTo(N, ResVal);
8493
8494 // Next, combine the load away, we give it a bogus result value but a real
8495 // chain result. The result value is dead because the bswap is dead.
8496 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
8497
8498 // Return N so it doesn't get rechecked!
8499 return SDValue(N, 0);
8500 }
8501
8502 // Look through bitcasts that retain the number of vector elements.
8503 SDValue Op = N->getOperand(0);
8504 if (Op.getOpcode() == ISD::BITCAST &&
8505 Op.getValueType().isVector() &&
8506 Op.getOperand(0).getValueType().isVector() &&
8507 Op.getValueType().getVectorNumElements() ==
8508 Op.getOperand(0).getValueType().getVectorNumElements())
8509 Op = Op.getOperand(0);
8510
8511 // Push BSWAP into a vector insertion if at least one side then simplifies.
8512 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
8513 SDValue Vec = Op.getOperand(0);
8514 SDValue Elt = Op.getOperand(1);
8515 SDValue Idx = Op.getOperand(2);
8516
8518 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
8520 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
8521 (canLoadStoreByteSwapped(N->getValueType(0)) &&
8522 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
8523 EVT VecVT = N->getValueType(0);
8524 EVT EltVT = N->getValueType(0).getVectorElementType();
8525 if (VecVT != Vec.getValueType()) {
8526 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
8527 DCI.AddToWorklist(Vec.getNode());
8528 }
8529 if (EltVT != Elt.getValueType()) {
8530 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
8531 DCI.AddToWorklist(Elt.getNode());
8532 }
8533 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
8534 DCI.AddToWorklist(Vec.getNode());
8535 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
8536 DCI.AddToWorklist(Elt.getNode());
8537 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
8538 Vec, Elt, Idx);
8539 }
8540 }
8541
8542 // Push BSWAP into a vector shuffle if at least one side then simplifies.
8543 ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
8544 if (SV && Op.hasOneUse()) {
8545 SDValue Op0 = Op.getOperand(0);
8546 SDValue Op1 = Op.getOperand(1);
8547
8549 Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
8551 Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
8552 EVT VecVT = N->getValueType(0);
8553 if (VecVT != Op0.getValueType()) {
8554 Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
8555 DCI.AddToWorklist(Op0.getNode());
8556 }
8557 if (VecVT != Op1.getValueType()) {
8558 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
8559 DCI.AddToWorklist(Op1.getNode());
8560 }
8561 Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
8562 DCI.AddToWorklist(Op0.getNode());
8563 Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
8564 DCI.AddToWorklist(Op1.getNode());
8565 return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
8566 }
8567 }
8568
8569 return SDValue();
8570}
8571
8572SDValue SystemZTargetLowering::combineSETCC(
8573 SDNode *N, DAGCombinerInfo &DCI) const {
8574 SelectionDAG &DAG = DCI.DAG;
8575 const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
8576 const SDValue LHS = N->getOperand(0);
8577 const SDValue RHS = N->getOperand(1);
8578 bool CmpNull = isNullConstant(RHS);
8579 bool CmpAllOnes = isAllOnesConstant(RHS);
8580 EVT VT = N->getValueType(0);
8581 SDLoc DL(N);
8582
8583 // Match icmp_eq/ne(bitcast(icmp(X,Y)),0/-1) reduction patterns, and
8584 // change the outer compare to a i128 compare. This will normally
8585 // allow the reduction to be recognized in adjustICmp128, and even if
8586 // not, the i128 compare will still generate better code.
8587 if ((CC == ISD::SETNE || CC == ISD::SETEQ) && (CmpNull || CmpAllOnes)) {
8589 if (Src.getOpcode() == ISD::SETCC &&
8590 Src.getValueType().isFixedLengthVector() &&
8591 Src.getValueType().getScalarType() == MVT::i1) {
8592 EVT CmpVT = Src.getOperand(0).getValueType();
8593 if (CmpVT.getSizeInBits() == 128) {
8594 EVT IntVT = CmpVT.changeVectorElementTypeToInteger();
8595 SDValue LHS =
8596 DAG.getBitcast(MVT::i128, DAG.getSExtOrTrunc(Src, DL, IntVT));
8597 SDValue RHS = CmpNull ? DAG.getConstant(0, DL, MVT::i128)
8598 : DAG.getAllOnesConstant(DL, MVT::i128);
8599 return DAG.getNode(ISD::SETCC, DL, VT, LHS, RHS, N->getOperand(2),
8600 N->getFlags());
8601 }
8602 }
8603 }
8604
8605 return SDValue();
8606}
8607
8608static std::pair<SDValue, int> findCCUse(const SDValue &Val) {
8609 switch (Val.getOpcode()) {
8610 default:
8611 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8612 case SystemZISD::IPM:
8613 if (Val.getOperand(0).getOpcode() == SystemZISD::CLC ||
8614 Val.getOperand(0).getOpcode() == SystemZISD::STRCMP)
8615 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ICMP);
8616 return std::make_pair(Val.getOperand(0), SystemZ::CCMASK_ANY);
8617 case SystemZISD::SELECT_CCMASK: {
8618 SDValue Op4CCReg = Val.getOperand(4);
8619 if (Op4CCReg.getOpcode() == SystemZISD::ICMP ||
8620 Op4CCReg.getOpcode() == SystemZISD::TM) {
8621 auto [OpCC, OpCCValid] = findCCUse(Op4CCReg.getOperand(0));
8622 if (OpCC != SDValue())
8623 return std::make_pair(OpCC, OpCCValid);
8624 }
8625 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8626 if (!CCValid)
8627 return std::make_pair(SDValue(), SystemZ::CCMASK_NONE);
8628 int CCValidVal = CCValid->getZExtValue();
8629 return std::make_pair(Op4CCReg, CCValidVal);
8630 }
8631 case ISD::ADD:
8632 case ISD::AND:
8633 case ISD::OR:
8634 case ISD::XOR:
8635 case ISD::SHL:
8636 case ISD::SRA:
8637 case ISD::SRL:
8638 auto [Op0CC, Op0CCValid] = findCCUse(Val.getOperand(0));
8639 if (Op0CC != SDValue())
8640 return std::make_pair(Op0CC, Op0CCValid);
8641 return findCCUse(Val.getOperand(1));
8642 }
8643}
8644
8645static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8646 SelectionDAG &DAG);
8647
8649 SelectionDAG &DAG) {
8650 SDLoc DL(Val);
8651 auto Opcode = Val.getOpcode();
8652 switch (Opcode) {
8653 default:
8654 return {};
8655 case ISD::Constant:
8656 return {Val, Val, Val, Val};
8657 case SystemZISD::IPM: {
8658 SDValue IPMOp0 = Val.getOperand(0);
8659 if (IPMOp0 != CC)
8660 return {};
8661 SmallVector<SDValue, 4> ShiftedCCVals;
8662 for (auto CC : {0, 1, 2, 3})
8663 ShiftedCCVals.emplace_back(
8664 DAG.getConstant((CC << SystemZ::IPM_CC), DL, MVT::i32));
8665 return ShiftedCCVals;
8666 }
8667 case SystemZISD::SELECT_CCMASK: {
8668 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
8669 auto *CCValid = dyn_cast<ConstantSDNode>(Val.getOperand(2));
8670 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
8671 if (!CCValid || !CCMask)
8672 return {};
8673
8674 int CCValidVal = CCValid->getZExtValue();
8675 int CCMaskVal = CCMask->getZExtValue();
8676 // Pruning search tree early - Moving CC test and combineCCMask ahead of
8677 // recursive call to simplifyAssumingCCVal.
8678 SDValue Op4CCReg = Val.getOperand(4);
8679 if (Op4CCReg != CC)
8680 combineCCMask(Op4CCReg, CCValidVal, CCMaskVal, DAG);
8681 if (Op4CCReg != CC)
8682 return {};
8683 const auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CC, DAG);
8684 const auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CC, DAG);
8685 if (TrueSDVals.empty() || FalseSDVals.empty())
8686 return {};
8687 SmallVector<SDValue, 4> MergedSDVals;
8688 for (auto &CCVal : {0, 1, 2, 3})
8689 MergedSDVals.emplace_back(((CCMaskVal & (1 << (3 - CCVal))) != 0)
8690 ? TrueSDVals[CCVal]
8691 : FalseSDVals[CCVal]);
8692 return MergedSDVals;
8693 }
8694 case ISD::ADD:
8695 case ISD::AND:
8696 case ISD::OR:
8697 case ISD::XOR:
8698 case ISD::SRA:
8699 // Avoid introducing CC spills (because ADD/AND/OR/XOR/SRA
8700 // would clobber CC).
8701 if (!Val.hasOneUse())
8702 return {};
8703 [[fallthrough]];
8704 case ISD::SHL:
8705 case ISD::SRL:
8706 SDValue Op0 = Val.getOperand(0), Op1 = Val.getOperand(1);
8707 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, CC, DAG);
8708 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, CC, DAG);
8709 if (Op0SDVals.empty() || Op1SDVals.empty())
8710 return {};
8711 SmallVector<SDValue, 4> BinaryOpSDVals;
8712 for (auto CCVal : {0, 1, 2, 3})
8713 BinaryOpSDVals.emplace_back(DAG.getNode(
8714 Opcode, DL, Val.getValueType(), Op0SDVals[CCVal], Op1SDVals[CCVal]));
8715 return BinaryOpSDVals;
8716 }
8717}
8718
8719static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask,
8720 SelectionDAG &DAG) {
8721 // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
8722 // set by the CCReg instruction using the CCValid / CCMask masks,
8723 // If the CCReg instruction is itself a ICMP / TM testing the condition
8724 // code set by some other instruction, see whether we can directly
8725 // use that condition code.
8726 auto *CCNode = CCReg.getNode();
8727 if (!CCNode)
8728 return false;
8729
8730 if (CCNode->getOpcode() == SystemZISD::TM) {
8731 if (CCValid != SystemZ::CCMASK_TM)
8732 return false;
8733 auto emulateTMCCMask = [](const SDValue &Op0Val, const SDValue &Op1Val) {
8734 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8735 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8736 if (!Op0Node || !Op1Node)
8737 return -1;
8738 auto Op0APVal = Op0Node->getAPIntValue();
8739 auto Op1APVal = Op1Node->getAPIntValue();
8740 auto Result = Op0APVal & Op1APVal;
8741 bool AllOnes = Result == Op1APVal;
8742 bool AllZeros = Result == 0;
8743 bool IsLeftMostBitSet = Result[Op1APVal.getActiveBits()] != 0;
8744 return AllZeros ? 0 : AllOnes ? 3 : IsLeftMostBitSet ? 2 : 1;
8745 };
8746 SDValue Op0 = CCNode->getOperand(0);
8747 SDValue Op1 = CCNode->getOperand(1);
8748 auto [Op0CC, Op0CCValid] = findCCUse(Op0);
8749 if (Op0CC == SDValue())
8750 return false;
8751 const auto &&Op0SDVals = simplifyAssumingCCVal(Op0, Op0CC, DAG);
8752 const auto &&Op1SDVals = simplifyAssumingCCVal(Op1, Op0CC, DAG);
8753 if (Op0SDVals.empty() || Op1SDVals.empty())
8754 return false;
8755 int NewCCMask = 0;
8756 for (auto CC : {0, 1, 2, 3}) {
8757 auto CCVal = emulateTMCCMask(Op0SDVals[CC], Op1SDVals[CC]);
8758 if (CCVal < 0)
8759 return false;
8760 NewCCMask <<= 1;
8761 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8762 }
8763 NewCCMask &= Op0CCValid;
8764 CCReg = Op0CC;
8765 CCMask = NewCCMask;
8766 CCValid = Op0CCValid;
8767 return true;
8768 }
8769 if (CCNode->getOpcode() != SystemZISD::ICMP ||
8770 CCValid != SystemZ::CCMASK_ICMP)
8771 return false;
8772
8773 SDValue CmpOp0 = CCNode->getOperand(0);
8774 SDValue CmpOp1 = CCNode->getOperand(1);
8775 SDValue CmpOp2 = CCNode->getOperand(2);
8776 auto [Op0CC, Op0CCValid] = findCCUse(CmpOp0);
8777 if (Op0CC != SDValue()) {
8778 const auto &&Op0SDVals = simplifyAssumingCCVal(CmpOp0, Op0CC, DAG);
8779 const auto &&Op1SDVals = simplifyAssumingCCVal(CmpOp1, Op0CC, DAG);
8780 if (Op0SDVals.empty() || Op1SDVals.empty())
8781 return false;
8782
8783 auto *CmpType = dyn_cast<ConstantSDNode>(CmpOp2);
8784 auto CmpTypeVal = CmpType->getZExtValue();
8785 const auto compareCCSigned = [&CmpTypeVal](const SDValue &Op0Val,
8786 const SDValue &Op1Val) {
8787 auto *Op0Node = dyn_cast<ConstantSDNode>(Op0Val.getNode());
8788 auto *Op1Node = dyn_cast<ConstantSDNode>(Op1Val.getNode());
8789 if (!Op0Node || !Op1Node)
8790 return -1;
8791 auto Op0APVal = Op0Node->getAPIntValue();
8792 auto Op1APVal = Op1Node->getAPIntValue();
8793 if (CmpTypeVal == SystemZICMP::SignedOnly)
8794 return Op0APVal == Op1APVal ? 0 : Op0APVal.slt(Op1APVal) ? 1 : 2;
8795 return Op0APVal == Op1APVal ? 0 : Op0APVal.ult(Op1APVal) ? 1 : 2;
8796 };
8797 int NewCCMask = 0;
8798 for (auto CC : {0, 1, 2, 3}) {
8799 auto CCVal = compareCCSigned(Op0SDVals[CC], Op1SDVals[CC]);
8800 if (CCVal < 0)
8801 return false;
8802 NewCCMask <<= 1;
8803 NewCCMask |= (CCMask & (1 << (3 - CCVal))) != 0;
8804 }
8805 NewCCMask &= Op0CCValid;
8806 CCMask = NewCCMask;
8807 CCReg = Op0CC;
8808 CCValid = Op0CCValid;
8809 return true;
8810 }
8811
8812 return false;
8813}
8814
8815// Merging versus split in multiple branches cost.
8818 const Value *Lhs,
8819 const Value *Rhs) const {
8820 const auto isFlagOutOpCC = [](const Value *V) {
8821 using namespace llvm::PatternMatch;
8822 const Value *RHSVal;
8823 const APInt *RHSC;
8824 if (const auto *I = dyn_cast<Instruction>(V)) {
8825 // PatternMatch.h provides concise tree-based pattern match of llvm IR.
8826 if (match(I->getOperand(0), m_And(m_Value(RHSVal), m_APInt(RHSC))) ||
8827 match(I, m_Cmp(m_Value(RHSVal), m_APInt(RHSC)))) {
8828 if (const auto *CB = dyn_cast<CallBase>(RHSVal)) {
8829 if (CB->isInlineAsm()) {
8830 const InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
8831 return IA && IA->getConstraintString().contains("{@cc}");
8832 }
8833 }
8834 }
8835 }
8836 return false;
8837 };
8838 // Pattern (ICmp %asm) or (ICmp (And %asm)).
8839 // Cost of longest dependency chain (ICmp, And) is 2. CostThreshold or
8840 // BaseCost can be set >=2. If cost of instruction <= CostThreshold
8841 // conditionals will be merged or else conditionals will be split.
8842 if (isFlagOutOpCC(Lhs) && isFlagOutOpCC(Rhs))
8843 return {3, 0, -1};
8844 // Default.
8845 return {-1, -1, -1};
8846}
8847
8848SDValue SystemZTargetLowering::combineBR_CCMASK(SDNode *N,
8849 DAGCombinerInfo &DCI) const {
8850 SelectionDAG &DAG = DCI.DAG;
8851
8852 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK.
8853 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8854 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8855 if (!CCValid || !CCMask)
8856 return SDValue();
8857
8858 int CCValidVal = CCValid->getZExtValue();
8859 int CCMaskVal = CCMask->getZExtValue();
8860 SDValue Chain = N->getOperand(0);
8861 SDValue CCReg = N->getOperand(4);
8862 if (combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG))
8863 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0),
8864 Chain,
8865 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8866 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32),
8867 N->getOperand(3), CCReg);
8868 return SDValue();
8869}
8870
8871SDValue SystemZTargetLowering::combineSELECT_CCMASK(
8872 SDNode *N, DAGCombinerInfo &DCI) const {
8873 SelectionDAG &DAG = DCI.DAG;
8874
8875 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK.
8876 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2));
8877 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3));
8878 if (!CCValid || !CCMask)
8879 return SDValue();
8880
8881 int CCValidVal = CCValid->getZExtValue();
8882 int CCMaskVal = CCMask->getZExtValue();
8883 SDValue CCReg = N->getOperand(4);
8884
8885 bool IsCombinedCCReg = combineCCMask(CCReg, CCValidVal, CCMaskVal, DAG);
8886
8887 // Populate SDVals vector for each condition code ccval for given Val, which
8888 // can again be another nested select_ccmask with the same CC.
8889 const auto constructCCSDValsFromSELECT = [&CCReg](SDValue &Val) {
8890 if (Val.getOpcode() == SystemZISD::SELECT_CCMASK) {
8892 if (Val.getOperand(4) != CCReg)
8893 return SmallVector<SDValue, 4>{};
8894 SDValue TrueVal = Val.getOperand(0), FalseVal = Val.getOperand(1);
8895 auto *CCMask = dyn_cast<ConstantSDNode>(Val.getOperand(3));
8896 if (!CCMask)
8897 return SmallVector<SDValue, 4>{};
8898
8899 int CCMaskVal = CCMask->getZExtValue();
8900 for (auto &CC : {0, 1, 2, 3})
8901 Res.emplace_back(((CCMaskVal & (1 << (3 - CC))) != 0) ? TrueVal
8902 : FalseVal);
8903 return Res;
8904 }
8905 return SmallVector<SDValue, 4>{Val, Val, Val, Val};
8906 };
8907 // Attempting to optimize TrueVal/FalseVal in outermost select_ccmask either
8908 // with CCReg found by combineCCMask or original CCReg.
8909 SDValue TrueVal = N->getOperand(0);
8910 SDValue FalseVal = N->getOperand(1);
8911 auto &&TrueSDVals = simplifyAssumingCCVal(TrueVal, CCReg, DAG);
8912 auto &&FalseSDVals = simplifyAssumingCCVal(FalseVal, CCReg, DAG);
8913 // TrueSDVals/FalseSDVals might be empty in case of non-constant
8914 // TrueVal/FalseVal for select_ccmask, which can not be optimized further.
8915 if (TrueSDVals.empty())
8916 TrueSDVals = constructCCSDValsFromSELECT(TrueVal);
8917 if (FalseSDVals.empty())
8918 FalseSDVals = constructCCSDValsFromSELECT(FalseVal);
8919 if (!TrueSDVals.empty() && !FalseSDVals.empty()) {
8920 SmallSet<SDValue, 4> MergedSDValsSet;
8921 // Ignoring CC values outside CCValiid.
8922 for (auto CC : {0, 1, 2, 3}) {
8923 if ((CCValidVal & ((1 << (3 - CC)))) != 0)
8924 MergedSDValsSet.insert(((CCMaskVal & (1 << (3 - CC))) != 0)
8925 ? TrueSDVals[CC]
8926 : FalseSDVals[CC]);
8927 }
8928 if (MergedSDValsSet.size() == 1)
8929 return *MergedSDValsSet.begin();
8930 if (MergedSDValsSet.size() == 2) {
8931 auto BeginIt = MergedSDValsSet.begin();
8932 SDValue NewTrueVal = *BeginIt, NewFalseVal = *next(BeginIt);
8933 if (NewTrueVal == FalseVal || NewFalseVal == TrueVal)
8934 std::swap(NewTrueVal, NewFalseVal);
8935 int NewCCMask = 0;
8936 for (auto CC : {0, 1, 2, 3}) {
8937 NewCCMask <<= 1;
8938 NewCCMask |= ((CCMaskVal & (1 << (3 - CC))) != 0)
8939 ? (TrueSDVals[CC] == NewTrueVal)
8940 : (FalseSDVals[CC] == NewTrueVal);
8941 }
8942 CCMaskVal = NewCCMask;
8943 CCMaskVal &= CCValidVal;
8944 TrueVal = NewTrueVal;
8945 FalseVal = NewFalseVal;
8946 IsCombinedCCReg = true;
8947 }
8948 }
8949
8950 if (IsCombinedCCReg)
8951 return DAG.getNode(
8952 SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), TrueVal,
8953 FalseVal, DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32),
8954 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), CCReg);
8955
8956 return SDValue();
8957}
8958
8959SDValue SystemZTargetLowering::combineGET_CCMASK(
8960 SDNode *N, DAGCombinerInfo &DCI) const {
8961
8962 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible
8963 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1));
8964 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2));
8965 if (!CCValid || !CCMask)
8966 return SDValue();
8967 int CCValidVal = CCValid->getZExtValue();
8968 int CCMaskVal = CCMask->getZExtValue();
8969
8970 SDValue Select = N->getOperand(0);
8971 if (Select->getOpcode() == ISD::TRUNCATE)
8972 Select = Select->getOperand(0);
8973 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK)
8974 return SDValue();
8975
8976 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2));
8977 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3));
8978 if (!SelectCCValid || !SelectCCMask)
8979 return SDValue();
8980 int SelectCCValidVal = SelectCCValid->getZExtValue();
8981 int SelectCCMaskVal = SelectCCMask->getZExtValue();
8982
8983 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0));
8984 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1));
8985 if (!TrueVal || !FalseVal)
8986 return SDValue();
8987 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0)
8988 ;
8989 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1)
8990 SelectCCMaskVal ^= SelectCCValidVal;
8991 else
8992 return SDValue();
8993
8994 if (SelectCCValidVal & ~CCValidVal)
8995 return SDValue();
8996 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal))
8997 return SDValue();
8998
8999 return Select->getOperand(4);
9000}
9001
9002SDValue SystemZTargetLowering::combineIntDIVREM(
9003 SDNode *N, DAGCombinerInfo &DCI) const {
9004 SelectionDAG &DAG = DCI.DAG;
9005 EVT VT = N->getValueType(0);
9006 // In the case where the divisor is a vector of constants a cheaper
9007 // sequence of instructions can replace the divide. BuildSDIV is called to
9008 // do this during DAG combining, but it only succeeds when it can build a
9009 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and
9010 // since it is not Legal but Custom it can only happen before
9011 // legalization. Therefore we must scalarize this early before Combine
9012 // 1. For widened vectors, this is already the result of type legalization.
9013 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) &&
9014 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1)))
9015 return DAG.UnrollVectorOp(N);
9016 return SDValue();
9017}
9018
9019
9020// Transform a right shift of a multiply-and-add into a multiply-and-add-high.
9021// This is closely modeled after the common-code combineShiftToMULH.
9022SDValue SystemZTargetLowering::combineShiftToMulAddHigh(
9023 SDNode *N, DAGCombinerInfo &DCI) const {
9024 SelectionDAG &DAG = DCI.DAG;
9025 SDLoc DL(N);
9026
9027 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
9028 "SRL or SRA node is required here!");
9029
9030 if (!Subtarget.hasVector())
9031 return SDValue();
9032
9033 // Check the shift amount. Proceed with the transformation if the shift
9034 // amount is constant.
9035 ConstantSDNode *ShiftAmtSrc = isConstOrConstSplat(N->getOperand(1));
9036 if (!ShiftAmtSrc)
9037 return SDValue();
9038
9039 // The operation feeding into the shift must be an add.
9040 SDValue ShiftOperand = N->getOperand(0);
9041 if (ShiftOperand.getOpcode() != ISD::ADD)
9042 return SDValue();
9043
9044 // One operand of the add must be a multiply.
9045 SDValue MulOp = ShiftOperand.getOperand(0);
9046 SDValue AddOp = ShiftOperand.getOperand(1);
9047 if (MulOp.getOpcode() != ISD::MUL) {
9048 if (AddOp.getOpcode() != ISD::MUL)
9049 return SDValue();
9050 std::swap(MulOp, AddOp);
9051 }
9052
9053 // All operands must be equivalent extend nodes.
9054 SDValue LeftOp = MulOp.getOperand(0);
9055 SDValue RightOp = MulOp.getOperand(1);
9056
9057 bool IsSignExt = LeftOp.getOpcode() == ISD::SIGN_EXTEND;
9058 bool IsZeroExt = LeftOp.getOpcode() == ISD::ZERO_EXTEND;
9059
9060 if (!IsSignExt && !IsZeroExt)
9061 return SDValue();
9062
9063 EVT NarrowVT = LeftOp.getOperand(0).getValueType();
9064 unsigned NarrowVTSize = NarrowVT.getScalarSizeInBits();
9065
9066 SDValue MulhRightOp;
9067 if (ConstantSDNode *Constant = isConstOrConstSplat(RightOp)) {
9068 unsigned ActiveBits = IsSignExt
9069 ? Constant->getAPIntValue().getSignificantBits()
9070 : Constant->getAPIntValue().getActiveBits();
9071 if (ActiveBits > NarrowVTSize)
9072 return SDValue();
9073 MulhRightOp = DAG.getConstant(
9074 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9075 NarrowVT);
9076 } else {
9077 if (LeftOp.getOpcode() != RightOp.getOpcode())
9078 return SDValue();
9079 // Check that the two extend nodes are the same type.
9080 if (NarrowVT != RightOp.getOperand(0).getValueType())
9081 return SDValue();
9082 MulhRightOp = RightOp.getOperand(0);
9083 }
9084
9085 SDValue MulhAddOp;
9086 if (ConstantSDNode *Constant = isConstOrConstSplat(AddOp)) {
9087 unsigned ActiveBits = IsSignExt
9088 ? Constant->getAPIntValue().getSignificantBits()
9089 : Constant->getAPIntValue().getActiveBits();
9090 if (ActiveBits > NarrowVTSize)
9091 return SDValue();
9092 MulhAddOp = DAG.getConstant(
9093 Constant->getAPIntValue().trunc(NarrowVT.getScalarSizeInBits()), DL,
9094 NarrowVT);
9095 } else {
9096 if (LeftOp.getOpcode() != AddOp.getOpcode())
9097 return SDValue();
9098 // Check that the two extend nodes are the same type.
9099 if (NarrowVT != AddOp.getOperand(0).getValueType())
9100 return SDValue();
9101 MulhAddOp = AddOp.getOperand(0);
9102 }
9103
9104 EVT WideVT = LeftOp.getValueType();
9105 // Proceed with the transformation if the wide types match.
9106 assert((WideVT == RightOp.getValueType()) &&
9107 "Cannot have a multiply node with two different operand types.");
9108 assert((WideVT == AddOp.getValueType()) &&
9109 "Cannot have an add node with two different operand types.");
9110
9111 // Proceed with the transformation if the wide type is twice as large
9112 // as the narrow type.
9113 if (WideVT.getScalarSizeInBits() != 2 * NarrowVTSize)
9114 return SDValue();
9115
9116 // Check the shift amount with the narrow type size.
9117 // Proceed with the transformation if the shift amount is the width
9118 // of the narrow type.
9119 unsigned ShiftAmt = ShiftAmtSrc->getZExtValue();
9120 if (ShiftAmt != NarrowVTSize)
9121 return SDValue();
9122
9123 // Proceed if we support the multiply-and-add-high operation.
9124 if (!(NarrowVT == MVT::v16i8 || NarrowVT == MVT::v8i16 ||
9125 NarrowVT == MVT::v4i32 ||
9126 (Subtarget.hasVectorEnhancements3() &&
9127 (NarrowVT == MVT::v2i64 || NarrowVT == MVT::i128))))
9128 return SDValue();
9129
9130 // Emit the VMAH (signed) or VMALH (unsigned) operation.
9131 SDValue Result = DAG.getNode(IsSignExt ? SystemZISD::VMAH : SystemZISD::VMALH,
9132 DL, NarrowVT, LeftOp.getOperand(0),
9133 MulhRightOp, MulhAddOp);
9134 bool IsSigned = N->getOpcode() == ISD::SRA;
9135 return DAG.getExtOrTrunc(IsSigned, Result, DL, WideVT);
9136}
9137
9138// Op is an operand of a multiplication. Check whether this can be folded
9139// into an even/odd widening operation; if so, return the opcode to be used
9140// and update Op to the appropriate sub-operand. Note that the caller must
9141// verify that *both* operands of the multiplication support the operation.
//
// Returns 0 if no even/odd form applies.  Op is overwritten while the pattern
// is being peeled apart, so it may have been advanced to a sub-operand even
// when 0 is returned; callers should only use the updated Op on success.
//
// NOTE(review): the first signature line (listing line 9142) is missing from
// this extract.  combineMUL calls this helper as
// detectEvenOddMultiplyOperand(DAG, Subtarget, Op).
9143 const SystemZSubtarget &Subtarget,
9144 SDValue &Op) {
9145 EVT VT = Op.getValueType();
9146
9147 // Check for (sign/zero_extend_vector_inreg (vector_shuffle)) corresponding
9148 // to selecting the even or odd vector elements.
9149 if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT) &&
9150 (Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9151 Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG)) {
9152 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
9153 unsigned NumElts = VT.getVectorNumElements();
// Step down to the value being extended.
9154 Op = Op.getOperand(0);
// The extension source must have twice as many elements and be a shuffle.
9155 if (Op.getValueType().getVectorNumElements() == 2 * NumElts &&
9156 Op.getOpcode() == ISD::VECTOR_SHUFFLE) {
// NOTE(review): the definition of SVN (listing line 9157) is missing from
// this extract.
9158 ArrayRef<int> ShuffleMask = SVN->getMask();
9159 bool CanUseEven = true, CanUseOdd = true;
// Only the first NumElts mask entries are inspected.  Entry Elt must be
// 2*Elt for the "even" pattern and 2*Elt+1 for the "odd" pattern; entries
// equal to -1 constrain neither (presumably undefined lanes - confirm).
9160 for (unsigned Elt = 0; Elt < NumElts; Elt++) {
9161 if (ShuffleMask[Elt] == -1)
9162 continue;
9163 if (unsigned(ShuffleMask[Elt]) != 2 * Elt)
9164 CanUseEven = false;
9165 if (unsigned(ShuffleMask[Elt]) != 2 * Elt + 1)
9166 CanUseOdd = false;
9167 }
// Step down to the shuffle's first input.  If both flags are still set
// (all inspected entries were -1) the even form is chosen.
9168 Op = Op.getOperand(0);
9169 if (CanUseEven)
9170 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9171 if (CanUseOdd)
9172 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9173 }
9174 }
9175
9176 // For z17, we can also support the v2i64->i128 case, which looks like
9177 // (sign/zero_extend (extract_vector_elt X 0/1))
9178 if (VT == MVT::i128 && Subtarget.hasVectorEnhancements3() &&
9179 (Op.getOpcode() == ISD::SIGN_EXTEND ||
9180 Op.getOpcode() == ISD::ZERO_EXTEND)) {
9181 bool IsSigned = Op.getOpcode() == ISD::SIGN_EXTEND;
9182 Op = Op.getOperand(0);
// Require an extract with a constant index from a v2i64 vector.
9183 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
9184 Op.getOperand(0).getValueType() == MVT::v2i64 &&
9185 Op.getOperand(1).getOpcode() == ISD::Constant) {
9186 unsigned Elem = Op.getConstantOperandVal(1);
9187 Op = Op.getOperand(0);
// Element 0 maps to the "even" opcodes, element 1 to the "odd" ones.
9188 if (Elem == 0)
9189 return IsSigned ? SystemZISD::VME : SystemZISD::VMLE;
9190 if (Elem == 1)
9191 return IsSigned ? SystemZISD::VMO : SystemZISD::VMLO;
9192 }
9193 }
9194
// No even/odd widening form matched.
9195 return 0;
9196}
9197
9198SDValue SystemZTargetLowering::combineMUL(
9199 SDNode *N, DAGCombinerInfo &DCI) const {
9200 SelectionDAG &DAG = DCI.DAG;
9201
9202 // Detect even/odd widening multiplication.
9203 SDValue Op0 = N->getOperand(0);
9204 SDValue Op1 = N->getOperand(1);
9205 unsigned OpcodeCand0 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op0);
9206 unsigned OpcodeCand1 = detectEvenOddMultiplyOperand(DAG, Subtarget, Op1);
9207 if (OpcodeCand0 && OpcodeCand0 == OpcodeCand1)
9208 return DAG.getNode(OpcodeCand0, SDLoc(N), N->getValueType(0), Op0, Op1);
9209
9210 return SDValue();
9211}
9212
9213SDValue SystemZTargetLowering::combineINTRINSIC(
9214 SDNode *N, DAGCombinerInfo &DCI) const {
9215 SelectionDAG &DAG = DCI.DAG;
9216
9217 unsigned Id = N->getConstantOperandVal(1);
9218 switch (Id) {
9219 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15
9220 // or larger is simply a vector load.
9221 case Intrinsic::s390_vll:
9222 case Intrinsic::s390_vlrl:
9223 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
9224 if (C->getZExtValue() >= 15)
9225 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0),
9226 N->getOperand(3), MachinePointerInfo());
9227 break;
9228 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH.
9229 case Intrinsic::s390_vstl:
9230 case Intrinsic::s390_vstrl:
9231 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
9232 if (C->getZExtValue() >= 15)
9233 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2),
9234 N->getOperand(4), MachinePointerInfo());
9235 break;
9236 }
9237
9238 return SDValue();
9239}
9240
9241SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const {
9242 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER)
9243 return N->getOperand(0);
9244 return N;
9245}
9246
// Target DAG-combine dispatcher: routes N to the combine helper for its
// opcode and returns that helper's result.  Opcodes without a matching case
// fall through to the empty SDValue at the end.
//
// NOTE(review): the first signature line (listing line 9247) and the case
// lines at listing lines 9261, 9263 and 9280 are missing from this extract;
// the function is presumably SystemZTargetLowering::PerformDAGCombine -
// confirm against the full source.
9248 DAGCombinerInfo &DCI) const {
9249 switch(N->getOpcode()) {
9250 default: break;
9251 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI);
9252 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI);
9253 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI);
// Both merge flavours share one helper.
9254 case SystemZISD::MERGE_HIGH:
9255 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
9256 case ISD::LOAD: return combineLOAD(N, DCI);
9257 case ISD::STORE: return combineSTORE(N, DCI);
9258 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
9259 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
9260 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
9262 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
9264 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
9265 case ISD::SINT_TO_FP:
9266 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
9267 case ISD::FCOPYSIGN: return combineFCOPYSIGN(N, DCI);
9268 case ISD::BSWAP: return combineBSWAP(N, DCI);
9269 case ISD::SETCC: return combineSETCC(N, DCI);
9270 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
9271 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
9272 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI);
// Right shifts are only inspected for the multiply-and-add-high pattern.
9273 case ISD::SRL:
9274 case ISD::SRA: return combineShiftToMulAddHigh(N, DCI);
9275 case ISD::MUL: return combineMUL(N, DCI);
// All integer division/remainder opcodes share one helper.
9276 case ISD::SDIV:
9277 case ISD::UDIV:
9278 case ISD::SREM:
9279 case ISD::UREM: return combineIntDIVREM(N, DCI);
9281 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI);
9282 }
9283
// No combine applies to this opcode.
9284 return SDValue();
9285}
9286
9287// Return the demanded elements for the OpNo source operand of Op. DemandedElts
9288// are for Op.
9289static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts,
9290 unsigned OpNo) {
9291 EVT VT = Op.getValueType();
9292 unsigned NumElts = (VT.isVector() ? VT.getVectorNumElements() : 1);
9293 APInt SrcDemE;
9294 unsigned Opcode = Op.getOpcode();
9295 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9296 unsigned Id = Op.getConstantOperandVal(0);
9297 switch (Id) {
9298 case Intrinsic::s390_vpksh: // PACKS
9299 case Intrinsic::s390_vpksf:
9300 case Intrinsic::s390_vpksg:
9301 case Intrinsic::s390_vpkshs: // PACKS_CC
9302 case Intrinsic::s390_vpksfs:
9303 case Intrinsic::s390_vpksgs:
9304 case Intrinsic::s390_vpklsh: // PACKLS
9305 case Intrinsic::s390_vpklsf:
9306 case Intrinsic::s390_vpklsg:
9307 case Intrinsic::s390_vpklshs: // PACKLS_CC
9308 case Intrinsic::s390_vpklsfs:
9309 case Intrinsic::s390_vpklsgs:
9310 // VECTOR PACK truncates the elements of two source vectors into one.
9311 SrcDemE = DemandedElts;
9312 if (OpNo == 2)
9313 SrcDemE.lshrInPlace(NumElts / 2);
9314 SrcDemE = SrcDemE.trunc(NumElts / 2);
9315 break;
9316 // VECTOR UNPACK extends half the elements of the source vector.
9317 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9318 case Intrinsic::s390_vuphh:
9319 case Intrinsic::s390_vuphf:
9320 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9321 case Intrinsic::s390_vuplhh:
9322 case Intrinsic::s390_vuplhf:
9323 SrcDemE = APInt(NumElts * 2, 0);
9324 SrcDemE.insertBits(DemandedElts, 0);
9325 break;
9326 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9327 case Intrinsic::s390_vuplhw:
9328 case Intrinsic::s390_vuplf:
9329 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9330 case Intrinsic::s390_vupllh:
9331 case Intrinsic::s390_vupllf:
9332 SrcDemE = APInt(NumElts * 2, 0);
9333 SrcDemE.insertBits(DemandedElts, NumElts);
9334 break;
9335 case Intrinsic::s390_vpdi: {
9336 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source.
9337 SrcDemE = APInt(NumElts, 0);
9338 if (!DemandedElts[OpNo - 1])
9339 break;
9340 unsigned Mask = Op.getConstantOperandVal(3);
9341 unsigned MaskBit = ((OpNo - 1) ? 1 : 4);
9342 // Demand input element 0 or 1, given by the mask bit value.
9343 SrcDemE.setBit((Mask & MaskBit)? 1 : 0);
9344 break;
9345 }
9346 case Intrinsic::s390_vsldb: {
9347 // VECTOR SHIFT LEFT DOUBLE BY BYTE
9348 assert(VT == MVT::v16i8 && "Unexpected type.");
9349 unsigned FirstIdx = Op.getConstantOperandVal(3);
9350 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand.");
9351 unsigned NumSrc0Els = 16 - FirstIdx;
9352 SrcDemE = APInt(NumElts, 0);
9353 if (OpNo == 1) {
9354 APInt DemEls = DemandedElts.trunc(NumSrc0Els);
9355 SrcDemE.insertBits(DemEls, FirstIdx);
9356 } else {
9357 APInt DemEls = DemandedElts.lshr(NumSrc0Els);
9358 SrcDemE.insertBits(DemEls, 0);
9359 }
9360 break;
9361 }
9362 case Intrinsic::s390_vperm:
9363 SrcDemE = APInt::getAllOnes(NumElts);
9364 break;
9365 default:
9366 llvm_unreachable("Unhandled intrinsic.");
9367 break;
9368 }
9369 } else {
9370 switch (Opcode) {
9371 case SystemZISD::JOIN_DWORDS:
9372 // Scalar operand.
9373 SrcDemE = APInt(1, 1);
9374 break;
9375 case SystemZISD::SELECT_CCMASK:
9376 SrcDemE = DemandedElts;
9377 break;
9378 default:
9379 llvm_unreachable("Unhandled opcode.");
9380 break;
9381 }
9382 }
9383 return SrcDemE;
9384}
9385
9386static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known,
9387 const APInt &DemandedElts,
9388 const SelectionDAG &DAG, unsigned Depth,
9389 unsigned OpNo) {
9390 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9391 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9392 KnownBits LHSKnown =
9393 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9394 KnownBits RHSKnown =
9395 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9396 Known = LHSKnown.intersectWith(RHSKnown);
9397}
9398
// Target hook that computes the known-zero/known-one bits of target node Op
// into Known, for the result elements selected by DemandedElts.
//
// NOTE(review): the line carrying the function name and first parameter
// (listing line 9400) is missing from this extract; this is presumably
// SystemZTargetLowering::computeKnownBitsForTargetNode - confirm.
9399void
9401 KnownBits &Known,
9402 const APInt &DemandedElts,
9403 const SelectionDAG &DAG,
9404 unsigned Depth) const {
// Start from "nothing known".
9405 Known.resetAll();
9406
9407 // Intrinsic CC result is returned in the two low bits.
9408 unsigned Tmp0, Tmp1; // not used
9409 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, Tmp0, Tmp1)) {
// All bits from bit 2 upwards are known to be zero.
9410 Known.Zero.setBitsFrom(2);
9411 return;
9412 }
// Beyond the CC case only result 0 is analysed, and never an Untyped value.
9413 EVT VT = Op.getValueType();
9414 if (Op.getResNo() != 0 || VT == MVT::Untyped)
9415 return;
9416 assert (Known.getBitWidth() == VT.getScalarSizeInBits() &&
9417 "KnownBits does not match VT in bitwidth");
9418 assert ((!VT.isVector() ||
9419 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) &&
9420 "DemandedElts does not match VT number of elements");
9421 unsigned BitWidth = Known.getBitWidth();
9422 unsigned Opcode = Op.getOpcode();
9423 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9424 bool IsLogical = false;
9425 unsigned Id = Op.getConstantOperandVal(0);
9426 switch (Id) {
// Two-source intrinsics: intersect what is known about operands 1 and 2.
9427 case Intrinsic::s390_vpksh: // PACKS
9428 case Intrinsic::s390_vpksf:
9429 case Intrinsic::s390_vpksg:
9430 case Intrinsic::s390_vpkshs: // PACKS_CC
9431 case Intrinsic::s390_vpksfs:
9432 case Intrinsic::s390_vpksgs:
9433 case Intrinsic::s390_vpklsh: // PACKLS
9434 case Intrinsic::s390_vpklsf:
9435 case Intrinsic::s390_vpklsg:
9436 case Intrinsic::s390_vpklshs: // PACKLS_CC
9437 case Intrinsic::s390_vpklsfs:
9438 case Intrinsic::s390_vpklsgs:
9439 case Intrinsic::s390_vpdi:
9440 case Intrinsic::s390_vsldb:
9441 case Intrinsic::s390_vperm:
9442 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1);
9443 break;
// Unpack intrinsics: the logical forms zero-extend the source's known bits,
// the others sign-extend them.
9444 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH
9445 case Intrinsic::s390_vuplhh:
9446 case Intrinsic::s390_vuplhf:
9447 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW
9448 case Intrinsic::s390_vupllh:
9449 case Intrinsic::s390_vupllf:
9450 IsLogical = true;
9451 [[fallthrough]];
9452 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9453 case Intrinsic::s390_vuphh:
9454 case Intrinsic::s390_vuphf:
9455 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9456 case Intrinsic::s390_vuplhw:
9457 case Intrinsic::s390_vuplf: {
9458 SDValue SrcOp = Op.getOperand(1);
// The operand number passed here (0) is not consulted by
// getDemandedSrcElements for the unpack intrinsics.
9459 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0);
9460 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1);
9461 if (IsLogical) {
9462 Known = Known.zext(BitWidth);
9463 } else
9464 Known = Known.sext(BitWidth);
9465 break;
9466 }
9467 default:
9468 break;
9469 }
9470 } else {
9471 switch (Opcode) {
// Two-source target nodes: intersect what is known about operands 0 and 1.
9472 case SystemZISD::JOIN_DWORDS:
9473 case SystemZISD::SELECT_CCMASK:
9474 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0);
9475 break;
9476 case SystemZISD::REPLICATE: {
9477 SDValue SrcOp = Op.getOperand(0);
9478 Known = DAG.computeKnownBits(SrcOp, Depth + 1);
// NOTE(review): listing line 9479 is missing from this extract; it
// presumably holds the condition guarding the sign extension below - confirm.
9480 Known = Known.sext(BitWidth); // VREPI sign extends the immediate.
9481 break;
9482 }
9483 default:
9484 break;
9485 }
9486 }
9487
9488 // Known has the width of the source operand(s). Adjust if needed to match
9489 // the passed bitwidth.
9490 if (Known.getBitWidth() != BitWidth)
9491 Known = Known.anyextOrTrunc(BitWidth);
9492}
9493
9494static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts,
9495 const SelectionDAG &DAG, unsigned Depth,
9496 unsigned OpNo) {
9497 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo);
9498 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1);
9499 if (LHS == 1) return 1; // Early out.
9500 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1);
9501 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1);
9502 if (RHS == 1) return 1; // Early out.
9503 unsigned Common = std::min(LHS, RHS);
9504 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits();
9505 EVT VT = Op.getValueType();
9506 unsigned VTBits = VT.getScalarSizeInBits();
9507 if (SrcBitWidth > VTBits) { // PACK
9508 unsigned SrcExtraBits = SrcBitWidth - VTBits;
9509 if (Common > SrcExtraBits)
9510 return (Common - SrcExtraBits);
9511 return 1;
9512 }
9513 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth.");
9514 return Common;
9515}
9516
// Target hook that returns a lower bound on the number of sign bits of
// target node Op for the result elements selected by DemandedElts.  Returns 1
// (no information) for results other than result 0 and for every node not
// listed below.
//
// NOTE(review): the line carrying the function name (listing line 9518) is
// missing from this extract; this is presumably
// SystemZTargetLowering::ComputeNumSignBitsForTargetNode - confirm.
9517unsigned
9519 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
9520 unsigned Depth) const {
9521 if (Op.getResNo() != 0)
9522 return 1;
9523 unsigned Opcode = Op.getOpcode();
9524 if (Opcode == ISD::INTRINSIC_WO_CHAIN) {
9525 unsigned Id = Op.getConstantOperandVal(0);
9526 switch (Id) {
// Two-source intrinsics: combine the sign-bit counts of operands 1 and 2.
9527 case Intrinsic::s390_vpksh: // PACKS
9528 case Intrinsic::s390_vpksf:
9529 case Intrinsic::s390_vpksg:
9530 case Intrinsic::s390_vpkshs: // PACKS_CC
9531 case Intrinsic::s390_vpksfs:
9532 case Intrinsic::s390_vpksgs:
9533 case Intrinsic::s390_vpklsh: // PACKLS
9534 case Intrinsic::s390_vpklsf:
9535 case Intrinsic::s390_vpklsg:
9536 case Intrinsic::s390_vpklshs: // PACKLS_CC
9537 case Intrinsic::s390_vpklsfs:
9538 case Intrinsic::s390_vpklsgs:
9539 case Intrinsic::s390_vpdi:
9540 case Intrinsic::s390_vsldb:
9541 case Intrinsic::s390_vperm:
9542 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1);
// Only the non-logical unpack intrinsics are handled here; the logical
// (zero-extending) forms take the default path and yield 1.
9543 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH
9544 case Intrinsic::s390_vuphh:
9545 case Intrinsic::s390_vuphf:
9546 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW
9547 case Intrinsic::s390_vuplhw:
9548 case Intrinsic::s390_vuplf: {
9549 SDValue PackedOp = Op.getOperand(1);
9550 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1);
9551 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1);
9552 EVT VT = Op.getValueType();
9553 unsigned VTBits = VT.getScalarSizeInBits();
// Every bit added by the widening counts as an additional sign bit.
9554 Tmp += VTBits - PackedOp.getScalarValueSizeInBits();
9555 return Tmp;
9556 }
9557 default:
9558 break;
9559 }
9560 } else {
9561 switch (Opcode) {
// The two selected values are operands 0 and 1.
9562 case SystemZISD::SELECT_CCMASK:
9563 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0);
9564 default:
9565 break;
9566 }
9567 }
9568
// Nothing is known about any other node.
9569 return 1;
9570}
9571
// Returns true for SystemZISD::PCREL_WRAPPER and SystemZISD::PCREL_OFFSET
// nodes and false for every other opcode.  DemandedElts, DAG, PoisonOnly and
// Depth are not consulted.
//
// NOTE(review): the start of the signature (listing lines 9572-9573) is
// missing from this extract; judging by the parameters this is presumably the
// isGuaranteedNotToBeUndefOrPoisonForTargetNode hook - confirm.
9574 const APInt &DemandedElts, const SelectionDAG &DAG,
9575 bool PoisonOnly, unsigned Depth) const {
9576 switch (Op->getOpcode()) {
9577 case SystemZISD::PCREL_WRAPPER:
9578 case SystemZISD::PCREL_OFFSET:
9579 return true;
9580 }
9581 return false;
9582}
9583
// Returns the stack probe size for function MF: the value of its
// "stack-probe-size" attribute (4096 if absent), rounded down to a multiple
// of the stack alignment.  If rounding yields zero, the stack alignment
// itself is returned so that the result is never zero.
//
// NOTE(review): the line carrying the function name and parameter (listing
// line 9585) is missing from this extract; presumably
// SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) -
// confirm.
9584unsigned
9586 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
9587 unsigned StackAlign = TFI->getStackAlignment();
// The mask-based rounding below relies on a power-of-two alignment.
9588 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) &&
9589 "Unexpected stack alignment");
9590 // The default stack probe size is 4096 if the function has no
9591 // stack-probe-size attribute.
9592 unsigned StackProbeSize =
9593 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096);
9594 // Round down to the stack alignment.
9595 StackProbeSize &= ~(StackAlign - 1);
9596 return StackProbeSize ? StackProbeSize : StackAlign;
9597}
9598
9599//===----------------------------------------------------------------------===//
9600// Custom insertion
9601//===----------------------------------------------------------------------===//
9602
9603// Force base value Base into a register before MI. Return the register.
// In both cases a fresh ADDR64 virtual register is created and the defining
// instruction is inserted immediately before MI, using MI's debug location.
//
// NOTE(review): the first signature line (listing line 9604) and the
// definition of MRI (listing line 9608) are missing from this extract.
9605 const SystemZInstrInfo *TII) {
9606 MachineBasicBlock *MBB = MI.getParent();
9607 MachineFunction &MF = *MBB->getParent();
9609
9610 if (Base.isReg()) {
9611 // Copy Base into a new virtual register to help register coalescing in
9612 // cases with multiple uses.
9613 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9614 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg)
9615 .add(Base)
9616 return Reg;
9617 }
9618
// Base is not a register: materialise it with LA.  The trailing operands are
// presumably a zero displacement and "no index register" - confirm against
// the LA instruction definition.
9619 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
9620 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg)
9621 .add(Base)
9622 .addImm(0)
9623 .addReg(0);
9624 return Reg;
9625}
9626
9627// The CC operand of MI might be missing a kill marker because there
9628// were multiple uses of CC, and ISel didn't know which to mark.
9629// Figure out whether MI should have had a kill marker.
//
// Returns false if a scanned instruction reads CC before any redefines it, or
// if the scan reaches the end of MBB and CC is live into a successor.
// Returns true otherwise, i.e. when CC is dead after MI.
//
// NOTE(review): the signature (listing line 9630) and the initialisation of
// the iterator miI (listing line 9632) are missing from this extract; the
// scan presumably starts right after MI - confirm.  emitSelect calls this as
// checkCCKill(*LastMI, MBB).
9631 // Scan forward through BB for a use/def of CC.
9633 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) {
9634 const MachineInstr &MI = *miI;
// A read is checked before a def, so an instruction that both reads and
// writes CC counts as a use.
9635 if (MI.readsRegister(SystemZ::CC, /*TRI=*/nullptr))
9636 return false;
9637 if (MI.definesRegister(SystemZ::CC, /*TRI=*/nullptr))
9638 break; // Should have kill-flag - update below.
9639 }
9640
9641 // If we hit the end of the block, check whether CC is live into a
9642 // successor.
9643 if (miI == MBB->end()) {
9644 for (const MachineBasicBlock *Succ : MBB->successors())
9645 if (Succ->isLiveIn(SystemZ::CC))
9646 return false;
9647 }
9648
9649 return true;
9650}
9651
9652// Return true if it is OK for this Select pseudo-opcode to be cascaded
9653// together with other Select pseudo-opcodes into a single basic-block with
9654// a conditional jump around it.
//
// True for exactly the Select32/64/128, SelectF32/64/128 and
// SelectVR32/64/128 pseudos; false for every other opcode.
//
// NOTE(review): the signature (listing line 9655) is missing from this
// extract; emitSelect calls this as isSelectPseudo(MI).
9656 switch (MI.getOpcode()) {
9657 case SystemZ::Select32:
9658 case SystemZ::Select64:
9659 case SystemZ::Select128:
9660 case SystemZ::SelectF32:
9661 case SystemZ::SelectF64:
9662 case SystemZ::SelectF128:
9663 case SystemZ::SelectVR32:
9664 case SystemZ::SelectVR64:
9665 case SystemZ::SelectVR128:
9666 return true;
9667
9668 default:
9669 return false;
9670 }
9671}
9672
9673// Helper function, which inserts PHI functions into SinkMBB:
9674// %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ],
9675// where %FalseValue(i) and %TrueValue(i) are taken from Selects.
//
// Operand layout of each Select as read here: 0 = destination register,
// 1 = true value, 2 = false value, 3 = CCValid, 4 = CCMask.  The first
// Select in the list supplies the reference CCValid/CCMask pair.
//
// NOTE(review): the first signature line (listing line 9676) and the
// definitions of TII (listing line 9681) and RegRewriteTable (listing line
// 9695) are missing from this extract.  From its use below, RegRewriteTable
// maps a PHI destination register to its (true, false) input register pair.
9677 MachineBasicBlock *TrueMBB,
9678 MachineBasicBlock *FalseMBB,
9679 MachineBasicBlock *SinkMBB) {
9680 MachineFunction *MF = TrueMBB->getParent();
9682
9683 MachineInstr *FirstMI = Selects.front();
9684 unsigned CCValid = FirstMI->getOperand(3).getImm();
9685 unsigned CCMask = FirstMI->getOperand(4).getImm();
9686
// All PHIs are inserted before the instruction that is first in SinkMBB on
// entry, so they appear in the same order as the Selects.
9687 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();
9688
9689 // As we are creating the PHIs, we have to be careful if there is more than
9690 // one. Later Selects may reference the results of earlier Selects, but later
9691 // PHIs have to reference the individual true/false inputs from earlier PHIs.
9692 // That also means that PHI construction must work forward from earlier to
9693 // later, and that the code must maintain a mapping from earlier PHI's
9694 // destination registers, and the registers that went into the PHI.
9696
9697 for (auto *MI : Selects) {
9698 Register DestReg = MI->getOperand(0).getReg();
9699 Register TrueReg = MI->getOperand(1).getReg();
9700 Register FalseReg = MI->getOperand(2).getReg();
9701
9702 // If this Select we are generating is the opposite condition from
9703 // the jump we generated, then we have to swap the operands for the
9704 // PHI that is going to be generated.
9705 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask))
9706 std::swap(TrueReg, FalseReg);
9707
// If an input is the result of an earlier Select in this group, use that
// earlier PHI's own input for the matching edge instead.
9708 if (auto It = RegRewriteTable.find(TrueReg); It != RegRewriteTable.end())
9709 TrueReg = It->second.first;
9710
9711 if (auto It = RegRewriteTable.find(FalseReg); It != RegRewriteTable.end())
9712 FalseReg = It->second.second;
9713
9714 DebugLoc DL = MI->getDebugLoc();
9715 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg)
9716 .addReg(TrueReg).addMBB(TrueMBB)
9717 .addReg(FalseReg).addMBB(FalseMBB);
9718
9719 // Add this PHI to the rewrite table.
9720 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg);
9721 }
9722
// The function now contains PHIs, so clear the NoPHIs property.
9723 MF->getProperties().resetNoPHIs();
9724}
9725
// Custom inserter for the ADJSTACKDOWN/ADJSTACKUP pseudos.  Raises the
// function's recorded maximum call frame size to MI's immediate operand if
// that is larger, marks the function as adjusting the stack, erases MI and
// returns BB unchanged.
//
// NOTE(review): the return-type line (listing line 9726) is missing from this
// extract.
9727SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI,
9728 MachineBasicBlock *BB) const {
9729 MachineFunction &MF = *BB->getParent();
9730 MachineFrameInfo &MFI = MF.getFrameInfo();
9731 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>();
9732 assert(TFL->hasReservedCallFrame(MF) &&
9733 "ADJSTACKDOWN and ADJSTACKUP should be no-ops");
// TFL is only used by the assert above; silence unused-variable warnings in
// builds without assertions.
9734 (void)TFL;
9735 // Get the MaxCallFrameSize value and erase MI since it serves no further
9736 // purpose as the call frame is statically reserved in the prolog. Set
9737 // AdjustsStack as MI is *not* mapped as a frame instruction.
9738 uint32_t NumBytes = MI.getOperand(0).getImm();
9739 if (NumBytes > MFI.getMaxCallFrameSize())
9740 MFI.setMaxCallFrameSize(NumBytes);
9741 MFI.setAdjustsStack(true);
9742
9743 MI.eraseFromParent();
9744 return BB;
9745}
9746
9747// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
//
// MI and any following compatible Select* pseudos are replaced by a
// conditional branch around an (empty) FalseMBB plus PHIs in a new JoinMBB.
// Returns JoinMBB, the block that now holds the instructions that followed
// the last expanded Select.
//
// NOTE(review): the return-type line (listing line 9748) and the definition
// of InsertPos (listing line 9830) are missing from this extract.
9749SystemZTargetLowering::emitSelect(MachineInstr &MI,
9750 MachineBasicBlock *MBB) const {
9751 assert(isSelectPseudo(MI) && "Bad call to emitSelect()");
9752 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9753
9754 unsigned CCValid = MI.getOperand(3).getImm();
9755 unsigned CCMask = MI.getOperand(4).getImm();
9756
9757 // If we have a sequence of Select* pseudo instructions using the
9758 // same condition code value, we want to expand all of them into
9759 // a single pair of basic blocks using the same condition.
9760 SmallVector<MachineInstr*, 8> Selects;
9761 SmallVector<MachineInstr*, 8> DbgValues;
9762 Selects.push_back(&MI);
// Count tracks the non-debug, non-Select instructions skipped so far.
9763 unsigned Count = 0;
9764 for (MachineInstr &NextMI : llvm::make_range(
9765 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) {
9766 if (isSelectPseudo(NextMI)) {
9767 assert(NextMI.getOperand(3).getImm() == CCValid &&
9768 "Bad CCValid operands since CC was not redefined.");
// Only Selects on the same or the exactly inverted condition can share
// the branch; any other Select ends the group.
9769 if (NextMI.getOperand(4).getImm() == CCMask ||
9770 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) {
9771 Selects.push_back(&NextMI);
9772 continue;
9773 }
9774 break;
9775 }
// Stop at anything that redefines CC or needs its own custom insertion.
9776 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9777 NextMI.usesCustomInsertionHook())
9778 break;
// Does NextMI read the result of a Select collected so far?
9779 bool User = false;
9780 for (auto *SelMI : Selects)
9781 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) {
9782 User = true;
9783 break;
9784 }
// Debug instructions never end the scan; those that use a Select result
// are remembered so they can be moved into JoinMBB later.  A real user
// ends the scan, as does exceeding 20 skipped non-debug instructions.
9785 if (NextMI.isDebugInstr()) {
9786 if (User) {
9787 assert(NextMI.isDebugValue() && "Unhandled debug opcode.");
9788 DbgValues.push_back(&NextMI);
9789 }
9790 } else if (User || ++Count > 20)
9791 break;
9792 }
9793
9794 MachineInstr *LastMI = Selects.back();
9795 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) ||
9796 checkCCKill(*LastMI, MBB));
9797 MachineBasicBlock *StartMBB = MBB;
9798 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB);
9799 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9800
9801 // Unless CC was killed in the last Select instruction, mark it as
9802 // live-in to both FalseMBB and JoinMBB.
9803 if (!CCKilled) {
9804 FalseMBB->addLiveIn(SystemZ::CC);
9805 JoinMBB->addLiveIn(SystemZ::CC);
9806 }
9807
9808 // StartMBB:
9809 // BRC CCMask, JoinMBB
9810 // # fallthrough to FalseMBB
9811 MBB = StartMBB;
9812 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
9813 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9814 MBB->addSuccessor(JoinMBB);
9815 MBB->addSuccessor(FalseMBB);
9816
9817 // FalseMBB:
9818 // # fallthrough to JoinMBB
9819 MBB = FalseMBB;
9820 MBB->addSuccessor(JoinMBB);
9821
9822 // JoinMBB:
9823 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ]
9824 // ...
9825 MBB = JoinMBB;
9826 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB);
// The PHIs now define the results, so the Select pseudos can go.
9827 for (auto *SelMI : Selects)
9828 SelMI->eraseFromParent();
9829
// Move the remembered debug values out of StartMBB into JoinMBB at
// InsertPos (defined on the missing line noted above).
9831 for (auto *DbgMI : DbgValues)
9832 MBB->splice(InsertPos, StartMBB, DbgMI);
9833
9834 return JoinMBB;
9835}
9836
9837// Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI.
9838// StoreOpcode is the store to use and Invert says whether the store should
9839// happen when the condition is false rather than true. If a STORE ON
9840// CONDITION is available, STOCOpcode is its opcode, otherwise it is 0.
//
// Operand layout of MI as read here: 0 = value to store, 1 = base,
// 2 = displacement, 3 = index register, 4 = CCValid, 5 = CCMask.
// Returns the block in which insertion should continue: MBB itself when a
// STORE ON CONDITION was emitted, otherwise the new JoinMBB.
//
// NOTE(review): one parameter line (listing line 9842, presumably
// "MachineBasicBlock *MBB,") is missing from this extract.
9841MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI,
9843 unsigned StoreOpcode,
9844 unsigned STOCOpcode,
9845 bool Invert) const {
9846 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9847
9848 Register SrcReg = MI.getOperand(0).getReg();
9849 MachineOperand Base = MI.getOperand(1);
9850 int64_t Disp = MI.getOperand(2).getImm();
9851 Register IndexReg = MI.getOperand(3).getReg();
9852 unsigned CCValid = MI.getOperand(4).getImm();
9853 unsigned CCMask = MI.getOperand(5).getImm();
9854 DebugLoc DL = MI.getDebugLoc();
9855
// Let the instruction info pick the opcode variant to use for Disp.
9856 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp);
9857
9858 // ISel pattern matching also adds a load memory operand of the same
9859 // address, so take special care to find the storing memory operand.
// MMO stays null if MI carries no storing memory operand.
9860 MachineMemOperand *MMO = nullptr;
9861 for (auto *I : MI.memoperands())
9862 if (I->isStore()) {
9863 MMO = I;
9864 break;
9865 }
9866
9867 // Use STOCOpcode if possible. We could use different store patterns in
9868 // order to avoid matching the index register, but the performance trade-offs
9869 // might be more complicated in that case.
9870 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) {
// The conditional store acts when its mask matches, so flip the mask
// when the store is wanted on the false condition.
9871 if (Invert)
9872 CCMask ^= CCValid;
9873
9874 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode))
9875 .addReg(SrcReg)
9876 .add(Base)
9877 .addImm(Disp)
9878 .addImm(CCValid)
9879 .addImm(CCMask)
9880 .addMemOperand(MMO);
9881
9882 MI.eraseFromParent();
9883 return MBB;
9884 }
9885
9886 // Get the condition needed to branch around the store.
9887 if (!Invert)
9888 CCMask ^= CCValid;
9889
9890 MachineBasicBlock *StartMBB = MBB;
9891 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB);
9892 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB);
9893
9894 // Unless CC was killed in the CondStore instruction, mark it as
9895 // live-in to both FalseMBB and JoinMBB.
9896 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) &&
9897 !checkCCKill(MI, JoinMBB)) {
9898 FalseMBB->addLiveIn(SystemZ::CC);
9899 JoinMBB->addLiveIn(SystemZ::CC);
9900 }
9901
9902 // StartMBB:
9903 // BRC CCMask, JoinMBB
9904 // # fallthrough to FalseMBB
9905 MBB = StartMBB;
9906 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9907 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB);
9908 MBB->addSuccessor(JoinMBB);
9909 MBB->addSuccessor(FalseMBB);
9910
9911 // FalseMBB:
9912 // store %SrcReg, %Disp(%Index,%Base)
9913 // # fallthrough to JoinMBB
9914 MBB = FalseMBB;
9915 BuildMI(MBB, DL, TII->get(StoreOpcode))
9916 .addReg(SrcReg)
9917 .add(Base)
9918 .addImm(Disp)
9919 .addReg(IndexReg)
9920 .addMemOperand(MMO);
9921 MBB->addSuccessor(JoinMBB);
9922
9923 MI.eraseFromParent();
9924 return JoinMBB;
9925}
9926
9927// Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI.
//
// Expands the 128-bit compare into a compare of the high parts, a branch
// that skips ahead if they differ, and a compare of the low parts otherwise.
// Unsigned selects the logical (VECLG) rather than the signed (VECG) compare
// for the high parts.  Returns JoinMBB, which has CC marked live-in.
//
// NOTE(review): the return-type line (listing line 9928), the MBB parameter
// line (listing line 9930) and the operands of the BRC below (listing line
// 9963) are missing from this extract.
9929SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI,
9931 bool Unsigned) const {
9932 MachineFunction &MF = *MBB->getParent();
9933 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9934 MachineRegisterInfo &MRI = MF.getRegInfo();
9935
9936 // Synthetic instruction to compare 128-bit values.
9937 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise.
9938 Register Op0 = MI.getOperand(0).getReg();
9939 Register Op1 = MI.getOperand(1).getReg();
9940
9941 MachineBasicBlock *StartMBB = MBB;
9942 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
9943 MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
9944
9945 // StartMBB:
9946 //
9947 // Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
9948 // Swap the inputs to get:
9949 // CC 1 if high(Op0) > high(Op1)
9950 // CC 2 if high(Op0) < high(Op1)
9951 // CC 0 if high(Op0) == high(Op1)
9952 //
9953 // If CC != 0, we're done, so jump over the next instruction.
9954 //
9955 // VEC[L]G Op1, Op0
9956 // JNE JoinMBB
9957 // # fallthrough to HiEqMBB
9958 MBB = StartMBB;
9959 int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
9960 BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
9961 .addReg(Op1).addReg(Op0);
9962 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
9964 MBB->addSuccessor(JoinMBB);
9965 MBB->addSuccessor(HiEqMBB);
9966
9967 // HiEqMBB:
9968 //
9969 // Otherwise, use VECTOR COMPARE HIGH LOGICAL.
9970 // Since we already know the high parts are equal, the CC
9971 // result will only depend on the low parts:
9972 // CC 1 if low(Op0) > low(Op1)
9973 // CC 3 if low(Op0) <= low(Op1)
9974 //
9975 // VCHLGS Tmp, Op0, Op1
9976 // # fallthrough to JoinMBB
9977 MBB = HiEqMBB;
// Temp receives the vector result of VCHLGS and is not used again in this
// function; only the CC value matters.
9978 Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
9979 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
9980 .addReg(Op0).addReg(Op1);
9981 MBB->addSuccessor(JoinMBB);
9982
9983 // Mark CC as live-in to JoinMBB.
9984 JoinMBB->addLiveIn(SystemZ::CC);
9985
9986 MI.eraseFromParent();
9987 return JoinMBB;
9988}
9989
9990// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
9991// ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
9992// the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
9993// whether the field should be inverted after performing BinOpcode (e.g. for
9994// NAND).
9995MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
9996 MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
9997 bool Invert) const {
9998 MachineFunction &MF = *MBB->getParent();
9999 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10000 MachineRegisterInfo &MRI = MF.getRegInfo();
10001
10002 // Extract the operands. Base can be a register or a frame index.
10003 // Src2 can be a register or immediate.
10004 Register Dest = MI.getOperand(0).getReg();
10005 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10006 int64_t Disp = MI.getOperand(2).getImm();
10007 MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
10008 Register BitShift = MI.getOperand(4).getReg();
10009 Register NegBitShift = MI.getOperand(5).getReg();
10010 unsigned BitSize = MI.getOperand(6).getImm();
10011 DebugLoc DL = MI.getDebugLoc();
10012
10013 // Get the right opcodes for the displacement.
10014 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10015 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10016 assert(LOpcode && CSOpcode && "Displacement out of range");
10017
10018 // Create virtual registers for temporary results.
10019 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10020 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10021 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10022 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10023 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10024
10025 // Insert a basic block for the main loop.
10026 MachineBasicBlock *StartMBB = MBB;
10027 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10028 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10029
10030 // StartMBB:
10031 // ...
10032 // %OrigVal = L Disp(%Base)
10033 // # fall through to LoopMBB
10034 MBB = StartMBB;
10035 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10036 MBB->addSuccessor(LoopMBB);
10037
10038 // LoopMBB:
10039 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
10040 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10041 // %RotatedNewVal = OP %RotatedOldVal, %Src2
10042 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10043 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10044 // JNE LoopMBB
10045 // # fall through to DoneMBB
10046 MBB = LoopMBB;
10047 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10048 .addReg(OrigVal).addMBB(StartMBB)
10049 .addReg(Dest).addMBB(LoopMBB);
10050 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10051 .addReg(OldVal).addReg(BitShift).addImm(0);
10052 if (Invert) {
10053 // Perform the operation normally and then invert every bit of the field.
10054 Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10055 BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
10056 // XILF with the upper BitSize bits set.
10057 BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
10058 .addReg(Tmp).addImm(-1U << (32 - BitSize));
10059 } else if (BinOpcode)
10060 // A simply binary operation.
10061 BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
10062 .addReg(RotatedOldVal)
10063 .add(Src2);
10064 else
10065 // Use RISBG to rotate Src2 into position and use it to replace the
10066 // field in RotatedOldVal.
10067 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
10068 .addReg(RotatedOldVal).addReg(Src2.getReg())
10069 .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
10070 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10071 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10072 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10073 .addReg(OldVal)
10074 .addReg(NewVal)
10075 .add(Base)
10076 .addImm(Disp);
10077 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10079 MBB->addSuccessor(LoopMBB);
10080 MBB->addSuccessor(DoneMBB);
10081
10082 MI.eraseFromParent();
10083 return DoneMBB;
10084}
10085
10086// Implement EmitInstrWithCustomInserter for subword pseudo
10087// ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
10088// instruction that should be used to compare the current field with the
10089// minimum or maximum value. KeepOldMask is the BRC condition-code mask
10090// for when the current field should be kept.
10091MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
10092 MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
10093 unsigned KeepOldMask) const {
10094 MachineFunction &MF = *MBB->getParent();
10095 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10096 MachineRegisterInfo &MRI = MF.getRegInfo();
10097
10098 // Extract the operands. Base can be a register or a frame index.
10099 Register Dest = MI.getOperand(0).getReg();
10100 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10101 int64_t Disp = MI.getOperand(2).getImm();
10102 Register Src2 = MI.getOperand(3).getReg();
10103 Register BitShift = MI.getOperand(4).getReg();
10104 Register NegBitShift = MI.getOperand(5).getReg();
10105 unsigned BitSize = MI.getOperand(6).getImm();
10106 DebugLoc DL = MI.getDebugLoc();
10107
10108 // Get the right opcodes for the displacement.
10109 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10110 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10111 assert(LOpcode && CSOpcode && "Displacement out of range");
10112
10113 // Create virtual registers for temporary results.
10114 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10115 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10116 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10117 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10118 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10119 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
10120
10121 // Insert 3 basic blocks for the loop.
10122 MachineBasicBlock *StartMBB = MBB;
10123 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10124 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10125 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB);
10126 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB);
10127
10128 // StartMBB:
10129 // ...
10130 // %OrigVal = L Disp(%Base)
10131 // # fall through to LoopMBB
10132 MBB = StartMBB;
10133 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
10134 MBB->addSuccessor(LoopMBB);
10135
10136 // LoopMBB:
10137 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ]
10138 // %RotatedOldVal = RLL %OldVal, 0(%BitShift)
10139 // CompareOpcode %RotatedOldVal, %Src2
10140 // BRC KeepOldMask, UpdateMBB
10141 MBB = LoopMBB;
10142 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10143 .addReg(OrigVal).addMBB(StartMBB)
10144 .addReg(Dest).addMBB(UpdateMBB);
10145 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
10146 .addReg(OldVal).addReg(BitShift).addImm(0);
10147 BuildMI(MBB, DL, TII->get(CompareOpcode))
10148 .addReg(RotatedOldVal).addReg(Src2);
10149 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10150 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB);
10151 MBB->addSuccessor(UpdateMBB);
10152 MBB->addSuccessor(UseAltMBB);
10153
10154 // UseAltMBB:
10155 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0
10156 // # fall through to UpdateMBB
10157 MBB = UseAltMBB;
10158 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal)
10159 .addReg(RotatedOldVal).addReg(Src2)
10160 .addImm(32).addImm(31 + BitSize).addImm(0);
10161 MBB->addSuccessor(UpdateMBB);
10162
10163 // UpdateMBB:
10164 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ],
10165 // [ %RotatedAltVal, UseAltMBB ]
10166 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
10167 // %Dest = CS %OldVal, %NewVal, Disp(%Base)
10168 // JNE LoopMBB
10169 // # fall through to DoneMBB
10170 MBB = UpdateMBB;
10171 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal)
10172 .addReg(RotatedOldVal).addMBB(LoopMBB)
10173 .addReg(RotatedAltVal).addMBB(UseAltMBB);
10174 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
10175 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
10176 BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
10177 .addReg(OldVal)
10178 .addReg(NewVal)
10179 .add(Base)
10180 .addImm(Disp);
10181 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10183 MBB->addSuccessor(LoopMBB);
10184 MBB->addSuccessor(DoneMBB);
10185
10186 MI.eraseFromParent();
10187 return DoneMBB;
10188}
10189
10190// Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW
10191// instruction MI.
10193SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI,
10194 MachineBasicBlock *MBB) const {
10195 MachineFunction &MF = *MBB->getParent();
10196 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10197 MachineRegisterInfo &MRI = MF.getRegInfo();
10198
10199 // Extract the operands. Base can be a register or a frame index.
10200 Register Dest = MI.getOperand(0).getReg();
10201 MachineOperand Base = earlyUseOperand(MI.getOperand(1));
10202 int64_t Disp = MI.getOperand(2).getImm();
10203 Register CmpVal = MI.getOperand(3).getReg();
10204 Register OrigSwapVal = MI.getOperand(4).getReg();
10205 Register BitShift = MI.getOperand(5).getReg();
10206 Register NegBitShift = MI.getOperand(6).getReg();
10207 int64_t BitSize = MI.getOperand(7).getImm();
10208 DebugLoc DL = MI.getDebugLoc();
10209
10210 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass;
10211
10212 // Get the right opcodes for the displacement and zero-extension.
10213 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
10214 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
10215 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR;
10216 assert(LOpcode && CSOpcode && "Displacement out of range");
10217
10218 // Create virtual registers for temporary results.
10219 Register OrigOldVal = MRI.createVirtualRegister(RC);
10220 Register OldVal = MRI.createVirtualRegister(RC);
10221 Register SwapVal = MRI.createVirtualRegister(RC);
10222 Register StoreVal = MRI.createVirtualRegister(RC);
10223 Register OldValRot = MRI.createVirtualRegister(RC);
10224 Register RetryOldVal = MRI.createVirtualRegister(RC);
10225 Register RetrySwapVal = MRI.createVirtualRegister(RC);
10226
10227 // Insert 2 basic blocks for the loop.
10228 MachineBasicBlock *StartMBB = MBB;
10229 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10230 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10231 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB);
10232
10233 // StartMBB:
10234 // ...
10235 // %OrigOldVal = L Disp(%Base)
10236 // # fall through to LoopMBB
10237 MBB = StartMBB;
10238 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal)
10239 .add(Base)
10240 .addImm(Disp)
10241 .addReg(0);
10242 MBB->addSuccessor(LoopMBB);
10243
10244 // LoopMBB:
10245 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ]
10246 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ]
10247 // %OldValRot = RLL %OldVal, BitSize(%BitShift)
10248 // ^^ The low BitSize bits contain the field
10249 // of interest.
10250 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0
10251 // ^^ Replace the upper 32-BitSize bits of the
10252 // swap value with those that we loaded and rotated.
10253 // %Dest = LL[CH] %OldValRot
10254 // CR %Dest, %CmpVal
10255 // JNE DoneMBB
10256 // # Fall through to SetMBB
10257 MBB = LoopMBB;
10258 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
10259 .addReg(OrigOldVal).addMBB(StartMBB)
10260 .addReg(RetryOldVal).addMBB(SetMBB);
10261 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal)
10262 .addReg(OrigSwapVal).addMBB(StartMBB)
10263 .addReg(RetrySwapVal).addMBB(SetMBB);
10264 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot)
10265 .addReg(OldVal).addReg(BitShift).addImm(BitSize);
10266 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal)
10267 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0);
10268 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest)
10269 .addReg(OldValRot);
10270 BuildMI(MBB, DL, TII->get(SystemZ::CR))
10271 .addReg(Dest).addReg(CmpVal);
10272 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10275 MBB->addSuccessor(DoneMBB);
10276 MBB->addSuccessor(SetMBB);
10277
10278 // SetMBB:
10279 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift)
10280 // ^^ Rotate the new field to its proper position.
10281 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base)
10282 // JNE LoopMBB
10283 // # fall through to ExitMBB
10284 MBB = SetMBB;
10285 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal)
10286 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize);
10287 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal)
10288 .addReg(OldVal)
10289 .addReg(StoreVal)
10290 .add(Base)
10291 .addImm(Disp);
10292 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10294 MBB->addSuccessor(LoopMBB);
10295 MBB->addSuccessor(DoneMBB);
10296
10297 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in
10298 // to the block after the loop. At this point, CC may have been defined
10299 // either by the CR in LoopMBB or by the CS in SetMBB.
10300 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr))
10301 DoneMBB->addLiveIn(SystemZ::CC);
10302
10303 MI.eraseFromParent();
10304 return DoneMBB;
10305}
10306
10307// Emit a move from two GR64s to a GR128.
10309SystemZTargetLowering::emitPair128(MachineInstr &MI,
10310 MachineBasicBlock *MBB) const {
10311 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10312 const DebugLoc &DL = MI.getDebugLoc();
10313
10314 Register Dest = MI.getOperand(0).getReg();
10315 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest)
10316 .add(MI.getOperand(1))
10317 .addImm(SystemZ::subreg_h64)
10318 .add(MI.getOperand(2))
10319 .addImm(SystemZ::subreg_l64);
10320 MI.eraseFromParent();
10321 return MBB;
10322}
10323
10324// Emit an extension from a GR64 to a GR128. ClearEven is true
10325// if the high register of the GR128 value must be cleared or false if
10326// it's "don't care".
10327MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI,
10329 bool ClearEven) const {
10330 MachineFunction &MF = *MBB->getParent();
10331 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10332 MachineRegisterInfo &MRI = MF.getRegInfo();
10333 DebugLoc DL = MI.getDebugLoc();
10334
10335 Register Dest = MI.getOperand(0).getReg();
10336 Register Src = MI.getOperand(1).getReg();
10337 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10338
10339 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128);
10340 if (ClearEven) {
10341 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass);
10342 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10343
10344 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64)
10345 .addImm(0);
10346 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128)
10347 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64);
10348 In128 = NewIn128;
10349 }
10350 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest)
10351 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64);
10352
10353 MI.eraseFromParent();
10354 return MBB;
10355}
10356
10358SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI,
10360 unsigned Opcode, bool IsMemset) const {
10361 MachineFunction &MF = *MBB->getParent();
10362 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10363 MachineRegisterInfo &MRI = MF.getRegInfo();
10364 DebugLoc DL = MI.getDebugLoc();
10365
10366 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0));
10367 uint64_t DestDisp = MI.getOperand(1).getImm();
10368 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false);
10369 uint64_t SrcDisp;
10370
10371 // Fold the displacement Disp if it is out of range.
10372 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void {
10373 if (!isUInt<12>(Disp)) {
10374 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10375 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp);
10376 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg)
10377 .add(Base).addImm(Disp).addReg(0);
10379 Disp = 0;
10380 }
10381 };
10382
10383 if (!IsMemset) {
10384 SrcBase = earlyUseOperand(MI.getOperand(2));
10385 SrcDisp = MI.getOperand(3).getImm();
10386 } else {
10387 SrcBase = DestBase;
10388 SrcDisp = DestDisp++;
10389 foldDisplIfNeeded(DestBase, DestDisp);
10390 }
10391
10392 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4);
10393 bool IsImmForm = LengthMO.isImm();
10394 bool IsRegForm = !IsImmForm;
10395
10396 // Build and insert one Opcode of Length, with special treatment for memset.
10397 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB,
10399 MachineOperand DBase, uint64_t DDisp,
10400 MachineOperand SBase, uint64_t SDisp,
10401 unsigned Length) -> void {
10402 assert(Length > 0 && Length <= 256 && "Building memory op with bad length.");
10403 if (IsMemset) {
10404 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3));
10405 if (ByteMO.isImm())
10406 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI))
10407 .add(SBase).addImm(SDisp).add(ByteMO);
10408 else
10409 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC))
10410 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0);
10411 if (--Length == 0)
10412 return;
10413 }
10414 BuildMI(*MBB, InsPos, DL, TII->get(Opcode))
10415 .add(DBase).addImm(DDisp).addImm(Length)
10416 .add(SBase).addImm(SDisp)
10417 .setMemRefs(MI.memoperands());
10418 };
10419
10420 bool NeedsLoop = false;
10421 uint64_t ImmLength = 0;
10422 Register LenAdjReg = SystemZ::NoRegister;
10423 if (IsImmForm) {
10424 ImmLength = LengthMO.getImm();
10425 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment.
10426 if (ImmLength == 0) {
10427 MI.eraseFromParent();
10428 return MBB;
10429 }
10430 if (Opcode == SystemZ::CLC) {
10431 if (ImmLength > 3 * 256)
10432 // A two-CLC sequence is a clear win over a loop, not least because
10433 // it needs only one branch. A three-CLC sequence needs the same
10434 // number of branches as a loop (i.e. 2), but is shorter. That
10435 // brings us to lengths greater than 768 bytes. It seems relatively
10436 // likely that a difference will be found within the first 768 bytes,
10437 // so we just optimize for the smallest number of branch
10438 // instructions, in order to avoid polluting the prediction buffer
10439 // too much.
10440 NeedsLoop = true;
10441 } else if (ImmLength > 6 * 256)
10442 // The heuristic we use is to prefer loops for anything that would
10443 // require 7 or more MVCs. With these kinds of sizes there isn't much
10444 // to choose between straight-line code and looping code, since the
10445 // time will be dominated by the MVCs themselves.
10446 NeedsLoop = true;
10447 } else {
10448 NeedsLoop = true;
10449 LenAdjReg = LengthMO.getReg();
10450 }
10451
10452 // When generating more than one CLC, all but the last will need to
10453 // branch to the end when a difference is found.
10454 MachineBasicBlock *EndMBB =
10455 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop)
10457 : nullptr);
10458
10459 if (NeedsLoop) {
10460 Register StartCountReg =
10461 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass);
10462 if (IsImmForm) {
10463 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256);
10464 ImmLength &= 255;
10465 } else {
10466 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg)
10467 .addReg(LenAdjReg)
10468 .addReg(0)
10469 .addImm(8);
10470 }
10471
10472 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase);
10473 auto loadZeroAddress = [&]() -> MachineOperand {
10474 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10475 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0);
10476 return MachineOperand::CreateReg(Reg, false);
10477 };
10478 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister)
10479 DestBase = loadZeroAddress();
10480 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister)
10481 SrcBase = HaveSingleBase ? DestBase : loadZeroAddress();
10482
10483 MachineBasicBlock *StartMBB = nullptr;
10484 MachineBasicBlock *LoopMBB = nullptr;
10485 MachineBasicBlock *NextMBB = nullptr;
10486 MachineBasicBlock *DoneMBB = nullptr;
10487 MachineBasicBlock *AllDoneMBB = nullptr;
10488
10489 Register StartSrcReg = forceReg(MI, SrcBase, TII);
10490 Register StartDestReg =
10491 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII));
10492
10493 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
10494 Register ThisSrcReg = MRI.createVirtualRegister(RC);
10495 Register ThisDestReg =
10496 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC));
10497 Register NextSrcReg = MRI.createVirtualRegister(RC);
10498 Register NextDestReg =
10499 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC));
10500 RC = &SystemZ::GR64BitRegClass;
10501 Register ThisCountReg = MRI.createVirtualRegister(RC);
10502 Register NextCountReg = MRI.createVirtualRegister(RC);
10503
10504 if (IsRegForm) {
10505 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10506 StartMBB = SystemZ::emitBlockAfter(MBB);
10507 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10508 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10509 DoneMBB = SystemZ::emitBlockAfter(NextMBB);
10510
10511 // MBB:
10512 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB.
10513 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10514 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1);
10515 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10517 .addMBB(AllDoneMBB);
10518 MBB->addSuccessor(AllDoneMBB);
10519 if (!IsMemset)
10520 MBB->addSuccessor(StartMBB);
10521 else {
10522 // MemsetOneCheckMBB:
10523 // # Jump to MemsetOneMBB for a memset of length 1, or
10524 // # fall thru to StartMBB.
10525 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB);
10526 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin());
10527 MBB->addSuccessor(MemsetOneCheckMBB);
10528 MBB = MemsetOneCheckMBB;
10529 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10530 .addReg(LenAdjReg).addImm(-1);
10531 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10533 .addMBB(MemsetOneMBB);
10534 MBB->addSuccessor(MemsetOneMBB, {10, 100});
10535 MBB->addSuccessor(StartMBB, {90, 100});
10536
10537 // MemsetOneMBB:
10538 // # Jump back to AllDoneMBB after a single MVI or STC.
10539 MBB = MemsetOneMBB;
10540 insertMemMemOp(MBB, MBB->end(),
10541 MachineOperand::CreateReg(StartDestReg, false), DestDisp,
10542 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp,
10543 1);
10544 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB);
10545 MBB->addSuccessor(AllDoneMBB);
10546 }
10547
10548 // StartMBB:
10549 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB.
10550 MBB = StartMBB;
10551 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10552 .addReg(StartCountReg).addImm(0);
10553 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10555 .addMBB(DoneMBB);
10556 MBB->addSuccessor(DoneMBB);
10557 MBB->addSuccessor(LoopMBB);
10558 }
10559 else {
10560 StartMBB = MBB;
10561 DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10562 LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10563 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB);
10564
10565 // StartMBB:
10566 // # fall through to LoopMBB
10567 MBB->addSuccessor(LoopMBB);
10568
10569 DestBase = MachineOperand::CreateReg(NextDestReg, false);
10570 SrcBase = MachineOperand::CreateReg(NextSrcReg, false);
10571 if (EndMBB && !ImmLength)
10572 // If the loop handled the whole CLC range, DoneMBB will be empty with
10573 // CC live-through into EndMBB, so add it as live-in.
10574 DoneMBB->addLiveIn(SystemZ::CC);
10575 }
10576
10577 // LoopMBB:
10578 // %ThisDestReg = phi [ %StartDestReg, StartMBB ],
10579 // [ %NextDestReg, NextMBB ]
10580 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ],
10581 // [ %NextSrcReg, NextMBB ]
10582 // %ThisCountReg = phi [ %StartCountReg, StartMBB ],
10583 // [ %NextCountReg, NextMBB ]
10584 // ( PFD 2, 768+DestDisp(%ThisDestReg) )
10585 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg)
10586 // ( JLH EndMBB )
10587 //
10588 // The prefetch is used only for MVC. The JLH is used only for CLC.
10589 MBB = LoopMBB;
10590 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg)
10591 .addReg(StartDestReg).addMBB(StartMBB)
10592 .addReg(NextDestReg).addMBB(NextMBB);
10593 if (!HaveSingleBase)
10594 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg)
10595 .addReg(StartSrcReg).addMBB(StartMBB)
10596 .addReg(NextSrcReg).addMBB(NextMBB);
10597 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg)
10598 .addReg(StartCountReg).addMBB(StartMBB)
10599 .addReg(NextCountReg).addMBB(NextMBB);
10600 if (Opcode == SystemZ::MVC)
10601 BuildMI(MBB, DL, TII->get(SystemZ::PFD))
10603 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0);
10604 insertMemMemOp(MBB, MBB->end(),
10605 MachineOperand::CreateReg(ThisDestReg, false), DestDisp,
10606 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256);
10607 if (EndMBB) {
10608 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10610 .addMBB(EndMBB);
10611 MBB->addSuccessor(EndMBB);
10612 MBB->addSuccessor(NextMBB);
10613 }
10614
10615 // NextMBB:
10616 // %NextDestReg = LA 256(%ThisDestReg)
10617 // %NextSrcReg = LA 256(%ThisSrcReg)
10618 // %NextCountReg = AGHI %ThisCountReg, -1
10619 // CGHI %NextCountReg, 0
10620 // JLH LoopMBB
10621 // # fall through to DoneMBB
10622 //
10623 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes.
10624 MBB = NextMBB;
10625 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg)
10626 .addReg(ThisDestReg).addImm(256).addReg(0);
10627 if (!HaveSingleBase)
10628 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg)
10629 .addReg(ThisSrcReg).addImm(256).addReg(0);
10630 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg)
10631 .addReg(ThisCountReg).addImm(-1);
10632 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10633 .addReg(NextCountReg).addImm(0);
10634 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10636 .addMBB(LoopMBB);
10637 MBB->addSuccessor(LoopMBB);
10638 MBB->addSuccessor(DoneMBB);
10639
10640 MBB = DoneMBB;
10641 if (IsRegForm) {
10642 // DoneMBB:
10643 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run.
10644 // # Use EXecute Relative Long for the remainder of the bytes. The target
10645 // instruction of the EXRL will have a length field of 1 since 0 is an
10646 // illegal value. The number of bytes processed becomes (%LenAdjReg &
10647 // 0xff) + 1.
10648 // # Fall through to AllDoneMBB.
10649 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10650 Register RemDestReg = HaveSingleBase ? RemSrcReg
10651 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10652 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg)
10653 .addReg(StartDestReg).addMBB(StartMBB)
10654 .addReg(NextDestReg).addMBB(NextMBB);
10655 if (!HaveSingleBase)
10656 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg)
10657 .addReg(StartSrcReg).addMBB(StartMBB)
10658 .addReg(NextSrcReg).addMBB(NextMBB);
10659 if (IsMemset)
10660 insertMemMemOp(MBB, MBB->end(),
10661 MachineOperand::CreateReg(RemDestReg, false), DestDisp,
10662 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1);
10663 MachineInstrBuilder EXRL_MIB =
10664 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo))
10665 .addImm(Opcode)
10666 .addReg(LenAdjReg)
10667 .addReg(RemDestReg).addImm(DestDisp)
10668 .addReg(RemSrcReg).addImm(SrcDisp);
10669 MBB->addSuccessor(AllDoneMBB);
10670 MBB = AllDoneMBB;
10671 if (Opcode != SystemZ::MVC) {
10672 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine);
10673 if (EndMBB)
10674 MBB->addLiveIn(SystemZ::CC);
10675 }
10676 }
10677 MF.getProperties().resetNoPHIs();
10678 }
10679
10680 // Handle any remaining bytes with straight-line code.
10681 while (ImmLength > 0) {
10682 uint64_t ThisLength = std::min(ImmLength, uint64_t(256));
10683 // The previous iteration might have created out-of-range displacements.
10684 // Apply them using LA/LAY if so.
10685 foldDisplIfNeeded(DestBase, DestDisp);
10686 foldDisplIfNeeded(SrcBase, SrcDisp);
10687 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength);
10688 DestDisp += ThisLength;
10689 SrcDisp += ThisLength;
10690 ImmLength -= ThisLength;
10691 // If there's another CLC to go, branch to the end if a difference
10692 // was found.
10693 if (EndMBB && ImmLength > 0) {
10694 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB);
10695 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10697 .addMBB(EndMBB);
10698 MBB->addSuccessor(EndMBB);
10699 MBB->addSuccessor(NextMBB);
10700 MBB = NextMBB;
10701 }
10702 }
10703 if (EndMBB) {
10704 MBB->addSuccessor(EndMBB);
10705 MBB = EndMBB;
10706 MBB->addLiveIn(SystemZ::CC);
10707 }
10708
10709 MI.eraseFromParent();
10710 return MBB;
10711}
10712
10713// Decompose string pseudo-instruction MI into a loop that continually performs
10714// Opcode until CC != 3.
//
// MI operands as read below: 0 = End1Reg, 1 = Start1Reg, 2 = Start2Reg,
// 3 = CharReg. End1Reg is defined by the emitted Opcode instruction; the
// second result goes to a fresh virtual register (End2Reg).
//
// The block is split before MI; the new loop block is placed after the
// original block. MI is erased and the block following the loop (DoneMBB) is
// returned, with CC added to its live-ins.
10715MachineBasicBlock *SystemZTargetLowering::emitStringWrapper(
10716 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10717 MachineFunction &MF = *MBB->getParent();
10718 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10719 MachineRegisterInfo &MRI = MF.getRegInfo();
10720 DebugLoc DL = MI.getDebugLoc();
10721
 // NOTE(review): registers are held in uint64_t here, whereas
 // emitLoadAndTestCmp0 and emitProbedAlloca in this file use Register.
10722 uint64_t End1Reg = MI.getOperand(0).getReg();
10723 uint64_t Start1Reg = MI.getOperand(1).getReg();
10724 uint64_t Start2Reg = MI.getOperand(2).getReg();
10725 uint64_t CharReg = MI.getOperand(3).getReg();
10726
 // Loop-carried copies of the two pointers, plus the second result of Opcode.
10727 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass;
10728 uint64_t This1Reg = MRI.createVirtualRegister(RC);
10729 uint64_t This2Reg = MRI.createVirtualRegister(RC);
10730 uint64_t End2Reg = MRI.createVirtualRegister(RC);
10731
10732 MachineBasicBlock *StartMBB = MBB;
10733 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
10734 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
10735
10736 // StartMBB:
10737 // # fall through to LoopMBB
10738 MBB->addSuccessor(LoopMBB);
10739
10740 // LoopMBB:
10741 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ]
10742 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ]
10743 // R0L = %CharReg
10744 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L
10745 // JO LoopMBB
10746 // # fall through to DoneMBB
10747 //
10748 // The load of R0L can be hoisted by post-RA LICM.
10749 MBB = LoopMBB;
10750
10751 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg)
10752 .addReg(Start1Reg).addMBB(StartMBB)
10753 .addReg(End1Reg).addMBB(LoopMBB);
10754 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg)
10755 .addReg(Start2Reg).addMBB(StartMBB)
10756 .addReg(End2Reg).addMBB(LoopMBB);
10757 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg);
10758 BuildMI(MBB, DL, TII->get(Opcode))
10759 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define)
10760 .addReg(This1Reg).addReg(This2Reg);
 // NOTE(review): source line 10762 is absent from this listing, so the operand
 // chain of the BRC below is not visible; per the sketch above it is the
 // "JO LoopMBB" back-edge. Restore the line from the real source file.
10761 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10763 MBB->addSuccessor(LoopMBB);
10764 MBB->addSuccessor(DoneMBB);
10765
 // Code after the loop reads the CC value produced by the last iteration.
10766 DoneMBB->addLiveIn(SystemZ::CC);
10767
10768 MI.eraseFromParent();
10769 return DoneMBB;
10770}
10771
10772// Update TBEGIN instruction with final opcode and register clobbers.
10773MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin(
10774 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode,
10775 bool NoFloat) const {
10776 MachineFunction &MF = *MBB->getParent();
10777 const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
10778 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10779
10780 // Update opcode.
10781 MI.setDesc(TII->get(Opcode));
10782
10783 // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
10784 // Make sure to add the corresponding GRSM bits if they are missing.
10785 uint64_t Control = MI.getOperand(2).getImm();
10786 static const unsigned GPRControlBit[16] = {
10787 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
10788 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
10789 };
10790 Control |= GPRControlBit[15];
10791 if (TFI->hasFP(MF))
10792 Control |= GPRControlBit[11];
10793 MI.getOperand(2).setImm(Control);
10794
10795 // Add GPR clobbers.
10796 for (int I = 0; I < 16; I++) {
10797 if ((Control & GPRControlBit[I]) == 0) {
10798 unsigned Reg = SystemZMC::GR64Regs[I];
10799 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10800 }
10801 }
10802
10803 // Add FPR/VR clobbers.
10804 if (!NoFloat && (Control & 4) != 0) {
10805 if (Subtarget.hasVector()) {
10806 for (unsigned Reg : SystemZMC::VR128Regs) {
10807 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10808 }
10809 } else {
10810 for (unsigned Reg : SystemZMC::FP64Regs) {
10811 MI.addOperand(MachineOperand::CreateReg(Reg, true, true));
10812 }
10813 }
10814 }
10815
10816 return MBB;
10817}
10818
10819MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0(
10820 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const {
10821 MachineFunction &MF = *MBB->getParent();
10822 MachineRegisterInfo *MRI = &MF.getRegInfo();
10823 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10824 DebugLoc DL = MI.getDebugLoc();
10825
10826 Register SrcReg = MI.getOperand(0).getReg();
10827
10828 // Create new virtual register of the same class as source.
10829 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
10830 Register DstReg = MRI->createVirtualRegister(RC);
10831
10832 // Replace pseudo with a normal load-and-test that models the def as
10833 // well.
10834 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
10835 .addReg(SrcReg)
10836 .setMIFlags(MI.getFlags());
10837 MI.eraseFromParent();
10838
10839 return MBB;
10840}
10841
// Expand the probed-alloca pseudo MI into explicit control flow. As built
// below: a loop lowers R15D by ProbeSize per iteration and touches the newly
// allocated area with a CG compare tagged with VolLdMMO; a tail block then
// handles the remaining size held in PHIReg; finally R15D is copied into
// DstReg at the start of the block that follows MI.
//
// MI operand 0 is the result register, operand 2 the requested size. MI is
// erased and the continuation block (DoneMBB) is returned.
//
// NOTE(review): this listing has lost source lines 10843 (rest of the
// signature), 10860 (rest of the getMachineMemOperand call), 10879 and 10907
// (leading operands of the two BRCs). Restore them from the real source file.
10842MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca(
10844 MachineFunction &MF = *MBB->getParent();
10845 MachineRegisterInfo *MRI = &MF.getRegInfo();
10846 const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
10847 DebugLoc DL = MI.getDebugLoc();
10848 const unsigned ProbeSize = getStackProbeSize(MF);
10849 Register DstReg = MI.getOperand(0).getReg();
10850 Register SizeReg = MI.getOperand(2).getReg();
10851
 // Block layout after this: StartMBB, LoopTestMBB, LoopBodyMBB, TailTestMBB,
 // TailMBB, with DoneMBB holding whatever followed MI.
10852 MachineBasicBlock *StartMBB = MBB;
10853 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB);
10854 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB);
10855 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB);
10856 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB);
10857 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB);
10858
 // Memory operand attached to both probing compares below.
10859 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(),
10861
 // PHIReg: size still to allocate on entry to the loop test.
 // IncReg: that size after one more ProbeSize step.
10862 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10863 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass);
10864
10865 // LoopTestMBB
10866 // BRC TailTestMBB
10867 // # fallthrough to LoopBodyMBB
10868 StartMBB->addSuccessor(LoopTestMBB);
10869 MBB = LoopTestMBB;
10870 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg)
10871 .addReg(SizeReg)
10872 .addMBB(StartMBB)
10873 .addReg(IncReg)
10874 .addMBB(LoopBodyMBB);
 // Compare the remaining size against ProbeSize to decide whether another
 // full step is needed.
10875 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI))
10876 .addReg(PHIReg)
10877 .addImm(ProbeSize);
10878 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10880 .addMBB(TailTestMBB);
10881 MBB->addSuccessor(LoopBodyMBB);
10882 MBB->addSuccessor(TailTestMBB);
10883
10884 // LoopBodyMBB: Allocate and probe by means of a volatile compare.
10885 // J LoopTestMBB
10886 MBB = LoopBodyMBB;
10887 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg)
10888 .addReg(PHIReg)
10889 .addImm(ProbeSize);
10890 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D)
10891 .addReg(SystemZ::R15D)
10892 .addImm(ProbeSize);
 // The compare addresses R15D + ProbeSize - 8 with no index register.
10893 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
10894 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0)
10895 .setMemRefs(VolLdMMO);
10896 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB);
10897 MBB->addSuccessor(LoopTestMBB);
10898
10899 // TailTestMBB
10900 // BRC DoneMBB
10901 // # fallthrough to TailMBB
10902 MBB = TailTestMBB;
 // Compare the remaining size with zero; the branch skips the tail block.
10903 BuildMI(MBB, DL, TII->get(SystemZ::CGHI))
10904 .addReg(PHIReg)
10905 .addImm(0);
10906 BuildMI(MBB, DL, TII->get(SystemZ::BRC))
10908 .addMBB(DoneMBB);
10909 MBB->addSuccessor(TailMBB);
10910 MBB->addSuccessor(DoneMBB);
10911
10912 // TailMBB
10913 // # fallthrough to DoneMBB
10914 MBB = TailMBB;
10915 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D)
10916 .addReg(SystemZ::R15D)
10917 .addReg(PHIReg);
 // The compare addresses R15D - 8 indexed by PHIReg.
10918 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D)
10919 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg)
10920 .setMemRefs(VolLdMMO);
10921 MBB->addSuccessor(DoneMBB);
10922
10923 // DoneMBB
10924 MBB = DoneMBB;
 // Inserted at the very start of DoneMBB so DstReg is set before any code
 // that followed the pseudo.
10925 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg)
10926 .addReg(SystemZ::R15D);
10927
10928 MI.eraseFromParent();
10929 return DoneMBB;
10930}
10931
10932SDValue SystemZTargetLowering::
10933getBackchainAddress(SDValue SP, SelectionDAG &DAG) const {
10934 MachineFunction &MF = DAG.getMachineFunction();
10935 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>();
10936 SDLoc DL(SP);
10937 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP,
10938 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL));
10939}
10940
// Dispatch on the opcode of pseudo-instruction MI and hand it, together with
// its block MBB, to the emit* helper that expands it; each case returns the
// helper's resulting block. Opcodes without a case hit llvm_unreachable.
//
// NOTE(review): the function signature (source lines 10941-10942) is missing
// from this listing; presumably this is the body of
// SystemZTargetLowering::EmitInstrWithCustomInserter -- confirm against the
// real source file.
10943 switch (MI.getOpcode()) {
10944 case SystemZ::ADJCALLSTACKDOWN:
10945 case SystemZ::ADJCALLSTACKUP:
10946 return emitAdjCallStack(MI, MBB);
10947
10948 case SystemZ::Select32:
10949 case SystemZ::Select64:
10950 case SystemZ::Select128:
10951 case SystemZ::SelectF32:
10952 case SystemZ::SelectF64:
10953 case SystemZ::SelectF128:
10954 case SystemZ::SelectVR32:
10955 case SystemZ::SelectVR64:
10956 case SystemZ::SelectVR128:
10957 return emitSelect(MI, MBB);
10958
 // Conditional stores: the arguments after MBB are a store opcode, a second
 // opcode that is 0 for several cases, and a flag that is true for the "Inv"
 // variants.
10959 case SystemZ::CondStore8Mux:
10960 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
10961 case SystemZ::CondStore8MuxInv:
10962 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true);
10963 case SystemZ::CondStore16Mux:
10964 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
10965 case SystemZ::CondStore16MuxInv:
10966 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
10967 case SystemZ::CondStore32Mux:
10968 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
10969 case SystemZ::CondStore32MuxInv:
10970 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
10971 case SystemZ::CondStore8:
10972 return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
10973 case SystemZ::CondStore8Inv:
10974 return emitCondStore(MI, MBB, SystemZ::STC, 0, true);
10975 case SystemZ::CondStore16:
10976 return emitCondStore(MI, MBB, SystemZ::STH, 0, false);
10977 case SystemZ::CondStore16Inv:
10978 return emitCondStore(MI, MBB, SystemZ::STH, 0, true);
10979 case SystemZ::CondStore32:
10980 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false);
10981 case SystemZ::CondStore32Inv:
10982 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true);
10983 case SystemZ::CondStore64:
10984 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false);
10985 case SystemZ::CondStore64Inv:
10986 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true);
10987 case SystemZ::CondStoreF32:
10988 return emitCondStore(MI, MBB, SystemZ::STE, 0, false);
10989 case SystemZ::CondStoreF32Inv:
10990 return emitCondStore(MI, MBB, SystemZ::STE, 0, true);
10991 case SystemZ::CondStoreF64:
10992 return emitCondStore(MI, MBB, SystemZ::STD, 0, false);
10993 case SystemZ::CondStoreF64Inv:
10994 return emitCondStore(MI, MBB, SystemZ::STD, 0, true);
10995
 // 128-bit compares: the flag is false for the signed (S) pseudo and true for
 // the unsigned (U) one.
10996 case SystemZ::SCmp128Hi:
10997 return emitICmp128Hi(MI, MBB, false);
10998 case SystemZ::UCmp128Hi:
10999 return emitICmp128Hi(MI, MBB, true);
11000
11001 case SystemZ::PAIR128:
11002 return emitPair128(MI, MBB);
11003 case SystemZ::AEXT128:
11004 return emitExt128(MI, MBB, false);
11005 case SystemZ::ZEXT128:
11006 return emitExt128(MI, MBB, true);
11007
 // Atomic read-modify-write pseudos: the third argument is the opcode of the
 // binary operation (0 for a plain swap); the NRi/NILHi cases pass an extra
 // true flag.
11008 case SystemZ::ATOMIC_SWAPW:
11009 return emitAtomicLoadBinary(MI, MBB, 0);
11010
11011 case SystemZ::ATOMIC_LOADW_AR:
11012 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR);
11013 case SystemZ::ATOMIC_LOADW_AFI:
11014 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI);
11015
11016 case SystemZ::ATOMIC_LOADW_SR:
11017 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR);
11018
11019 case SystemZ::ATOMIC_LOADW_NR:
11020 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR);
11021 case SystemZ::ATOMIC_LOADW_NILH:
11022 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH);
11023
11024 case SystemZ::ATOMIC_LOADW_OR:
11025 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR);
11026 case SystemZ::ATOMIC_LOADW_OILH:
11027 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH);
11028
11029 case SystemZ::ATOMIC_LOADW_XR:
11030 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR);
11031 case SystemZ::ATOMIC_LOADW_XILF:
11032 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF);
11033
11034 case SystemZ::ATOMIC_LOADW_NRi:
11035 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true);
11036 case SystemZ::ATOMIC_LOADW_NILHi:
11037 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true);
11038
 // Atomic min/max: a signed (CR) or unsigned (CLR) compare opcode plus a CC
 // mask.
11039 case SystemZ::ATOMIC_LOADW_MIN:
11040 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE);
11041 case SystemZ::ATOMIC_LOADW_MAX:
11042 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE);
11043 case SystemZ::ATOMIC_LOADW_UMIN:
11044 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE);
11045 case SystemZ::ATOMIC_LOADW_UMAX:
11046 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE);
11047
11048 case SystemZ::ATOMIC_CMP_SWAPW:
11049 return emitAtomicCmpSwapW(MI, MBB);
 // Memory-to-memory pseudos, each mapped to the real instruction to emit.
11050 case SystemZ::MVCImm:
11051 case SystemZ::MVCReg:
11052 return emitMemMemWrapper(MI, MBB, SystemZ::MVC);
11053 case SystemZ::NCImm:
11054 return emitMemMemWrapper(MI, MBB, SystemZ::NC);
11055 case SystemZ::OCImm:
11056 return emitMemMemWrapper(MI, MBB, SystemZ::OC);
11057 case SystemZ::XCImm:
11058 case SystemZ::XCReg:
11059 return emitMemMemWrapper(MI, MBB, SystemZ::XC);
11060 case SystemZ::CLCImm:
11061 case SystemZ::CLCReg:
11062 return emitMemMemWrapper(MI, MBB, SystemZ::CLC);
11063 case SystemZ::MemsetImmImm:
11064 case SystemZ::MemsetImmReg:
11065 case SystemZ::MemsetRegImm:
11066 case SystemZ::MemsetRegReg:
11067 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/);
 // String loop pseudos, expanded by emitStringWrapper.
11068 case SystemZ::CLSTLoop:
11069 return emitStringWrapper(MI, MBB, SystemZ::CLST);
11070 case SystemZ::MVSTLoop:
11071 return emitStringWrapper(MI, MBB, SystemZ::MVST);
11072 case SystemZ::SRSTLoop:
11073 return emitStringWrapper(MI, MBB, SystemZ::SRST);
 // Transaction begin: the last argument is emitTransactionBegin's NoFloat.
11074 case SystemZ::TBEGIN:
11075 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
11076 case SystemZ::TBEGIN_nofloat:
11077 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
11078 case SystemZ::TBEGINC:
11079 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
11080 case SystemZ::LTEBRCompare_Pseudo:
11081 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR);
11082 case SystemZ::LTDBRCompare_Pseudo:
11083 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR);
11084 case SystemZ::LTXBRCompare_Pseudo:
11085 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR);
11086
11087 case SystemZ::PROBED_ALLOCA:
11088 return emitProbedAlloca(MI, MBB);
11089 case SystemZ::EH_SjLj_SetJmp:
11090 return emitEHSjLjSetJmp(MI, MBB);
11091 case SystemZ::EH_SjLj_LongJmp:
11092 return emitEHSjLjLongJmp(MI, MBB);
11093
11094 case TargetOpcode::STACKMAP:
11095 case TargetOpcode::PATCHPOINT:
11096 return emitPatchPoint(MI, MBB);
11097
11098 default:
11099 llvm_unreachable("Unexpected instr type to insert");
11100 }
11101}
11102
11103// This is only used by the isel schedulers, and is needed only to prevent
11104// compiler from crashing when list-ilp is used.
//
// Returns the representative register class for VT; MVT::Untyped maps to
// ADDR128Bit.
// NOTE(review): source line 11109 (the return for every other VT) is missing
// from this listing; restore it from the real source file.
11105const TargetRegisterClass *
11106SystemZTargetLowering::getRepRegClassFor(MVT VT) const {
11107 if (VT == MVT::Untyped)
11108 return &SystemZ::ADDR128BitRegClass;
11110}
11111
11112SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op,
11113 SelectionDAG &DAG) const {
11114 SDLoc dl(Op);
11115 /*
11116 The rounding method is in FPC Byte 3 bits 6-7, and has the following
11117 settings:
11118 00 Round to nearest
11119 01 Round to 0
11120 10 Round to +inf
11121 11 Round to -inf
11122
11123 FLT_ROUNDS, on the other hand, expects the following:
11124 -1 Undefined
11125 0 Round to 0
11126 1 Round to nearest
11127 2 Round to +inf
11128 3 Round to -inf
11129 */
11130
11131 // Save FPC to register.
11132 SDValue Chain = Op.getOperand(0);
11133 SDValue EFPC(
11134 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0);
11135 Chain = EFPC.getValue(1);
11136
11137 // Transform as necessary
11138 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC,
11139 DAG.getConstant(3, dl, MVT::i32));
11140 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1
11141 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1,
11142 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1,
11143 DAG.getConstant(1, dl, MVT::i32)));
11144
11145 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2,
11146 DAG.getConstant(1, dl, MVT::i32));
11147 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType());
11148
11149 return DAG.getMergeValues({RetVal, Chain}, dl);
11150}
11151
11152SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op,
11153 SelectionDAG &DAG) const {
11154 EVT VT = Op.getValueType();
11155 Op = Op.getOperand(0);
11156 EVT OpVT = Op.getValueType();
11157
11158 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector.");
11159
11160 SDLoc DL(Op);
11161
11162 // load a 0 vector for the third operand of VSUM.
11163 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT));
11164
11165 // execute VSUM.
11166 switch (OpVT.getScalarSizeInBits()) {
11167 case 8:
11168 case 16:
11169 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero);
11170 [[fallthrough]];
11171 case 32:
11172 case 64:
11173 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op,
11174 DAG.getBitcast(Op.getValueType(), Zero));
11175 break;
11176 case 128:
11177 break; // VSUM over v1i128 should not happen and would be a noop
11178 default:
11179 llvm_unreachable("Unexpected scalar size.");
11180 }
11181 // Cast to original vector type, retrieve last element.
11182 return DAG.getNode(
11183 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op),
11184 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32));
11185}
11186
// Body of printFunctionArgExts(F, OS): writes a one-line summary of F to OS
// in the form
//   [return attributes ]<return type> @<name>(<param type>[ <ext attr>]..., ...)
// followed by a newline. Only the sext/zext/noext parameter attributes are
// printed.
//
// NOTE(review): the signature (source line 11187) is missing from this
// listing; restore it from the real source file.
11188 FunctionType *FT = F->getFunctionType();
11189 const AttributeList &Attrs = F->getAttributes();
 // Return attributes, if any, come first.
11190 if (Attrs.hasRetAttrs())
11191 OS << Attrs.getAsString(AttributeList::ReturnIndex) << " ";
11192 OS << *F->getReturnType() << " @" << F->getName() << "(";
11193 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) {
 // Comma-separate every parameter after the first.
11194 if (I)
11195 OS << ", ";
11196 OS << *FT->getParamType(I);
11197 AttributeSet ArgAttrs = Attrs.getParamAttrs(I);
 // Append the name of each extension attribute present on this parameter.
11198 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt})
11199 if (ArgAttrs.hasAttribute(A))
11200 OS << " " << Attribute::getNameFromAttrKind(A);
11201 }
11202 OS << ")\n";
11203}
11204
11205bool SystemZTargetLowering::isInternal(const Function *Fn) const {
11206 std::map<const Function *, bool>::iterator Itr = IsInternalCache.find(Fn);
11207 if (Itr == IsInternalCache.end())
11208 Itr = IsInternalCache
11209 .insert(std::pair<const Function *, bool>(
11210 Fn, (Fn->hasLocalLinkage() && !Fn->hasAddressTaken())))
11211 .first;
11212 return Itr->second;
11213}
11214
// Check the outgoing arguments (Outs) of a call against
// verifyNarrowIntegerArgs. The check is skipped when Callee is a global
// address that resolves to a Function for which isInternal() is true. On
// failure, a diagnostic naming the callee and caller is written to errs() and
// llvm_unreachable is hit.
//
// NOTE(review): source lines 11220 (the condition guarding the early return)
// and 11236 (the statement that prints the caller) are missing from this
// listing; restore them from the real source file.
11215void SystemZTargetLowering::
11216verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs,
11217 const Function *F, SDValue Callee) const {
11218 // Temporarily only do the check when explicitly requested, until it can be
11219 // enabled by default.
11221 return;
11222
 // CalleeFn stays null when the callee is not a directly referenced Function
 // (for example an indirect call); such calls are always checked.
11223 bool IsInternal = false;
11224 const Function *CalleeFn = nullptr;
11225 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
11226 if ((CalleeFn = dyn_cast<Function>(G->getGlobal())))
11227 IsInternal = isInternal(CalleeFn);
11228 if (!IsInternal && !verifyNarrowIntegerArgs(Outs)) {
11229 errs() << "ERROR: Missing extension attribute of passed "
11230 << "value in call to function:\n" << "Callee: ";
11231 if (CalleeFn != nullptr)
11232 printFunctionArgExts(CalleeFn, errs());
11233 else
11234 errs() << "-\n";
11235 errs() << "Caller: ";
11237 llvm_unreachable("");
11238 }
11239}
11240
// Check the returned values (Outs) of function F against
// verifyNarrowIntegerArgs, unless isInternal(F) is true. On failure, a
// diagnostic is written to errs() and llvm_unreachable is hit.
//
// NOTE(review): source lines 11246 (the condition guarding the early return)
// and 11252 (presumably the statement that prints F -- confirm) are missing
// from this listing; restore them from the real source file.
11241void SystemZTargetLowering::
11242verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs,
11243 const Function *F) const {
11244 // Temporarily only do the check when explicitly requested, until it can be
11245 // enabled by default.
11247 return;
11248
11249 if (!isInternal(F) && !verifyNarrowIntegerArgs(Outs)) {
11250 errs() << "ERROR: Missing extension attribute of returned "
11251 << "value from function:\n";
11253 llvm_unreachable("");
11254 }
11255}
11256
11257// Verify that narrow integer arguments are extended as required by the ABI.
11258// Return false if an error is found.
//
// Non-ELF targets always pass. Otherwise every integer entry of Outs whose
// type is i32 must carry one of the sext, zext or noext flags.
//
// NOTE(review): source lines 11264-11265 (the opening of the if/else that
// decides whether the check is enabled) are missing from this listing;
// restore them from the real source file.
11259bool SystemZTargetLowering::verifyNarrowIntegerArgs(
11260 const SmallVectorImpl<ISD::OutputArg> &Outs) const {
11261 if (!Subtarget.isTargetELF())
11262 return true;
11263
11266 return true;
11267 } else if (!getTargetMachine().Options.VerifyArgABICompliance)
11268 return true;
11269
11270 for (unsigned i = 0; i < Outs.size(); ++i) {
11271 MVT VT = Outs[i].VT;
11272 ISD::ArgFlagsTy Flags = Outs[i].Flags;
11273 if (VT.isInteger()) {
 // Integer values reaching this point are expected to be i32 or at least 64
 // bits wide.
11274 assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
11275 "Unexpected integer argument VT.");
 // An i32 with no extension flag at all is the error being looked for.
11276 if (VT == MVT::i32 &&
11277 !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
11278 return false;
11279 }
11280 }
11281
11282 return true;
11283}
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
static bool isZeroVector(SDValue N)
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis false
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val, const CCValAssign &VA, const SDLoc &DL)
#define Check(C,...)
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RegName(no)
static LVOptions Options
Definition LVOptions.cpp:25
static bool isSelectPseudo(MachineInstr &MI)
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static bool isUndef(const MachineInstr &MI)
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
uint64_t High
uint64_t IntrinsicInst * II
#define P(N)
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
This file defines the SmallSet class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL, Comparison C, SDValue TrueOp, SDValue FalseOp)
static SmallVector< SDValue, 4 > simplifyAssumingCCVal(SDValue &Val, SDValue &CC, SelectionDAG &DAG)
static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS)
static void adjustForLTGFR(Comparison &C)
static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, SDValue Op1)
#define CONV(X)
static cl::opt< bool > EnableIntArgExtCheck("argext-abi-check", cl::init(false), cl::desc("Verify that narrow int args are properly extended per the " "SystemZ ABI."))
static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG)
static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, unsigned Opcode, SDValue Op0, SDValue Op1, SDValue &Even, SDValue &Odd)
static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG)
static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Value)
static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In)
static bool isSimpleShift(SDValue N, unsigned &ShiftVal)
static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1)
static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num)
static bool isVectorElementSwap(ArrayRef< int > M, EVT VT)
static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, SDValue &AlignedAddr, SDValue &BitShift, SDValue &NegBitShift)
static bool isShlDoublePermute(const SmallVectorImpl< int > &Bytes, unsigned &StartIndex, unsigned &OpNo0, unsigned &OpNo1)
static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, const Permute &P, SDValue Op0, SDValue Op1)
static SDNode * emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg)
static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, SDValue Op0, SDValue Op1, SDValue &Hi, SDValue &Lo)
static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, SDNode *&HiPart)
static void createPHIsForSelects(SmallVector< MachineInstr *, 8 > &Selects, MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, MachineBasicBlock *SinkMBB)
static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, SDValue *Ops, const SmallVectorImpl< int > &Bytes)
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode, bool &Invert)
static unsigned CCMaskForCondCode(ISD::CondCode CC)
static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static void adjustForFNeg(Comparison &C)
static bool isScalarToVector(SDValue Op)
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg, unsigned CCValid, unsigned CCMask)
static bool matchPermute(const SmallVectorImpl< int > &Bytes, const Permute &P, unsigned &OpNo0, unsigned &OpNo1)
static bool isAddCarryChain(SDValue Carry)
static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static MachineOperand earlyUseOperand(MachineOperand Op)
static bool canUseSiblingCall(const CCState &ArgCCInfo, SmallVectorImpl< CCValAssign > &ArgLocs, SmallVectorImpl< ISD::OutputArg > &Outs)
static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, SDLoc &DL, SDValue &Chain)
static SDValue convertToF16(SDValue Op, SelectionDAG &DAG)
static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask, SelectionDAG &DAG)
static bool shouldSwapCmpOperands(const Comparison &C)
static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType)
static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, unsigned Offset, bool LoadAdr=false)
static SDNode * emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, unsigned Opcode)
static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static bool getVPermMask(SDValue ShuffleOp, SmallVectorImpl< int > &Bytes)
static const Permute PermuteForms[]
static std::pair< SDValue, int > findCCUse(const SDValue &Val)
static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static bool isSubBorrowChain(SDValue Carry)
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, unsigned OpNo)
static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, bool IsNegative)
static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, BuildVectorSDNode *BVN)
static SDValue convertFromF16(SDValue Op, SDLoc DL, SelectionDAG &DAG)
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode)
static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In)
static SDValue MergeInputChains(SDNode *N1, SDNode *N2)
static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, const SDLoc &SL)
static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, uint64_t Mask, uint64_t CmpVal, unsigned ICmpType)
static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid)
static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, SDValue Op, SDValue Chain)
static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, ISD::CondCode Cond, const SDLoc &DL, SDValue Chain=SDValue(), bool IsSignaling=false)
static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB)
static Register forceReg(MachineInstr &MI, MachineOperand &Base, const SystemZInstrInfo *TII)
static bool is32Bit(EVT VT)
static std::pair< unsigned, const TargetRegisterClass * > parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, const unsigned *Map, unsigned Size)
static unsigned detectEvenOddMultiplyOperand(const SelectionDAG &DAG, const SystemZSubtarget &Subtarget, SDValue &Op)
static bool matchDoublePermute(const SmallVectorImpl< int > &Bytes, const Permute &P, SmallVectorImpl< int > &Transform)
static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, SDValue Call, unsigned CCValid, uint64_t CC, ISD::CondCode Cond)
static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg)
static AddressingMode getLoadStoreAddrMode(bool HasVector, Type *Ty)
static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, SDValue Op0, SDValue Op1)
static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth, unsigned OpNo)
static bool getShuffleInput(const SmallVectorImpl< int > &Bytes, unsigned Start, unsigned BytesPerElement, int &Base)
static AddressingMode supportedAddressingMode(Instruction *I, bool HasVector)
static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, SDValue &HiPart)
static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C)
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
void setBitsFrom(unsigned loBit)
Set the top bits starting from loBit.
Definition APInt.h:1386
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1513
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1331
static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit)
Get a value with a block of bits set.
Definition APInt.h:259
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1489
bool isSingleWord() const
Determine if this APInt just has one word to store value.
Definition APInt.h:323
LLVM_ABI void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
Definition APInt.cpp:397
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1258
void lshrInPlace(unsigned ShiftAmt)
Logical right-shift this APInt by ShiftAmt in place.
Definition APInt.h:859
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
an instruction that atomically reads a memory location, combines it with another value,...
@ Add
*p = old + v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
BinOp getOperation() const
This class holds the attributes for a particular argument, parameter, function, or return value.
Definition Attributes.h:361
LLVM_ABI bool hasAttribute(Attribute::AttrKind Kind) const
Return true if the attribute exists in this set.
LLVM_ABI StringRef getValueAsString() const
Return the attribute's value as a string.
static LLVM_ABI StringRef getNameFromAttrKind(Attribute::AttrKind AttrKind)
LLVM Basic Block Representation.
Definition BasicBlock.h:62
A "pseudo-class" with methods for operating on BUILD_VECTORs.
LLVM_ABI bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
LLVM_ABI bool isConstant() const
CCState - This class holds information needed while lowering arguments and return values.
LLVM_ABI void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
LLVM_ABI bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
LLVM_ABI void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
LLVM_ABI void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
uint64_t getZExtValue() const
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:63
A debug info location.
Definition DebugLoc.h:124
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
bool hasAddressTaken(const User **=nullptr, bool IgnoreCallbackUses=false, bool IgnoreAssumeLikeCalls=true, bool IngoreLLVMUsed=false, bool IgnoreARCAttachedCall=false, bool IgnoreCastedDirectCall=false) const
hasAddressTaken - returns true if there are any uses of this function other than direct calls or invo...
Definition Function.cpp:954
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:765
uint64_t getFnAttributeAsParsedInteger(StringRef Kind, uint64_t Default=0) const
For a string attribute Kind, parse attribute as an integer.
Definition Function.cpp:777
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this function.
Definition Function.h:270
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:730
LLVM_ABI const GlobalObject * getAliaseeObject() const
Definition Globals.cpp:636
bool hasLocalLinkage() const
bool hasPrivateLinkage() const
bool hasInternalLinkage() const
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
Machine Value Type.
static auto integer_fixedlen_vector_valuetypes()
SimpleValueType SimpleTy
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
static MVT getVectorVT(MVT VT, unsigned NumElements)
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setMaxCallFrameSize(uint64_t S)
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
void setReturnAddressIsTaken(bool s)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
reverse_iterator rbegin()
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineFunctionProperties & getProperties() const
Get the function properties.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
bool killsRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr kills the specified register.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value; see Flags.
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< user_iterator > users()
void setFlags(SDNodeFlags NewFlags)
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
bool isMachineOpcode() const
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
const APInt & getConstantOperandAPInt(unsigned i) const
uint64_t getScalarValueSizeInBits() const
unsigned getResNo() const
get the index which selects a specific result in the SDNode
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getMachineOpcode() const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
SDValue getExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT, unsigned Opcode)
Convert Op, which must be of integer type, to the integer type VT, by either any/sign/zero-extending ...
LLVM_ABI SDValue getAddrSpaceCast(const SDLoc &dl, EVT VT, SDValue Ptr, unsigned SrcAS, unsigned DestAS)
Return an AddrSpaceCastSDNode.
const TargetSubtargetInfo & getSubtarget() const
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getAtomicLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO)
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI bool isConstantIntBuildVectorOrConstantInt(SDValue N, bool AllowOpaques=true) const
Test whether the given value is a constant int or similar node.
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getGLOBAL_OFFSET_TABLE(EVT VT)
Return a GLOBAL_OFFSET_TABLE node. This does not have a useful SDLoc.
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSplatVector(EVT VT, const SDLoc &DL, SDValue Op)
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
LLVM_ABI bool SignBitIsZero(SDValue Op, unsigned Depth=0) const
Return true if the sign bit of Op is known to be zero.
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
const TargetMachine & getTarget() const
LLVM_ABI std::pair< SDValue, SDValue > getStrictFPExtendOrRound(SDValue Op, SDValue Chain, const SDLoc &DL, EVT VT)
Convert Op, which must be a STRICT operation of float type, to the float type VT, by either extending...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of float type, to the float type VT, by either extending or rounding (by tr...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVM_ABI bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
ArrayRef< int > getMask() const
const_iterator begin() const
Definition SmallSet.h:215
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
size_type size() const
Definition SmallSet.h:170
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition StringRef.h:472
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:261
StringRef slice(size_t Start, size_t End) const
Return a reference to the substring from [Start, End).
Definition StringRef.h:686
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:146
iterator end() const
Definition StringRef.h:114
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
A SystemZ-specific class detailing special use registers particular for calling conventions.
static SystemZConstantPoolValue * Create(const GlobalValue *GV, SystemZCP::SystemZCPModifier Modifier)
const SystemZInstrInfo * getInstrInfo() const override
SystemZCallingConventionRegisters * getSpecialRegisters() const
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
Returns the target specific optimal type for load and store operations as a result of memset,...
bool hasInlineStackProbe(const MachineFunction &MF) const override
Returns true if stack probing through inline assembly is requested.
bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const override
Determines the optimal series of memory ops to replace the memset / memcpy.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const override
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT) const override
Return the ValueType of the result of SETCC operations.
bool allowTruncateForTailCall(Type *, Type *) const override
Return true if a truncation from FromTy to ToTy is permitted when deciding whether a call is in tail ...
SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, const SDLoc &DL, const AsmOperandInfo &Constraint, SelectionDAG &DAG) const override
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &DL, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
CondMergingParams getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs, const Value *Rhs) const override
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, LLVMContext &Context, const Type *RetTy) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
std::pair< SDValue, SDValue > makeExternalCall(SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, ArrayRef< SDValue > Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, bool DoesNotReturn, bool IsReturnValueUsed) const
bool mayBeEmittedAsTailCall(const CallInst *CI) const override
Return true if the target may be able to emit the call instruction as a tail call.
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, std::optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
SystemZTargetLowering(const TargetMachine &TM, const SystemZSubtarget &STI)
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
Return true if an FMA operation is faster than a pair of fmul and fadd instructions.
bool isLegalICmpImmediate(int64_t Imm) const override
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
TargetLowering::ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Determine if the target supports unaligned memory accesses.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
TargetLowering::ConstraintType getConstraintType(StringRef Constraint) const override
Given a constraint, return the type of constraint it is for this target.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, std::optional< CallingConv::ID > CC) const override
Target-specific combining of register parts into its original value.
bool isTruncateFree(Type *, Type *) const override
Return true if it's free to truncate a value of type FromTy to type ToTy.
SDValue useLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, MVT VT, SDValue Arg, SDLoc DL, SDValue Chain, bool IsStrict) const
unsigned ComputeNumSignBitsForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const override
Determine the number of bits in the operation that are sign bits.
void LowerOperationWrapper(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked by the type legalizer to legalize nodes with an illegal operand type but leg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
bool isLegalAddImmediate(int64_t Imm) const override
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
Lower the specified operand into the Ops vector.
bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, bool PoisonOnly, unsigned Depth) const override
Return true if this function can prove that Op is never poison and, if PoisonOnly is false,...
AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const override
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
unsigned getStackProbeSize(const MachineFunction &MF) const
XPLINK64 calling convention specific use registers. Particular to z/OS when in 64 bit mode.
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setAtomicLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Let target indicate that an extending atomic load of the specified type is legal.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save...
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
virtual const TargetRegisterClass * getRepRegClassFor(MVT VT) const
Return the 'representative' register class for the specified value type.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm.restorestack should save and restore.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
std::vector< ArgListEntry > ArgListTy
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
virtual MVT getPointerMemTy(const DataLayout &DL, uint32_t AS=0) const
Return the in-memory pointer type for the given address space, defaults to the pointer type from the ...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
virtual bool findOptimalMemOpLowering(LLVMContext &Context, std::vector< EVT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes) const
Determines the optimal series of memory ops to replace the memset / memcpy.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g. {edx}), return the register number and the register class for the register.
TargetLowering(const TargetLowering &)=delete
virtual void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
Primary interface to the complete machine description for the target machine.
TLSModel::Model getTLSModel(const GlobalValue *GV) const
Returns the TLS model which should be used for the given global variable.
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
unsigned getPointerSize(unsigned AS) const
Get the pointer size for this target.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetInstrInfo * getInstrInfo() const
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:184
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:402
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
A raw_ostream that writes to a file descriptor.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:780
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:504
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition ISDOpcodes.h:163
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:511
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ GlobalAddress
Definition ISDOpcodes.h:88
@ STRICT_FMINIMUM
Definition ISDOpcodes.h:464
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:898
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:275
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:431
@ BUILTIN_OP_END
BUILTIN_OP_END - This must be the last enum value in this list.
@ GlobalTLSAddress
Definition ISDOpcodes.h:89
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:478
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:779
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:347
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:541
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition ISDOpcodes.h:242
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:343
@ GET_ROUNDING
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition ISDOpcodes.h:958
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ STRICT_FMAXIMUM
Definition ISDOpcodes.h:463
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:799
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:887
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ UADDO_CARRY
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:323
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:477
@ STRICT_FROUNDEVEN
Definition ISDOpcodes.h:457
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition ISDOpcodes.h:110
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:471
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:493
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:470
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:498
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:420
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:909
@ STRICT_FNEARBYINT
Definition ISDOpcodes.h:451
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.setjmp intrinsic.
Definition ISDOpcodes.h:157
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:821
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:527
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition ISDOpcodes.h:208
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
LLVM_ABI bool isConstantSplatVectorAllZeros(const SDNode *N, bool BuildVectorOnly=false)
Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where all of the elements are 0 o...
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
LLVM_ABI bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
@ Define
Register definition.
@ System
Synchronized with respect to all concurrently executing threads.
Definition LLVMContext.h:58
const unsigned GR64Regs[16]
const unsigned VR128Regs[32]
const unsigned VR16Regs[32]
const unsigned GR128Regs[16]
const unsigned FP32Regs[16]
const unsigned FP16Regs[16]
const unsigned GR32Regs[16]
const unsigned FP64Regs[16]
const int64_t ELFCallFrameSize
const unsigned VR64Regs[32]
const unsigned FP128Regs[16]
const unsigned VR32Regs[32]
unsigned odd128(bool Is32bit)
const unsigned CCMASK_CMP_GE
Definition SystemZ.h:41
static bool isImmHH(uint64_t Val)
Definition SystemZ.h:177
const unsigned CCMASK_TEND
Definition SystemZ.h:98
const unsigned CCMASK_CS_EQ
Definition SystemZ.h:68
const unsigned CCMASK_TBEGIN
Definition SystemZ.h:93
const unsigned CCMASK_0
Definition SystemZ.h:28
const MCPhysReg ELFArgFPRs[ELFNumArgFPRs]
MachineBasicBlock * splitBlockBefore(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_TM_SOME_1
Definition SystemZ.h:83
const unsigned CCMASK_LOGICAL_CARRY
Definition SystemZ.h:61
const unsigned TDCMASK_NORMAL_MINUS
Definition SystemZ.h:123
const unsigned CCMASK_TDC
Definition SystemZ.h:110
const unsigned CCMASK_FCMP
Definition SystemZ.h:49
const unsigned CCMASK_TM_SOME_0
Definition SystemZ.h:82
static bool isImmHL(uint64_t Val)
Definition SystemZ.h:172
const unsigned TDCMASK_SUBNORMAL_MINUS
Definition SystemZ.h:125
const unsigned PFD_READ
Definition SystemZ.h:116
const unsigned CCMASK_1
Definition SystemZ.h:29
const unsigned TDCMASK_NORMAL_PLUS
Definition SystemZ.h:122
const unsigned PFD_WRITE
Definition SystemZ.h:117
const unsigned CCMASK_CMP_GT
Definition SystemZ.h:38
const unsigned TDCMASK_QNAN_MINUS
Definition SystemZ.h:129
const unsigned CCMASK_CS
Definition SystemZ.h:70
const unsigned CCMASK_ANY
Definition SystemZ.h:32
const unsigned CCMASK_ARITH
Definition SystemZ.h:56
const unsigned CCMASK_TM_MIXED_MSB_0
Definition SystemZ.h:79
const unsigned TDCMASK_SUBNORMAL_PLUS
Definition SystemZ.h:124
static bool isImmLL(uint64_t Val)
Definition SystemZ.h:162
const unsigned VectorBits
Definition SystemZ.h:155
static bool isImmLH(uint64_t Val)
Definition SystemZ.h:167
MachineBasicBlock * emitBlockAfter(MachineBasicBlock *MBB)
const unsigned TDCMASK_INFINITY_PLUS
Definition SystemZ.h:126
unsigned reverseCCMask(unsigned CCMask)
const unsigned CCMASK_TM_ALL_0
Definition SystemZ.h:78
const unsigned IPM_CC
Definition SystemZ.h:113
const unsigned CCMASK_CMP_LE
Definition SystemZ.h:40
const unsigned CCMASK_CMP_O
Definition SystemZ.h:45
const unsigned CCMASK_CMP_EQ
Definition SystemZ.h:36
const unsigned VectorBytes
Definition SystemZ.h:159
const unsigned TDCMASK_INFINITY_MINUS
Definition SystemZ.h:127
const unsigned CCMASK_ICMP
Definition SystemZ.h:48
const unsigned CCMASK_VCMP_ALL
Definition SystemZ.h:102
const unsigned CCMASK_VCMP_NONE
Definition SystemZ.h:104
MachineBasicBlock * splitBlockAfter(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB)
const unsigned CCMASK_VCMP
Definition SystemZ.h:105
const unsigned CCMASK_TM_MIXED_MSB_1
Definition SystemZ.h:80
const unsigned CCMASK_TM_MSB_0
Definition SystemZ.h:84
const unsigned CCMASK_ARITH_OVERFLOW
Definition SystemZ.h:55
const unsigned CCMASK_CS_NE
Definition SystemZ.h:69
const unsigned TDCMASK_SNAN_PLUS
Definition SystemZ.h:130
const unsigned CCMASK_TM
Definition SystemZ.h:86
const unsigned CCMASK_3
Definition SystemZ.h:31
const unsigned CCMASK_NONE
Definition SystemZ.h:27
const unsigned CCMASK_CMP_LT
Definition SystemZ.h:37
const unsigned CCMASK_CMP_NE
Definition SystemZ.h:39
const unsigned TDCMASK_ZERO_PLUS
Definition SystemZ.h:120
const unsigned TDCMASK_QNAN_PLUS
Definition SystemZ.h:128
const unsigned TDCMASK_ZERO_MINUS
Definition SystemZ.h:121
unsigned even128(bool Is32bit)
const unsigned CCMASK_TM_ALL_1
Definition SystemZ.h:81
const unsigned CCMASK_LOGICAL_BORROW
Definition SystemZ.h:63
const unsigned ELFNumArgFPRs
const unsigned CCMASK_CMP_UO
Definition SystemZ.h:44
const unsigned CCMASK_LOGICAL
Definition SystemZ.h:65
const unsigned CCMASK_TM_MSB_1
Definition SystemZ.h:85
const unsigned TDCMASK_SNAN_MINUS
Definition SystemZ.h:131
initializer< Ty > init(const Ty &Val)
support::ulittle32_t Word
Definition IRSymtab.h:53
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
NodeAddr< CodeNode * > Code
Definition RDFGraph.h:388
This is an optimization pass for GlobalISel generic memory operations.
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
unsigned Log2_32_Ceil(uint32_t Value)
Return the ceil log base 2 of the specified value, 32 if the value is zero.
Definition MathExtras.h:344
@ Offset
Definition DWP.cpp:532
@ Length
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
LLVM_ABI SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
@ Done
Definition Threading.h:60
testing::Matcher< const detail::ErrorHolder & > Failed()
Definition Error.h:198
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
constexpr T maskLeadingOnes(unsigned N)
Create a bitmask with the N left-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:88
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI void dumpBytes(ArrayRef< uint8_t > Bytes, raw_ostream &OS)
Convert ‘Bytes’ to a hex string and output to ‘OS’.
T bit_ceil(T Value)
Returns the smallest integral power of two no smaller than Value if Value is nonzero.
Definition bit.h:345
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:202
int countl_zero(T Val)
Count number of 0's from the most significant bit to the least stopping at the first 1.
Definition bit.h:236
LLVM_ABI bool isBitwiseNot(SDValue V, bool AllowUndefs=false)
Returns true if V is a bitwise not operation.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Success
The lock was released successfully.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
@ BeforeLegalizeTypes
Definition DAGCombine.h:16
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
constexpr T maskTrailingOnes(unsigned N)
Create a bitmask with the N right-most bits set to 1, and all other bits set to 0.
Definition MathExtras.h:77
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
#define N
#define EQ(a, b)
Definition regexec.c:65
AddressingMode(bool LongDispl, bool IdxReg)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:147
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:385
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:381
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:323
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition ValueTypes.h:157
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
KnownBits anyextOrTrunc(unsigned BitWidth) const
Return known bits for an "any" extension or truncation of the value we're tracking.
Definition KnownBits.h:186
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
KnownBits zext(unsigned BitWidth) const
Return known bits for a zero extension of the value we're tracking.
Definition KnownBits.h:172
void resetAll()
Resets the known state of all bits.
Definition KnownBits.h:74
KnownBits intersectWith(const KnownBits &RHS) const
Returns KnownBits information that is known to be true for both this and RHS.
Definition KnownBits.h:311
KnownBits sext(unsigned BitWidth) const
Return known bits for a sign extension of the value we're tracking.
Definition KnownBits.h:180
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:145
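This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.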
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
static LLVM_ABI MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueTypes that has been interned by a SelectionDAG.
SmallVector< unsigned, 2 > OpVals
bool isVectorConstantLegal(const SystemZSubtarget &Subtarget)
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
CallLoweringInfo & setDiscardResult(bool Value=true)
CallLoweringInfo & setZExtResult(bool Value=true)
CallLoweringInfo & setDebugLoc(const SDLoc &dl)
CallLoweringInfo & setSExtResult(bool Value=true)
CallLoweringInfo & setNoReturn(bool Value=true)
SmallVector< ISD::OutputArg, 32 > Outs
CallLoweringInfo & setChain(SDValue InChain)
CallLoweringInfo & setCallee(CallingConv::ID CC, Type *ResultType, SDValue Target, ArgListTy &&ArgsList, AttributeSet ResultAttrs={})
This structure is used to pass arguments to makeLibCall function.