LLVM 22.0.0git
HexagonISelLoweringHVX.cpp
1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
30static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
32 cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
33
34static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
35static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
36static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
37static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
38
39static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
40 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
41 MVT ElemTy = Ty.getScalarType();
42 switch (ElemTy.SimpleTy) {
43 case MVT::f16:
44 return std::make_tuple(5, 15, 10);
45 case MVT::f32:
46 return std::make_tuple(8, 127, 23);
47 case MVT::f64:
48 return std::make_tuple(11, 1023, 52);
49 default:
50 break;
51 }
52 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
53}
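// Worked example (editorial sketch, not used by the lowering code): for an
// IEEE type with E exponent bits and F fraction bits, the total width is
// 1 + E + F and the bias is 2^(E-1) - 1, which matches the triples above:
//   f16: 1 + 5 + 10 = 16,  bias = 2^4  - 1 = 15
//   f32: 1 + 8 + 23 = 32,  bias = 2^7  - 1 = 127
//   f64: 1 + 11 + 52 = 64, bias = 2^10 - 1 = 1023
static_assert(1 + 5 + 10 == 16 && (1u << 4) - 1 == 15, "f16 layout");
static_assert(1 + 8 + 23 == 32 && (1u << 7) - 1 == 127, "f32 layout");
static_assert(1 + 11 + 52 == 64 && (1u << 10) - 1 == 1023, "f64 layout");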
54
55void
56HexagonTargetLowering::initializeHVXLowering() {
57 if (Subtarget.useHVX64BOps()) {
58 addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
59 addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
60 addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
61 addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
62 addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
63 addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
64 // These "short" boolean vector types should be legal because
65 // they will appear as results of vector compares. If they were
66 // not legal, type legalization would try to make them legal
67 // and that would require using operations that do not use or
68 // produce such types. That, in turn, would imply using custom
69 // nodes, which would be unoptimizable by the DAG combiner.
70 // The idea is to rely on target-independent operations as much
71 // as possible.
72 addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
73 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
74 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
75 } else if (Subtarget.useHVX128BOps()) {
76 addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
77 addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
78 addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
79 addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
80 addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
81 addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
82 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
83 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
84 addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
85 if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
86 addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
87 addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
88 addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
89 addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
90 }
91 }
92
93 // Set up operation actions.
94
95 bool Use64b = Subtarget.useHVX64BOps();
96 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
97 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
98 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
99 MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
100 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
101
102 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
104 AddPromotedToType(Opc, FromTy, ToTy);
105 };
106
107 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
108 // Note: v16i1 -> i16 is handled in type legalization instead of op
109 // legalization.
110 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
111 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
112 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
113 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
114 setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
115 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
119
120 if (Subtarget.useHVX128BOps()) {
121 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
122 setOperationAction(ISD::BITCAST, MVT::v64i1, Custom);
123 }
124 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
125 Subtarget.useHVXFloatingPoint()) {
126
127 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
128 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
129
130 for (MVT T : FloatV) {
134 setOperationAction(ISD::FMINIMUMNUM, T, Legal);
135 setOperationAction(ISD::FMAXIMUMNUM, T, Legal);
136
139
142
143 setOperationAction(ISD::MLOAD, T, Custom);
144 setOperationAction(ISD::MSTORE, T, Custom);
145 // Custom-lower BUILD_VECTOR. The standard (target-independent)
146 // handling of it would convert it to a load, which is not always
147 // the optimal choice.
149 }
150
151
152 // BUILD_VECTOR with f16 operands cannot be promoted without
153 // promoting the result, so lower the node to vsplat or constant pool
157
158 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
159 // generated.
160 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
161 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
162 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
163 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
164
165 for (MVT P : FloatW) {
166 setOperationAction(ISD::LOAD, P, Custom);
167 setOperationAction(ISD::STORE, P, Custom);
171 setOperationAction(ISD::FMINIMUMNUM, P, Custom);
172 setOperationAction(ISD::FMAXIMUMNUM, P, Custom);
175
176 // Custom-lower BUILD_VECTOR. The standard (target-independent)
177 // handling of it would convert it to a load, which is not always
178 // the optimal choice.
180 // Make concat-vectors custom to handle concats of more than 2 vectors.
182
183 setOperationAction(ISD::MLOAD, P, Custom);
184 setOperationAction(ISD::MSTORE, P, Custom);
185 }
186
187 if (Subtarget.useHVXQFloatOps()) {
188 setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
190 } else if (Subtarget.useHVXIEEEFPOps()) {
191 setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
193 }
194 }
195
196 for (MVT T : LegalV) {
199
215 if (T != ByteV) {
219 }
220
223 if (T.getScalarType() != MVT::i32) {
226 }
227
229 setOperationAction(ISD::LOAD, T, Custom);
230 setOperationAction(ISD::MLOAD, T, Custom);
231 setOperationAction(ISD::MSTORE, T, Custom);
232 if (T.getScalarType() != MVT::i32) {
235 }
236
238 // Make concat-vectors custom to handle concats of more than 2 vectors.
249 if (T != ByteV) {
251 // HVX only has shifts of words and halfwords.
255
256 // Promote all shuffles to operate on vectors of bytes.
257 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
258 }
259
260 if (Subtarget.useHVXFloatingPoint()) {
261 // Same action for both QFloat and IEEE.
266 }
267
275 }
276
277 for (MVT T : LegalW) {
278 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
279 // independent) handling of it would convert it to a load, which is
280 // not always the optimal choice.
282 // Make concat-vectors custom to handle concats of more than 2 vectors.
284
285 // Custom-lower these operations for pairs. Expand them into a concat
286 // of the corresponding operations on individual vectors.
295
296 setOperationAction(ISD::LOAD, T, Custom);
297 setOperationAction(ISD::STORE, T, Custom);
298 setOperationAction(ISD::MLOAD, T, Custom);
299 setOperationAction(ISD::MSTORE, T, Custom);
304
319 if (T != ByteW) {
323
324 // Promote all shuffles to operate on vectors of bytes.
325 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
326 }
329
332 if (T.getScalarType() != MVT::i32) {
335 }
336
337 if (Subtarget.useHVXFloatingPoint()) {
338 // Same action for both QFloat and IEEE.
343 }
344 }
345
346 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
347 setOperationAction(ISD::MULHS, WordV, Custom); // -> _LOHI
348 setOperationAction(ISD::MULHU, WordV, Custom); // -> _LOHI
351
352 setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
353 setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
354 setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
355 setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
356 setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
357 setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
358 setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
359 setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
360 setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
361 setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
362 setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
363 setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
364 setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
365 setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);
366
367 setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
368 setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
369 setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
370 setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
371 setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
372 setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
373 setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
374 setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
375 setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
376 setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
377 setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
378 setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
379 setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
380 setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);
381
382 // Boolean vectors.
383
384 for (MVT T : LegalW) {
385 // Boolean types for vector pairs will overlap with the boolean
386 // types for single vectors, e.g.
387 // v64i8 -> v64i1 (single)
388 // v64i16 -> v64i1 (pair)
389 // Set these actions first, and allow the single actions to overwrite
390 // any duplicates.
391 MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
396 // Masked load/store takes a mask that may need splitting.
397 setOperationAction(ISD::MLOAD, BoolW, Custom);
398 setOperationAction(ISD::MSTORE, BoolW, Custom);
399 }
400
401 for (MVT T : LegalV) {
402 MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
413 }
414
415 if (Use64b) {
416 for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
418 } else {
419 for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
421 }
422
423 // Handle store widening for short vectors.
424 unsigned HwLen = Subtarget.getVectorLength();
425 for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
426 if (ElemTy == MVT::i1)
427 continue;
428 int ElemWidth = ElemTy.getFixedSizeInBits();
429 int MaxElems = (8*HwLen) / ElemWidth;
430 for (int N = 2; N < MaxElems; N *= 2) {
431 MVT VecTy = MVT::getVectorVT(ElemTy, N);
432 auto Action = getPreferredVectorAction(VecTy);
434 setOperationAction(ISD::LOAD, VecTy, Custom);
435 setOperationAction(ISD::STORE, VecTy, Custom);
441 if (Subtarget.useHVXFloatingPoint()) {
446 }
447
448 MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
449 if (!isTypeLegal(BoolTy))
451 }
452 }
453 }
454
455 // Include cases which are not handled earlier.
459
461}
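// Illustrative note on the promotion strategy above (a sketch, assuming the
// 128-byte configuration): a VECTOR_SHUFFLE of v64i16 is promoted to ByteV =
// v128i8, i.e. the operands are treated as byte vectors, each i16 lane maps
// to two consecutive byte lanes, and the shuffled result is bitcast back to
// v64i16. The byte-level mask expansion is done by getByteShuffle() below.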
462
463unsigned
464HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
465 MVT ElemTy = VecTy.getVectorElementType();
466 unsigned VecLen = VecTy.getVectorNumElements();
467 unsigned HwLen = Subtarget.getVectorLength();
468
469 // Split vectors of i1 that exceed byte vector length.
470 if (ElemTy == MVT::i1 && VecLen > HwLen)
472
473 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
474 // For shorter vectors of i1, widen them if any of the corresponding
475 // vectors of integers needs to be widened.
476 if (ElemTy == MVT::i1) {
477 for (MVT T : Tys) {
478 assert(T != MVT::i1);
479 auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
480 if (A != ~0u)
481 return A;
482 }
483 return ~0u;
484 }
485
486 // If the size of VecTy is at least half of the vector length,
487 // widen the vector. Note: the threshold was not selected in
488 // any scientific way.
489 if (llvm::is_contained(Tys, ElemTy)) {
490 unsigned VecWidth = VecTy.getSizeInBits();
491 unsigned HwWidth = 8*HwLen;
492 if (VecWidth > 2*HwWidth)
494
495 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
496 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
498 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
500 }
501
502 // Defer to default.
503 return ~0u;
504}
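// Worked example of the thresholds above (a sketch, assuming the 128-byte
// configuration, i.e. HwLen = 128 and HwWidth = 1024 bits): v8i32 is 256
// bits, below HwWidth/2 = 512, so the function defers to the default
// action; v16i32 is exactly 512 bits, so it is widened to a full vector;
// vectors wider than 2*HwWidth = 2048 bits are caught by the earlier check.
// If -hexagon-hvx-widen=N is given, any width of at least 8*N bits is also
// widened.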
505
506unsigned
507HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
508 unsigned Opc = Op.getOpcode();
509 switch (Opc) {
514 }
516}
517
519HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
520 const SDLoc &dl, SelectionDAG &DAG) const {
522 IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
523 append_range(IntOps, Ops);
524 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
525}
526
527MVT
528HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
529 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
530
531 MVT ElemTy = Tys.first.getVectorElementType();
532 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
533 Tys.second.getVectorNumElements());
534}
535
536HexagonTargetLowering::TypePair
537HexagonTargetLowering::typeSplit(MVT VecTy) const {
538 assert(VecTy.isVector());
539 unsigned NumElem = VecTy.getVectorNumElements();
540 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
541 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
542 return { HalfTy, HalfTy };
543}
544
545MVT
546HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
547 MVT ElemTy = VecTy.getVectorElementType();
548 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
549 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
550}
551
552MVT
553HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
554 MVT ElemTy = VecTy.getVectorElementType();
555 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
556 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
557}
558
560HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
561 SelectionDAG &DAG) const {
562 if (ty(Vec).getVectorElementType() == ElemTy)
563 return Vec;
564 MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
565 return DAG.getBitcast(CastTy, Vec);
566}
567
569HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
570 SelectionDAG &DAG) const {
571 return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
572 Ops.first, Ops.second);
573}
574
575HexagonTargetLowering::VectorPair
576HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
577 SelectionDAG &DAG) const {
578 TypePair Tys = typeSplit(ty(Vec));
579 if (Vec.getOpcode() == HexagonISD::QCAT)
580 return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
581 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
582}
583
584bool
585HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
586 return Subtarget.isHVXVectorType(Ty) &&
587 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
588}
589
590bool
591HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
592 return Subtarget.isHVXVectorType(Ty) &&
593 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
594}
595
596bool
597HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
598 return Subtarget.isHVXVectorType(Ty, true) &&
599 Ty.getVectorElementType() == MVT::i1;
600}
601
602bool HexagonTargetLowering::allowsHvxMemoryAccess(
603 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
604 // Bool vectors are excluded by default, but make it explicit to
605 // emphasize that bool vectors cannot be loaded or stored.
606 // Also, disallow double vector stores (to prevent unnecessary
607 // store widening in DAG combiner).
608 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
609 return false;
610 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
611 return false;
612 if (Fast)
613 *Fast = 1;
614 return true;
615}
616
617bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
618 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
619 if (!Subtarget.isHVXVectorType(VecTy))
620 return false;
621 // XXX Should this be false? vmemu are a bit slower than vmem.
622 if (Fast)
623 *Fast = 1;
624 return true;
625}
626
627void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
628 MachineInstr &MI, SDNode *Node) const {
629 unsigned Opc = MI.getOpcode();
630 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
631 MachineBasicBlock &MB = *MI.getParent();
632 MachineFunction &MF = *MB.getParent();
633 MachineRegisterInfo &MRI = MF.getRegInfo();
634 DebugLoc DL = MI.getDebugLoc();
635 auto At = MI.getIterator();
636
637 switch (Opc) {
638 case Hexagon::PS_vsplatib:
639 if (Subtarget.useHVXV62Ops()) {
640 // SplatV = A2_tfrsi #imm
641 // OutV = V6_lvsplatb SplatV
642 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
643 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
644 .add(MI.getOperand(1));
645 Register OutV = MI.getOperand(0).getReg();
646 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
647 .addReg(SplatV);
648 } else {
649 // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
650 // OutV = V6_lvsplatw SplatV
651 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
652 const MachineOperand &InpOp = MI.getOperand(1);
653 assert(InpOp.isImm());
654 uint32_t V = InpOp.getImm() & 0xFF;
655 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
656 .addImm(V << 24 | V << 16 | V << 8 | V);
657 Register OutV = MI.getOperand(0).getReg();
658 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
659 }
660 MB.erase(At);
661 break;
662 case Hexagon::PS_vsplatrb:
663 if (Subtarget.useHVXV62Ops()) {
664 // OutV = V6_lvsplatb Inp
665 Register OutV = MI.getOperand(0).getReg();
666 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
667 .add(MI.getOperand(1));
668 } else {
669 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
670 const MachineOperand &InpOp = MI.getOperand(1);
671 BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
672 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
673 Register OutV = MI.getOperand(0).getReg();
674 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
675 .addReg(SplatV);
676 }
677 MB.erase(At);
678 break;
679 case Hexagon::PS_vsplatih:
680 if (Subtarget.useHVXV62Ops()) {
681 // SplatV = A2_tfrsi #imm
682 // OutV = V6_lvsplath SplatV
683 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
684 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
685 .add(MI.getOperand(1));
686 Register OutV = MI.getOperand(0).getReg();
687 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
688 .addReg(SplatV);
689 } else {
690 // SplatV = A2_tfrsi #imm:#imm
691 // OutV = V6_lvsplatw SplatV
692 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
693 const MachineOperand &InpOp = MI.getOperand(1);
694 assert(InpOp.isImm());
695 uint32_t V = InpOp.getImm() & 0xFFFF;
696 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
697 .addImm(V << 16 | V);
698 Register OutV = MI.getOperand(0).getReg();
699 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
700 }
701 MB.erase(At);
702 break;
703 case Hexagon::PS_vsplatrh:
704 if (Subtarget.useHVXV62Ops()) {
705 // OutV = V6_lvsplath Inp
706 Register OutV = MI.getOperand(0).getReg();
707 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
708 .add(MI.getOperand(1));
709 } else {
710 // SplatV = A2_combine_ll Inp, Inp
711 // OutV = V6_lvsplatw SplatV
712 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
713 const MachineOperand &InpOp = MI.getOperand(1);
714 BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
715 .addReg(InpOp.getReg(), 0, InpOp.getSubReg())
716 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
717 Register OutV = MI.getOperand(0).getReg();
718 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
719 }
720 MB.erase(At);
721 break;
722 case Hexagon::PS_vsplatiw:
723 case Hexagon::PS_vsplatrw:
724 if (Opc == Hexagon::PS_vsplatiw) {
725 // SplatV = A2_tfrsi #imm
726 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
727 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
728 .add(MI.getOperand(1));
729 MI.getOperand(1).ChangeToRegister(SplatV, false);
730 }
731 // OutV = V6_lvsplatw SplatV/Inp
732 MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
733 break;
734 }
735}
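// Illustrative example of the pre-v62 splat expansion above (a sketch): a
// PS_vsplatib with immediate #0x2A replicates the byte into a full word,
// A2_tfrsi #0x2A2A2A2A followed by V6_lvsplatw, whereas on v62+ it is simply
// A2_tfrsi #0x2A followed by V6_lvsplatb. PS_vsplatih behaves analogously
// with a halfword, e.g. #0x1234 -> 0x12341234.
static_assert((0x2Au << 24 | 0x2Au << 16 | 0x2Au << 8 | 0x2Au) == 0x2A2A2A2Au,
              "byte splat replication");
static_assert((0x1234u << 16 | 0x1234u) == 0x12341234u,
              "halfword splat replication");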
736
738HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
739 SelectionDAG &DAG) const {
740 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
741 ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
742
743 unsigned ElemWidth = ElemTy.getSizeInBits();
744 if (ElemWidth == 8)
745 return ElemIdx;
746
747 unsigned L = Log2_32(ElemWidth/8);
748 const SDLoc &dl(ElemIdx);
749 return DAG.getNode(ISD::SHL, dl, MVT::i32,
750 {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
751}
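// Worked example (sketch): for i16 elements ElemWidth = 16, so L = 1 and
// element index 5 becomes byte index 5 << 1 = 10; for i32 elements L = 2
// and index 5 becomes byte 20. For i8 the index is already a byte index
// and is returned unchanged.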
752
754HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
755 SelectionDAG &DAG) const {
756 unsigned ElemWidth = ElemTy.getSizeInBits();
757 assert(ElemWidth >= 8 && ElemWidth <= 32);
758 if (ElemWidth == 32)
759 return Idx;
760
761 if (ty(Idx) != MVT::i32)
762 Idx = DAG.getBitcast(MVT::i32, Idx);
763 const SDLoc &dl(Idx);
764 SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
765 SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
766 return SubIdx;
767}
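// Worked example (sketch): for i8 elements the mask is 32/8 - 1 = 3, so
// index 13 yields sub-index 13 & 3 = 1, i.e. the second byte within its
// containing 32-bit word; for i16 elements the mask is 1 and index 13
// yields sub-index 1, the upper halfword.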
768
770HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
771 SDValue Op1, ArrayRef<int> Mask,
772 SelectionDAG &DAG) const {
773 MVT OpTy = ty(Op0);
774 assert(OpTy == ty(Op1));
775
776 MVT ElemTy = OpTy.getVectorElementType();
777 if (ElemTy == MVT::i8)
778 return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
779 assert(ElemTy.getSizeInBits() >= 8);
780
781 MVT ResTy = tyVector(OpTy, MVT::i8);
782 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
783
784 SmallVector<int,128> ByteMask;
785 for (int M : Mask) {
786 if (M < 0) {
787 for (unsigned I = 0; I != ElemSize; ++I)
788 ByteMask.push_back(-1);
789 } else {
790 int NewM = M*ElemSize;
791 for (unsigned I = 0; I != ElemSize; ++I)
792 ByteMask.push_back(NewM+I);
793 }
794 }
795 assert(ResTy.getVectorNumElements() == ByteMask.size());
796 return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
797 opCastElem(Op1, MVT::i8, DAG), ByteMask);
798}
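// Worked example (sketch, using a deliberately small type): shuffling i16
// vectors with mask {2, -1, 0, 3} expands each entry into ElemSize = 2 byte
// indices, giving the byte mask {4,5, -1,-1, 0,1, 6,7}, and the shuffle is
// then performed on the operands recast as byte vectors.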
799
801HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
802 const SDLoc &dl, MVT VecTy,
803 SelectionDAG &DAG) const {
804 unsigned VecLen = Values.size();
805 MachineFunction &MF = DAG.getMachineFunction();
806 MVT ElemTy = VecTy.getVectorElementType();
807 unsigned ElemWidth = ElemTy.getSizeInBits();
808 unsigned HwLen = Subtarget.getVectorLength();
809
810 unsigned ElemSize = ElemWidth / 8;
811 assert(ElemSize*VecLen == HwLen);
813
814 if (VecTy.getVectorElementType() != MVT::i32 &&
815 !(Subtarget.useHVXFloatingPoint() &&
816 VecTy.getVectorElementType() == MVT::f32)) {
817 assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
818 unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
819 MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
820 for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
821 SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
822 Words.push_back(DAG.getBitcast(MVT::i32, W));
823 }
824 } else {
825 for (SDValue V : Values)
826 Words.push_back(DAG.getBitcast(MVT::i32, V));
827 }
828 auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
829 unsigned NumValues = Values.size();
830 assert(NumValues > 0);
831 bool IsUndef = true;
832 for (unsigned i = 0; i != NumValues; ++i) {
833 if (Values[i].isUndef())
834 continue;
835 IsUndef = false;
836 if (!SplatV.getNode())
837 SplatV = Values[i];
838 else if (SplatV != Values[i])
839 return false;
840 }
841 if (IsUndef)
842 SplatV = Values[0];
843 return true;
844 };
845
846 unsigned NumWords = Words.size();
847 SDValue SplatV;
848 bool IsSplat = isSplat(Words, SplatV);
849 if (IsSplat && isUndef(SplatV))
850 return DAG.getUNDEF(VecTy);
851 if (IsSplat) {
852 assert(SplatV.getNode());
853 if (isNullConstant(SplatV))
854 return getZero(dl, VecTy, DAG);
855 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
856 SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
857 return DAG.getBitcast(VecTy, S);
858 }
859
860 // Delay recognizing constant vectors until here, so that we can generate
861 // a vsplat.
862 SmallVector<ConstantInt*, 128> Consts(VecLen);
863 bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
864 if (AllConst) {
865 ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
866 (Constant**)Consts.end());
867 Constant *CV = ConstantVector::get(Tmp);
868 Align Alignment(HwLen);
869 SDValue CP =
870 LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
871 return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
873 }
874
875 // A special case is a situation where the vector is built entirely from
876 // elements extracted from another vector. This could be done via a shuffle
877 // more efficiently, but typically, the size of the source vector will not
878 // match the size of the vector being built (which precludes the use of a
879 // shuffle directly).
880 // This only handles a single source vector, and the vector being built
881 // should be of a sub-vector type of the source vector type.
882 auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
883 SmallVectorImpl<int> &SrcIdx) {
884 SDValue Vec;
885 for (SDValue V : Values) {
886 if (isUndef(V)) {
887 SrcIdx.push_back(-1);
888 continue;
889 }
890 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
891 return false;
892 // All extracts should come from the same vector.
893 SDValue T = V.getOperand(0);
894 if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
895 return false;
896 Vec = T;
897 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
898 if (C == nullptr)
899 return false;
900 int I = C->getSExtValue();
901 assert(I >= 0 && "Negative element index");
902 SrcIdx.push_back(I);
903 }
904 SrcVec = Vec;
905 return true;
906 };
907
908 SmallVector<int,128> ExtIdx;
909 SDValue ExtVec;
910 if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
911 MVT ExtTy = ty(ExtVec);
912 unsigned ExtLen = ExtTy.getVectorNumElements();
913 if (ExtLen == VecLen || ExtLen == 2*VecLen) {
914 // Construct a new shuffle mask that will produce a vector with the same
915 // number of elements as the input vector, and such that the vector we
916 // want will be the initial subvector of it.
917 SmallVector<int,128> Mask;
918 BitVector Used(ExtLen);
919
920 for (int M : ExtIdx) {
921 Mask.push_back(M);
922 if (M >= 0)
923 Used.set(M);
924 }
925 // Fill the rest of the mask with the unused elements of ExtVec in hopes
926 // that it will result in a permutation of ExtVec's elements. It's still
927 // fine if it doesn't (e.g. if undefs are present, or elements are
928 // repeated), but permutations can always be done efficiently via vdelta
929 // and vrdelta.
930 for (unsigned I = 0; I != ExtLen; ++I) {
931 if (Mask.size() == ExtLen)
932 break;
933 if (!Used.test(I))
934 Mask.push_back(I);
935 }
936
937 SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
938 DAG.getUNDEF(ExtTy), Mask);
939 return ExtLen == VecLen ? S : LoHalf(S, DAG);
940 }
941 }
942
943 // Find most common element to initialize vector with. This is to avoid
944 // unnecessary vinsert/valign for cases where the same value is present
945 // many times. Creates a histogram of the vector's elements to find the
946 // most common element n.
947 assert(4*Words.size() == Subtarget.getVectorLength());
948 int VecHist[32];
949 int n = 0;
950 for (unsigned i = 0; i != NumWords; ++i) {
951 VecHist[i] = 0;
952 if (Words[i].isUndef())
953 continue;
954 for (unsigned j = i; j != NumWords; ++j)
955 if (Words[i] == Words[j])
956 VecHist[i]++;
957
958 if (VecHist[i] > VecHist[n])
959 n = i;
960 }
961
962 SDValue HalfV = getZero(dl, VecTy, DAG);
963 if (VecHist[n] > 1) {
964 SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
965 HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
966 {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
967 }
968 SDValue HalfV0 = HalfV;
969 SDValue HalfV1 = HalfV;
970
971 // Construct two halves in parallel, then or them together. Rn and Rm count
972 // number of rotations needed before the next element. One last rotation is
973 // performed post-loop to position the last element.
974 int Rn = 0, Rm = 0;
975 SDValue Sn, Sm;
976 SDValue N = HalfV0;
977 SDValue M = HalfV1;
978 for (unsigned i = 0; i != NumWords/2; ++i) {
979 // Rotate by element count since last insertion.
980 if (Words[i] != Words[n] || VecHist[n] <= 1) {
981 Sn = DAG.getConstant(Rn, dl, MVT::i32);
982 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
983 N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
984 {HalfV0, Words[i]});
985 Rn = 0;
986 }
987 if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
988 Sm = DAG.getConstant(Rm, dl, MVT::i32);
989 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
990 M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
991 {HalfV1, Words[i+NumWords/2]});
992 Rm = 0;
993 }
994 Rn += 4;
995 Rm += 4;
996 }
997 // Perform last rotation.
998 Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
999 Sm = DAG.getConstant(Rm, dl, MVT::i32);
1000 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
1001 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
1002
1003 SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
1004 SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
1005
1006 SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
1007
1008 SDValue OutV =
1009 DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
1010 return OutV;
1011}
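// Sketch of the general (non-splat, non-constant) path above, assuming a
// 64-byte vector (HwLen = 64, NumWords = 16): words 0..7 are accumulated
// into one half and words 8..15 into the other, each insertion being a VROR
// by the byte distance since the previous insertion followed by a
// VINSERTW0 at position 0; words equal to the most common word n were
// already placed by the initial splat/valign when VecHist[n] > 1. The final
// VRORs offset the first half by an extra HwLen/2 bytes, so OR-ing the two
// bitcast halves yields the complete vector.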
1012
1013SDValue
1014HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
1015 unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
1016 MVT PredTy = ty(PredV);
1017 unsigned HwLen = Subtarget.getVectorLength();
1018 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1019
1020 if (Subtarget.isHVXVectorType(PredTy, true)) {
1021 // Move the vector predicate SubV to a vector register, and scale it
1022 // down to match the representation (bytes per type element) that VecV
1023 // uses. The scaling down will pick every 2nd or 4th (every Scale-th
1024 // in general) element and put them at the front of the resulting
1025 // vector. This subvector will then be inserted into the Q2V of VecV.
1026 // To avoid having an operation that generates an illegal type (short
1027 // vector), generate a full size vector.
1028 //
1029 SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
1030 SmallVector<int,128> Mask(HwLen);
1031 // Scale = BitBytes(PredV) / Given BitBytes.
1032 unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
1033 unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
1034
1035 for (unsigned i = 0; i != HwLen; ++i) {
1036 unsigned Num = i % Scale;
1037 unsigned Off = i / Scale;
1038 Mask[BlockLen*Num + Off] = i;
1039 }
1040 SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
1041 if (!ZeroFill)
1042 return S;
1043 // Fill the bytes beyond BlockLen with 0s.
1044 // V6_pred_scalar2 cannot fill the entire predicate, so it only works
1045 // when BlockLen < HwLen.
1046 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1047 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1048 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1049 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1050 SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
1051 return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
1052 }
1053
1054 // Make sure that this is a valid scalar predicate.
1055 assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
1056
1057 unsigned Bytes = 8 / PredTy.getVectorNumElements();
1058 SmallVector<SDValue,4> Words[2];
1059 unsigned IdxW = 0;
1060
1061 SDValue W0 = isUndef(PredV)
1062 ? DAG.getUNDEF(MVT::i64)
1063 : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
1064 Words[IdxW].push_back(HiHalf(W0, DAG));
1065 Words[IdxW].push_back(LoHalf(W0, DAG));
1066
1067 while (Bytes < BitBytes) {
1068 IdxW ^= 1;
1069 Words[IdxW].clear();
1070
1071 if (Bytes < 4) {
1072 for (const SDValue &W : Words[IdxW ^ 1]) {
1073 SDValue T = expandPredicate(W, dl, DAG);
1074 Words[IdxW].push_back(HiHalf(T, DAG));
1075 Words[IdxW].push_back(LoHalf(T, DAG));
1076 }
1077 } else {
1078 for (const SDValue &W : Words[IdxW ^ 1]) {
1079 Words[IdxW].push_back(W);
1080 Words[IdxW].push_back(W);
1081 }
1082 }
1083 Bytes *= 2;
1084 }
1085
1086 assert(Bytes == BitBytes);
1087
1088 SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
1089 SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
1090 for (const SDValue &W : Words[IdxW]) {
1091 Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
1092 Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
1093 }
1094
1095 return Vec;
1096}
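// Worked example of the compaction shuffle above (a sketch, assuming
// HwLen = 128, PredTy = v32i1 and BitBytes = 1): Scale = 128/32 = 4 and
// BlockLen = 32, so output byte k (k < 32) takes byte 4*k of the Q2V
// expansion, packing the interesting bytes at the front of the vector.
// With ZeroFill set, V6_pred_scalar2(32) produces a mask that clears
// bytes 32..127 of the result.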
1097
1098SDValue
1099HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1100 const SDLoc &dl, MVT VecTy,
1101 SelectionDAG &DAG) const {
1102 // Construct a vector V of bytes, such that a comparison V >u 0 would
1103 // produce the required vector predicate.
1104 unsigned VecLen = Values.size();
1105 unsigned HwLen = Subtarget.getVectorLength();
1106 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1108 bool AllT = true, AllF = true;
1109
1110 auto IsTrue = [] (SDValue V) {
1111 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1112 return !N->isZero();
1113 return false;
1114 };
1115 auto IsFalse = [] (SDValue V) {
1116 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1117 return N->isZero();
1118 return false;
1119 };
1120
1121 if (VecLen <= HwLen) {
1122 // In the hardware, each bit of a vector predicate corresponds to a byte
1123 // of a vector register. Calculate how many bytes a bit of VecTy
1124 // corresponds to.
1125 assert(HwLen % VecLen == 0);
1126 unsigned BitBytes = HwLen / VecLen;
1127 for (SDValue V : Values) {
1128 AllT &= IsTrue(V);
1129 AllF &= IsFalse(V);
1130
1131 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1132 : DAG.getUNDEF(MVT::i8);
1133 for (unsigned B = 0; B != BitBytes; ++B)
1134 Bytes.push_back(Ext);
1135 }
1136 } else {
1137 // There are as many i1 values, as there are bits in a vector register.
1138 // Divide the values into groups of 8 and check that each group consists
1139 // of the same value (ignoring undefs).
1140 for (unsigned I = 0; I != VecLen; I += 8) {
1141 unsigned B = 0;
1142 // Find the first non-undef value in this group.
1143 for (; B != 8; ++B) {
1144 if (!Values[I+B].isUndef())
1145 break;
1146 }
1147 SDValue F = Values[I+B];
1148 AllT &= IsTrue(F);
1149 AllF &= IsFalse(F);
1150
1151 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1152 : DAG.getUNDEF(MVT::i8);
1153 Bytes.push_back(Ext);
1154 // Verify that the rest of values in the group are the same as the
1155 // first.
1156 for (; B != 8; ++B)
1157 assert(Values[I+B].isUndef() || Values[I+B] == F);
1158 }
1159 }
1160
1161 if (AllT)
1162 return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
1163 if (AllF)
1164 return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
1165
1166 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1167 SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
1168 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1169}
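// Worked example (sketch, 128-byte mode): building a v32i1 predicate gives
// BitBytes = 128/32 = 4, so every i1 operand is zero-extended to an i8 and
// replicated into 4 consecutive bytes of a v128i8; that byte vector is then
// materialized with buildHvxVectorReg and converted to a predicate with
// V2Q, matching the "V >u 0" interpretation described above.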
1170
1171SDValue
1172HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1173 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1174 MVT ElemTy = ty(VecV).getVectorElementType();
1175
1176 unsigned ElemWidth = ElemTy.getSizeInBits();
1177 assert(ElemWidth >= 8 && ElemWidth <= 32);
1178 (void)ElemWidth;
1179
1180 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1181 SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1182 {VecV, ByteIdx});
1183 if (ElemTy == MVT::i32)
1184 return ExWord;
1185
1186 // Have an extracted word, need to extract the smaller element out of it.
1187 // 1. Extract the bits of (the original) IdxV that correspond to the index
1188 // of the desired element in the 32-bit word.
1189 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1190 // 2. Extract the element from the word.
1191 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1192 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1193}
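// Worked example (sketch): extracting element 5 of a v64i16 vector gives
// ByteIdx = 10, so VEXTRACTW reads the 32-bit word covering bytes 8..11;
// SubIdx = 5 & 1 = 1 then selects the upper halfword of that word.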
1194
1195SDValue
1196HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1197 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1198 // Implement other return types if necessary.
1199 assert(ResTy == MVT::i1);
1200
1201 unsigned HwLen = Subtarget.getVectorLength();
1202 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1203 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1204
1205 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1206 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1207 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1208
1209 SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1210 SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1211 return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1212}
1213
1214SDValue
1215HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
1216 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1217 MVT ElemTy = ty(VecV).getVectorElementType();
1218
1219 unsigned ElemWidth = ElemTy.getSizeInBits();
1220 assert(ElemWidth >= 8 && ElemWidth <= 32);
1221 (void)ElemWidth;
1222
1223 auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
1224 SDValue ByteIdxV) {
1225 MVT VecTy = ty(VecV);
1226 unsigned HwLen = Subtarget.getVectorLength();
1227 SDValue MaskV =
1228 DAG.getNode(ISD::AND, dl, MVT::i32,
1229 {ByteIdxV, DAG.getSignedConstant(-4, dl, MVT::i32)});
1230 SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
1231 SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
1232 SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1233 {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
1234 SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
1235 return TorV;
1236 };
1237
1238 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1239 if (ElemTy == MVT::i32)
1240 return InsertWord(VecV, ValV, ByteIdx);
1241
1242 // If this is not inserting a 32-bit word, convert it into such a thing.
1243 // 1. Extract the existing word from the target vector.
1244 SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
1245 {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
1246 SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
1247 dl, MVT::i32, DAG);
1248
1249 // 2. Treating the extracted word as a 32-bit vector, insert the given
1250 // value into it.
1251 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1252 MVT SubVecTy = tyVector(ty(Ext), ElemTy);
1253 SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
1254 ValV, SubIdx, dl, ElemTy, DAG);
1255
1256 // 3. Insert the 32-bit word back into the original vector.
1257 return InsertWord(VecV, Ins, ByteIdx);
1258}
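// Worked example of InsertWord above (sketch, HwLen = 128): for
// ByteIdx = 10, MaskV = 10 & -4 = 8 is the byte offset of the containing
// word; the vector is rotated by 8 so that word reaches position 0,
// VINSERTW0 overwrites it, and a rotation by 128 - 8 = 120 restores the
// original layout (8 + 120 = HwLen, a full turn).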
1259
1260SDValue
1261HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1262 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1263 unsigned HwLen = Subtarget.getVectorLength();
1264 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1265 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1266
1267 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1268 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1269 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1270 ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1271
1272 SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1273 return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1274}
1275
1276SDValue
1277HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
1278 SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1279 MVT VecTy = ty(VecV);
1280 unsigned HwLen = Subtarget.getVectorLength();
1281 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1282 MVT ElemTy = VecTy.getVectorElementType();
1283 unsigned ElemWidth = ElemTy.getSizeInBits();
1284
1285 // If the source vector is a vector pair, get the single vector containing
1286 // the subvector of interest. The subvector will never overlap two single
1287 // vectors.
1288 if (isHvxPairTy(VecTy)) {
1289 unsigned SubIdx = Hexagon::vsub_lo;
1290 if (Idx * ElemWidth >= 8 * HwLen) {
1291 SubIdx = Hexagon::vsub_hi;
1292 Idx -= VecTy.getVectorNumElements() / 2;
1293 }
1294
1295 VecTy = typeSplit(VecTy).first;
1296 VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
1297 if (VecTy == ResTy)
1298 return VecV;
1299 }
1300
1301 // The only meaningful subvectors of a single HVX vector are those that
1302 // fit in a scalar register.
1303 assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
1304
1305 MVT WordTy = tyVector(VecTy, MVT::i32);
1306 SDValue WordVec = DAG.getBitcast(WordTy, VecV);
1307 unsigned WordIdx = (Idx*ElemWidth) / 32;
1308
1309 SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
1310 SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
1311 if (ResTy.getSizeInBits() == 32)
1312 return DAG.getBitcast(ResTy, W0);
1313
1314 SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
1315 SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
1316 SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
1317 return DAG.getBitcast(ResTy, WW);
1318}
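// Worked example (sketch, 128-byte mode): extracting a v2i32 subvector at
// index 4 from a v32i32 vector gives WordIdx = (4*32)/32 = 4, so words 4
// and 5 are read with two VEXTRACTWs, combined into an i64, and bitcast to
// the result type. For a vector pair, the single vector containing the
// subvector is selected first via vsub_lo/vsub_hi.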
1319
1320SDValue
1321HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
1322 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1323 MVT VecTy = ty(VecV);
1324 unsigned HwLen = Subtarget.getVectorLength();
1325 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1326 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1327 // IdxV is required to be a constant.
1328 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1329
1330 unsigned ResLen = ResTy.getVectorNumElements();
1331 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1332 unsigned Offset = Idx * BitBytes;
1333 SDValue Undef = DAG.getUNDEF(ByteTy);
1334 SmallVector<int,128> Mask;
1335
1336 if (Subtarget.isHVXVectorType(ResTy, true)) {
1337 // Converting between two vector predicates. Since the result is shorter
1338 // than the source, it will correspond to a vector predicate with the
1339 // relevant bits replicated. The replication count is the ratio of the
1340 // source and target vector lengths.
1341 unsigned Rep = VecTy.getVectorNumElements() / ResLen;
1342 assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
1343 for (unsigned i = 0; i != HwLen/Rep; ++i) {
1344 for (unsigned j = 0; j != Rep; ++j)
1345 Mask.push_back(i + Offset);
1346 }
1347 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1348 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
1349 }
1350
1351 // Converting between a vector predicate and a scalar predicate. In the
1352 // vector predicate, a group of BitBytes bits will correspond to a single
1353 // i1 element of the source vector type. Those bits will all have the same
1354 // value. The same will be true for ByteVec, where each byte corresponds
1355 // to a bit in the vector predicate.
1356 // The algorithm is to traverse the ByteVec, going over the i1 values from
1357 // the source vector, and generate the corresponding representation in an
1358 // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
1359 // elements so that the interesting 8 bytes will be in the low end of the
1360 // vector.
1361 unsigned Rep = 8 / ResLen;
1362 // Make sure the output fills the entire vector register, so repeat the
1363 // 8-byte groups as many times as necessary.
1364 for (unsigned r = 0; r != HwLen/ResLen; ++r) {
1365 // This will generate the indexes of the 8 interesting bytes.
1366 for (unsigned i = 0; i != ResLen; ++i) {
1367 for (unsigned j = 0; j != Rep; ++j)
1368 Mask.push_back(Offset + i*BitBytes);
1369 }
1370 }
1371
1372 SDValue Zero = getZero(dl, MVT::i32, DAG);
1373 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1374 // Combine the two low words from ShuffV into a v8i8, and byte-compare
1375 // them against 0.
1376 SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
1377 SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1378 {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
1379 SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
1380 return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
1381 {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
1382}
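// Worked example of the predicate-to-predicate case above (sketch,
// HwLen = 128): extracting a v32i1 from a v64i1 gives Rep = 2 and
// BitBytes = 2, so every byte of the Q2V expansion that belongs to the
// requested subvector is repeated Rep times by the shuffle, stretching the
// 64 relevant bytes over the full 128-byte register before V2Q converts
// back to a predicate.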
1383
1384SDValue
1385HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
1386 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1387 MVT VecTy = ty(VecV);
1388 MVT SubTy = ty(SubV);
1389 unsigned HwLen = Subtarget.getVectorLength();
1390 MVT ElemTy = VecTy.getVectorElementType();
1391 unsigned ElemWidth = ElemTy.getSizeInBits();
1392
1393 bool IsPair = isHvxPairTy(VecTy);
1394 MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
1395 // The two single vectors that VecV consists of, if it's a pair.
1396 SDValue V0, V1;
1397 SDValue SingleV = VecV;
1398 SDValue PickHi;
1399
1400 if (IsPair) {
1401 V0 = LoHalf(VecV, DAG);
1402 V1 = HiHalf(VecV, DAG);
1403
1404 SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
1405 dl, MVT::i32);
1406 PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
1407 if (isHvxSingleTy(SubTy)) {
1408 if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
1409 unsigned Idx = CN->getZExtValue();
1410 assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
1411 unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
1412 return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
1413 }
1414 // If IdxV is not a constant, generate the two variants: with the
1415 // SubV as the high and as the low subregister, and select the right
1416 // pair based on the IdxV.
1417 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
1418 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
1419 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1420 }
1421 // The subvector being inserted must be entirely contained in one of
1422 // the vectors V0 or V1. Set SingleV to the correct one, and update
1423 // IdxV to be the index relative to the beginning of that vector.
1424 SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
1425 IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
1426 SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
1427 }
1428
1429 // The only meaningful subvectors of a single HVX vector are those that
1430 // fit in a scalar register.
1431 assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
1432 // Convert IdxV to be index in bytes.
1433 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1434 if (!IdxN || !IdxN->isZero()) {
1435 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1436 DAG.getConstant(ElemWidth/8, dl, MVT::i32));
1437 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
1438 }
1439 // When inserting a single word, the rotation back to the original position
1440 // would be by HwLen-Idx, but if two words are inserted, it will need to be
1441 // by (HwLen-4)-Idx.
1442 unsigned RolBase = HwLen;
1443 if (SubTy.getSizeInBits() == 32) {
1444 SDValue V = DAG.getBitcast(MVT::i32, SubV);
1445 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
1446 } else {
1447 SDValue V = DAG.getBitcast(MVT::i64, SubV);
1448 SDValue R0 = LoHalf(V, DAG);
1449 SDValue R1 = HiHalf(V, DAG);
1450 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
1451 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
1452 DAG.getConstant(4, dl, MVT::i32));
1453 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
1454 RolBase = HwLen-4;
1455 }
1456 // If the vector wasn't ror'ed, don't ror it back.
1457 if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
1458 SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1459 DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
1460 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
1461 }
1462
1463 if (IsPair) {
1464 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
1465 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
1466 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1467 }
1468 return SingleV;
1469}
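// Worked example of the rotation bookkeeping above (sketch, constant
// nonzero index, 64-bit subvector): the vector is first rotated by the
// byte offset B = Idx*ElemWidth/8, the low word is inserted, the vector is
// rotated by 4 more bytes, and the high word is inserted; the rotation
// back is therefore (HwLen-4) - B, so the three rotations add up to HwLen,
// i.e. one full turn.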
1470
1471SDValue
1472HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
1473 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1474 MVT VecTy = ty(VecV);
1475 MVT SubTy = ty(SubV);
1476 assert(Subtarget.isHVXVectorType(VecTy, true));
1477 // VecV is an HVX vector predicate. SubV may be either an HVX vector
1478 // predicate as well, or it can be a scalar predicate.
1479
1480 unsigned VecLen = VecTy.getVectorNumElements();
1481 unsigned HwLen = Subtarget.getVectorLength();
1482 assert(HwLen % VecLen == 0 && "Unexpected vector type");
1483
1484 unsigned Scale = VecLen / SubTy.getVectorNumElements();
1485 unsigned BitBytes = HwLen / VecLen;
1486 unsigned BlockLen = HwLen / Scale;
1487
1488 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1489 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1490 SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
1491 SDValue ByteIdx;
1492
1493 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1494 if (!IdxN || !IdxN->isZero()) {
1495 ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1496 DAG.getConstant(BitBytes, dl, MVT::i32));
1497 ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
1498 }
1499
1500 // ByteVec is the target vector VecV rotated in such a way that the
1501 // subvector should be inserted at index 0. Generate a predicate mask
1502 // and use vmux to do the insertion.
1503 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1504 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1505 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1506 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1507 ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
1508 // Rotate ByteVec back, and convert to a vector predicate.
1509 if (!IdxN || !IdxN->isZero()) {
1510 SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
1511 SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
1512 ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
1513 }
1514 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1515}
1516
1517SDValue
1518HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1519 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1520 // Sign- and any-extending of a vector predicate to a vector register is
1521 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1522 // a vector of 1s (where the 1s are of type matching the vector type).
1523 assert(Subtarget.isHVXVectorType(ResTy));
1524 if (!ZeroExt)
1525 return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1526
1527 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1528 SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1529 DAG.getConstant(1, dl, MVT::i32));
1530 SDValue False = getZero(dl, ResTy, DAG);
1531 return DAG.getSelect(dl, ResTy, VecV, True, False);
1532}
1533
1534SDValue
1535HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1536 MVT ResTy, SelectionDAG &DAG) const {
1537 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1538 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1539 // vector register. The remaining bits of the vector register are
1540 // unspecified.
1541
1542 MachineFunction &MF = DAG.getMachineFunction();
1543 unsigned HwLen = Subtarget.getVectorLength();
1544 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1545 MVT PredTy = ty(VecQ);
1546 unsigned PredLen = PredTy.getVectorNumElements();
1547 assert(HwLen % PredLen == 0);
1548 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1549
1550 Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1552 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1553 // These are bytes with the LSB rotated left with respect to their index.
1554 for (unsigned i = 0; i != HwLen/8; ++i) {
1555 for (unsigned j = 0; j != 8; ++j)
1556 Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1557 }
1558 Constant *CV = ConstantVector::get(Tmp);
1559 Align Alignment(HwLen);
1560 SDValue CP =
1561 LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
1562 SDValue Bytes =
1563 DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1565
1566 // Select the bytes that correspond to true bits in the vector predicate.
1567 SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1568 getZero(dl, VecTy, DAG));
1569 // Calculate the OR of all bytes in each group of 8. That will compress
1570 // all the individual bits into a single byte.
1571 // First, OR groups of 4, via vrmpy with 0x01010101.
1572 SDValue All1 =
1573 DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1574 SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1575 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1576 SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1577 {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1578 SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1579
1580 // Pick every 8th byte and coalesce them at the beginning of the output.
1581 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1582 // byte and so on.
1583 SmallVector<int,128> Mask;
1584 for (unsigned i = 0; i != HwLen; ++i)
1585 Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1586 SDValue Collect =
1587 DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1588 return DAG.getBitcast(ResTy, Collect);
1589}
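// Worked example (sketch, HwLen = 64): after the select, each byte is
// either 0 or a distinct single-bit value within its group of 4, so the
// vrmpy-with-0x01010101 sum of a group equals its OR; OR-ing with the
// 4-byte-rotated copy then combines each group of 8 bits into one byte.
// The shuffle mask (8*i) % HwLen + i/(HwLen/8) finally picks bytes
// 0, 8, ..., 56, then 1, 9, ..., 57, and so on, packing the compressed
// bytes at the start of the output.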
1590
1591SDValue
1592HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1593 const SDLoc &dl, SelectionDAG &DAG) const {
1594 // Take a vector and resize the element type to match the given type.
1595 MVT InpTy = ty(VecV);
1596 if (InpTy == ResTy)
1597 return VecV;
1598
1599 unsigned InpWidth = InpTy.getSizeInBits();
1600 unsigned ResWidth = ResTy.getSizeInBits();
1601
1602 if (InpTy.isFloatingPoint()) {
1603 return InpWidth < ResWidth
1604 ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1605 : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1606 DAG.getTargetConstant(0, dl, MVT::i32));
1607 }
1608
1609 assert(InpTy.isInteger());
1610
1611 if (InpWidth < ResWidth) {
1612 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1613 return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1614 } else {
1615 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1616 return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1617 }
1618}
1619
1620SDValue
1621HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1622 SelectionDAG &DAG) const {
1623 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1624
1625 const SDLoc &dl(Vec);
1626 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1627 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1628 {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1629}
1630
1631SDValue
1632HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1633 const {
1634 const SDLoc &dl(Op);
1635 MVT VecTy = ty(Op);
1636
1637 unsigned Size = Op.getNumOperands();
1639 for (unsigned i = 0; i != Size; ++i)
1640 Ops.push_back(Op.getOperand(i));
1641
1642 if (VecTy.getVectorElementType() == MVT::i1)
1643 return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1644
1645 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1646 // not a legal type, just bitcast the node to use i16
1647 // types and bitcast the result back to f16
1648 if (VecTy.getVectorElementType() == MVT::f16) {
1649 SmallVector<SDValue, 64> NewOps;
1650 for (unsigned i = 0; i != Size; i++)
1651 NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1652
1653 SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
1654 tyVector(VecTy, MVT::i16), NewOps);
1655 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1656 }
1657
1658 // First, split the BUILD_VECTOR for vector pairs. We could generate
1659 // some pairs directly (via splat), but splats should be generated
1660 // by the combiner prior to getting here.
1661 if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
1662 ArrayRef<SDValue> A(Ops);
1663 MVT SingleTy = typeSplit(VecTy).first;
1664 SDValue V0 = buildHvxVectorReg(A.take_front(Size / 2), dl, SingleTy, DAG);
1665 SDValue V1 = buildHvxVectorReg(A.drop_front(Size / 2), dl, SingleTy, DAG);
1666 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1667 }
1668
1669 return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1670}
1671
1672SDValue
1673HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1674 const {
1675 const SDLoc &dl(Op);
1676 MVT VecTy = ty(Op);
1677 MVT ArgTy = ty(Op.getOperand(0));
1678
1679 if (ArgTy == MVT::f16) {
1680 MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1681 SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1682 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1683 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1684 return DAG.getBitcast(VecTy, Splat);
1685 }
1686
1687 return SDValue();
1688}
1689
1690SDValue
1691HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1692 const {
1693 // Vector concatenation of two integer (non-bool) vectors does not need
1694 // special lowering. Custom-lower concats of bool vectors and expand
1695 // concats of more than 2 vectors.
1696 MVT VecTy = ty(Op);
1697 const SDLoc &dl(Op);
1698 unsigned NumOp = Op.getNumOperands();
1699 if (VecTy.getVectorElementType() != MVT::i1) {
1700 if (NumOp == 2)
1701 return Op;
1702 // Expand the other cases into a build-vector.
1703 SmallVector<SDValue, 8> Elems;
1704 for (SDValue V : Op.getNode()->ops())
1705 DAG.ExtractVectorElements(V, Elems);
1706 // A vector of i16 will be broken up into a build_vector of i16's.
1707 // This is a problem, since at the time of operation legalization,
1708 // all operations are expected to be type-legalized, and i16 is not
1709 // a legal type. If any of the extracted elements is not of a valid
1710 // type, sign-extend it to a valid one.
1711 for (SDValue &V : Elems) {
1712 MVT Ty = ty(V);
1713 if (!isTypeLegal(Ty)) {
1714 MVT NTy = typeLegalize(Ty, DAG);
1715 if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1716 V = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
1717 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
1718 V.getOperand(0), V.getOperand(1)),
1719 DAG.getValueType(Ty));
1720 continue;
1721 }
1722 // A few less complicated cases.
1723 switch (V.getOpcode()) {
1724 case ISD::Constant:
1725 V = DAG.getSExtOrTrunc(V, dl, NTy);
1726 break;
1727 case ISD::UNDEF:
1728 V = DAG.getUNDEF(NTy);
1729 break;
1730 case ISD::TRUNCATE:
1731 V = V.getOperand(0);
1732 break;
1733 default:
1734 llvm_unreachable("Unexpected vector element");
1735 }
1736 }
1737 }
1738 return DAG.getBuildVector(VecTy, dl, Elems);
1739 }
1740
1741 assert(VecTy.getVectorElementType() == MVT::i1);
1742 unsigned HwLen = Subtarget.getVectorLength();
1743 assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1744
1745 SDValue Op0 = Op.getOperand(0);
1746
1747 // If the operands are HVX types (i.e. not scalar predicates), then
1748 // defer the concatenation, and create QCAT instead.
1749 if (Subtarget.isHVXVectorType(ty(Op0), true)) {
1750 if (NumOp == 2)
1751 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
1752
1753 ArrayRef<SDUse> U(Op.getNode()->ops());
1754 SmallVector<SDValue, 4> SV(U.begin(), U.end());
1755 ArrayRef<SDValue> Ops(SV);
1756
1757 MVT HalfTy = typeSplit(VecTy).first;
1758 SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1759 Ops.take_front(NumOp/2));
1760 SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1761 Ops.take_back(NumOp/2));
1762 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
1763 }
1764
1765 // Count how many bytes (in a vector register) each bit in VecTy
1766 // corresponds to.
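// For example, v32i1 with a 128-byte vector register gives BitBytes = 4.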
1767 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1768
1769 SmallVector<SDValue,8> Prefixes;
1770 for (SDValue V : Op.getNode()->op_values()) {
1771 SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
1772 Prefixes.push_back(P);
1773 }
1774
1775 unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
1776 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1777 SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
1778 SDValue Res = getZero(dl, ByteTy, DAG);
1779 for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1780 Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
1781 Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
1782 }
1783 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
1784}
1785
1786SDValue
1787HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1788 const {
1789 // Change the type of the extracted element to i32.
1790 SDValue VecV = Op.getOperand(0);
1791 MVT ElemTy = ty(VecV).getVectorElementType();
1792 const SDLoc &dl(Op);
1793 SDValue IdxV = Op.getOperand(1);
1794 if (ElemTy == MVT::i1)
1795 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1796
1797 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1798}
1799
1800SDValue
1801HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1802 const {
1803 const SDLoc &dl(Op);
1804 MVT VecTy = ty(Op);
1805 SDValue VecV = Op.getOperand(0);
1806 SDValue ValV = Op.getOperand(1);
1807 SDValue IdxV = Op.getOperand(2);
1808 MVT ElemTy = ty(VecV).getVectorElementType();
1809 if (ElemTy == MVT::i1)
1810 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1811
1812 if (ElemTy == MVT::f16) {
1813 SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
1814 tyVector(VecTy, MVT::i16),
1815 DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1816 DAG.getBitcast(MVT::i16, ValV), IdxV);
1817 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1818 }
1819
1820 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1821}
1822
1823SDValue
1824HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1825 const {
1826 SDValue SrcV = Op.getOperand(0);
1827 MVT SrcTy = ty(SrcV);
1828 MVT DstTy = ty(Op);
1829 SDValue IdxV = Op.getOperand(1);
1830 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1831 assert(Idx % DstTy.getVectorNumElements() == 0);
1832 (void)Idx;
1833 const SDLoc &dl(Op);
1834
1835 MVT ElemTy = SrcTy.getVectorElementType();
1836 if (ElemTy == MVT::i1)
1837 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1838
1839 return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1840}
1841
1842SDValue
1843HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1844 const {
1845 // Idx does not need to be a constant.
1846 SDValue VecV = Op.getOperand(0);
1847 SDValue ValV = Op.getOperand(1);
1848 SDValue IdxV = Op.getOperand(2);
1849
1850 const SDLoc &dl(Op);
1851 MVT VecTy = ty(VecV);
1852 MVT ElemTy = VecTy.getVectorElementType();
1853 if (ElemTy == MVT::i1)
1854 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1855
1856 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1857}
1858
1859SDValue
1860HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1861 // Lower any-extends of boolean vectors to sign-extends, since they
1862 // translate directly to Q2V. Zero-extending could also be done equally
1863 // fast, but Q2V is used/recognized in more places.
1864 // For all other vectors, use zero-extend.
1865 MVT ResTy = ty(Op);
1866 SDValue InpV = Op.getOperand(0);
1867 MVT ElemTy = ty(InpV).getVectorElementType();
1868 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1869 return LowerHvxSignExt(Op, DAG);
1870 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1871}
1872
1873SDValue
1874HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1875 MVT ResTy = ty(Op);
1876 SDValue InpV = Op.getOperand(0);
1877 MVT ElemTy = ty(InpV).getVectorElementType();
1878 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1879 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1880 return Op;
1881}
1882
1883SDValue
1884HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1885 MVT ResTy = ty(Op);
1886 SDValue InpV = Op.getOperand(0);
1887 MVT ElemTy = ty(InpV).getVectorElementType();
1888 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1889 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1890 return Op;
1891}
1892
1893SDValue
1894HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1895 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1896 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
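// For example, x = 0b...01000: ~x & (x-1) = 0b...00111, whose ctlz is
// bitwidth(x) - 3, so the subtraction below yields cttz(x) = 3.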
1897 const SDLoc &dl(Op);
1898 MVT ResTy = ty(Op);
1899 SDValue InpV = Op.getOperand(0);
1900 assert(ResTy == ty(InpV));
1901
1902 // Calculate the vectors of 1 and bitwidth(x).
1903 MVT ElemTy = ty(InpV).getVectorElementType();
1904 unsigned ElemWidth = ElemTy.getSizeInBits();
1905
1906 SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1907 DAG.getConstant(1, dl, MVT::i32));
1908 SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1909 DAG.getConstant(ElemWidth, dl, MVT::i32));
1910 SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1911 DAG.getAllOnesConstant(dl, MVT::i32));
1912
1913 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1914 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1915 // it separately in custom combine or selection).
1916 SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
1917 {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
1918 DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
1919 return DAG.getNode(ISD::SUB, dl, ResTy,
1920 {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
1921}
1922
1923SDValue
1924HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1925 const SDLoc &dl(Op);
1926 MVT ResTy = ty(Op);
1927 assert(ResTy.getVectorElementType() == MVT::i32);
1928
1929 SDValue Vs = Op.getOperand(0);
1930 SDValue Vt = Op.getOperand(1);
1931
1932 SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
1933 unsigned Opc = Op.getOpcode();
1934
1935 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
1936 if (Opc == ISD::MULHU)
1937 return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1938 if (Opc == ISD::MULHS)
1939 return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1940
1941#ifndef NDEBUG
1942 Op.dump(&DAG);
1943#endif
1944 llvm_unreachable("Unexpected mulh operation");
1945}
1946
1947SDValue
1948HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
1949 const SDLoc &dl(Op);
1950 unsigned Opc = Op.getOpcode();
1951 SDValue Vu = Op.getOperand(0);
1952 SDValue Vv = Op.getOperand(1);
1953
1954 // If the HI part is not used, convert it to a regular MUL.
1955 if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
1956 // Need to preserve the types and the number of values.
1957 SDValue Hi = DAG.getUNDEF(ty(HiVal));
1958 SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
1959 return DAG.getMergeValues({Lo, Hi}, dl);
1960 }
1961
1962 bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
1963 bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;
1964
1965 // Legal on HVX v62+, but lower it here because patterns can't handle multi-
1966 // valued nodes.
1967 if (Subtarget.useHVXV62Ops())
1968 return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1969
1970 if (Opc == HexagonISD::SMUL_LOHI) {
1971 // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
1972 // for other signedness LOHI is cheaper.
1973 if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
1974 SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
1975 SDValue Lo = DAG.getUNDEF(ty(LoVal));
1976 return DAG.getMergeValues({Lo, Hi}, dl);
1977 }
1978 }
1979
1980 return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1981}
1982
1983SDValue
1984HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
1985 SDValue Val = Op.getOperand(0);
1986 MVT ResTy = ty(Op);
1987 MVT ValTy = ty(Val);
1988 const SDLoc &dl(Op);
1989
1990 if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
1991 unsigned HwLen = Subtarget.getVectorLength();
1992 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
1993 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
1994 unsigned BitWidth = ResTy.getSizeInBits();
1995
1996 if (BitWidth < 64) {
1997 SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
1998 dl, MVT::i32, DAG);
1999 if (BitWidth == 32)
2000 return W0;
2001 assert(BitWidth < 32u);
2002 return DAG.getZExtOrTrunc(W0, dl, ResTy);
2003 }
2004
2005 // The result is >= 64 bits. The only options are 64 or 128.
2006 assert(BitWidth == 64 || BitWidth == 128);
2007 SmallVector<SDValue, 4> Words;
2008 for (unsigned i = 0; i != BitWidth/32; ++i) {
2009 SDValue W = extractHvxElementReg(
2010 VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
2011 Words.push_back(W);
2012 }
2013 SmallVector<SDValue,2> Combines;
2014 assert(Words.size() % 2 == 0);
2015 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
2016 SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
2017 Combines.push_back(C);
2018 }
2019
2020 if (BitWidth == 64)
2021 return Combines[0];
2022
2023 return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
2024 }
2025
2026 // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
2027 // Splat the input into a 32-element i32 vector, then AND each element
2028 // with a unique bitmask to isolate individual bits.
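// For example, an input of 0b...0101 leaves lanes 0 and 2 nonzero after the
// AND, so V2Q produces a predicate with exactly elements 0 and 2 set.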
2029 auto bitcastI32ToV32I1 = [&](SDValue Val32) {
2030 assert(Val32.getValueType().getSizeInBits() == 32 &&
2031 "Input must be 32 bits");
2032 MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
2033 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
2034 SmallVector<SDValue, 32> Mask;
2035 for (unsigned i = 0; i < 32; ++i)
2036 Mask.push_back(DAG.getConstant(1ull << i, dl, MVT::i32));
2037
2038 SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
2039 SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
2040 return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded);
2041 };
2042 // === Case: v32i1 ===
2043 if (ResTy == MVT::v32i1 &&
2044 (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2045 Subtarget.useHVX128BOps()) {
2046 SDValue Val32 = Val;
2047 if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2048 Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2049 return bitcastI32ToV32I1(Val32);
2050 }
2051 // === Case: v64i1 ===
2052 if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
2053 // Split i64 into lo/hi 32-bit halves.
2054 SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val);
2055 SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val,
2056 DAG.getConstant(32, dl, MVT::i64));
2057 SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted);
2058
2059 // Reuse the same 32-bit logic twice.
2060 SDValue LoRes = bitcastI32ToV32I1(Lo);
2061 SDValue HiRes = bitcastI32ToV32I1(Hi);
2062
2063 // Concatenate into a v64i1 predicate.
2064 return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes);
2065 }
2066
2067 if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2068 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2069 unsigned BitWidth = ValTy.getSizeInBits();
2070 unsigned HwLen = Subtarget.getVectorLength();
2071 assert(BitWidth == HwLen);
2072
2073 MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2074 SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2075 // Splat each byte of Val 8 times.
2076 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2077 // where b0, b1,..., b15 are least to most significant bytes of I.
2078 SmallVector<SDValue, 128> Bytes;
2079 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2080 // These are bytes with the LSB rotated left with respect to their index.
2081 SmallVector<SDValue, 128> Tmp;
2082 for (unsigned I = 0; I != HwLen / 8; ++I) {
2083 SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2084 SDValue Byte =
2085 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2086 for (unsigned J = 0; J != 8; ++J) {
2087 Bytes.push_back(Byte);
2088 Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2089 }
2090 }
2091
2092 MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2093 SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2094 SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2095
2096 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2097 I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2098 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2099 }
2100
2101 return Op;
2102}
2103
2104SDValue
2105HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2106 // Sign- and zero-extends are legal.
2107 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2108 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2109 Op.getOperand(0));
2110}
2111
2112SDValue
2113HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2114 MVT ResTy = ty(Op);
2115 if (ResTy.getVectorElementType() != MVT::i1)
2116 return Op;
2117
2118 const SDLoc &dl(Op);
2119 unsigned HwLen = Subtarget.getVectorLength();
2120 unsigned VecLen = ResTy.getVectorNumElements();
2121 assert(HwLen % VecLen == 0);
2122 unsigned ElemSize = HwLen / VecLen;
2123
2124 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
2125 SDValue S =
2126 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2127 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2128 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2129 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2130}
2131
2132SDValue
2133HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2134 if (SDValue S = getVectorShiftByInt(Op, DAG))
2135 return S;
2136 return Op;
2137}
2138
2139SDValue
2140HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2141 SelectionDAG &DAG) const {
2142 unsigned Opc = Op.getOpcode();
2143 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2144
2145 // Make sure the shift amount is within the range of the bitwidth
2146 // of the element type.
2147 SDValue A = Op.getOperand(0);
2148 SDValue B = Op.getOperand(1);
2149 SDValue S = Op.getOperand(2);
2150
2151 MVT InpTy = ty(A);
2152 MVT ElemTy = InpTy.getVectorElementType();
2153
2154 const SDLoc &dl(Op);
2155 unsigned ElemWidth = ElemTy.getSizeInBits();
2156 bool IsLeft = Opc == ISD::FSHL;
2157
2158 // The expansion into regular shifts produces worse code for i8 and for
2159 // right shift of i32 on v65+.
2160 bool UseShifts = ElemTy != MVT::i8;
2161 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2162 UseShifts = false;
2163
2164 if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2165 // If this is a funnel shift by a scalar, lower it into regular shifts.
2166 SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2167 SDValue ModS =
2168 DAG.getNode(ISD::AND, dl, MVT::i32,
2169 {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2170 SDValue NegS =
2171 DAG.getNode(ISD::SUB, dl, MVT::i32,
2172 {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2173 SDValue IsZero =
2174 DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2175 // FSHL A, B  =>  (A << ModS) | (B >> NegS)
2176 // FSHR A, B  =>  (A << NegS) | (B >> ModS)
2177 SDValue Part1 =
2178 DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2179 SDValue Part2 =
2180 DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2181 SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2182 // If the shift amount was 0, pick A or B, depending on the direction.
2183 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2184 return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2185 }
2186
2187 SDValue Mask = DAG.getSplatBuildVector(
2188 InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2189
2190 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2191 return DAG.getNode(MOpc, dl, ty(Op),
2192 {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2193}
2194
2195SDValue
2196HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2197 const SDLoc &dl(Op);
2198 unsigned IntNo = Op.getConstantOperandVal(0);
2199 SmallVector<SDValue> Ops(Op->ops());
2200
2201 auto Swap = [&](SDValue P) {
2202 return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
2203 };
2204
2205 switch (IntNo) {
2206 case Intrinsic::hexagon_V6_pred_typecast:
2207 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2208 MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
2209 if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
2210 if (ResTy == InpTy)
2211 return Ops[1];
2212 return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
2213 }
2214 break;
2215 }
2216 case Intrinsic::hexagon_V6_vmpyss_parts:
2217 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2218 return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
2219 {Ops[1], Ops[2]}));
2220 case Intrinsic::hexagon_V6_vmpyuu_parts:
2221 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2222 return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
2223 {Ops[1], Ops[2]}));
2224 case Intrinsic::hexagon_V6_vmpyus_parts:
2225 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2226 return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
2227 {Ops[1], Ops[2]}));
2228 }
2229 } // switch
2230
2231 return Op;
2232}
2233
2234SDValue
2235HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
2236 const SDLoc &dl(Op);
2237 unsigned HwLen = Subtarget.getVectorLength();
2238 MachineFunction &MF = DAG.getMachineFunction();
2239 auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
2240 SDValue Mask = MaskN->getMask();
2241 SDValue Chain = MaskN->getChain();
2242 SDValue Base = MaskN->getBasePtr();
2243 auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);
2244
2245 unsigned Opc = Op->getOpcode();
2246 assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);
2247
2248 if (Opc == ISD::MLOAD) {
2249 MVT ValTy = ty(Op);
2250 SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
2251 SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
2252 if (isUndef(Thru))
2253 return Load;
2254 SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
2255 return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
2256 }
2257
2258 // MSTORE
2259 // HVX only has aligned masked stores.
2260
2261 // TODO: Fold negations of the mask into the store.
2262 unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
2263 SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
2264 SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));
2265
2266 if (MaskN->getAlign().value() % HwLen == 0) {
2267 SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
2268 {Mask, Base, Offset0, Value, Chain}, DAG);
2269 DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
2270 return Store;
2271 }
2272
2273 // Unaligned case.
2274 auto StoreAlign = [&](SDValue V, SDValue A) {
2275 SDValue Z = getZero(dl, ty(V), DAG);
2276 // TODO: use funnel shifts?
2277 // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
2278 // upper half.
2279 SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
2280 SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
2281 return std::make_pair(LoV, HiV);
2282 };
2283
2284 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
2285 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
2286 SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
2287 VectorPair Tmp = StoreAlign(MaskV, Base);
2288 VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
2289 DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
2290 VectorPair ValueU = StoreAlign(Value, Base);
2291
2292 SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
2293 SDValue StoreLo =
2294 getInstr(StoreOpc, dl, MVT::Other,
2295 {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
2296 SDValue StoreHi =
2297 getInstr(StoreOpc, dl, MVT::Other,
2298 {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
2299 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
2300 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
2301 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
2302}
2303
2304SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2305 SelectionDAG &DAG) const {
2306 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2307 // is legal (done via a pattern).
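// The code below widens f16 by multiplying the input by 1.0 with
// V6_vmpy_qf32_hf (giving a qf32 vector pair) and converting each half back
// to IEEE single precision with V6_vconv_sf_qf32.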
2308 assert(Subtarget.useHVXQFloatOps());
2309
2310 assert(Op->getOpcode() == ISD::FP_EXTEND);
2311
2312 MVT VecTy = ty(Op);
2313 MVT ArgTy = ty(Op.getOperand(0));
2314 const SDLoc &dl(Op);
2315 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2316
2317 SDValue F16Vec = Op.getOperand(0);
2318
2319 APFloat FloatVal = APFloat(1.0f);
2320 bool Ignored;
2321 FloatVal.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
2322 SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2323 SDValue VmpyVec =
2324 getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2325
2326 MVT HalfTy = typeSplit(VecTy).first;
2327 VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2328 SDValue LoVec =
2329 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2330 SDValue HiVec =
2331 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2332
2333 SDValue ShuffVec =
2334 getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2335 {HiVec, LoVec, DAG.getSignedConstant(-4, dl, MVT::i32)}, DAG);
2336
2337 return ShuffVec;
2338}
2339
2340SDValue
2341HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2342 // Catch invalid conversion ops (just in case).
2343 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2344 Op.getOpcode() == ISD::FP_TO_UINT);
2345
2346 MVT ResTy = ty(Op);
2347 MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2348 MVT IntTy = ResTy.getVectorElementType();
2349
2350 if (Subtarget.useHVXIEEEFPOps()) {
2351 // There are only conversions from f16.
2352 if (FpTy == MVT::f16) {
2353 // Other int types aren't legal in HVX, so we shouldn't see them here.
2354 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2355 // Conversions to i8 and i16 are legal.
2356 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2357 return Op;
2358 }
2359 }
2360
2361 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2362 return EqualizeFpIntConversion(Op, DAG);
2363
2364 return ExpandHvxFpToInt(Op, DAG);
2365}
2366
2367// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
2368// R1 = #1, R2 holds the v32i1 param
2369// V1 = vsplat(R1)
2370// V2 = vsplat(R2)
2371// Q0 = vand(V1,R1)
2372// V0.w=prefixsum(Q0)
2373// V0.w=vsub(V0.w,V1.w)
2374// V2.w = vlsr(V2.w,V0.w)
2375// V2 = vand(V2,V1)
2376// V2.sf = V2.w
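// In effect, lane i of the result is (float)((P >> i) & 1), where P is the
// v32i1 input viewed as a 32-bit integer.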
2377SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
2378 SelectionDAG &DAG) const {
2379
2380 MVT ResTy = ty(PredOp);
2381 const SDLoc &dl(PredOp);
2382
2383 SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
2384 SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
2385 SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2386 SDValue(RegConst, 0));
2387 SDNode *PredTransfer =
2388 DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
2389 SDValue(SplatConst, 0), SDValue(RegConst, 0));
2390 SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
2391 SDValue(PredTransfer, 0));
2392 SDNode *SplatParam = DAG.getMachineNode(
2393 Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2394 DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0)));
2395 SDNode *Vsub =
2396 DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
2397 SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
2398 SDNode *IndexShift =
2399 DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
2400 SDValue(SplatParam, 0), SDValue(Vsub, 0));
2401 SDNode *MaskOff =
2402 DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
2403 SDValue(IndexShift, 0), SDValue(SplatConst, 0));
2404 SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy,
2405 SDValue(MaskOff, 0));
2406 return SDValue(Convert, 0);
2407}
2408
2409 // For vector type v64i1 uint_to_fp/sint_to_fp to v64f16:
2410// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
2411// R3 = subreg_high (R32)
2412// R2 = subreg_low (R32)
2413// R1 = #1
2414// V1 = vsplat(R1)
2415// V2 = vsplat(R2)
2416// V3 = vsplat(R3)
2417// Q0 = vand(V1,R1)
2418// V0.w=prefixsum(Q0)
2419// V0.w=vsub(V0.w,V1.w)
2420// V2.w = vlsr(V2.w,V0.w)
2421// V3.w = vlsr(V3.w,V0.w)
2422// V2 = vand(V2,V1)
2423// V3 = vand(V3,V1)
2424// V2.h = vpacke(V3.w,V2.w)
2425// V2.hf = V2.h
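// In effect, lane i of the v64f16 result is (half)((P >> i) & 1), where P is
// the v64i1 input viewed as a 64-bit integer.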
2426SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
2427 SelectionDAG &DAG) const {
2428
2429 MVT ResTy = ty(PredOp);
2430 const SDLoc &dl(PredOp);
2431
2432 SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0));
2433 // Get the hi and lo regs
2434 SDValue HiReg =
2435 DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp);
2436 SDValue LoReg =
2437 DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Inp);
2438 // Get constant #1 and splat into vector V1
2439 SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
2440 SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
2441 SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2442 SDValue(RegConst, 0));
2443 // Splat the hi and lo args
2444 SDNode *SplatHi =
2445 DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2446 DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg));
2447 SDNode *SplatLo =
2448 DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2449 DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg));
2450 // vand between splatted const and const
2451 SDNode *PredTransfer =
2452 DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
2453 SDValue(SplatConst, 0), SDValue(RegConst, 0));
2454 // Get the prefixsum
2455 SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
2456 SDValue(PredTransfer, 0));
2457 // Get the vsub
2458 SDNode *Vsub =
2459 DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
2460 SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
2461 // Get vlsr for hi and lo
2462 SDNode *IndexShift_hi =
2463 DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
2464 SDValue(SplatHi, 0), SDValue(Vsub, 0));
2465 SDNode *IndexShift_lo =
2466 DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
2467 SDValue(SplatLo, 0), SDValue(Vsub, 0));
2468 // Get vand of hi and lo
2469 SDNode *MaskOff_hi =
2470 DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
2471 SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0));
2472 SDNode *MaskOff_lo =
2473 DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
2474 SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0));
2475 // Pack them
2476 SDNode *Pack =
2477 DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16,
2478 SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0));
2479 SDNode *Convert =
2480 DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0));
2481 return SDValue(Convert, 0);
2482}
2483
2484SDValue
2485HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2486 // Catch invalid conversion ops (just in case).
2487 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2488 Op.getOpcode() == ISD::UINT_TO_FP);
2489
2490 MVT ResTy = ty(Op);
2491 MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2492 MVT FpTy = ResTy.getVectorElementType();
2493
2494 if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
2495 if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
2496 return LowerHvxPred32ToFp(Op, DAG);
2497 if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
2498 return LowerHvxPred64ToFp(Op, DAG);
2499 }
2500
2501 if (Subtarget.useHVXIEEEFPOps()) {
2502 // There are only conversions to f16.
2503 if (FpTy == MVT::f16) {
2504 // Other int types aren't legal in HVX, so we shouldn't see them here.
2505 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2506 // i8, i16 -> f16 is legal.
2507 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2508 return Op;
2509 }
2510 }
2511
2512 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2513 return EqualizeFpIntConversion(Op, DAG);
2514
2515 return ExpandHvxIntToFp(Op, DAG);
2516}
2517
2518HexagonTargetLowering::TypePair
2519HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2520 // Compare the widths of elements of the two types, and extend the narrower
2521 // type to match the width of the wider type. For vector types, apply this
2522 // to the element type.
2523 assert(Ty0.isVector() == Ty1.isVector());
2524
2525 MVT ElemTy0 = Ty0.getScalarType();
2526 MVT ElemTy1 = Ty1.getScalarType();
2527
2528 unsigned Width0 = ElemTy0.getSizeInBits();
2529 unsigned Width1 = ElemTy1.getSizeInBits();
2530 unsigned MaxWidth = std::max(Width0, Width1);
2531
2532 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2533 if (ScalarTy.isInteger())
2534 return MVT::getIntegerVT(Width);
2535 assert(ScalarTy.isFloatingPoint());
2536 return MVT::getFloatingPointVT(Width);
2537 };
2538
2539 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2540 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2541
2542 if (!Ty0.isVector()) {
2543 // Both types are scalars.
2544 return {WideETy0, WideETy1};
2545 }
2546
2547 // Vector types.
2548 unsigned NumElem = Ty0.getVectorNumElements();
2549 assert(NumElem == Ty1.getVectorNumElements());
2550
2551 return {MVT::getVectorVT(WideETy0, NumElem),
2552 MVT::getVectorVT(WideETy1, NumElem)};
2553}
2554
2555HexagonTargetLowering::TypePair
2556HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2557 // Compare the numbers of elements of two vector types, and widen the
2558 // narrower one to match the number of elements in the wider one.
2559 assert(Ty0.isVector() && Ty1.isVector());
2560
2561 unsigned Len0 = Ty0.getVectorNumElements();
2562 unsigned Len1 = Ty1.getVectorNumElements();
2563 if (Len0 == Len1)
2564 return {Ty0, Ty1};
2565
2566 unsigned MaxLen = std::max(Len0, Len1);
2567 return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2568 MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2569}
2570
2571MVT
2572HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2573 EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2574 assert(LegalTy.isSimple());
2575 return LegalTy.getSimpleVT();
2576}
2577
2578MVT
2579HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2580 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2581 assert(Ty.getSizeInBits() <= HwWidth);
2582 if (Ty.getSizeInBits() == HwWidth)
2583 return Ty;
2584
2585 MVT ElemTy = Ty.getScalarType();
2586 return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2587}
2588
2589HexagonTargetLowering::VectorPair
2590HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
2591 const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
2592 // Compute A+B, return {A+B, O}, where O = vector predicate indicating
2593 // whether an overflow has occurred.
2594 MVT ResTy = ty(A);
2595 assert(ResTy == ty(B));
2596 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());
2597
2598 if (!Signed) {
2599 // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
2600 // save any instructions.
2601 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2602 SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
2603 return {Add, Ovf};
2604 }
2605
2606 // Signed overflow has happened, if:
2607 // (A, B have the same sign) and (A+B has a different sign from either)
2608 // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
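// For example, in 8 bits: A = B = 0x64 (+100), A+B = 0xC8 (-56); then
// (~A ^ B) = 0xFF and ((A+B) ^ B) = 0xAC, whose AND has the sign bit set,
// correctly flagging the overflow.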
2609 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2610 SDValue NotA =
2611 DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getAllOnesConstant(dl, ResTy)});
2612 SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
2613 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
2614 SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
2615 SDValue MSB =
2616 DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
2617 return {Add, MSB};
2618}
2619
2620HexagonTargetLowering::VectorPair
2621HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
2622 bool Signed, SelectionDAG &DAG) const {
2623 // Shift Val right by Amt bits, round the result to the nearest integer,
2624 // tie-break by rounding halves to even integer.
2625
2626 const SDLoc &dl(Val);
2627 MVT ValTy = ty(Val);
2628
2629 // This should also work for signed integers.
2630 //
2631 // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
2632 // bool ovf = (inp > tmp0);
2633 //   uint rup = (inp >> Amt) & 1;     // bit that becomes the result's LSB
2634 //
2635 // uint tmp1 = inp >> (Amt-1); // tmp1 == tmp2 iff
2636 // uint tmp2 = tmp0 >> (Amt-1); // the Amt-1 lower bits were all 0
2637 // uint tmp3 = tmp2 + rup;
2638 // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
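// For illustration, with Amt = 2: an input of 6 (1.5 after shifting) rounds
// to 2, and an input of 10 (2.5 after shifting) also rounds to 2, since ties
// go to the even integer.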
2639 unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
2640 MVT ElemTy = MVT::getIntegerVT(ElemWidth);
2641 MVT IntTy = tyVector(ValTy, ElemTy);
2642 MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
2643 unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;
2644
2645 SDValue Inp = DAG.getBitcast(IntTy, Val);
2646 SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);
2647
2648 SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
2649 SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
2650 SDValue Zero = getZero(dl, IntTy, DAG);
2651 SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
2652 SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
2653 auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);
2654
2655 SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
2656 SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
2657 SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
2658 SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);
2659
2660 SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
2661 SDValue One = DAG.getConstant(1, dl, IntTy);
2662 SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
2663 SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
2664 SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
2665 return {Mux, Ovf};
2666}
2667
2668SDValue
2669HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
2670 SelectionDAG &DAG) const {
2671 MVT VecTy = ty(A);
2672 MVT PairTy = typeJoin({VecTy, VecTy});
2673 assert(VecTy.getVectorElementType() == MVT::i32);
2674
2675 SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
2676
2677 // mulhs(A,B) =
2678 // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
2679 // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
2680 // + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
2681 // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
2682 // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
2683 // anything, so it cannot produce any carry over to higher bits),
2684 // so everything in [] can be shifted by 16 without loss of precision.
2685 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
2686 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
2687 // The final additions need to make sure to properly maintain any carry-
2688 // out bits.
2689 //
2690 // Hi(B) Lo(B)
2691 // Hi(A) Lo(A)
2692 // --------------
2693 // Lo(B)*Lo(A) | T0 = V6_vmpyewuh(B,A) does this,
2694 // Hi(B)*Lo(A) | + dropping the low 16 bits
2695 // Hi(A)*Lo(B) | T2
2696 // Hi(B)*Hi(A)
2697
2698 SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
2699 // T1 = get Hi(A) into low halves.
2700 SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
2701 // P0 = interleaved T1.h*B.uh (full precision product)
2702 SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
2703 // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
2704 SDValue T2 = LoHalf(P0, DAG);
2705 // We need to add T0+T2, recording the carry-out, which will be 1<<16
2706 // added to the final sum.
2707 // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
2708 SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
2709 // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
2710 SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
2711 // T3 = full-precision(T0+T2) >> 16
2712 // The low halves are added-unsigned, the high ones are added-signed.
2713 SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2714 {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
2715 SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
2716 // P3 = interleaved Hi(B)*Hi(A) (full precision),
2717 // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
2718 SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
2719 SDValue T5 = LoHalf(P3, DAG);
2720 // Add:
2721 SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
2722 return T6;
2723}
2724
2725SDValue
2726HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
2727 bool SignedB, const SDLoc &dl,
2728 SelectionDAG &DAG) const {
2729 MVT VecTy = ty(A);
2730 MVT PairTy = typeJoin({VecTy, VecTy});
2731 assert(VecTy.getVectorElementType() == MVT::i32);
2732
2733 SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
2734
2735 if (SignedA && !SignedB) {
2736 // Make A:unsigned, B:signed.
2737 std::swap(A, B);
2738 std::swap(SignedA, SignedB);
2739 }
2740
2741 // Do halfword-wise multiplications for unsigned*unsigned product, then
2742 // add corrections for signed and unsigned*signed.
2743
2744 SDValue Lo, Hi;
2745
2746 // P0:lo = (uu) products of low halves of A and B,
2747 // P0:hi = (uu) products of high halves.
2748 SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);
2749
2750 // Swap low/high halves in B
2751 SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
2752 {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
2753 SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
2754 // P1 = products of even/odd halfwords.
2755 // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
2756 // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
2757 SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);
2758
2759 // P2:lo = low halves of P1:lo + P1:hi,
2760 // P2:hi = high halves of P1:lo + P1:hi.
2761 SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
2762 {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
2763 // Still need to add the high halves of P0:lo to P2:lo
2764 SDValue T2 =
2765 getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
2766 SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});
2767
2768 // The high halves of T3 will contribute to the HI part of LOHI.
2769 SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2770 {HiHalf(P2, DAG), T3, S16}, DAG);
2771
2772 // The low halves of P2 need to be added to high halves of the LO part.
2773 Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
2774 {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
2775 Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});
2776
2777 if (SignedA) {
2778 assert(SignedB && "Signed A and unsigned B should have been inverted");
2779
2780 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2781 SDValue Zero = getZero(dl, VecTy, DAG);
2782 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2783 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2784 SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
2785 SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
2786 Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
2787 } else if (SignedB) {
2788 // Same correction as for mulhus:
2789 // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
2790 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2791 SDValue Zero = getZero(dl, VecTy, DAG);
2792 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2793 Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
2794 } else {
2795 assert(!SignedA && !SignedB);
2796 }
2797
2798 return DAG.getMergeValues({Lo, Hi}, dl);
2799}
2800
2801SDValue
2802HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
2803 SDValue B, bool SignedB,
2804 const SDLoc &dl,
2805 SelectionDAG &DAG) const {
2806 MVT VecTy = ty(A);
2807 MVT PairTy = typeJoin({VecTy, VecTy});
2808 assert(VecTy.getVectorElementType() == MVT::i32);
2809
2810 if (SignedA && !SignedB) {
2811 // Make A:unsigned, B:signed.
2812 std::swap(A, B);
2813 std::swap(SignedA, SignedB);
2814 }
2815
2816 // Do S*S first, then make corrections for U*S or U*U if needed.
2817 SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
2818 SDValue P1 =
2819 getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
2820 SDValue Lo = LoHalf(P1, DAG);
2821 SDValue Hi = HiHalf(P1, DAG);
2822
2823 if (!SignedB) {
2824 assert(!SignedA && "Signed A and unsigned B should have been inverted");
2825 SDValue Zero = getZero(dl, VecTy, DAG);
2826 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2827
2828 // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
2829 // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
2830 // (V6_vaddw (HiHalf (Muls64O $A, $B)),
2831 // (V6_vaddwq (V6_vgtw (V6_vd0), $B),
2832 // (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
2833 // $A))>;
2834 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2835 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2836 SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
2837 SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
2838 Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
2839 } else if (!SignedA) {
2840 SDValue Zero = getZero(dl, VecTy, DAG);
2841 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2842
2843 // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
2844 // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
2845 // (V6_vaddwq (V6_vgtw (V6_vd0), $A),
2846 // (HiHalf (Muls64O $A, $B)),
2847 // $B)>;
2848 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2849 Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
2850 }
2851
2852 return DAG.getMergeValues({Lo, Hi}, dl);
2853}
2854
2855SDValue
2856HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
2857 const {
2858 // Rewrite conversion between integer and floating-point in such a way that
2859 // the integer type is extended/narrowed to match the bitwidth of the
2860 // floating-point type, combined with additional integer-integer extensions
2861 // or narrowings to match the original input/result types.
2862 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
2863 //
2864 // The input/result types are not required to be legal, but if they are
2865 // legal, this function should not introduce illegal types.
2866
2867 unsigned Opc = Op.getOpcode();
2868 assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT ||
2869        Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
2870
2871 SDValue Inp = Op.getOperand(0);
2872 MVT InpTy = ty(Inp);
2873 MVT ResTy = ty(Op);
2874
2875 if (InpTy == ResTy)
2876 return Op;
2877
2878 const SDLoc &dl(Op);
2879 bool Signed = Opc == ISD::FP_TO_SINT || Opc == ISD::SINT_TO_FP;
2880
2881 auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
2882 SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
2883 SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
2884 SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
2885 return Res;
2886}
2887
2888SDValue
2889HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2890 unsigned Opc = Op.getOpcode();
2891 assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT);
2892
2893 const SDLoc &dl(Op);
2894 SDValue Op0 = Op.getOperand(0);
2895 MVT InpTy = ty(Op0);
2896 MVT ResTy = ty(Op);
2897 assert(InpTy.changeTypeToInteger() == ResTy);
2898
2899 // int32_t conv_f32_to_i32(uint32_t inp) {
2900 // // s | exp8 | frac23
2901 //
2902 // int neg = (int32_t)inp < 0;
2903 //
2904 // // "expm1" is the actual exponent minus 1: instead of "bias", subtract
2905 // // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
2906 // // produce a large positive "expm1", which will result in max u/int.
2907 // // In all IEEE formats, bias is the largest positive number that can be
2908 // // represented in bias-width bits (i.e. 011..1).
2909 // int32_t expm1 = (inp << 1) - 0x80000000;
2910 // expm1 >>= 24;
2911 //
2912 // // Always insert the "implicit 1". Subnormal numbers will become 0
2913 // // regardless.
2914 // uint32_t frac = (inp << 8) | 0x80000000;
2915 //
2916 // // "frac" is the fraction part represented as Q1.31. If it was
2917 // // interpreted as uint32_t, it would be the fraction part multiplied
2918 // // by 2^31.
2919 //
2920 // // Calculate the amount of right shift, since shifting further to the
2921 // // left would lose significant bits. Limit it to 32, because we want
2922 // // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
2923 // // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
2924 // // left by 31). "rsh" can be negative.
2925 // int32_t rsh = min(31 - (expm1 + 1), 32);
2926 //
2927 // frac >>= rsh; // rsh == 32 will produce 0
2928 //
2929 // // Everything up to this point is the same for conversion to signed
2930 //   // or unsigned integer.
2931 //
2932 // if (neg) // Only for signed int
2933 // frac = -frac; //
2934 // if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff
2935 // frac = 0x80000000; // frac = rsh <= 0 ? bound : frac
2936 // if (rsh <= 0 && !neg) //
2937 // frac = 0x7fffffff; //
2938 //
2939 // if (neg) // Only for unsigned int
2940 // frac = 0; //
2941 // if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac;
2942 // frac = 0x7fffffff; // frac = neg ? 0 : frac;
2943 //
2944 // return frac;
2945 // }
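// Worked example (f32 -> i32): inp = 0x3FC00000 (1.5f). Then
//   expm1 = ((inp << 1) - 0x80000000) >> 24 = -1   (true exponent 0),
//   frac  = (inp << 8) | 0x80000000 = 0xC0000000   (1.5 as Q1.31),
//   rsh   = min(31 - (expm1 + 1), 32) = 31,
// and frac >> 31 = 1, i.e. (int)1.5f == 1.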
2946
2947 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());
2948
2949 // Zero = V6_vd0();
2950 // Neg = V6_vgtw(Zero, Inp);
2951 // One = V6_lvsplatw(1);
2952 // M80 = V6_lvsplatw(0x80000000);
2953 // Exp00 = V6_vaslwv(Inp, One);
2954 // Exp01 = V6_vsubw(Exp00, M80);
2955 // ExpM1 = V6_vasrw(Exp01, 24);
2956 // Frc00 = V6_vaslw(Inp, 8);
2957 // Frc01 = V6_vor(Frc00, M80);
2958 // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
2959 // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
2960 // Frc02 = V6_vlsrwv(Frc01, Rsh01);
2961
2962 // if signed int:
2963 // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
2964 // Pos = V6_vgtw(Rsh01, Zero);
2965 // Frc13 = V6_vsubw(Zero, Frc02);
2966 // Frc14 = V6_vmux(Neg, Frc13, Frc02);
2967 // Int = V6_vmux(Pos, Frc14, Bnd);
2968 //
2969 // if unsigned int:
2970 // Rsn = V6_vgtw(Zero, Rsh01)
2971 // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
2972 // Int = V6_vmux(Neg, Zero, Frc23)
2973
2974 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
2975 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
2976 assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));
2977
2978 SDValue Inp = DAG.getBitcast(ResTy, Op0);
2979 SDValue Zero = getZero(dl, ResTy, DAG);
2980 SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
2981 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
2982 SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
2983 SDValue One = DAG.getConstant(1, dl, ResTy);
2984 SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
2985 SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
2986 SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
2987 SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});
2988
2989 SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
2990 SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
2991 SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});
2992
2993 SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
2994 SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
2995 SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
2996 SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
2997 SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});
2998
2999 SDValue Int;
3000
3001 if (Opc == ISD::FP_TO_SINT) {
3002 SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
3003 SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
3004 SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
3005 SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
3006 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
3007 } else {
3008 assert(Opc == ISD::FP_TO_UINT);
3009 SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
3010 SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
3011 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
3012 }
3013
3014 return Int;
3015}
3016
3017SDValue
3018HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
3019 unsigned Opc = Op.getOpcode();
3020 assert(Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
3021
3022 const SDLoc &dl(Op);
3023 SDValue Op0 = Op.getOperand(0);
3024 MVT InpTy = ty(Op0);
3025 MVT ResTy = ty(Op);
3026 assert(ResTy.changeTypeToInteger() == InpTy);
3027
3028 // uint32_t vnoc1_rnd(int32_t w) {
3029 // int32_t iszero = w == 0;
3030 // int32_t isneg = w < 0;
3031 // uint32_t u = __builtin_HEXAGON_A2_abs(w);
3032 //
3033 // uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
3034 // uint32_t frac0 = (uint64_t)u << norm_left;
3035 //
3036 // // Rounding:
3037 // uint32_t frac1 = frac0 + ((1 << 8) - 1);
3038 // uint32_t renorm = (frac0 > frac1);
3039 // uint32_t rup = (int)(frac0 << 22) < 0;
3040 //
3041 // uint32_t frac2 = frac0 >> 8;
3042 // uint32_t frac3 = frac1 >> 8;
3043 // uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
3044 //
3045 // int32_t exp = 32 - norm_left + renorm + 127;
3046 // exp <<= 23;
3047 //
3048 // uint32_t sign = 0x80000000 * isneg;
3049 // uint32_t f = sign | exp | frac;
3050 // return iszero ? 0 : f;
3051 // }
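// Worked example (i32 -> f32): w = 3 gives cl0(3) = 30, norm_left = 31,
// frac0 = 0x80000000, no rounding adjustment, frac = 0x00400000,
// exp = (32 - 31 + 0 + 127) << 23 = 0x40000000, so f = 0x40400000 == 3.0f.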
3052
3053 MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
3054 bool Signed = Opc == ISD::SINT_TO_FP;
3055
3056 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
3057 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
3058
3059 SDValue Zero = getZero(dl, InpTy, DAG);
3060 SDValue One = DAG.getConstant(1, dl, InpTy);
3061 SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
3062 SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
3063 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
3064 SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
3065 SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});
3066
3067 auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
3068 if (Signed) {
3069 SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
3070 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
3071 SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
3072 Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
3073 }
3074
3075 SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
3076 SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
3077 SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
3078 SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
3079 SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
3080 {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
3081 SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
3082 SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
3083 SDValue Flt = DAG.getBitcast(ResTy, Flt1);
3084
3085 return Flt;
3086}
3087
3088SDValue
3089HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3090 unsigned Opc = Op.getOpcode();
3091 unsigned TLOpc;
3092 switch (Opc) {
3093 case ISD::ANY_EXTEND:
3094 case ISD::SIGN_EXTEND:
3095 case ISD::ZERO_EXTEND:
3096 TLOpc = HexagonISD::TL_EXTEND;
3097 break;
3098 case ISD::TRUNCATE:
3099 TLOpc = HexagonISD::TL_TRUNCATE;
3100 break;
3101#ifndef NDEBUG
3102 Op.dump(&DAG);
3103#endif
3104 llvm_unreachable("Unexpected operator");
3105 }
3106
3107 const SDLoc &dl(Op);
3108 return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
3109 DAG.getUNDEF(MVT::i128), // illegal type
3110 DAG.getConstant(Opc, dl, MVT::i32));
3111}
3112
3113SDValue
3114HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3115 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
3116 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
3117 unsigned Opc = Op.getConstantOperandVal(2);
3118 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
3119}
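// Illustrative sketch of the round trip (types chosen for illustration):
// CreateTLWrapper takes an extend or truncate whose types are still illegal,
// e.g.
//
//   t1: v64i16 = zero_extend t0:v64i8
//
// and turns it into a target node that carries the original opcode as a
// constant operand (operand 1 is an i128 undef, presumably so the wrapper
// itself is never treated as a legal node):
//
//   t1: v64i16 = HexagonISD::TL_EXTEND t0, undef:i128, Constant:i32<ZERO_EXTEND>
//
// Once LegalizeHvxResize sees both the operand and result types as legal, it
// calls RemoveTLWrapper, which rebuilds the original zero_extend from the
// opcode stored in operand 2.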
3120
3121HexagonTargetLowering::VectorPair
3122HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
3123 assert(!Op.isMachineOpcode());
3124 SmallVector<SDValue, 2> OpsL, OpsH;
3125 const SDLoc &dl(Op);
3126
3127 auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
3128 MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
3129 SDValue TV = DAG.getValueType(Ty);
3130 return std::make_pair(TV, TV);
3131 };
3132
3133 for (SDValue A : Op.getNode()->ops()) {
3134 auto [Lo, Hi] =
3135 ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
3136 // Special case for type operand.
3137 switch (Op.getOpcode()) {
3138 case ISD::SIGN_EXTEND_INREG:
3139 case HexagonISD::SSAT:
3140 case HexagonISD::USAT:
3141 if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
3142 std::tie(Lo, Hi) = SplitVTNode(N);
3143 break;
3144 }
3145 OpsL.push_back(Lo);
3146 OpsH.push_back(Hi);
3147 }
3148
3149 MVT ResTy = ty(Op);
3150 MVT HalfTy = typeSplit(ResTy).first;
3151 SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
3152 SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
3153 return {L, H};
3154}
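// Illustrative sketch, assuming the 128-byte HVX configuration: a v64i32
// multiply is a vector-pair operation, so SplitVectorOp rewrites
//
//   t2: v64i32 = mul t0, t1
//
// as two single-register multiplies on the halves produced by opSplit:
//
//   t3: v32i32 = mul t0.lo, t1.lo
//   t4: v32i32 = mul t0.hi, t1.hi
//
// and the caller rejoins {t3, t4} with opJoin (see LowerHvxOperation). For
// SIGN_EXTEND_INREG and SSAT/USAT the VTSDNode operand is not split; it is
// re-created with the half-sized type instead.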
3155
3156SDValue
3157HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
3158 auto *MemN = cast<MemSDNode>(Op.getNode());
3159
3160 MVT MemTy = MemN->getMemoryVT().getSimpleVT();
3161 if (!isHvxPairTy(MemTy))
3162 return Op;
3163
3164 const SDLoc &dl(Op);
3165 unsigned HwLen = Subtarget.getVectorLength();
3166 MVT SingleTy = typeSplit(MemTy).first;
3167 SDValue Chain = MemN->getChain();
3168 SDValue Base0 = MemN->getBasePtr();
3169 SDValue Base1 =
3170 DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(HwLen), dl);
3171 unsigned MemOpc = MemN->getOpcode();
3172
3173 MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
3174 if (MachineMemOperand *MMO = MemN->getMemOperand()) {
3175 MachineFunction &MF = DAG.getMachineFunction();
3176 uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
3177 ? (uint64_t)MemoryLocation::UnknownSize
3178 : HwLen;
3179 MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
3180 MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
3181 }
3182
3183 if (MemOpc == ISD::LOAD) {
3184 assert(cast<LoadSDNode>(Op)->isUnindexed());
3185 SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
3186 SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
3187 return DAG.getMergeValues(
3188 { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
3189 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3190 Load0.getValue(1), Load1.getValue(1)) }, dl);
3191 }
3192 if (MemOpc == ISD::STORE) {
3193 assert(cast<StoreSDNode>(Op)->isUnindexed());
3194 VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
3195 SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
3196 SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
3197 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
3198 }
3199
3200 assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);
3201
3202 auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
3203 assert(MaskN->isUnindexed());
3204 VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
3205 SDValue Offset = DAG.getUNDEF(MVT::i32);
3206
3207 if (MemOpc == ISD::MLOAD) {
3208 VectorPair Thru =
3209 opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
3210 SDValue MLoad0 =
3211 DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
3212 Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
3213 ISD::NON_EXTLOAD, false);
3214 SDValue MLoad1 =
3215 DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
3216 Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
3217 ISD::NON_EXTLOAD, false);
3218 return DAG.getMergeValues(
3219 { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
3220 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3221 MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
3222 }
3223 if (MemOpc == ISD::MSTORE) {
3224 VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
3225 SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
3226 Masks.first, SingleTy, MOp0,
3227 ISD::UNINDEXED, false, false);
3228 SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
3229 Masks.second, SingleTy, MOp1,
3230 ISD::UNINDEXED, false, false);
3231 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
3232 }
3233
3234 std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
3235 llvm_unreachable(Name.c_str());
3236}
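// Illustrative sketch, assuming HwLen = 128: a load of a vector pair
//
//   t1: v256i8,ch = load chain, Base, undef
//
// is split into two single-vector loads, one at Base and one at Base + 128,
// whose results and chains are glued back together:
//
//   t2: v128i8,ch = load chain, Base, undef
//   t3: v128i8,ch = load chain, Base + 128, undef
//   t4: v256i8    = concat_vectors t2, t3
//   t5: ch        = TokenFactor t2:1, t3:1
//
// Masked loads and stores take the same path, with the mask (and the
// pass-through value, for loads) split in half as well.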
3237
3238SDValue
3239HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3240 const SDLoc &dl(Op);
3241 auto *LoadN = cast<LoadSDNode>(Op.getNode());
3242 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3243 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3244 "Not widening loads of i1 yet");
3245
3246 SDValue Chain = LoadN->getChain();
3247 SDValue Base = LoadN->getBasePtr();
3248 SDValue Offset = DAG.getUNDEF(MVT::i32);
3249
3250 MVT ResTy = ty(Op);
3251 unsigned HwLen = Subtarget.getVectorLength();
3252 unsigned ResLen = ResTy.getStoreSize();
3253 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3254
3255 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3256 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3257 {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
3258
3259 MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
3260 MachineFunction &MF = DAG.getMachineFunction();
3261 auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
3262
3263 SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
3264 DAG.getUNDEF(LoadTy), LoadTy, MemOp,
3265                                   ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
3266  SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
3267 return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
3268}
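// Illustrative sketch, assuming HwLen = 128: a load of v32i8 (32 bytes, less
// than one HVX register) is widened to a masked load of a full register,
// where V6_pred_scalar2(32) produces a predicate covering only the first 32
// byte lanes:
//
//   Q  = V6_pred_scalar2(32)
//   t1: v128i8,ch = masked_load chain, Base, undef:i32, Q, undef
//
// Only the active lanes are read; the value is then re-cast to the original
// element type and returned as the widened result together with the chain.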
3269
3270SDValue
3271HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3272 const SDLoc &dl(Op);
3273 auto *StoreN = cast<StoreSDNode>(Op.getNode());
3274 assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3275 assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3276 "Not widening stores of i1 yet");
3277
3278 SDValue Chain = StoreN->getChain();
3279 SDValue Base = StoreN->getBasePtr();
3280 SDValue Offset = DAG.getUNDEF(MVT::i32);
3281
3282 SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
3283 MVT ValueTy = ty(Value);
3284 unsigned ValueLen = ValueTy.getVectorNumElements();
3285 unsigned HwLen = Subtarget.getVectorLength();
3286 assert(isPowerOf2_32(ValueLen));
3287
3288 for (unsigned Len = ValueLen; Len < HwLen; ) {
3289 Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
3290 Len = ty(Value).getVectorNumElements(); // This is Len *= 2
3291 }
3292 assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
3293
3294 assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3295 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3296 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3297 {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
3298 MachineFunction &MF = DAG.getMachineFunction();
3299 auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
3300 return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
3301 MemOp, ISD::UNINDEXED, false, false);
3302}
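// Illustrative sketch, assuming HwLen = 128: to store a v32i8 value, the
// value is first padded with undef halves until it fills one HVX register
// (32 -> 64 -> 128 lanes), and the masked store writes only the original 32
// bytes:
//
//   t1: v64i8  = concat_vectors v, undef
//   t2: v128i8 = concat_vectors t1, undef
//   Q  = V6_pred_scalar2(32)
//   ch = masked_store chain, t2, Base, undef:i32, Q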
3303
3304SDValue
3305HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3306 const SDLoc &dl(Op);
3307 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
3308 MVT ElemTy = ty(Op0).getVectorElementType();
3309 unsigned HwLen = Subtarget.getVectorLength();
3310
3311 unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
3312 assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
3313 MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
3314 if (!Subtarget.isHVXVectorType(WideOpTy, true))
3315 return SDValue();
3316
3317 SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
3318 SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
3319 EVT ResTy =
3320 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
3321 SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
3322 {WideOp0, WideOp1, Op.getOperand(2)});
3323
3324 EVT RetTy = typeLegalize(ty(Op), DAG);
3325 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
3326 {SetCC, getZero(dl, MVT::i32, DAG)});
3327}
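// Illustrative sketch, assuming HwLen = 128: a setcc on two v32i8 operands is
// widened by padding both operands with undef lanes up to v128i8, comparing
// at full width, and then extracting the low 32 lanes of the v128i1 result as
// the legalized v32i1 value.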
3328
3329SDValue
3330HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
3331 unsigned Opc = Op.getOpcode();
3332 bool IsPairOp = isHvxPairTy(ty(Op)) ||
3333 llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
3334 return isHvxPairTy(ty(V));
3335 });
3336
3337 if (IsPairOp) {
3338 switch (Opc) {
3339 default:
3340 break;
3341 case ISD::LOAD:
3342 case ISD::STORE:
3343 case ISD::MLOAD:
3344 case ISD::MSTORE:
3345 return SplitHvxMemOp(Op, DAG);
3346 case ISD::SINT_TO_FP:
3347 case ISD::UINT_TO_FP:
3348 case ISD::FP_TO_SINT:
3349 case ISD::FP_TO_UINT:
3350 if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
3351 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3352 break;
3353 case ISD::ABS:
3354 case ISD::CTPOP:
3355 case ISD::CTLZ:
3356 case ISD::CTTZ:
3357 case ISD::MUL:
3358 case ISD::FADD:
3359 case ISD::FSUB:
3360 case ISD::FMUL:
3361 case ISD::FMINIMUMNUM:
3362 case ISD::FMAXIMUMNUM:
3363 case ISD::MULHS:
3364 case ISD::MULHU:
3365 case ISD::AND:
3366 case ISD::OR:
3367 case ISD::XOR:
3368 case ISD::SRA:
3369 case ISD::SHL:
3370 case ISD::SRL:
3371 case ISD::FSHL:
3372 case ISD::FSHR:
3373 case ISD::SMIN:
3374 case ISD::SMAX:
3375 case ISD::UMIN:
3376 case ISD::UMAX:
3377 case ISD::SETCC:
3378 case ISD::VSELECT:
3379  case ISD::SIGN_EXTEND_INREG:
3380  case ISD::SPLAT_VECTOR:
3381 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3382 case ISD::SIGN_EXTEND:
3383 case ISD::ZERO_EXTEND:
3384 // In general, sign- and zero-extends can't be split and still
3385 // be legal. The only exception is extending bool vectors.
3386 if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
3387 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3388 break;
3389 }
3390 }
3391
3392 switch (Opc) {
3393 default:
3394 break;
3395 case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
3396 case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
3397 case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
3398 case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
3399 case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
3400 case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
3401 case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
3402 case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
3403 case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
3404 case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
3405 case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
3406 case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
3407 case ISD::SELECT: return LowerHvxSelect(Op, DAG);
3408 case ISD::SRA:
3409 case ISD::SHL:
3410 case ISD::SRL: return LowerHvxShift(Op, DAG);
3411 case ISD::FSHL:
3412 case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG);
3413 case ISD::MULHS:
3414 case ISD::MULHU: return LowerHvxMulh(Op, DAG);
3415 case ISD::SMUL_LOHI:
3416 case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3417 case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
3418 case ISD::SETCC:
3419 case ISD::INTRINSIC_VOID: return Op;
3420 case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
3421 case ISD::MLOAD:
3422 case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
3423 // Unaligned loads will be handled by the default lowering.
3424 case ISD::LOAD: return SDValue();
3425 case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
3426 case ISD::FP_TO_SINT:
3427 case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG);
3428 case ISD::SINT_TO_FP:
3429 case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG);
3430
3431 // Special nodes:
3432  case HexagonISD::SMUL_LOHI:
3433  case HexagonISD::UMUL_LOHI:
3434  case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3435 }
3436#ifndef NDEBUG
3437 Op.dumpr(&DAG);
3438#endif
3439 llvm_unreachable("Unhandled HVX operation");
3440}
3441
3442SDValue
3443HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3444 const {
3445 // Rewrite the extension/truncation/saturation op into steps where each
3446 // step changes the type widths by a factor of 2.
3447 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3448 //
3449 // Some of the vector types in Op may not be legal.
3450
3451 unsigned Opc = Op.getOpcode();
3452 switch (Opc) {
3453 case HexagonISD::SSAT:
3454 case HexagonISD::USAT:
3455  case HexagonISD::TL_EXTEND:
3456  case HexagonISD::TL_TRUNCATE:
3457    break;
3458 case ISD::ANY_EXTEND:
3459 case ISD::ZERO_EXTEND:
3460 case ISD::SIGN_EXTEND:
3461 case ISD::TRUNCATE:
3462 llvm_unreachable("ISD:: ops will be auto-folded");
3463 break;
3464#ifndef NDEBUG
3465 Op.dump(&DAG);
3466#endif
3467 llvm_unreachable("Unexpected operation");
3468 }
3469
3470 SDValue Inp = Op.getOperand(0);
3471 MVT InpTy = ty(Inp);
3472 MVT ResTy = ty(Op);
3473
3474 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3475 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3476 assert(InpWidth != ResWidth);
3477
3478 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3479 return Op;
3480
3481 const SDLoc &dl(Op);
3482 unsigned NumElems = InpTy.getVectorNumElements();
3483 assert(NumElems == ResTy.getVectorNumElements());
3484
3485 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3486 MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
3487 switch (Opc) {
3488 case HexagonISD::SSAT:
3489 case HexagonISD::USAT:
3490 return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
3491    case HexagonISD::TL_EXTEND:
3492    case HexagonISD::TL_TRUNCATE:
3493      return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
3494 default:
3495 llvm_unreachable("Unexpected opcode");
3496 }
3497 };
3498
3499 SDValue S = Inp;
3500 if (InpWidth < ResWidth) {
3501 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3502 while (InpWidth * 2 <= ResWidth)
3503 S = repeatOp(InpWidth *= 2, S);
3504 } else {
3505 // InpWidth > ResWidth
3506 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3507 while (InpWidth / 2 >= ResWidth)
3508 S = repeatOp(InpWidth /= 2, S);
3509 }
3510 return S;
3511}
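// Illustrative trace: saturating v64i32 down to v64i8 (a 4x width change) is
// rewritten into factor-of-2 steps,
//
//   v64i32 --USAT--> v64i16 --USAT--> v64i8
//
// i.e. repeatOp(16) followed by repeatOp(8). A conversion that already
// changes the element width by exactly 2x (e.g. i16 -> i32) is returned
// unchanged.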
3512
3513SDValue
3514HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3515 SDValue Inp0 = Op.getOperand(0);
3516 MVT InpTy = ty(Inp0);
3517 MVT ResTy = ty(Op);
3518 unsigned InpWidth = InpTy.getSizeInBits();
3519 unsigned ResWidth = ResTy.getSizeInBits();
3520 unsigned Opc = Op.getOpcode();
3521
3522 if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
3523 // First, make sure that the narrower type is widened to HVX.
3524 // This may cause the result to be wider than what the legalizer
3525 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3526 // desired type.
3527 auto [WInpTy, WResTy] =
3528 InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
3529 : typeWidenToWider(InpTy, typeWidenToHvx(ResTy));
3530 SDValue W = appendUndef(Inp0, WInpTy, DAG);
3531    SDValue S;
3532    if (Opc == HexagonISD::TL_EXTEND || Opc == HexagonISD::TL_TRUNCATE) {
3533 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
3534 Op.getOperand(2));
3535 } else {
3536 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
3537 }
3538 SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
3539 return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
3540 } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3541 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3542 } else {
3543 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3544 return RemoveTLWrapper(Op, DAG);
3545 }
3546 llvm_unreachable("Unexpected situation");
3547}
3548
3549void
3550HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3551      SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3552  unsigned Opc = N->getOpcode();
3553 SDValue Op(N, 0);
3554 SDValue Inp0; // Optional first argument.
3555 if (N->getNumOperands() > 0)
3556 Inp0 = Op.getOperand(0);
3557
3558 switch (Opc) {
3559 case ISD::ANY_EXTEND:
3560 case ISD::SIGN_EXTEND:
3561 case ISD::ZERO_EXTEND:
3562 case ISD::TRUNCATE:
3563 if (Subtarget.isHVXElementType(ty(Op)) &&
3564 Subtarget.isHVXElementType(ty(Inp0))) {
3565 Results.push_back(CreateTLWrapper(Op, DAG));
3566 }
3567 break;
3568 case ISD::SETCC:
3569 if (shouldWidenToHvx(ty(Inp0), DAG)) {
3570 if (SDValue T = WidenHvxSetCC(Op, DAG))
3571 Results.push_back(T);
3572 }
3573 break;
3574 case ISD::STORE: {
3575 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
3576 SDValue Store = WidenHvxStore(Op, DAG);
3577 Results.push_back(Store);
3578 }
3579 break;
3580 }
3581 case ISD::MLOAD:
3582 if (isHvxPairTy(ty(Op))) {
3583 SDValue S = SplitHvxMemOp(Op, DAG);
3584      assert(S->getOpcode() == ISD::MERGE_VALUES);
3585      Results.push_back(S.getOperand(0));
3586 Results.push_back(S.getOperand(1));
3587 }
3588 break;
3589 case ISD::MSTORE:
3590 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
3591 SDValue S = SplitHvxMemOp(Op, DAG);
3592 Results.push_back(S);
3593 }
3594 break;
3595 case ISD::SINT_TO_FP:
3596 case ISD::UINT_TO_FP:
3597 case ISD::FP_TO_SINT:
3598 case ISD::FP_TO_UINT:
3599 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3600 SDValue T = EqualizeFpIntConversion(Op, DAG);
3601 Results.push_back(T);
3602 }
3603 break;
3604 case HexagonISD::SSAT:
3605 case HexagonISD::USAT:
3606  case HexagonISD::TL_EXTEND:
3607  case HexagonISD::TL_TRUNCATE:
3608    Results.push_back(LegalizeHvxResize(Op, DAG));
3609 break;
3610 default:
3611 break;
3612 }
3613}
3614
3615void
3616HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
3617      SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3618  unsigned Opc = N->getOpcode();
3619 SDValue Op(N, 0);
3620 SDValue Inp0; // Optional first argument.
3621 if (N->getNumOperands() > 0)
3622 Inp0 = Op.getOperand(0);
3623
3624 switch (Opc) {
3625 case ISD::ANY_EXTEND:
3626 case ISD::SIGN_EXTEND:
3627 case ISD::ZERO_EXTEND:
3628 case ISD::TRUNCATE:
3629 if (Subtarget.isHVXElementType(ty(Op)) &&
3630 Subtarget.isHVXElementType(ty(Inp0))) {
3631 Results.push_back(CreateTLWrapper(Op, DAG));
3632 }
3633 break;
3634 case ISD::SETCC:
3635 if (shouldWidenToHvx(ty(Op), DAG)) {
3636 if (SDValue T = WidenHvxSetCC(Op, DAG))
3637 Results.push_back(T);
3638 }
3639 break;
3640 case ISD::LOAD: {
3641 if (shouldWidenToHvx(ty(Op), DAG)) {
3642 SDValue Load = WidenHvxLoad(Op, DAG);
3643 assert(Load->getOpcode() == ISD::MERGE_VALUES);
3644 Results.push_back(Load.getOperand(0));
3645 Results.push_back(Load.getOperand(1));
3646 }
3647 break;
3648 }
3649 case ISD::BITCAST:
3650 if (isHvxBoolTy(ty(Inp0))) {
3651 SDValue C = LowerHvxBitcast(Op, DAG);
3652 Results.push_back(C);
3653 }
3654 break;
3655 case ISD::FP_TO_SINT:
3656 case ISD::FP_TO_UINT:
3657 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3658 SDValue T = EqualizeFpIntConversion(Op, DAG);
3659 Results.push_back(T);
3660 }
3661 break;
3662 case HexagonISD::SSAT:
3663 case HexagonISD::USAT:
3664  case HexagonISD::TL_EXTEND:
3665  case HexagonISD::TL_TRUNCATE:
3666    Results.push_back(LegalizeHvxResize(Op, DAG));
3667 break;
3668 default:
3669 break;
3670 }
3671}
3672
3673SDValue
3674HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3675 DAGCombinerInfo &DCI) const {
3676 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3677 // to extract-subvector (shuffle V, pick even, pick odd)
3678
3679 assert(Op.getOpcode() == ISD::TRUNCATE);
3680 SelectionDAG &DAG = DCI.DAG;
3681 const SDLoc &dl(Op);
3682
3683  if (Op.getOperand(0).getOpcode() != ISD::BITCAST)
3684 return SDValue();
3685 SDValue Cast = Op.getOperand(0);
3686 SDValue Src = Cast.getOperand(0);
3687
3688 EVT TruncTy = Op.getValueType();
3689 EVT CastTy = Cast.getValueType();
3690 EVT SrcTy = Src.getValueType();
3691 if (SrcTy.isSimple())
3692 return SDValue();
3693 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3694 return SDValue();
3695 unsigned SrcLen = SrcTy.getVectorNumElements();
3696 unsigned CastLen = CastTy.getVectorNumElements();
3697 if (2 * CastLen != SrcLen)
3698 return SDValue();
3699
3700 SmallVector<int, 128> Mask(SrcLen);
3701 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3702 Mask[i] = 2 * i;
3703 Mask[i + CastLen] = 2 * i + 1;
3704 }
3705 SDValue Deal =
3706 DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
3707 return opSplit(Deal, dl, DAG).first;
3708}
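// Illustrative example: with V of type v2Ni8 (an N for which the type is not
// a simple MVT),
//
//   t1: vNi16 = bitcast V
//   t2: vNi8  = truncate t1
//
// the truncate keeps the low byte of each i16, i.e. the even-numbered bytes
// of V (little-endian). The "deal" shuffle above gathers the even bytes into
// the low half and the odd bytes into the high half, so the low half returned
// by opSplit is exactly t2.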
3709
3710SDValue
3711HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3712 SDValue Op, DAGCombinerInfo &DCI) const {
3713 // Fold
3714 // concat (shuffle x, y, m1), (shuffle x, y, m2)
3715 // into
3716 // shuffle (concat x, y), undef, m3
3717 if (Op.getNumOperands() != 2)
3718 return SDValue();
3719
3720 SelectionDAG &DAG = DCI.DAG;
3721 const SDLoc &dl(Op);
3722 SDValue V0 = Op.getOperand(0);
3723 SDValue V1 = Op.getOperand(1);
3724
3725 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
3726 return SDValue();
3727 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
3728 return SDValue();
3729
3730 SetVector<SDValue> Order;
3731 Order.insert(V0.getOperand(0));
3732 Order.insert(V0.getOperand(1));
3733 Order.insert(V1.getOperand(0));
3734 Order.insert(V1.getOperand(1));
3735
3736 if (Order.size() > 2)
3737 return SDValue();
3738
3739 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3740 // result must be the same.
3741 EVT InpTy = V0.getValueType();
3742 assert(InpTy.isVector());
3743 unsigned InpLen = InpTy.getVectorNumElements();
3744
3745 SmallVector<int, 128> LongMask;
3746 auto AppendToMask = [&](SDValue Shuffle) {
3747 auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
3748 ArrayRef<int> Mask = SV->getMask();
3749 SDValue X = Shuffle.getOperand(0);
3750 SDValue Y = Shuffle.getOperand(1);
3751 for (int M : Mask) {
3752 if (M == -1) {
3753 LongMask.push_back(M);
3754 continue;
3755 }
3756 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
3757 if (static_cast<unsigned>(M) >= InpLen)
3758 M -= InpLen;
3759
3760 int OutOffset = Order[0] == Src ? 0 : InpLen;
3761 LongMask.push_back(M + OutOffset);
3762 }
3763 };
3764
3765 AppendToMask(V0);
3766 AppendToMask(V1);
3767
3768 SDValue C0 = Order.front();
3769 SDValue C1 = Order.back(); // Can be same as front
3770 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
3771
3772 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
3773 return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
3774}
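// Illustrative example with x, y of type v4i32:
//
//   t1: v4i32 = vector_shuffle x, y, <0, 4, 1, 5>
//   t2: v4i32 = vector_shuffle x, y, <2, 6, 3, 7>
//   t3: v8i32 = concat_vectors t1, t2
//
// is folded into
//
//   t4: v8i32 = concat_vectors x, y
//   t5: v8i32 = vector_shuffle t4, undef, <0, 4, 1, 5, 2, 6, 3, 7>
//
// Indices that referred to x keep their value; indices that referred to y are
// rebased to point past x within the concatenated vector.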
3775
3776SDValue
3777HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3778 const {
3779 const SDLoc &dl(N);
3780 SelectionDAG &DAG = DCI.DAG;
3781 SDValue Op(N, 0);
3782 unsigned Opc = Op.getOpcode();
3783
3784  SmallVector<SDValue, 4> Ops(N->ops().begin(), N->ops().end());
3785
3786 if (Opc == ISD::TRUNCATE)
3787 return combineTruncateBeforeLegal(Op, DCI);
3788 if (Opc == ISD::CONCAT_VECTORS)
3789 return combineConcatVectorsBeforeLegal(Op, DCI);
3790
3791 if (DCI.isBeforeLegalizeOps())
3792 return SDValue();
3793
3794 switch (Opc) {
3795 case ISD::VSELECT: {
3796 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
3797 SDValue Cond = Ops[0];
3798 if (Cond->getOpcode() == ISD::XOR) {
3799 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3800 if (C1->getOpcode() == HexagonISD::QTRUE)
3801 return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
3802 }
3803 break;
3804 }
3805 case HexagonISD::V2Q:
3806 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
3807 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
3808 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
3809 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
3810 }
3811 break;
3812 case HexagonISD::Q2V:
3813 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
3814 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
3815 DAG.getAllOnesConstant(dl, MVT::i32));
3816 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
3817 return getZero(dl, ty(Op), DAG);
3818 break;
3819    case HexagonISD::VINSERTW0:
3820      if (isUndef(Ops[1]))
3821 return Ops[0];
3822 break;
3823 case HexagonISD::VROR: {
3824 if (Ops[0].getOpcode() == HexagonISD::VROR) {
3825 SDValue Vec = Ops[0].getOperand(0);
3826 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
3827 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
3828 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
3829 }
3830 break;
3831 }
3832 }
3833
3834 return SDValue();
3835}
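// Illustrative example of the VROR combine above (vector type chosen for
// illustration):
//
//   t1: v32i32 = HexagonISD::VROR t0, a
//   t2: v32i32 = HexagonISD::VROR t1, b
//
// is replaced by a single rotate by the combined amount:
//
//   t2: v32i32 = HexagonISD::VROR t0, (add b, a)
//
// Likewise, Q2V of QTRUE/QFALSE folds to an all-ones/all-zero splat, and a
// vselect whose condition is (xor x, qtrue) is rewritten to select on x with
// its two vector operands swapped.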
3836
3837bool
3838HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
3839 if (Subtarget.isHVXVectorType(Ty, true))
3840 return false;
3841 auto Action = getPreferredHvxVectorAction(Ty);
3842  if (Action == TargetLoweringBase::TypeSplitVector)
3843    return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3844 return false;
3845}
3846
3847bool
3848HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
3849 if (Subtarget.isHVXVectorType(Ty, true))
3850 return false;
3851 auto Action = getPreferredHvxVectorAction(Ty);
3852  if (Action == TargetLoweringBase::TypeWidenVector)
3853    return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3854 return false;
3855}
3856
3857bool
3858HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
3859 if (!Subtarget.useHVXOps())
3860 return false;
3861 // If the type of any result, or any operand type are HVX vector types,
3862 // this is an HVX operation.
3863 auto IsHvxTy = [this](EVT Ty) {
3864 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
3865 };
3866 auto IsHvxOp = [this](SDValue Op) {
3867 return Op.getValueType().isSimple() &&
3868 Subtarget.isHVXVectorType(ty(Op), true);
3869 };
3870 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
3871 return true;
3872
3873 // Check if this could be an HVX operation after type widening.
3874 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3875 if (!Op.getValueType().isSimple())
3876 return false;
3877 MVT ValTy = ty(Op);
3878 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
3879 };
3880
3881 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
3882 if (IsWidenedToHvx(SDValue(N, i)))
3883 return true;
3884 }
3885 return llvm::any_of(N->ops(), IsWidenedToHvx);
3886}