LLVM 22.0.0git
HexagonISelLoweringHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
30static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
32 cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
33
34static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
35static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
36static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
37static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
38
39static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
40 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
41 MVT ElemTy = Ty.getScalarType();
42 switch (ElemTy.SimpleTy) {
43 case MVT::f16:
44 return std::make_tuple(5, 15, 10);
45 case MVT::f32:
46 return std::make_tuple(8, 127, 23);
47 case MVT::f64:
48 return std::make_tuple(11, 1023, 52);
49 default:
50 break;
51 }
52 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
53}
54
55void
56HexagonTargetLowering::initializeHVXLowering() {
57 if (Subtarget.useHVX64BOps()) {
58 addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
59 addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
60 addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
61 addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
62 addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
63 addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
64 // These "short" boolean vector types should be legal because
65 // they will appear as results of vector compares. If they were
66 // not legal, type legalization would try to make them legal
67 // and that would require using operations that do not use or
68 // produce such types. That, in turn, would imply using custom
69 // nodes, which would be unoptimizable by the DAG combiner.
70 // The idea is to rely on target-independent operations as much
71 // as possible.
72 addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
73 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
74 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
75 } else if (Subtarget.useHVX128BOps()) {
76 addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
77 addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
78 addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
79 addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
80 addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
81 addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
82 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
83 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
84 addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
85 if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
86 addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
87 addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
88 addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
89 addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
90 }
91 }
92
93 // Set up operation actions.
94
95 bool Use64b = Subtarget.useHVX64BOps();
96 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
97 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
98 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
99 MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
100 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
101
102 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
104 AddPromotedToType(Opc, FromTy, ToTy);
105 };
106
107 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
108 // Note: v16i1 -> i16 is handled in type legalization instead of op
109 // legalization.
110 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
111 setOperationAction(ISD::BITCAST, MVT::i32, Custom);
112 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
113 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
114 setOperationAction(ISD::BITCAST, MVT::v128i1, Custom);
115 setOperationAction(ISD::BITCAST, MVT::i128, Custom);
119
120 if (Subtarget.useHVX128BOps())
121 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
122 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
123 Subtarget.useHVXFloatingPoint()) {
124
125 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
126 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
127
128 for (MVT T : FloatV) {
132 setOperationAction(ISD::FMINIMUMNUM, T, Legal);
133 setOperationAction(ISD::FMAXIMUMNUM, T, Legal);
134
137
140
141 setOperationAction(ISD::MLOAD, T, Custom);
142 setOperationAction(ISD::MSTORE, T, Custom);
143 // Custom-lower BUILD_VECTOR. The standard (target-independent)
144 // handling of it would convert it to a load, which is not always
145 // the optimal choice.
147 }
148
149
150 // BUILD_VECTOR with f16 operands cannot be promoted without
151 // promoting the result, so lower the node to vsplat or constant pool
155
156 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
157 // generated.
158 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
159 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
160 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
161 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
162
163 for (MVT P : FloatW) {
164 setOperationAction(ISD::LOAD, P, Custom);
165 setOperationAction(ISD::STORE, P, Custom);
169 setOperationAction(ISD::FMINIMUMNUM, P, Custom);
170 setOperationAction(ISD::FMAXIMUMNUM, P, Custom);
173
174 // Custom-lower BUILD_VECTOR. The standard (target-independent)
175 // handling of it would convert it to a load, which is not always
176 // the optimal choice.
178 // Make concat-vectors custom to handle concats of more than 2 vectors.
180
181 setOperationAction(ISD::MLOAD, P, Custom);
182 setOperationAction(ISD::MSTORE, P, Custom);
183 }
184
185 if (Subtarget.useHVXQFloatOps()) {
186 setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Custom);
188 } else if (Subtarget.useHVXIEEEFPOps()) {
189 setOperationAction(ISD::FP_EXTEND, MVT::v64f32, Legal);
191 }
192 }
193
194 for (MVT T : LegalV) {
197
211 if (T != ByteV) {
215 }
216
219 if (T.getScalarType() != MVT::i32) {
222 }
223
225 setOperationAction(ISD::LOAD, T, Custom);
226 setOperationAction(ISD::MLOAD, T, Custom);
227 setOperationAction(ISD::MSTORE, T, Custom);
228 if (T.getScalarType() != MVT::i32) {
231 }
232
234 // Make concat-vectors custom to handle concats of more than 2 vectors.
245 if (T != ByteV) {
247 // HVX only has shifts of words and halfwords.
251
252 // Promote all shuffles to operate on vectors of bytes.
253 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
254 }
255
256 if (Subtarget.useHVXFloatingPoint()) {
257 // Same action for both QFloat and IEEE.
262 }
263
271 }
272
273 for (MVT T : LegalW) {
274 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
275 // independent) handling of it would convert it to a load, which is
276 // not always the optimal choice.
278 // Make concat-vectors custom to handle concats of more than 2 vectors.
280
281 // Custom-lower these operations for pairs. Expand them into a concat
282 // of the corresponding operations on individual vectors.
291
292 setOperationAction(ISD::LOAD, T, Custom);
293 setOperationAction(ISD::STORE, T, Custom);
294 setOperationAction(ISD::MLOAD, T, Custom);
295 setOperationAction(ISD::MSTORE, T, Custom);
300
313 if (T != ByteW) {
317
318 // Promote all shuffles to operate on vectors of bytes.
319 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
320 }
323
326 if (T.getScalarType() != MVT::i32) {
329 }
330
331 if (Subtarget.useHVXFloatingPoint()) {
332 // Same action for both QFloat and IEEE.
337 }
338 }
339
340 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
341 setOperationAction(ISD::MULHS, WordV, Custom); // -> _LOHI
342 setOperationAction(ISD::MULHU, WordV, Custom); // -> _LOHI
345
346 setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
347 setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
348 setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
349 setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
350 setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
351 setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
352 setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
353 setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
354 setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
355 setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
356 setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
357 setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
358
359 setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
360 setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
361 setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
362 setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
363 setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
364 setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
365 setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
366 setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
367 setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
368 setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
369 setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
370 setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
371
372 // Boolean vectors.
373
374 for (MVT T : LegalW) {
375 // Boolean types for vector pairs will overlap with the boolean
376 // types for single vectors, e.g.
377 // v64i8 -> v64i1 (single)
378 // v64i16 -> v64i1 (pair)
379 // Set these actions first, and allow the single actions to overwrite
380 // any duplicates.
381 MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
386 // Masked load/store takes a mask that may need splitting.
387 setOperationAction(ISD::MLOAD, BoolW, Custom);
388 setOperationAction(ISD::MSTORE, BoolW, Custom);
389 }
390
391 for (MVT T : LegalV) {
392 MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
403 }
404
405 if (Use64b) {
406 for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
408 } else {
409 for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
411 }
412
413 // Handle store widening for short vectors.
414 unsigned HwLen = Subtarget.getVectorLength();
415 for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
416 if (ElemTy == MVT::i1)
417 continue;
418 int ElemWidth = ElemTy.getFixedSizeInBits();
419 int MaxElems = (8*HwLen) / ElemWidth;
420 for (int N = 2; N < MaxElems; N *= 2) {
421 MVT VecTy = MVT::getVectorVT(ElemTy, N);
422 auto Action = getPreferredVectorAction(VecTy);
424 setOperationAction(ISD::LOAD, VecTy, Custom);
425 setOperationAction(ISD::STORE, VecTy, Custom);
431 if (Subtarget.useHVXFloatingPoint()) {
436 }
437
438 MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
439 if (!isTypeLegal(BoolTy))
441 }
442 }
443 }
444
446}
447
448unsigned
449HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
450 MVT ElemTy = VecTy.getVectorElementType();
451 unsigned VecLen = VecTy.getVectorNumElements();
452 unsigned HwLen = Subtarget.getVectorLength();
453
454 // Split vectors of i1 that exceed byte vector length.
455 if (ElemTy == MVT::i1 && VecLen > HwLen)
457
458 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
459 // For shorter vectors of i1, widen them if any of the corresponding
460 // vectors of integers needs to be widened.
461 if (ElemTy == MVT::i1) {
462 for (MVT T : Tys) {
463 assert(T != MVT::i1);
464 auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
465 if (A != ~0u)
466 return A;
467 }
468 return ~0u;
469 }
470
471 // If the size of VecTy is at least half of the vector length,
472 // widen the vector. Note: the threshold was not selected in
473 // any scientific way.
474 if (llvm::is_contained(Tys, ElemTy)) {
475 unsigned VecWidth = VecTy.getSizeInBits();
476 unsigned HwWidth = 8*HwLen;
477 if (VecWidth > 2*HwWidth)
479
480 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
481 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
483 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
485 }
486
487 // Defer to default.
488 return ~0u;
489}
490
491unsigned
492HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
493 unsigned Opc = Op.getOpcode();
494 switch (Opc) {
499 }
501}
502
504HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
505 const SDLoc &dl, SelectionDAG &DAG) const {
507 IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
508 append_range(IntOps, Ops);
509 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
510}
511
512MVT
513HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
514 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
515
516 MVT ElemTy = Tys.first.getVectorElementType();
517 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
518 Tys.second.getVectorNumElements());
519}
520
521HexagonTargetLowering::TypePair
522HexagonTargetLowering::typeSplit(MVT VecTy) const {
523 assert(VecTy.isVector());
524 unsigned NumElem = VecTy.getVectorNumElements();
525 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
526 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
527 return { HalfTy, HalfTy };
528}
529
530MVT
531HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
532 MVT ElemTy = VecTy.getVectorElementType();
533 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
534 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
535}
536
537MVT
538HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
539 MVT ElemTy = VecTy.getVectorElementType();
540 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
541 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
542}
543
545HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
546 SelectionDAG &DAG) const {
547 if (ty(Vec).getVectorElementType() == ElemTy)
548 return Vec;
549 MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
550 return DAG.getBitcast(CastTy, Vec);
551}
552
554HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
555 SelectionDAG &DAG) const {
556 return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
557 Ops.first, Ops.second);
558}
559
560HexagonTargetLowering::VectorPair
561HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
562 SelectionDAG &DAG) const {
563 TypePair Tys = typeSplit(ty(Vec));
564 if (Vec.getOpcode() == HexagonISD::QCAT)
565 return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
566 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
567}
568
569bool
570HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
571 return Subtarget.isHVXVectorType(Ty) &&
572 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
573}
574
575bool
576HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
577 return Subtarget.isHVXVectorType(Ty) &&
578 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
579}
580
581bool
582HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
583 return Subtarget.isHVXVectorType(Ty, true) &&
584 Ty.getVectorElementType() == MVT::i1;
585}
586
587bool HexagonTargetLowering::allowsHvxMemoryAccess(
588 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
589 // Bool vectors are excluded by default, but make it explicit to
590 // emphasize that bool vectors cannot be loaded or stored.
591 // Also, disallow double vector stores (to prevent unnecessary
592 // store widening in DAG combiner).
593 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
594 return false;
595 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
596 return false;
597 if (Fast)
598 *Fast = 1;
599 return true;
600}
601
602bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
603 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
604 if (!Subtarget.isHVXVectorType(VecTy))
605 return false;
606 // XXX Should this be false? vmemu are a bit slower than vmem.
607 if (Fast)
608 *Fast = 1;
609 return true;
610}
611
612void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
613 MachineInstr &MI, SDNode *Node) const {
614 unsigned Opc = MI.getOpcode();
615 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
616 MachineBasicBlock &MB = *MI.getParent();
617 MachineFunction &MF = *MB.getParent();
618 MachineRegisterInfo &MRI = MF.getRegInfo();
619 DebugLoc DL = MI.getDebugLoc();
620 auto At = MI.getIterator();
621
622 switch (Opc) {
623 case Hexagon::PS_vsplatib:
624 if (Subtarget.useHVXV62Ops()) {
625 // SplatV = A2_tfrsi #imm
626 // OutV = V6_lvsplatb SplatV
627 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
628 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
629 .add(MI.getOperand(1));
630 Register OutV = MI.getOperand(0).getReg();
631 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
632 .addReg(SplatV);
633 } else {
634 // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
635 // OutV = V6_lvsplatw SplatV
636 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
637 const MachineOperand &InpOp = MI.getOperand(1);
638 assert(InpOp.isImm());
639 uint32_t V = InpOp.getImm() & 0xFF;
640 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
641 .addImm(V << 24 | V << 16 | V << 8 | V);
642 Register OutV = MI.getOperand(0).getReg();
643 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
644 }
645 MB.erase(At);
646 break;
647 case Hexagon::PS_vsplatrb:
648 if (Subtarget.useHVXV62Ops()) {
649 // OutV = V6_lvsplatb Inp
650 Register OutV = MI.getOperand(0).getReg();
651 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
652 .add(MI.getOperand(1));
653 } else {
654 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
655 const MachineOperand &InpOp = MI.getOperand(1);
656 BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
657 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
658 Register OutV = MI.getOperand(0).getReg();
659 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
660 .addReg(SplatV);
661 }
662 MB.erase(At);
663 break;
664 case Hexagon::PS_vsplatih:
665 if (Subtarget.useHVXV62Ops()) {
666 // SplatV = A2_tfrsi #imm
667 // OutV = V6_lvsplath SplatV
668 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
669 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
670 .add(MI.getOperand(1));
671 Register OutV = MI.getOperand(0).getReg();
672 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
673 .addReg(SplatV);
674 } else {
675 // SplatV = A2_tfrsi #imm:#imm
676 // OutV = V6_lvsplatw SplatV
677 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
678 const MachineOperand &InpOp = MI.getOperand(1);
679 assert(InpOp.isImm());
680 uint32_t V = InpOp.getImm() & 0xFFFF;
681 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
682 .addImm(V << 16 | V);
683 Register OutV = MI.getOperand(0).getReg();
684 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
685 }
686 MB.erase(At);
687 break;
688 case Hexagon::PS_vsplatrh:
689 if (Subtarget.useHVXV62Ops()) {
690 // OutV = V6_lvsplath Inp
691 Register OutV = MI.getOperand(0).getReg();
692 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
693 .add(MI.getOperand(1));
694 } else {
695 // SplatV = A2_combine_ll Inp, Inp
696 // OutV = V6_lvsplatw SplatV
697 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
698 const MachineOperand &InpOp = MI.getOperand(1);
699 BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
700 .addReg(InpOp.getReg(), 0, InpOp.getSubReg())
701 .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
702 Register OutV = MI.getOperand(0).getReg();
703 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
704 }
705 MB.erase(At);
706 break;
707 case Hexagon::PS_vsplatiw:
708 case Hexagon::PS_vsplatrw:
709 if (Opc == Hexagon::PS_vsplatiw) {
710 // SplatV = A2_tfrsi #imm
711 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
712 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
713 .add(MI.getOperand(1));
714 MI.getOperand(1).ChangeToRegister(SplatV, false);
715 }
716 // OutV = V6_lvsplatw SplatV/Inp
717 MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
718 break;
719 }
720}
721
723HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
724 SelectionDAG &DAG) const {
725 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
726 ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
727
728 unsigned ElemWidth = ElemTy.getSizeInBits();
729 if (ElemWidth == 8)
730 return ElemIdx;
731
732 unsigned L = Log2_32(ElemWidth/8);
733 const SDLoc &dl(ElemIdx);
734 return DAG.getNode(ISD::SHL, dl, MVT::i32,
735 {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
736}
737
739HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
740 SelectionDAG &DAG) const {
741 unsigned ElemWidth = ElemTy.getSizeInBits();
742 assert(ElemWidth >= 8 && ElemWidth <= 32);
743 if (ElemWidth == 32)
744 return Idx;
745
746 if (ty(Idx) != MVT::i32)
747 Idx = DAG.getBitcast(MVT::i32, Idx);
748 const SDLoc &dl(Idx);
749 SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
750 SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
751 return SubIdx;
752}
753
755HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
756 SDValue Op1, ArrayRef<int> Mask,
757 SelectionDAG &DAG) const {
758 MVT OpTy = ty(Op0);
759 assert(OpTy == ty(Op1));
760
761 MVT ElemTy = OpTy.getVectorElementType();
762 if (ElemTy == MVT::i8)
763 return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
764 assert(ElemTy.getSizeInBits() >= 8);
765
766 MVT ResTy = tyVector(OpTy, MVT::i8);
767 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
768
769 SmallVector<int,128> ByteMask;
770 for (int M : Mask) {
771 if (M < 0) {
772 for (unsigned I = 0; I != ElemSize; ++I)
773 ByteMask.push_back(-1);
774 } else {
775 int NewM = M*ElemSize;
776 for (unsigned I = 0; I != ElemSize; ++I)
777 ByteMask.push_back(NewM+I);
778 }
779 }
780 assert(ResTy.getVectorNumElements() == ByteMask.size());
781 return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
782 opCastElem(Op1, MVT::i8, DAG), ByteMask);
783}
784
786HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
787 const SDLoc &dl, MVT VecTy,
788 SelectionDAG &DAG) const {
789 unsigned VecLen = Values.size();
790 MachineFunction &MF = DAG.getMachineFunction();
791 MVT ElemTy = VecTy.getVectorElementType();
792 unsigned ElemWidth = ElemTy.getSizeInBits();
793 unsigned HwLen = Subtarget.getVectorLength();
794
795 unsigned ElemSize = ElemWidth / 8;
796 assert(ElemSize*VecLen == HwLen);
798
799 if (VecTy.getVectorElementType() != MVT::i32 &&
800 !(Subtarget.useHVXFloatingPoint() &&
801 VecTy.getVectorElementType() == MVT::f32)) {
802 assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
803 unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
804 MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
805 for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
806 SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
807 Words.push_back(DAG.getBitcast(MVT::i32, W));
808 }
809 } else {
810 for (SDValue V : Values)
811 Words.push_back(DAG.getBitcast(MVT::i32, V));
812 }
813 auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
814 unsigned NumValues = Values.size();
815 assert(NumValues > 0);
816 bool IsUndef = true;
817 for (unsigned i = 0; i != NumValues; ++i) {
818 if (Values[i].isUndef())
819 continue;
820 IsUndef = false;
821 if (!SplatV.getNode())
822 SplatV = Values[i];
823 else if (SplatV != Values[i])
824 return false;
825 }
826 if (IsUndef)
827 SplatV = Values[0];
828 return true;
829 };
830
831 unsigned NumWords = Words.size();
832 SDValue SplatV;
833 bool IsSplat = isSplat(Words, SplatV);
834 if (IsSplat && isUndef(SplatV))
835 return DAG.getUNDEF(VecTy);
836 if (IsSplat) {
837 assert(SplatV.getNode());
838 if (isNullConstant(SplatV))
839 return getZero(dl, VecTy, DAG);
840 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
841 SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
842 return DAG.getBitcast(VecTy, S);
843 }
844
845 // Delay recognizing constant vectors until here, so that we can generate
846 // a vsplat.
847 SmallVector<ConstantInt*, 128> Consts(VecLen);
848 bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
849 if (AllConst) {
850 ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
851 (Constant**)Consts.end());
852 Constant *CV = ConstantVector::get(Tmp);
853 Align Alignment(HwLen);
854 SDValue CP =
855 LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
856 return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
858 }
859
860 // A special case is a situation where the vector is built entirely from
861 // elements extracted from another vector. This could be done via a shuffle
862 // more efficiently, but typically, the size of the source vector will not
863 // match the size of the vector being built (which precludes the use of a
864 // shuffle directly).
865 // This only handles a single source vector, and the vector being built
866 // should be of a sub-vector type of the source vector type.
867 auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
868 SmallVectorImpl<int> &SrcIdx) {
869 SDValue Vec;
870 for (SDValue V : Values) {
871 if (isUndef(V)) {
872 SrcIdx.push_back(-1);
873 continue;
874 }
875 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
876 return false;
877 // All extracts should come from the same vector.
878 SDValue T = V.getOperand(0);
879 if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
880 return false;
881 Vec = T;
882 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
883 if (C == nullptr)
884 return false;
885 int I = C->getSExtValue();
886 assert(I >= 0 && "Negative element index");
887 SrcIdx.push_back(I);
888 }
889 SrcVec = Vec;
890 return true;
891 };
892
893 SmallVector<int,128> ExtIdx;
894 SDValue ExtVec;
895 if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
896 MVT ExtTy = ty(ExtVec);
897 unsigned ExtLen = ExtTy.getVectorNumElements();
898 if (ExtLen == VecLen || ExtLen == 2*VecLen) {
899 // Construct a new shuffle mask that will produce a vector with the same
900 // number of elements as the input vector, and such that the vector we
901 // want will be the initial subvector of it.
902 SmallVector<int,128> Mask;
903 BitVector Used(ExtLen);
904
905 for (int M : ExtIdx) {
906 Mask.push_back(M);
907 if (M >= 0)
908 Used.set(M);
909 }
910 // Fill the rest of the mask with the unused elements of ExtVec in hopes
911 // that it will result in a permutation of ExtVec's elements. It's still
912 // fine if it doesn't (e.g. if undefs are present, or elements are
913 // repeated), but permutations can always be done efficiently via vdelta
914 // and vrdelta.
915 for (unsigned I = 0; I != ExtLen; ++I) {
916 if (Mask.size() == ExtLen)
917 break;
918 if (!Used.test(I))
919 Mask.push_back(I);
920 }
921
922 SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
923 DAG.getUNDEF(ExtTy), Mask);
924 return ExtLen == VecLen ? S : LoHalf(S, DAG);
925 }
926 }
927
928 // Find most common element to initialize vector with. This is to avoid
929 // unnecessary vinsert/valign for cases where the same value is present
930 // many times. Creates a histogram of the vector's elements to find the
931 // most common element n.
932 assert(4*Words.size() == Subtarget.getVectorLength());
933 int VecHist[32];
934 int n = 0;
935 for (unsigned i = 0; i != NumWords; ++i) {
936 VecHist[i] = 0;
937 if (Words[i].isUndef())
938 continue;
939 for (unsigned j = i; j != NumWords; ++j)
940 if (Words[i] == Words[j])
941 VecHist[i]++;
942
943 if (VecHist[i] > VecHist[n])
944 n = i;
945 }
946
947 SDValue HalfV = getZero(dl, VecTy, DAG);
948 if (VecHist[n] > 1) {
949 SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
950 HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
951 {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
952 }
953 SDValue HalfV0 = HalfV;
954 SDValue HalfV1 = HalfV;
955
956 // Construct two halves in parallel, then or them together. Rn and Rm count
957 // number of rotations needed before the next element. One last rotation is
958 // performed post-loop to position the last element.
959 int Rn = 0, Rm = 0;
960 SDValue Sn, Sm;
961 SDValue N = HalfV0;
962 SDValue M = HalfV1;
963 for (unsigned i = 0; i != NumWords/2; ++i) {
964 // Rotate by element count since last insertion.
965 if (Words[i] != Words[n] || VecHist[n] <= 1) {
966 Sn = DAG.getConstant(Rn, dl, MVT::i32);
967 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
968 N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
969 {HalfV0, Words[i]});
970 Rn = 0;
971 }
972 if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
973 Sm = DAG.getConstant(Rm, dl, MVT::i32);
974 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
975 M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
976 {HalfV1, Words[i+NumWords/2]});
977 Rm = 0;
978 }
979 Rn += 4;
980 Rm += 4;
981 }
982 // Perform last rotation.
983 Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
984 Sm = DAG.getConstant(Rm, dl, MVT::i32);
985 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
986 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
987
988 SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
989 SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
990
991 SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
992
993 SDValue OutV =
994 DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
995 return OutV;
996}
997
999HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
1000 unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
1001 MVT PredTy = ty(PredV);
1002 unsigned HwLen = Subtarget.getVectorLength();
1003 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1004
1005 if (Subtarget.isHVXVectorType(PredTy, true)) {
1006 // Move the vector predicate SubV to a vector register, and scale it
1007 // down to match the representation (bytes per type element) that VecV
1008 // uses. The scaling down will pick every 2nd or 4th (every Scale-th
1009 // in general) element and put them at the front of the resulting
1010 // vector. This subvector will then be inserted into the Q2V of VecV.
1011 // To avoid having an operation that generates an illegal type (short
1012 // vector), generate a full size vector.
1013 //
1014 SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
1015 SmallVector<int,128> Mask(HwLen);
1016 // Scale = BitBytes(PredV) / Given BitBytes.
1017 unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
1018 unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
1019
1020 for (unsigned i = 0; i != HwLen; ++i) {
1021 unsigned Num = i % Scale;
1022 unsigned Off = i / Scale;
1023 Mask[BlockLen*Num + Off] = i;
1024 }
1025 SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
1026 if (!ZeroFill)
1027 return S;
1028 // Fill the bytes beyond BlockLen with 0s.
1029 // V6_pred_scalar2 cannot fill the entire predicate, so it only works
1030 // when BlockLen < HwLen.
1031 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1032 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1033 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1034 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1035 SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
1036 return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
1037 }
1038
1039 // Make sure that this is a valid scalar predicate.
1040 assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
1041
1042 unsigned Bytes = 8 / PredTy.getVectorNumElements();
1043 SmallVector<SDValue,4> Words[2];
1044 unsigned IdxW = 0;
1045
1046 SDValue W0 = isUndef(PredV)
1047 ? DAG.getUNDEF(MVT::i64)
1048 : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
1049 Words[IdxW].push_back(HiHalf(W0, DAG));
1050 Words[IdxW].push_back(LoHalf(W0, DAG));
1051
1052 while (Bytes < BitBytes) {
1053 IdxW ^= 1;
1054 Words[IdxW].clear();
1055
1056 if (Bytes < 4) {
1057 for (const SDValue &W : Words[IdxW ^ 1]) {
1058 SDValue T = expandPredicate(W, dl, DAG);
1059 Words[IdxW].push_back(HiHalf(T, DAG));
1060 Words[IdxW].push_back(LoHalf(T, DAG));
1061 }
1062 } else {
1063 for (const SDValue &W : Words[IdxW ^ 1]) {
1064 Words[IdxW].push_back(W);
1065 Words[IdxW].push_back(W);
1066 }
1067 }
1068 Bytes *= 2;
1069 }
1070
1071 assert(Bytes == BitBytes);
1072
1073 SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
1074 SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
1075 for (const SDValue &W : Words[IdxW]) {
1076 Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
1077 Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
1078 }
1079
1080 return Vec;
1081}
1082
1083SDValue
1084HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1085 const SDLoc &dl, MVT VecTy,
1086 SelectionDAG &DAG) const {
1087 // Construct a vector V of bytes, such that a comparison V >u 0 would
1088 // produce the required vector predicate.
1089 unsigned VecLen = Values.size();
1090 unsigned HwLen = Subtarget.getVectorLength();
1091 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1093 bool AllT = true, AllF = true;
1094
1095 auto IsTrue = [] (SDValue V) {
1096 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1097 return !N->isZero();
1098 return false;
1099 };
1100 auto IsFalse = [] (SDValue V) {
1101 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1102 return N->isZero();
1103 return false;
1104 };
1105
1106 if (VecLen <= HwLen) {
1107 // In the hardware, each bit of a vector predicate corresponds to a byte
1108 // of a vector register. Calculate how many bytes does a bit of VecTy
1109 // correspond to.
1110 assert(HwLen % VecLen == 0);
1111 unsigned BitBytes = HwLen / VecLen;
1112 for (SDValue V : Values) {
1113 AllT &= IsTrue(V);
1114 AllF &= IsFalse(V);
1115
1116 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1117 : DAG.getUNDEF(MVT::i8);
1118 for (unsigned B = 0; B != BitBytes; ++B)
1119 Bytes.push_back(Ext);
1120 }
1121 } else {
1122 // There are as many i1 values, as there are bits in a vector register.
1123 // Divide the values into groups of 8 and check that each group consists
1124 // of the same value (ignoring undefs).
1125 for (unsigned I = 0; I != VecLen; I += 8) {
1126 unsigned B = 0;
1127 // Find the first non-undef value in this group.
1128 for (; B != 8; ++B) {
1129 if (!Values[I+B].isUndef())
1130 break;
1131 }
1132 SDValue F = Values[I+B];
1133 AllT &= IsTrue(F);
1134 AllF &= IsFalse(F);
1135
1136 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1137 : DAG.getUNDEF(MVT::i8);
1138 Bytes.push_back(Ext);
1139 // Verify that the rest of values in the group are the same as the
1140 // first.
1141 for (; B != 8; ++B)
1142 assert(Values[I+B].isUndef() || Values[I+B] == F);
1143 }
1144 }
1145
1146 if (AllT)
1147 return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
1148 if (AllF)
1149 return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
1150
1151 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1152 SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
1153 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1154}
1155
1156SDValue
1157HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1158 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1159 MVT ElemTy = ty(VecV).getVectorElementType();
1160
1161 unsigned ElemWidth = ElemTy.getSizeInBits();
1162 assert(ElemWidth >= 8 && ElemWidth <= 32);
1163 (void)ElemWidth;
1164
1165 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1166 SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1167 {VecV, ByteIdx});
1168 if (ElemTy == MVT::i32)
1169 return ExWord;
1170
1171 // Have an extracted word, need to extract the smaller element out of it.
1172 // 1. Extract the bits of (the original) IdxV that correspond to the index
1173 // of the desired element in the 32-bit word.
1174 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1175 // 2. Extract the element from the word.
1176 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1177 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1178}
1179
1180SDValue
1181HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1182 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1183 // Implement other return types if necessary.
1184 assert(ResTy == MVT::i1);
1185
1186 unsigned HwLen = Subtarget.getVectorLength();
1187 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1188 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1189
1190 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1191 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1192 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1193
1194 SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1195 SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1196 return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1197}
1198
1199SDValue
1200HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
1201 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1202 MVT ElemTy = ty(VecV).getVectorElementType();
1203
1204 unsigned ElemWidth = ElemTy.getSizeInBits();
1205 assert(ElemWidth >= 8 && ElemWidth <= 32);
1206 (void)ElemWidth;
1207
1208 auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
1209 SDValue ByteIdxV) {
1210 MVT VecTy = ty(VecV);
1211 unsigned HwLen = Subtarget.getVectorLength();
1212 SDValue MaskV =
1213 DAG.getNode(ISD::AND, dl, MVT::i32,
1214 {ByteIdxV, DAG.getSignedConstant(-4, dl, MVT::i32)});
1215 SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
1216 SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
1217 SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1218 {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
1219 SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
1220 return TorV;
1221 };
1222
1223 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1224 if (ElemTy == MVT::i32)
1225 return InsertWord(VecV, ValV, ByteIdx);
1226
1227 // If this is not inserting a 32-bit word, convert it into such a thing.
1228 // 1. Extract the existing word from the target vector.
1229 SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
1230 {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
1231 SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
1232 dl, MVT::i32, DAG);
1233
1234 // 2. Treating the extracted word as a 32-bit vector, insert the given
1235 // value into it.
1236 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1237 MVT SubVecTy = tyVector(ty(Ext), ElemTy);
1238 SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
1239 ValV, SubIdx, dl, ElemTy, DAG);
1240
1241 // 3. Insert the 32-bit word back into the original vector.
1242 return InsertWord(VecV, Ins, ByteIdx);
1243}
1244
1245SDValue
1246HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1247 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1248 unsigned HwLen = Subtarget.getVectorLength();
1249 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1250 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1251
1252 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1253 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1254 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1255 ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1256
1257 SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1258 return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1259}
1260
1261SDValue
1262HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
1263 SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1264 MVT VecTy = ty(VecV);
1265 unsigned HwLen = Subtarget.getVectorLength();
1266 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1267 MVT ElemTy = VecTy.getVectorElementType();
1268 unsigned ElemWidth = ElemTy.getSizeInBits();
1269
1270 // If the source vector is a vector pair, get the single vector containing
1271 // the subvector of interest. The subvector will never overlap two single
1272 // vectors.
1273 if (isHvxPairTy(VecTy)) {
1274 unsigned SubIdx = Hexagon::vsub_lo;
1275 if (Idx * ElemWidth >= 8 * HwLen) {
1276 SubIdx = Hexagon::vsub_hi;
1277 Idx -= VecTy.getVectorNumElements() / 2;
1278 }
1279
1280 VecTy = typeSplit(VecTy).first;
1281 VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
1282 if (VecTy == ResTy)
1283 return VecV;
1284 }
1285
1286 // The only meaningful subvectors of a single HVX vector are those that
1287 // fit in a scalar register.
1288 assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
1289
1290 MVT WordTy = tyVector(VecTy, MVT::i32);
1291 SDValue WordVec = DAG.getBitcast(WordTy, VecV);
1292 unsigned WordIdx = (Idx*ElemWidth) / 32;
1293
1294 SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
1295 SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
1296 if (ResTy.getSizeInBits() == 32)
1297 return DAG.getBitcast(ResTy, W0);
1298
1299 SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
1300 SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
1301 SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
1302 return DAG.getBitcast(ResTy, WW);
1303}
1304
1305SDValue
1306HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
1307 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1308 MVT VecTy = ty(VecV);
1309 unsigned HwLen = Subtarget.getVectorLength();
1310 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1311 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1312 // IdxV is required to be a constant.
1313 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1314
1315 unsigned ResLen = ResTy.getVectorNumElements();
1316 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1317 unsigned Offset = Idx * BitBytes;
1318 SDValue Undef = DAG.getUNDEF(ByteTy);
1319 SmallVector<int,128> Mask;
1320
1321 if (Subtarget.isHVXVectorType(ResTy, true)) {
1322 // Converting between two vector predicates. Since the result is shorter
1323 // than the source, it will correspond to a vector predicate with the
1324 // relevant bits replicated. The replication count is the ratio of the
1325 // source and target vector lengths.
1326 unsigned Rep = VecTy.getVectorNumElements() / ResLen;
1327 assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
1328 for (unsigned i = 0; i != HwLen/Rep; ++i) {
1329 for (unsigned j = 0; j != Rep; ++j)
1330 Mask.push_back(i + Offset);
1331 }
1332 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1333 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
1334 }
1335
1336 // Converting between a vector predicate and a scalar predicate. In the
1337 // vector predicate, a group of BitBytes bits will correspond to a single
1338 // i1 element of the source vector type. Those bits will all have the same
1339 // value. The same will be true for ByteVec, where each byte corresponds
1340 // to a bit in the vector predicate.
1341 // The algorithm is to traverse the ByteVec, going over the i1 values from
1342 // the source vector, and generate the corresponding representation in an
1343 // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
1344 // elements so that the interesting 8 bytes will be in the low end of the
1345 // vector.
1346 unsigned Rep = 8 / ResLen;
1347 // Make sure the output fill the entire vector register, so repeat the
1348 // 8-byte groups as many times as necessary.
1349 for (unsigned r = 0; r != HwLen/ResLen; ++r) {
1350 // This will generate the indexes of the 8 interesting bytes.
1351 for (unsigned i = 0; i != ResLen; ++i) {
1352 for (unsigned j = 0; j != Rep; ++j)
1353 Mask.push_back(Offset + i*BitBytes);
1354 }
1355 }
1356
1357 SDValue Zero = getZero(dl, MVT::i32, DAG);
1358 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1359 // Combine the two low words from ShuffV into a v8i8, and byte-compare
1360 // them against 0.
1361 SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
1362 SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1363 {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
1364 SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
1365 return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
1366 {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
1367}
1368
1369SDValue
1370HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
1371 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1372 MVT VecTy = ty(VecV);
1373 MVT SubTy = ty(SubV);
1374 unsigned HwLen = Subtarget.getVectorLength();
1375 MVT ElemTy = VecTy.getVectorElementType();
1376 unsigned ElemWidth = ElemTy.getSizeInBits();
1377
1378 bool IsPair = isHvxPairTy(VecTy);
1379 MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
1380 // The two single vectors that VecV consists of, if it's a pair.
1381 SDValue V0, V1;
1382 SDValue SingleV = VecV;
1383 SDValue PickHi;
1384
1385 if (IsPair) {
1386 V0 = LoHalf(VecV, DAG);
1387 V1 = HiHalf(VecV, DAG);
1388
1389 SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
1390 dl, MVT::i32);
1391 PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
1392 if (isHvxSingleTy(SubTy)) {
1393 if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
1394 unsigned Idx = CN->getZExtValue();
1395 assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
1396 unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
1397 return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
1398 }
1399 // If IdxV is not a constant, generate the two variants: with the
1400 // SubV as the high and as the low subregister, and select the right
1401 // pair based on the IdxV.
1402 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
1403 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
1404 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1405 }
1406 // The subvector being inserted must be entirely contained in one of
1407 // the vectors V0 or V1. Set SingleV to the correct one, and update
1408 // IdxV to be the index relative to the beginning of that vector.
1409 SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
1410 IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
1411 SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
1412 }
1413
1414 // The only meaningful subvectors of a single HVX vector are those that
1415 // fit in a scalar register.
1416 assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
1417 // Convert IdxV to be index in bytes.
1418 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1419 if (!IdxN || !IdxN->isZero()) {
1420 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1421 DAG.getConstant(ElemWidth/8, dl, MVT::i32));
1422 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
1423 }
1424 // When inserting a single word, the rotation back to the original position
1425 // would be by HwLen-Idx, but if two words are inserted, it will need to be
1426 // by (HwLen-4)-Idx.
1427 unsigned RolBase = HwLen;
1428 if (SubTy.getSizeInBits() == 32) {
1429 SDValue V = DAG.getBitcast(MVT::i32, SubV);
1430 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
1431 } else {
1432 SDValue V = DAG.getBitcast(MVT::i64, SubV);
1433 SDValue R0 = LoHalf(V, DAG);
1434 SDValue R1 = HiHalf(V, DAG);
1435 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
1436 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
1437 DAG.getConstant(4, dl, MVT::i32));
1438 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
1439 RolBase = HwLen-4;
1440 }
1441 // If the vector wasn't ror'ed, don't ror it back.
1442 if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
1443 SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1444 DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
1445 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
1446 }
1447
1448 if (IsPair) {
1449 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
1450 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
1451 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1452 }
1453 return SingleV;
1454}
1455
1456SDValue
1457HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
1458 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1459 MVT VecTy = ty(VecV);
1460 MVT SubTy = ty(SubV);
1461 assert(Subtarget.isHVXVectorType(VecTy, true));
1462 // VecV is an HVX vector predicate. SubV may be either an HVX vector
1463 // predicate as well, or it can be a scalar predicate.
1464
1465 unsigned VecLen = VecTy.getVectorNumElements();
1466 unsigned HwLen = Subtarget.getVectorLength();
1467 assert(HwLen % VecLen == 0 && "Unexpected vector type");
1468
1469 unsigned Scale = VecLen / SubTy.getVectorNumElements();
1470 unsigned BitBytes = HwLen / VecLen;
1471 unsigned BlockLen = HwLen / Scale;
1472
1473 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1474 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1475 SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
1476 SDValue ByteIdx;
1477
1478 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1479 if (!IdxN || !IdxN->isZero()) {
1480 ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1481 DAG.getConstant(BitBytes, dl, MVT::i32));
1482 ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
1483 }
1484
1485 // ByteVec is the target vector VecV rotated in such a way that the
1486 // subvector should be inserted at index 0. Generate a predicate mask
1487 // and use vmux to do the insertion.
1488 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1489 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1490 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1491 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1492 ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
1493 // Rotate ByteVec back, and convert to a vector predicate.
1494 if (!IdxN || !IdxN->isZero()) {
1495 SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
1496 SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
1497 ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
1498 }
1499 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1500}
1501
1502SDValue
1503HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1504 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1505 // Sign- and any-extending of a vector predicate to a vector register is
1506 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1507 // a vector of 1s (where the 1s are of type matching the vector type).
1508 assert(Subtarget.isHVXVectorType(ResTy));
1509 if (!ZeroExt)
1510 return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1511
1512 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1513 SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1514 DAG.getConstant(1, dl, MVT::i32));
1515 SDValue False = getZero(dl, ResTy, DAG);
1516 return DAG.getSelect(dl, ResTy, VecV, True, False);
1517}
1518
1519SDValue
1520HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1521 MVT ResTy, SelectionDAG &DAG) const {
1522 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1523 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1524 // vector register. The remaining bits of the vector register are
1525 // unspecified.
1526
1527 MachineFunction &MF = DAG.getMachineFunction();
1528 unsigned HwLen = Subtarget.getVectorLength();
1529 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1530 MVT PredTy = ty(VecQ);
1531 unsigned PredLen = PredTy.getVectorNumElements();
1532 assert(HwLen % PredLen == 0);
1533 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1534
1535 Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1537 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1538 // These are bytes with the LSB rotated left with respect to their index.
1539 for (unsigned i = 0; i != HwLen/8; ++i) {
1540 for (unsigned j = 0; j != 8; ++j)
1541 Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1542 }
1543 Constant *CV = ConstantVector::get(Tmp);
1544 Align Alignment(HwLen);
1545 SDValue CP =
1546 LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
1547 SDValue Bytes =
1548 DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1550
1551 // Select the bytes that correspond to true bits in the vector predicate.
1552 SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1553 getZero(dl, VecTy, DAG));
1554 // Calculate the OR of all bytes in each group of 8. That will compress
1555 // all the individual bits into a single byte.
1556 // First, OR groups of 4, via vrmpy with 0x01010101.
1557 SDValue All1 =
1558 DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1559 SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1560 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1561 SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1562 {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1563 SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1564
1565 // Pick every 8th byte and coalesce them at the beginning of the output.
1566 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1567 // byte and so on.
1568 SmallVector<int,128> Mask;
1569 for (unsigned i = 0; i != HwLen; ++i)
1570 Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1571 SDValue Collect =
1572 DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1573 return DAG.getBitcast(ResTy, Collect);
1574}
1575
1576SDValue
1577HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1578 const SDLoc &dl, SelectionDAG &DAG) const {
1579 // Take a vector and resize the element type to match the given type.
1580 MVT InpTy = ty(VecV);
1581 if (InpTy == ResTy)
1582 return VecV;
1583
1584 unsigned InpWidth = InpTy.getSizeInBits();
1585 unsigned ResWidth = ResTy.getSizeInBits();
1586
1587 if (InpTy.isFloatingPoint()) {
1588 return InpWidth < ResWidth
1589 ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1590 : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1591 DAG.getTargetConstant(0, dl, MVT::i32));
1592 }
1593
1594 assert(InpTy.isInteger());
1595
1596 if (InpWidth < ResWidth) {
1597 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1598 return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1599 } else {
1600 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1601 return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1602 }
1603}
1604
1605SDValue
1606HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1607 SelectionDAG &DAG) const {
1608 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1609
1610 const SDLoc &dl(Vec);
1611 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1612 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1613 {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1614}
1615
1616SDValue
1617HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1618 const {
1619 const SDLoc &dl(Op);
1620 MVT VecTy = ty(Op);
1621
1622 unsigned Size = Op.getNumOperands();
1624 for (unsigned i = 0; i != Size; ++i)
1625 Ops.push_back(Op.getOperand(i));
1626
1627 if (VecTy.getVectorElementType() == MVT::i1)
1628 return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1629
1630 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1631 // not a legal type, just bitcast the node to use i16
1632 // types and bitcast the result back to f16
1633 if (VecTy.getVectorElementType() == MVT::f16) {
1635 for (unsigned i = 0; i != Size; i++)
1636 NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1637
1638 SDValue T0 = DAG.getNode(ISD::BUILD_VECTOR, dl,
1639 tyVector(VecTy, MVT::i16), NewOps);
1640 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1641 }
1642
1643 // First, split the BUILD_VECTOR for vector pairs. We could generate
1644 // some pairs directly (via splat), but splats should be generated
1645 // by the combiner prior to getting here.
1646 if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
1647 ArrayRef<SDValue> A(Ops);
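    // Illustration, assuming a 128-byte HVX mode: a vector pair is
    // 16 * 128 = 2048 bits, e.g. a v64i32 BUILD_VECTOR, which is built here
    // as two v32i32 halves and then concatenated.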
1648 MVT SingleTy = typeSplit(VecTy).first;
1649 SDValue V0 = buildHvxVectorReg(A.take_front(Size / 2), dl, SingleTy, DAG);
1650 SDValue V1 = buildHvxVectorReg(A.drop_front(Size / 2), dl, SingleTy, DAG);
1651 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1652 }
1653
1654 return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1655}
1656
1657SDValue
1658HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1659 const {
1660 const SDLoc &dl(Op);
1661 MVT VecTy = ty(Op);
1662 MVT ArgTy = ty(Op.getOperand(0));
1663
1664 if (ArgTy == MVT::f16) {
1665 MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1666 SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1667 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1668 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1669 return DAG.getBitcast(VecTy, Splat);
1670 }
1671
1672 return SDValue();
1673}
1674
1675SDValue
1676HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1677 const {
1678 // Vector concatenation of two integer (non-bool) vectors does not need
1679 // special lowering. Custom-lower concats of bool vectors and expand
1680 // concats of more than 2 vectors.
1681 MVT VecTy = ty(Op);
1682 const SDLoc &dl(Op);
1683 unsigned NumOp = Op.getNumOperands();
1684 if (VecTy.getVectorElementType() != MVT::i1) {
1685 if (NumOp == 2)
1686 return Op;
1687 // Expand the other cases into a build-vector.
1688 SmallVector<SDValue,8> Elems;
1689 for (SDValue V : Op.getNode()->ops())
1690 DAG.ExtractVectorElements(V, Elems);
1691 // A vector of i16 will be broken up into a build_vector of i16's.
1692 // This is a problem, since at the time of operation legalization,
1693 // all operations are expected to be type-legalized, and i16 is not
1694 // a legal type. If any of the extracted elements is not of a valid
1695 // type, sign-extend it to a valid one.
1696 for (SDValue &V : Elems) {
1697 MVT Ty = ty(V);
1698 if (!isTypeLegal(Ty)) {
1699 MVT NTy = typeLegalize(Ty, DAG);
1700 if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1701 V = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
1702 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
1703 V.getOperand(0), V.getOperand(1)),
1704 DAG.getValueType(Ty));
1705 continue;
1706 }
1707 // A few less complicated cases.
1708 switch (V.getOpcode()) {
1709 case ISD::Constant:
1710 V = DAG.getSExtOrTrunc(V, dl, NTy);
1711 break;
1712 case ISD::UNDEF:
1713 V = DAG.getUNDEF(NTy);
1714 break;
1715 case ISD::TRUNCATE:
1716 V = V.getOperand(0);
1717 break;
1718 default:
1719 llvm_unreachable("Unexpected vector element");
1720 }
1721 }
1722 }
1723 return DAG.getBuildVector(VecTy, dl, Elems);
1724 }
1725
1726 assert(VecTy.getVectorElementType() == MVT::i1);
1727 unsigned HwLen = Subtarget.getVectorLength();
1728 assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1729
1730 SDValue Op0 = Op.getOperand(0);
1731
1732 // If the operands are HVX types (i.e. not scalar predicates), then
1733 // defer the concatenation, and create QCAT instead.
1734 if (Subtarget.isHVXVectorType(ty(Op0), true)) {
1735 if (NumOp == 2)
1736 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
1737
1738 ArrayRef<SDUse> U(Op.getNode()->ops());
1739 SmallVector<SDValue,4> SV(U.begin(), U.end());
1740 ArrayRef<SDValue> Ops(SV);
1741
1742 MVT HalfTy = typeSplit(VecTy).first;
1743 SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1744 Ops.take_front(NumOp/2));
1745 SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1746 Ops.take_back(NumOp/2));
1747 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
1748 }
1749
1750 // Count how many bytes (in a vector register) each bit in VecTy
1751 // corresponds to.
1752 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1753
1754 SmallVector<SDValue,8> Prefixes;
1755 for (SDValue V : Op.getNode()->op_values()) {
1756 SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
1757 Prefixes.push_back(P);
1758 }
1759
1760 unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
1761 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1762 SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
1763 SDValue Res = getZero(dl, ByteTy, DAG);
1764 for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1765 Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
1766 Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
1767 }
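  // Illustration, assuming a 128-byte HVX mode and two v32i1 operands
  // (VecTy = v64i1): BitBytes = 128/64 = 2 and S = 128 - 32*2 = 64, so each
  // prefix occupies the low 64 bytes of a register. The loop ORs in the last
  // prefix, rotates the accumulator by 64 bytes, and ORs in the first one,
  // leaving operand 0 in the low half and operand 1 in the high half before
  // the V2Q conversion.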
1768 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
1769}
1770
1771SDValue
1772HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1773 const {
1774 // Change the type of the extracted element to i32.
1775 SDValue VecV = Op.getOperand(0);
1776 MVT ElemTy = ty(VecV).getVectorElementType();
1777 const SDLoc &dl(Op);
1778 SDValue IdxV = Op.getOperand(1);
1779 if (ElemTy == MVT::i1)
1780 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1781
1782 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1783}
1784
1785SDValue
1786HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1787 const {
1788 const SDLoc &dl(Op);
1789 MVT VecTy = ty(Op);
1790 SDValue VecV = Op.getOperand(0);
1791 SDValue ValV = Op.getOperand(1);
1792 SDValue IdxV = Op.getOperand(2);
1793 MVT ElemTy = ty(VecV).getVectorElementType();
1794 if (ElemTy == MVT::i1)
1795 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1796
1797 if (ElemTy == MVT::f16) {
1798 SDValue T0 = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
1799 tyVector(VecTy, MVT::i16),
1800 DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1801 DAG.getBitcast(MVT::i16, ValV), IdxV);
1802 return DAG.getBitcast(tyVector(VecTy, MVT::f16), T0);
1803 }
1804
1805 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1806}
1807
1808SDValue
1809HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1810 const {
1811 SDValue SrcV = Op.getOperand(0);
1812 MVT SrcTy = ty(SrcV);
1813 MVT DstTy = ty(Op);
1814 SDValue IdxV = Op.getOperand(1);
1815 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1816 assert(Idx % DstTy.getVectorNumElements() == 0);
1817 (void)Idx;
1818 const SDLoc &dl(Op);
1819
1820 MVT ElemTy = SrcTy.getVectorElementType();
1821 if (ElemTy == MVT::i1)
1822 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1823
1824 return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1825}
1826
1827SDValue
1828HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1829 const {
1830 // Idx does not need to be a constant.
1831 SDValue VecV = Op.getOperand(0);
1832 SDValue ValV = Op.getOperand(1);
1833 SDValue IdxV = Op.getOperand(2);
1834
1835 const SDLoc &dl(Op);
1836 MVT VecTy = ty(VecV);
1837 MVT ElemTy = VecTy.getVectorElementType();
1838 if (ElemTy == MVT::i1)
1839 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1840
1841 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1842}
1843
1844SDValue
1845HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1846 // Lower any-extends of boolean vectors to sign-extends, since they
1847 // translate directly to Q2V. Zero-extending could also be done equally
1848 // fast, but Q2V is used/recognized in more places.
1849 // For all other vectors, use zero-extend.
1850 MVT ResTy = ty(Op);
1851 SDValue InpV = Op.getOperand(0);
1852 MVT ElemTy = ty(InpV).getVectorElementType();
1853 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1854 return LowerHvxSignExt(Op, DAG);
1855 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1856}
1857
1858SDValue
1859HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1860 MVT ResTy = ty(Op);
1861 SDValue InpV = Op.getOperand(0);
1862 MVT ElemTy = ty(InpV).getVectorElementType();
1863 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1864 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1865 return Op;
1866}
1867
1868SDValue
1869HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1870 MVT ResTy = ty(Op);
1871 SDValue InpV = Op.getOperand(0);
1872 MVT ElemTy = ty(InpV).getVectorElementType();
1873 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1874 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1875 return Op;
1876}
1877
1878SDValue
1879HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1880 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1881 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
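  // For example, with 8-bit elements and x = 0b00101000:
  //   ~x & (x-1) = 0b11010111 & 0b00100111 = 0b00000111,
  //   ctlz(0b00000111) = 5, and 8 - 5 = 3 = cttz(x).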
1882 const SDLoc &dl(Op);
1883 MVT ResTy = ty(Op);
1884 SDValue InpV = Op.getOperand(0);
1885 assert(ResTy == ty(InpV));
1886
1887 // Calculate the vectors of 1 and bitwidth(x).
1888 MVT ElemTy = ty(InpV).getVectorElementType();
1889 unsigned ElemWidth = ElemTy.getSizeInBits();
1890
1891 SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1892 DAG.getConstant(1, dl, MVT::i32));
1893 SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1894 DAG.getConstant(ElemWidth, dl, MVT::i32));
1895 SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1896 DAG.getAllOnesConstant(dl, MVT::i32));
1897
1898 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1899 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1900 // it separately in custom combine or selection).
1901 SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
1902 {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
1903 DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
1904 return DAG.getNode(ISD::SUB, dl, ResTy,
1905 {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
1906}
1907
1908SDValue
1909HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1910 const SDLoc &dl(Op);
1911 MVT ResTy = ty(Op);
1912 assert(ResTy.getVectorElementType() == MVT::i32);
1913
1914 SDValue Vs = Op.getOperand(0);
1915 SDValue Vt = Op.getOperand(1);
1916
1917 SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
1918 unsigned Opc = Op.getOpcode();
1919
1920 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
1921 if (Opc == ISD::MULHU)
1922 return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1923 if (Opc == ISD::MULHS)
1924 return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
1925
1926#ifndef NDEBUG
1927 Op.dump(&DAG);
1928#endif
1929 llvm_unreachable("Unexpected mulh operation");
1930}
1931
1932SDValue
1933HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
1934 const SDLoc &dl(Op);
1935 unsigned Opc = Op.getOpcode();
1936 SDValue Vu = Op.getOperand(0);
1937 SDValue Vv = Op.getOperand(1);
1938
1939 // If the HI part is not used, convert it to a regular MUL.
1940 if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
1941 // Need to preserve the types and the number of values.
1942 SDValue Hi = DAG.getUNDEF(ty(HiVal));
1943 SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
1944 return DAG.getMergeValues({Lo, Hi}, dl);
1945 }
1946
1947 bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
1948 bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;
1949
1950 // Legal on HVX v62+, but lower it here because patterns can't handle multi-
1951 // valued nodes.
1952 if (Subtarget.useHVXV62Ops())
1953 return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1954
1955 if (Opc == HexagonISD::SMUL_LOHI) {
1956 // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI;
1957 // for the other signedness combinations, LOHI is cheaper.
1958 if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
1959 SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
1960 SDValue Lo = DAG.getUNDEF(ty(LoVal));
1961 return DAG.getMergeValues({Lo, Hi}, dl);
1962 }
1963 }
1964
1965 return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
1966}
1967
1968SDValue
1969HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
1970 SDValue Val = Op.getOperand(0);
1971 MVT ResTy = ty(Op);
1972 MVT ValTy = ty(Val);
1973 const SDLoc &dl(Op);
1974
1975 if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
1976 unsigned HwLen = Subtarget.getVectorLength();
1977 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
1978 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
1979 unsigned BitWidth = ResTy.getSizeInBits();
1980
1981 if (BitWidth < 64) {
1982 SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
1983 dl, MVT::i32, DAG);
1984 if (BitWidth == 32)
1985 return W0;
1986 assert(BitWidth < 32u);
1987 return DAG.getZExtOrTrunc(W0, dl, ResTy);
1988 }
1989
1990 // The result is >= 64 bits. The only options are 64 or 128.
1991 assert(BitWidth == 64 || BitWidth == 128);
1992 SmallVector<SDValue,4> Words;
1993 for (unsigned i = 0; i != BitWidth/32; ++i) {
1994 SDValue W = extractHvxElementReg(
1995 VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
1996 Words.push_back(W);
1997 }
1998 SmallVector<SDValue,2> Combines;
1999 assert(Words.size() % 2 == 0);
2000 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
2001 SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
2002 Combines.push_back(C);
2003 }
2004
2005 if (BitWidth == 64)
2006 return Combines[0];
2007
2008 return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
2009 }
2010
2011 // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
2012 // Splat the input into a 32-element i32 vector, then AND each element
2013 // with a unique bitmask to isolate individual bits.
2014 if (ResTy == MVT::v32i1 &&
2015 (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2016 Subtarget.useHVX128BOps()) {
2017 SDValue Val32 = Val;
2018 if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2019 Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2020
2021 MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
2022 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
2023 SmallVector<SDValue, 32> Mask;
2024 for (unsigned i = 0; i < 32; ++i)
2025 Mask.push_back(DAG.getConstant(1ull << i, dl, MVT::i32));
2026
2027 SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
2028 SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
2029 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, Anded);
2030 }
2031
2032 if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2033 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2034 unsigned BitWidth = ValTy.getSizeInBits();
2035 unsigned HwLen = Subtarget.getVectorLength();
2036 assert(BitWidth == HwLen);
2037
2038 MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2039 SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2040 // Splat each byte of Val 8 times.
2041 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2042 // where b0, b1,..., b15 are least to most significant bytes of I.
2043 SmallVector<SDValue, 128> Bytes;
2044 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2045 // These are bytes with the LSB rotated left with respect to their index.
2046 SmallVector<SDValue, 128> Tmp;
2047 for (unsigned I = 0; I != HwLen / 8; ++I) {
2048 SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2049 SDValue Byte =
2050 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2051 for (unsigned J = 0; J != 8; ++J) {
2052 Bytes.push_back(Byte);
2053 Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2054 }
2055 }
2056
2057 MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2058 SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2059 SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2060
2061 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2062 I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2063 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2064 }
2065
2066 return Op;
2067}
2068
2069SDValue
2070HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2071 // Sign- and zero-extends are legal.
2072 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2073 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2074 Op.getOperand(0));
2075}
2076
2077SDValue
2078HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2079 MVT ResTy = ty(Op);
2080 if (ResTy.getVectorElementType() != MVT::i1)
2081 return Op;
2082
2083 const SDLoc &dl(Op);
2084 unsigned HwLen = Subtarget.getVectorLength();
2085 unsigned VecLen = ResTy.getVectorNumElements();
2086 assert(HwLen % VecLen == 0);
2087 unsigned ElemSize = HwLen / VecLen;
2088
2089 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
2090 SDValue S =
2091 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2092 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2093 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2094 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2095}
2096
2097SDValue
2098HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2099 if (SDValue S = getVectorShiftByInt(Op, DAG))
2100 return S;
2101 return Op;
2102}
2103
2104SDValue
2105HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2106 SelectionDAG &DAG) const {
2107 unsigned Opc = Op.getOpcode();
2108 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2109
2110 // Make sure the shift amount is within the range of the bitwidth
2111 // of the element type.
2112 SDValue A = Op.getOperand(0);
2113 SDValue B = Op.getOperand(1);
2114 SDValue S = Op.getOperand(2);
2115
2116 MVT InpTy = ty(A);
2117 MVT ElemTy = InpTy.getVectorElementType();
2118
2119 const SDLoc &dl(Op);
2120 unsigned ElemWidth = ElemTy.getSizeInBits();
2121 bool IsLeft = Opc == ISD::FSHL;
2122
2123 // The expansion into regular shifts produces worse code for i8 and for
2124 // right shift of i32 on v65+.
2125 bool UseShifts = ElemTy != MVT::i8;
2126 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2127 UseShifts = false;
2128
2129 if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2130 // If this is a funnel shift by a scalar, lower it into regular shifts.
2131 SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2132 SDValue ModS =
2133 DAG.getNode(ISD::AND, dl, MVT::i32,
2134 {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2135 SDValue NegS =
2136 DAG.getNode(ISD::SUB, dl, MVT::i32,
2137 {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2138 SDValue IsZero =
2139 DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2140 // FSHL A, B  =>  (A << ModS) | (B >> NegS)
2141 // FSHR A, B  =>  (A << NegS) | (B >> ModS)
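    // For example, with 16-bit elements and a splat shift amount of 3:
    //   FSHL A, B  ->  (A << 3) | (B >> 13)
    //   FSHR A, B  ->  (A << 13) | (B >> 3)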
2142 SDValue Part1 =
2143 DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2144 SDValue Part2 =
2145 DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2146 SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2147 // If the shift amount was 0, pick A or B, depending on the direction.
2148 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2149 return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2150 }
2151
2152 SDValue Mask = DAG.getSplatBuildVector(
2153 InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2154
2155 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2156 return DAG.getNode(MOpc, dl, ty(Op),
2157 {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2158}
2159
2160SDValue
2161HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2162 const SDLoc &dl(Op);
2163 unsigned IntNo = Op.getConstantOperandVal(0);
2164 SmallVector<SDValue> Ops(Op->ops());
2165
2166 auto Swap = [&](SDValue P) {
2167 return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
2168 };
2169
2170 switch (IntNo) {
2171 case Intrinsic::hexagon_V6_pred_typecast:
2172 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2173 MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
2174 if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
2175 if (ResTy == InpTy)
2176 return Ops[1];
2177 return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
2178 }
2179 break;
2180 }
2181 case Intrinsic::hexagon_V6_vmpyss_parts:
2182 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2183 return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
2184 {Ops[1], Ops[2]}));
2185 case Intrinsic::hexagon_V6_vmpyuu_parts:
2186 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2187 return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
2188 {Ops[1], Ops[2]}));
2189 case Intrinsic::hexagon_V6_vmpyus_parts:
2190 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2191 return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
2192 {Ops[1], Ops[2]}));
2193 }
2194 } // switch
2195
2196 return Op;
2197}
2198
2199SDValue
2200HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
2201 const SDLoc &dl(Op);
2202 unsigned HwLen = Subtarget.getVectorLength();
2203 MachineFunction &MF = DAG.getMachineFunction();
2204 auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
2205 SDValue Mask = MaskN->getMask();
2206 SDValue Chain = MaskN->getChain();
2207 SDValue Base = MaskN->getBasePtr();
2208 auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);
2209
2210 unsigned Opc = Op->getOpcode();
2211 assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);
2212
2213 if (Opc == ISD::MLOAD) {
2214 MVT ValTy = ty(Op);
2215 SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
2216 SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
2217 if (isUndef(Thru))
2218 return Load;
2219 SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
2220 return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
2221 }
2222
2223 // MSTORE
2224 // HVX only has aligned masked stores.
2225
2226 // TODO: Fold negations of the mask into the store.
2227 unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
2228 SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
2229 SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));
2230
2231 if (MaskN->getAlign().value() % HwLen == 0) {
2232 SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
2233 {Mask, Base, Offset0, Value, Chain}, DAG);
2234 DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
2235 return Store;
2236 }
2237
2238 // Unaligned case.
2239 auto StoreAlign = [&](SDValue V, SDValue A) {
2240 SDValue Z = getZero(dl, ty(V), DAG);
2241 // TODO: use funnel shifts?
2242 // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
2243 // upper half.
2244 SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
2245 SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
2246 return std::make_pair(LoV, HiV);
2247 };
2248
2249 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
2250 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
2251 SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
2252 VectorPair Tmp = StoreAlign(MaskV, Base);
2253 VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
2254 DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
2255 VectorPair ValueU = StoreAlign(Value, Base);
2256
2257 SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
2258 SDValue StoreLo =
2259 getInstr(StoreOpc, dl, MVT::Other,
2260 {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
2261 SDValue StoreHi =
2262 getInstr(StoreOpc, dl, MVT::Other,
2263 {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
2264 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
2265 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
2266 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
2267}
2268
2269SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2270 SelectionDAG &DAG) const {
2271 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2272 // is legal (done via a pattern).
2273 assert(Subtarget.useHVXQFloatOps());
2274
2275 assert(Op->getOpcode() == ISD::FP_EXTEND);
2276
2277 MVT VecTy = ty(Op);
2278 MVT ArgTy = ty(Op.getOperand(0));
2279 const SDLoc &dl(Op);
2280 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2281
2282 SDValue F16Vec = Op.getOperand(0);
2283
2284 APFloat FloatVal = APFloat(1.0f);
2285 bool Ignored;
2286 FloatVal.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &Ignored);
2287 SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2288 SDValue VmpyVec =
2289 getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2290
2291 MVT HalfTy = typeSplit(VecTy).first;
2292 VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2293 SDValue LoVec =
2294 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2295 SDValue HiVec =
2296 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2297
2298 SDValue ShuffVec =
2299 getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2300 {HiVec, LoVec, DAG.getSignedConstant(-4, dl, MVT::i32)}, DAG);
2301
2302 return ShuffVec;
2303}
2304
2305SDValue
2306HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2307 // Catch invalid conversion ops (just in case).
2308 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2309 Op.getOpcode() == ISD::FP_TO_UINT);
2310
2311 MVT ResTy = ty(Op);
2312 MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2313 MVT IntTy = ResTy.getVectorElementType();
2314
2315 if (Subtarget.useHVXIEEEFPOps()) {
2316 // There are only conversions from f16.
2317 if (FpTy == MVT::f16) {
2318 // Other int types aren't legal in HVX, so we shouldn't see them here.
2319 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2320 // Conversions to i8 and i16 are legal.
2321 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2322 return Op;
2323 }
2324 }
2325
2326 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2327 return EqualizeFpIntConversion(Op, DAG);
2328
2329 return ExpandHvxFpToInt(Op, DAG);
2330}
2331
2332SDValue
2333HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2334 // Catch invalid conversion ops (just in case).
2335 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2336 Op.getOpcode() == ISD::UINT_TO_FP);
2337
2338 MVT ResTy = ty(Op);
2339 MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2340 MVT FpTy = ResTy.getVectorElementType();
2341
2342 if (Subtarget.useHVXIEEEFPOps()) {
2343 // There are only conversions to f16.
2344 if (FpTy == MVT::f16) {
2345 // Other int types aren't legal in HVX, so we shouldn't see them here.
2346 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2347 // i8, i16 -> f16 is legal.
2348 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2349 return Op;
2350 }
2351 }
2352
2353 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2354 return EqualizeFpIntConversion(Op, DAG);
2355
2356 return ExpandHvxIntToFp(Op, DAG);
2357}
2358
2359HexagonTargetLowering::TypePair
2360HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2361 // Compare the widths of elements of the two types, and extend the narrower
2362 // type to match the width of the wider type. For vector types, apply this
2363 // to the element type.
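  // For example, typeExtendToWider(v32i8, v32i32) yields {v32i32, v32i32},
  // and typeExtendToWider(f16, i32) yields {f32, i32}.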
2364 assert(Ty0.isVector() == Ty1.isVector());
2365
2366 MVT ElemTy0 = Ty0.getScalarType();
2367 MVT ElemTy1 = Ty1.getScalarType();
2368
2369 unsigned Width0 = ElemTy0.getSizeInBits();
2370 unsigned Width1 = ElemTy1.getSizeInBits();
2371 unsigned MaxWidth = std::max(Width0, Width1);
2372
2373 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2374 if (ScalarTy.isInteger())
2375 return MVT::getIntegerVT(Width);
2376 assert(ScalarTy.isFloatingPoint());
2377 return MVT::getFloatingPointVT(Width);
2378 };
2379
2380 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2381 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2382
2383 if (!Ty0.isVector()) {
2384 // Both types are scalars.
2385 return {WideETy0, WideETy1};
2386 }
2387
2388 // Vector types.
2389 unsigned NumElem = Ty0.getVectorNumElements();
2390 assert(NumElem == Ty1.getVectorNumElements());
2391
2392 return {MVT::getVectorVT(WideETy0, NumElem),
2393 MVT::getVectorVT(WideETy1, NumElem)};
2394}
2395
2396HexagonTargetLowering::TypePair
2397HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2398 // Compare the numbers of elements of two vector types, and widen the
2399 // narrower one to match the number of elements in the wider one.
2400 assert(Ty0.isVector() && Ty1.isVector());
2401
2402 unsigned Len0 = Ty0.getVectorNumElements();
2403 unsigned Len1 = Ty1.getVectorNumElements();
2404 if (Len0 == Len1)
2405 return {Ty0, Ty1};
2406
2407 unsigned MaxLen = std::max(Len0, Len1);
2408 return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2409 MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2410}
2411
2412MVT
2413HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2414 EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2415 assert(LegalTy.isSimple());
2416 return LegalTy.getSimpleVT();
2417}
2418
2419MVT
2420HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2421 unsigned HwWidth = 8 * Subtarget.getVectorLength();
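  // For example, in 128-byte HVX mode HwWidth is 1024, so v32i8 (256 bits)
  // widens to v128i8 and v16i32 widens to v32i32.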
2422 assert(Ty.getSizeInBits() <= HwWidth);
2423 if (Ty.getSizeInBits() == HwWidth)
2424 return Ty;
2425
2426 MVT ElemTy = Ty.getScalarType();
2427 return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2428}
2429
2430HexagonTargetLowering::VectorPair
2431HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
2432 const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
2433 // Compute A+B, return {A+B, O}, where O = vector predicate indicating
2434 // whether an overflow has occurred.
2435 MVT ResTy = ty(A);
2436 assert(ResTy == ty(B));
2437 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());
2438
2439 if (!Signed) {
2440 // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
2441 // save any instructions.
2442 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2443 SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
2444 return {Add, Ovf};
2445 }
2446
2447 // Signed overflow has happened, if:
2448 // (A, B have the same sign) and (A+B has a different sign from either)
2449 // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
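  // For example, for 8-bit lanes with A = B = 0x40: A+B = 0x80,
  // (~A xor B) = 0xFF, ((A+B) xor B) = 0xC0, their AND is 0xC0, and its
  // sign bit is set, correctly flagging the overflow of 64 + 64.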
2450 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2451 SDValue NotA =
2452 DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getAllOnesConstant(dl, ResTy)});
2453 SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
2454 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
2455 SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
2456 SDValue MSB =
2457 DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
2458 return {Add, MSB};
2459}
2460
2461HexagonTargetLowering::VectorPair
2462HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
2463 bool Signed, SelectionDAG &DAG) const {
2464 // Shift Val right by Amt bits, round the result to the nearest integer,
2465 // tie-break by rounding halves to even integer.
2466
2467 const SDLoc &dl(Val);
2468 MVT ValTy = ty(Val);
2469
2470 // This should also work for signed integers.
2471 //
2472 // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
2473 // bool ovf = (inp > tmp0);
2474 // uint rup = inp & (1 << (Amt+1));
2475 //
2476 // uint tmp1 = inp >> (Amt-1); // tmp1 == tmp2 iff
2477 // uint tmp2 = tmp0 >> (Amt-1); // the Amt-1 lower bits were all 0
2478 // uint tmp3 = tmp2 + rup;
2479 // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
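  //
  // For example, with Amt = 3: an input of 20 (2.5 after the shift) rounds
  // to 2, 28 (3.5) rounds to 4, and 21 (2.625) rounds to 3, i.e. round to
  // nearest with ties to even.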
2480 unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
2481 MVT ElemTy = MVT::getIntegerVT(ElemWidth);
2482 MVT IntTy = tyVector(ValTy, ElemTy);
2483 MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
2484 unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;
2485
2486 SDValue Inp = DAG.getBitcast(IntTy, Val);
2487 SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);
2488
2489 SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
2490 SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
2491 SDValue Zero = getZero(dl, IntTy, DAG);
2492 SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
2493 SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
2494 auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);
2495
2496 SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
2497 SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
2498 SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
2499 SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);
2500
2501 SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
2502 SDValue One = DAG.getConstant(1, dl, IntTy);
2503 SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
2504 SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
2505 SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
2506 return {Mux, Ovf};
2507}
2508
2509SDValue
2510HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
2511 SelectionDAG &DAG) const {
2512 MVT VecTy = ty(A);
2513 MVT PairTy = typeJoin({VecTy, VecTy});
2514 assert(VecTy.getVectorElementType() == MVT::i32);
2515
2516 SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
2517
2518 // mulhs(A,B) =
2519 // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
2520 // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
2521 // + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
2522 // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
2523 // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
2524 // anything, so it cannot produce any carry over to higher bits),
2525 // so everything in [] can be shifted by 16 without loss of precision.
2526 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
2527 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
2528 // The final additions need to make sure to properly maintain any carry-
2529 // out bits.
2530 //
2531 // Hi(B) Lo(B)
2532 // Hi(A) Lo(A)
2533 // --------------
2534 // Lo(B)*Lo(A) | T0 = V6_vmpyewuh(B,A) does this,
2535 // Hi(B)*Lo(A) | + dropping the low 16 bits
2536 // Hi(A)*Lo(B) | T2
2537 // Hi(B)*Hi(A)
2538
2539 SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
2540 // T1 = get Hi(A) into low halves.
2541 SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
2542 // P0 = interleaved T1.h*B.uh (full precision product)
2543 SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
2544 // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
2545 SDValue T2 = LoHalf(P0, DAG);
2546 // We need to add T0+T2, recording the carry-out, which will be 1<<16
2547 // added to the final sum.
2548 // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
2549 SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
2550 // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
2551 SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
2552 // T3 = full-precision(T0+T2) >> 16
2553 // The low halves are added-unsigned, the high ones are added-signed.
2554 SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2555 {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
2556 SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
2557 // P3 = interleaved Hi(B)*Hi(A) (full precision),
2558 // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
2559 SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
2560 SDValue T5 = LoHalf(P3, DAG);
2561 // Add:
2562 SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
2563 return T6;
2564}
2565
2566SDValue
2567HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
2568 bool SignedB, const SDLoc &dl,
2569 SelectionDAG &DAG) const {
2570 MVT VecTy = ty(A);
2571 MVT PairTy = typeJoin({VecTy, VecTy});
2572 assert(VecTy.getVectorElementType() == MVT::i32);
2573
2574 SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
2575
2576 if (SignedA && !SignedB) {
2577 // Make A:unsigned, B:signed.
2578 std::swap(A, B);
2579 std::swap(SignedA, SignedB);
2580 }
2581
2582 // Do halfword-wise multiplications for unsigned*unsigned product, then
2583 // add corrections for signed and unsigned*signed.
2584
2585 SDValue Lo, Hi;
2586
2587 // P0:lo = (uu) products of low halves of A and B,
2588 // P0:hi = (uu) products of high halves.
2589 SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);
2590
2591 // Swap low/high halves in B
2592 SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
2593 {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
2594 SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
2595 // P1 = products of even/odd halfwords.
2596 // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
2597 // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
2598 SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);
2599
2600 // P2:lo = low halves of P1:lo + P1:hi,
2601 // P2:hi = high halves of P1:lo + P1:hi.
2602 SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
2603 {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
2604 // Still need to add the high halves of P0:lo to P2:lo
2605 SDValue T2 =
2606 getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
2607 SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});
2608
2609 // The high halves of T3 will contribute to the HI part of LOHI.
2610 SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2611 {HiHalf(P2, DAG), T3, S16}, DAG);
2612
2613 // The low halves of P2 need to be added to high halves of the LO part.
2614 Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
2615 {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
2616 Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});
2617
2618 if (SignedA) {
2619 assert(SignedB && "Signed A and unsigned B should have been inverted");
2620
2621 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2622 SDValue Zero = getZero(dl, VecTy, DAG);
2623 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2624 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2625 SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
2626 SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
2627 Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
2628 } else if (SignedB) {
2629 // Same correction as for mulhus:
2630 // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
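    // (If B < 0, then B.uw = B.w + 2^32, so A.uw * B.uw = A.uw * B.w +
    // A.uw * 2^32; the unsigned high word over-counts by exactly A, which
    // is what gets subtracted here.)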
2631 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2632 SDValue Zero = getZero(dl, VecTy, DAG);
2633 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2634 Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
2635 } else {
2636 assert(!SignedA && !SignedB);
2637 }
2638
2639 return DAG.getMergeValues({Lo, Hi}, dl);
2640}
2641
2642SDValue
2643HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
2644 SDValue B, bool SignedB,
2645 const SDLoc &dl,
2646 SelectionDAG &DAG) const {
2647 MVT VecTy = ty(A);
2648 MVT PairTy = typeJoin({VecTy, VecTy});
2649 assert(VecTy.getVectorElementType() == MVT::i32);
2650
2651 if (SignedA && !SignedB) {
2652 // Make A:unsigned, B:signed.
2653 std::swap(A, B);
2654 std::swap(SignedA, SignedB);
2655 }
2656
2657 // Do S*S first, then make corrections for U*S or U*U if needed.
2658 SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
2659 SDValue P1 =
2660 getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
2661 SDValue Lo = LoHalf(P1, DAG);
2662 SDValue Hi = HiHalf(P1, DAG);
2663
2664 if (!SignedB) {
2665 assert(!SignedA && "Signed A and unsigned B should have been inverted");
2666 SDValue Zero = getZero(dl, VecTy, DAG);
2667 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2668
2669 // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
2670 // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
2671 // (V6_vaddw (HiHalf (Muls64O $A, $B)),
2672 // (V6_vaddwq (V6_vgtw (V6_vd0), $B),
2673 // (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
2674 // $A))>;
2675 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2676 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
2677 SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
2678 SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
2679 Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
2680 } else if (!SignedA) {
2681 SDValue Zero = getZero(dl, VecTy, DAG);
2682 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
2683
2684 // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
2685 // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
2686 // (V6_vaddwq (V6_vgtw (V6_vd0), $A),
2687 // (HiHalf (Muls64O $A, $B)),
2688 // $B)>;
2689 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
2690 Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
2691 }
2692
2693 return DAG.getMergeValues({Lo, Hi}, dl);
2694}
2695
2696SDValue
2697HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
2698 const {
2699 // Rewrite conversion between integer and floating-point in such a way that
2700 // the integer type is extended/narrowed to match the bitwidth of the
2701 // floating-point type, combined with additional integer-integer extensions
2702 // or narrowings to match the original input/result types.
2703 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
2704 //
2705 // The input/result types are not required to be legal, but if they are
2706 // legal, this function should not introduce illegal types.
2707
2708 unsigned Opc = Op.getOpcode();
2709 assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT ||
2710 Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
2711
2712 SDValue Inp = Op.getOperand(0);
2713 MVT InpTy = ty(Inp);
2714 MVT ResTy = ty(Op);
2715
2716 if (InpTy == ResTy)
2717 return Op;
2718
2719 const SDLoc &dl(Op);
2720 bool Signed = Opc == ISD::FP_TO_SINT || Opc == ISD::SINT_TO_FP;
2721
2722 auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
2723 SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
2724 SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
2725 SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
2726 return Res;
2727}
2728
2729SDValue
2730HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2731 unsigned Opc = Op.getOpcode();
2732 assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT);
2733
2734 const SDLoc &dl(Op);
2735 SDValue Op0 = Op.getOperand(0);
2736 MVT InpTy = ty(Op0);
2737 MVT ResTy = ty(Op);
2738 assert(InpTy.changeTypeToInteger() == ResTy);
2739
2740 // int32_t conv_f32_to_i32(uint32_t inp) {
2741 // // s | exp8 | frac23
2742 //
2743 // int neg = (int32_t)inp < 0;
2744 //
2745 // // "expm1" is the actual exponent minus 1: instead of "bias", subtract
2746 // // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
2747 // // produce a large positive "expm1", which will result in max u/int.
2748 // // In all IEEE formats, bias is the largest positive number that can be
2749 // // represented in bias-width bits (i.e. 011..1).
2750 // int32_t expm1 = (inp << 1) - 0x80000000;
2751 // expm1 >>= 24;
2752 //
2753 // // Always insert the "implicit 1". Subnormal numbers will become 0
2754 // // regardless.
2755 // uint32_t frac = (inp << 8) | 0x80000000;
2756 //
2757 // // "frac" is the fraction part represented as Q1.31. If it was
2758 // // interpreted as uint32_t, it would be the fraction part multiplied
2759 // // by 2^31.
2760 //
2761 // // Calculate the amount of right shift, since shifting further to the
2762 // // left would lose significant bits. Limit it to 32, because we want
2763 // // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
2764 // // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
2765 // // left by 31). "rsh" can be negative.
2766 // int32_t rsh = min(31 - (expm1 + 1), 32);
2767 //
2768 // frac >>= rsh; // rsh == 32 will produce 0
2769 //
2770 // // Everything up to this point is the same for conversion to signed
2771 // // unsigned integer.
2772 //
2773 // if (neg) // Only for signed int
2774 // frac = -frac; //
2775 // if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff
2776 // frac = 0x80000000; // frac = rsh <= 0 ? bound : frac
2777 // if (rsh <= 0 && !neg) //
2778 // frac = 0x7fffffff; //
2779 //
2780 // if (neg) // Only for unsigned int
2781 // frac = 0; //
2782 // if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac;
2783 // frac = 0x7fffffff; // frac = neg ? 0 : frac;
2784 //
2785 // return frac;
2786 // }
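  //
  // For example, converting 7.5f (bit pattern 0x40F00000):
  //   expm1 = ((0x40F00000 << 1) - 0x80000000) >> 24 = 1
  //   frac  = (0x40F00000 << 8) | 0x80000000 = 0xF0000000  (1.875 in Q1.31)
  //   rsh   = min(31 - (1 + 1), 32) = 29
  //   frac >> 29 = 7, the expected truncated result.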
2787
2788 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());
2789
2790 // Zero = V6_vd0();
2791 // Neg = V6_vgtw(Zero, Inp);
2792 // One = V6_lvsplatw(1);
2793 // M80 = V6_lvsplatw(0x80000000);
2794 // Exp00 = V6_vaslwv(Inp, One);
2795 // Exp01 = V6_vsubw(Exp00, M80);
2796 // ExpM1 = V6_vasrw(Exp01, 24);
2797 // Frc00 = V6_vaslw(Inp, 8);
2798 // Frc01 = V6_vor(Frc00, M80);
2799 // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
2800 // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
2801 // Frc02 = V6_vlsrwv(Frc01, Rsh01);
2802
2803 // if signed int:
2804 // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
2805 // Pos = V6_vgtw(Rsh01, Zero);
2806 // Frc13 = V6_vsubw(Zero, Frc02);
2807 // Frc14 = V6_vmux(Neg, Frc13, Frc02);
2808 // Int = V6_vmux(Pos, Frc14, Bnd);
2809 //
2810 // if unsigned int:
2811 // Rsn = V6_vgtw(Zero, Rsh01)
2812 // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
2813 // Int = V6_vmux(Neg, Zero, Frc23)
2814
2815 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
2816 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
2817 assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));
2818
2819 SDValue Inp = DAG.getBitcast(ResTy, Op0);
2820 SDValue Zero = getZero(dl, ResTy, DAG);
2821 SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
2822 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
2823 SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
2824 SDValue One = DAG.getConstant(1, dl, ResTy);
2825 SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
2826 SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
2827 SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
2828 SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});
2829
2830 SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
2831 SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
2832 SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});
2833
2834 SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
2835 SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
2836 SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
2837 SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
2838 SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});
2839
2840 SDValue Int;
2841
2842 if (Opc == ISD::FP_TO_SINT) {
2843 SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
2844 SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
2845 SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
2846 SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
2847 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
2848 } else {
2849 assert(Opc == ISD::FP_TO_UINT);
2850 SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
2851 SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
2852 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
2853 }
2854
2855 return Int;
2856}
2857
2858SDValue
2859HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2860 unsigned Opc = Op.getOpcode();
2861 assert(Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
2862
2863 const SDLoc &dl(Op);
2864 SDValue Op0 = Op.getOperand(0);
2865 MVT InpTy = ty(Op0);
2866 MVT ResTy = ty(Op);
2867 assert(ResTy.changeTypeToInteger() == InpTy);
2868
2869 // uint32_t vnoc1_rnd(int32_t w) {
2870 // int32_t iszero = w == 0;
2871 // int32_t isneg = w < 0;
2872 // uint32_t u = __builtin_HEXAGON_A2_abs(w);
2873 //
2874 // uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
2875 // uint32_t frac0 = (uint64_t)u << norm_left;
2876 //
2877 // // Rounding:
2878 // uint32_t frac1 = frac0 + ((1 << 8) - 1);
2879 // uint32_t renorm = (frac0 > frac1);
2880 // uint32_t rup = (int)(frac0 << 22) < 0;
2881 //
2882 // uint32_t frac2 = frac0 >> 8;
2883 // uint32_t frac3 = frac1 >> 8;
2884 // uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
2885 //
2886 // int32_t exp = 32 - norm_left + renorm + 127;
2887 // exp <<= 23;
2888 //
2889 // uint32_t sign = 0x80000000 * isneg;
2890 // uint32_t f = sign | exp | frac;
2891 // return iszero ? 0 : f;
2892 // }
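  //
  // For example, converting 12 (0b1100) to f32:
  //   norm_left = cl0(12) + 1 = 29, frac0 = 12 << 29 = 0x80000000
  //   rounding gives frac = 0x00400000 with renorm = 0
  //   exp = 32 - 29 + 0 + 127 = 130, and the assembled word is
  //   0x41400000 == 12.0f.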
2893
2894 MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
2895 bool Signed = Opc == ISD::SINT_TO_FP;
2896
2897 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
2898 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
2899
2900 SDValue Zero = getZero(dl, InpTy, DAG);
2901 SDValue One = DAG.getConstant(1, dl, InpTy);
2902 SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
2903 SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
2904 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
2905 SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
2906 SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});
2907
2908 auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
2909 if (Signed) {
2910 SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
2911 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
2912 SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
2913 Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
2914 }
2915
2916 SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
2917 SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
2918 SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
2919 SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
2920 SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
2921 {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
2922 SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
2923 SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
2924 SDValue Flt = DAG.getBitcast(ResTy, Flt1);
2925
2926 return Flt;
2927}
2928
2929SDValue
2930HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
2931 unsigned Opc = Op.getOpcode();
2932 unsigned TLOpc;
2933 switch (Opc) {
2934 case ISD::ANY_EXTEND:
2935 case ISD::SIGN_EXTEND:
2936 case ISD::ZERO_EXTEND:
2937 TLOpc = HexagonISD::TL_EXTEND;
2938 break;
2939 case ISD::TRUNCATE:
2940 TLOpc = HexagonISD::TL_TRUNCATE;
2941 break;
2942#ifndef NDEBUG
2943 Op.dump(&DAG);
2944#endif
2945 llvm_unreachable("Unexpected operator");
2946 }
2947
2948 const SDLoc &dl(Op);
2949 return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
2950 DAG.getUNDEF(MVT::i128), // illegal type
2951 DAG.getConstant(Opc, dl, MVT::i32));
2952}
2953
2954SDValue
2955HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
2956 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
2957 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
2958 unsigned Opc = Op.getConstantOperandVal(2);
2959 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
2960}
2961
2962HexagonTargetLowering::VectorPair
2963HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
2964 assert(!Op.isMachineOpcode());
2965 SmallVector<SDValue, 2> OpsL, OpsH;
2966 const SDLoc &dl(Op);
2967
2968 auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
2969 MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
2970 SDValue TV = DAG.getValueType(Ty);
2971 return std::make_pair(TV, TV);
2972 };
2973
2974 for (SDValue A : Op.getNode()->ops()) {
2975 auto [Lo, Hi] =
2976 ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
2977 // Special case for type operand.
2978 switch (Op.getOpcode()) {
2979 case ISD::SIGN_EXTEND_INREG:
2980 case HexagonISD::SSAT:
2981 case HexagonISD::USAT:
2982 if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
2983 std::tie(Lo, Hi) = SplitVTNode(N);
2984 break;
2985 }
2986 OpsL.push_back(Lo);
2987 OpsH.push_back(Hi);
2988 }
2989
2990 MVT ResTy = ty(Op);
2991 MVT HalfTy = typeSplit(ResTy).first;
2992 SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
2993 SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
2994 return {L, H};
2995}
2996
2997SDValue
2998HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
2999 auto *MemN = cast<MemSDNode>(Op.getNode());
3000
3001 MVT MemTy = MemN->getMemoryVT().getSimpleVT();
3002 if (!isHvxPairTy(MemTy))
3003 return Op;
3004
3005 const SDLoc &dl(Op);
3006 unsigned HwLen = Subtarget.getVectorLength();
3007 MVT SingleTy = typeSplit(MemTy).first;
3008 SDValue Chain = MemN->getChain();
3009 SDValue Base0 = MemN->getBasePtr();
3010 SDValue Base1 =
3011 DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(HwLen), dl);
3012 unsigned MemOpc = MemN->getOpcode();
3013
3014 MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
3015 if (MachineMemOperand *MMO = MemN->getMemOperand()) {
3016 MachineFunction &MF = DAG.getMachineFunction();
3017 uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
3018 ? (uint64_t)MemoryLocation::UnknownSize
3019 : HwLen;
3020 MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
3021 MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
3022 }
3023
3024 if (MemOpc == ISD::LOAD) {
3025 assert(cast<LoadSDNode>(Op)->isUnindexed());
3026 SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
3027 SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
3028 return DAG.getMergeValues(
3029 { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
3030 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3031 Load0.getValue(1), Load1.getValue(1)) }, dl);
3032 }
3033 if (MemOpc == ISD::STORE) {
3034 assert(cast<StoreSDNode>(Op)->isUnindexed());
3035 VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
3036 SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
3037 SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
3038 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
3039 }
3040
3041 assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);
3042
3043 auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
3044 assert(MaskN->isUnindexed());
3045 VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
3046 SDValue Offset = DAG.getUNDEF(MVT::i32);
3047
3048 if (MemOpc == ISD::MLOAD) {
3049 VectorPair Thru =
3050 opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
3051 SDValue MLoad0 =
3052 DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
3053 Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
3054 ISD::NON_EXTLOAD, false);
3055 SDValue MLoad1 =
3056 DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
3057 Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
3058 ISD::NON_EXTLOAD, false);
3059 return DAG.getMergeValues(
3060 { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
3061 DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3062 MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
3063 }
3064 if (MemOpc == ISD::MSTORE) {
3065 VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
3066 SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
3067 Masks.first, SingleTy, MOp0,
3068 ISD::UNINDEXED, false, false);
3069 SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
3070 Masks.second, SingleTy, MOp1,
3071 ISD::UNINDEXED, false, false);
3072 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
3073 }
3074
3075 std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
3076 llvm_unreachable(Name.c_str());
3077}
3078
3079SDValue
3080HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3081 const SDLoc &dl(Op);
3082 auto *LoadN = cast<LoadSDNode>(Op.getNode());
3083 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3084 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3085 "Not widening loads of i1 yet");
3086
3087 SDValue Chain = LoadN->getChain();
3088 SDValue Base = LoadN->getBasePtr();
3089 SDValue Offset = DAG.getUNDEF(MVT::i32);
3090
3091 MVT ResTy = ty(Op);
3092 unsigned HwLen = Subtarget.getVectorLength();
3093 unsigned ResLen = ResTy.getStoreSize();
3094 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3095
3096 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3097 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3098 {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
3099
3100 MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
3101 MachineFunction &MF = DAG.getMachineFunction();
3102 auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
3103
3104 SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
3105 DAG.getUNDEF(LoadTy), LoadTy, MemOp,
3106 ISD::UNINDEXED, ISD::NON_EXTLOAD, false);
3107 SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
3108 return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
3109}
3110
3111SDValue
3112HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3113 const SDLoc &dl(Op);
3114 auto *StoreN = cast<StoreSDNode>(Op.getNode());
3115 assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3116 assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3117 "Not widening stores of i1 yet");
3118
3119 SDValue Chain = StoreN->getChain();
3120 SDValue Base = StoreN->getBasePtr();
3121 SDValue Offset = DAG.getUNDEF(MVT::i32);
3122
3123 SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
3124 MVT ValueTy = ty(Value);
3125 unsigned ValueLen = ValueTy.getVectorNumElements();
3126 unsigned HwLen = Subtarget.getVectorLength();
3127 assert(isPowerOf2_32(ValueLen));
3128
3129 for (unsigned Len = ValueLen; Len < HwLen; ) {
3130 Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
3131 Len = ty(Value).getVectorNumElements(); // This is Len *= 2
3132 }
3133 assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
3134
3135 assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3136 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3137 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3138 {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
3139 MachineFunction &MF = DAG.getMachineFunction();
3140 auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
3141 return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
3142 MemOp, ISD::UNINDEXED, false, false);
3143}
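// ---------------------------------------------------------------------------
// Editor's illustration (not part of the LLVM source): WidenHvxStore keeps
// joining the value with undef until its length reaches HwLen, doubling each
// time. A minimal sketch of just that length arithmetic, assuming ValueLen is
// a power of two strictly less than HwLen (as asserted above); the helper
// name is hypothetical.
#include <cassert>
#include <vector>

static std::vector<unsigned> widenStoreSteps(unsigned ValueLen, unsigned HwLen) {
  assert(ValueLen != 0 && (ValueLen & (ValueLen - 1)) == 0 && "power of two");
  std::vector<unsigned> Lens{ValueLen};
  for (unsigned Len = ValueLen; Len < HwLen; Len *= 2) // mirrors the opJoin loop
    Lens.push_back(Len * 2);
  return Lens; // e.g. widenStoreSteps(16, 128) == {16, 32, 64, 128}
}
// ---------------------------------------------------------------------------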
3144
3145SDValue
3146HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3147 const SDLoc &dl(Op);
3148 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
3149 MVT ElemTy = ty(Op0).getVectorElementType();
3150 unsigned HwLen = Subtarget.getVectorLength();
3151
3152 unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
3153 assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
3154 MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
3155 if (!Subtarget.isHVXVectorType(WideOpTy, true))
3156 return SDValue();
3157
3158 SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
3159 SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
3160 EVT ResTy =
3161 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
3162 SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
3163 {WideOp0, WideOp1, Op.getOperand(2)});
3164
3165 EVT RetTy = typeLegalize(ty(Op), DAG);
3166 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
3167 {SetCC, getZero(dl, MVT::i32, DAG)});
3168}
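// ---------------------------------------------------------------------------
// Editor's illustration (not part of the LLVM source): WidenHvxSetCC pads the
// compare operands out to one full HVX register. A sketch of the element-count
// arithmetic only (HwLen is the register size in bytes, ElemBits the element
// width in bits); the helper name is hypothetical.
static unsigned wideSetCCNumElems(unsigned HwLen, unsigned ElemBits) {
  // Number of ElemBits-wide lanes in an HwLen-byte vector, e.g.
  // wideSetCCNumElems(128, 16) == 64, matching v64i16 on 128-byte HVX.
  return (8 * HwLen) / ElemBits;
}
// ---------------------------------------------------------------------------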
3169
3170SDValue
3171HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
3172 unsigned Opc = Op.getOpcode();
3173 bool IsPairOp = isHvxPairTy(ty(Op)) ||
3174 llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
3175 return isHvxPairTy(ty(V));
3176 });
3177
3178 if (IsPairOp) {
3179 switch (Opc) {
3180 default:
3181 break;
3182 case ISD::LOAD:
3183 case ISD::STORE:
3184 case ISD::MLOAD:
3185 case ISD::MSTORE:
3186 return SplitHvxMemOp(Op, DAG);
3187 case ISD::SINT_TO_FP:
3188 case ISD::UINT_TO_FP:
3189 case ISD::FP_TO_SINT:
3190 case ISD::FP_TO_UINT:
3191 if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
3192 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3193 break;
3194 case ISD::ABS:
3195 case ISD::CTPOP:
3196 case ISD::CTLZ:
3197 case ISD::CTTZ:
3198 case ISD::MUL:
3199 case ISD::FADD:
3200 case ISD::FSUB:
3201 case ISD::FMUL:
3202 case ISD::FMINIMUMNUM:
3203 case ISD::FMAXIMUMNUM:
3204 case ISD::MULHS:
3205 case ISD::MULHU:
3206 case ISD::AND:
3207 case ISD::OR:
3208 case ISD::XOR:
3209 case ISD::SRA:
3210 case ISD::SHL:
3211 case ISD::SRL:
3212 case ISD::FSHL:
3213 case ISD::FSHR:
3214 case ISD::SMIN:
3215 case ISD::SMAX:
3216 case ISD::UMIN:
3217 case ISD::UMAX:
3218 case ISD::SETCC:
3219 case ISD::VSELECT:
3221 case ISD::SPLAT_VECTOR:
3222 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3223 case ISD::SIGN_EXTEND:
3224 case ISD::ZERO_EXTEND:
3225 // In general, sign- and zero-extends can't be split and still
3226 // be legal. The only exception is extending bool vectors.
3227 if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
3228 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3229 break;
3230 }
3231 }
3232
3233 switch (Opc) {
3234 default:
3235 break;
3236 case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
3237 case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
3238 case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
3239 case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
3240 case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
3241 case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
3242 case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
3243 case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
3244 case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
3245 case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
3246 case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
3247 case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
3248 case ISD::SELECT: return LowerHvxSelect(Op, DAG);
3249 case ISD::SRA:
3250 case ISD::SHL:
3251 case ISD::SRL: return LowerHvxShift(Op, DAG);
3252 case ISD::FSHL:
3253 case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG);
3254 case ISD::MULHS:
3255 case ISD::MULHU: return LowerHvxMulh(Op, DAG);
3256 case ISD::SMUL_LOHI:
3257 case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3258 case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
3259 case ISD::SETCC:
3260 case ISD::INTRINSIC_VOID: return Op;
3261 case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
3262 case ISD::MLOAD:
3263 case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
3264 // Unaligned loads will be handled by the default lowering.
3265 case ISD::LOAD: return SDValue();
3266 case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
3267 case ISD::FP_TO_SINT:
3268 case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG);
3269 case ISD::SINT_TO_FP:
3270 case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG);
3271
3272 // Special nodes:
3273 case HexagonISD::SMUL_LOHI:
3274 case HexagonISD::UMUL_LOHI:
3275 case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3276 }
3277#ifndef NDEBUG
3278 Op.dumpr(&DAG);
3279#endif
3280 llvm_unreachable("Unhandled HVX operation");
3281}
3282
3283SDValue
3284HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3285 const {
3286 // Rewrite the extension/truncation/saturation op into steps where each
3287 // step changes the type widths by a factor of 2.
3288 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3289 //
3290 // Some of the vector types in Op may not be legal.
3291
3292 unsigned Opc = Op.getOpcode();
3293 switch (Opc) {
3294 case HexagonISD::SSAT:
3295 case HexagonISD::USAT:
3296 case HexagonISD::TL_EXTEND:
3297 case HexagonISD::TL_TRUNCATE:
3298 break;
3299 case ISD::ANY_EXTEND:
3300 case ISD::ZERO_EXTEND:
3301 case ISD::SIGN_EXTEND:
3302 case ISD::TRUNCATE:
3303 llvm_unreachable("ISD:: ops will be auto-folded");
3304 break;
3305#ifndef NDEBUG
3306 Op.dump(&DAG);
3307#endif
3308 llvm_unreachable("Unexpected operation");
3309 }
3310
3311 SDValue Inp = Op.getOperand(0);
3312 MVT InpTy = ty(Inp);
3313 MVT ResTy = ty(Op);
3314
3315 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3316 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3317 assert(InpWidth != ResWidth);
3318
3319 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3320 return Op;
3321
3322 const SDLoc &dl(Op);
3323 unsigned NumElems = InpTy.getVectorNumElements();
3324 assert(NumElems == ResTy.getVectorNumElements());
3325
3326 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3327 MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
3328 switch (Opc) {
3329 case HexagonISD::SSAT:
3330 case HexagonISD::USAT:
3331 return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
3332 case HexagonISD::TL_EXTEND:
3333 case HexagonISD::TL_TRUNCATE:
3334 return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
3335 default:
3336 llvm_unreachable("Unexpected opcode");
3337 }
3338 };
3339
3340 SDValue S = Inp;
3341 if (InpWidth < ResWidth) {
3342 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3343 while (InpWidth * 2 <= ResWidth)
3344 S = repeatOp(InpWidth *= 2, S);
3345 } else {
3346 // InpWidth > ResWidth
3347 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3348 while (InpWidth / 2 >= ResWidth)
3349 S = repeatOp(InpWidth /= 2, S);
3350 }
3351 return S;
3352}
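// ---------------------------------------------------------------------------
// Editor's illustration (not part of the LLVM source): the step expansion
// above only ever changes the element width by a factor of two per step. A
// standalone sketch of the width sequence it walks through, assuming both
// widths are powers of two (as asserted); the helper name is hypothetical.
#include <vector>

static std::vector<unsigned> resizeWidthChain(unsigned InpWidth,
                                              unsigned ResWidth) {
  std::vector<unsigned> Chain{InpWidth};
  if (InpWidth < ResWidth) {
    for (unsigned W = InpWidth; W * 2 <= ResWidth; )
      Chain.push_back(W *= 2); // widen: e.g. 8 -> 16 -> 32
  } else {
    for (unsigned W = InpWidth; W / 2 >= ResWidth; )
      Chain.push_back(W /= 2); // narrow: e.g. 32 -> 16 -> 8
  }
  return Chain; // resizeWidthChain(8, 32) == {8, 16, 32}
}
// ---------------------------------------------------------------------------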
3353
3354SDValue
3355HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3356 SDValue Inp0 = Op.getOperand(0);
3357 MVT InpTy = ty(Inp0);
3358 MVT ResTy = ty(Op);
3359 unsigned InpWidth = InpTy.getSizeInBits();
3360 unsigned ResWidth = ResTy.getSizeInBits();
3361 unsigned Opc = Op.getOpcode();
3362
3363 if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
3364 // First, make sure that the narrower type is widened to HVX.
3365 // This may cause the result to be wider than what the legalizer
3366 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3367 // desired type.
3368 auto [WInpTy, WResTy] =
3369 InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
3370 : typeWidenToWider(InpTy, typeWidenToHvx(ResTy));
3371 SDValue W = appendUndef(Inp0, WInpTy, DAG);
3372 SDValue S;
3373 if (Opc == HexagonISD::TL_EXTEND || Opc == HexagonISD::TL_TRUNCATE) {
3374 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
3375 Op.getOperand(2));
3376 } else {
3377 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
3378 }
3379 SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
3380 return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
3381 } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3382 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3383 } else {
3384 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3385 return RemoveTLWrapper(Op, DAG);
3386 }
3387 llvm_unreachable("Unexpected situation");
3388}
3389
3390void
3391HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3392 SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3393 unsigned Opc = N->getOpcode();
3394 SDValue Op(N, 0);
3395 SDValue Inp0; // Optional first argument.
3396 if (N->getNumOperands() > 0)
3397 Inp0 = Op.getOperand(0);
3398
3399 switch (Opc) {
3400 case ISD::ANY_EXTEND:
3401 case ISD::SIGN_EXTEND:
3402 case ISD::ZERO_EXTEND:
3403 case ISD::TRUNCATE:
3404 if (Subtarget.isHVXElementType(ty(Op)) &&
3405 Subtarget.isHVXElementType(ty(Inp0))) {
3406 Results.push_back(CreateTLWrapper(Op, DAG));
3407 }
3408 break;
3409 case ISD::SETCC:
3410 if (shouldWidenToHvx(ty(Inp0), DAG)) {
3411 if (SDValue T = WidenHvxSetCC(Op, DAG))
3412 Results.push_back(T);
3413 }
3414 break;
3415 case ISD::STORE: {
3416 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
3417 SDValue Store = WidenHvxStore(Op, DAG);
3418 Results.push_back(Store);
3419 }
3420 break;
3421 }
3422 case ISD::MLOAD:
3423 if (isHvxPairTy(ty(Op))) {
3424 SDValue S = SplitHvxMemOp(Op, DAG);
3425 assert(S->getOpcode() == ISD::MERGE_VALUES);
3426 Results.push_back(S.getOperand(0));
3427 Results.push_back(S.getOperand(1));
3428 }
3429 break;
3430 case ISD::MSTORE:
3431 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
3432 SDValue S = SplitHvxMemOp(Op, DAG);
3433 Results.push_back(S);
3434 }
3435 break;
3436 case ISD::SINT_TO_FP:
3437 case ISD::UINT_TO_FP:
3438 case ISD::FP_TO_SINT:
3439 case ISD::FP_TO_UINT:
3440 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3441 SDValue T = EqualizeFpIntConversion(Op, DAG);
3442 Results.push_back(T);
3443 }
3444 break;
3445 case HexagonISD::SSAT:
3446 case HexagonISD::USAT:
3447 case HexagonISD::TL_EXTEND:
3448 case HexagonISD::TL_TRUNCATE:
3449 Results.push_back(LegalizeHvxResize(Op, DAG));
3450 break;
3451 default:
3452 break;
3453 }
3454}
3455
3456void
3457HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
3458 SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3459 unsigned Opc = N->getOpcode();
3460 SDValue Op(N, 0);
3461 SDValue Inp0; // Optional first argument.
3462 if (N->getNumOperands() > 0)
3463 Inp0 = Op.getOperand(0);
3464
3465 switch (Opc) {
3466 case ISD::ANY_EXTEND:
3467 case ISD::SIGN_EXTEND:
3468 case ISD::ZERO_EXTEND:
3469 case ISD::TRUNCATE:
3470 if (Subtarget.isHVXElementType(ty(Op)) &&
3471 Subtarget.isHVXElementType(ty(Inp0))) {
3472 Results.push_back(CreateTLWrapper(Op, DAG));
3473 }
3474 break;
3475 case ISD::SETCC:
3476 if (shouldWidenToHvx(ty(Op), DAG)) {
3477 if (SDValue T = WidenHvxSetCC(Op, DAG))
3478 Results.push_back(T);
3479 }
3480 break;
3481 case ISD::LOAD: {
3482 if (shouldWidenToHvx(ty(Op), DAG)) {
3483 SDValue Load = WidenHvxLoad(Op, DAG);
3484 assert(Load->getOpcode() == ISD::MERGE_VALUES);
3485 Results.push_back(Load.getOperand(0));
3486 Results.push_back(Load.getOperand(1));
3487 }
3488 break;
3489 }
3490 case ISD::BITCAST:
3491 if (isHvxBoolTy(ty(Inp0))) {
3492 SDValue C = LowerHvxBitcast(Op, DAG);
3493 Results.push_back(C);
3494 }
3495 break;
3496 case ISD::FP_TO_SINT:
3497 case ISD::FP_TO_UINT:
3498 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3499 SDValue T = EqualizeFpIntConversion(Op, DAG);
3500 Results.push_back(T);
3501 }
3502 break;
3503 case HexagonISD::SSAT:
3504 case HexagonISD::USAT:
3505 case HexagonISD::TL_EXTEND:
3506 case HexagonISD::TL_TRUNCATE:
3507 Results.push_back(LegalizeHvxResize(Op, DAG));
3508 break;
3509 default:
3510 break;
3511 }
3512}
3513
3514SDValue
3515HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3516 DAGCombinerInfo &DCI) const {
3517 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3518 // to extract-subvector (shuffle V, pick even, pick odd)
3519
3520 assert(Op.getOpcode() == ISD::TRUNCATE);
3521 SelectionDAG &DAG = DCI.DAG;
3522 const SDLoc &dl(Op);
3523
3524 if (Op.getOperand(0).getOpcode() != ISD::BITCAST)
3525 return SDValue();
3526 SDValue Cast = Op.getOperand(0);
3527 SDValue Src = Cast.getOperand(0);
3528
3529 EVT TruncTy = Op.getValueType();
3530 EVT CastTy = Cast.getValueType();
3531 EVT SrcTy = Src.getValueType();
3532 if (SrcTy.isSimple())
3533 return SDValue();
3534 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3535 return SDValue();
3536 unsigned SrcLen = SrcTy.getVectorNumElements();
3537 unsigned CastLen = CastTy.getVectorNumElements();
3538 if (2 * CastLen != SrcLen)
3539 return SDValue();
3540
3541 SmallVector<int, 128> Mask(SrcLen);
3542 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3543 Mask[i] = 2 * i;
3544 Mask[i + CastLen] = 2 * i + 1;
3545 }
3546 SDValue Deal =
3547 DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
3548 return opSplit(Deal, dl, DAG).first;
3549}
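// ---------------------------------------------------------------------------
// Editor's illustration (not part of the LLVM source): the combine above
// builds a "deal" mask that gathers the even source lanes into the low half
// and the odd lanes into the high half, then keeps only the low half. A
// sketch of the mask construction for SrcLen == 2 * CastLen; the helper name
// is hypothetical.
#include <vector>

static std::vector<int> buildDealMask(unsigned CastLen) {
  std::vector<int> Mask(2 * CastLen);
  for (unsigned I = 0; I != CastLen; ++I) {
    Mask[I] = 2 * I;               // even source lanes -> low half
    Mask[I + CastLen] = 2 * I + 1; // odd source lanes  -> high half
  }
  return Mask; // buildDealMask(4) == {0, 2, 4, 6, 1, 3, 5, 7}
}
// ---------------------------------------------------------------------------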
3550
3551SDValue
3552HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3553 SDValue Op, DAGCombinerInfo &DCI) const {
3554 // Fold
3555 // concat (shuffle x, y, m1), (shuffle x, y, m2)
3556 // into
3557 // shuffle (concat x, y), undef, m3
3558 if (Op.getNumOperands() != 2)
3559 return SDValue();
3560
3561 SelectionDAG &DAG = DCI.DAG;
3562 const SDLoc &dl(Op);
3563 SDValue V0 = Op.getOperand(0);
3564 SDValue V1 = Op.getOperand(1);
3565
3566 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
3567 return SDValue();
3568 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
3569 return SDValue();
3570
3571 SetVector<SDValue> Order;
3572 Order.insert(V0.getOperand(0));
3573 Order.insert(V0.getOperand(1));
3574 Order.insert(V1.getOperand(0));
3575 Order.insert(V1.getOperand(1));
3576
3577 if (Order.size() > 2)
3578 return SDValue();
3579
3580 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3581 // result must be the same.
3582 EVT InpTy = V0.getValueType();
3583 assert(InpTy.isVector());
3584 unsigned InpLen = InpTy.getVectorNumElements();
3585
3586 SmallVector<int, 128> LongMask;
3587 auto AppendToMask = [&](SDValue Shuffle) {
3588 auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
3589 ArrayRef<int> Mask = SV->getMask();
3590 SDValue X = Shuffle.getOperand(0);
3591 SDValue Y = Shuffle.getOperand(1);
3592 for (int M : Mask) {
3593 if (M == -1) {
3594 LongMask.push_back(M);
3595 continue;
3596 }
3597 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
3598 if (static_cast<unsigned>(M) >= InpLen)
3599 M -= InpLen;
3600
3601 int OutOffset = Order[0] == Src ? 0 : InpLen;
3602 LongMask.push_back(M + OutOffset);
3603 }
3604 };
3605
3606 AppendToMask(V0);
3607 AppendToMask(V1);
3608
3609 SDValue C0 = Order.front();
3610 SDValue C1 = Order.back(); // Can be same as front
3611 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
3612
3613 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
3614 return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
3615}
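// ---------------------------------------------------------------------------
// Editor's illustration (not part of the LLVM source): a standalone sketch of
// the mask rebasing done above. In each input mask, indices < InpLen pick
// from the shuffle's first operand and indices >= InpLen from its second;
// -1 is an undef lane. SecondIsLow is a hypothetical stand-in for the
// Order[0] check: it says the shuffle's second operand is the one placed in
// the low half of (concat C0, C1).
#include <vector>

static void appendRebasedMask(std::vector<int> &LongMask,
                              const std::vector<int> &Mask, unsigned InpLen,
                              bool SecondIsLow) {
  for (int M : Mask) {
    if (M < 0) {               // undef lane stays undef
      LongMask.push_back(-1);
      continue;
    }
    bool FromSecond = static_cast<unsigned>(M) >= InpLen;
    int Lane = FromSecond ? M - static_cast<int>(InpLen) : M;
    // Decide which half of the concatenation this lane now lives in.
    bool InHighHalf = (FromSecond != SecondIsLow);
    LongMask.push_back(Lane + (InHighHalf ? static_cast<int>(InpLen) : 0));
  }
}
// Usage sketch: call once per shuffle feeding the concat, appending both
// rebased masks into LongMask before building the single long shuffle.
// ---------------------------------------------------------------------------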
3616
3617SDValue
3618HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3619 const {
3620 const SDLoc &dl(N);
3621 SelectionDAG &DAG = DCI.DAG;
3622 SDValue Op(N, 0);
3623 unsigned Opc = Op.getOpcode();
3624
3625 SmallVector<SDValue, 4> Ops(N->ops().begin(), N->ops().end());
3626
3627 if (Opc == ISD::TRUNCATE)
3628 return combineTruncateBeforeLegal(Op, DCI);
3629 if (Opc == ISD::CONCAT_VECTORS)
3630 return combineConcatVectorsBeforeLegal(Op, DCI);
3631
3632 if (DCI.isBeforeLegalizeOps())
3633 return SDValue();
3634
3635 switch (Opc) {
3636 case ISD::VSELECT: {
3637 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
3638 SDValue Cond = Ops[0];
3639 if (Cond->getOpcode() == ISD::XOR) {
3640 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3641 if (C1->getOpcode() == HexagonISD::QTRUE)
3642 return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
3643 }
3644 break;
3645 }
3646 case HexagonISD::V2Q:
3647 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
3648 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
3649 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
3650 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
3651 }
3652 break;
3653 case HexagonISD::Q2V:
3654 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
3655 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
3656 DAG.getAllOnesConstant(dl, MVT::i32));
3657 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
3658 return getZero(dl, ty(Op), DAG);
3659 break;
3660 case HexagonISD::VINSERTW0:
3661 if (isUndef(Ops[1]))
3662 return Ops[0];
3663 break;
3664 case HexagonISD::VROR: {
3665 if (Ops[0].getOpcode() == HexagonISD::VROR) {
3666 SDValue Vec = Ops[0].getOperand(0);
3667 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
3668 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
3669 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
3670 }
3671 break;
3672 }
3673 }
3674
3675 return SDValue();
3676}
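// ---------------------------------------------------------------------------
// Editor's illustration (not part of the LLVM source): the VSELECT fold above
// uses the lane-wise identity select(!c, a, b) == select(c, b, a); with an
// all-ones predicate (QTRUE), the xor is a lane-wise NOT. A scalar sanity
// check of that identity; the helper names are hypothetical.
#include <cassert>

static int selectScalar(bool C, int A, int B) { return C ? A : B; }

static void checkVSelectXorFold() {
  const bool Lanes[] = {false, true};
  for (bool C : Lanes)
    assert(selectScalar(!C, 1, 2) == selectScalar(C, 2, 1) &&
           "(vselect (xor x, qtrue), v0, v1) == (vselect x, v1, v0) per lane");
}
// ---------------------------------------------------------------------------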
3677
3678bool
3679HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
3680 if (Subtarget.isHVXVectorType(Ty, true))
3681 return false;
3682 auto Action = getPreferredHvxVectorAction(Ty);
3683 if (Action == TargetLoweringBase::TypeSplitVector)
3684 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3685 return false;
3686}
3687
3688bool
3689HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
3690 if (Subtarget.isHVXVectorType(Ty, true))
3691 return false;
3692 auto Action = getPreferredHvxVectorAction(Ty);
3693 if (Action == TargetLoweringBase::TypeWidenVector)
3694 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3695 return false;
3696}
3697
3698bool
3699HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
3700 if (!Subtarget.useHVXOps())
3701 return false;
3702 // If the type of any result, or any operand type are HVX vector types,
3703 // this is an HVX operation.
3704 auto IsHvxTy = [this](EVT Ty) {
3705 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
3706 };
3707 auto IsHvxOp = [this](SDValue Op) {
3708 return Op.getValueType().isSimple() &&
3709 Subtarget.isHVXVectorType(ty(Op), true);
3710 };
3711 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
3712 return true;
3713
3714 // Check if this could be an HVX operation after type widening.
3715 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3716 if (!Op.getValueType().isSimple())
3717 return false;
3718 MVT ValTy = ty(Op);
3719 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
3720 };
3721
3722 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
3723 if (IsWidenedToHvx(SDValue(N, i)))
3724 return true;
3725 }
3726 return llvm::any_of(N->ops(), IsWidenedToHvx);
3727}