LLVM 22.0.0git
HexagonISelLoweringHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
// Command-line threshold (in bytes) used when deciding whether to widen
// short vectors to full HVX vectors.  NOTE(review): the cl::init/cl::Hidden
// arguments of this option fall on a line elided from this listing.
static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
  cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));

// When set, enables the "fast" FP conversion lowering path.
static cl::opt<bool>
    EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false),
                        cl::desc("Enable FP fast conversion routine."));

// Legal single-register (V) and register-pair (W) integer vector types for
// the 64-byte and 128-byte HVX configurations, respectively.
static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
42
43static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
44 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
45 MVT ElemTy = Ty.getScalarType();
46 switch (ElemTy.SimpleTy) {
47 case MVT::f16:
48 return std::make_tuple(5, 15, 10);
49 case MVT::f32:
50 return std::make_tuple(8, 127, 23);
51 case MVT::f64:
52 return std::make_tuple(11, 1023, 52);
53 default:
54 break;
55 }
56 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
57}
58
// Register all HVX vector types with their register classes and configure
// per-operation legalization actions for the selected HVX mode (64-byte or
// 128-byte vectors).  NOTE(review): many setOperationAction lines are
// elided from this listing.
void
HexagonTargetLowering::initializeHVXLowering() {
  if (Subtarget.useHVX64BOps()) {
    addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
    // These "short" boolean vector types should be legal because
    // they will appear as results of vector compares. If they were
    // not legal, type legalization would try to make them legal
    // and that would require using operations that do not use or
    // produce such types. That, in turn, would imply using custom
    // nodes, which would be unoptimizable by the DAG combiner.
    // The idea is to rely on target-independent operations as much
    // as possible.
    addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
  } else if (Subtarget.useHVX128BOps()) {
    addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
    addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
    addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
    addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
    // HVX floating point (f16/f32) requires v68+.
    if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
      addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
      addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
      addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
      addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
    }
    // bf16 vector types require v81+.
    if (Subtarget.useHVXV81Ops()) {
      addRegisterClass(MVT::v64bf16, &Hexagon::HvxVRRegClass);
      addRegisterClass(MVT::v128bf16, &Hexagon::HvxWRRegClass);
    }
  }

  // Set up operation actions.

  bool Use64b = Subtarget.useHVX64BOps();
  ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
  ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
  MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
  MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
  MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;

  // Helper: record that Opc on FromTy is carried out on ToTy instead.
  auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
    AddPromotedToType(Opc, FromTy, ToTy);
  };

  // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
  // Note: v16i1 -> i16 is handled in type legalization instead of op
  // legalization.

  if (Subtarget.useHVX128BOps()) {
  }
  if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
      Subtarget.useHVXFloatingPoint()) {

    static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
    static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };

    for (MVT T : FloatV) {



      // Custom-lower BUILD_VECTOR. The standard (target-independent)
      // handling of it would convert it to a load, which is not always
      // the optimal choice.
    }


    // BUILD_VECTOR with f16 operands cannot be promoted without
    // promoting the result, so lower the node to vsplat or constant pool

    // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
    // generated.
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
    setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);

    if (Subtarget.useHVXV81Ops()) {
      // bf16: shuffles are done on byte vectors; compares and FP
      // arithmetic are promoted to f32 vectors.
      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128bf16, ByteW);
      setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64bf16, ByteV);
      setPromoteTo(ISD::SETCC, MVT::v64bf16, MVT::v64f32);
      setPromoteTo(ISD::FADD, MVT::v64bf16, MVT::v64f32);
      setPromoteTo(ISD::FSUB, MVT::v64bf16, MVT::v64f32);
      setPromoteTo(ISD::FMUL, MVT::v64bf16, MVT::v64f32);
      setPromoteTo(ISD::FMINNUM, MVT::v64bf16, MVT::v64f32);
      setPromoteTo(ISD::FMAXNUM, MVT::v64bf16, MVT::v64f32);


      // bf16 (masked) loads and stores are custom-lowered.
      setOperationAction(ISD::LOAD, MVT::v128bf16, Custom);
      setOperationAction(ISD::STORE, MVT::v128bf16, Custom);

      setOperationAction(ISD::MLOAD, MVT::v64bf16, Custom);
      setOperationAction(ISD::MSTORE, MVT::v64bf16, Custom);

      setOperationAction(ISD::MLOAD, MVT::v128bf16, Custom);
      setOperationAction(ISD::MSTORE, MVT::v128bf16, Custom);

    }

    for (MVT P : FloatW) {

      // Custom-lower BUILD_VECTOR. The standard (target-independent)
      // handling of it would convert it to a load, which is not always
      // the optimal choice.
      // Make concat-vectors custom to handle concats of more than 2 vectors.

    }

    if (Subtarget.useHVXQFloatOps()) {
    } else if (Subtarget.useHVXIEEEFPOps()) {
    }
  }

  // Actions for legal single-vector integer types.
  for (MVT T : LegalV) {

    if (T != ByteV) {
    }

    if (T.getScalarType() != MVT::i32) {
    }

    if (T.getScalarType() != MVT::i32) {
    }

    // Make concat-vectors custom to handle concats of more than 2 vectors.
    if (T != ByteV) {
      // HVX only has shifts of words and halfwords.

      // Promote all shuffles to operate on vectors of bytes.
      setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
    }

    if (Subtarget.useHVXFloatingPoint()) {
      // Same action for both QFloat and IEEE.
    }

  }

  // Actions for legal vector-pair integer types.
  for (MVT T : LegalW) {
    // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
    // independent) handling of it would convert it to a load, which is
    // not always the optimal choice.
    // Make concat-vectors custom to handle concats of more than 2 vectors.

    // Custom-lower these operations for pairs. Expand them into a concat
    // of the corresponding operations on individual vectors.


    if (T != ByteW) {

      // Promote all shuffles to operate on vectors of bytes.
      setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
    }

    if (T.getScalarType() != MVT::i32) {
    }

    if (Subtarget.useHVXFloatingPoint()) {
      // Same action for both QFloat and IEEE.
    }
  }

  // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
  setOperationAction(ISD::MULHS, WordV, Custom); // -> _LOHI
  setOperationAction(ISD::MULHU, WordV, Expand); // -> _LOHI

  // Expand the FP condition codes for which there is no direct handling.
  setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
  setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);

  setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
  setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);

  // Boolean vectors.

  for (MVT T : LegalW) {
    // Boolean types for vector pairs will overlap with the boolean
    // types for single vectors, e.g.
    //   v64i8  -> v64i1 (single)
    //   v64i16 -> v64i1 (pair)
    // Set these actions first, and allow the single actions to overwrite
    // any duplicates.
    MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
    // Masked load/store takes a mask that may need splitting.
  }

  for (MVT T : LegalV) {
    MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
  }

  if (Use64b) {
    for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
  } else {
    for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
  }

  // Handle store widening for short vectors.
  unsigned HwLen = Subtarget.getVectorLength();
  for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
    if (ElemTy == MVT::i1)
      continue;
    int ElemWidth = ElemTy.getFixedSizeInBits();
    int MaxElems = (8*HwLen) / ElemWidth;
    // Walk the power-of-2 vector lengths shorter than a full HVX vector.
    for (int N = 2; N < MaxElems; N *= 2) {
      MVT VecTy = MVT::getVectorVT(ElemTy, N);
      auto Action = getPreferredVectorAction(VecTy);
      if (Subtarget.useHVXFloatingPoint()) {
      }

      MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
      if (!isTypeLegal(BoolTy))
    }
  }
}

  // Include cases which are not handled earlier

}
502
// Decide the type-legalization action (split/widen/default) for a vector
// type under HVX.  Returns ~0u to mean "no preference, use the default".
// NOTE(review): the return statements of several of the guarded cases are
// elided from this listing.
unsigned
HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
  // Early exit for invalid input types
  if (!VecTy.isVector())
    return ~0u;

  MVT ElemTy = VecTy.getVectorElementType();
  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();

  // Split vectors of i1 that exceed byte vector length.
  if (ElemTy == MVT::i1 && VecLen > HwLen)

  ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
  // For shorter vectors of i1, widen them if any of the corresponding
  // vectors of integers needs to be widened.
  if (ElemTy == MVT::i1) {
    for (MVT T : Tys) {
      assert(T != MVT::i1);
      // Recurse on the equal-length integer vector; the first non-default
      // answer wins.
      auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
      if (A != ~0u)
        return A;
    }
    return ~0u;
  }

  // If the size of VecTy is at least half of the vector length,
  // widen the vector. Note: the threshold was not selected in
  // any scientific way.
  if (llvm::is_contained(Tys, ElemTy)) {
    unsigned VecWidth = VecTy.getSizeInBits();
    unsigned HwWidth = 8*HwLen;
    if (VecWidth > 2*HwWidth)

    // An explicit -hexagon-hvx-widen threshold overrides the heuristic.
    bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
    if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
    if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
  }

  // Defer to default.
  return ~0u;
}
549
// Refine the action for an operation previously marked Custom.
// NOTE(review): the switch cases and the default return value are elided
// from this listing.
unsigned
HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  }
}
561
563HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
564 const SDLoc &dl, SelectionDAG &DAG) const {
566 IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
567 append_range(IntOps, Ops);
568 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
569}
570
571MVT
572HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
573 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
574
575 MVT ElemTy = Tys.first.getVectorElementType();
576 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
577 Tys.second.getVectorNumElements());
578}
579
580HexagonTargetLowering::TypePair
581HexagonTargetLowering::typeSplit(MVT VecTy) const {
582 assert(VecTy.isVector());
583 unsigned NumElem = VecTy.getVectorNumElements();
584 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
585 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
586 return { HalfTy, HalfTy };
587}
588
589MVT
590HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
591 MVT ElemTy = VecTy.getVectorElementType();
592 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
593 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
594}
595
596MVT
597HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
598 MVT ElemTy = VecTy.getVectorElementType();
599 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
600 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
601}
602
// Bitcast Vec to a vector of the same total width whose elements have type
// ElemTy.  No-op if the element type already matches.
HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
                                  SelectionDAG &DAG) const {
  if (ty(Vec).getVectorElementType() == ElemTy)
    return Vec;
  MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
  return DAG.getBitcast(CastTy, Vec);
}
611
// Concatenate the two vector values of the pair into one twice-as-long
// vector value.
HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
                              SelectionDAG &DAG) const {
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
                     Ops.first, Ops.second);
}
618
619HexagonTargetLowering::VectorPair
620HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
621 SelectionDAG &DAG) const {
622 TypePair Tys = typeSplit(ty(Vec));
623 if (Vec.getOpcode() == HexagonISD::QCAT)
624 return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
625 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
626}
627
628bool
629HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
630 return Subtarget.isHVXVectorType(Ty) &&
631 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
632}
633
634bool
635HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
636 return Subtarget.isHVXVectorType(Ty) &&
637 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
638}
639
640bool
641HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
642 return Subtarget.isHVXVectorType(Ty, true) &&
643 Ty.getVectorElementType() == MVT::i1;
644}
645
646bool HexagonTargetLowering::allowsHvxMemoryAccess(
647 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
648 // Bool vectors are excluded by default, but make it explicit to
649 // emphasize that bool vectors cannot be loaded or stored.
650 // Also, disallow double vector stores (to prevent unnecessary
651 // store widening in DAG combiner).
652 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
653 return false;
654 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
655 return false;
656 if (Fast)
657 *Fast = 1;
658 return true;
659}
660
661bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
662 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
663 if (!Subtarget.isHVXVectorType(VecTy))
664 return false;
665 // XXX Should this be false? vmemu are a bit slower than vmem.
666 if (Fast)
667 *Fast = 1;
668 return true;
669}
670
// Rewrite the PS_vsplat* pseudo instructions produced by instruction
// selection into real HVX splat instructions.  On HVX v62+ the byte and
// halfword splats (V6_lvsplatb/V6_lvsplath) exist natively; on older
// architectures the value is first replicated into a full 32-bit scalar
// register and splatted with V6_lvsplatw.
void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
    MachineInstr &MI, SDNode *Node) const {
  unsigned Opc = MI.getOpcode();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock &MB = *MI.getParent();
  MachineFunction &MF = *MB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  auto At = MI.getIterator();

  switch (Opc) {
  case Hexagon::PS_vsplatib:
    if (Subtarget.useHVXV62Ops()) {
      // SplatV = A2_tfrsi #imm
      // OutV = V6_lvsplatb SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .add(MI.getOperand(1));
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
          .addReg(SplatV);
    } else {
      // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      assert(InpOp.isImm());
      // Replicate the 8-bit immediate into all four bytes of the word.
      uint32_t V = InpOp.getImm() & 0xFF;
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .addImm(V << 24 | V << 16 | V << 8 | V);
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
    }
    // Remove the pseudo; the expansion was inserted before it.
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatrb:
    if (Subtarget.useHVXV62Ops()) {
      // OutV = V6_lvsplatb Inp
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
          .add(MI.getOperand(1));
    } else {
      // Replicate the byte register across a word first, then splat it.
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
          .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
          .addReg(SplatV);
    }
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatih:
    if (Subtarget.useHVXV62Ops()) {
      // SplatV = A2_tfrsi #imm
      // OutV = V6_lvsplath SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .add(MI.getOperand(1));
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
          .addReg(SplatV);
    } else {
      // SplatV = A2_tfrsi #imm:#imm
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      assert(InpOp.isImm());
      // Replicate the 16-bit immediate into both halves of the word.
      uint32_t V = InpOp.getImm() & 0xFFFF;
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .addImm(V << 16 | V);
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
    }
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatrh:
    if (Subtarget.useHVXV62Ops()) {
      // OutV = V6_lvsplath Inp
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
          .add(MI.getOperand(1));
    } else {
      // SplatV = A2_combine_ll Inp, Inp
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(1);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
          .addReg(InpOp.getReg(), 0, InpOp.getSubReg())
          .addReg(InpOp.getReg(), 0, InpOp.getSubReg());
      Register OutV = MI.getOperand(0).getReg();
      BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
    }
    MB.erase(At);
    break;
  case Hexagon::PS_vsplatiw:
  case Hexagon::PS_vsplatrw:
    if (Opc == Hexagon::PS_vsplatiw) {
      // SplatV = A2_tfrsi #imm
      Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
      BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
          .add(MI.getOperand(1));
      // Swap the immediate operand for the materialized register before
      // morphing the pseudo in place below.
      MI.getOperand(1).ChangeToRegister(SplatV, false);
    }
    // OutV = V6_lvsplatw SplatV/Inp
    MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
    break;
  }
}
780
// Convert an index expressed in units of ElemTy elements into a byte
// index by scaling it with the element size (a left shift by log2 of the
// element size in bytes).
HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
                                          SelectionDAG &DAG) const {
  if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
    ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);

  unsigned ElemWidth = ElemTy.getSizeInBits();
  // Byte elements: the index is already a byte index.
  if (ElemWidth == 8)
    return ElemIdx;

  unsigned L = Log2_32(ElemWidth/8);
  const SDLoc &dl(ElemIdx);
  return DAG.getNode(ISD::SHL, dl, MVT::i32,
                     {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
}
796
// Reduce an element index to its position within the 32-bit word that
// contains the element: Idx & (elements-per-word - 1).
HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
                                        SelectionDAG &DAG) const {
  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  // Word elements: the sub-index is always 0, return Idx unchanged.
  if (ElemWidth == 32)
    return Idx;

  if (ty(Idx) != MVT::i32)
    Idx = DAG.getBitcast(MVT::i32, Idx);
  const SDLoc &dl(Idx);
  SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
  SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
  return SubIdx;
}
812
// Emit a vector shuffle expressed on byte vectors: each entry of Mask is
// expanded into ElemSize consecutive byte indices, and the operands are
// bitcast to i8 vectors.
HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
                                      SDValue Op1, ArrayRef<int> Mask,
                                      SelectionDAG &DAG) const {
  MVT OpTy = ty(Op0);
  assert(OpTy == ty(Op1));

  MVT ElemTy = OpTy.getVectorElementType();
  // Already a byte vector: shuffle directly.
  if (ElemTy == MVT::i8)
    return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
  assert(ElemTy.getSizeInBits() >= 8);

  MVT ResTy = tyVector(OpTy, MVT::i8);
  unsigned ElemSize = ElemTy.getSizeInBits() / 8;

  SmallVector<int,128> ByteMask;
  for (int M : Mask) {
    if (M < 0) {
      // Undef lane: all of its bytes stay undef.
      for (unsigned I = 0; I != ElemSize; ++I)
        ByteMask.push_back(-1);
    } else {
      int NewM = M*ElemSize;
      for (unsigned I = 0; I != ElemSize; ++I)
        ByteMask.push_back(NewM+I);
    }
  }
  assert(ResTy.getVectorNumElements() == ByteMask.size());
  return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
                              opCastElem(Op1, MVT::i8, DAG), ByteMask);
}
843
// Build a single HVX vector register from the given scalar Values.
// Strategies, in order: undef, zero, or word splat for uniform inputs;
// a constant-pool load for all-constant inputs; a shuffle when all values
// are extracts from one source vector; otherwise assemble two halves with
// rotate+insert and OR them together.
// NOTE(review): the declaration of Words and part of the getLoad call are
// on lines elided from this listing.
HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
                                         const SDLoc &dl, MVT VecTy,
                                         SelectionDAG &DAG) const {
  unsigned VecLen = Values.size();
  MachineFunction &MF = DAG.getMachineFunction();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();
  unsigned HwLen = Subtarget.getVectorLength();

  unsigned ElemSize = ElemWidth / 8;
  assert(ElemSize*VecLen == HwLen);

  // Repack sub-word elements into 32-bit words; word-sized (i32/f32)
  // elements are bitcast to i32 directly.
  if (VecTy.getVectorElementType() != MVT::i32 &&
      !(Subtarget.useHVXFloatingPoint() &&
        VecTy.getVectorElementType() == MVT::f32)) {
    assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
    unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
    MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
    for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
      SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
      Words.push_back(DAG.getBitcast(MVT::i32, W));
    }
  } else {
    for (SDValue V : Values)
      Words.push_back(DAG.getBitcast(MVT::i32, V));
  }
  // True if all defined values are identical; SplatV receives the common
  // value (or Values[0] when everything is undef).
  auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
    unsigned NumValues = Values.size();
    assert(NumValues > 0);
    bool IsUndef = true;
    for (unsigned i = 0; i != NumValues; ++i) {
      if (Values[i].isUndef())
        continue;
      IsUndef = false;
      if (!SplatV.getNode())
        SplatV = Values[i];
      else if (SplatV != Values[i])
        return false;
    }
    if (IsUndef)
      SplatV = Values[0];
    return true;
  };

  unsigned NumWords = Words.size();
  SDValue SplatV;
  bool IsSplat = isSplat(Words, SplatV);
  if (IsSplat && isUndef(SplatV))
    return DAG.getUNDEF(VecTy);
  if (IsSplat) {
    assert(SplatV.getNode());
    if (isNullConstant(SplatV))
      return getZero(dl, VecTy, DAG);
    MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
    SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
    return DAG.getBitcast(VecTy, S);
  }

  // Delay recognizing constant vectors until here, so that we can generate
  // a vsplat.
  SmallVector<ConstantInt*, 128> Consts(VecLen);
  bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
  if (AllConst) {
    ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
                            (Constant**)Consts.end());
    Constant *CV = ConstantVector::get(Tmp);
    Align Alignment(HwLen);
    SDValue CP =
        LowerConstantPool(DAG.getConstantPool(CV, VecTy, Alignment), DAG);
    return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
  }

  // A special case is a situation where the vector is built entirely from
  // elements extracted from another vector. This could be done via a shuffle
  // more efficiently, but typically, the size of the source vector will not
  // match the size of the vector being built (which precludes the use of a
  // shuffle directly).
  // This only handles a single source vector, and the vector being built
  // should be of a sub-vector type of the source vector type.
  auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
                                             SmallVectorImpl<int> &SrcIdx) {
    SDValue Vec;
    for (SDValue V : Values) {
      if (isUndef(V)) {
        SrcIdx.push_back(-1);
        continue;
      }
      if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
        return false;
      // All extracts should come from the same vector.
      SDValue T = V.getOperand(0);
      if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
        return false;
      Vec = T;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
      if (C == nullptr)
        return false;
      int I = C->getSExtValue();
      assert(I >= 0 && "Negative element index");
      SrcIdx.push_back(I);
    }
    SrcVec = Vec;
    return true;
  };

  SmallVector<int,128> ExtIdx;
  SDValue ExtVec;
  if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
    MVT ExtTy = ty(ExtVec);
    unsigned ExtLen = ExtTy.getVectorNumElements();
    if (ExtLen == VecLen || ExtLen == 2*VecLen) {
      // Construct a new shuffle mask that will produce a vector with the same
      // number of elements as the input vector, and such that the vector we
      // want will be the initial subvector of it.
      SmallVector<int,128> Mask;
      BitVector Used(ExtLen);

      for (int M : ExtIdx) {
        Mask.push_back(M);
        if (M >= 0)
          Used.set(M);
      }
      // Fill the rest of the mask with the unused elements of ExtVec in hopes
      // that it will result in a permutation of ExtVec's elements. It's still
      // fine if it doesn't (e.g. if undefs are present, or elements are
      // repeated), but permutations can always be done efficiently via vdelta
      // and vrdelta.
      for (unsigned I = 0; I != ExtLen; ++I) {
        if (Mask.size() == ExtLen)
          break;
        if (!Used.test(I))
          Mask.push_back(I);
      }

      SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
                                       DAG.getUNDEF(ExtTy), Mask);
      return ExtLen == VecLen ? S : LoHalf(S, DAG);
    }
  }

  // Find most common element to initialize vector with. This is to avoid
  // unnecessary vinsert/valign for cases where the same value is present
  // many times. Creates a histogram of the vector's elements to find the
  // most common element n.
  assert(4*Words.size() == Subtarget.getVectorLength());
  int VecHist[32];
  int n = 0;
  for (unsigned i = 0; i != NumWords; ++i) {
    VecHist[i] = 0;
    if (Words[i].isUndef())
      continue;
    for (unsigned j = i; j != NumWords; ++j)
      if (Words[i] == Words[j])
        VecHist[i]++;

    if (VecHist[i] > VecHist[n])
      n = i;
  }

  SDValue HalfV = getZero(dl, VecTy, DAG);
  if (VecHist[n] > 1) {
    // Pre-fill both halves with the most common word.
    SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
    HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
                        {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
  }
  SDValue HalfV0 = HalfV;
  SDValue HalfV1 = HalfV;

  // Construct two halves in parallel, then or them together. Rn and Rm count
  // number of rotations needed before the next element. One last rotation is
  // performed post-loop to position the last element.
  int Rn = 0, Rm = 0;
  SDValue Sn, Sm;
  SDValue N = HalfV0;
  SDValue M = HalfV1;
  for (unsigned i = 0; i != NumWords/2; ++i) {
    // Rotate by element count since last insertion.
    if (Words[i] != Words[n] || VecHist[n] <= 1) {
      Sn = DAG.getConstant(Rn, dl, MVT::i32);
      HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
      N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                      {HalfV0, Words[i]});
      Rn = 0;
    }
    if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
      Sm = DAG.getConstant(Rm, dl, MVT::i32);
      HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
      M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
                      {HalfV1, Words[i+NumWords/2]});
      Rm = 0;
    }
    Rn += 4;
    Rm += 4;
  }
  // Perform last rotation.
  Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
  Sm = DAG.getConstant(Rm, dl, MVT::i32);
  HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
  HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});

  // OR the two halves together on i32 vectors, then cast back to VecTy's
  // element type.
  SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
  SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);

  SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});

  SDValue OutV =
      DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
  return OutV;
}
1056
// Place the bits of the predicate PredV into the low bytes of a full-size
// HVX byte vector, using BitBytes bytes per original predicate element.
// If ZeroFill is true, the bytes beyond the used prefix are zeroed;
// otherwise they are unspecified. PredV may be an HVX vector predicate,
// or a short scalar predicate (v2i1/v4i1/v8i1).
SDValue
HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
      unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
  MVT PredTy = ty(PredV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);

  if (Subtarget.isHVXVectorType(PredTy, true)) {
    // Move the vector predicate SubV to a vector register, and scale it
    // down to match the representation (bytes per type element) that VecV
    // uses. The scaling down will pick every 2nd or 4th (every Scale-th
    // in general) element and put them at the front of the resulting
    // vector. This subvector will then be inserted into the Q2V of VecV.
    // To avoid having an operation that generates an illegal type (short
    // vector), generate a full size vector.
    //
    SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
    SmallVector<int,128> Mask(HwLen);
    // Scale = BitBytes(PredV) / Given BitBytes.
    unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
    unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;

    // Distribute every Scale-th byte of T into the first BlockLen bytes
    // of each of the Scale blocks of the shuffle result; only block 0
    // (the prefix) is meaningful to the caller.
    for (unsigned i = 0; i != HwLen; ++i) {
      unsigned Num = i % Scale;
      unsigned Off = i / Scale;
      Mask[BlockLen*Num + Off] = i;
    }
    SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
    if (!ZeroFill)
      return S;
    // Fill the bytes beyond BlockLen with 0s.
    // V6_pred_scalar2 cannot fill the entire predicate, so it only works
    // when BlockLen < HwLen.
    assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
    MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
    // Q has the first BlockLen lanes set; AND with its byte expansion
    // zeroes everything past the prefix.
    SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                         {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
    SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
    return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
  }

  // Make sure that this is a valid scalar predicate.
  assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);

  // Bytes currently occupied per element in the 8-byte scalar form.
  unsigned Bytes = 8 / PredTy.getVectorNumElements();
  // Double-buffered word lists: each iteration reads Words[IdxW^1] and
  // writes Words[IdxW].
  SmallVector<SDValue,4> Words[2];
  unsigned IdxW = 0;

  // P2D transfers the predicate to a 64-bit scalar register pair.
  SDValue W0 = isUndef(PredV)
                  ? DAG.getUNDEF(MVT::i64)
                  : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
  if (Bytes < BitBytes) {
    Words[IdxW].push_back(HiHalf(W0, DAG));
    Words[IdxW].push_back(LoHalf(W0, DAG));
  } else
    Words[IdxW].push_back(W0);

  // Widen: repeatedly double the bytes-per-element until it matches
  // BitBytes, expanding (below 4 bytes) or duplicating words.
  while (Bytes < BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes < 4) {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = expandPredicate(W, dl, DAG);
        Words[IdxW].push_back(HiHalf(T, DAG));
        Words[IdxW].push_back(LoHalf(T, DAG));
      }
    } else {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(W);
        Words[IdxW].push_back(W);
      }
    }
    Bytes *= 2;
  }

  // Narrow: halve the bytes-per-element until it matches BitBytes.
  while (Bytes > BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes <= 4) {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = contractPredicate(W, dl, DAG);
        Words[IdxW].push_back(T);
      }
    } else {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(W);
      }
    }
    Bytes /= 2;
  }

  assert(Bytes == BitBytes);
  // Special case for v2i1 with 1 byte per bit.
  if (BitBytes == 1 && PredTy == MVT::v2i1)
    ByteTy = MVT::getVectorVT(MVT::i16, HwLen);

  // Insert the words into the vector front-to-back: rotate by 4 bytes,
  // then overwrite word 0, for each accumulated word.
  SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
  SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
  for (const SDValue &W : Words[IdxW]) {
    Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
    Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
  }

  return Vec;
}
1163
1164SDValue
1165HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1166 const SDLoc &dl, MVT VecTy,
1167 SelectionDAG &DAG) const {
1168 // Construct a vector V of bytes, such that a comparison V >u 0 would
1169 // produce the required vector predicate.
1170 unsigned VecLen = Values.size();
1171 unsigned HwLen = Subtarget.getVectorLength();
1172 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1174 bool AllT = true, AllF = true;
1175
1176 auto IsTrue = [] (SDValue V) {
1177 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1178 return !N->isZero();
1179 return false;
1180 };
1181 auto IsFalse = [] (SDValue V) {
1182 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1183 return N->isZero();
1184 return false;
1185 };
1186
1187 if (VecLen <= HwLen) {
1188 // In the hardware, each bit of a vector predicate corresponds to a byte
1189 // of a vector register. Calculate how many bytes does a bit of VecTy
1190 // correspond to.
1191 assert(HwLen % VecLen == 0);
1192 unsigned BitBytes = HwLen / VecLen;
1193 for (SDValue V : Values) {
1194 AllT &= IsTrue(V);
1195 AllF &= IsFalse(V);
1196
1197 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1198 : DAG.getUNDEF(MVT::i8);
1199 for (unsigned B = 0; B != BitBytes; ++B)
1200 Bytes.push_back(Ext);
1201 }
1202 } else {
1203 // There are as many i1 values, as there are bits in a vector register.
1204 // Divide the values into groups of 8 and check that each group consists
1205 // of the same value (ignoring undefs).
1206 for (unsigned I = 0; I != VecLen; I += 8) {
1207 unsigned B = 0;
1208 // Find the first non-undef value in this group.
1209 for (; B != 8; ++B) {
1210 if (!Values[I+B].isUndef())
1211 break;
1212 }
1213 SDValue F = Values[I+B];
1214 AllT &= IsTrue(F);
1215 AllF &= IsFalse(F);
1216
1217 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1218 : DAG.getUNDEF(MVT::i8);
1219 Bytes.push_back(Ext);
1220 // Verify that the rest of values in the group are the same as the
1221 // first.
1222 for (; B != 8; ++B)
1223 assert(Values[I+B].isUndef() || Values[I+B] == F);
1224 }
1225 }
1226
1227 if (AllT)
1228 return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
1229 if (AllF)
1230 return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
1231
1232 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1233 SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
1234 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1235}
1236
1237SDValue
1238HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1239 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1240 MVT ElemTy = ty(VecV).getVectorElementType();
1241
1242 unsigned ElemWidth = ElemTy.getSizeInBits();
1243 assert(ElemWidth >= 8 && ElemWidth <= 32);
1244 (void)ElemWidth;
1245
1246 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1247 SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1248 {VecV, ByteIdx});
1249 if (ElemTy == MVT::i32)
1250 return ExWord;
1251
1252 // Have an extracted word, need to extract the smaller element out of it.
1253 // 1. Extract the bits of (the original) IdxV that correspond to the index
1254 // of the desired element in the 32-bit word.
1255 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1256 // 2. Extract the element from the word.
1257 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1258 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1259}
1260
1261SDValue
1262HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1263 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1264 // Implement other return types if necessary.
1265 assert(ResTy == MVT::i1);
1266
1267 unsigned HwLen = Subtarget.getVectorLength();
1268 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1269 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1270
1271 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1272 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1273 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1274
1275 SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1276 SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1277 return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1278}
1279
1280SDValue
1281HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
1282 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1283 MVT ElemTy = ty(VecV).getVectorElementType();
1284
1285 unsigned ElemWidth = ElemTy.getSizeInBits();
1286 assert(ElemWidth >= 8 && ElemWidth <= 32);
1287 (void)ElemWidth;
1288
1289 auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
1290 SDValue ByteIdxV) {
1291 MVT VecTy = ty(VecV);
1292 unsigned HwLen = Subtarget.getVectorLength();
1293 SDValue MaskV =
1294 DAG.getNode(ISD::AND, dl, MVT::i32,
1295 {ByteIdxV, DAG.getSignedConstant(-4, dl, MVT::i32)});
1296 SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
1297 SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
1298 SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1299 {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
1300 SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
1301 return TorV;
1302 };
1303
1304 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1305 if (ElemTy == MVT::i32)
1306 return InsertWord(VecV, ValV, ByteIdx);
1307
1308 // If this is not inserting a 32-bit word, convert it into such a thing.
1309 // 1. Extract the existing word from the target vector.
1310 SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
1311 {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
1312 SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
1313 dl, MVT::i32, DAG);
1314
1315 // 2. Treating the extracted word as a 32-bit vector, insert the given
1316 // value into it.
1317 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1318 MVT SubVecTy = tyVector(ty(Ext), ElemTy);
1319 SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
1320 ValV, SubIdx, dl, ElemTy, DAG);
1321
1322 // 3. Insert the 32-bit word back into the original vector.
1323 return InsertWord(VecV, Ins, ByteIdx);
1324}
1325
1326SDValue
1327HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1328 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1329 unsigned HwLen = Subtarget.getVectorLength();
1330 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1331 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1332
1333 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1334 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1335 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1336 ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1337
1338 SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1339 return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1340}
1341
1342SDValue
1343HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
1344 SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1345 MVT VecTy = ty(VecV);
1346 unsigned HwLen = Subtarget.getVectorLength();
1347 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1348 MVT ElemTy = VecTy.getVectorElementType();
1349 unsigned ElemWidth = ElemTy.getSizeInBits();
1350
1351 // If the source vector is a vector pair, get the single vector containing
1352 // the subvector of interest. The subvector will never overlap two single
1353 // vectors.
1354 if (isHvxPairTy(VecTy)) {
1355 unsigned SubIdx = Hexagon::vsub_lo;
1356 if (Idx * ElemWidth >= 8 * HwLen) {
1357 SubIdx = Hexagon::vsub_hi;
1358 Idx -= VecTy.getVectorNumElements() / 2;
1359 }
1360
1361 VecTy = typeSplit(VecTy).first;
1362 VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
1363 if (VecTy == ResTy)
1364 return VecV;
1365 }
1366
1367 // The only meaningful subvectors of a single HVX vector are those that
1368 // fit in a scalar register.
1369 assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
1370
1371 MVT WordTy = tyVector(VecTy, MVT::i32);
1372 SDValue WordVec = DAG.getBitcast(WordTy, VecV);
1373 unsigned WordIdx = (Idx*ElemWidth) / 32;
1374
1375 SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
1376 SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
1377 if (ResTy.getSizeInBits() == 32)
1378 return DAG.getBitcast(ResTy, W0);
1379
1380 SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
1381 SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
1382 SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
1383 return DAG.getBitcast(ResTy, WW);
1384}
1385
// Extract a subvector of type ResTy from the vector predicate VecV at the
// constant index IdxV. ResTy may itself be an HVX predicate type, or a
// short (scalar) predicate type such as v8i1.
SDValue
HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  // Work in the byte-vector domain: one byte per hardware predicate bit.
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  // IdxV is required to be a constant.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();

  unsigned ResLen = ResTy.getVectorNumElements();
  // Bytes (in the expanded vector) per element of the source predicate.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
  unsigned Offset = Idx * BitBytes;
  SDValue Undef = DAG.getUNDEF(ByteTy);
  SmallVector<int,128> Mask;

  if (Subtarget.isHVXVectorType(ResTy, true)) {
    // Converting between two vector predicates. Since the result is shorter
    // than the source, it will correspond to a vector predicate with the
    // relevant bits replicated. The replication count is the ratio of the
    // source and target vector lengths.
    unsigned Rep = VecTy.getVectorNumElements() / ResLen;
    assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
    for (unsigned i = 0; i != HwLen/Rep; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(i + Offset);
    }
    SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
    return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
  }

  // Converting between a vector predicate and a scalar predicate. In the
  // vector predicate, a group of BitBytes bits will correspond to a single
  // i1 element of the source vector type. Those bits will all have the same
  // value. The same will be true for ByteVec, where each byte corresponds
  // to a bit in the vector predicate.
  // The algorithm is to traverse the ByteVec, going over the i1 values from
  // the source vector, and generate the corresponding representation in an
  // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
  // elements so that the interesting 8 bytes will be in the low end of the
  // vector.
  unsigned Rep = 8 / ResLen;
  // Make sure the output fill the entire vector register, so repeat the
  // 8-byte groups as many times as necessary.
  for (unsigned r = 0; r != HwLen/ResLen; ++r) {
    // This will generate the indexes of the 8 interesting bytes.
    for (unsigned i = 0; i != ResLen; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(Offset + i*BitBytes);
    }
  }

  SDValue Zero = getZero(dl, MVT::i32, DAG);
  SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
  // Combine the two low words from ShuffV into a v8i8, and byte-compare
  // them against 0.
  SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
  SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
                           {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
  SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
  return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
                  {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
}
1449
// Insert the subvector SubV (32 or 64 bits in size, or a full single
// vector when VecV is a vector pair) into the HVX vector (or pair) VecV
// at element index IdxV. IdxV does not need to be a constant.
SDValue
HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  bool IsPair = isHvxPairTy(VecTy);
  MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
  // The two single vectors that VecV consists of, if it's a pair.
  SDValue V0, V1;
  SDValue SingleV = VecV;
  SDValue PickHi;

  if (IsPair) {
    V0 = LoHalf(VecV, DAG);
    V1 = HiHalf(VecV, DAG);

    // PickHi is true when the insertion point lies in the high half.
    SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
                                    dl, MVT::i32);
    PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
    if (isHvxSingleTy(SubTy)) {
      // Inserting a whole single vector: this is a subregister write.
      if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
        unsigned Idx = CN->getZExtValue();
        assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
        unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
        return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
      }
      // If IdxV is not a constant, generate the two variants: with the
      // SubV as the high and as the low subregister, and select the right
      // pair based on the IdxV.
      SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
      SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
      return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
    }
    // The subvector being inserted must be entirely contained in one of
    // the vectors V0 or V1. Set SingleV to the correct one, and update
    // IdxV to be the index relative to the beginning of that vector.
    SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
    IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
    SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
  // Convert IdxV to be index in bytes.
  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion point down to position 0.
    IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                       DAG.getConstant(ElemWidth/8, dl, MVT::i32));
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
  }
  // When inserting a single word, the rotation back to the original position
  // would be by HwLen-Idx, but if two words are inserted, it will need to be
  // by (HwLen-4)-Idx.
  unsigned RolBase = HwLen;
  if (SubTy.getSizeInBits() == 32) {
    SDValue V = DAG.getBitcast(MVT::i32, SubV);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
  } else {
    // 64-bit subvector: insert the low word, rotate by 4, insert the high
    // word.
    SDValue V = DAG.getBitcast(MVT::i64, SubV);
    SDValue R0 = LoHalf(V, DAG);
    SDValue R1 = HiHalf(V, DAG);
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
                          DAG.getConstant(4, dl, MVT::i32));
    SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
    RolBase = HwLen-4;
  }
  // If the vector wasn't ror'ed, don't ror it back.
  // NOTE(review): the first disjunct compares RolBase against 4, although
  // RolBase is HwLen or HwLen-4 here — presumably this makes the rotation
  // unconditional in practice; confirm intent before changing.
  if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
    SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
                               DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
    SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
  }

  if (IsPair) {
    // Reassemble the pair with the updated half in the right position.
    SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
    SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
    return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
  }
  return SingleV;
}
1536
// Insert the predicate SubV (an HVX vector predicate or a scalar
// predicate) into the HVX vector predicate VecV at element index IdxV.
SDValue
HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT VecTy = ty(VecV);
  MVT SubTy = ty(SubV);
  assert(Subtarget.isHVXVectorType(VecTy, true));
  // VecV is an HVX vector predicate. SubV may be either an HVX vector
  // predicate as well, or it can be a scalar predicate.

  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type");

  // BitBytes: bytes (in the expanded byte vector) per predicate element;
  // BlockLen: number of bytes the inserted subvector occupies.
  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  unsigned BitBytes = HwLen / VecLen;
  unsigned BlockLen = HwLen / Scale;

  // Work in the byte-vector domain.
  MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
  SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
  SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
  SDValue ByteIdx;

  auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the insertion point down to byte 0.
    ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
                          DAG.getConstant(BitBytes, dl, MVT::i32));
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
  SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
                       {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
  ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isZero()) {
    SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
    SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
    ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
  }
  return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
}
1582
1583SDValue
1584HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1585 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1586 // Sign- and any-extending of a vector predicate to a vector register is
1587 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1588 // a vector of 1s (where the 1s are of type matching the vector type).
1589 assert(Subtarget.isHVXVectorType(ResTy));
1590 if (!ZeroExt)
1591 return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1592
1593 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1594 SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1595 DAG.getConstant(1, dl, MVT::i32));
1596 SDValue False = getZero(dl, ResTy, DAG);
1597 return DAG.getSelect(dl, ResTy, VecV, True, False);
1598}
1599
1600SDValue
1601HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1602 MVT ResTy, SelectionDAG &DAG) const {
1603 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1604 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1605 // vector register. The remaining bits of the vector register are
1606 // unspecified.
1607
1608 MachineFunction &MF = DAG.getMachineFunction();
1609 unsigned HwLen = Subtarget.getVectorLength();
1610 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1611 MVT PredTy = ty(VecQ);
1612 unsigned PredLen = PredTy.getVectorNumElements();
1613 assert(HwLen % PredLen == 0);
1614 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1615
1616 Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1618 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1619 // These are bytes with the LSB rotated left with respect to their index.
1620 for (unsigned i = 0; i != HwLen/8; ++i) {
1621 for (unsigned j = 0; j != 8; ++j)
1622 Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1623 }
1624 Constant *CV = ConstantVector::get(Tmp);
1625 Align Alignment(HwLen);
1626 SDValue CP =
1627 LowerConstantPool(DAG.getConstantPool(CV, ByteTy, Alignment), DAG);
1628 SDValue Bytes =
1629 DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1631
1632 // Select the bytes that correspond to true bits in the vector predicate.
1633 SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1634 getZero(dl, VecTy, DAG));
1635 // Calculate the OR of all bytes in each group of 8. That will compress
1636 // all the individual bits into a single byte.
1637 // First, OR groups of 4, via vrmpy with 0x01010101.
1638 SDValue All1 =
1639 DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1640 SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1641 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1642 SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1643 {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1644 SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1645
1646 // Pick every 8th byte and coalesce them at the beginning of the output.
1647 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1648 // byte and so on.
1649 SmallVector<int,128> Mask;
1650 for (unsigned i = 0; i != HwLen; ++i)
1651 Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1652 SDValue Collect =
1653 DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1654 return DAG.getBitcast(ResTy, Collect);
1655}
1656
1657SDValue
1658HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1659 const SDLoc &dl, SelectionDAG &DAG) const {
1660 // Take a vector and resize the element type to match the given type.
1661 MVT InpTy = ty(VecV);
1662 if (InpTy == ResTy)
1663 return VecV;
1664
1665 unsigned InpWidth = InpTy.getSizeInBits();
1666 unsigned ResWidth = ResTy.getSizeInBits();
1667
1668 if (InpTy.isFloatingPoint()) {
1669 return InpWidth < ResWidth
1670 ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1671 : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1672 DAG.getTargetConstant(0, dl, MVT::i32));
1673 }
1674
1675 assert(InpTy.isInteger());
1676
1677 if (InpWidth < ResWidth) {
1678 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1679 return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1680 } else {
1681 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1682 return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1683 }
1684}
1685
1686SDValue
1687HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1688 SelectionDAG &DAG) const {
1689 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1690
1691 const SDLoc &dl(Vec);
1692 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1693 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1694 {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1695}
1696
1697SDValue
1698HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1699 const {
1700 const SDLoc &dl(Op);
1701 MVT VecTy = ty(Op);
1702
1703 unsigned Size = Op.getNumOperands();
1705 for (unsigned i = 0; i != Size; ++i)
1706 Ops.push_back(Op.getOperand(i));
1707
1708 if (VecTy.getVectorElementType() == MVT::i1)
1709 return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1710
1711 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1712 // not a legal type, just bitcast the node to use i16
1713 // types and bitcast the result back to f16
1714 if (VecTy.getVectorElementType() == MVT::f16 ||
1715 VecTy.getVectorElementType() == MVT::bf16) {
1717 for (unsigned i = 0; i != Size; i++)
1718 NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1719
1720 SDValue T0 =
1721 DAG.getNode(ISD::BUILD_VECTOR, dl, tyVector(VecTy, MVT::i16), NewOps);
1722 return DAG.getBitcast(tyVector(VecTy, VecTy.getVectorElementType()), T0);
1723 }
1724
1725 // First, split the BUILD_VECTOR for vector pairs. We could generate
1726 // some pairs directly (via splat), but splats should be generated
1727 // by the combiner prior to getting here.
1728 if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
1730 MVT SingleTy = typeSplit(VecTy).first;
1731 SDValue V0 = buildHvxVectorReg(A.take_front(Size / 2), dl, SingleTy, DAG);
1732 SDValue V1 = buildHvxVectorReg(A.drop_front(Size / 2), dl, SingleTy, DAG);
1733 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1734 }
1735
1736 return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1737}
1738
1739SDValue
1740HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1741 const {
1742 const SDLoc &dl(Op);
1743 MVT VecTy = ty(Op);
1744 MVT ArgTy = ty(Op.getOperand(0));
1745
1746 if (ArgTy == MVT::f16 || ArgTy == MVT::bf16) {
1747 MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1748 SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1749 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1750 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1751 return DAG.getBitcast(VecTy, Splat);
1752 }
1753
1754 return SDValue();
1755}
1756
1757SDValue
1758HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1759 const {
1760 // Vector concatenation of two integer (non-bool) vectors does not need
1761 // special lowering. Custom-lower concats of bool vectors and expand
1762 // concats of more than 2 vectors.
1763 MVT VecTy = ty(Op);
1764 const SDLoc &dl(Op);
1765 unsigned NumOp = Op.getNumOperands();
1766 if (VecTy.getVectorElementType() != MVT::i1) {
1767 if (NumOp == 2)
1768 return Op;
1769 // Expand the other cases into a build-vector.
1771 for (SDValue V : Op.getNode()->ops())
1772 DAG.ExtractVectorElements(V, Elems);
1773 // A vector of i16 will be broken up into a build_vector of i16's.
1774 // This is a problem, since at the time of operation legalization,
1775 // all operations are expected to be type-legalized, and i16 is not
1776 // a legal type. If any of the extracted elements is not of a valid
1777 // type, sign-extend it to a valid one.
1778 for (SDValue &V : Elems) {
1779 MVT Ty = ty(V);
1780 if (!isTypeLegal(Ty)) {
1781 MVT NTy = typeLegalize(Ty, DAG);
1782 if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1783 V = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
1784 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
1785 V.getOperand(0), V.getOperand(1)),
1786 DAG.getValueType(Ty));
1787 continue;
1788 }
1789 // A few less complicated cases.
1790 switch (V.getOpcode()) {
1791 case ISD::Constant:
1792 V = DAG.getSExtOrTrunc(V, dl, NTy);
1793 break;
1794 case ISD::UNDEF:
1795 V = DAG.getUNDEF(NTy);
1796 break;
1797 case ISD::TRUNCATE:
1798 V = V.getOperand(0);
1799 break;
1800 default:
1801 llvm_unreachable("Unexpected vector element");
1802 }
1803 }
1804 }
1805 return DAG.getBuildVector(VecTy, dl, Elems);
1806 }
1807
1808 assert(VecTy.getVectorElementType() == MVT::i1);
1809 unsigned HwLen = Subtarget.getVectorLength();
1810 assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1811
1812 SDValue Op0 = Op.getOperand(0);
1813
1814 // If the operands are HVX types (i.e. not scalar predicates), then
1815 // defer the concatenation, and create QCAT instead.
1816 if (Subtarget.isHVXVectorType(ty(Op0), true)) {
1817 if (NumOp == 2)
1818 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
1819
1820 ArrayRef<SDUse> U(Op.getNode()->ops());
1823
1824 MVT HalfTy = typeSplit(VecTy).first;
1825 SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1826 Ops.take_front(NumOp/2));
1827 SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1828 Ops.take_back(NumOp/2));
1829 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
1830 }
1831
1832 // Count how many bytes (in a vector register) each bit in VecTy
1833 // corresponds to.
1834 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1835
1836 SmallVector<SDValue,8> Prefixes;
1837 for (SDValue V : Op.getNode()->op_values()) {
1838 SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
1839 Prefixes.push_back(P);
1840 }
1841
1842 unsigned InpLen = ty(Op.getOperand(0)).getVectorNumElements();
1843 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1844 SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
1845 SDValue Res = getZero(dl, ByteTy, DAG);
1846 for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1847 Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
1848 Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
1849 }
1850 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
1851}
1852
1853SDValue
1854HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1855 const {
1856 // Change the type of the extracted element to i32.
1857 SDValue VecV = Op.getOperand(0);
1858 MVT ElemTy = ty(VecV).getVectorElementType();
1859 const SDLoc &dl(Op);
1860 SDValue IdxV = Op.getOperand(1);
1861 if (ElemTy == MVT::i1)
1862 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1863
1864 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1865}
1866
1867SDValue
1868HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1869 const {
1870 const SDLoc &dl(Op);
1871 MVT VecTy = ty(Op);
1872 SDValue VecV = Op.getOperand(0);
1873 SDValue ValV = Op.getOperand(1);
1874 SDValue IdxV = Op.getOperand(2);
1875 MVT ElemTy = ty(VecV).getVectorElementType();
1876 if (ElemTy == MVT::i1)
1877 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1878
1879 if (ElemTy == MVT::f16 || ElemTy == MVT::bf16) {
1881 tyVector(VecTy, MVT::i16),
1882 DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1883 DAG.getBitcast(MVT::i16, ValV), IdxV);
1884 return DAG.getBitcast(tyVector(VecTy, ElemTy), T0);
1885 }
1886
1887 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1888}
1889
1890SDValue
1891HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1892 const {
1893 SDValue SrcV = Op.getOperand(0);
1894 MVT SrcTy = ty(SrcV);
1895 MVT DstTy = ty(Op);
1896 SDValue IdxV = Op.getOperand(1);
1897 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1898 assert(Idx % DstTy.getVectorNumElements() == 0);
1899 (void)Idx;
1900 const SDLoc &dl(Op);
1901
1902 MVT ElemTy = SrcTy.getVectorElementType();
1903 if (ElemTy == MVT::i1)
1904 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1905
1906 return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1907}
1908
1909SDValue
1910HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1911 const {
1912 // Idx does not need to be a constant.
1913 SDValue VecV = Op.getOperand(0);
1914 SDValue ValV = Op.getOperand(1);
1915 SDValue IdxV = Op.getOperand(2);
1916
1917 const SDLoc &dl(Op);
1918 MVT VecTy = ty(VecV);
1919 MVT ElemTy = VecTy.getVectorElementType();
1920 if (ElemTy == MVT::i1)
1921 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1922
1923 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1924}
1925
1926SDValue
1927HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1928 // Lower any-extends of boolean vectors to sign-extends, since they
1929 // translate directly to Q2V. Zero-extending could also be done equally
1930 // fast, but Q2V is used/recognized in more places.
1931 // For all other vectors, use zero-extend.
1932 MVT ResTy = ty(Op);
1933 SDValue InpV = Op.getOperand(0);
1934 MVT ElemTy = ty(InpV).getVectorElementType();
1935 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1936 return LowerHvxSignExt(Op, DAG);
1937 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1938}
1939
1940SDValue
1941HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1942 MVT ResTy = ty(Op);
1943 SDValue InpV = Op.getOperand(0);
1944 MVT ElemTy = ty(InpV).getVectorElementType();
1945 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1946 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
1947 return Op;
1948}
1949
1950SDValue
1951HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1952 MVT ResTy = ty(Op);
1953 SDValue InpV = Op.getOperand(0);
1954 MVT ElemTy = ty(InpV).getVectorElementType();
1955 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1956 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
1957 return Op;
1958}
1959
1960SDValue
1961HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1962 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1963 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1964 const SDLoc &dl(Op);
1965 MVT ResTy = ty(Op);
1966 SDValue InpV = Op.getOperand(0);
1967 assert(ResTy == ty(InpV));
1968
1969 // Calculate the vectors of 1 and bitwidth(x).
1970 MVT ElemTy = ty(InpV).getVectorElementType();
1971 unsigned ElemWidth = ElemTy.getSizeInBits();
1972
1973 SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1974 DAG.getConstant(1, dl, MVT::i32));
1975 SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1976 DAG.getConstant(ElemWidth, dl, MVT::i32));
1977 SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1978 DAG.getAllOnesConstant(dl, MVT::i32));
1979
1980 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1981 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1982 // it separately in custom combine or selection).
1983 SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
1984 {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
1985 DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
1986 return DAG.getNode(ISD::SUB, dl, ResTy,
1987 {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
1988}
1989
1990SDValue
1991HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1992 const SDLoc &dl(Op);
1993 MVT ResTy = ty(Op);
1994 assert(ResTy.getVectorElementType() == MVT::i32);
1995
1996 SDValue Vs = Op.getOperand(0);
1997 SDValue Vt = Op.getOperand(1);
1998
1999 SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
2000 unsigned Opc = Op.getOpcode();
2001
2002 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
2003 if (Opc == ISD::MULHU)
2004 return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
2005 if (Opc == ISD::MULHS)
2006 return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
2007
2008#ifndef NDEBUG
2009 Op.dump(&DAG);
2010#endif
2011 llvm_unreachable("Unexpected mulh operation");
2012}
2013
SDValue
HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
  // Lower the multi-valued SMUL_LOHI/UMUL_LOHI/USMUL_LOHI nodes. Value 0 is
  // the low half of the full product, value 1 the high half. When one half
  // is unused, a cheaper single-result expansion is chosen and the unused
  // result is replaced with UNDEF (the number and types of results must be
  // preserved).
  const SDLoc &dl(Op);
  unsigned Opc = Op.getOpcode();
  SDValue Vu = Op.getOperand(0);
  SDValue Vv = Op.getOperand(1);

  // If the HI part is not used, convert it to a regular MUL.
  if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
    // Need to preserve the types and the number of values.
    SDValue Hi = DAG.getUNDEF(ty(HiVal));
    SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
    return DAG.getMergeValues({Lo, Hi}, dl);
  }

  // Per-operand signedness implied by the opcode: SMUL_LOHI is
  // signed*signed, USMUL_LOHI is unsigned*signed, UMUL_LOHI is
  // unsigned*unsigned.
  bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
  bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;

  // Legal on HVX v62+, but lower it here because patterns can't handle multi-
  // valued nodes.
  if (Subtarget.useHVXV62Ops())
    return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);

  if (Opc == HexagonISD::SMUL_LOHI) {
    // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
    // for other signedness LOHI is cheaper.
    if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
      SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
      SDValue Lo = DAG.getUNDEF(ty(LoVal));
      return DAG.getMergeValues({Lo, Hi}, dl);
    }
  }

  return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
}
2049
2050SDValue
2051HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
2052 SDValue Val = Op.getOperand(0);
2053 MVT ResTy = ty(Op);
2054 MVT ValTy = ty(Val);
2055 const SDLoc &dl(Op);
2056
2057 if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
2058 unsigned HwLen = Subtarget.getVectorLength();
2059 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
2060 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
2061 unsigned BitWidth = ResTy.getSizeInBits();
2062
2063 if (BitWidth < 64) {
2064 SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
2065 dl, MVT::i32, DAG);
2066 if (BitWidth == 32)
2067 return W0;
2068 assert(BitWidth < 32u);
2069 return DAG.getZExtOrTrunc(W0, dl, ResTy);
2070 }
2071
2072 // The result is >= 64 bits. The only options are 64 or 128.
2073 assert(BitWidth == 64 || BitWidth == 128);
2075 for (unsigned i = 0; i != BitWidth/32; ++i) {
2076 SDValue W = extractHvxElementReg(
2077 VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
2078 Words.push_back(W);
2079 }
2080 SmallVector<SDValue,2> Combines;
2081 assert(Words.size() % 2 == 0);
2082 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
2083 SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
2084 Combines.push_back(C);
2085 }
2086
2087 if (BitWidth == 64)
2088 return Combines[0];
2089
2090 return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
2091 }
2092
2093 // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
2094 // Splat the input into a 32-element i32 vector, then AND each element
2095 // with a unique bitmask to isolate individual bits.
2096 auto bitcastI32ToV32I1 = [&](SDValue Val32) {
2097 assert(Val32.getValueType().getSizeInBits() == 32 &&
2098 "Input must be 32 bits");
2099 MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
2100 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
2102 for (unsigned i = 0; i < 32; ++i)
2103 Mask.push_back(DAG.getConstant(1ull << i, dl, MVT::i32));
2104
2105 SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
2106 SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
2107 return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded);
2108 };
2109 // === Case: v32i1 ===
2110 if (ResTy == MVT::v32i1 &&
2111 (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2112 Subtarget.useHVX128BOps()) {
2113 SDValue Val32 = Val;
2114 if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2115 Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2116 return bitcastI32ToV32I1(Val32);
2117 }
2118 // === Case: v64i1 ===
2119 if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
2120 // Split i64 into lo/hi 32-bit halves.
2121 SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val);
2122 SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val,
2123 DAG.getConstant(32, dl, MVT::i64));
2124 SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted);
2125
2126 // Reuse the same 32-bit logic twice.
2127 SDValue LoRes = bitcastI32ToV32I1(Lo);
2128 SDValue HiRes = bitcastI32ToV32I1(Hi);
2129
2130 // Concatenate into a v64i1 predicate.
2131 return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes);
2132 }
2133
2134 if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2135 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2136 unsigned BitWidth = ValTy.getSizeInBits();
2137 unsigned HwLen = Subtarget.getVectorLength();
2138 assert(BitWidth == HwLen);
2139
2140 MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2141 SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2142 // Splat each byte of Val 8 times.
2143 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2144 // where b0, b1,..., b15 are least to most significant bytes of I.
2146 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2147 // These are bytes with the LSB rotated left with respect to their index.
2149 for (unsigned I = 0; I != HwLen / 8; ++I) {
2150 SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2151 SDValue Byte =
2152 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2153 for (unsigned J = 0; J != 8; ++J) {
2154 Bytes.push_back(Byte);
2155 Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2156 }
2157 }
2158
2159 MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2160 SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2161 SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2162
2163 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2164 I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2165 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2166 }
2167
2168 return Op;
2169}
2170
2171SDValue
2172HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2173 // Sign- and zero-extends are legal.
2174 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2175 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2176 Op.getOperand(0));
2177}
2178
2179SDValue
2180HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2181 MVT ResTy = ty(Op);
2182 if (ResTy.getVectorElementType() != MVT::i1)
2183 return Op;
2184
2185 const SDLoc &dl(Op);
2186 unsigned HwLen = Subtarget.getVectorLength();
2187 unsigned VecLen = ResTy.getVectorNumElements();
2188 assert(HwLen % VecLen == 0);
2189 unsigned ElemSize = HwLen / VecLen;
2190
2191 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
2192 SDValue S =
2193 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2194 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2195 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2196 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2197}
2198
2199SDValue
2200HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2201 if (SDValue S = getVectorShiftByInt(Op, DAG))
2202 return S;
2203 return Op;
2204}
2205
2206SDValue
2207HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2208 SelectionDAG &DAG) const {
2209 unsigned Opc = Op.getOpcode();
2210 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2211
2212 // Make sure the shift amount is within the range of the bitwidth
2213 // of the element type.
2214 SDValue A = Op.getOperand(0);
2215 SDValue B = Op.getOperand(1);
2216 SDValue S = Op.getOperand(2);
2217
2218 MVT InpTy = ty(A);
2219 MVT ElemTy = InpTy.getVectorElementType();
2220
2221 const SDLoc &dl(Op);
2222 unsigned ElemWidth = ElemTy.getSizeInBits();
2223 bool IsLeft = Opc == ISD::FSHL;
2224
2225 // The expansion into regular shifts produces worse code for i8 and for
2226 // right shift of i32 on v65+.
2227 bool UseShifts = ElemTy != MVT::i8;
2228 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2229 UseShifts = false;
2230
2231 if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2232 // If this is a funnel shift by a scalar, lower it into regular shifts.
2233 SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2234 SDValue ModS =
2235 DAG.getNode(ISD::AND, dl, MVT::i32,
2236 {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2237 SDValue NegS =
2238 DAG.getNode(ISD::SUB, dl, MVT::i32,
2239 {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2240 SDValue IsZero =
2241 DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2242 // FSHL A, B => A << | B >>n
2243 // FSHR A, B => A <<n | B >>
2244 SDValue Part1 =
2245 DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2246 SDValue Part2 =
2247 DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2248 SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2249 // If the shift amount was 0, pick A or B, depending on the direction.
2250 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2251 return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2252 }
2253
2255 InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2256
2257 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2258 return DAG.getNode(MOpc, dl, ty(Op),
2259 {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2260}
2261
2262SDValue
2263HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2264 const SDLoc &dl(Op);
2265 unsigned IntNo = Op.getConstantOperandVal(0);
2266 SmallVector<SDValue> Ops(Op->ops());
2267
2268 auto Swap = [&](SDValue P) {
2269 return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
2270 };
2271
2272 switch (IntNo) {
2273 case Intrinsic::hexagon_V6_pred_typecast:
2274 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2275 MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
2276 if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
2277 if (ResTy == InpTy)
2278 return Ops[1];
2279 return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
2280 }
2281 break;
2282 }
2283 case Intrinsic::hexagon_V6_vmpyss_parts:
2284 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2285 return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
2286 {Ops[1], Ops[2]}));
2287 case Intrinsic::hexagon_V6_vmpyuu_parts:
2288 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2289 return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
2290 {Ops[1], Ops[2]}));
2291 case Intrinsic::hexagon_V6_vmpyus_parts:
2292 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2293 return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
2294 {Ops[1], Ops[2]}));
2295 }
2296 } // switch
2297
2298 return Op;
2299}
2300
2301SDValue
2302HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
2303 const SDLoc &dl(Op);
2304 unsigned HwLen = Subtarget.getVectorLength();
2305 MachineFunction &MF = DAG.getMachineFunction();
2306 auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
2307 SDValue Mask = MaskN->getMask();
2308 SDValue Chain = MaskN->getChain();
2309 SDValue Base = MaskN->getBasePtr();
2310 auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);
2311
2312 unsigned Opc = Op->getOpcode();
2314
2315 if (Opc == ISD::MLOAD) {
2316 MVT ValTy = ty(Op);
2317 SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
2318 SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
2319 if (isUndef(Thru))
2320 return Load;
2321 SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
2322 return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
2323 }
2324
2325 // MSTORE
2326 // HVX only has aligned masked stores.
2327
2328 // TODO: Fold negations of the mask into the store.
2329 unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
2330 SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
2331 SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));
2332
2333 if (MaskN->getAlign().value() % HwLen == 0) {
2334 SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
2335 {Mask, Base, Offset0, Value, Chain}, DAG);
2336 DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
2337 return Store;
2338 }
2339
2340 // Unaligned case.
2341 auto StoreAlign = [&](SDValue V, SDValue A) {
2342 SDValue Z = getZero(dl, ty(V), DAG);
2343 // TODO: use funnel shifts?
2344 // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
2345 // upper half.
2346 SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
2347 SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
2348 return std::make_pair(LoV, HiV);
2349 };
2350
2351 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
2352 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
2353 SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
2354 VectorPair Tmp = StoreAlign(MaskV, Base);
2355 VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
2356 DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
2357 VectorPair ValueU = StoreAlign(Value, Base);
2358
2359 SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
2360 SDValue StoreLo =
2361 getInstr(StoreOpc, dl, MVT::Other,
2362 {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
2363 SDValue StoreHi =
2364 getInstr(StoreOpc, dl, MVT::Other,
2365 {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
2366 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
2367 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
2368 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
2369}
2370
2371SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2372 SelectionDAG &DAG) const {
2373 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2374 // is legal (done via a pattern).
2375 assert(Subtarget.useHVXQFloatOps());
2376
2377 assert(Op->getOpcode() == ISD::FP_EXTEND);
2378
2379 MVT VecTy = ty(Op);
2380 MVT ArgTy = ty(Op.getOperand(0));
2381 const SDLoc &dl(Op);
2382
2383 if (ArgTy == MVT::v64bf16) {
2384 MVT HalfTy = typeSplit(VecTy).first;
2385 SDValue BF16Vec = Op.getOperand(0);
2386 SDValue Zeroes =
2387 getInstr(Hexagon::V6_vxor, dl, HalfTy, {BF16Vec, BF16Vec}, DAG);
2388 // Interleave zero vector with the bf16 vector, with zeroes in the lower
2389 // half of each 32 bit lane, effectively extending the bf16 values to fp32
2390 // values.
2391 SDValue ShuffVec =
2392 getInstr(Hexagon::V6_vshufoeh, dl, VecTy, {BF16Vec, Zeroes}, DAG);
2393 VectorPair VecPair = opSplit(ShuffVec, dl, DAG);
2394 SDValue Result = getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2395 {VecPair.second, VecPair.first,
2396 DAG.getSignedConstant(-4, dl, MVT::i32)},
2397 DAG);
2398 return Result;
2399 }
2400
2401 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2402
2403 SDValue F16Vec = Op.getOperand(0);
2404
2405 APFloat FloatVal = APFloat(1.0f);
2406 bool Ignored;
2408 SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2409 SDValue VmpyVec =
2410 getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2411
2412 MVT HalfTy = typeSplit(VecTy).first;
2413 VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2414 SDValue LoVec =
2415 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2416 SDValue HiVec =
2417 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2418
2419 SDValue ShuffVec =
2420 getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2421 {HiVec, LoVec, DAG.getSignedConstant(-4, dl, MVT::i32)}, DAG);
2422
2423 return ShuffVec;
2424}
2425
2426SDValue
2427HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2428 // Catch invalid conversion ops (just in case).
2429 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2430 Op.getOpcode() == ISD::FP_TO_UINT);
2431
2432 MVT ResTy = ty(Op);
2433 MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2434 MVT IntTy = ResTy.getVectorElementType();
2435
2436 if (Subtarget.useHVXIEEEFPOps()) {
2437 // There are only conversions from f16.
2438 if (FpTy == MVT::f16) {
2439 // Other int types aren't legal in HVX, so we shouldn't see them here.
2440 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2441 // Conversions to i8 and i16 are legal.
2442 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2443 return Op;
2444 }
2445 }
2446
2447 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2448 return EqualizeFpIntConversion(Op, DAG);
2449
2450 return ExpandHvxFpToInt(Op, DAG);
2451}
2452
2453// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
2454// R1 = #1, R2 holds the v32i1 param
2455// V1 = vsplat(R1)
2456// V2 = vsplat(R2)
2457// Q0 = vand(V1,R1)
2458// V0.w=prefixsum(Q0)
2459// V0.w=vsub(V0.w,V1.w)
2460// V2.w = vlsr(V2.w,V0.w)
2461// V2 = vand(V2,V1)
2462// V2.sf = V2.w
SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {
  // Lower [us]int_to_fp of a v32i1 predicate to v32f32 using the machine-
  // node sequence described in the comment above: splat the 32-bit predicate
  // word, shift each lane right by its own index so lane i holds bit i in
  // its LSB, mask to 0/1, and convert words to single-precision floats.
  // The sequence is emitted directly as machine nodes, bypassing selection.
  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  // R1 = #1, splatted into V1 (per-lane constant 1).
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Q0 = vand(V1,R1): an all-true predicate used to seed the prefix sum.
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  // Prefix sum of all-ones gives 1,2,3,... per lane.
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // Splat the 32-bit predicate word into every lane.
  SDNode *SplatParam = DAG.getMachineNode(
      Hexagon::V6_lvsplatw, dl, MVT::v32i32,
      DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0)));
  // Subtract 1 to get the per-lane shift amounts 0,1,2,...
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // Shift lane i right by i, bringing predicate bit i into the LSB.
  SDNode *IndexShift =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatParam, 0), SDValue(Vsub, 0));
  // Mask to 0/1 per lane.
  SDNode *MaskOff =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift, 0), SDValue(SplatConst, 0));
  // Convert each 0/1 word to single-precision float.
  SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy,
                                       SDValue(MaskOff, 0));
  return SDValue(Convert, 0);
}
2494
// For vector type v64i1 uint_to_fp/sint_to_fp to v64f16:
2496// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
2497// R3 = subreg_high (R32)
2498// R2 = subreg_low (R32)
2499// R1 = #1
2500// V1 = vsplat(R1)
2501// V2 = vsplat(R2)
2502// V3 = vsplat(R3)
2503// Q0 = vand(V1,R1)
2504// V0.w=prefixsum(Q0)
2505// V0.w=vsub(V0.w,V1.w)
2506// V2.w = vlsr(V2.w,V0.w)
2507// V3.w = vlsr(V3.w,V0.w)
2508// V2 = vand(V2,V1)
2509// V3 = vand(V3,V1)
2510// V2.h = vpacke(V3.w,V2.w)
2511// V2.hf = V2.h
SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {
  // Lower [us]int_to_fp of a v64i1 predicate to v64f16, following the
  // sequence in the comment above: split the 64-bit predicate into two
  // 32-bit halves, expand each bit into its own lane (as in
  // LowerHvxPred32ToFp), pack the two 0/1 word vectors into halfwords, and
  // convert to half-precision floats. Emitted directly as machine nodes.
  MVT ResTy = ty(PredOp);
  const SDLoc &dl(PredOp);

  SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0));
  // Get the hi and lo regs
  SDValue HiReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp);
  SDValue LoReg =
      DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Inp);
  // Get constant #1 and splat into vector V1
  SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
  SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                                          SDValue(RegConst, 0));
  // Splat the hi and lo args
  SDNode *SplatHi =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg));
  SDNode *SplatLo =
      DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg));
  // vand between splatted const and const: an all-true predicate seeding
  // the prefix sum.
  SDNode *PredTransfer =
      DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
                         SDValue(SplatConst, 0), SDValue(RegConst, 0));
  // Get the prefixsum (1,2,3,... per lane)
  SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
                                         SDValue(PredTransfer, 0));
  // Get the vsub: per-lane shift amounts 0,1,2,...
  SDNode *Vsub =
      DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
                         SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
  // Get vlsr for hi and lo: bring predicate bit i into lane i's LSB
  SDNode *IndexShift_hi =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatHi, 0), SDValue(Vsub, 0));
  SDNode *IndexShift_lo =
      DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
                         SDValue(SplatLo, 0), SDValue(Vsub, 0));
  // Get vand of hi and lo: mask each lane down to 0/1
  SDNode *MaskOff_hi =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0));
  SDNode *MaskOff_lo =
      DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
                         SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0));
  // Pack them: even halfwords of hi/lo words into one v64i16 vector
  SDNode *Pack =
      DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16,
                         SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0));
  // Convert each 0/1 halfword to half-precision float.
  SDNode *Convert =
      DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0));
  return SDValue(Convert, 0);
}
2569
2570SDValue
2571HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2572 // Catch invalid conversion ops (just in case).
2573 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2574 Op.getOpcode() == ISD::UINT_TO_FP);
2575
2576 MVT ResTy = ty(Op);
2577 MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2578 MVT FpTy = ResTy.getVectorElementType();
2579
2580 if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
2581 if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
2582 return LowerHvxPred32ToFp(Op, DAG);
2583 if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
2584 return LowerHvxPred64ToFp(Op, DAG);
2585 }
2586
2587 if (Subtarget.useHVXIEEEFPOps()) {
2588 // There are only conversions to f16.
2589 if (FpTy == MVT::f16) {
2590 // Other int types aren't legal in HVX, so we shouldn't see them here.
2591 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2592 // i8, i16 -> f16 is legal.
2593 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2594 return Op;
2595 }
2596 }
2597
2598 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2599 return EqualizeFpIntConversion(Op, DAG);
2600
2601 return ExpandHvxIntToFp(Op, DAG);
2602}
2603
2604HexagonTargetLowering::TypePair
2605HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2606 // Compare the widths of elements of the two types, and extend the narrower
2607 // type to match the with of the wider type. For vector types, apply this
2608 // to the element type.
2609 assert(Ty0.isVector() == Ty1.isVector());
2610
2611 MVT ElemTy0 = Ty0.getScalarType();
2612 MVT ElemTy1 = Ty1.getScalarType();
2613
2614 unsigned Width0 = ElemTy0.getSizeInBits();
2615 unsigned Width1 = ElemTy1.getSizeInBits();
2616 unsigned MaxWidth = std::max(Width0, Width1);
2617
2618 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2619 if (ScalarTy.isInteger())
2620 return MVT::getIntegerVT(Width);
2621 assert(ScalarTy.isFloatingPoint());
2622 return MVT::getFloatingPointVT(Width);
2623 };
2624
2625 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2626 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2627
2628 if (!Ty0.isVector()) {
2629 // Both types are scalars.
2630 return {WideETy0, WideETy1};
2631 }
2632
2633 // Vector types.
2634 unsigned NumElem = Ty0.getVectorNumElements();
2635 assert(NumElem == Ty1.getVectorNumElements());
2636
2637 return {MVT::getVectorVT(WideETy0, NumElem),
2638 MVT::getVectorVT(WideETy1, NumElem)};
2639}
2640
2641HexagonTargetLowering::TypePair
2642HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2643 // Compare the numbers of elements of two vector types, and widen the
2644 // narrower one to match the number of elements in the wider one.
2645 assert(Ty0.isVector() && Ty1.isVector());
2646
2647 unsigned Len0 = Ty0.getVectorNumElements();
2648 unsigned Len1 = Ty1.getVectorNumElements();
2649 if (Len0 == Len1)
2650 return {Ty0, Ty1};
2651
2652 unsigned MaxLen = std::max(Len0, Len1);
2653 return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2654 MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2655}
2656
2657MVT
2658HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2659 EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2660 assert(LegalTy.isSimple());
2661 return LegalTy.getSimpleVT();
2662}
2663
2664MVT
2665HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2666 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2667 assert(Ty.getSizeInBits() <= HwWidth);
2668 if (Ty.getSizeInBits() == HwWidth)
2669 return Ty;
2670
2671 MVT ElemTy = Ty.getScalarType();
2672 return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2673}
2674
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
      const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
  // Compute A+B, return {A+B, O}, where O = vector predicate indicating
  // whether an overflow has occurred.
  MVT ResTy = ty(A);
  assert(ResTy == ty(B));
  // One predicate bit per vector element.
  MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());

  if (!Signed) {
    // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
    // save any instructions.
    // Unsigned overflow iff the sum wrapped around, i.e. A+B <u A.
    SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
    SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
    return {Add, Ovf};
  }

  // Signed overflow has happened, if:
  // (A, B have the same sign) and (A+B has a different sign from either)
  // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
  SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
  // ~A built as A xor -1 (avoids DAG.getNOT's BUILD_VECTOR/BITCAST form).
  SDValue NotA =
      DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getAllOnesConstant(dl, ResTy)});
  SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
  SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
  SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
  // Sign bit set <=> value is negative: compare against zero.
  SDValue MSB =
      DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
  return {Add, MSB};
}
2705
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
      bool Signed, SelectionDAG &DAG) const {
  // Shift Val right by Amt bits, round the result to the nearest integer,
  // tie-break by rounding halves to even integer.
  // Returns {rounded value, overflow predicate from the rounding addition}.

  const SDLoc &dl(Val);
  MVT ValTy = ty(Val);

  // This should also work for signed integers.
  //
  // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
  // bool ovf = (inp > tmp0);
  // uint rup = inp & (1 << (Amt+1));
  // NOTE(review): the code below masks with (1ull << Amt), not Amt+1 as the
  // comment above says — verify against upstream intent.
  //
  // uint tmp1 = inp >> (Amt-1);    // tmp1 == tmp2 iff
  // uint tmp2 = tmp0 >> (Amt-1);   // the Amt-1 lower bits were all 0
  // uint tmp3 = tmp2 + rup;
  // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
  unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
  MVT ElemTy = MVT::getIntegerVT(ElemWidth);
  // Same shape as ValTy but with integer elements (Val may be FP bits).
  MVT IntTy = tyVector(ValTy, ElemTy);
  MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
  // Arithmetic shift preserves the sign for signed inputs.
  unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;

  SDValue Inp = DAG.getBitcast(IntTy, Val);
  // (1 << (Amt-1)) - 1: the rounding bias added before truncation.
  SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);

  // "Round-up" bit: nonzero iff bit Amt of the input is set.
  SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
  SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
  SDValue Zero = getZero(dl, IntTy, DAG);
  SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
  SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
  // Biased value and the overflow flag of the addition.
  auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);

  SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
  SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
  SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
  SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);

  // Tmp1 == Tmp2 means the Amt-1 low bits of the input were all zero
  // (exact half case): apply the round-to-even adjustment (Tmp3), otherwise
  // keep the biased value (Tmp2); final shift by 1 completes the Amt shift.
  SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
  SDValue One = DAG.getConstant(1, dl, IntTy);
  SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
  SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
  SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
  return {Mux, Ovf};
}
2753
SDValue
HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
                                       SelectionDAG &DAG) const {
  // Compute the high 32 bits of the signed 32x32-bit product A*B, for
  // vectors of i32, composed from the halfword multiplies available on
  // HVX v60.
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  // mulhs(A,B) =
  //   = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
  //   = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
  //                               + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
  //   = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
  // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
  // anything, so it cannot produce any carry over to higher bits),
  // so everything in [] can be shifted by 16 without loss of precision.
  //   = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
  //   = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
  // The final additions need to make sure to properly maintain any carry-
  // out bits.
  //
  //                Hi(B) Lo(B)
  //                Hi(A) Lo(A)
  //               --------------
  //                Lo(B)*Lo(A)  | T0 = V6_vmpyewuh(B,A) does this,
  //         Hi(B)*Lo(A)         | + dropping the low 16 bits
  //         Hi(A)*Lo(B)         | T2
  //  Hi(B)*Hi(A)

  SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
  // T1 = get Hi(A) into low halves.
  SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
  // P0 = interleaved T1.h*B.uh (full precision product)
  SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
  // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
  SDValue T2 = LoHalf(P0, DAG);
  // We need to add T0+T2, recording the carry-out, which will be 1<<16
  // added to the final sum.
  // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
  SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
  // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
  SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
  // T3 = full-precision(T0+T2) >> 16
  // The low halves are added-unsigned, the high ones are added-signed.
  SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
  // T4 = get Hi(B) into low halves (same trick as T1).
  SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
  // P3 = interleaved Hi(B)*Hi(A) (full precision),
  // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
  SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
  SDValue T5 = LoHalf(P3, DAG);
  // Add:
  SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
  return T6;
}
2810
SDValue
HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
                                         bool SignedB, const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  // Emit the full 64-bit product of 32-bit vector elements A*B as a merged
  // pair {Lo, Hi}, using instructions available on HVX v60. SignedA/SignedB
  // give the signedness of each operand; the product is formed unsigned
  // halfword-wise and then corrected for signed operands.
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(16, dl, MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(A, B);
    std::swap(SignedA, SignedB);
  }

  // Do halfword-wise multiplications for unsigned*unsigned product, then
  // add corrections for signed and unsigned*signed.

  SDValue Lo, Hi;

  // P0:lo = (uu) products of low halves of A and B,
  // P0:hi = (uu) products of high halves.
  SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);

  // Swap low/high halves in B
  SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
                        {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
  SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
  // P1 = products of even/odd halfwords.
  // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
  // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
  SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);

  // P2:lo = low halves of P1:lo + P1:hi,
  // P2:hi = high halves of P1:lo + P1:hi.
  SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
                        {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
  // Still need to add the high halves of P0:lo to P2:lo
  SDValue T2 =
      getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
  SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});

  // The high halves of T3 will contribute to the HI part of LOHI.
  SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
                        {HiHalf(P2, DAG), T3, S16}, DAG);

  // The low halves of P2 need to be added to high halves of the LO part.
  Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
                {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
  Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});

  if (SignedA) {
    assert(SignedB && "Signed A and unsigned B should have been inverted");

    // Correct the unsigned product for both operands being signed:
    // subtract (A<0 ? B : 0) + (B<0 ? A : 0) from the high word.
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
    SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
    Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
  } else if (SignedB) {
    // Same correction as for mulhus:
    // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, VecTy, DAG);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
  } else {
    assert(!SignedA && !SignedB);
  }

  return DAG.getMergeValues({Lo, Hi}, dl);
}
2886
SDValue
HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
                                         SDValue B, bool SignedB,
                                         const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  // Emit the full 64-bit product of 32-bit vector elements A*B as a merged
  // pair {Lo, Hi}, using the v62 64-bit multiply instructions
  // (V6_vmpyewuh_64 / V6_vmpyowh_64_acc). The signed*signed product is
  // computed first and then corrected for unsigned operands.
  MVT VecTy = ty(A);
  MVT PairTy = typeJoin({VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(A, B);
    std::swap(SignedA, SignedB);
  }

  // Do S*S first, then make corrections for U*S or U*U if needed.
  SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
  SDValue P1 =
      getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
  SDValue Lo = LoHalf(P1, DAG);
  SDValue Hi = HiHalf(P1, DAG);

  if (!SignedB) {
    assert(!SignedA && "Signed A and unsigned B should have been inverted");
    SDValue Zero = getZero(dl, VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());

    // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
    // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
    //          (V6_vaddw (HiHalf (Muls64O $A, $B)),
    //                    (V6_vaddwq (V6_vgtw (V6_vd0), $B),
    //                               (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
    //                               $A))>;
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
    SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
    SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
    Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
  } else if (!SignedA) {
    SDValue Zero = getZero(dl, VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());

    // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
    // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
    //          (V6_vaddwq (V6_vgtw (V6_vd0), $A),
    //                     (HiHalf (Muls64O $A, $B)),
    //                     $B)>;
    SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
    Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
  }

  return DAG.getMergeValues({Lo, Hi}, dl);
}
2940
2941SDValue
2942HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
2943 const {
2944 // Rewrite conversion between integer and floating-point in such a way that
2945 // the integer type is extended/narrowed to match the bitwidth of the
2946 // floating-point type, combined with additional integer-integer extensions
2947 // or narrowings to match the original input/result types.
2948 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
2949 //
2950 // The input/result types are not required to be legal, but if they are
2951 // legal, this function should not introduce illegal types.
2952
2953 unsigned Opc = Op.getOpcode();
2956
2957 SDValue Inp = Op.getOperand(0);
2958 MVT InpTy = ty(Inp);
2959 MVT ResTy = ty(Op);
2960
2961 if (InpTy == ResTy)
2962 return Op;
2963
2964 const SDLoc &dl(Op);
2966
2967 auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
2968 SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
2969 SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
2970 SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
2971 return Res;
2972}
2973
2974SDValue
2975HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2976 unsigned Opc = Op.getOpcode();
2978
2979 const SDLoc &dl(Op);
2980 SDValue Op0 = Op.getOperand(0);
2981 MVT InpTy = ty(Op0);
2982 MVT ResTy = ty(Op);
2983 assert(InpTy.changeTypeToInteger() == ResTy);
2984
2985 // At this point this is an experiment under a flag.
2986 // In arch before V81 the rounding mode is towards nearest value.
2987 // The C/C++ standard requires rounding towards zero:
2988 // C (C99 and later): ISO/IEC 9899:2018 (C18), section 6.3.1.4 — "When a
2989 // finite value of real floating type is converted to an integer type, the
2990 // fractional part is discarded (i.e., the value is truncated toward zero)."
2991 // C++: ISO/IEC 14882:2020 (C++20), section 7.3.7 — "A prvalue of a
2992 // floating-point type can be converted to a prvalue of an integer type. The
2993 // conversion truncates; that is, the fractional part is discarded."
2994 if (InpTy == MVT::v64f16) {
2995 if (Subtarget.useHVXV81Ops()) {
2996 // This is c/c++ compliant
2997 SDValue ConvVec =
2998 getInstr(Hexagon::V6_vconv_h_hf_rnd, dl, ResTy, {Op0}, DAG);
2999 return ConvVec;
3000 } else if (EnableFpFastConvert) {
3001 // Vd32.h=Vu32.hf same as Q6_Vh_equals_Vhf
3002 SDValue ConvVec = getInstr(Hexagon::V6_vconv_h_hf, dl, ResTy, {Op0}, DAG);
3003 return ConvVec;
3004 }
3005 } else if (EnableFpFastConvert && InpTy == MVT::v32f32) {
3006 // Vd32.w=Vu32.sf same as Q6_Vw_equals_Vsf
3007 SDValue ConvVec = getInstr(Hexagon::V6_vconv_w_sf, dl, ResTy, {Op0}, DAG);
3008 return ConvVec;
3009 }
3010
3011 // int32_t conv_f32_to_i32(uint32_t inp) {
3012 // // s | exp8 | frac23
3013 //
3014 // int neg = (int32_t)inp < 0;
3015 //
3016 // // "expm1" is the actual exponent minus 1: instead of "bias", subtract
3017 // // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
3018 // // produce a large positive "expm1", which will result in max u/int.
3019 // // In all IEEE formats, bias is the largest positive number that can be
3020 // // represented in bias-width bits (i.e. 011..1).
3021 // int32_t expm1 = (inp << 1) - 0x80000000;
3022 // expm1 >>= 24;
3023 //
3024 // // Always insert the "implicit 1". Subnormal numbers will become 0
3025 // // regardless.
3026 // uint32_t frac = (inp << 8) | 0x80000000;
3027 //
3028 // // "frac" is the fraction part represented as Q1.31. If it was
3029 // // interpreted as uint32_t, it would be the fraction part multiplied
3030 // // by 2^31.
3031 //
3032 // // Calculate the amount of right shift, since shifting further to the
3033 // // left would lose significant bits. Limit it to 32, because we want
3034 // // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
3035 // // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
3036 // // left by 31). "rsh" can be negative.
3037 // int32_t rsh = min(31 - (expm1 + 1), 32);
3038 //
3039 // frac >>= rsh; // rsh == 32 will produce 0
3040 //
3041 // // Everything up to this point is the same for conversion to signed
3042 // // unsigned integer.
3043 //
3044 // if (neg) // Only for signed int
3045 // frac = -frac; //
3046 // if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff
3047 // frac = 0x80000000; // frac = rsh <= 0 ? bound : frac
3048 // if (rsh <= 0 && !neg) //
3049 // frac = 0x7fffffff; //
3050 //
3051 // if (neg) // Only for unsigned int
3052 // frac = 0; //
3053 // if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac;
3054 // frac = 0x7fffffff; // frac = neg ? 0 : frac;
3055 //
3056 // return frac;
3057 // }
3058
3059 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());
3060
3061 // Zero = V6_vd0();
3062 // Neg = V6_vgtw(Zero, Inp);
3063 // One = V6_lvsplatw(1);
3064 // M80 = V6_lvsplatw(0x80000000);
3065 // Exp00 = V6_vaslwv(Inp, One);
3066 // Exp01 = V6_vsubw(Exp00, M80);
3067 // ExpM1 = V6_vasrw(Exp01, 24);
3068 // Frc00 = V6_vaslw(Inp, 8);
3069 // Frc01 = V6_vor(Frc00, M80);
3070 // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
3071 // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
3072 // Frc02 = V6_vlsrwv(Frc01, Rsh01);
3073
3074 // if signed int:
3075 // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
3076 // Pos = V6_vgtw(Rsh01, Zero);
3077 // Frc13 = V6_vsubw(Zero, Frc02);
3078 // Frc14 = V6_vmux(Neg, Frc13, Frc02);
3079 // Int = V6_vmux(Pos, Frc14, Bnd);
3080 //
3081 // if unsigned int:
3082 // Rsn = V6_vgtw(Zero, Rsh01)
3083 // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
3084 // Int = V6_vmux(Neg, Zero, Frc23)
3085
3086 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
3087 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
3088 assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));
3089
3090 SDValue Inp = DAG.getBitcast(ResTy, Op0);
3091 SDValue Zero = getZero(dl, ResTy, DAG);
3092 SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
3093 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
3094 SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
3095 SDValue One = DAG.getConstant(1, dl, ResTy);
3096 SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
3097 SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
3098 SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
3099 SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});
3100
3101 SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
3102 SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
3103 SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});
3104
3105 SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
3106 SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
3107 SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
3108 SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
3109 SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});
3110
3111 SDValue Int;
3112
3113 if (Opc == ISD::FP_TO_SINT) {
3114 SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
3115 SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
3116 SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
3117 SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
3118 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
3119 } else {
3121 SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
3122 SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
3123 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
3124 }
3125
3126 return Int;
3127}
3128
3129SDValue
3130HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
3131 unsigned Opc = Op.getOpcode();
3133
3134 const SDLoc &dl(Op);
3135 SDValue Op0 = Op.getOperand(0);
3136 MVT InpTy = ty(Op0);
3137 MVT ResTy = ty(Op);
3138 assert(ResTy.changeTypeToInteger() == InpTy);
3139
3140 // uint32_t vnoc1_rnd(int32_t w) {
3141 // int32_t iszero = w == 0;
3142 // int32_t isneg = w < 0;
3143 // uint32_t u = __builtin_HEXAGON_A2_abs(w);
3144 //
3145 // uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
3146 // uint32_t frac0 = (uint64_t)u << norm_left;
3147 //
3148 // // Rounding:
3149 // uint32_t frac1 = frac0 + ((1 << 8) - 1);
3150 // uint32_t renorm = (frac0 > frac1);
3151 // uint32_t rup = (int)(frac0 << 22) < 0;
3152 //
3153 // uint32_t frac2 = frac0 >> 8;
3154 // uint32_t frac3 = frac1 >> 8;
3155 // uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
3156 //
3157 // int32_t exp = 32 - norm_left + renorm + 127;
3158 // exp <<= 23;
3159 //
3160 // uint32_t sign = 0x80000000 * isneg;
3161 // uint32_t f = sign | exp | frac;
3162 // return iszero ? 0 : f;
3163 // }
3164
3165 MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
3166 bool Signed = Opc == ISD::SINT_TO_FP;
3167
3168 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
3169 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
3170
3171 SDValue Zero = getZero(dl, InpTy, DAG);
3172 SDValue One = DAG.getConstant(1, dl, InpTy);
3173 SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
3174 SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
3175 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
3176 SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
3177 SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});
3178
3179 auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
3180 if (Signed) {
3181 SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
3182 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
3183 SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
3184 Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
3185 }
3186
3187 SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
3188 SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
3189 SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
3190 SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
3191 SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
3192 {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
3193 SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
3194 SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
3195 SDValue Flt = DAG.getBitcast(ResTy, Flt1);
3196
3197 return Flt;
3198}
3199
3200SDValue
3201HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3202 unsigned Opc = Op.getOpcode();
3203 unsigned TLOpc;
3204 switch (Opc) {
3205 case ISD::ANY_EXTEND:
3206 case ISD::SIGN_EXTEND:
3207 case ISD::ZERO_EXTEND:
3208 TLOpc = HexagonISD::TL_EXTEND;
3209 break;
3210 case ISD::TRUNCATE:
3212 break;
3213#ifndef NDEBUG
3214 Op.dump(&DAG);
3215#endif
3216 llvm_unreachable("Unexpected operator");
3217 }
3218
3219 const SDLoc &dl(Op);
3220 return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
3221 DAG.getUNDEF(MVT::i128), // illegal type
3222 DAG.getConstant(Opc, dl, MVT::i32));
3223}
3224
3225SDValue
3226HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3227 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
3228 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
3229 unsigned Opc = Op.getConstantOperandVal(2);
3230 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
3231}
3232
3233HexagonTargetLowering::VectorPair
3234HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
3235 assert(!Op.isMachineOpcode());
3236 SmallVector<SDValue, 2> OpsL, OpsH;
3237 const SDLoc &dl(Op);
3238
3239 auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
3240 MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
3241 SDValue TV = DAG.getValueType(Ty);
3242 return std::make_pair(TV, TV);
3243 };
3244
3245 for (SDValue A : Op.getNode()->ops()) {
3246 auto [Lo, Hi] =
3247 ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
3248 // Special case for type operand.
3249 switch (Op.getOpcode()) {
3250 case ISD::SIGN_EXTEND_INREG:
3251 case HexagonISD::SSAT:
3252 case HexagonISD::USAT:
3253 if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
3254 std::tie(Lo, Hi) = SplitVTNode(N);
3255 break;
3256 }
3257 OpsL.push_back(Lo);
3258 OpsH.push_back(Hi);
3259 }
3260
3261 MVT ResTy = ty(Op);
3262 MVT HalfTy = typeSplit(ResTy).first;
3263 SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
3264 SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
3265 return {L, H};
3266}
3267
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
  // Split a (possibly masked) load/store of an HVX vector pair into two
  // memory operations on single vectors, the second one HwLen bytes past
  // the first. Non-pair memory ops are returned unchanged.
  auto *MemN = cast<MemSDNode>(Op.getNode());

  if (!MemN->getMemoryVT().isSimple())
    return Op;

  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
  if (!isHvxPairTy(MemTy))
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT SingleTy = typeSplit(MemTy).first;
  SDValue Chain = MemN->getChain();
  SDValue Base0 = MemN->getBasePtr();
  // Address of the second (high) half: one full vector past the first.
  SDValue Base1 =
      DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(HwLen), dl);
  unsigned MemOpc = MemN->getOpcode();

  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    // For masked ops the number of bytes actually accessed depends on the
    // mask, so the size is recorded as unknown.
    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
                           ? (uint64_t)MemoryLocation::UnknownSize
                           : HwLen;
    MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize);
    MOp1 = MF.getMachineMemOperand(MMO, HwLen, MemSize);
  }

  if (MemOpc == ISD::LOAD) {
    assert(cast<LoadSDNode>(Op)->isUnindexed());
    SDValue Load0 = DAG.getLoad(SingleTy, dl, Chain, Base0, MOp0);
    SDValue Load1 = DAG.getLoad(SingleTy, dl, Chain, Base1, MOp1);
    // Return both the concatenated value and the combined chain.
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, Load0, Load1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      Load0.getValue(1), Load1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::STORE) {
    assert(cast<StoreSDNode>(Op)->isUnindexed());
    VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
    SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
  }

  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);

  auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
  assert(MaskN->isUnindexed());
  // The mask (and, for loads, the pass-through value) is split the same
  // way as the data.
  VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
  SDValue Offset = DAG.getUNDEF(MVT::i32);

  if (MemOpc == ISD::MLOAD) {
    VectorPair Thru =
        opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
    SDValue MLoad0 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base0, Offset, Masks.first,
                          Thru.first, SingleTy, MOp0, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    SDValue MLoad1 =
        DAG.getMaskedLoad(SingleTy, dl, Chain, Base1, Offset, Masks.second,
                          Thru.second, SingleTy, MOp1, ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, false);
    return DAG.getMergeValues(
        { DAG.getNode(ISD::CONCAT_VECTORS, dl, MemTy, MLoad0, MLoad1),
          DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      MLoad0.getValue(1), MLoad1.getValue(1)) }, dl);
  }
  if (MemOpc == ISD::MSTORE) {
    VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
    SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset,
                                         Masks.first, SingleTy, MOp0,
                                         ISD::UNINDEXED, false, false);
    SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset,
                                         Masks.second, SingleTy, MOp1,
                                         ISD::UNINDEXED, false, false);
    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
  }

  std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
  llvm_unreachable(Name.c_str());
}
3352
3353SDValue
3354HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3355 const SDLoc &dl(Op);
3356 auto *LoadN = cast<LoadSDNode>(Op.getNode());
3357 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3358 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3359 "Not widening loads of i1 yet");
3360
3361 SDValue Chain = LoadN->getChain();
3362 SDValue Base = LoadN->getBasePtr();
3363 SDValue Offset = DAG.getUNDEF(MVT::i32);
3364
3365 MVT ResTy = ty(Op);
3366 unsigned HwLen = Subtarget.getVectorLength();
3367 unsigned ResLen = ResTy.getStoreSize();
3368 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3369
3370 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3371 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3372 {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
3373
3374 MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
3375 MachineFunction &MF = DAG.getMachineFunction();
3376 auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
3377
3378 SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
3379 DAG.getUNDEF(LoadTy), LoadTy, MemOp,
3381 SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
3382 return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
3383}
3384
3385SDValue
3386HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3387 const SDLoc &dl(Op);
3388 auto *StoreN = cast<StoreSDNode>(Op.getNode());
3389 assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3390 assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3391 "Not widening stores of i1 yet");
3392
3393 SDValue Chain = StoreN->getChain();
3394 SDValue Base = StoreN->getBasePtr();
3395 SDValue Offset = DAG.getUNDEF(MVT::i32);
3396
3397 SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
3398 MVT ValueTy = ty(Value);
3399 unsigned ValueLen = ValueTy.getVectorNumElements();
3400 unsigned HwLen = Subtarget.getVectorLength();
3401 assert(isPowerOf2_32(ValueLen));
3402
3403 for (unsigned Len = ValueLen; Len < HwLen; ) {
3404 Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
3405 Len = ty(Value).getVectorNumElements(); // This is Len *= 2
3406 }
3407 assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
3408
3409 assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3410 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3411 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3412 {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
3413 MachineFunction &MF = DAG.getMachineFunction();
3414 auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
3415 return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
3416 MemOp, ISD::UNINDEXED, false, false);
3417}
3418
3419SDValue
3420HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3421 const SDLoc &dl(Op);
3422 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
3423 MVT ElemTy = ty(Op0).getVectorElementType();
3424 unsigned HwLen = Subtarget.getVectorLength();
3425
3426 unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
3427 assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
3428 MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
3429 if (!Subtarget.isHVXVectorType(WideOpTy, true))
3430 return SDValue();
3431
3432 SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
3433 SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
3434 EVT ResTy =
3435 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
3436 SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
3437 {WideOp0, WideOp1, Op.getOperand(2)});
3438
3439 EVT RetTy = typeLegalize(ty(Op), DAG);
3440 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
3441 {SetCC, getZero(dl, MVT::i32, DAG)});
3442}
3443
3444SDValue
3445HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
3446 unsigned Opc = Op.getOpcode();
3447 bool IsPairOp = isHvxPairTy(ty(Op)) ||
3448 llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
3449 return isHvxPairTy(ty(V));
3450 });
3451
3452 if (IsPairOp) {
3453 switch (Opc) {
3454 default:
3455 break;
3456 case ISD::LOAD:
3457 case ISD::STORE:
3458 case ISD::MLOAD:
3459 case ISD::MSTORE:
3460 return SplitHvxMemOp(Op, DAG);
3461 case ISD::SINT_TO_FP:
3462 case ISD::UINT_TO_FP:
3463 case ISD::FP_TO_SINT:
3464 case ISD::FP_TO_UINT:
3465 if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
3466 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3467 break;
3468 case ISD::ABS:
3469 case ISD::CTPOP:
3470 case ISD::CTLZ:
3471 case ISD::CTTZ:
3472 case ISD::MUL:
3473 case ISD::FADD:
3474 case ISD::FSUB:
3475 case ISD::FMUL:
3476 case ISD::FMINIMUMNUM:
3477 case ISD::FMAXIMUMNUM:
3478 case ISD::MULHS:
3479 case ISD::MULHU:
3480 case ISD::AND:
3481 case ISD::OR:
3482 case ISD::XOR:
3483 case ISD::SRA:
3484 case ISD::SHL:
3485 case ISD::SRL:
3486 case ISD::FSHL:
3487 case ISD::FSHR:
3488 case ISD::SMIN:
3489 case ISD::SMAX:
3490 case ISD::UMIN:
3491 case ISD::UMAX:
3492 case ISD::SETCC:
3493 case ISD::VSELECT:
3495 case ISD::SPLAT_VECTOR:
3496 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3497 case ISD::SIGN_EXTEND:
3498 case ISD::ZERO_EXTEND:
3499 // In general, sign- and zero-extends can't be split and still
3500 // be legal. The only exception is extending bool vectors.
3501 if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
3502 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3503 break;
3504 }
3505 }
3506
3507 switch (Opc) {
3508 default:
3509 break;
3510 case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
3511 case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
3512 case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
3513 case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
3514 case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
3515 case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
3516 case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
3517 case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
3518 case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
3519 case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
3520 case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
3521 case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
3522 case ISD::SELECT: return LowerHvxSelect(Op, DAG);
3523 case ISD::SRA:
3524 case ISD::SHL:
3525 case ISD::SRL: return LowerHvxShift(Op, DAG);
3526 case ISD::FSHL:
3527 case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG);
3528 case ISD::MULHS:
3529 case ISD::MULHU: return LowerHvxMulh(Op, DAG);
3530 case ISD::SMUL_LOHI:
3531 case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3532 case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
3533 case ISD::SETCC:
3534 case ISD::INTRINSIC_VOID: return Op;
3535 case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
3536 case ISD::MLOAD:
3537 case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
3538 // Unaligned loads will be handled by the default lowering.
3539 case ISD::LOAD: return SDValue();
3540 case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
3541 case ISD::FP_TO_SINT:
3542 case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG);
3543 case ISD::SINT_TO_FP:
3544 case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG);
3545
3546 // Special nodes:
3549 case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3550 }
3551#ifndef NDEBUG
3552 Op.dumpr(&DAG);
3553#endif
3554 llvm_unreachable("Unhandled HVX operation");
3555}
3556
3557SDValue
3558HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3559 const {
3560 // Rewrite the extension/truncation/saturation op into steps where each
3561 // step changes the type widths by a factor of 2.
3562 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3563 //
3564 // Some of the vector types in Op may not be legal.
3565
3566 unsigned Opc = Op.getOpcode();
3567 switch (Opc) {
3568 case HexagonISD::SSAT:
3569 case HexagonISD::USAT:
3572 break;
3573 case ISD::ANY_EXTEND:
3574 case ISD::ZERO_EXTEND:
3575 case ISD::SIGN_EXTEND:
3576 case ISD::TRUNCATE:
3577 llvm_unreachable("ISD:: ops will be auto-folded");
3578 break;
3579#ifndef NDEBUG
3580 Op.dump(&DAG);
3581#endif
3582 llvm_unreachable("Unexpected operation");
3583 }
3584
3585 SDValue Inp = Op.getOperand(0);
3586 MVT InpTy = ty(Inp);
3587 MVT ResTy = ty(Op);
3588
3589 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3590 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3591 assert(InpWidth != ResWidth);
3592
3593 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3594 return Op;
3595
3596 const SDLoc &dl(Op);
3597 unsigned NumElems = InpTy.getVectorNumElements();
3598 assert(NumElems == ResTy.getVectorNumElements());
3599
3600 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3601 MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
3602 switch (Opc) {
3603 case HexagonISD::SSAT:
3604 case HexagonISD::USAT:
3605 return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
3608 return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
3609 default:
3610 llvm_unreachable("Unexpected opcode");
3611 }
3612 };
3613
3614 SDValue S = Inp;
3615 if (InpWidth < ResWidth) {
3616 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3617 while (InpWidth * 2 <= ResWidth)
3618 S = repeatOp(InpWidth *= 2, S);
3619 } else {
3620 // InpWidth > ResWidth
3621 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3622 while (InpWidth / 2 >= ResWidth)
3623 S = repeatOp(InpWidth /= 2, S);
3624 }
3625 return S;
3626}
3627
3628SDValue
3629HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3630 SDValue Inp0 = Op.getOperand(0);
3631 MVT InpTy = ty(Inp0);
3632 MVT ResTy = ty(Op);
3633 unsigned InpWidth = InpTy.getSizeInBits();
3634 unsigned ResWidth = ResTy.getSizeInBits();
3635 unsigned Opc = Op.getOpcode();
3636
3637 if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
3638 // First, make sure that the narrower type is widened to HVX.
3639 // This may cause the result to be wider than what the legalizer
3640 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3641 // desired type.
3642 auto [WInpTy, WResTy] =
3643 InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
3644 : typeWidenToWider(InpTy, typeWidenToHvx(ResTy));
3645 SDValue W = appendUndef(Inp0, WInpTy, DAG);
3646 SDValue S;
3648 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
3649 Op.getOperand(2));
3650 } else {
3651 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
3652 }
3653 SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
3654 return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
3655 } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3656 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3657 } else {
3658 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3659 return RemoveTLWrapper(Op, DAG);
3660 }
3661 llvm_unreachable("Unexpected situation");
3662}
3663
3664void
3665HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3667 unsigned Opc = N->getOpcode();
3668 SDValue Op(N, 0);
3669 SDValue Inp0; // Optional first argument.
3670 if (N->getNumOperands() > 0)
3671 Inp0 = Op.getOperand(0);
3672
3673 switch (Opc) {
3674 case ISD::ANY_EXTEND:
3675 case ISD::SIGN_EXTEND:
3676 case ISD::ZERO_EXTEND:
3677 case ISD::TRUNCATE:
3678 if (Subtarget.isHVXElementType(ty(Op)) &&
3679 Subtarget.isHVXElementType(ty(Inp0))) {
3680 Results.push_back(CreateTLWrapper(Op, DAG));
3681 }
3682 break;
3683 case ISD::SETCC:
3684 if (shouldWidenToHvx(ty(Inp0), DAG)) {
3685 if (SDValue T = WidenHvxSetCC(Op, DAG))
3686 Results.push_back(T);
3687 }
3688 break;
3689 case ISD::STORE: {
3690 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
3691 SDValue Store = WidenHvxStore(Op, DAG);
3692 Results.push_back(Store);
3693 }
3694 break;
3695 }
3696 case ISD::MLOAD:
3697 if (isHvxPairTy(ty(Op))) {
3698 SDValue S = SplitHvxMemOp(Op, DAG);
3700 Results.push_back(S.getOperand(0));
3701 Results.push_back(S.getOperand(1));
3702 }
3703 break;
3704 case ISD::MSTORE:
3705 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
3706 SDValue S = SplitHvxMemOp(Op, DAG);
3707 Results.push_back(S);
3708 }
3709 break;
3710 case ISD::SINT_TO_FP:
3711 case ISD::UINT_TO_FP:
3712 case ISD::FP_TO_SINT:
3713 case ISD::FP_TO_UINT:
3714 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3715 SDValue T = EqualizeFpIntConversion(Op, DAG);
3716 Results.push_back(T);
3717 }
3718 break;
3719 case HexagonISD::SSAT:
3720 case HexagonISD::USAT:
3723 Results.push_back(LegalizeHvxResize(Op, DAG));
3724 break;
3725 default:
3726 break;
3727 }
3728}
3729
3730void
3731HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
3733 unsigned Opc = N->getOpcode();
3734 SDValue Op(N, 0);
3735 SDValue Inp0; // Optional first argument.
3736 if (N->getNumOperands() > 0)
3737 Inp0 = Op.getOperand(0);
3738
3739 switch (Opc) {
3740 case ISD::ANY_EXTEND:
3741 case ISD::SIGN_EXTEND:
3742 case ISD::ZERO_EXTEND:
3743 case ISD::TRUNCATE:
3744 if (Subtarget.isHVXElementType(ty(Op)) &&
3745 Subtarget.isHVXElementType(ty(Inp0))) {
3746 Results.push_back(CreateTLWrapper(Op, DAG));
3747 }
3748 break;
3749 case ISD::SETCC:
3750 if (shouldWidenToHvx(ty(Op), DAG)) {
3751 if (SDValue T = WidenHvxSetCC(Op, DAG))
3752 Results.push_back(T);
3753 }
3754 break;
3755 case ISD::LOAD: {
3756 if (shouldWidenToHvx(ty(Op), DAG)) {
3757 SDValue Load = WidenHvxLoad(Op, DAG);
3758 assert(Load->getOpcode() == ISD::MERGE_VALUES);
3759 Results.push_back(Load.getOperand(0));
3760 Results.push_back(Load.getOperand(1));
3761 }
3762 break;
3763 }
3764 case ISD::BITCAST:
3765 if (isHvxBoolTy(ty(Inp0))) {
3766 SDValue C = LowerHvxBitcast(Op, DAG);
3767 Results.push_back(C);
3768 }
3769 break;
3770 case ISD::FP_TO_SINT:
3771 case ISD::FP_TO_UINT:
3772 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
3773 SDValue T = EqualizeFpIntConversion(Op, DAG);
3774 Results.push_back(T);
3775 }
3776 break;
3777 case HexagonISD::SSAT:
3778 case HexagonISD::USAT:
3781 Results.push_back(LegalizeHvxResize(Op, DAG));
3782 break;
3783 default:
3784 break;
3785 }
3786}
3787
3788SDValue
3789HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3790 DAGCombinerInfo &DCI) const {
3791 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3792 // to extract-subvector (shuffle V, pick even, pick odd)
3793
3794 assert(Op.getOpcode() == ISD::TRUNCATE);
3795 SelectionDAG &DAG = DCI.DAG;
3796 const SDLoc &dl(Op);
3797
3798 if (Op.getOperand(0).getOpcode() == ISD::BITCAST)
3799 return SDValue();
3800 SDValue Cast = Op.getOperand(0);
3801 SDValue Src = Cast.getOperand(0);
3802
3803 EVT TruncTy = Op.getValueType();
3804 EVT CastTy = Cast.getValueType();
3805 EVT SrcTy = Src.getValueType();
3806 if (SrcTy.isSimple())
3807 return SDValue();
3808 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3809 return SDValue();
3810 unsigned SrcLen = SrcTy.getVectorNumElements();
3811 unsigned CastLen = CastTy.getVectorNumElements();
3812 if (2 * CastLen != SrcLen)
3813 return SDValue();
3814
3815 SmallVector<int, 128> Mask(SrcLen);
3816 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3817 Mask[i] = 2 * i;
3818 Mask[i + CastLen] = 2 * i + 1;
3819 }
3820 SDValue Deal =
3821 DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
3822 return opSplit(Deal, dl, DAG).first;
3823}
3824
3825SDValue
3826HexagonTargetLowering::combineConcatVectorsBeforeLegal(
3827 SDValue Op, DAGCombinerInfo &DCI) const {
3828 // Fold
3829 // concat (shuffle x, y, m1), (shuffle x, y, m2)
3830 // into
3831 // shuffle (concat x, y), undef, m3
3832 if (Op.getNumOperands() != 2)
3833 return SDValue();
3834
3835 SelectionDAG &DAG = DCI.DAG;
3836 const SDLoc &dl(Op);
3837 SDValue V0 = Op.getOperand(0);
3838 SDValue V1 = Op.getOperand(1);
3839
3840 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
3841 return SDValue();
3842 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
3843 return SDValue();
3844
3845 SetVector<SDValue> Order;
3846 Order.insert(V0.getOperand(0));
3847 Order.insert(V0.getOperand(1));
3848 Order.insert(V1.getOperand(0));
3849 Order.insert(V1.getOperand(1));
3850
3851 if (Order.size() > 2)
3852 return SDValue();
3853
3854 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
3855 // result must be the same.
3856 EVT InpTy = V0.getValueType();
3857 assert(InpTy.isVector());
3858 unsigned InpLen = InpTy.getVectorNumElements();
3859
3860 SmallVector<int, 128> LongMask;
3861 auto AppendToMask = [&](SDValue Shuffle) {
3862 auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
3863 ArrayRef<int> Mask = SV->getMask();
3864 SDValue X = Shuffle.getOperand(0);
3865 SDValue Y = Shuffle.getOperand(1);
3866 for (int M : Mask) {
3867 if (M == -1) {
3868 LongMask.push_back(M);
3869 continue;
3870 }
3871 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
3872 if (static_cast<unsigned>(M) >= InpLen)
3873 M -= InpLen;
3874
3875 int OutOffset = Order[0] == Src ? 0 : InpLen;
3876 LongMask.push_back(M + OutOffset);
3877 }
3878 };
3879
3880 AppendToMask(V0);
3881 AppendToMask(V1);
3882
3883 SDValue C0 = Order.front();
3884 SDValue C1 = Order.back(); // Can be same as front
3885 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
3886
3887 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
3888 return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
3889}
3890
3891SDValue
3892HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3893 const {
3894 const SDLoc &dl(N);
3895 SelectionDAG &DAG = DCI.DAG;
3896 SDValue Op(N, 0);
3897 unsigned Opc = Op.getOpcode();
3898
3900
3901 if (Opc == ISD::TRUNCATE)
3902 return combineTruncateBeforeLegal(Op, DCI);
3903 if (Opc == ISD::CONCAT_VECTORS)
3904 return combineConcatVectorsBeforeLegal(Op, DCI);
3905
3906 if (DCI.isBeforeLegalizeOps())
3907 return SDValue();
3908
3909 switch (Opc) {
3910 case ISD::VSELECT: {
3911 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
3912 SDValue Cond = Ops[0];
3913 if (Cond->getOpcode() == ISD::XOR) {
3914 SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
3915 if (C1->getOpcode() == HexagonISD::QTRUE)
3916 return DAG.getNode(ISD::VSELECT, dl, ty(Op), C0, Ops[2], Ops[1]);
3917 }
3918 break;
3919 }
3920 case HexagonISD::V2Q:
3921 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
3922 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
3923 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
3924 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
3925 }
3926 break;
3927 case HexagonISD::Q2V:
3928 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
3929 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
3930 DAG.getAllOnesConstant(dl, MVT::i32));
3931 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
3932 return getZero(dl, ty(Op), DAG);
3933 break;
3935 if (isUndef(Ops[1]))
3936 return Ops[0];
3937 break;
3938 case HexagonISD::VROR: {
3939 if (Ops[0].getOpcode() == HexagonISD::VROR) {
3940 SDValue Vec = Ops[0].getOperand(0);
3941 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
3942 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
3943 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
3944 }
3945 break;
3946 }
3947 }
3948
3949 return SDValue();
3950}
3951
3952bool
3953HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
3954 if (Subtarget.isHVXVectorType(Ty, true))
3955 return false;
3956 auto Action = getPreferredHvxVectorAction(Ty);
3958 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3959 return false;
3960}
3961
3962bool
3963HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
3964 if (Subtarget.isHVXVectorType(Ty, true))
3965 return false;
3966 auto Action = getPreferredHvxVectorAction(Ty);
3968 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
3969 return false;
3970}
3971
3972bool
3973HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
3974 if (!Subtarget.useHVXOps())
3975 return false;
3976 // If the type of any result, or any operand type are HVX vector types,
3977 // this is an HVX operation.
3978 auto IsHvxTy = [this](EVT Ty) {
3979 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
3980 };
3981 auto IsHvxOp = [this](SDValue Op) {
3982 return Op.getValueType().isSimple() &&
3983 Subtarget.isHVXVectorType(ty(Op), true);
3984 };
3985 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
3986 return true;
3987
3988 // Check if this could be an HVX operation after type widening.
3989 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3990 if (!Op.getValueType().isSimple())
3991 return false;
3992 MVT ValTy = ty(Op);
3993 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
3994 };
3995
3996 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
3997 if (IsWidenedToHvx(SDValue(N, i)))
3998 return true;
3999 }
4000 return llvm::any_of(N->ops(), IsWidenedToHvx);
4001}
unsigned const MachineRegisterInfo * MRI
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
static std::tuple< unsigned, unsigned, unsigned > getIEEEProperties(MVT Ty)
static const MVT LegalV128[]
static const MVT LegalW128[]
static const MVT LegalW64[]
static const MVT LegalV64[]
static cl::opt< unsigned > HvxWidenThreshold("hexagon-hvx-widen", cl::Hidden, cl::init(16), cl::desc("Lower threshold (in bytes) for widening to HVX vectors"))
static cl::opt< bool > EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false), cl::desc("Enable FP fast conversion routine."))
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
#define T
#define T1
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static llvm::Type * getVectorElementType(llvm::Type *Ty)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:6053
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override
Return the ValueType of the result of SETCC operations.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Flags
Flags values. These may be or'd together.
unsigned getSubReg() const
int64_t getImm() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
const value_type & front() const
Return the first element of the SetVector.
Definition SetVector.h:132
const value_type & back() const
Return the last element of the SetVector.
Definition SetVector.h:138
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:807
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:256
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:270
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:593
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:771
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:215
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:868
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:410
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:744
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:898
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:981
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:249
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:369
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:784
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:228
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:669
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:701
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:762
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:642
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:607
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:887
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:876
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:724
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:966
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:793
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:914
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:736
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:200
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:558
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:909
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:360
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:549
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2148
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Or
Bitwise or logical OR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
Extended Value Type.
Definition ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:137
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:463
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.