LLVM 23.0.0git
HexagonISelLoweringHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
30static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
32 cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));
33
34static cl::opt<bool>
35 EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false),
36 cl::desc("Enable FP fast conversion routine."));
37
38static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
39static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
40static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
41static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
42
43static const unsigned MaxExpandMLA = 8;
44
45static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
46 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
47 MVT ElemTy = Ty.getScalarType();
48 switch (ElemTy.SimpleTy) {
49 case MVT::f16:
50 return std::make_tuple(5, 15, 10);
51 case MVT::f32:
52 return std::make_tuple(8, 127, 23);
53 case MVT::f64:
54 return std::make_tuple(11, 1023, 52);
55 default:
56 break;
57 }
58 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
59}
60
61void
62HexagonTargetLowering::initializeHVXLowering() {
63 if (Subtarget.useHVX64BOps()) {
64 addRegisterClass(MVT::v64i8, &Hexagon::HvxVRRegClass);
65 addRegisterClass(MVT::v32i16, &Hexagon::HvxVRRegClass);
66 addRegisterClass(MVT::v16i32, &Hexagon::HvxVRRegClass);
67 addRegisterClass(MVT::v128i8, &Hexagon::HvxWRRegClass);
68 addRegisterClass(MVT::v64i16, &Hexagon::HvxWRRegClass);
69 addRegisterClass(MVT::v32i32, &Hexagon::HvxWRRegClass);
70 // These "short" boolean vector types should be legal because
71 // they will appear as results of vector compares. If they were
72 // not legal, type legalization would try to make them legal
73 // and that would require using operations that do not use or
74 // produce such types. That, in turn, would imply using custom
75 // nodes, which would be unoptimizable by the DAG combiner.
76 // The idea is to rely on target-independent operations as much
77 // as possible.
78 addRegisterClass(MVT::v16i1, &Hexagon::HvxQRRegClass);
79 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
80 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
81 } else if (Subtarget.useHVX128BOps()) {
82 addRegisterClass(MVT::v128i8, &Hexagon::HvxVRRegClass);
83 addRegisterClass(MVT::v64i16, &Hexagon::HvxVRRegClass);
84 addRegisterClass(MVT::v32i32, &Hexagon::HvxVRRegClass);
85 addRegisterClass(MVT::v256i8, &Hexagon::HvxWRRegClass);
86 addRegisterClass(MVT::v128i16, &Hexagon::HvxWRRegClass);
87 addRegisterClass(MVT::v64i32, &Hexagon::HvxWRRegClass);
88 addRegisterClass(MVT::v32i1, &Hexagon::HvxQRRegClass);
89 addRegisterClass(MVT::v64i1, &Hexagon::HvxQRRegClass);
90 addRegisterClass(MVT::v128i1, &Hexagon::HvxQRRegClass);
91 if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
92 addRegisterClass(MVT::v32f32, &Hexagon::HvxVRRegClass);
93 addRegisterClass(MVT::v64f16, &Hexagon::HvxVRRegClass);
94 addRegisterClass(MVT::v64f32, &Hexagon::HvxWRRegClass);
95 addRegisterClass(MVT::v128f16, &Hexagon::HvxWRRegClass);
96 }
97 if (Subtarget.useHVXV81Ops()) {
98 addRegisterClass(MVT::v64bf16, &Hexagon::HvxVRRegClass);
99 addRegisterClass(MVT::v128bf16, &Hexagon::HvxWRRegClass);
100 }
101 }
102
103 // Set up operation actions.
104
105 bool Use64b = Subtarget.useHVX64BOps();
106 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
107 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
108 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
109 MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
110 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
111
112 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
114 AddPromotedToType(Opc, FromTy, ToTy);
115 };
116
117 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
118 // Note: v16i1 -> i16 is handled in type legalization instead of op
119 // legalization.
129
130 if (Subtarget.useHVX128BOps()) {
134 setOperationAction(ISD::LOAD, MVT::v32i1, Custom);
136 setOperationAction(ISD::LOAD, MVT::v64i1, Custom);
137 setOperationAction(ISD::STORE, MVT::v128i1, Custom);
138 setOperationAction(ISD::LOAD, MVT::v128i1, Custom);
139 }
140 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
141 Subtarget.useHVXFloatingPoint()) {
142
143 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
144 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
145
146 for (MVT T : FloatV) {
152
155
158
161 // Custom-lower BUILD_VECTOR. The standard (target-independent)
162 // handling of it would convert it to a load, which is not always
163 // the optimal choice.
165 }
166
167
168 // BUILD_VECTOR with f16 operands cannot be promoted without
169 // promoting the result, so lower the node to vsplat or constant pool
173
174 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
175 // generated.
176 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
177 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
178 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
179 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
180
181 if (Subtarget.useHVXV81Ops()) {
182 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128bf16, ByteW);
183 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64bf16, ByteV);
184 setPromoteTo(ISD::SETCC, MVT::v64bf16, MVT::v64f32);
185 setPromoteTo(ISD::FADD, MVT::v64bf16, MVT::v64f32);
186 setPromoteTo(ISD::FSUB, MVT::v64bf16, MVT::v64f32);
187 setPromoteTo(ISD::FMUL, MVT::v64bf16, MVT::v64f32);
188 setPromoteTo(ISD::FMINNUM, MVT::v64bf16, MVT::v64f32);
189 setPromoteTo(ISD::FMAXNUM, MVT::v64bf16, MVT::v64f32);
190
194
195 setOperationAction(ISD::LOAD, MVT::v128bf16, Custom);
196 setOperationAction(ISD::STORE, MVT::v128bf16, Custom);
197
198 setOperationAction(ISD::MLOAD, MVT::v64bf16, Custom);
199 setOperationAction(ISD::MSTORE, MVT::v64bf16, Custom);
202
203 setOperationAction(ISD::MLOAD, MVT::v128bf16, Custom);
204 setOperationAction(ISD::MSTORE, MVT::v128bf16, Custom);
207
211 }
212
213 for (MVT P : FloatW) {
223
224 // Custom-lower BUILD_VECTOR. The standard (target-independent)
225 // handling of it would convert it to a load, which is not always
226 // the optimal choice.
228 // Make concat-vectors custom to handle concats of more than 2 vectors.
230
233 }
234
235 if (Subtarget.useHVXQFloatOps()) {
238 } else if (Subtarget.useHVXIEEEFPOps()) {
241 }
242 }
243
244 for (MVT T : LegalV) {
247
263 if (T != ByteV) {
267 }
268
271 if (T.getScalarType() != MVT::i32) {
274 }
275
280 if (T.getScalarType() != MVT::i32) {
283 }
284
286 // Make concat-vectors custom to handle concats of more than 2 vectors.
297 if (T != ByteV) {
299 // HVX only has shifts of words and halfwords.
303
304 // Promote all shuffles to operate on vectors of bytes.
305 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
306 }
307
308 if (Subtarget.useHVXFloatingPoint()) {
309 // Same action for both QFloat and IEEE.
314 }
315
323 }
324
325 for (MVT T : LegalW) {
326 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
327 // independent) handling of it would convert it to a load, which is
328 // not always the optimal choice.
330 // Make concat-vectors custom to handle concats of more than 2 vectors.
332
333 // Custom-lower these operations for pairs. Expand them into a concat
334 // of the corresponding operations on individual vectors.
343
352
367 if (T != ByteW) {
371
372 // Promote all shuffles to operate on vectors of bytes.
373 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
374 }
377
380 if (T.getScalarType() != MVT::i32) {
383 }
384
385 if (Subtarget.useHVXFloatingPoint()) {
386 // Same action for both QFloat and IEEE.
391 }
392 }
393
394 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
395 setOperationAction(ISD::MULHS, WordV, Custom); // -> _LOHI
396 setOperationAction(ISD::MULHU, WordV, Custom); // -> _LOHI
399
400 setCondCodeAction(ISD::SETNE, MVT::v64f16, Expand);
401 setCondCodeAction(ISD::SETLE, MVT::v64f16, Expand);
402 setCondCodeAction(ISD::SETGE, MVT::v64f16, Expand);
403 setCondCodeAction(ISD::SETLT, MVT::v64f16, Expand);
404 setCondCodeAction(ISD::SETONE, MVT::v64f16, Expand);
405 setCondCodeAction(ISD::SETOLE, MVT::v64f16, Expand);
406 setCondCodeAction(ISD::SETOGE, MVT::v64f16, Expand);
407 setCondCodeAction(ISD::SETOLT, MVT::v64f16, Expand);
408 setCondCodeAction(ISD::SETUNE, MVT::v64f16, Expand);
409 setCondCodeAction(ISD::SETULE, MVT::v64f16, Expand);
410 setCondCodeAction(ISD::SETUGE, MVT::v64f16, Expand);
411 setCondCodeAction(ISD::SETULT, MVT::v64f16, Expand);
412 setCondCodeAction(ISD::SETUO, MVT::v64f16, Expand);
413 setCondCodeAction(ISD::SETO, MVT::v64f16, Expand);
414
415 setCondCodeAction(ISD::SETNE, MVT::v32f32, Expand);
416 setCondCodeAction(ISD::SETLE, MVT::v32f32, Expand);
417 setCondCodeAction(ISD::SETGE, MVT::v32f32, Expand);
418 setCondCodeAction(ISD::SETLT, MVT::v32f32, Expand);
419 setCondCodeAction(ISD::SETONE, MVT::v32f32, Expand);
420 setCondCodeAction(ISD::SETOLE, MVT::v32f32, Expand);
421 setCondCodeAction(ISD::SETOGE, MVT::v32f32, Expand);
422 setCondCodeAction(ISD::SETOLT, MVT::v32f32, Expand);
423 setCondCodeAction(ISD::SETUNE, MVT::v32f32, Expand);
424 setCondCodeAction(ISD::SETULE, MVT::v32f32, Expand);
425 setCondCodeAction(ISD::SETUGE, MVT::v32f32, Expand);
426 setCondCodeAction(ISD::SETULT, MVT::v32f32, Expand);
427 setCondCodeAction(ISD::SETUO, MVT::v32f32, Expand);
428 setCondCodeAction(ISD::SETO, MVT::v32f32, Expand);
429
430 // Boolean vectors.
431
432 for (MVT T : LegalW) {
433 // Boolean types for vector pairs will overlap with the boolean
434 // types for single vectors, e.g.
435 // v64i8 -> v64i1 (single)
436 // v64i16 -> v64i1 (pair)
437 // Set these actions first, and allow the single actions to overwrite
438 // any duplicates.
439 MVT BoolW = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
444 // Masked load/store takes a mask that may need splitting.
447 }
448
449 for (MVT T : LegalV) {
450 MVT BoolV = MVT::getVectorVT(MVT::i1, T.getVectorNumElements());
461 }
462
463 if (Use64b) {
464 for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
466 } else {
467 for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
469 }
470
471 // Handle store widening for short vectors.
472 unsigned HwLen = Subtarget.getVectorLength();
473 for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
474 if (ElemTy == MVT::i1)
475 continue;
476 int ElemWidth = ElemTy.getFixedSizeInBits();
477 int MaxElems = (8*HwLen) / ElemWidth;
478 for (int N = 2; N < MaxElems; N *= 2) {
479 MVT VecTy = MVT::getVectorVT(ElemTy, N);
480 auto Action = getPreferredVectorAction(VecTy);
489 if (Subtarget.useHVXFloatingPoint()) {
494 }
495
496 MVT BoolTy = MVT::getVectorVT(MVT::i1, N);
497 if (!isTypeLegal(BoolTy))
499 }
500 }
501 }
502
503 // Include cases which are not handled earlier
507
509
512
513 // Partial MLA reductions.
514 {
515 static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
518
519 auto HvxType = [=](MVT ScalarT, unsigned Factor = 1) {
520 return MVT::getVectorVT(ScalarT, Subtarget.getVectorLength() * Factor *
521 8 / ScalarT.getSizeInBits());
522 };
523
524 // Tuple of (Acc element type, input element type, vector pair).
525 // The assumption is both the input and reduction result are of the same
526 // size so the reduction ratio is the same as the ratio of element type
527 // sizes. This may not hold for all available instructions.
528 typedef std::tuple<MVT, MVT, bool> ReductionSignature;
529
530 static const std::vector<ReductionSignature> NativeReductions = {
531 {MVT::i32, MVT::i8, false},
532 };
533
534 for (const auto &R : NativeReductions) {
535
536 MVT AccType = std::get<0>(R);
537 MVT InputType = std::get<1>(R);
538 unsigned Factor = std::get<2>(R) ? 2 : 1;
539
540 // The native size is legal.
541 setPartialReduceMLAAction(MLAOps, HvxType(AccType), HvxType(InputType),
542 Legal);
543
544 // Allow custom partial MLA reductions on larger vectors than legally
545 // supported. These reductions must be declared as Custom (or Legal)
546 // for foldPartialReduceMLAMulOp() to fold the multiply by one pattern
547 // inserted when the partial reduction intrinsic is converted to
548 // PARTIAL_REDUCE_U/S/SUMLA. Otherwise, the Split action will apply
549 // on the original pattern, including the extensions and multiplies,
550 // which will make it impossible to match.
551 // There are two independent ways to extend the
552 // input size: 1. to concatenate the result - output vector is
553 // proportionally extended, 2) to reduce the result - the output vector
554 // size stays the same. We limit allowed combinations so that the total
555 // number of generated reduction instructions is limited by a constant
556 // number. This limit is arbitrary and can be revised. On one hand, it is
557 // convenient to have more choices; on the other hand, there is a
558 // diminishing benefit of very long sequences, which should probably be
559 // written as loops instead.
560 for (unsigned ConcatFactor = 1; ConcatFactor <= MaxExpandMLA;
561 ConcatFactor <<= 1)
562 for (unsigned ReductionFactor = 1; ReductionFactor <= MaxExpandMLA;
563 ReductionFactor <<= 1)
564 if (ConcatFactor * ReductionFactor != 1 &&
565 ConcatFactor * ReductionFactor <= MaxExpandMLA)
567 MLAOps, HvxType(AccType, Factor * ConcatFactor),
568 HvxType(InputType, Factor * ConcatFactor * ReductionFactor),
569 Custom);
570 }
571 }
572}
573
574unsigned
575HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
576 // Early exit for invalid input types
577 if (!VecTy.isVector())
578 return ~0u;
579
580 MVT ElemTy = VecTy.getVectorElementType();
581 unsigned VecLen = VecTy.getVectorNumElements();
582 unsigned HwLen = Subtarget.getVectorLength();
583
584 // Split vectors of i1 that exceed byte vector length.
585 if (ElemTy == MVT::i1 && VecLen > HwLen)
587
588 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
589 // For shorter vectors of i1, widen them if any of the corresponding
590 // vectors of integers needs to be widened.
591 if (ElemTy == MVT::i1) {
592 for (MVT T : Tys) {
593 assert(T != MVT::i1);
594 auto A = getPreferredHvxVectorAction(MVT::getVectorVT(T, VecLen));
595 if (A != ~0u)
596 return A;
597 }
598 return ~0u;
599 }
600
601 // If the size of VecTy is at least half of the vector length,
602 // widen the vector. Note: the threshold was not selected in
603 // any scientific way.
604 if (llvm::is_contained(Tys, ElemTy)) {
605 unsigned VecWidth = VecTy.getSizeInBits();
606 unsigned HwWidth = 8*HwLen;
607 if (VecWidth > 2*HwWidth)
609
610 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
611 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
613 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
615 }
616
617 // Defer to default.
618 return ~0u;
619}
620
621unsigned
622HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
623 unsigned Opc = Op.getOpcode();
624 switch (Opc) {
625 case HexagonISD::SMUL_LOHI:
626 case HexagonISD::UMUL_LOHI:
627 case HexagonISD::USMUL_LOHI:
629 }
631}
632
634HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
635 const SDLoc &dl, SelectionDAG &DAG) const {
637 IntOps.push_back(DAG.getConstant(IntId, dl, MVT::i32));
638 append_range(IntOps, Ops);
639 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResTy, IntOps);
640}
641
642MVT
643HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
644 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
645
646 MVT ElemTy = Tys.first.getVectorElementType();
647 return MVT::getVectorVT(ElemTy, Tys.first.getVectorNumElements() +
648 Tys.second.getVectorNumElements());
649}
650
651HexagonTargetLowering::TypePair
652HexagonTargetLowering::typeSplit(MVT VecTy) const {
653 assert(VecTy.isVector());
654 unsigned NumElem = VecTy.getVectorNumElements();
655 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
656 MVT HalfTy = MVT::getVectorVT(VecTy.getVectorElementType(), NumElem/2);
657 return { HalfTy, HalfTy };
658}
659
660MVT
661HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
662 MVT ElemTy = VecTy.getVectorElementType();
663 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() * Factor);
664 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
665}
666
667MVT
668HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
669 MVT ElemTy = VecTy.getVectorElementType();
670 MVT NewElemTy = MVT::getIntegerVT(ElemTy.getSizeInBits() / Factor);
671 return MVT::getVectorVT(NewElemTy, VecTy.getVectorNumElements());
672}
673
675HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
676 SelectionDAG &DAG) const {
677 if (ty(Vec).getVectorElementType() == ElemTy)
678 return Vec;
679 MVT CastTy = tyVector(Vec.getValueType().getSimpleVT(), ElemTy);
680 return DAG.getBitcast(CastTy, Vec);
681}
682
684HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
685 SelectionDAG &DAG) const {
686 return DAG.getNode(ISD::CONCAT_VECTORS, dl, typeJoin(ty(Ops)),
687 Ops.first, Ops.second);
688}
689
690HexagonTargetLowering::VectorPair
691HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
692 SelectionDAG &DAG) const {
693 TypePair Tys = typeSplit(ty(Vec));
694 if (Vec.getOpcode() == HexagonISD::QCAT)
695 return VectorPair(Vec.getOperand(0), Vec.getOperand(1));
696 return DAG.SplitVector(Vec, dl, Tys.first, Tys.second);
697}
698
699bool
700HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
701 return Subtarget.isHVXVectorType(Ty) &&
702 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
703}
704
705bool
706HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
707 return Subtarget.isHVXVectorType(Ty) &&
708 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
709}
710
711bool
712HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
713 return Subtarget.isHVXVectorType(Ty, true) &&
714 Ty.getVectorElementType() == MVT::i1;
715}
716
717bool HexagonTargetLowering::allowsHvxMemoryAccess(
718 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
719 // Bool vectors are excluded by default, but make it explicit to
720 // emphasize that bool vectors cannot be loaded or stored.
721 // Also, disallow double vector stores (to prevent unnecessary
722 // store widening in DAG combiner).
723 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
724 return false;
725 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
726 return false;
727 if (Fast)
728 *Fast = 1;
729 return true;
730}
731
732bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
733 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
734 if (!Subtarget.isHVXVectorType(VecTy))
735 return false;
736 // XXX Should this be false? vmemu are a bit slower than vmem.
737 if (Fast)
738 *Fast = 1;
739 return true;
740}
741
742void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
743 MachineInstr &MI, SDNode *Node) const {
744 unsigned Opc = MI.getOpcode();
745 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
746 MachineBasicBlock &MB = *MI.getParent();
747 MachineFunction &MF = *MB.getParent();
748 MachineRegisterInfo &MRI = MF.getRegInfo();
749 DebugLoc DL = MI.getDebugLoc();
750 auto At = MI.getIterator();
751
752 switch (Opc) {
753 case Hexagon::PS_vsplatib:
754 if (Subtarget.useHVXV62Ops()) {
755 // SplatV = A2_tfrsi #imm
756 // OutV = V6_lvsplatb SplatV
757 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
758 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
759 .add(MI.getOperand(1));
760 Register OutV = MI.getOperand(0).getReg();
761 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
762 .addReg(SplatV);
763 } else {
764 // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
765 // OutV = V6_lvsplatw SplatV
766 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
767 const MachineOperand &InpOp = MI.getOperand(1);
768 assert(InpOp.isImm());
769 uint32_t V = InpOp.getImm() & 0xFF;
770 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
771 .addImm(V << 24 | V << 16 | V << 8 | V);
772 Register OutV = MI.getOperand(0).getReg();
773 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
774 }
775 MB.erase(At);
776 break;
777 case Hexagon::PS_vsplatrb:
778 if (Subtarget.useHVXV62Ops()) {
779 // OutV = V6_lvsplatb Inp
780 Register OutV = MI.getOperand(0).getReg();
781 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatb), OutV)
782 .add(MI.getOperand(1));
783 } else {
784 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
785 const MachineOperand &InpOp = MI.getOperand(1);
786 BuildMI(MB, At, DL, TII.get(Hexagon::S2_vsplatrb), SplatV)
787 .addReg(InpOp.getReg(), {}, InpOp.getSubReg());
788 Register OutV = MI.getOperand(0).getReg();
789 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV)
790 .addReg(SplatV);
791 }
792 MB.erase(At);
793 break;
794 case Hexagon::PS_vsplatih:
795 if (Subtarget.useHVXV62Ops()) {
796 // SplatV = A2_tfrsi #imm
797 // OutV = V6_lvsplath SplatV
798 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
799 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
800 .add(MI.getOperand(1));
801 Register OutV = MI.getOperand(0).getReg();
802 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
803 .addReg(SplatV);
804 } else {
805 // SplatV = A2_tfrsi #imm:#imm
806 // OutV = V6_lvsplatw SplatV
807 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
808 const MachineOperand &InpOp = MI.getOperand(1);
809 assert(InpOp.isImm());
810 uint32_t V = InpOp.getImm() & 0xFFFF;
811 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
812 .addImm(V << 16 | V);
813 Register OutV = MI.getOperand(0).getReg();
814 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
815 }
816 MB.erase(At);
817 break;
818 case Hexagon::PS_vsplatrh:
819 if (Subtarget.useHVXV62Ops()) {
820 // OutV = V6_lvsplath Inp
821 Register OutV = MI.getOperand(0).getReg();
822 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplath), OutV)
823 .add(MI.getOperand(1));
824 } else {
825 // SplatV = A2_combine_ll Inp, Inp
826 // OutV = V6_lvsplatw SplatV
827 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
828 const MachineOperand &InpOp = MI.getOperand(1);
829 BuildMI(MB, At, DL, TII.get(Hexagon::A2_combine_ll), SplatV)
830 .addReg(InpOp.getReg(), {}, InpOp.getSubReg())
831 .addReg(InpOp.getReg(), {}, InpOp.getSubReg());
832 Register OutV = MI.getOperand(0).getReg();
833 BuildMI(MB, At, DL, TII.get(Hexagon::V6_lvsplatw), OutV).addReg(SplatV);
834 }
835 MB.erase(At);
836 break;
837 case Hexagon::PS_vsplatiw:
838 case Hexagon::PS_vsplatrw:
839 if (Opc == Hexagon::PS_vsplatiw) {
840 // SplatV = A2_tfrsi #imm
841 Register SplatV = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
842 BuildMI(MB, At, DL, TII.get(Hexagon::A2_tfrsi), SplatV)
843 .add(MI.getOperand(1));
844 MI.getOperand(1).ChangeToRegister(SplatV, false);
845 }
846 // OutV = V6_lvsplatw SplatV/Inp
847 MI.setDesc(TII.get(Hexagon::V6_lvsplatw));
848 break;
849 }
850}
851
853HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
854 SelectionDAG &DAG) const {
855 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
856 ElemIdx = DAG.getBitcast(MVT::i32, ElemIdx);
857
858 unsigned ElemWidth = ElemTy.getSizeInBits();
859 if (ElemWidth == 8)
860 return ElemIdx;
861
862 unsigned L = Log2_32(ElemWidth/8);
863 const SDLoc &dl(ElemIdx);
864 return DAG.getNode(ISD::SHL, dl, MVT::i32,
865 {ElemIdx, DAG.getConstant(L, dl, MVT::i32)});
866}
867
869HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
870 SelectionDAG &DAG) const {
871 unsigned ElemWidth = ElemTy.getSizeInBits();
872 assert(ElemWidth >= 8 && ElemWidth <= 32);
873 if (ElemWidth == 32)
874 return Idx;
875
876 if (ty(Idx) != MVT::i32)
877 Idx = DAG.getBitcast(MVT::i32, Idx);
878 const SDLoc &dl(Idx);
879 SDValue Mask = DAG.getConstant(32/ElemWidth - 1, dl, MVT::i32);
880 SDValue SubIdx = DAG.getNode(ISD::AND, dl, MVT::i32, {Idx, Mask});
881 return SubIdx;
882}
883
885HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
886 SDValue Op1, ArrayRef<int> Mask,
887 SelectionDAG &DAG) const {
888 MVT OpTy = ty(Op0);
889 assert(OpTy == ty(Op1));
890
891 MVT ElemTy = OpTy.getVectorElementType();
892 if (ElemTy == MVT::i8)
893 return DAG.getVectorShuffle(OpTy, dl, Op0, Op1, Mask);
894 assert(ElemTy.getSizeInBits() >= 8);
895
896 MVT ResTy = tyVector(OpTy, MVT::i8);
897 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
898
899 SmallVector<int,128> ByteMask;
900 for (int M : Mask) {
901 if (M < 0) {
902 for (unsigned I = 0; I != ElemSize; ++I)
903 ByteMask.push_back(-1);
904 } else {
905 int NewM = M*ElemSize;
906 for (unsigned I = 0; I != ElemSize; ++I)
907 ByteMask.push_back(NewM+I);
908 }
909 }
910 assert(ResTy.getVectorNumElements() == ByteMask.size());
911 return DAG.getVectorShuffle(ResTy, dl, opCastElem(Op0, MVT::i8, DAG),
912 opCastElem(Op1, MVT::i8, DAG), ByteMask);
913}
914
916HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
917 const SDLoc &dl, MVT VecTy,
918 SelectionDAG &DAG) const {
919 unsigned VecLen = Values.size();
920 MachineFunction &MF = DAG.getMachineFunction();
921 MVT ElemTy = VecTy.getVectorElementType();
922 unsigned ElemWidth = ElemTy.getSizeInBits();
923 unsigned HwLen = Subtarget.getVectorLength();
924
925 unsigned ElemSize = ElemWidth / 8;
926 assert(ElemSize*VecLen == HwLen);
928
929 if (VecTy.getVectorElementType() != MVT::i32 &&
930 !(Subtarget.useHVXFloatingPoint() &&
931 VecTy.getVectorElementType() == MVT::f32)) {
932 assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
933 unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
934 MVT PartVT = MVT::getVectorVT(VecTy.getVectorElementType(), OpsPerWord);
935 for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
936 SDValue W = buildVector32(Values.slice(i, OpsPerWord), dl, PartVT, DAG);
937 Words.push_back(DAG.getBitcast(MVT::i32, W));
938 }
939 } else {
940 for (SDValue V : Values)
941 Words.push_back(DAG.getBitcast(MVT::i32, V));
942 }
943 auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
944 unsigned NumValues = Values.size();
945 assert(NumValues > 0);
946 bool IsUndef = true;
947 for (unsigned i = 0; i != NumValues; ++i) {
948 if (Values[i].isUndef())
949 continue;
950 IsUndef = false;
951 if (!SplatV.getNode())
952 SplatV = Values[i];
953 else if (SplatV != Values[i])
954 return false;
955 }
956 if (IsUndef)
957 SplatV = Values[0];
958 return true;
959 };
960
961 unsigned NumWords = Words.size();
962 SDValue SplatV;
963 bool IsSplat = isSplat(Words, SplatV);
964 if (IsSplat && isUndef(SplatV))
965 return DAG.getUNDEF(VecTy);
966 if (IsSplat) {
967 assert(SplatV.getNode());
968 if (isNullConstant(SplatV))
969 return getZero(dl, VecTy, DAG);
970 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
971 SDValue S = DAG.getNode(ISD::SPLAT_VECTOR, dl, WordTy, SplatV);
972 return DAG.getBitcast(VecTy, S);
973 }
974
975 // Delay recognizing constant vectors until here, so that we can generate
976 // a vsplat.
977 SmallVector<ConstantInt*, 128> Consts(VecLen);
978 bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
979 if (AllConst) {
980 ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
981 (Constant**)Consts.end());
982 Constant *CV = ConstantVector::get(Tmp);
983 Align Alignment(HwLen);
985 DAG.getConstantPool(CV, getPointerTy(DAG.getDataLayout()), Alignment),
986 DAG);
987 return DAG.getLoad(VecTy, dl, DAG.getEntryNode(), CP,
989 }
990
991 // A special case is a situation where the vector is built entirely from
992 // elements extracted from another vector. This could be done via a shuffle
993 // more efficiently, but typically, the size of the source vector will not
994 // match the size of the vector being built (which precludes the use of a
995 // shuffle directly).
996 // This only handles a single source vector, and the vector being built
997 // should be of a sub-vector type of the source vector type.
998 auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
999 SmallVectorImpl<int> &SrcIdx) {
1000 SDValue Vec;
1001 for (SDValue V : Values) {
1002 if (isUndef(V)) {
1003 SrcIdx.push_back(-1);
1004 continue;
1005 }
1006 if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1007 return false;
1008 // All extracts should come from the same vector.
1009 SDValue T = V.getOperand(0);
1010 if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
1011 return false;
1012 Vec = T;
1013 ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
1014 if (C == nullptr)
1015 return false;
1016 int I = C->getSExtValue();
1017 assert(I >= 0 && "Negative element index");
1018 SrcIdx.push_back(I);
1019 }
1020 SrcVec = Vec;
1021 return true;
1022 };
1023
1024 SmallVector<int,128> ExtIdx;
1025 SDValue ExtVec;
1026 if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
1027 MVT ExtTy = ty(ExtVec);
1028 unsigned ExtLen = ExtTy.getVectorNumElements();
1029 if (ExtLen == VecLen || ExtLen == 2*VecLen) {
1030 // Construct a new shuffle mask that will produce a vector with the same
1031 // number of elements as the input vector, and such that the vector we
1032 // want will be the initial subvector of it.
1033 SmallVector<int,128> Mask;
1034 BitVector Used(ExtLen);
1035
1036 for (int M : ExtIdx) {
1037 Mask.push_back(M);
1038 if (M >= 0)
1039 Used.set(M);
1040 }
1041 // Fill the rest of the mask with the unused elements of ExtVec in hopes
1042 // that it will result in a permutation of ExtVec's elements. It's still
1043 // fine if it doesn't (e.g. if undefs are present, or elements are
1044 // repeated), but permutations can always be done efficiently via vdelta
1045 // and vrdelta.
1046 for (unsigned I = 0; I != ExtLen; ++I) {
1047 if (Mask.size() == ExtLen)
1048 break;
1049 if (!Used.test(I))
1050 Mask.push_back(I);
1051 }
1052
1053 SDValue S = DAG.getVectorShuffle(ExtTy, dl, ExtVec,
1054 DAG.getUNDEF(ExtTy), Mask);
1055 return ExtLen == VecLen ? S : LoHalf(S, DAG);
1056 }
1057 }
1058
1059 // Find most common element to initialize vector with. This is to avoid
1060 // unnecessary vinsert/valign for cases where the same value is present
1061 // many times. Creates a histogram of the vector's elements to find the
1062 // most common element n.
1063 assert(4*Words.size() == Subtarget.getVectorLength());
1064 int VecHist[32];
1065 int n = 0;
1066 for (unsigned i = 0; i != NumWords; ++i) {
1067 VecHist[i] = 0;
1068 if (Words[i].isUndef())
1069 continue;
1070 for (unsigned j = i; j != NumWords; ++j)
1071 if (Words[i] == Words[j])
1072 VecHist[i]++;
1073
1074 if (VecHist[i] > VecHist[n])
1075 n = i;
1076 }
1077
1078 SDValue HalfV = getZero(dl, VecTy, DAG);
1079 if (VecHist[n] > 1) {
1080 SDValue SplatV = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Words[n]);
1081 HalfV = DAG.getNode(HexagonISD::VALIGN, dl, VecTy,
1082 {HalfV, SplatV, DAG.getConstant(HwLen/2, dl, MVT::i32)});
1083 }
1084 SDValue HalfV0 = HalfV;
1085 SDValue HalfV1 = HalfV;
1086
1087 // Construct two halves in parallel, then or them together. Rn and Rm count
1088 // number of rotations needed before the next element. One last rotation is
1089 // performed post-loop to position the last element.
1090 int Rn = 0, Rm = 0;
1091 SDValue Sn, Sm;
1092 SDValue N = HalfV0;
1093 SDValue M = HalfV1;
1094 for (unsigned i = 0; i != NumWords/2; ++i) {
1095 // Rotate by element count since last insertion.
1096 if (Words[i] != Words[n] || VecHist[n] <= 1) {
1097 Sn = DAG.getConstant(Rn, dl, MVT::i32);
1098 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
1099 N = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
1100 {HalfV0, Words[i]});
1101 Rn = 0;
1102 }
1103 if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
1104 Sm = DAG.getConstant(Rm, dl, MVT::i32);
1105 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
1106 M = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy,
1107 {HalfV1, Words[i+NumWords/2]});
1108 Rm = 0;
1109 }
1110 Rn += 4;
1111 Rm += 4;
1112 }
1113 // Perform last rotation.
1114 Sn = DAG.getConstant(Rn+HwLen/2, dl, MVT::i32);
1115 Sm = DAG.getConstant(Rm, dl, MVT::i32);
1116 HalfV0 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {N, Sn});
1117 HalfV1 = DAG.getNode(HexagonISD::VROR, dl, VecTy, {M, Sm});
1118
1119 SDValue T0 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV0);
1120 SDValue T1 = DAG.getBitcast(tyVector(VecTy, MVT::i32), HalfV1);
1121
1122 SDValue DstV = DAG.getNode(ISD::OR, dl, ty(T0), {T0, T1});
1123
1124 SDValue OutV =
1125 DAG.getBitcast(tyVector(ty(DstV), VecTy.getVectorElementType()), DstV);
1126 return OutV;
1127}
1128
1129SDValue
1130HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
1131 unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
1132 MVT PredTy = ty(PredV);
1133 unsigned HwLen = Subtarget.getVectorLength();
1134 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1135
1136 if (Subtarget.isHVXVectorType(PredTy, true)) {
1137 // Move the vector predicate SubV to a vector register, and scale it
1138 // down to match the representation (bytes per type element) that VecV
1139 // uses. The scaling down will pick every 2nd or 4th (every Scale-th
1140 // in general) element and put them at the front of the resulting
1141 // vector. This subvector will then be inserted into the Q2V of VecV.
1142 // To avoid having an operation that generates an illegal type (short
1143 // vector), generate a full size vector.
1144 //
1145 SDValue T = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, PredV);
1146 SmallVector<int,128> Mask(HwLen);
1147 // Scale = BitBytes(PredV) / Given BitBytes.
1148 unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
1149 unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;
1150
1151 for (unsigned i = 0; i != HwLen; ++i) {
1152 unsigned Num = i % Scale;
1153 unsigned Off = i / Scale;
1154 Mask[BlockLen*Num + Off] = i;
1155 }
1156 SDValue S = DAG.getVectorShuffle(ByteTy, dl, T, DAG.getUNDEF(ByteTy), Mask);
1157 if (!ZeroFill)
1158 return S;
1159 // Fill the bytes beyond BlockLen with 0s.
1160 // V6_pred_scalar2 cannot fill the entire predicate, so it only works
1161 // when BlockLen < HwLen.
1162 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1163 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1164 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1165 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1166 SDValue M = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Q);
1167 return DAG.getNode(ISD::AND, dl, ByteTy, S, M);
1168 }
1169
1170 // Make sure that this is a valid scalar predicate.
1171 assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);
1172
1173 unsigned Bytes = 8 / PredTy.getVectorNumElements();
1174 SmallVector<SDValue,4> Words[2];
1175 unsigned IdxW = 0;
1176
1177 SDValue W0 = isUndef(PredV)
1178 ? DAG.getUNDEF(MVT::i64)
1179 : DAG.getNode(HexagonISD::P2D, dl, MVT::i64, PredV);
1180 Words[IdxW].push_back(HiHalf(W0, DAG));
1181 Words[IdxW].push_back(LoHalf(W0, DAG));
1182
1183 while (Bytes < BitBytes) {
1184 IdxW ^= 1;
1185 Words[IdxW].clear();
1186
1187 if (Bytes < 4) {
1188 for (const SDValue &W : Words[IdxW ^ 1]) {
1189 SDValue T = expandPredicate(W, dl, DAG);
1190 Words[IdxW].push_back(HiHalf(T, DAG));
1191 Words[IdxW].push_back(LoHalf(T, DAG));
1192 }
1193 } else {
1194 for (const SDValue &W : Words[IdxW ^ 1]) {
1195 Words[IdxW].push_back(W);
1196 Words[IdxW].push_back(W);
1197 }
1198 }
1199 Bytes *= 2;
1200 }
1201
1202 assert(Bytes == BitBytes);
1203 SDValue Vec = ZeroFill ? getZero(dl, ByteTy, DAG) : DAG.getUNDEF(ByteTy);
1204 SDValue S4 = DAG.getConstant(HwLen-4, dl, MVT::i32);
1205 for (const SDValue &W : Words[IdxW]) {
1206 Vec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Vec, S4);
1207 Vec = DAG.getNode(HexagonISD::VINSERTW0, dl, ByteTy, Vec, W);
1208 }
1209
1210 return Vec;
1211}
1212
1213SDValue
1214HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
1215 const SDLoc &dl, MVT VecTy,
1216 SelectionDAG &DAG) const {
1217 // Construct a vector V of bytes, such that a comparison V >u 0 would
1218 // produce the required vector predicate.
1219 unsigned VecLen = Values.size();
1220 unsigned HwLen = Subtarget.getVectorLength();
1221 assert(VecLen <= HwLen || VecLen == 8*HwLen);
1223 bool AllT = true, AllF = true;
1224
1225 auto IsTrue = [] (SDValue V) {
1226 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1227 return !N->isZero();
1228 return false;
1229 };
1230 auto IsFalse = [] (SDValue V) {
1231 if (const auto *N = dyn_cast<ConstantSDNode>(V.getNode()))
1232 return N->isZero();
1233 return false;
1234 };
1235
1236 if (VecLen <= HwLen) {
1237 // In the hardware, each bit of a vector predicate corresponds to a byte
1238 // of a vector register. Calculate how many bytes does a bit of VecTy
1239 // correspond to.
1240 assert(HwLen % VecLen == 0);
1241 unsigned BitBytes = HwLen / VecLen;
1242 for (SDValue V : Values) {
1243 AllT &= IsTrue(V);
1244 AllF &= IsFalse(V);
1245
1246 SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(V, dl, MVT::i8)
1247 : DAG.getUNDEF(MVT::i8);
1248 for (unsigned B = 0; B != BitBytes; ++B)
1249 Bytes.push_back(Ext);
1250 }
1251 } else {
1252 // There are as many i1 values, as there are bits in a vector register.
1253 // Divide the values into groups of 8 and check that each group consists
1254 // of the same value (ignoring undefs).
1255 for (unsigned I = 0; I != VecLen; I += 8) {
1256 unsigned B = 0;
1257 // Find the first non-undef value in this group.
1258 for (; B != 8; ++B) {
1259 if (!Values[I+B].isUndef())
1260 break;
1261 }
1262 SDValue F = Values[I+B];
1263 AllT &= IsTrue(F);
1264 AllF &= IsFalse(F);
1265
1266 SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(F, dl, MVT::i8)
1267 : DAG.getUNDEF(MVT::i8);
1268 Bytes.push_back(Ext);
1269 // Verify that the rest of values in the group are the same as the
1270 // first.
1271 for (; B != 8; ++B)
1272 assert(Values[I+B].isUndef() || Values[I+B] == F);
1273 }
1274 }
1275
1276 if (AllT)
1277 return DAG.getNode(HexagonISD::QTRUE, dl, VecTy);
1278 if (AllF)
1279 return DAG.getNode(HexagonISD::QFALSE, dl, VecTy);
1280
1281 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1282 SDValue ByteVec = buildHvxVectorReg(Bytes, dl, ByteTy, DAG);
1283 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1284}
1285
1286SDValue
1287HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1288 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1289 MVT ElemTy = ty(VecV).getVectorElementType();
1290
1291 unsigned ElemWidth = ElemTy.getSizeInBits();
1292 assert(ElemWidth >= 8 && ElemWidth <= 32);
1293 (void)ElemWidth;
1294
1295 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1296 SDValue ExWord = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1297 {VecV, ByteIdx});
1298 if (ElemTy == MVT::i32)
1299 return ExWord;
1300
1301 // Have an extracted word, need to extract the smaller element out of it.
1302 // 1. Extract the bits of (the original) IdxV that correspond to the index
1303 // of the desired element in the 32-bit word.
1304 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1305 // 2. Extract the element from the word.
1306 SDValue ExVec = DAG.getBitcast(tyVector(ty(ExWord), ElemTy), ExWord);
1307 return extractVector(ExVec, SubIdx, dl, ElemTy, MVT::i32, DAG);
1308}
1309
1310SDValue
1311HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1312 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1313 // Implement other return types if necessary.
1314 assert(ResTy == MVT::i1);
1315
1316 unsigned HwLen = Subtarget.getVectorLength();
1317 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1318 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1319
1320 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1321 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1322 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1323
1324 SDValue ExtB = extractHvxElementReg(ByteVec, IdxV, dl, MVT::i32, DAG);
1325 SDValue Zero = DAG.getTargetConstant(0, dl, MVT::i32);
1326 return getInstr(Hexagon::C2_cmpgtui, dl, MVT::i1, {ExtB, Zero}, DAG);
1327}
1328
1329SDValue
1330HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
1331 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1332 MVT ElemTy = ty(VecV).getVectorElementType();
1333
1334 unsigned ElemWidth = ElemTy.getSizeInBits();
1335 assert(ElemWidth >= 8 && ElemWidth <= 32);
1336 (void)ElemWidth;
1337
1338 auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
1339 SDValue ByteIdxV) {
1340 MVT VecTy = ty(VecV);
1341 unsigned HwLen = Subtarget.getVectorLength();
1342 SDValue MaskV =
1343 DAG.getNode(ISD::AND, dl, MVT::i32,
1344 {ByteIdxV, DAG.getSignedConstant(-4, dl, MVT::i32)});
1345 SDValue RotV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {VecV, MaskV});
1346 SDValue InsV = DAG.getNode(HexagonISD::VINSERTW0, dl, VecTy, {RotV, ValV});
1347 SDValue SubV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1348 {DAG.getConstant(HwLen, dl, MVT::i32), MaskV});
1349 SDValue TorV = DAG.getNode(HexagonISD::VROR, dl, VecTy, {InsV, SubV});
1350 return TorV;
1351 };
1352
1353 SDValue ByteIdx = convertToByteIndex(IdxV, ElemTy, DAG);
1354 if (ElemTy == MVT::i32)
1355 return InsertWord(VecV, ValV, ByteIdx);
1356
1357 // If this is not inserting a 32-bit word, convert it into such a thing.
1358 // 1. Extract the existing word from the target vector.
1359 SDValue WordIdx = DAG.getNode(ISD::SRL, dl, MVT::i32,
1360 {ByteIdx, DAG.getConstant(2, dl, MVT::i32)});
1361 SDValue Ext = extractHvxElementReg(opCastElem(VecV, MVT::i32, DAG), WordIdx,
1362 dl, MVT::i32, DAG);
1363
1364 // 2. Treating the extracted word as a 32-bit vector, insert the given
1365 // value into it.
1366 SDValue SubIdx = getIndexInWord32(IdxV, ElemTy, DAG);
1367 MVT SubVecTy = tyVector(ty(Ext), ElemTy);
1368 SDValue Ins = insertVector(DAG.getBitcast(SubVecTy, Ext),
1369 ValV, SubIdx, dl, ElemTy, DAG);
1370
1371 // 3. Insert the 32-bit word back into the original vector.
1372 return InsertWord(VecV, Ins, ByteIdx);
1373}
1374
1375SDValue
1376HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1377 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1378 unsigned HwLen = Subtarget.getVectorLength();
1379 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1380 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1381
1382 unsigned Scale = HwLen / ty(VecV).getVectorNumElements();
1383 SDValue ScV = DAG.getConstant(Scale, dl, MVT::i32);
1384 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, ScV);
1385 ValV = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, ValV);
1386
1387 SDValue InsV = insertHvxElementReg(ByteVec, IdxV, ValV, dl, DAG);
1388 return DAG.getNode(HexagonISD::V2Q, dl, ty(VecV), InsV);
1389}
1390
1391SDValue
1392HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
1393 SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1394 MVT VecTy = ty(VecV);
1395 unsigned HwLen = Subtarget.getVectorLength();
1396 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1397 MVT ElemTy = VecTy.getVectorElementType();
1398 unsigned ElemWidth = ElemTy.getSizeInBits();
1399
1400 // If the source vector is a vector pair, get the single vector containing
1401 // the subvector of interest. The subvector will never overlap two single
1402 // vectors.
1403 if (isHvxPairTy(VecTy)) {
1404 unsigned SubIdx = Hexagon::vsub_lo;
1405 if (Idx * ElemWidth >= 8 * HwLen) {
1406 SubIdx = Hexagon::vsub_hi;
1407 Idx -= VecTy.getVectorNumElements() / 2;
1408 }
1409
1410 VecTy = typeSplit(VecTy).first;
1411 VecV = DAG.getTargetExtractSubreg(SubIdx, dl, VecTy, VecV);
1412 if (VecTy == ResTy)
1413 return VecV;
1414 }
1415
1416 // The only meaningful subvectors of a single HVX vector are those that
1417 // fit in a scalar register.
1418 assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);
1419
1420 MVT WordTy = tyVector(VecTy, MVT::i32);
1421 SDValue WordVec = DAG.getBitcast(WordTy, VecV);
1422 unsigned WordIdx = (Idx*ElemWidth) / 32;
1423
1424 SDValue W0Idx = DAG.getConstant(WordIdx, dl, MVT::i32);
1425 SDValue W0 = extractHvxElementReg(WordVec, W0Idx, dl, MVT::i32, DAG);
1426 if (ResTy.getSizeInBits() == 32)
1427 return DAG.getBitcast(ResTy, W0);
1428
1429 SDValue W1Idx = DAG.getConstant(WordIdx+1, dl, MVT::i32);
1430 SDValue W1 = extractHvxElementReg(WordVec, W1Idx, dl, MVT::i32, DAG);
1431 SDValue WW = getCombine(W1, W0, dl, MVT::i64, DAG);
1432 return DAG.getBitcast(ResTy, WW);
1433}
1434
1435SDValue
1436HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
1437 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1438 MVT VecTy = ty(VecV);
1439 unsigned HwLen = Subtarget.getVectorLength();
1440 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1441 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1442 // IdxV is required to be a constant.
1443 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1444
1445 unsigned ResLen = ResTy.getVectorNumElements();
1446 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1447 unsigned Offset = Idx * BitBytes;
1448 SDValue Undef = DAG.getUNDEF(ByteTy);
1449 SmallVector<int,128> Mask;
1450
1451 if (Subtarget.isHVXVectorType(ResTy, true)) {
1452 // Converting between two vector predicates. Since the result is shorter
1453 // than the source, it will correspond to a vector predicate with the
1454 // relevant bits replicated. The replication count is the ratio of the
1455 // source and target vector lengths.
1456 unsigned Rep = VecTy.getVectorNumElements() / ResLen;
1457 assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
1458 for (unsigned i = 0; i != HwLen/Rep; ++i) {
1459 for (unsigned j = 0; j != Rep; ++j)
1460 Mask.push_back(i + Offset);
1461 }
1462 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1463 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, ShuffV);
1464 }
1465
1466 // Converting between a vector predicate and a scalar predicate. In the
1467 // vector predicate, a group of BitBytes bits will correspond to a single
1468 // i1 element of the source vector type. Those bits will all have the same
1469 // value. The same will be true for ByteVec, where each byte corresponds
1470 // to a bit in the vector predicate.
1471 // The algorithm is to traverse the ByteVec, going over the i1 values from
1472 // the source vector, and generate the corresponding representation in an
1473 // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
1474 // elements so that the interesting 8 bytes will be in the low end of the
1475 // vector.
1476 unsigned Rep = 8 / ResLen;
1477 // Make sure the output fill the entire vector register, so repeat the
1478 // 8-byte groups as many times as necessary.
1479 for (unsigned r = 0; r != HwLen / 8; ++r) {
1480 // This will generate the indexes of the 8 interesting bytes.
1481 for (unsigned i = 0; i != ResLen; ++i) {
1482 for (unsigned j = 0; j != Rep; ++j)
1483 Mask.push_back(Offset + i*BitBytes);
1484 }
1485 }
1486
1487 SDValue Zero = getZero(dl, MVT::i32, DAG);
1488 SDValue ShuffV = DAG.getVectorShuffle(ByteTy, dl, ByteVec, Undef, Mask);
1489 // Combine the two low words from ShuffV into a v8i8, and byte-compare
1490 // them against 0.
1491 SDValue W0 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32, {ShuffV, Zero});
1492 SDValue W1 = DAG.getNode(HexagonISD::VEXTRACTW, dl, MVT::i32,
1493 {ShuffV, DAG.getConstant(4, dl, MVT::i32)});
1494 SDValue Vec64 = getCombine(W1, W0, dl, MVT::v8i8, DAG);
1495 return getInstr(Hexagon::A4_vcmpbgtui, dl, ResTy,
1496 {Vec64, DAG.getTargetConstant(0, dl, MVT::i32)}, DAG);
1497}
1498
1499SDValue
1500HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
1501 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1502 MVT VecTy = ty(VecV);
1503 MVT SubTy = ty(SubV);
1504 unsigned HwLen = Subtarget.getVectorLength();
1505 MVT ElemTy = VecTy.getVectorElementType();
1506 unsigned ElemWidth = ElemTy.getSizeInBits();
1507
1508 bool IsPair = isHvxPairTy(VecTy);
1509 MVT SingleTy = MVT::getVectorVT(ElemTy, (8*HwLen)/ElemWidth);
1510 // The two single vectors that VecV consists of, if it's a pair.
1511 SDValue V0, V1;
1512 SDValue SingleV = VecV;
1513 SDValue PickHi;
1514
1515 if (IsPair) {
1516 V0 = LoHalf(VecV, DAG);
1517 V1 = HiHalf(VecV, DAG);
1518
1519 SDValue HalfV = DAG.getConstant(SingleTy.getVectorNumElements(),
1520 dl, MVT::i32);
1521 PickHi = DAG.getSetCC(dl, MVT::i1, IdxV, HalfV, ISD::SETUGT);
1522 if (isHvxSingleTy(SubTy)) {
1523 if (const auto *CN = dyn_cast<const ConstantSDNode>(IdxV.getNode())) {
1524 unsigned Idx = CN->getZExtValue();
1525 assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
1526 unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
1527 return DAG.getTargetInsertSubreg(SubIdx, dl, VecTy, VecV, SubV);
1528 }
1529 // If IdxV is not a constant, generate the two variants: with the
1530 // SubV as the high and as the low subregister, and select the right
1531 // pair based on the IdxV.
1532 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SubV, V1});
1533 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SubV});
1534 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1535 }
1536 // The subvector being inserted must be entirely contained in one of
1537 // the vectors V0 or V1. Set SingleV to the correct one, and update
1538 // IdxV to be the index relative to the beginning of that vector.
1539 SDValue S = DAG.getNode(ISD::SUB, dl, MVT::i32, IdxV, HalfV);
1540 IdxV = DAG.getNode(ISD::SELECT, dl, MVT::i32, PickHi, S, IdxV);
1541 SingleV = DAG.getNode(ISD::SELECT, dl, SingleTy, PickHi, V1, V0);
1542 }
1543
1544 // The only meaningful subvectors of a single HVX vector are those that
1545 // fit in a scalar register.
1546 assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
1547 // Convert IdxV to be index in bytes.
1548 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1549 if (!IdxN || !IdxN->isZero()) {
1550 IdxV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1551 DAG.getConstant(ElemWidth/8, dl, MVT::i32));
1552 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, IdxV);
1553 }
1554 // When inserting a single word, the rotation back to the original position
1555 // would be by HwLen-Idx, but if two words are inserted, it will need to be
1556 // by (HwLen-4)-Idx.
1557 unsigned RolBase = HwLen;
1558 if (SubTy.getSizeInBits() == 32) {
1559 SDValue V = DAG.getBitcast(MVT::i32, SubV);
1560 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, V);
1561 } else {
1562 SDValue V = DAG.getBitcast(MVT::i64, SubV);
1563 SDValue R0 = LoHalf(V, DAG);
1564 SDValue R1 = HiHalf(V, DAG);
1565 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R0);
1566 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV,
1567 DAG.getConstant(4, dl, MVT::i32));
1568 SingleV = DAG.getNode(HexagonISD::VINSERTW0, dl, SingleTy, SingleV, R1);
1569 RolBase = HwLen-4;
1570 }
1571 // If the vector wasn't ror'ed, don't ror it back.
1572 if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
1573 SDValue RolV = DAG.getNode(ISD::SUB, dl, MVT::i32,
1574 DAG.getConstant(RolBase, dl, MVT::i32), IdxV);
1575 SingleV = DAG.getNode(HexagonISD::VROR, dl, SingleTy, SingleV, RolV);
1576 }
1577
1578 if (IsPair) {
1579 SDValue InLo = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {SingleV, V1});
1580 SDValue InHi = DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, {V0, SingleV});
1581 return DAG.getNode(ISD::SELECT, dl, VecTy, PickHi, InHi, InLo);
1582 }
1583 return SingleV;
1584}
1585
1586SDValue
1587HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
1588 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1589 MVT VecTy = ty(VecV);
1590 MVT SubTy = ty(SubV);
1591 assert(Subtarget.isHVXVectorType(VecTy, true));
1592 // VecV is an HVX vector predicate. SubV may be either an HVX vector
1593 // predicate as well, or it can be a scalar predicate.
1594
1595 unsigned VecLen = VecTy.getVectorNumElements();
1596 unsigned HwLen = Subtarget.getVectorLength();
1597 assert(HwLen % VecLen == 0 && "Unexpected vector type");
1598
1599 unsigned Scale = VecLen / SubTy.getVectorNumElements();
1600 unsigned BitBytes = HwLen / VecLen;
1601 unsigned BlockLen = HwLen / Scale;
1602
1603 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1604 SDValue ByteVec = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, VecV);
1605 SDValue ByteSub = createHvxPrefixPred(SubV, dl, BitBytes, false, DAG);
1606 SDValue ByteIdx;
1607
1608 auto *IdxN = dyn_cast<ConstantSDNode>(IdxV.getNode());
1609 if (!IdxN || !IdxN->isZero()) {
1610 ByteIdx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
1611 DAG.getConstant(BitBytes, dl, MVT::i32));
1612 ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteIdx);
1613 }
1614
1615 // ByteVec is the target vector VecV rotated in such a way that the
1616 // subvector should be inserted at index 0. Generate a predicate mask
1617 // and use vmux to do the insertion.
1618 assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
1619 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1620 SDValue Q = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
1621 {DAG.getConstant(BlockLen, dl, MVT::i32)}, DAG);
1622 ByteVec = getInstr(Hexagon::V6_vmux, dl, ByteTy, {Q, ByteSub, ByteVec}, DAG);
1623 // Rotate ByteVec back, and convert to a vector predicate.
1624 if (!IdxN || !IdxN->isZero()) {
1625 SDValue HwLenV = DAG.getConstant(HwLen, dl, MVT::i32);
1626 SDValue ByteXdi = DAG.getNode(ISD::SUB, dl, MVT::i32, HwLenV, ByteIdx);
1627 ByteVec = DAG.getNode(HexagonISD::VROR, dl, ByteTy, ByteVec, ByteXdi);
1628 }
1629 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, ByteVec);
1630}
1631
1632SDValue
1633HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1634 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1635 // Sign- and any-extending of a vector predicate to a vector register is
1636 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1637 // a vector of 1s (where the 1s are of type matching the vector type).
1638 assert(Subtarget.isHVXVectorType(ResTy));
1639 if (!ZeroExt)
1640 return DAG.getNode(HexagonISD::Q2V, dl, ResTy, VecV);
1641
1642 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1643 SDValue True = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
1644 DAG.getConstant(1, dl, MVT::i32));
1645 SDValue False = getZero(dl, ResTy, DAG);
1646 return DAG.getSelect(dl, ResTy, VecV, True, False);
1647}
1648
1649SDValue
1650HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
1651 MVT ResTy, SelectionDAG &DAG) const {
1652 // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
1653 // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
1654 // vector register. The remaining bits of the vector register are
1655 // unspecified.
1656
1657 MachineFunction &MF = DAG.getMachineFunction();
1658 unsigned HwLen = Subtarget.getVectorLength();
1659 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1660 MVT PredTy = ty(VecQ);
1661 unsigned PredLen = PredTy.getVectorNumElements();
1662 assert(HwLen % PredLen == 0);
1663 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(8*HwLen/PredLen), PredLen);
1664
1665 Type *Int8Ty = Type::getInt8Ty(*DAG.getContext());
1667 // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
1668 // These are bytes with the LSB rotated left with respect to their index.
1669 for (unsigned i = 0; i != HwLen/8; ++i) {
1670 for (unsigned j = 0; j != 8; ++j)
1671 Tmp.push_back(ConstantInt::get(Int8Ty, 1ull << j));
1672 }
1673 Constant *CV = ConstantVector::get(Tmp);
1674 Align Alignment(HwLen);
1676 DAG.getConstantPool(CV, getPointerTy(DAG.getDataLayout()), Alignment),
1677 DAG);
1678 SDValue Bytes =
1679 DAG.getLoad(ByteTy, dl, DAG.getEntryNode(), CP,
1681
1682 // Select the bytes that correspond to true bits in the vector predicate.
1683 SDValue Sel = DAG.getSelect(dl, VecTy, VecQ, DAG.getBitcast(VecTy, Bytes),
1684 getZero(dl, VecTy, DAG));
1685 // Calculate the OR of all bytes in each group of 8. That will compress
1686 // all the individual bits into a single byte.
1687 // First, OR groups of 4, via vrmpy with 0x01010101.
1688 SDValue All1 =
1689 DAG.getSplatBuildVector(MVT::v4i8, dl, DAG.getConstant(1, dl, MVT::i32));
1690 SDValue Vrmpy = getInstr(Hexagon::V6_vrmpyub, dl, ByteTy, {Sel, All1}, DAG);
1691 // Then rotate the accumulated vector by 4 bytes, and do the final OR.
1692 SDValue Rot = getInstr(Hexagon::V6_valignbi, dl, ByteTy,
1693 {Vrmpy, Vrmpy, DAG.getTargetConstant(4, dl, MVT::i32)}, DAG);
1694 SDValue Vor = DAG.getNode(ISD::OR, dl, ByteTy, {Vrmpy, Rot});
1695
1696 // Pick every 8th byte and coalesce them at the beginning of the output.
1697 // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
1698 // byte and so on.
1699 SmallVector<int,128> Mask;
1700 for (unsigned i = 0; i != HwLen; ++i)
1701 Mask.push_back((8*i) % HwLen + i/(HwLen/8));
1702 SDValue Collect =
1703 DAG.getVectorShuffle(ByteTy, dl, Vor, DAG.getUNDEF(ByteTy), Mask);
1704 return DAG.getBitcast(ResTy, Collect);
1705}
1706
1707SDValue
1708HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1709 const SDLoc &dl, SelectionDAG &DAG) const {
1710 // Take a vector and resize the element type to match the given type.
1711 MVT InpTy = ty(VecV);
1712 if (InpTy == ResTy)
1713 return VecV;
1714
1715 unsigned InpWidth = InpTy.getSizeInBits();
1716 unsigned ResWidth = ResTy.getSizeInBits();
1717
1718 if (InpTy.isFloatingPoint()) {
1719 return InpWidth < ResWidth
1720 ? DAG.getNode(ISD::FP_EXTEND, dl, ResTy, VecV)
1721 : DAG.getNode(ISD::FP_ROUND, dl, ResTy, VecV,
1722 DAG.getTargetConstant(0, dl, MVT::i32));
1723 }
1724
1725 assert(InpTy.isInteger());
1726
1727 if (InpWidth < ResWidth) {
1728 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1729 return DAG.getNode(ExtOpc, dl, ResTy, VecV);
1730 } else {
1731 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1732 return DAG.getNode(NarOpc, dl, ResTy, VecV, DAG.getValueType(ResTy));
1733 }
1734}
1735
1736SDValue
1737HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1738 SelectionDAG &DAG) const {
1739 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1740
1741 const SDLoc &dl(Vec);
1742 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1743 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubTy,
1744 {Vec, DAG.getConstant(ElemIdx, dl, MVT::i32)});
1745}
1746
1747SDValue
1748HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1749 const {
1750 const SDLoc &dl(Op);
1751 MVT VecTy = ty(Op);
1752
1753 unsigned Size = Op.getNumOperands();
1755 for (unsigned i = 0; i != Size; ++i)
1756 Ops.push_back(Op.getOperand(i));
1757
1758 if (VecTy.getVectorElementType() == MVT::i1)
1759 return buildHvxVectorPred(Ops, dl, VecTy, DAG);
1760
1761 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1762 // not a legal type, just bitcast the node to use i16
1763 // types and bitcast the result back to f16
1764 if (VecTy.getVectorElementType() == MVT::f16 ||
1765 VecTy.getVectorElementType() == MVT::bf16) {
1767 for (unsigned i = 0; i != Size; i++)
1768 NewOps.push_back(DAG.getBitcast(MVT::i16, Ops[i]));
1769
1770 SDValue T0 =
1771 DAG.getNode(ISD::BUILD_VECTOR, dl, tyVector(VecTy, MVT::i16), NewOps);
1772 return DAG.getBitcast(tyVector(VecTy, VecTy.getVectorElementType()), T0);
1773 }
1774
1775 // First, split the BUILD_VECTOR for vector pairs. We could generate
1776 // some pairs directly (via splat), but splats should be generated
1777 // by the combiner prior to getting here.
1778 if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
1780 MVT SingleTy = typeSplit(VecTy).first;
1781 SDValue V0 = buildHvxVectorReg(A.take_front(Size / 2), dl, SingleTy, DAG);
1782 SDValue V1 = buildHvxVectorReg(A.drop_front(Size / 2), dl, SingleTy, DAG);
1783 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VecTy, V0, V1);
1784 }
1785
1786 return buildHvxVectorReg(Ops, dl, VecTy, DAG);
1787}
1788
1789SDValue
1790HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1791 const {
1792 const SDLoc &dl(Op);
1793 MVT VecTy = ty(Op);
1794 MVT ArgTy = ty(Op.getOperand(0));
1795
1796 if (ArgTy == MVT::f16 || ArgTy == MVT::bf16) {
1797 MVT SplatTy = MVT::getVectorVT(MVT::i16, VecTy.getVectorNumElements());
1798 SDValue ToInt16 = DAG.getBitcast(MVT::i16, Op.getOperand(0));
1799 SDValue ToInt32 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, ToInt16);
1800 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, SplatTy, ToInt32);
1801 return DAG.getBitcast(VecTy, Splat);
1802 }
1803
1804 return SDValue();
1805}
1806
1807SDValue
1808HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
1809 const {
1810 // Vector concatenation of two integer (non-bool) vectors does not need
1811 // special lowering. Custom-lower concats of bool vectors and expand
1812 // concats of more than 2 vectors.
1813 MVT VecTy = ty(Op);
1814 const SDLoc &dl(Op);
1815 unsigned NumOp = Op.getNumOperands();
1816 if (VecTy.getVectorElementType() != MVT::i1) {
1817 if (NumOp == 2)
1818 return Op;
1819 // Expand the other cases into a build-vector.
1821 for (SDValue V : Op.getNode()->ops())
1822 DAG.ExtractVectorElements(V, Elems);
1823 // A vector of i16 will be broken up into a build_vector of i16's.
1824 // This is a problem, since at the time of operation legalization,
1825 // all operations are expected to be type-legalized, and i16 is not
1826 // a legal type. If any of the extracted elements is not of a valid
1827 // type, sign-extend it to a valid one.
1828 for (SDValue &V : Elems) {
1829 MVT Ty = ty(V);
1830 if (!isTypeLegal(Ty)) {
1831 MVT NTy = typeLegalize(Ty, DAG);
1832 if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1833 V = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NTy,
1834 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NTy,
1835 V.getOperand(0), V.getOperand(1)),
1836 DAG.getValueType(Ty));
1837 continue;
1838 }
1839 // A few less complicated cases.
1840 switch (V.getOpcode()) {
1841 case ISD::Constant:
1842 V = DAG.getSExtOrTrunc(V, dl, NTy);
1843 break;
1844 case ISD::UNDEF:
1845 V = DAG.getUNDEF(NTy);
1846 break;
1847 case ISD::TRUNCATE:
1848 V = V.getOperand(0);
1849 break;
1850 default:
1851 llvm_unreachable("Unexpected vector element");
1852 }
1853 }
1854 }
1855 return DAG.getBuildVector(VecTy, dl, Elems);
1856 }
1857
1858 assert(VecTy.getVectorElementType() == MVT::i1);
1859 unsigned HwLen = Subtarget.getVectorLength();
1860 assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);
1861
1862 SDValue Op0 = Op.getOperand(0);
1863
1864 // If the operands are HVX types (i.e. not scalar predicates), then
1865 // defer the concatenation, and create QCAT instead.
1866 if (Subtarget.isHVXVectorType(ty(Op0), true)) {
1867 if (NumOp == 2)
1868 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, Op0, Op.getOperand(1));
1869
1870 ArrayRef<SDUse> U(Op.getNode()->ops());
1873
1874 MVT HalfTy = typeSplit(VecTy).first;
1875 SDValue V0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1876 Ops.take_front(NumOp/2));
1877 SDValue V1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfTy,
1878 Ops.take_back(NumOp/2));
1879 return DAG.getNode(HexagonISD::QCAT, dl, VecTy, V0, V1);
1880 }
1881
1882 // Count how many bytes (in a vector register) each bit in VecTy
1883 // corresponds to.
1884 unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
1885
1886 // Make sure that createHvxPrefixPred will only ever need to expand
1887 // the predicate, i.e. bytes-per-bit in the input is not greater than
1888 // the target bytes-per-bit in the result.
1889 SDValue Combined = combineConcatOfScalarPreds(Op, BitBytes, DAG);
1890 SmallVector<SDValue,8> Prefixes;
1891 for (SDValue V : Combined.getNode()->op_values()) {
1892 SDValue P = createHvxPrefixPred(V, dl, BitBytes, true, DAG);
1893 Prefixes.push_back(P);
1894 }
1895
1896 unsigned InpLen = ty(Combined.getOperand(0)).getVectorNumElements();
1897 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
1898 SDValue S = DAG.getConstant(HwLen - InpLen*BitBytes, dl, MVT::i32);
1899 SDValue Res = getZero(dl, ByteTy, DAG);
1900 for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
1901 Res = DAG.getNode(HexagonISD::VROR, dl, ByteTy, Res, S);
1902 Res = DAG.getNode(ISD::OR, dl, ByteTy, Res, Prefixes[e-i-1]);
1903 }
1904 return DAG.getNode(HexagonISD::V2Q, dl, VecTy, Res);
1905}
1906
1907SDValue
1908HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1909 const {
1910 // Change the type of the extracted element to i32.
1911 SDValue VecV = Op.getOperand(0);
1912 MVT ElemTy = ty(VecV).getVectorElementType();
1913 const SDLoc &dl(Op);
1914 SDValue IdxV = Op.getOperand(1);
1915 if (ElemTy == MVT::i1)
1916 return extractHvxElementPred(VecV, IdxV, dl, ty(Op), DAG);
1917
1918 return extractHvxElementReg(VecV, IdxV, dl, ty(Op), DAG);
1919}
1920
1921SDValue
1922HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1923 const {
1924 const SDLoc &dl(Op);
1925 MVT VecTy = ty(Op);
1926 SDValue VecV = Op.getOperand(0);
1927 SDValue ValV = Op.getOperand(1);
1928 SDValue IdxV = Op.getOperand(2);
1929 MVT ElemTy = ty(VecV).getVectorElementType();
1930 if (ElemTy == MVT::i1)
1931 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1932
1933 if (ElemTy == MVT::f16 || ElemTy == MVT::bf16) {
1935 tyVector(VecTy, MVT::i16),
1936 DAG.getBitcast(tyVector(VecTy, MVT::i16), VecV),
1937 DAG.getBitcast(MVT::i16, ValV), IdxV);
1938 return DAG.getBitcast(tyVector(VecTy, ElemTy), T0);
1939 }
1940
1941 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1942}
1943
1944SDValue
1945HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1946 const {
1947 SDValue SrcV = Op.getOperand(0);
1948 MVT SrcTy = ty(SrcV);
1949 MVT DstTy = ty(Op);
1950 SDValue IdxV = Op.getOperand(1);
1951 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1952 assert(Idx % DstTy.getVectorNumElements() == 0);
1953 (void)Idx;
1954 const SDLoc &dl(Op);
1955
1956 MVT ElemTy = SrcTy.getVectorElementType();
1957 if (ElemTy == MVT::i1)
1958 return extractHvxSubvectorPred(SrcV, IdxV, dl, DstTy, DAG);
1959
1960 return extractHvxSubvectorReg(Op, SrcV, IdxV, dl, DstTy, DAG);
1961}
1962
1963SDValue
1964HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1965 const {
1966 // Idx does not need to be a constant.
1967 SDValue VecV = Op.getOperand(0);
1968 SDValue ValV = Op.getOperand(1);
1969 SDValue IdxV = Op.getOperand(2);
1970
1971 const SDLoc &dl(Op);
1972 MVT VecTy = ty(VecV);
1973 MVT ElemTy = VecTy.getVectorElementType();
1974 if (ElemTy == MVT::i1)
1975 return insertHvxSubvectorPred(VecV, ValV, IdxV, dl, DAG);
1976
1977 return insertHvxSubvectorReg(VecV, ValV, IdxV, dl, DAG);
1978}
1979
1980SDValue
1981HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1982 // Lower any-extends of boolean vectors to sign-extends, since they
1983 // translate directly to Q2V. Zero-extending could also be done equally
1984 // fast, but Q2V is used/recognized in more places.
1985 // For all other vectors, use zero-extend.
1986 MVT ResTy = ty(Op);
1987 SDValue InpV = Op.getOperand(0);
1988 MVT ElemTy = ty(InpV).getVectorElementType();
1989 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
1990 return LowerHvxSignExt(Op, DAG);
1991 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(Op), ResTy, InpV);
1992}
1993
1994SDValue
1995HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1996 MVT ResTy = ty(Op);
1997 SDValue InpV = Op.getOperand(0);
1998 MVT ElemTy = ty(InpV).getVectorElementType();
1999 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
2000 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), false, DAG);
2001 return Op;
2002}
2003
2004SDValue
2005HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
2006 MVT ResTy = ty(Op);
2007 SDValue InpV = Op.getOperand(0);
2008 MVT ElemTy = ty(InpV).getVectorElementType();
2009 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(ResTy))
2010 return extendHvxVectorPred(InpV, SDLoc(Op), ty(Op), true, DAG);
2011 return Op;
2012}
2013
2014SDValue
2015HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
2016 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
2017 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
2018 const SDLoc &dl(Op);
2019 MVT ResTy = ty(Op);
2020 SDValue InpV = Op.getOperand(0);
2021 assert(ResTy == ty(InpV));
2022
2023 // Calculate the vectors of 1 and bitwidth(x).
2024 MVT ElemTy = ty(InpV).getVectorElementType();
2025 unsigned ElemWidth = ElemTy.getSizeInBits();
2026
2027 SDValue Vec1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
2028 DAG.getConstant(1, dl, MVT::i32));
2029 SDValue VecW = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
2030 DAG.getConstant(ElemWidth, dl, MVT::i32));
2031 SDValue VecN1 = DAG.getNode(ISD::SPLAT_VECTOR, dl, ResTy,
2032 DAG.getAllOnesConstant(dl, MVT::i32));
2033
2034 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
2035 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
2036 // it separately in custom combine or selection).
2037 SDValue A = DAG.getNode(ISD::AND, dl, ResTy,
2038 {DAG.getNode(ISD::XOR, dl, ResTy, {InpV, VecN1}),
2039 DAG.getNode(ISD::SUB, dl, ResTy, {InpV, Vec1})});
2040 return DAG.getNode(ISD::SUB, dl, ResTy,
2041 {VecW, DAG.getNode(ISD::CTLZ, dl, ResTy, A)});
2042}
2043
2044SDValue
2045HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
2046 const SDLoc &dl(Op);
2047 MVT ResTy = ty(Op);
2048 assert(ResTy.getVectorElementType() == MVT::i32);
2049
2050 SDValue Vs = Op.getOperand(0);
2051 SDValue Vt = Op.getOperand(1);
2052
2053 SDVTList ResTys = DAG.getVTList(ResTy, ResTy);
2054 unsigned Opc = Op.getOpcode();
2055
2056 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
2057 if (Opc == ISD::MULHU)
2058 return DAG.getNode(HexagonISD::UMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
2059 if (Opc == ISD::MULHS)
2060 return DAG.getNode(HexagonISD::SMUL_LOHI, dl, ResTys, {Vs, Vt}).getValue(1);
2061
2062#ifndef NDEBUG
2063 Op.dump(&DAG);
2064#endif
2065 llvm_unreachable("Unexpected mulh operation");
2066}
2067
2068SDValue
2069HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
2070 const SDLoc &dl(Op);
2071 unsigned Opc = Op.getOpcode();
2072 SDValue Vu = Op.getOperand(0);
2073 SDValue Vv = Op.getOperand(1);
2074
2075 // If the HI part is not used, convert it to a regular MUL.
2076 if (auto HiVal = Op.getValue(1); HiVal.use_empty()) {
2077 // Need to preserve the types and the number of values.
2078 SDValue Hi = DAG.getUNDEF(ty(HiVal));
2079 SDValue Lo = DAG.getNode(ISD::MUL, dl, ty(Op), {Vu, Vv});
2080 return DAG.getMergeValues({Lo, Hi}, dl);
2081 }
2082
2083 bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
2084 bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;
2085
2086 // Legal on HVX v62+, but lower it here because patterns can't handle multi-
2087 // valued nodes.
2088 if (Subtarget.useHVXV62Ops())
2089 return emitHvxMulLoHiV62(Vu, SignedVu, Vv, SignedVv, dl, DAG);
2090
2091 if (Opc == HexagonISD::SMUL_LOHI) {
2092 // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
2093 // for other signedness LOHI is cheaper.
2094 if (auto LoVal = Op.getValue(0); LoVal.use_empty()) {
2095 SDValue Hi = emitHvxMulHsV60(Vu, Vv, dl, DAG);
2096 SDValue Lo = DAG.getUNDEF(ty(LoVal));
2097 return DAG.getMergeValues({Lo, Hi}, dl);
2098 }
2099 }
2100
2101 return emitHvxMulLoHiV60(Vu, SignedVu, Vv, SignedVv, dl, DAG);
2102}
2103
2104SDValue
2105HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
2106 SDValue Val = Op.getOperand(0);
2107 MVT ResTy = ty(Op);
2108 MVT ValTy = ty(Val);
2109 const SDLoc &dl(Op);
2110
2111 if (isHvxBoolTy(ValTy) && ResTy.isScalarInteger()) {
2112 unsigned HwLen = Subtarget.getVectorLength();
2113 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen/4);
2114
2115 // When the predicate is shorter than the predicate register, each boolean
2116 // is represented by multiple consecutive bits in the input register.
2117 // Condense the bits so each boolean is represented by one bit. This only
2118 // handles 2x and 4x compaction ratios.
2119 unsigned PredLen = ValTy.getVectorNumElements();
2120 if (PredLen < HwLen) {
2121 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
2122 Val = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Val);
2123 if (HwLen > PredLen * 2) {
2124 assert(HwLen == PredLen * 4);
2125 PredLen *= 2;
2126 Val = getInstr(Hexagon::V6_vdealh, dl, ByteTy, Val, DAG);
2127 }
2128 if (HwLen > PredLen) {
2129 assert(HwLen == PredLen * 2);
2130 Val = getInstr(Hexagon::V6_vdealb, dl, ByteTy, Val, DAG);
2131 }
2132 Val = DAG.getNode(HexagonISD::V2Q, dl, ValTy, Val);
2133 }
2134
2135 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
2136 unsigned BitWidth = ResTy.getSizeInBits();
2137
2138 if (BitWidth < 64) {
2139 SDValue W0 = extractHvxElementReg(VQ, DAG.getConstant(0, dl, MVT::i32),
2140 dl, MVT::i32, DAG);
2141 if (BitWidth == 32)
2142 return W0;
2143 assert(BitWidth < 32u);
2144 return DAG.getZExtOrTrunc(W0, dl, ResTy);
2145 }
2146
2147 // The result is >= 64 bits. The only options are 64 or 128.
2148 assert(BitWidth == 64 || BitWidth == 128);
2150 for (unsigned i = 0; i != BitWidth/32; ++i) {
2151 SDValue W = extractHvxElementReg(
2152 VQ, DAG.getConstant(i, dl, MVT::i32), dl, MVT::i32, DAG);
2153 Words.push_back(W);
2154 }
2155 SmallVector<SDValue,2> Combines;
2156 assert(Words.size() % 2 == 0);
2157 for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
2158 SDValue C = getCombine(Words[i+1], Words[i], dl, MVT::i64, DAG);
2159 Combines.push_back(C);
2160 }
2161
2162 if (BitWidth == 64)
2163 return Combines[0];
2164
2165 return DAG.getNode(ISD::BUILD_PAIR, dl, ResTy, Combines);
2166 }
2167
2168 // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
2169 // Splat the input into a 32-element i32 vector, then AND each element
2170 // with a unique bitmask to isolate individual bits.
2171 auto bitcastI32ToV32I1 = [&](SDValue Val32) {
2172 assert(Val32.getValueType().getSizeInBits() == 32 &&
2173 "Input must be 32 bits");
2174 MVT VecTy = MVT::getVectorVT(MVT::i32, 32);
2175 SDValue Splat = DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Val32);
2177 for (unsigned i = 0; i < 32; ++i)
2178 Mask.push_back(DAG.getConstant(1ull << i, dl, MVT::i32));
2179
2180 SDValue MaskVec = DAG.getBuildVector(VecTy, dl, Mask);
2181 SDValue Anded = DAG.getNode(ISD::AND, dl, VecTy, Splat, MaskVec);
2182 return DAG.getNode(HexagonISD::V2Q, dl, MVT::v32i1, Anded);
2183 };
2184 // === Case: v32i1 ===
2185 if (ResTy == MVT::v32i1 &&
2186 (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
2187 Subtarget.useHVX128BOps()) {
2188 SDValue Val32 = Val;
2189 if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
2190 Val32 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Val);
2191 return bitcastI32ToV32I1(Val32);
2192 }
2193 // === Case: v64i1 ===
2194 if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
2195 // Split i64 into lo/hi 32-bit halves.
2196 SDValue Lo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Val);
2197 SDValue HiShifted = DAG.getNode(ISD::SRL, dl, MVT::i64, Val,
2198 DAG.getConstant(32, dl, MVT::i64));
2199 SDValue Hi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, HiShifted);
2200
2201 // Reuse the same 32-bit logic twice.
2202 SDValue LoRes = bitcastI32ToV32I1(Lo);
2203 SDValue HiRes = bitcastI32ToV32I1(Hi);
2204
2205 // Concatenate into a v64i1 predicate.
2206 return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, LoRes, HiRes);
2207 }
2208
2209 if (isHvxBoolTy(ResTy) && ValTy.isScalarInteger()) {
2210 // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
2211 unsigned BitWidth = ValTy.getSizeInBits();
2212 unsigned HwLen = Subtarget.getVectorLength();
2213 assert(BitWidth == HwLen);
2214
2215 MVT ValAsVecTy = MVT::getVectorVT(MVT::i8, BitWidth / 8);
2216 SDValue ValAsVec = DAG.getBitcast(ValAsVecTy, Val);
2217 // Splat each byte of Val 8 times.
2218 // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
2219 // where b0, b1,..., b15 are least to most significant bytes of I.
2221 // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
2222 // These are bytes with the LSB rotated left with respect to their index.
2224 for (unsigned I = 0; I != HwLen / 8; ++I) {
2225 SDValue Idx = DAG.getConstant(I, dl, MVT::i32);
2226 SDValue Byte =
2227 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, ValAsVec, Idx);
2228 for (unsigned J = 0; J != 8; ++J) {
2229 Bytes.push_back(Byte);
2230 Tmp.push_back(DAG.getConstant(1ull << J, dl, MVT::i8));
2231 }
2232 }
2233
2234 MVT ConstantVecTy = MVT::getVectorVT(MVT::i8, HwLen);
2235 SDValue ConstantVec = DAG.getBuildVector(ConstantVecTy, dl, Tmp);
2236 SDValue I2V = buildHvxVectorReg(Bytes, dl, ConstantVecTy, DAG);
2237
2238 // Each Byte in the I2V will be set iff corresponding bit is set in Val.
2239 I2V = DAG.getNode(ISD::AND, dl, ConstantVecTy, {I2V, ConstantVec});
2240 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, I2V);
2241 }
2242
2243 return Op;
2244}
2245
2246SDValue HexagonTargetLowering::LowerHvxStore(SDValue Op,
2247 SelectionDAG &DAG) const {
2248 const SDLoc &dl(Op);
2249 StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
2250 SDValue Val = SN->getValue();
2251 MVT ValTy = ty(Val);
2252
2253 // Check if this is a store of an HVX bool vector (predicate)
2254 if (!isHvxBoolTy(ValTy))
2255 return SDValue();
2256
2257 unsigned NumBits = ValTy.getVectorNumElements();
2258 MachineMemOperand *MMO = SN->getMemOperand();
2259
2260 // Check alignment requirements based on predicate size
2261 unsigned RequiredAlign = (NumBits == 32) ? 4 : 8;
2262 if (MMO->getBaseAlign().value() % RequiredAlign != 0)
2263 return SDValue();
2264
2265 unsigned HwLen = Subtarget.getVectorLength();
2266 MVT WordTy = MVT::getVectorVT(MVT::i32, HwLen / 4);
2267
2268 // Compress the predicate into a vector register
2269 SDValue VQ = compressHvxPred(Val, dl, WordTy, DAG);
2270
2271 // Extract words from the compressed vector
2273 for (unsigned i = 0; i != NumBits / 32; ++i) {
2274 SDValue W = extractHvxElementReg(VQ, DAG.getConstant(i, dl, MVT::i32), dl,
2275 MVT::i32, DAG);
2276 Words.push_back(W);
2277 }
2278
2279 SDValue Chain = SN->getChain();
2280 SDValue BasePtr = SN->getBasePtr();
2281 MachinePointerInfo PtrInfo = MMO->getPointerInfo();
2282
2283 if (NumBits == 32)
2284 return DAG.getStore(Chain, dl, Words[0], BasePtr, PtrInfo,
2285 MMO->getBaseAlign());
2286
2287 if (NumBits == 64) {
2288 SDValue W64 = getCombine(Words[1], Words[0], dl, MVT::i64, DAG);
2289 return DAG.getStore(Chain, dl, W64, BasePtr, PtrInfo, MMO->getBaseAlign());
2290 }
2291
2292 if (NumBits == 128) {
2293 SDValue Lo64 = getCombine(Words[1], Words[0], dl, MVT::i64, DAG);
2294 SDValue Hi64 = getCombine(Words[3], Words[2], dl, MVT::i64, DAG);
2295
2296 Chain =
2297 DAG.getStore(Chain, dl, Lo64, BasePtr, PtrInfo, MMO->getBaseAlign());
2298
2299 SDValue Offset8 = DAG.getConstant(8, dl, MVT::i32);
2300 SDValue Ptr8 = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, Offset8);
2301 return DAG.getStore(Chain, dl, Hi64, Ptr8, PtrInfo.getWithOffset(8),
2302 Align(8));
2303 }
2304
2305 return SDValue();
2306}
2307
2308SDValue HexagonTargetLowering::LowerHvxLoad(SDValue Op,
2309 SelectionDAG &DAG) const {
2310 const SDLoc &dl(Op);
2311 LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
2312 MVT ResTy = ty(Op);
2313
2314 // Check if this is a load of an HVX bool vector (predicate)
2315 if (!isHvxBoolTy(ResTy))
2316 return SDValue();
2317
2318 unsigned NumBits = ResTy.getVectorNumElements();
2319 MachineMemOperand *MMO = LN->getMemOperand();
2320
2321 unsigned RequiredAlign = (NumBits == 32) ? 4 : 8;
2322 if (MMO->getBaseAlign().value() % RequiredAlign != 0)
2323 return SDValue();
2324
2325 SDValue Chain = LN->getChain();
2326 SDValue BasePtr = LN->getBasePtr();
2327 MachinePointerInfo PtrInfo = MMO->getPointerInfo();
2328
2329 if (NumBits == 32) {
2330 SDValue W32 =
2331 DAG.getLoad(MVT::i32, dl, Chain, BasePtr, PtrInfo, MMO->getBaseAlign());
2332 SDValue Pred = DAG.getNode(ISD::BITCAST, dl, MVT::v32i1, W32);
2333 SDValue Ops[] = {Pred, W32.getValue(1)};
2334 return DAG.getMergeValues(Ops, dl);
2335 }
2336
2337 if (NumBits == 64) {
2338 SDValue W64 =
2339 DAG.getLoad(MVT::i64, dl, Chain, BasePtr, PtrInfo, MMO->getBaseAlign());
2340 SDValue Pred = DAG.getNode(ISD::BITCAST, dl, MVT::v64i1, W64);
2341 SDValue Ops[] = {Pred, W64.getValue(1)};
2342 return DAG.getMergeValues(Ops, dl);
2343 }
2344
2345 if (NumBits == 128) {
2346 SDValue Lo64 =
2347 DAG.getLoad(MVT::i64, dl, Chain, BasePtr, PtrInfo, MMO->getBaseAlign());
2348 Chain = Lo64.getValue(1);
2349
2350 SDValue Offset8 = DAG.getConstant(8, dl, MVT::i32);
2351 SDValue Ptr8 = DAG.getNode(ISD::ADD, dl, MVT::i32, BasePtr, Offset8);
2352 SDValue Hi64 = DAG.getLoad(MVT::i64, dl, Chain, Ptr8,
2353 PtrInfo.getWithOffset(8), Align(8));
2354
2355 SDValue LoPred = DAG.getNode(ISD::BITCAST, dl, MVT::v64i1, Lo64);
2356 SDValue HiPred = DAG.getNode(ISD::BITCAST, dl, MVT::v64i1, Hi64);
2357 SDValue Pred =
2358 DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v128i1, LoPred, HiPred);
2359
2360 SDValue Ops[] = {Pred, Hi64.getValue(1)};
2361 return DAG.getMergeValues(Ops, dl);
2362 }
2363
2364 return SDValue();
2365}
2366
2367SDValue
2368HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2369 // Sign- and zero-extends are legal.
2370 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2371 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(Op), ty(Op),
2372 Op.getOperand(0));
2373}
2374
2375SDValue
2376HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2377 MVT ResTy = ty(Op);
2378 if (ResTy.getVectorElementType() != MVT::i1)
2379 return Op;
2380
2381 const SDLoc &dl(Op);
2382 unsigned HwLen = Subtarget.getVectorLength();
2383 unsigned VecLen = ResTy.getVectorNumElements();
2384 assert(HwLen % VecLen == 0);
2385 unsigned ElemSize = HwLen / VecLen;
2386
2387 MVT VecTy = MVT::getVectorVT(MVT::getIntegerVT(ElemSize * 8), VecLen);
2388 SDValue S =
2389 DAG.getNode(ISD::SELECT, dl, VecTy, Op.getOperand(0),
2390 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(1)),
2391 DAG.getNode(HexagonISD::Q2V, dl, VecTy, Op.getOperand(2)));
2392 return DAG.getNode(HexagonISD::V2Q, dl, ResTy, S);
2393}
2394
2395SDValue
2396HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2397 if (SDValue S = getVectorShiftByInt(Op, DAG))
2398 return S;
2399 return Op;
2400}
2401
2402SDValue
2403HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
2404 SelectionDAG &DAG) const {
2405 unsigned Opc = Op.getOpcode();
2406 assert(Opc == ISD::FSHL || Opc == ISD::FSHR);
2407
2408 // Make sure the shift amount is within the range of the bitwidth
2409 // of the element type.
2410 SDValue A = Op.getOperand(0);
2411 SDValue B = Op.getOperand(1);
2412 SDValue S = Op.getOperand(2);
2413
2414 MVT InpTy = ty(A);
2415 MVT ElemTy = InpTy.getVectorElementType();
2416
2417 const SDLoc &dl(Op);
2418 unsigned ElemWidth = ElemTy.getSizeInBits();
2419 bool IsLeft = Opc == ISD::FSHL;
2420
2421 // The expansion into regular shifts produces worse code for i8 and for
2422 // right shift of i32 on v65+.
2423 bool UseShifts = ElemTy != MVT::i8;
2424 if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
2425 UseShifts = false;
2426
2427 if (SDValue SplatV = getSplatValue(S, DAG); SplatV && UseShifts) {
2428 // If this is a funnel shift by a scalar, lower it into regular shifts.
2429 SDValue Mask = DAG.getConstant(ElemWidth - 1, dl, MVT::i32);
2430 SDValue ModS =
2431 DAG.getNode(ISD::AND, dl, MVT::i32,
2432 {DAG.getZExtOrTrunc(SplatV, dl, MVT::i32), Mask});
2433 SDValue NegS =
2434 DAG.getNode(ISD::SUB, dl, MVT::i32,
2435 {DAG.getConstant(ElemWidth, dl, MVT::i32), ModS});
2436 SDValue IsZero =
2437 DAG.getSetCC(dl, MVT::i1, ModS, getZero(dl, MVT::i32, DAG), ISD::SETEQ);
2438 // FSHL A, B => A << | B >>n
2439 // FSHR A, B => A <<n | B >>
2440 SDValue Part1 =
2441 DAG.getNode(HexagonISD::VASL, dl, InpTy, {A, IsLeft ? ModS : NegS});
2442 SDValue Part2 =
2443 DAG.getNode(HexagonISD::VLSR, dl, InpTy, {B, IsLeft ? NegS : ModS});
2444 SDValue Or = DAG.getNode(ISD::OR, dl, InpTy, {Part1, Part2});
2445 // If the shift amount was 0, pick A or B, depending on the direction.
2446 // The opposite shift will also be by 0, so the "Or" will be incorrect.
2447 return DAG.getNode(ISD::SELECT, dl, InpTy, {IsZero, (IsLeft ? A : B), Or});
2448 }
2449
2451 InpTy, dl, DAG.getConstant(ElemWidth - 1, dl, ElemTy));
2452
2453 unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
2454 return DAG.getNode(MOpc, dl, ty(Op),
2455 {A, B, DAG.getNode(ISD::AND, dl, InpTy, {S, Mask})});
2456}
2457
2458SDValue
2459HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2460 const SDLoc &dl(Op);
2461 unsigned IntNo = Op.getConstantOperandVal(0);
2462 SmallVector<SDValue> Ops(Op->ops());
2463
2464 auto Swap = [&](SDValue P) {
2465 return DAG.getMergeValues({P.getValue(1), P.getValue(0)}, dl);
2466 };
2467
2468 switch (IntNo) {
2469 case Intrinsic::hexagon_V6_pred_typecast:
2470 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2471 MVT ResTy = ty(Op), InpTy = ty(Ops[1]);
2472 if (isHvxBoolTy(ResTy) && isHvxBoolTy(InpTy)) {
2473 if (ResTy == InpTy)
2474 return Ops[1];
2475 return DAG.getNode(HexagonISD::TYPECAST, dl, ResTy, Ops[1]);
2476 }
2477 break;
2478 }
2479 case Intrinsic::hexagon_V6_vmpyss_parts:
2480 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2481 return Swap(DAG.getNode(HexagonISD::SMUL_LOHI, dl, Op->getVTList(),
2482 {Ops[1], Ops[2]}));
2483 case Intrinsic::hexagon_V6_vmpyuu_parts:
2484 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2485 return Swap(DAG.getNode(HexagonISD::UMUL_LOHI, dl, Op->getVTList(),
2486 {Ops[1], Ops[2]}));
2487 case Intrinsic::hexagon_V6_vmpyus_parts:
2488 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2489 return Swap(DAG.getNode(HexagonISD::USMUL_LOHI, dl, Op->getVTList(),
2490 {Ops[1], Ops[2]}));
2491 }
2492 } // switch
2493
2494 return Op;
2495}
2496
2497SDValue
2498HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
2499 const SDLoc &dl(Op);
2500 unsigned HwLen = Subtarget.getVectorLength();
2501 MachineFunction &MF = DAG.getMachineFunction();
2502 auto *MaskN = cast<MaskedLoadStoreSDNode>(Op.getNode());
2503 SDValue Mask = MaskN->getMask();
2504 SDValue Chain = MaskN->getChain();
2505 SDValue Base = MaskN->getBasePtr();
2506 auto *MemOp = MF.getMachineMemOperand(MaskN->getMemOperand(), 0, HwLen);
2507
2508 unsigned Opc = Op->getOpcode();
2510
2511 if (Opc == ISD::MLOAD) {
2512 MVT ValTy = ty(Op);
2513 SDValue Load = DAG.getLoad(ValTy, dl, Chain, Base, MemOp);
2514 SDValue Thru = cast<MaskedLoadSDNode>(MaskN)->getPassThru();
2515 if (isUndef(Thru))
2516 return Load;
2517 SDValue VSel = DAG.getNode(ISD::VSELECT, dl, ValTy, Mask, Load, Thru);
2518 return DAG.getMergeValues({VSel, Load.getValue(1)}, dl);
2519 }
2520
2521 // MSTORE
2522 // HVX only has aligned masked stores.
2523
2524 // TODO: Fold negations of the mask into the store.
2525 unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
2526 SDValue Value = cast<MaskedStoreSDNode>(MaskN)->getValue();
2527 SDValue Offset0 = DAG.getTargetConstant(0, dl, ty(Base));
2528
2529 if (MaskN->getAlign().value() % HwLen == 0) {
2530 SDValue Store = getInstr(StoreOpc, dl, MVT::Other,
2531 {Mask, Base, Offset0, Value, Chain}, DAG);
2532 DAG.setNodeMemRefs(cast<MachineSDNode>(Store.getNode()), {MemOp});
2533 return Store;
2534 }
2535
2536 // Unaligned case.
2537 auto StoreAlign = [&](SDValue V, SDValue A) {
2538 SDValue Z = getZero(dl, ty(V), DAG);
2539 // TODO: use funnel shifts?
2540 // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
2541 // upper half.
2542 SDValue LoV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {V, Z, A}, DAG);
2543 SDValue HiV = getInstr(Hexagon::V6_vlalignb, dl, ty(V), {Z, V, A}, DAG);
2544 return std::make_pair(LoV, HiV);
2545 };
2546
2547 MVT ByteTy = MVT::getVectorVT(MVT::i8, HwLen);
2548 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
2549 SDValue MaskV = DAG.getNode(HexagonISD::Q2V, dl, ByteTy, Mask);
2550 VectorPair Tmp = StoreAlign(MaskV, Base);
2551 VectorPair MaskU = {DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.first),
2552 DAG.getNode(HexagonISD::V2Q, dl, BoolTy, Tmp.second)};
2553 VectorPair ValueU = StoreAlign(Value, Base);
2554
2555 SDValue Offset1 = DAG.getTargetConstant(HwLen, dl, MVT::i32);
2556 SDValue StoreLo =
2557 getInstr(StoreOpc, dl, MVT::Other,
2558 {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
2559 SDValue StoreHi =
2560 getInstr(StoreOpc, dl, MVT::Other,
2561 {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
2562 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreLo.getNode()), {MemOp});
2563 DAG.setNodeMemRefs(cast<MachineSDNode>(StoreHi.getNode()), {MemOp});
2564 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, {StoreLo, StoreHi});
2565}
2566
2567SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
2568 SelectionDAG &DAG) const {
2569 // This conversion only applies to QFloat. IEEE extension from f16 to f32
2570 // is legal (done via a pattern).
2571 assert(Subtarget.useHVXQFloatOps());
2572
2573 assert(Op->getOpcode() == ISD::FP_EXTEND);
2574
2575 MVT VecTy = ty(Op);
2576 MVT ArgTy = ty(Op.getOperand(0));
2577 const SDLoc &dl(Op);
2578
2579 if (ArgTy == MVT::v64bf16) {
2580 MVT HalfTy = typeSplit(VecTy).first;
2581 SDValue BF16Vec = Op.getOperand(0);
2582 SDValue Zeroes =
2583 getInstr(Hexagon::V6_vxor, dl, HalfTy, {BF16Vec, BF16Vec}, DAG);
2584 // Interleave zero vector with the bf16 vector, with zeroes in the lower
2585 // half of each 32 bit lane, effectively extending the bf16 values to fp32
2586 // values.
2587 SDValue ShuffVec =
2588 getInstr(Hexagon::V6_vshufoeh, dl, VecTy, {BF16Vec, Zeroes}, DAG);
2589 VectorPair VecPair = opSplit(ShuffVec, dl, DAG);
2590 SDValue Result = getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2591 {VecPair.second, VecPair.first,
2592 DAG.getSignedConstant(-4, dl, MVT::i32)},
2593 DAG);
2594 return Result;
2595 }
2596
2597 assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);
2598
2599 SDValue F16Vec = Op.getOperand(0);
2600
2601 APFloat FloatVal = APFloat(1.0f);
2602 bool Ignored;
2604 SDValue Fp16Ones = DAG.getConstantFP(FloatVal, dl, ArgTy);
2605 SDValue VmpyVec =
2606 getInstr(Hexagon::V6_vmpy_qf32_hf, dl, VecTy, {F16Vec, Fp16Ones}, DAG);
2607
2608 MVT HalfTy = typeSplit(VecTy).first;
2609 VectorPair Pair = opSplit(VmpyVec, dl, DAG);
2610 SDValue LoVec =
2611 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.first}, DAG);
2612 SDValue HiVec =
2613 getInstr(Hexagon::V6_vconv_sf_qf32, dl, HalfTy, {Pair.second}, DAG);
2614
2615 SDValue ShuffVec =
2616 getInstr(Hexagon::V6_vshuffvdd, dl, VecTy,
2617 {HiVec, LoVec, DAG.getSignedConstant(-4, dl, MVT::i32)}, DAG);
2618
2619 return ShuffVec;
2620}
2621
2622SDValue
2623HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2624 // Catch invalid conversion ops (just in case).
2625 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2626 Op.getOpcode() == ISD::FP_TO_UINT);
2627
2628 MVT ResTy = ty(Op);
2629 MVT FpTy = ty(Op.getOperand(0)).getVectorElementType();
2630 MVT IntTy = ResTy.getVectorElementType();
2631
2632 if (Subtarget.useHVXIEEEFPOps()) {
2633 // There are only conversions from f16.
2634 if (FpTy == MVT::f16) {
2635 // Other int types aren't legal in HVX, so we shouldn't see them here.
2636 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2637 // Conversions to i8 and i16 are legal.
2638 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2639 return Op;
2640 }
2641 }
2642
2643 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2644 return EqualizeFpIntConversion(Op, DAG);
2645
2646 return ExpandHvxFpToInt(Op, DAG);
2647}
2648
2649// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
2650// R1 = #1, R2 holds the v32i1 param
2651// V1 = vsplat(R1)
2652// V2 = vsplat(R2)
2653// Q0 = vand(V1,R1)
2654// V0.w=prefixsum(Q0)
2655// V0.w=vsub(V0.w,V1.w)
2656// V2.w = vlsr(V2.w,V0.w)
2657// V2 = vand(V2,V1)
2658// V2.sf = V2.w
2659SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
2660 SelectionDAG &DAG) const {
2661
2662 MVT ResTy = ty(PredOp);
2663 const SDLoc &dl(PredOp);
2664
2665 SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
2666 SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
2667 SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2668 SDValue(RegConst, 0));
2669 SDNode *PredTransfer =
2670 DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
2671 SDValue(SplatConst, 0), SDValue(RegConst, 0));
2672 SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
2673 SDValue(PredTransfer, 0));
2674 SDNode *SplatParam = DAG.getMachineNode(
2675 Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2676 DAG.getNode(ISD::BITCAST, dl, MVT::i32, PredOp.getOperand(0)));
2677 SDNode *Vsub =
2678 DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
2679 SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
2680 SDNode *IndexShift =
2681 DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
2682 SDValue(SplatParam, 0), SDValue(Vsub, 0));
2683 SDNode *MaskOff =
2684 DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
2685 SDValue(IndexShift, 0), SDValue(SplatConst, 0));
2686 SDNode *Convert = DAG.getMachineNode(Hexagon::V6_vconv_sf_w, dl, ResTy,
2687 SDValue(MaskOff, 0));
2688 return SDValue(Convert, 0);
2689}
2690
2691// For vector type v64i1 uint_to_fo to v64f16:
2692// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
2693// R3 = subreg_high (R32)
2694// R2 = subreg_low (R32)
2695// R1 = #1
2696// V1 = vsplat(R1)
2697// V2 = vsplat(R2)
2698// V3 = vsplat(R3)
2699// Q0 = vand(V1,R1)
2700// V0.w=prefixsum(Q0)
2701// V0.w=vsub(V0.w,V1.w)
2702// V2.w = vlsr(V2.w,V0.w)
2703// V3.w = vlsr(V3.w,V0.w)
2704// V2 = vand(V2,V1)
2705// V3 = vand(V3,V1)
2706// V2.h = vpacke(V3.w,V2.w)
2707// V2.hf = V2.h
2708SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
2709 SelectionDAG &DAG) const {
2710
2711 MVT ResTy = ty(PredOp);
2712 const SDLoc &dl(PredOp);
2713
2714 SDValue Inp = DAG.getNode(ISD::BITCAST, dl, MVT::i64, PredOp.getOperand(0));
2715 // Get the hi and lo regs
2716 SDValue HiReg =
2717 DAG.getTargetExtractSubreg(Hexagon::isub_hi, dl, MVT::i32, Inp);
2718 SDValue LoReg =
2719 DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, Inp);
2720 // Get constant #1 and splat into vector V1
2721 SDValue Const = DAG.getTargetConstant(0x1, dl, MVT::i32);
2722 SDNode *RegConst = DAG.getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, Const);
2723 SDNode *SplatConst = DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2724 SDValue(RegConst, 0));
2725 // Splat the hi and lo args
2726 SDNode *SplatHi =
2727 DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2728 DAG.getNode(ISD::BITCAST, dl, MVT::i32, HiReg));
2729 SDNode *SplatLo =
2730 DAG.getMachineNode(Hexagon::V6_lvsplatw, dl, MVT::v32i32,
2731 DAG.getNode(ISD::BITCAST, dl, MVT::i32, LoReg));
2732 // vand between splatted const and const
2733 SDNode *PredTransfer =
2734 DAG.getMachineNode(Hexagon::V6_vandvrt, dl, MVT::v32i1,
2735 SDValue(SplatConst, 0), SDValue(RegConst, 0));
2736 // Get the prefixsum
2737 SDNode *PrefixSum = DAG.getMachineNode(Hexagon::V6_vprefixqw, dl, MVT::v32i32,
2738 SDValue(PredTransfer, 0));
2739 // Get the vsub
2740 SDNode *Vsub =
2741 DAG.getMachineNode(Hexagon::V6_vsubw, dl, MVT::v32i32,
2742 SDValue(PrefixSum, 0), SDValue(SplatConst, 0));
2743 // Get vlsr for hi and lo
2744 SDNode *IndexShift_hi =
2745 DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
2746 SDValue(SplatHi, 0), SDValue(Vsub, 0));
2747 SDNode *IndexShift_lo =
2748 DAG.getMachineNode(Hexagon::V6_vlsrwv, dl, MVT::v32i32,
2749 SDValue(SplatLo, 0), SDValue(Vsub, 0));
2750 // Get vand of hi and lo
2751 SDNode *MaskOff_hi =
2752 DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
2753 SDValue(IndexShift_hi, 0), SDValue(SplatConst, 0));
2754 SDNode *MaskOff_lo =
2755 DAG.getMachineNode(Hexagon::V6_vand, dl, MVT::v32i32,
2756 SDValue(IndexShift_lo, 0), SDValue(SplatConst, 0));
2757 // Pack them
2758 SDNode *Pack =
2759 DAG.getMachineNode(Hexagon::V6_vpackeh, dl, MVT::v64i16,
2760 SDValue(MaskOff_hi, 0), SDValue(MaskOff_lo, 0));
2761 SDNode *Convert =
2762 DAG.getMachineNode(Hexagon::V6_vconv_hf_h, dl, ResTy, SDValue(Pack, 0));
2763 return SDValue(Convert, 0);
2764}
2765
2766SDValue
2767HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2768 // Catch invalid conversion ops (just in case).
2769 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2770 Op.getOpcode() == ISD::UINT_TO_FP);
2771
2772 MVT ResTy = ty(Op);
2773 MVT IntTy = ty(Op.getOperand(0)).getVectorElementType();
2774 MVT FpTy = ResTy.getVectorElementType();
2775
2776 if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
2777 if (ResTy == MVT::v32f32 && ty(Op.getOperand(0)) == MVT::v32i1)
2778 return LowerHvxPred32ToFp(Op, DAG);
2779 if (ResTy == MVT::v64f16 && ty(Op.getOperand(0)) == MVT::v64i1)
2780 return LowerHvxPred64ToFp(Op, DAG);
2781 }
2782
2783 if (Subtarget.useHVXIEEEFPOps()) {
2784 // There are only conversions to f16.
2785 if (FpTy == MVT::f16) {
2786 // Other int types aren't legal in HVX, so we shouldn't see them here.
2787 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2788 // i8, i16 -> f16 is legal.
2789 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2790 return Op;
2791 }
2792 }
2793
2794 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2795 return EqualizeFpIntConversion(Op, DAG);
2796
2797 return ExpandHvxIntToFp(Op, DAG);
2798}
2799
2800HexagonTargetLowering::TypePair
2801HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2802 // Compare the widths of elements of the two types, and extend the narrower
2803 // type to match the with of the wider type. For vector types, apply this
2804 // to the element type.
2805 assert(Ty0.isVector() == Ty1.isVector());
2806
2807 MVT ElemTy0 = Ty0.getScalarType();
2808 MVT ElemTy1 = Ty1.getScalarType();
2809
2810 unsigned Width0 = ElemTy0.getSizeInBits();
2811 unsigned Width1 = ElemTy1.getSizeInBits();
2812 unsigned MaxWidth = std::max(Width0, Width1);
2813
2814 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2815 if (ScalarTy.isInteger())
2816 return MVT::getIntegerVT(Width);
2817 assert(ScalarTy.isFloatingPoint());
2818 return MVT::getFloatingPointVT(Width);
2819 };
2820
2821 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2822 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2823
2824 if (!Ty0.isVector()) {
2825 // Both types are scalars.
2826 return {WideETy0, WideETy1};
2827 }
2828
2829 // Vector types.
2830 unsigned NumElem = Ty0.getVectorNumElements();
2831 assert(NumElem == Ty1.getVectorNumElements());
2832
2833 return {MVT::getVectorVT(WideETy0, NumElem),
2834 MVT::getVectorVT(WideETy1, NumElem)};
2835}
2836
2837HexagonTargetLowering::TypePair
2838HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2839 // Compare the numbers of elements of two vector types, and widen the
2840 // narrower one to match the number of elements in the wider one.
2841 assert(Ty0.isVector() && Ty1.isVector());
2842
2843 unsigned Len0 = Ty0.getVectorNumElements();
2844 unsigned Len1 = Ty1.getVectorNumElements();
2845 if (Len0 == Len1)
2846 return {Ty0, Ty1};
2847
2848 unsigned MaxLen = std::max(Len0, Len1);
2849 return {MVT::getVectorVT(Ty0.getVectorElementType(), MaxLen),
2850 MVT::getVectorVT(Ty1.getVectorElementType(), MaxLen)};
2851}
2852
2853MVT
2854HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2855 EVT LegalTy = getTypeToTransformTo(*DAG.getContext(), Ty);
2856 assert(LegalTy.isSimple());
2857 return LegalTy.getSimpleVT();
2858}
2859
2860MVT
2861HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2862 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2863 assert(Ty.getSizeInBits() <= HwWidth);
2864 if (Ty.getSizeInBits() == HwWidth)
2865 return Ty;
2866
2867 MVT ElemTy = Ty.getScalarType();
2868 return MVT::getVectorVT(ElemTy, HwWidth / ElemTy.getSizeInBits());
2869}
2870
2871HexagonTargetLowering::VectorPair
2872HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
2873 const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
2874 // Compute A+B, return {A+B, O}, where O = vector predicate indicating
2875 // whether an overflow has occurred.
2876 MVT ResTy = ty(A);
2877 assert(ResTy == ty(B));
2878 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorNumElements());
2879
2880 if (!Signed) {
2881 // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
2882 // save any instructions.
2883 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2884 SDValue Ovf = DAG.getSetCC(dl, PredTy, Add, A, ISD::SETULT);
2885 return {Add, Ovf};
2886 }
2887
2888 // Signed overflow has happened, if:
2889 // (A, B have the same sign) and (A+B has a different sign from either)
2890 // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
2891 SDValue Add = DAG.getNode(ISD::ADD, dl, ResTy, {A, B});
2892 SDValue NotA =
2893 DAG.getNode(ISD::XOR, dl, ResTy, {A, DAG.getAllOnesConstant(dl, ResTy)});
2894 SDValue Xor0 = DAG.getNode(ISD::XOR, dl, ResTy, {NotA, B});
2895 SDValue Xor1 = DAG.getNode(ISD::XOR, dl, ResTy, {Add, B});
2896 SDValue And = DAG.getNode(ISD::AND, dl, ResTy, {Xor0, Xor1});
2897 SDValue MSB =
2898 DAG.getSetCC(dl, PredTy, And, getZero(dl, ResTy, DAG), ISD::SETLT);
2899 return {Add, MSB};
2900}
2901
2902HexagonTargetLowering::VectorPair
2903HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
2904 bool Signed, SelectionDAG &DAG) const {
2905 // Shift Val right by Amt bits, round the result to the nearest integer,
2906 // tie-break by rounding halves to even integer.
2907
2908 const SDLoc &dl(Val);
2909 MVT ValTy = ty(Val);
2910
2911 // This should also work for signed integers.
2912 //
2913 // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
2914 // bool ovf = (inp > tmp0);
2915 // uint rup = inp & (1 << (Amt+1));
2916 //
2917 // uint tmp1 = inp >> (Amt-1); // tmp1 == tmp2 iff
2918 // uint tmp2 = tmp0 >> (Amt-1); // the Amt-1 lower bits were all 0
2919 // uint tmp3 = tmp2 + rup;
2920 // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
2921 unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
2922 MVT ElemTy = MVT::getIntegerVT(ElemWidth);
2923 MVT IntTy = tyVector(ValTy, ElemTy);
2924 MVT PredTy = MVT::getVectorVT(MVT::i1, IntTy.getVectorNumElements());
2925 unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;
2926
2927 SDValue Inp = DAG.getBitcast(IntTy, Val);
2928 SDValue LowBits = DAG.getConstant((1ull << (Amt - 1)) - 1, dl, IntTy);
2929
2930 SDValue AmtP1 = DAG.getConstant(1ull << Amt, dl, IntTy);
2931 SDValue And = DAG.getNode(ISD::AND, dl, IntTy, {Inp, AmtP1});
2932 SDValue Zero = getZero(dl, IntTy, DAG);
2933 SDValue Bit = DAG.getSetCC(dl, PredTy, And, Zero, ISD::SETNE);
2934 SDValue Rup = DAG.getZExtOrTrunc(Bit, dl, IntTy);
2935 auto [Tmp0, Ovf] = emitHvxAddWithOverflow(Inp, LowBits, dl, Signed, DAG);
2936
2937 SDValue AmtM1 = DAG.getConstant(Amt - 1, dl, IntTy);
2938 SDValue Tmp1 = DAG.getNode(ShRight, dl, IntTy, Inp, AmtM1);
2939 SDValue Tmp2 = DAG.getNode(ShRight, dl, IntTy, Tmp0, AmtM1);
2940 SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, IntTy, Tmp2, Rup);
2941
2942 SDValue Eq = DAG.getSetCC(dl, PredTy, Tmp1, Tmp2, ISD::SETEQ);
2943 SDValue One = DAG.getConstant(1, dl, IntTy);
2944 SDValue Tmp4 = DAG.getNode(ShRight, dl, IntTy, {Tmp2, One});
2945 SDValue Tmp5 = DAG.getNode(ShRight, dl, IntTy, {Tmp3, One});
2946 SDValue Mux = DAG.getNode(ISD::VSELECT, dl, IntTy, {Eq, Tmp5, Tmp4});
2947 return {Mux, Ovf};
2948}
2949
2950SDValue
2951HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
2952 SelectionDAG &DAG) const {
2953 MVT VecTy = ty(A);
2954 MVT PairTy = typeJoin({VecTy, VecTy});
2955 assert(VecTy.getVectorElementType() == MVT::i32);
2956
2957 SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
2958
2959 // mulhs(A,B) =
2960 // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
2961 // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
2962 // + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
2963 // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
2964 // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
2965 // anything, so it cannot produce any carry over to higher bits),
2966 // so everything in [] can be shifted by 16 without loss of precision.
2967 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
2968 // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
2969 // The final additions need to make sure to properly maintain any carry-
2970 // out bits.
2971 //
2972 // Hi(B) Lo(B)
2973 // Hi(A) Lo(A)
2974 // --------------
2975 // Lo(B)*Lo(A) | T0 = V6_vmpyewuh(B,A) does this,
2976 // Hi(B)*Lo(A) | + dropping the low 16 bits
2977 // Hi(A)*Lo(B) | T2
2978 // Hi(B)*Hi(A)
2979
2980 SDValue T0 = getInstr(Hexagon::V6_vmpyewuh, dl, VecTy, {B, A}, DAG);
2981 // T1 = get Hi(A) into low halves.
2982 SDValue T1 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {A, S16}, DAG);
2983 // P0 = interleaved T1.h*B.uh (full precision product)
2984 SDValue P0 = getInstr(Hexagon::V6_vmpyhus, dl, PairTy, {T1, B}, DAG);
2985 // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
2986 SDValue T2 = LoHalf(P0, DAG);
2987 // We need to add T0+T2, recording the carry-out, which will be 1<<16
2988 // added to the final sum.
2989 // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
2990 SDValue P1 = getInstr(Hexagon::V6_vadduhw, dl, PairTy, {T0, T2}, DAG);
2991 // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
2992 SDValue P2 = getInstr(Hexagon::V6_vaddhw, dl, PairTy, {T0, T2}, DAG);
2993 // T3 = full-precision(T0+T2) >> 16
2994 // The low halves are added-unsigned, the high ones are added-signed.
2995 SDValue T3 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
2996 {HiHalf(P2, DAG), LoHalf(P1, DAG), S16}, DAG);
2997 SDValue T4 = getInstr(Hexagon::V6_vasrw, dl, VecTy, {B, S16}, DAG);
2998 // P3 = interleaved Hi(B)*Hi(A) (full precision),
2999 // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
3000 SDValue P3 = getInstr(Hexagon::V6_vmpyhv, dl, PairTy, {T1, T4}, DAG);
3001 SDValue T5 = LoHalf(P3, DAG);
3002 // Add:
3003 SDValue T6 = DAG.getNode(ISD::ADD, dl, VecTy, {T3, T5});
3004 return T6;
3005}
3006
3007SDValue
3008HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
3009 bool SignedB, const SDLoc &dl,
3010 SelectionDAG &DAG) const {
3011 MVT VecTy = ty(A);
3012 MVT PairTy = typeJoin({VecTy, VecTy});
3013 assert(VecTy.getVectorElementType() == MVT::i32);
3014
3015 SDValue S16 = DAG.getConstant(16, dl, MVT::i32);
3016
3017 if (SignedA && !SignedB) {
3018 // Make A:unsigned, B:signed.
3019 std::swap(A, B);
3020 std::swap(SignedA, SignedB);
3021 }
3022
3023 // Do halfword-wise multiplications for unsigned*unsigned product, then
3024 // add corrections for signed and unsigned*signed.
3025
3026 SDValue Lo, Hi;
3027
3028 // P0:lo = (uu) products of low halves of A and B,
3029 // P0:hi = (uu) products of high halves.
3030 SDValue P0 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, B}, DAG);
3031
3032 // Swap low/high halves in B
3033 SDValue T0 = getInstr(Hexagon::V6_lvsplatw, dl, VecTy,
3034 {DAG.getConstant(0x02020202, dl, MVT::i32)}, DAG);
3035 SDValue T1 = getInstr(Hexagon::V6_vdelta, dl, VecTy, {B, T0}, DAG);
3036 // P1 = products of even/odd halfwords.
3037 // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
3038 // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
3039 SDValue P1 = getInstr(Hexagon::V6_vmpyuhv, dl, PairTy, {A, T1}, DAG);
3040
3041 // P2:lo = low halves of P1:lo + P1:hi,
3042 // P2:hi = high halves of P1:lo + P1:hi.
3043 SDValue P2 = getInstr(Hexagon::V6_vadduhw, dl, PairTy,
3044 {HiHalf(P1, DAG), LoHalf(P1, DAG)}, DAG);
3045 // Still need to add the high halves of P0:lo to P2:lo
3046 SDValue T2 =
3047 getInstr(Hexagon::V6_vlsrw, dl, VecTy, {LoHalf(P0, DAG), S16}, DAG);
3048 SDValue T3 = DAG.getNode(ISD::ADD, dl, VecTy, {LoHalf(P2, DAG), T2});
3049
3050 // The high halves of T3 will contribute to the HI part of LOHI.
3051 SDValue T4 = getInstr(Hexagon::V6_vasrw_acc, dl, VecTy,
3052 {HiHalf(P2, DAG), T3, S16}, DAG);
3053
3054 // The low halves of P2 need to be added to high halves of the LO part.
3055 Lo = getInstr(Hexagon::V6_vaslw_acc, dl, VecTy,
3056 {LoHalf(P0, DAG), LoHalf(P2, DAG), S16}, DAG);
3057 Hi = DAG.getNode(ISD::ADD, dl, VecTy, {HiHalf(P0, DAG), T4});
3058
3059 if (SignedA) {
3060 assert(SignedB && "Signed A and unsigned B should have been inverted");
3061
3062 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
3063 SDValue Zero = getZero(dl, VecTy, DAG);
3064 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
3065 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
3066 SDValue X0 = DAG.getNode(ISD::VSELECT, dl, VecTy, {Q0, B, Zero});
3067 SDValue X1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, X0, A}, DAG);
3068 Hi = getInstr(Hexagon::V6_vsubw, dl, VecTy, {Hi, X1}, DAG);
3069 } else if (SignedB) {
3070 // Same correction as for mulhus:
3071 // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
3072 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
3073 SDValue Zero = getZero(dl, VecTy, DAG);
3074 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
3075 Hi = getInstr(Hexagon::V6_vsubwq, dl, VecTy, {Q1, Hi, A}, DAG);
3076 } else {
3077 assert(!SignedA && !SignedB);
3078 }
3079
3080 return DAG.getMergeValues({Lo, Hi}, dl);
3081}
3082
3083SDValue
3084HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
3085 SDValue B, bool SignedB,
3086 const SDLoc &dl,
3087 SelectionDAG &DAG) const {
3088 MVT VecTy = ty(A);
3089 MVT PairTy = typeJoin({VecTy, VecTy});
3090 assert(VecTy.getVectorElementType() == MVT::i32);
3091
3092 if (SignedA && !SignedB) {
3093 // Make A:unsigned, B:signed.
3094 std::swap(A, B);
3095 std::swap(SignedA, SignedB);
3096 }
3097
3098 // Do S*S first, then make corrections for U*S or U*U if needed.
3099 SDValue P0 = getInstr(Hexagon::V6_vmpyewuh_64, dl, PairTy, {A, B}, DAG);
3100 SDValue P1 =
3101 getInstr(Hexagon::V6_vmpyowh_64_acc, dl, PairTy, {P0, A, B}, DAG);
3102 SDValue Lo = LoHalf(P1, DAG);
3103 SDValue Hi = HiHalf(P1, DAG);
3104
3105 if (!SignedB) {
3106 assert(!SignedA && "Signed A and unsigned B should have been inverted");
3107 SDValue Zero = getZero(dl, VecTy, DAG);
3108 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
3109
3110 // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
3111 // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
3112 // (V6_vaddw (HiHalf (Muls64O $A, $B)),
3113 // (V6_vaddwq (V6_vgtw (V6_vd0), $B),
3114 // (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
3115 // $A))>;
3116 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
3117 SDValue Q1 = DAG.getSetCC(dl, PredTy, B, Zero, ISD::SETLT);
3118 SDValue T0 = getInstr(Hexagon::V6_vandvqv, dl, VecTy, {Q0, B}, DAG);
3119 SDValue T1 = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q1, T0, A}, DAG);
3120 Hi = getInstr(Hexagon::V6_vaddw, dl, VecTy, {Hi, T1}, DAG);
3121 } else if (!SignedA) {
3122 SDValue Zero = getZero(dl, VecTy, DAG);
3123 MVT PredTy = MVT::getVectorVT(MVT::i1, VecTy.getVectorNumElements());
3124
3125 // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
3126 // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
3127 // (V6_vaddwq (V6_vgtw (V6_vd0), $A),
3128 // (HiHalf (Muls64O $A, $B)),
3129 // $B)>;
3130 SDValue Q0 = DAG.getSetCC(dl, PredTy, A, Zero, ISD::SETLT);
3131 Hi = getInstr(Hexagon::V6_vaddwq, dl, VecTy, {Q0, Hi, B}, DAG);
3132 }
3133
3134 return DAG.getMergeValues({Lo, Hi}, dl);
3135}
3136
3137SDValue
3138HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
3139 const {
3140 // Rewrite conversion between integer and floating-point in such a way that
3141 // the integer type is extended/narrowed to match the bitwidth of the
3142 // floating-point type, combined with additional integer-integer extensions
3143 // or narrowings to match the original input/result types.
3144 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
3145 //
3146 // The input/result types are not required to be legal, but if they are
3147 // legal, this function should not introduce illegal types.
3148
3149 unsigned Opc = Op.getOpcode();
3152
3153 SDValue Inp = Op.getOperand(0);
3154 MVT InpTy = ty(Inp);
3155 MVT ResTy = ty(Op);
3156
3157 if (InpTy == ResTy)
3158 return Op;
3159
3160 const SDLoc &dl(Op);
3162
3163 auto [WInpTy, WResTy] = typeExtendToWider(InpTy, ResTy);
3164 SDValue WInp = resizeToWidth(Inp, WInpTy, Signed, dl, DAG);
3165 SDValue Conv = DAG.getNode(Opc, dl, WResTy, WInp);
3166 SDValue Res = resizeToWidth(Conv, ResTy, Signed, dl, DAG);
3167 return Res;
3168}
3169
3170SDValue
3171HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
3172 unsigned Opc = Op.getOpcode();
3174
3175 const SDLoc &dl(Op);
3176 SDValue Op0 = Op.getOperand(0);
3177 MVT InpTy = ty(Op0);
3178 MVT ResTy = ty(Op);
3179 assert(InpTy.changeTypeToInteger() == ResTy);
3180
3181 // At this point this is an experiment under a flag.
3182 // In arch before V81 the rounding mode is towards nearest value.
3183 // The C/C++ standard requires rounding towards zero:
3184 // C (C99 and later): ISO/IEC 9899:2018 (C18), section 6.3.1.4 — "When a
3185 // finite value of real floating type is converted to an integer type, the
3186 // fractional part is discarded (i.e., the value is truncated toward zero)."
3187 // C++: ISO/IEC 14882:2020 (C++20), section 7.3.7 — "A prvalue of a
3188 // floating-point type can be converted to a prvalue of an integer type. The
3189 // conversion truncates; that is, the fractional part is discarded."
3190 if (InpTy == MVT::v64f16) {
3191 if (Subtarget.useHVXV81Ops()) {
3192 // This is c/c++ compliant
3193 SDValue ConvVec =
3194 getInstr(Hexagon::V6_vconv_h_hf_rnd, dl, ResTy, {Op0}, DAG);
3195 return ConvVec;
3196 } else if (EnableFpFastConvert) {
3197 // Vd32.h=Vu32.hf same as Q6_Vh_equals_Vhf
3198 SDValue ConvVec = getInstr(Hexagon::V6_vconv_h_hf, dl, ResTy, {Op0}, DAG);
3199 return ConvVec;
3200 }
3201 } else if (EnableFpFastConvert && InpTy == MVT::v32f32) {
3202 // Vd32.w=Vu32.sf same as Q6_Vw_equals_Vsf
3203 SDValue ConvVec = getInstr(Hexagon::V6_vconv_w_sf, dl, ResTy, {Op0}, DAG);
3204 return ConvVec;
3205 }
3206
3207 // int32_t conv_f32_to_i32(uint32_t inp) {
3208 // // s | exp8 | frac23
3209 //
3210 // int neg = (int32_t)inp < 0;
3211 //
3212 // // "expm1" is the actual exponent minus 1: instead of "bias", subtract
3213 // // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
3214 // // produce a large positive "expm1", which will result in max u/int.
3215 // // In all IEEE formats, bias is the largest positive number that can be
3216 // // represented in bias-width bits (i.e. 011..1).
3217 // int32_t expm1 = (inp << 1) - 0x80000000;
3218 // expm1 >>= 24;
3219 //
3220 // // Always insert the "implicit 1". Subnormal numbers will become 0
3221 // // regardless.
3222 // uint32_t frac = (inp << 8) | 0x80000000;
3223 //
3224 // // "frac" is the fraction part represented as Q1.31. If it was
3225 // // interpreted as uint32_t, it would be the fraction part multiplied
3226 // // by 2^31.
3227 //
3228 // // Calculate the amount of right shift, since shifting further to the
3229 // // left would lose significant bits. Limit it to 32, because we want
3230 // // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
3231 // // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
3232 // // left by 31). "rsh" can be negative.
3233 // int32_t rsh = min(31 - (expm1 + 1), 32);
3234 //
3235 // frac >>= rsh; // rsh == 32 will produce 0
3236 //
3237 // // Everything up to this point is the same for conversion to signed
3238 // // unsigned integer.
3239 //
3240 // if (neg) // Only for signed int
3241 // frac = -frac; //
3242 // if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff
3243 // frac = 0x80000000; // frac = rsh <= 0 ? bound : frac
3244 // if (rsh <= 0 && !neg) //
3245 // frac = 0x7fffffff; //
3246 //
3247 // if (neg) // Only for unsigned int
3248 // frac = 0; //
3249 // if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac;
3250 // frac = 0x7fffffff; // frac = neg ? 0 : frac;
3251 //
3252 // return frac;
3253 // }
3254
3255 MVT PredTy = MVT::getVectorVT(MVT::i1, ResTy.getVectorElementCount());
3256
3257 // Zero = V6_vd0();
3258 // Neg = V6_vgtw(Zero, Inp);
3259 // One = V6_lvsplatw(1);
3260 // M80 = V6_lvsplatw(0x80000000);
3261 // Exp00 = V6_vaslwv(Inp, One);
3262 // Exp01 = V6_vsubw(Exp00, M80);
3263 // ExpM1 = V6_vasrw(Exp01, 24);
3264 // Frc00 = V6_vaslw(Inp, 8);
3265 // Frc01 = V6_vor(Frc00, M80);
3266 // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
3267 // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
3268 // Frc02 = V6_vlsrwv(Frc01, Rsh01);
3269
3270 // if signed int:
3271 // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
3272 // Pos = V6_vgtw(Rsh01, Zero);
3273 // Frc13 = V6_vsubw(Zero, Frc02);
3274 // Frc14 = V6_vmux(Neg, Frc13, Frc02);
3275 // Int = V6_vmux(Pos, Frc14, Bnd);
3276 //
3277 // if unsigned int:
3278 // Rsn = V6_vgtw(Zero, Rsh01)
3279 // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
3280 // Int = V6_vmux(Neg, Zero, Frc23)
3281
3282 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(InpTy);
3283 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
3284 assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));
3285
3286 SDValue Inp = DAG.getBitcast(ResTy, Op0);
3287 SDValue Zero = getZero(dl, ResTy, DAG);
3288 SDValue Neg = DAG.getSetCC(dl, PredTy, Inp, Zero, ISD::SETLT);
3289 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, ResTy);
3290 SDValue M7F = DAG.getConstant((1ull << (ElemWidth - 1)) - 1, dl, ResTy);
3291 SDValue One = DAG.getConstant(1, dl, ResTy);
3292 SDValue Exp00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, One});
3293 SDValue Exp01 = DAG.getNode(ISD::SUB, dl, ResTy, {Exp00, M80});
3294 SDValue MNE = DAG.getConstant(ElemWidth - ExpWidth, dl, ResTy);
3295 SDValue ExpM1 = DAG.getNode(ISD::SRA, dl, ResTy, {Exp01, MNE});
3296
3297 SDValue ExpW = DAG.getConstant(ExpWidth, dl, ResTy);
3298 SDValue Frc00 = DAG.getNode(ISD::SHL, dl, ResTy, {Inp, ExpW});
3299 SDValue Frc01 = DAG.getNode(ISD::OR, dl, ResTy, {Frc00, M80});
3300
3301 SDValue MN2 = DAG.getConstant(ElemWidth - 2, dl, ResTy);
3302 SDValue Rsh00 = DAG.getNode(ISD::SUB, dl, ResTy, {MN2, ExpM1});
3303 SDValue MW = DAG.getConstant(ElemWidth, dl, ResTy);
3304 SDValue Rsh01 = DAG.getNode(ISD::SMIN, dl, ResTy, {Rsh00, MW});
3305 SDValue Frc02 = DAG.getNode(ISD::SRL, dl, ResTy, {Frc01, Rsh01});
3306
3307 SDValue Int;
3308
3309 if (Opc == ISD::FP_TO_SINT) {
3310 SDValue Bnd = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, M80, M7F});
3311 SDValue Pos = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETGT);
3312 SDValue Frc13 = DAG.getNode(ISD::SUB, dl, ResTy, {Zero, Frc02});
3313 SDValue Frc14 = DAG.getNode(ISD::VSELECT, dl, ResTy, {Neg, Frc13, Frc02});
3314 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, {Pos, Frc14, Bnd});
3315 } else {
3317 SDValue Rsn = DAG.getSetCC(dl, PredTy, Rsh01, Zero, ISD::SETLT);
3318 SDValue Frc23 = DAG.getNode(ISD::VSELECT, dl, ResTy, Rsn, M7F, Frc02);
3319 Int = DAG.getNode(ISD::VSELECT, dl, ResTy, Neg, Zero, Frc23);
3320 }
3321
3322 return Int;
3323}
3324
3325SDValue
3326HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
3327 unsigned Opc = Op.getOpcode();
3329
3330 const SDLoc &dl(Op);
3331 SDValue Op0 = Op.getOperand(0);
3332 MVT InpTy = ty(Op0);
3333 MVT ResTy = ty(Op);
3334 assert(ResTy.changeTypeToInteger() == InpTy);
3335
3336 // uint32_t vnoc1_rnd(int32_t w) {
3337 // int32_t iszero = w == 0;
3338 // int32_t isneg = w < 0;
3339 // uint32_t u = __builtin_HEXAGON_A2_abs(w);
3340 //
3341 // uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
3342 // uint32_t frac0 = (uint64_t)u << norm_left;
3343 //
3344 // // Rounding:
3345 // uint32_t frac1 = frac0 + ((1 << 8) - 1);
3346 // uint32_t renorm = (frac0 > frac1);
3347 // uint32_t rup = (int)(frac0 << 22) < 0;
3348 //
3349 // uint32_t frac2 = frac0 >> 8;
3350 // uint32_t frac3 = frac1 >> 8;
3351 // uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
3352 //
3353 // int32_t exp = 32 - norm_left + renorm + 127;
3354 // exp <<= 23;
3355 //
3356 // uint32_t sign = 0x80000000 * isneg;
3357 // uint32_t f = sign | exp | frac;
3358 // return iszero ? 0 : f;
3359 // }
3360
3361 MVT PredTy = MVT::getVectorVT(MVT::i1, InpTy.getVectorElementCount());
3362 bool Signed = Opc == ISD::SINT_TO_FP;
3363
3364 auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(ResTy);
3365 unsigned ElemWidth = 1 + ExpWidth + FracWidth;
3366
3367 SDValue Zero = getZero(dl, InpTy, DAG);
3368 SDValue One = DAG.getConstant(1, dl, InpTy);
3369 SDValue IsZero = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETEQ);
3370 SDValue Abs = Signed ? DAG.getNode(ISD::ABS, dl, InpTy, Op0) : Op0;
3371 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, InpTy, Abs);
3372 SDValue NLeft = DAG.getNode(ISD::ADD, dl, InpTy, {Clz, One});
3373 SDValue Frac0 = DAG.getNode(ISD::SHL, dl, InpTy, {Abs, NLeft});
3374
3375 auto [Frac, Ovf] = emitHvxShiftRightRnd(Frac0, ExpWidth + 1, false, DAG);
3376 if (Signed) {
3377 SDValue IsNeg = DAG.getSetCC(dl, PredTy, Op0, Zero, ISD::SETLT);
3378 SDValue M80 = DAG.getConstant(1ull << (ElemWidth - 1), dl, InpTy);
3379 SDValue Sign = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsNeg, M80, Zero});
3380 Frac = DAG.getNode(ISD::OR, dl, InpTy, {Sign, Frac});
3381 }
3382
3383 SDValue Rnrm = DAG.getZExtOrTrunc(Ovf, dl, InpTy);
3384 SDValue Exp0 = DAG.getConstant(ElemWidth + ExpBias, dl, InpTy);
3385 SDValue Exp1 = DAG.getNode(ISD::ADD, dl, InpTy, {Rnrm, Exp0});
3386 SDValue Exp2 = DAG.getNode(ISD::SUB, dl, InpTy, {Exp1, NLeft});
3387 SDValue Exp3 = DAG.getNode(ISD::SHL, dl, InpTy,
3388 {Exp2, DAG.getConstant(FracWidth, dl, InpTy)});
3389 SDValue Flt0 = DAG.getNode(ISD::OR, dl, InpTy, {Frac, Exp3});
3390 SDValue Flt1 = DAG.getNode(ISD::VSELECT, dl, InpTy, {IsZero, Zero, Flt0});
3391 SDValue Flt = DAG.getBitcast(ResTy, Flt1);
3392
3393 return Flt;
3394}
3395
3396SDValue
3397HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3398 unsigned Opc = Op.getOpcode();
3399 unsigned TLOpc;
3400 switch (Opc) {
3401 case ISD::ANY_EXTEND:
3402 case ISD::SIGN_EXTEND:
3403 case ISD::ZERO_EXTEND:
3404 TLOpc = HexagonISD::TL_EXTEND;
3405 break;
3406 case ISD::TRUNCATE:
3408 break;
3409#ifndef NDEBUG
3410 Op.dump(&DAG);
3411#endif
3412 llvm_unreachable("Unexpected operator");
3413 }
3414
3415 const SDLoc &dl(Op);
3416 return DAG.getNode(TLOpc, dl, ty(Op), Op.getOperand(0),
3417 DAG.getUNDEF(MVT::i128), // illegal type
3418 DAG.getConstant(Opc, dl, MVT::i32));
3419}
3420
3421SDValue
3422HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3423 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
3424 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
3425 unsigned Opc = Op.getConstantOperandVal(2);
3426 return DAG.getNode(Opc, SDLoc(Op), ty(Op), Op.getOperand(0));
3427}
3428
3429HexagonTargetLowering::VectorPair
3430HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
3431 assert(!Op.isMachineOpcode());
3432 SmallVector<SDValue, 2> OpsL, OpsH;
3433 const SDLoc &dl(Op);
3434
3435 auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
3436 MVT Ty = typeSplit(N->getVT().getSimpleVT()).first;
3437 SDValue TV = DAG.getValueType(Ty);
3438 return std::make_pair(TV, TV);
3439 };
3440
3441 for (SDValue A : Op.getNode()->ops()) {
3442 auto [Lo, Hi] =
3443 ty(A).isVector() ? opSplit(A, dl, DAG) : std::make_pair(A, A);
3444 // Special case for type operand.
3445 switch (Op.getOpcode()) {
3446 case ISD::SIGN_EXTEND_INREG:
3447 case HexagonISD::SSAT:
3448 case HexagonISD::USAT:
3449 if (const auto *N = dyn_cast<const VTSDNode>(A.getNode()))
3450 std::tie(Lo, Hi) = SplitVTNode(N);
3451 break;
3452 }
3453 OpsL.push_back(Lo);
3454 OpsH.push_back(Hi);
3455 }
3456
3457 MVT ResTy = ty(Op);
3458 MVT HalfTy = typeSplit(ResTy).first;
3459 SDValue L = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsL);
3460 SDValue H = DAG.getNode(Op.getOpcode(), dl, HalfTy, OpsH);
3461 return {L, H};
3462}
3463
3464SDValue
3465HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
3466 auto *MemN = cast<MemSDNode>(Op.getNode());
3467 unsigned MemOpc = MemN->getOpcode();
3468 EVT MemTy = MemN->getMemoryVT();
3469
3470 if ((MemOpc == ISD::STORE || MemOpc == ISD::LOAD) &&
3471 (!MemTy.isSimple() || !isHvxPairTy(MemTy.getSimpleVT())))
3472 return Op;
3473
3474 EVT ValueType;
3475 if (MemOpc == ISD::STORE)
3477 else if (MemOpc == ISD::MSTORE)
3479 else // ISD::LOAD, ISD::MLOAD.
3480 ValueType = MemN->getValueType(0);
3481
3482 EVT LoVT, HiVT;
3483 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(ValueType);
3484
3485 EVT LoMemVT, HiMemVT;
3486 bool HiIsEmpty = false;
3487 std::tie(LoMemVT, HiMemVT) =
3488 DAG.GetDependentSplitDestVTs(MemTy, LoVT, &HiIsEmpty);
3489
3490 uint64_t LoSize = LoMemVT.getSizeInBits().getFixedValue() / 8;
3491 uint64_t HiSize = HiMemVT.getSizeInBits().getFixedValue() / 8;
3492
3493 const SDLoc &dl(Op);
3494 SDValue Chain = MemN->getChain();
3495 SDValue Base0 = MemN->getBasePtr();
3496 SDValue Base1 =
3497 DAG.getMemBasePlusOffset(Base0, TypeSize::getFixed(LoSize), dl);
3498
3499 MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
3500 if (MachineMemOperand *MMO = MemN->getMemOperand()) {
3501 MachineFunction &MF = DAG.getMachineFunction();
3502 auto MemSize = [=](uint64_t Size) {
3503 return (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
3504 ? (uint64_t)MemoryLocation::UnknownSize
3505 : Size;
3506 };
3507 // MOp1 will not be used if HiIsEmpty for masked loads and stores (MLOAD and
3508 // MSTORE). Non-masked loads and store are always of double-vector size (see
3509 // isHvxPairTy() check above).
3510 MOp0 = MF.getMachineMemOperand(MMO, 0, MemSize(LoSize));
3511 MOp1 = MF.getMachineMemOperand(MMO, LoSize, MemSize(HiSize));
3512 }
3513
3514 if (MemOpc == ISD::LOAD) {
3515 assert(cast<LoadSDNode>(Op)->isUnindexed());
3516 SDValue Load0 = DAG.getLoad(LoVT, dl, Chain, Base0, MOp0);
3517 SDValue Load1 = DAG.getLoad(HiVT, dl, Chain, Base1, MOp1);
3518 return DAG.getMergeValues(
3519 {DAG.getNode(ISD::CONCAT_VECTORS, dl, MemN->getValueType(0), Load0,
3520 Load1),
3521 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Load0.getValue(1),
3522 Load1.getValue(1))},
3523 dl);
3524 }
3525 if (MemOpc == ISD::STORE) {
3526 assert(cast<StoreSDNode>(Op)->isUnindexed());
3527 VectorPair Vals = opSplit(cast<StoreSDNode>(Op)->getValue(), dl, DAG);
3528 SDValue Store0 = DAG.getStore(Chain, dl, Vals.first, Base0, MOp0);
3529 SDValue Store1 = DAG.getStore(Chain, dl, Vals.second, Base1, MOp1);
3530 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store0, Store1);
3531 }
3532
3533 assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);
3534
3535 auto MaskN = cast<MaskedLoadStoreSDNode>(Op);
3536 assert(MaskN->isUnindexed());
3537 VectorPair Masks = opSplit(MaskN->getMask(), dl, DAG);
3538 SDValue Offset = DAG.getUNDEF(MVT::i32);
3539
3540 if (MemOpc == ISD::MLOAD) {
3541 VectorPair Thru =
3542 opSplit(cast<MaskedLoadSDNode>(Op)->getPassThru(), dl, DAG);
3543 SDValue MLoad0 = DAG.getMaskedLoad(LoVT, dl, Chain, Base0, Offset,
3544 Masks.first, Thru.first, LoMemVT, MOp0,
3546
3547 // The hi masked load has zero storage size. We therefore simply set it to
3548 // the low masked load and rely on subsequent removal from the chain as it
3549 // is unused. See DAGTypeLegalizer::SplitVecRes_MLOAD() for the same logic.
3550 SDValue MLoad1 =
3551 HiIsEmpty ? MLoad0
3552 : DAG.getMaskedLoad(HiVT, dl, Chain, Base1, Offset,
3553 Masks.second, Thru.second, HiMemVT, MOp1,
3555 return DAG.getMergeValues(
3556 {DAG.getNode(ISD::CONCAT_VECTORS, dl, MemN->getValueType(0), MLoad0,
3557 MLoad1),
3558 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MLoad0.getValue(1),
3559 MLoad1.getValue(1))},
3560 dl);
3561 }
3562 if (MemOpc == ISD::MSTORE) {
3563 VectorPair Vals = opSplit(cast<MaskedStoreSDNode>(Op)->getValue(), dl, DAG);
3564 SDValue MStore0 =
3565 DAG.getMaskedStore(Chain, dl, Vals.first, Base0, Offset, Masks.first,
3566 LoMemVT, MOp0, ISD::UNINDEXED, false, false);
3567 if (HiIsEmpty)
3568 return MStore0;
3569 SDValue MStore1 =
3570 DAG.getMaskedStore(Chain, dl, Vals.second, Base1, Offset, Masks.second,
3571 HiMemVT, MOp1, ISD::UNINDEXED, false, false);
3572 return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MStore0, MStore1);
3573 }
3574
3575 std::string Name = "Unexpected operation: " + Op->getOperationName(&DAG);
3576 llvm_unreachable(Name.c_str());
3577}
3578
3579SDValue
3580HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3581 const SDLoc &dl(Op);
3582 auto *LoadN = cast<LoadSDNode>(Op.getNode());
3583 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3584 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3585 "Not widening loads of i1 yet");
3586
3587 SDValue Chain = LoadN->getChain();
3588 SDValue Base = LoadN->getBasePtr();
3589 SDValue Offset = DAG.getUNDEF(MVT::i32);
3590
3591 MVT ResTy = ty(Op);
3592 unsigned HwLen = Subtarget.getVectorLength();
3593 unsigned ResLen = ResTy.getStoreSize();
3594 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3595
3596 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3597 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3598 {DAG.getConstant(ResLen, dl, MVT::i32)}, DAG);
3599
3600 MVT LoadTy = MVT::getVectorVT(MVT::i8, HwLen);
3601 MachineFunction &MF = DAG.getMachineFunction();
3602 auto *MemOp = MF.getMachineMemOperand(LoadN->getMemOperand(), 0, HwLen);
3603
3604 SDValue Load = DAG.getMaskedLoad(LoadTy, dl, Chain, Base, Offset, Mask,
3605 DAG.getUNDEF(LoadTy), LoadTy, MemOp,
3607 SDValue Value = opCastElem(Load, ResTy.getVectorElementType(), DAG);
3608 return DAG.getMergeValues({Value, Load.getValue(1)}, dl);
3609}
3610
3611SDValue
3612HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3613 const SDLoc &dl(Op);
3614 auto *StoreN = cast<StoreSDNode>(Op.getNode());
3615 assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3616 assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3617 "Not widening stores of i1 yet");
3618
3619 SDValue Chain = StoreN->getChain();
3620 SDValue Base = StoreN->getBasePtr();
3621 SDValue Offset = DAG.getUNDEF(MVT::i32);
3622
3623 SDValue Value = opCastElem(StoreN->getValue(), MVT::i8, DAG);
3624 MVT ValueTy = ty(Value);
3625 unsigned ValueLen = ValueTy.getVectorNumElements();
3626 unsigned HwLen = Subtarget.getVectorLength();
3627 assert(isPowerOf2_32(ValueLen));
3628
3629 for (unsigned Len = ValueLen; Len < HwLen; ) {
3630 Value = opJoin({Value, DAG.getUNDEF(ty(Value))}, dl, DAG);
3631 Len = ty(Value).getVectorNumElements(); // This is Len *= 2
3632 }
3633 assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
3634
3635 assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3636 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
3637 SDValue Mask = getInstr(Hexagon::V6_pred_scalar2, dl, BoolTy,
3638 {DAG.getConstant(ValueLen, dl, MVT::i32)}, DAG);
3639 MachineFunction &MF = DAG.getMachineFunction();
3640 auto *MemOp = MF.getMachineMemOperand(StoreN->getMemOperand(), 0, HwLen);
3641 return DAG.getMaskedStore(Chain, dl, Value, Base, Offset, Mask, ty(Value),
3642 MemOp, ISD::UNINDEXED, false, false);
3643}
3644
3645SDValue
3646HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3647 const SDLoc &dl(Op);
3648 SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
3649 MVT ElemTy = ty(Op0).getVectorElementType();
3650 unsigned HwLen = Subtarget.getVectorLength();
3651
3652 unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
3653 assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
3654 MVT WideOpTy = MVT::getVectorVT(ElemTy, WideOpLen);
3655 if (!Subtarget.isHVXVectorType(WideOpTy, true))
3656 return SDValue();
3657
3658 SDValue WideOp0 = appendUndef(Op0, WideOpTy, DAG);
3659 SDValue WideOp1 = appendUndef(Op1, WideOpTy, DAG);
3660 EVT ResTy =
3661 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), WideOpTy);
3662 SDValue SetCC = DAG.getNode(ISD::SETCC, dl, ResTy,
3663 {WideOp0, WideOp1, Op.getOperand(2)});
3664
3665 EVT RetTy = typeLegalize(ty(Op), DAG);
3666 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
3667 {SetCC, getZero(dl, MVT::i32, DAG)});
3668}
3669
3670SDValue HexagonTargetLowering::WidenHvxTruncateToBool(SDValue Op,
3671 SelectionDAG &DAG) const {
3672 // Handle truncation to boolean vector where the result boolean type
3673 // needs widening (e.g., v16i32 -> v16i1 where v16i1 is not a standard
3674 // HVX predicate type, or v16i8 -> v16i1 in 128-byte mode).
3675 // Widen the input to HVX width, perform the truncate to the widened
3676 // boolean type, then extract the result.
3677 const SDLoc &dl(Op);
3678 SDValue Inp = Op.getOperand(0);
3679 MVT InpTy = ty(Inp);
3680 MVT ResTy = ty(Op);
3681
3682 assert(ResTy.getVectorElementType() == MVT::i1 &&
3683 "Expected boolean result type");
3684
3685 MVT ElemTy = InpTy.getVectorElementType();
3686 unsigned HwLen = Subtarget.getVectorLength();
3687
3688 // Calculate the widened input type that fills the HVX register.
3689 unsigned WideLen = (8 * HwLen) / ElemTy.getSizeInBits();
3690 MVT WideInpTy = MVT::getVectorVT(ElemTy, WideLen);
3691 if (!Subtarget.isHVXVectorType(WideInpTy, false))
3692 return SDValue();
3693
3694 // Widen the input to HVX width.
3695 SDValue WideInp = appendUndef(Inp, WideInpTy, DAG);
3696
3697 // Perform the truncate to widened boolean type.
3698 MVT WideBoolTy = MVT::getVectorVT(MVT::i1, WideLen);
3699 SDValue WideTrunc = DAG.getNode(ISD::TRUNCATE, dl, WideBoolTy, WideInp);
3700
3701 // Extract the result.
3702 EVT RetTy = typeLegalize(ResTy, DAG);
3703 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RetTy,
3704 {WideTrunc, getZero(dl, MVT::i32, DAG)});
3705}
3706
3707SDValue
3708HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
3709 unsigned Opc = Op.getOpcode();
3710 bool IsPairOp = isHvxPairTy(ty(Op)) ||
3711 llvm::any_of(Op.getNode()->ops(), [this] (SDValue V) {
3712 return isHvxPairTy(ty(V));
3713 });
3714
3715 if (IsPairOp) {
3716 switch (Opc) {
3717 default:
3718 break;
3719 case ISD::LOAD:
3720 case ISD::STORE:
3721 case ISD::MLOAD:
3722 case ISD::MSTORE:
3723 return SplitHvxMemOp(Op, DAG);
3724 case ISD::SINT_TO_FP:
3725 case ISD::UINT_TO_FP:
3726 case ISD::FP_TO_SINT:
3727 case ISD::FP_TO_UINT:
3728 if (ty(Op).getSizeInBits() == ty(Op.getOperand(0)).getSizeInBits())
3729 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3730 break;
3731 case ISD::ABS:
3732 case ISD::CTPOP:
3733 case ISD::CTLZ:
3734 case ISD::CTTZ:
3735 case ISD::MUL:
3736 case ISD::FADD:
3737 case ISD::FSUB:
3738 case ISD::FMUL:
3739 case ISD::FMINIMUMNUM:
3740 case ISD::FMAXIMUMNUM:
3741 case ISD::MULHS:
3742 case ISD::MULHU:
3743 case ISD::AND:
3744 case ISD::OR:
3745 case ISD::XOR:
3746 case ISD::SRA:
3747 case ISD::SHL:
3748 case ISD::SRL:
3749 case ISD::FSHL:
3750 case ISD::FSHR:
3751 case ISD::SMIN:
3752 case ISD::SMAX:
3753 case ISD::UMIN:
3754 case ISD::UMAX:
3755 case ISD::SETCC:
3756 case ISD::VSELECT:
3758 case ISD::SPLAT_VECTOR:
3759 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3760 case ISD::SIGN_EXTEND:
3761 case ISD::ZERO_EXTEND:
3762 // In general, sign- and zero-extends can't be split and still
3763 // be legal. The only exception is extending bool vectors.
3764 if (ty(Op.getOperand(0)).getVectorElementType() == MVT::i1)
3765 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3766 break;
3767 }
3768 }
3769
3770 switch (Opc) {
3771 default:
3772 break;
3773 // clang-format off
3774 case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG);
3775 case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG);
3776 case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG);
3777 case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG);
3778 case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG);
3779 case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG);
3780 case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG);
3781 case ISD::BITCAST: return LowerHvxBitcast(Op, DAG);
3782 case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG);
3783 case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG);
3784 case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG);
3785 case ISD::CTTZ: return LowerHvxCttz(Op, DAG);
3786 case ISD::SELECT: return LowerHvxSelect(Op, DAG);
3787 case ISD::SRA:
3788 case ISD::SHL:
3789 case ISD::SRL: return LowerHvxShift(Op, DAG);
3790 case ISD::FSHL:
3791 case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG);
3792 case ISD::MULHS:
3793 case ISD::MULHU: return LowerHvxMulh(Op, DAG);
3794 case ISD::SMUL_LOHI:
3795 case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3796 case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
3797 case ISD::SETCC:
3798 case ISD::INTRINSIC_VOID: return Op;
3799 case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG);
3800 case ISD::MLOAD:
3801 case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG);
3802 // Unaligned loads will be handled by the default lowering.
3803 case ISD::LOAD: return LowerHvxLoad(Op, DAG);
3804 case ISD::STORE: return LowerHvxStore(Op, DAG);
3805 case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG);
3806 case ISD::FP_TO_SINT:
3807 case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG);
3808 case ISD::SINT_TO_FP:
3809 case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG);
3810
3811 // Special nodes:
3812 case HexagonISD::SMUL_LOHI:
3813 case HexagonISD::UMUL_LOHI:
3814 case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG);
3815
3819 return LowerHvxPartialReduceMLA(Op, DAG);
3820 // clang-format on
3821 }
3822#ifndef NDEBUG
3823 Op.dumpr(&DAG);
3824#endif
3825 llvm_unreachable("Unhandled HVX operation");
3826}
3827
3828SDValue
3829HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3830 const {
3831 // Rewrite the extension/truncation/saturation op into steps where each
3832 // step changes the type widths by a factor of 2.
3833 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3834 //
3835 // Some of the vector types in Op may not be legal.
3836
3837 unsigned Opc = Op.getOpcode();
3838 switch (Opc) {
3839 case HexagonISD::SSAT:
3840 case HexagonISD::USAT:
3843 break;
3844 case ISD::ANY_EXTEND:
3845 case ISD::ZERO_EXTEND:
3846 case ISD::SIGN_EXTEND:
3847 case ISD::TRUNCATE:
3848 llvm_unreachable("ISD:: ops will be auto-folded");
3849 break;
3850#ifndef NDEBUG
3851 Op.dump(&DAG);
3852#endif
3853 llvm_unreachable("Unexpected operation");
3854 }
3855
3856 SDValue Inp = Op.getOperand(0);
3857 MVT InpTy = ty(Inp);
3858 MVT ResTy = ty(Op);
3859
3860 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3861 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3862 assert(InpWidth != ResWidth);
3863
3864 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3865 return Op;
3866
3867 const SDLoc &dl(Op);
3868 unsigned NumElems = InpTy.getVectorNumElements();
3869 assert(NumElems == ResTy.getVectorNumElements());
3870
3871 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3872 MVT Ty = MVT::getVectorVT(MVT::getIntegerVT(NewWidth), NumElems);
3873 switch (Opc) {
3874 case HexagonISD::SSAT:
3875 case HexagonISD::USAT:
3876 return DAG.getNode(Opc, dl, Ty, {Arg, DAG.getValueType(Ty)});
3879 return DAG.getNode(Opc, dl, Ty, {Arg, Op.getOperand(1), Op.getOperand(2)});
3880 default:
3881 llvm_unreachable("Unexpected opcode");
3882 }
3883 };
3884
3885 SDValue S = Inp;
3886 if (InpWidth < ResWidth) {
3887 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3888 while (InpWidth * 2 <= ResWidth)
3889 S = repeatOp(InpWidth *= 2, S);
3890 } else {
3891 // InpWidth > ResWidth
3892 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3893 while (InpWidth / 2 >= ResWidth)
3894 S = repeatOp(InpWidth /= 2, S);
3895 }
3896 return S;
3897}
3898
3899SDValue
3900HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3901 SDValue Inp0 = Op.getOperand(0);
3902 MVT InpTy = ty(Inp0);
3903 MVT ResTy = ty(Op);
3904 unsigned InpWidth = InpTy.getSizeInBits();
3905 unsigned ResWidth = ResTy.getSizeInBits();
3906 unsigned Opc = Op.getOpcode();
3907
3908 if (shouldWidenToHvx(InpTy, DAG) || shouldWidenToHvx(ResTy, DAG)) {
3909 // First, make sure that the narrower type is widened to HVX.
3910 // This may cause the result to be wider than what the legalizer
3911 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3912 // desired type.
3913 auto [WInpTy, WResTy] =
3914 InpWidth < ResWidth ? typeWidenToWider(typeWidenToHvx(InpTy), ResTy)
3915 : typeWidenToWider(InpTy, typeWidenToHvx(ResTy));
3916 SDValue W = appendUndef(Inp0, WInpTy, DAG);
3917 SDValue S;
3919 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, Op.getOperand(1),
3920 Op.getOperand(2));
3921 } else {
3922 S = DAG.getNode(Opc, SDLoc(Op), WResTy, W, DAG.getValueType(WResTy));
3923 }
3924 SDValue T = ExpandHvxResizeIntoSteps(S, DAG);
3925 return extractSubvector(T, typeLegalize(ResTy, DAG), 0, DAG);
3926 } else if (shouldSplitToHvx(InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3927 // For multi-step extends/truncates (e.g., i8->i32), expand into
3928 // single-step operations first. Splitting a multi-step TL_EXTEND
3929 // would halve the operand type to a sub-HVX size (e.g., v128i8 ->
3930 // v64i8), creating illegal types that cause issues in the type
3931 // legalizer's map tracking. Single-step operations (e.g., i16->i32)
3932 // are safe to split because their halved operand types remain legal.
3933 SDValue T = ExpandHvxResizeIntoSteps(Op, DAG);
3934 if (T != Op)
3935 return T;
3936 return opJoin(SplitVectorOp(Op, DAG), SDLoc(Op), DAG);
3937 } else {
3938 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3939 return RemoveTLWrapper(Op, DAG);
3940 }
3941 llvm_unreachable("Unexpected situation");
3942}
3943
3944void
3945HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3947 unsigned Opc = N->getOpcode();
3948 SDValue Op(N, 0);
3949 SDValue Inp0; // Optional first argument.
3950 if (N->getNumOperands() > 0)
3951 Inp0 = Op.getOperand(0);
3952
3953 switch (Opc) {
3954 case ISD::ANY_EXTEND:
3955 case ISD::SIGN_EXTEND:
3956 case ISD::ZERO_EXTEND:
3957 if (Subtarget.isHVXElementType(ty(Op)) &&
3958 Subtarget.isHVXElementType(ty(Inp0))) {
3959 Results.push_back(CreateTLWrapper(Op, DAG));
3960 }
3961 break;
3962 case ISD::TRUNCATE:
3963 // Handle truncate to boolean vector when the input is not a
3964 // standard HVX vector type (single or pair). This covers cases
3965 // where the input needs widening (e.g., v64i8 -> v64i1 in
3966 // 128-byte mode) and cases where the result boolean type itself
3967 // needs widening (e.g., v16i32 -> v16i1). When the input is
3968 // already an HVX type, tablegen patterns handle the truncation
3969 // directly (e.g., v64i16 -> v64i1 via V6_vandvrt).
3970 if (ty(Op).getVectorElementType() == MVT::i1 &&
3971 !Subtarget.isHVXVectorType(ty(Inp0), false)) {
3972 if (SDValue T = WidenHvxTruncateToBool(Op, DAG))
3973 Results.push_back(T);
3974 } else if (Subtarget.isHVXElementType(ty(Op)) &&
3975 Subtarget.isHVXElementType(ty(Inp0))) {
3976 Results.push_back(CreateTLWrapper(Op, DAG));
3977 }
3978 break;
3979 case ISD::SETCC:
3980 if (shouldWidenToHvx(ty(Inp0), DAG)) {
3981 if (SDValue T = WidenHvxSetCC(Op, DAG))
3982 Results.push_back(T);
3983 }
3984 break;
3985 case ISD::STORE: {
3986 if (shouldWidenToHvx(ty(cast<StoreSDNode>(N)->getValue()), DAG)) {
3987 SDValue Store = WidenHvxStore(Op, DAG);
3988 Results.push_back(Store);
3989 }
3990 break;
3991 }
3992 case ISD::MLOAD:
3993 if (isHvxPairTy(ty(Op))) {
3994 SDValue S = SplitHvxMemOp(Op, DAG);
3996 Results.push_back(S.getOperand(0));
3997 Results.push_back(S.getOperand(1));
3998 }
3999 break;
4000 case ISD::MSTORE:
4001 if (isHvxPairTy(ty(Op->getOperand(1)))) { // Stored value
4002 SDValue S = SplitHvxMemOp(Op, DAG);
4003 Results.push_back(S);
4004 }
4005 break;
4006 case ISD::SINT_TO_FP:
4007 case ISD::UINT_TO_FP:
4008 case ISD::FP_TO_SINT:
4009 case ISD::FP_TO_UINT:
4010 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
4011 SDValue T = EqualizeFpIntConversion(Op, DAG);
4012 Results.push_back(T);
4013 }
4014 break;
4015 case HexagonISD::SSAT:
4016 case HexagonISD::USAT:
4019 Results.push_back(LegalizeHvxResize(Op, DAG));
4020 break;
4021 default:
4022 break;
4023 }
4024}
4025
4026void
4027HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
4029 unsigned Opc = N->getOpcode();
4030 SDValue Op(N, 0);
4031 SDValue Inp0; // Optional first argument.
4032 if (N->getNumOperands() > 0)
4033 Inp0 = Op.getOperand(0);
4034
4035 switch (Opc) {
4036 case ISD::ANY_EXTEND:
4037 case ISD::SIGN_EXTEND:
4038 case ISD::ZERO_EXTEND:
4039 if (Subtarget.isHVXElementType(ty(Op)) &&
4040 Subtarget.isHVXElementType(ty(Inp0))) {
4041 Results.push_back(CreateTLWrapper(Op, DAG));
4042 }
4043 break;
4044 case ISD::TRUNCATE:
4045 // Handle truncate to boolean vector when the input is not a
4046 // standard HVX vector type. See comment in LowerHvxOperationWrapper.
4047 if (ty(Op).getVectorElementType() == MVT::i1 &&
4048 !Subtarget.isHVXVectorType(ty(Inp0), false)) {
4049 if (SDValue T = WidenHvxTruncateToBool(Op, DAG))
4050 Results.push_back(T);
4051 } else if (Subtarget.isHVXElementType(ty(Op)) &&
4052 Subtarget.isHVXElementType(ty(Inp0))) {
4053 Results.push_back(CreateTLWrapper(Op, DAG));
4054 }
4055 break;
4056 case ISD::SETCC:
4057 if (shouldWidenToHvx(ty(Op), DAG)) {
4058 if (SDValue T = WidenHvxSetCC(Op, DAG))
4059 Results.push_back(T);
4060 }
4061 break;
4062 case ISD::LOAD: {
4063 if (shouldWidenToHvx(ty(Op), DAG)) {
4064 SDValue Load = WidenHvxLoad(Op, DAG);
4065 assert(Load->getOpcode() == ISD::MERGE_VALUES);
4066 Results.push_back(Load.getOperand(0));
4067 Results.push_back(Load.getOperand(1));
4068 }
4069 break;
4070 }
4071 case ISD::BITCAST:
4072 if (isHvxBoolTy(ty(Inp0))) {
4073 SDValue C = LowerHvxBitcast(Op, DAG);
4074 Results.push_back(C);
4075 }
4076 break;
4077 case ISD::FP_TO_SINT:
4078 case ISD::FP_TO_UINT:
4079 if (ty(Op).getSizeInBits() != ty(Inp0).getSizeInBits()) {
4080 SDValue T = EqualizeFpIntConversion(Op, DAG);
4081 Results.push_back(T);
4082 }
4083 break;
4084 case HexagonISD::SSAT:
4085 case HexagonISD::USAT:
4088 Results.push_back(LegalizeHvxResize(Op, DAG));
4089 break;
4090 default:
4091 break;
4092 }
4093}
4094
4095SDValue
4096HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
4097 DAGCombinerInfo &DCI) const {
4098 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
4099 // to extract-subvector (shuffle V, pick even, pick odd)
4100
4101 assert(Op.getOpcode() == ISD::TRUNCATE);
4102 SelectionDAG &DAG = DCI.DAG;
4103 const SDLoc &dl(Op);
4104
4105 if (Op.getOperand(0).getOpcode() == ISD::BITCAST)
4106 return SDValue();
4107 SDValue Cast = Op.getOperand(0);
4108 SDValue Src = Cast.getOperand(0);
4109
4110 EVT TruncTy = Op.getValueType();
4111 EVT CastTy = Cast.getValueType();
4112 EVT SrcTy = Src.getValueType();
4113 if (SrcTy.isSimple())
4114 return SDValue();
4115 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
4116 return SDValue();
4117 unsigned SrcLen = SrcTy.getVectorNumElements();
4118 unsigned CastLen = CastTy.getVectorNumElements();
4119 if (2 * CastLen != SrcLen)
4120 return SDValue();
4121
4122 SmallVector<int, 128> Mask(SrcLen);
4123 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
4124 Mask[i] = 2 * i;
4125 Mask[i + CastLen] = 2 * i + 1;
4126 }
4127 SDValue Deal =
4128 DAG.getVectorShuffle(SrcTy, dl, Src, DAG.getUNDEF(SrcTy), Mask);
4129 return opSplit(Deal, dl, DAG).first;
4130}
4131
4132SDValue
4133HexagonTargetLowering::combineConcatOfShuffles(SDValue Op,
4134 SelectionDAG &DAG) const {
4135 // Fold
4136 // concat (shuffle x, y, m1), (shuffle x, y, m2)
4137 // into
4138 // shuffle (concat x, y), undef, m3
4139 if (Op.getNumOperands() != 2)
4140 return SDValue();
4141
4142 const SDLoc &dl(Op);
4143 SDValue V0 = Op.getOperand(0);
4144 SDValue V1 = Op.getOperand(1);
4145
4146 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
4147 return SDValue();
4148 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
4149 return SDValue();
4150
4151 SetVector<SDValue> Order;
4152 Order.insert(V0.getOperand(0));
4153 Order.insert(V0.getOperand(1));
4154 Order.insert(V1.getOperand(0));
4155 Order.insert(V1.getOperand(1));
4156
4157 if (Order.size() > 2)
4158 return SDValue();
4159
4160 // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
4161 // result must be the same.
4162 EVT InpTy = V0.getValueType();
4163 assert(InpTy.isVector());
4164 unsigned InpLen = InpTy.getVectorNumElements();
4165
4166 SmallVector<int, 128> LongMask;
4167 auto AppendToMask = [&](SDValue Shuffle) {
4168 auto *SV = cast<ShuffleVectorSDNode>(Shuffle.getNode());
4169 ArrayRef<int> Mask = SV->getMask();
4170 SDValue X = Shuffle.getOperand(0);
4171 SDValue Y = Shuffle.getOperand(1);
4172 for (int M : Mask) {
4173 if (M == -1) {
4174 LongMask.push_back(M);
4175 continue;
4176 }
4177 SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
4178 if (static_cast<unsigned>(M) >= InpLen)
4179 M -= InpLen;
4180
4181 int OutOffset = Order[0] == Src ? 0 : InpLen;
4182 LongMask.push_back(M + OutOffset);
4183 }
4184 };
4185
4186 AppendToMask(V0);
4187 AppendToMask(V1);
4188
4189 SDValue C0 = Order.front();
4190 SDValue C1 = Order.back(); // Can be same as front
4191 EVT LongTy = InpTy.getDoubleNumVectorElementsVT(*DAG.getContext());
4192
4193 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, LongTy, {C0, C1});
4194 return DAG.getVectorShuffle(LongTy, dl, Cat, DAG.getUNDEF(LongTy), LongMask);
4195}
4196
4197// Reassociate concat(p1, p2, ...) into
4198// concat(concat(p1, ...), concat(pi, ...), ...)
4199// where each inner concat produces a predicate where each bit corresponds
4200// to at most BitBytes bytes.
4201// Concatenating predicates decreases the number of bytes per each predicate
4202// bit.
4203SDValue
4204HexagonTargetLowering::combineConcatOfScalarPreds(SDValue Op, unsigned BitBytes,
4205 SelectionDAG &DAG) const {
4206 const SDLoc &dl(Op);
4207 SmallVector<SDValue> Ops(Op->ops());
4208 MVT ResTy = ty(Op);
4209 MVT InpTy = ty(Ops[0]);
4210 unsigned InpLen = InpTy.getVectorNumElements(); // Scalar predicate
4211 unsigned ResLen = ResTy.getVectorNumElements(); // HVX vector predicate
4212 assert(InpLen <= 8 && "Too long for scalar predicate");
4213 assert(ResLen > 8 && "Too short for HVX vector predicate");
4214
4215 unsigned Bytes = 8 / InpLen; // Bytes-per-bit in input
4216
4217 // Already in the right form?
4218 if (Bytes <= BitBytes)
4219 return Op;
4220
4221 ArrayRef<SDValue> Inputs(Ops);
4222 unsigned SliceLen = Bytes / BitBytes;
4223
4225 // (8 / BitBytes) is the desired length of the result of the inner concat.
4226 MVT InnerTy = MVT::getVectorVT(MVT::i1, 8 / BitBytes);
4227 for (unsigned i = 0; i != ResLen / (8 / BitBytes); ++i) {
4228 SDValue Cat = DAG.getNode(ISD::CONCAT_VECTORS, dl, InnerTy,
4229 Inputs.slice(SliceLen * i, SliceLen));
4230 Cats.push_back(Cat);
4231 }
4232
4233 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, Cats);
4234}
4235
4236SDValue HexagonTargetLowering::combineConcatVectorsBeforeLegal(
4237 SDValue Op, DAGCombinerInfo &DCI) const {
4238 MVT ResTy = ty(Op);
4239 MVT ElemTy = ResTy.getVectorElementType();
4240
4241 if (ElemTy != MVT::i1) {
4242 return combineConcatOfShuffles(Op, DCI.DAG);
4243 }
4244 return SDValue();
4245}
4246
4247// Create the inner partial reduction MLA that can be efficiently lowered. This
4248// function is used by partial and full reductions.
4249SDValue HexagonTargetLowering::createExtendingPartialReduceMLA(
4250 unsigned Opcode, EVT AccEltType, unsigned AccNumElements, EVT InputType,
4251 const SDValue &A, const SDValue &B, unsigned &RemainingReductionRatio,
4252 const SDLoc &DL, SelectionDAG &DAG) const {
4253 const auto &Subtarget = DAG.getSubtarget<HexagonSubtarget>();
4254 if (!Subtarget.useHVXOps())
4255 return SDValue();
4256
4257 EVT InputEltType = InputType.getVectorElementType();
4258
4259 // Find if an optimized instruction for the sub-reduction is available.
4260 unsigned NativeRatio;
4261 if (AccEltType == MVT::i32 && InputEltType == MVT::i8)
4262 NativeRatio = 4;
4263 else
4264 return SDValue();
4265
4266 // We only handle the case when additional reduction will be needed, i.e.
4267 // input is longer by a larger factor than the result.
4268 ElementCount InputEC = InputType.getVectorElementCount();
4269 if (!InputEC.isKnownMultipleOf(AccNumElements * NativeRatio))
4270 return SDValue();
4271
4272 unsigned InputNumElements = InputEC.getFixedValue();
4273 RemainingReductionRatio = InputNumElements / (AccNumElements * NativeRatio);
4274 if (RemainingReductionRatio == 1)
4275 return SDValue();
4276
4277 // Create a reduction by the natively supported factor.
4278 EVT IntermediateType = EVT::getVectorVT(*DAG.getContext(), AccEltType,
4279 InputNumElements / NativeRatio);
4280
4281 SDValue Zero = DAG.getConstant(0, DL, IntermediateType);
4282 return DAG.getNode(Opcode, DL, IntermediateType, Zero, A, B);
4283}
4284
4285static bool DetectExtendingMultiply(const SDValue &N, EVT ScalarType,
4286 unsigned &Opcode, SDValue &A, SDValue &B) {
4287 SDValue Mul = N;
4288 EVT AccType = Mul.getValueType(); // Vector input type after extension.
4289 if (ScalarType != AccType.getVectorElementType())
4290 return false;
4291 bool swap = false;
4292 if (Mul->getOpcode() != ISD::MUL)
4293 return false;
4294 A = Mul->getOperand(0);
4295 B = Mul->getOperand(1);
4296 if (A.getOpcode() == ISD::ZERO_EXTEND) {
4297 if (B.getOpcode() == ISD::ZERO_EXTEND)
4298 Opcode = ISD::PARTIAL_REDUCE_UMLA;
4299 else if (B.getOpcode() == ISD::SIGN_EXTEND) {
4300 swap = true;
4302 } else
4303 return false;
4304 } else if (A.getOpcode() == ISD::SIGN_EXTEND) {
4305 if (B.getOpcode() == ISD::ZERO_EXTEND)
4307 else if (B.getOpcode() == ISD::SIGN_EXTEND)
4308 Opcode = ISD::PARTIAL_REDUCE_SMLA;
4309 else
4310 return false;
4311 } else
4312 return false;
4313
4314 // Get multiplication arguments before extension.
4315 A = A->getOperand(0);
4316 B = B->getOperand(0);
4317 if (A.getValueType() != B.getValueType())
4318 return false;
4319
4320 if (swap)
4321 std::swap(A, B);
4322
4323 return true;
4324}
4325
4326SDValue HexagonTargetLowering::splitVecReduceAdd(SDNode *N,
4327 SelectionDAG &DAG) const {
4328 if (!Subtarget.useHVXOps())
4329 return SDValue();
4330
4331 EVT ScalarType = N->getValueType(0);
4332 unsigned Opcode;
4333 SDValue A, B;
4334 if (!DetectExtendingMultiply(N->getOperand(0), ScalarType, Opcode, A, B))
4335 return SDValue();
4336
4337 SDLoc DL(N);
4338 unsigned RemainingReductionRatio;
4339 SDValue Partial =
4340 createExtendingPartialReduceMLA(Opcode, ScalarType, 1, A.getValueType(),
4341 A, B, RemainingReductionRatio, DL, DAG);
4342 if (!Partial)
4343 return SDValue();
4344
4345 // We could have inserted a trivial MLA and rely on the folding action,
4346 // similar to how vector_partial_reduce_add is lowered to an MLA in
4347 // SelectionDAGBuilder. However, we just replace the final result since we
4348 // have analyzed the input completely.
4349 return DAG.getNode(ISD::VECREDUCE_ADD, DL, ScalarType, Partial);
4350}
4351
4352// When possible, separate an MLA reduction with extended operands but
4353// unsupported reduction factor into an extending partial reduction that
4354// can be efficiently lowered, and a follow-up partial reduction.
4355// partial_reduce_mla(a, x, y) ->
4356// partial_reduce_mla(a, partial_reduce_mla(0, x, y), 1)
4357SDValue
4358HexagonTargetLowering::splitExtendingPartialReduceMLA(SDNode *N,
4359 SelectionDAG &DAG) const {
4360 if (!Subtarget.useHVXOps())
4361 return SDValue();
4362
4363 SDValue Acc = N->getOperand(0);
4364 SDValue A = N->getOperand(1);
4365 SDValue B = N->getOperand(2);
4366 if (A.getValueType() != B.getValueType())
4367 return SDValue();
4368
4369 // The types should be declared as custom, but do not split already legal
4370 // operation.
4371 EVT AccType = Acc.getValueType();
4372 EVT InputType = A.getValueType();
4373 if (getPartialReduceMLAAction(N->getOpcode(), AccType, InputType) != Custom)
4374 return SDValue();
4375
4376 SDLoc DL(N);
4377 unsigned RemainingReductionRatio;
4378 SDValue Partial = createExtendingPartialReduceMLA(
4379 N->getOpcode(), AccType.getVectorElementType(),
4380 AccType.getVectorNumElements(), InputType, A, B, RemainingReductionRatio,
4381 DL, DAG);
4382 if (!Partial)
4383 return SDValue();
4384 assert(RemainingReductionRatio <= MaxExpandMLA);
4385
4386 // Create the reduction for the remaining ratio.
4387 EVT IntermediateType = Partial->getOperand(0).getValueType();
4388 SDValue One = DAG.getConstant(1, DL, IntermediateType);
4389 return DAG.getNode(N->getOpcode() == ISD::PARTIAL_REDUCE_UMLA
4392 DL, AccType, Acc, Partial, One);
4393}
4394
4395SDValue
4396HexagonTargetLowering::LowerHvxPartialReduceMLA(SDValue Op,
4397 SelectionDAG &DAG) const {
4398 const SDLoc &DL(Op);
4399 SDValue Acc = Op.getOperand(0);
4400 SDValue A = Op.getOperand(1);
4401 SDValue B = Op.getOperand(2);
4402
4403 // Split the input vectors into units of one HVX vector length.
4404 unsigned HwVectorSizeInBits = Subtarget.getVectorLength() * 8;
4405
4406 EVT AccType = Acc.getValueType();
4407 EVT AccEltType = AccType.getVectorElementType();
4408 unsigned AccSubvectorNumElements =
4409 HwVectorSizeInBits / AccEltType.getSizeInBits();
4410 EVT AccSubvectorType =
4411 EVT::getVectorVT(*DAG.getContext(), AccEltType, AccSubvectorNumElements);
4412
4413 EVT InputType = A.getValueType();
4414 assert(InputType.getSizeInBits() % HwVectorSizeInBits == 0);
4415 EVT InputEltType = InputType.getVectorElementType();
4416 unsigned InputSubvectorNumElements =
4417 HwVectorSizeInBits / InputEltType.getSizeInBits();
4418 EVT InputSubvectorType = EVT::getVectorVT(*DAG.getContext(), InputEltType,
4419 InputSubvectorNumElements);
4420
4421 unsigned SubvectorNum = InputType.getFixedSizeInBits() / HwVectorSizeInBits;
4423
4424 for (unsigned I = 0; I != SubvectorNum; ++I) {
4425 SDValue SubvectorAcc = DAG.getExtractSubvector(DL, AccSubvectorType, Acc,
4426 I * AccSubvectorNumElements);
4427 SDValue SubvectorA = DAG.getExtractSubvector(DL, InputSubvectorType, A,
4428 I * InputSubvectorNumElements);
4429 SDValue SubvectorB = DAG.getExtractSubvector(DL, InputSubvectorType, B,
4430 I * InputSubvectorNumElements);
4431 SDValue SubvectorMLA = DAG.getNode(Op.getOpcode(), DL, AccSubvectorType,
4432 SubvectorAcc, SubvectorA, SubvectorB);
4433 Subvectors.push_back(SubvectorMLA);
4434 }
4435
4436 return DAG.getNode(ISD::CONCAT_VECTORS, DL, AccType, Subvectors);
4437}
4438
4439SDValue
4440HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
4441 const {
4442 const SDLoc &dl(N);
4443 SelectionDAG &DAG = DCI.DAG;
4444 SDValue Op(N, 0);
4445 unsigned Opc = Op.getOpcode();
4446
4448
4449 if (Opc == ISD::TRUNCATE)
4450 return combineTruncateBeforeLegal(Op, DCI);
4451 if (Opc == ISD::CONCAT_VECTORS)
4452 return combineConcatVectorsBeforeLegal(Op, DCI);
4453
4454 if (DCI.isBeforeLegalizeOps())
4455 return SDValue();
4456
4457 switch (Opc) {
4458 case HexagonISD::V2Q:
4459 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
4460 if (const auto *C = dyn_cast<ConstantSDNode>(Ops[0].getOperand(0)))
4461 return C->isZero() ? DAG.getNode(HexagonISD::QFALSE, dl, ty(Op))
4462 : DAG.getNode(HexagonISD::QTRUE, dl, ty(Op));
4463 }
4464 break;
4465 case HexagonISD::Q2V:
4466 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
4467 return DAG.getNode(ISD::SPLAT_VECTOR, dl, ty(Op),
4468 DAG.getAllOnesConstant(dl, MVT::i32));
4469 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
4470 return getZero(dl, ty(Op), DAG);
4471 break;
4472 case HexagonISD::VINSERTW0:
4473 if (isUndef(Ops[1]))
4474 return Ops[0];
4475 break;
4476 case HexagonISD::VROR: {
4477 if (Ops[0].getOpcode() == HexagonISD::VROR) {
4478 SDValue Vec = Ops[0].getOperand(0);
4479 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(1);
4480 SDValue Rot = DAG.getNode(ISD::ADD, dl, ty(Rot0), {Rot0, Rot1});
4481 return DAG.getNode(HexagonISD::VROR, dl, ty(Op), {Vec, Rot});
4482 }
4483 break;
4484 }
4485 }
4486
4487 return SDValue();
4488}
4489
4490bool
4491HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
4492 if (Subtarget.isHVXVectorType(Ty, true))
4493 return false;
4494 auto Action = getPreferredHvxVectorAction(Ty);
4496 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
4497 return false;
4498}
4499
4500bool
4501HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
4502 if (Subtarget.isHVXVectorType(Ty, true))
4503 return false;
4504 auto Action = getPreferredHvxVectorAction(Ty);
4506 return Subtarget.isHVXVectorType(typeLegalize(Ty, DAG), true);
4507 return false;
4508}
4509
4510bool
4511HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
4512 if (!Subtarget.useHVXOps())
4513 return false;
4514 // If the type of any result, or any operand type are HVX vector types,
4515 // this is an HVX operation.
4516 auto IsHvxTy = [this](EVT Ty) {
4517 return Ty.isSimple() && Subtarget.isHVXVectorType(Ty.getSimpleVT(), true);
4518 };
4519 auto IsHvxOp = [this](SDValue Op) {
4520 return Op.getValueType().isSimple() &&
4521 Subtarget.isHVXVectorType(ty(Op), true);
4522 };
4523 if (llvm::any_of(N->values(), IsHvxTy) || llvm::any_of(N->ops(), IsHvxOp))
4524 return true;
4525
4526 // Check if this could be an HVX operation after type widening.
4527 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
4528 if (!Op.getValueType().isSimple())
4529 return false;
4530 MVT ValTy = ty(Op);
4531 return ValTy.isVector() && shouldWidenToHvx(ValTy, DAG);
4532 };
4533
4534 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
4535 if (IsWidenedToHvx(SDValue(N, i)))
4536 return true;
4537 }
4538 return llvm::any_of(N->ops(), IsWidenedToHvx);
4539}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
static std::tuple< unsigned, unsigned, unsigned > getIEEEProperties(MVT Ty)
static const unsigned MaxExpandMLA
static const MVT LegalV128[]
static const MVT LegalW128[]
static const MVT LegalW64[]
static const MVT LegalV64[]
static bool DetectExtendingMultiply(const SDValue &N, EVT ScalarType, unsigned &Opcode, SDValue &A, SDValue &B)
static cl::opt< unsigned > HvxWidenThreshold("hexagon-hvx-widen", cl::Hidden, cl::init(16), cl::desc("Lower threshold (in bytes) for widening to HVX vectors"))
static cl::opt< bool > EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false), cl::desc("Enable FP fast conversion routine."))
IRTranslator LLVM IR MI
static constexpr Value * getValue(Ty &ValueOrUse)
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isSplat(Value *V)
Return true if V is a splat of a value (which is used when multiplying a matrix with a scalar).
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
Promote Memory to Register
Definition Mem2Reg.cpp:110
This file provides utility analysis objects describing memory locations.
#define T
#define T1
#define P(N)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallVector class.
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static llvm::Type * getVectorElementType(llvm::Type *Ty)
BinaryOperator * Mul
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
static const fltSemantics & IEEEhalf()
Definition APFloat.h:294
LLVM_ABI opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition APFloat.cpp:5912
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:185
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &, LLVMContext &C, EVT VT) const override
Return the ValueType of the result of SETCC operations.
LegalizeTypeAction getPreferredVectorAction(MVT VT) const override
Return the preferred vector type legalization action.
const SDValue & getBasePtr() const
Machine Value Type.
static MVT getFloatingPointVT(unsigned BitWidth)
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
MVT changeTypeToInteger()
Return the type converted to an equivalently sized integer or vector with integer element type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
ElementCount getVectorElementCount() const
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
Flags
Flags values. These may be or'd together.
const MachinePointerInfo & getPointerInfo() const
Align getBaseAlign() const
Return the minimum known alignment in bytes of the base address, without the offset.
unsigned getSubReg() const
int64_t getImm() const
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
Register getReg() const
getReg - Returns the register number.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const SDValue & getChain() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
iterator_range< value_op_iterator > op_values() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
const SDValue & getOperand(unsigned Num) const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
const TargetSubtargetInfo & getSubtarget() const
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI void ExtractVectorElements(SDValue Op, SmallVectorImpl< SDValue > &Args, unsigned Start=0, unsigned Count=0, EVT EltVT=EVT())
Append the extracted elements from Start to Count out of the vector Op in Args.
LLVM_ABI SDValue getConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offs=0, bool isT=false, unsigned TargetFlags=0)
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getExtractSubvector(const SDLoc &DL, EVT VT, SDValue Vec, unsigned Idx)
Return the VT typed sub-vector of Vec at Idx.
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI std::pair< EVT, EVT > GetSplitDestVTs(const EVT &VT) const
Compute the VTs needed for the low/hi parts of a type which is split (or expanded) into two not neces...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
LLVM_ABI void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getMemBasePlusOffset(SDValue Base, TypeSize Offset, const SDLoc &DL, const SDNodeFlags Flags=SDNodeFlags())
Returns sum of the base pointer and offset.
LLVM_ABI std::pair< SDValue, SDValue > SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the vector with EXTRACT_SUBVECTOR using the provided VTs and return the low/high part.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
LLVM_ABI SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
LLVM_ABI SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
LLVM_ABI SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Base, SDValue Offset, SDValue Mask, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, bool IsTruncating=false, bool IsCompressing=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
LLVM_ABI SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
LLVMContext * getContext() const
LLVM_ABI std::pair< EVT, EVT > GetDependentSplitDestVTs(const EVT &VT, const EVT &EnvVT, bool *HiIsEmpty) const
Compute the VTs needed for the low/hi parts of a type, dependent on an enveloping VT that has been sp...
LLVM_ABI SDValue getTargetInsertSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand, SDValue Subreg)
A convenience function for creating TargetInstrInfo::INSERT_SUBREG nodes.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Base, SDValue Offset, SDValue Mask, SDValue Src0, EVT MemVT, MachineMemOperand *MMO, ISD::MemIndexedMode AM, ISD::LoadExtType, bool IsExpanding=false)
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
const value_type & front() const
Return the first element of the SetVector.
Definition SetVector.h:132
const value_type & back() const
Return the last element of the SetVector.
Definition SetVector.h:138
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
const SDValue & getBasePtr() const
const SDValue & getValue() const
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setIndexedLoadAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
void setIndexedStoreAction(ArrayRef< unsigned > IdxModes, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setPartialReduceMLAAction(unsigned Opc, MVT AccVT, MVT InputVT, LegalizeAction Action)
Indicate how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treate...
LegalizeAction getPartialReduceMLAAction(unsigned Opc, EVT AccVT, EVT InputVT) const
Return how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treated.
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:823
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:783
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:857
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:884
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:914
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:997
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition ISDOpcodes.h:254
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:848
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:800
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:769
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:854
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:903
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:892
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:982
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:809
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:930
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:963
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:925
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:860
@ PARTIAL_REDUCE_SUMLA
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1745
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Or
Bitwise or logical OR of integers.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
ElementCount getVectorElementCount() const
Definition ValueTypes.h:373
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:494
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:396
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:404
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const