LLVM 23.0.0git
LegalizeVectorOps.cpp
Go to the documentation of this file.
1//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the SelectionDAG::LegalizeVectors method.
10//
11// The vector legalizer looks for vector operations which might need to be
12// scalarized and legalizes them. This is a separate step from Legalize because
13// scalarizing can introduce illegal types. For example, suppose we have an
14// ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition
15// on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
16// operation, which introduces nodes with the illegal type i64 which must be
17// expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
18// the operation must be unrolled, which introduces nodes with the illegal
19// type i8 which must be promoted.
20//
21// This does not legalize vector manipulations like ISD::BUILD_VECTOR,
22// or operations that happen to take a vector which are custom-lowered;
23// the legalization for such operations never produces nodes
24// with illegal types, so it's okay to put off legalizing them until
25// SelectionDAG::Legalize runs.
26//
27//===----------------------------------------------------------------------===//
28
29#include "llvm/ADT/DenseMap.h"
39#include "llvm/IR/DataLayout.h"
42#include "llvm/Support/Debug.h"
44#include <cassert>
45#include <cstdint>
46#include <iterator>
47#include <utility>
48
49using namespace llvm;
50
51#define DEBUG_TYPE "legalizevectorops"
52
53namespace {
54
/// Drives vector-operation legalization over a SelectionDAG: each node with a
/// vector value or operand is classified by the target as
/// Legal / Promote / Expand / Custom, and the matching transformation is
/// applied (see Run() and LegalizeOp() below).
55class VectorLegalizer {
56 SelectionDAG& DAG;
57 const TargetLowering &TLI;
58 bool Changed = false; // Keep track of whether anything changed
59
60 /// For nodes that are of legal width, and that have more than one use, this
61 /// map indicates what regularized operand to use. This allows us to avoid
62 /// legalizing the same thing more than once.
 // (LegalizedNodes: a DenseMap<SDValue, SDValue> per its use in LegalizeOp;
 // its declaration line is elided in this view.)
64
65 /// Adds a node to the translation cache.
66 void AddLegalizedOperand(SDValue From, SDValue To) {
67 LegalizedNodes.insert(std::make_pair(From, To));
68 // If someone requests legalization of the new node, return itself.
69 if (From != To)
70 LegalizedNodes.insert(std::make_pair(To, To));
71 }
72
73 /// Legalizes the given node.
74 SDValue LegalizeOp(SDValue Op);
75
76 /// Assuming the node is legal, "legalize" the results.
77 SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
78
79 /// Make sure Results are legal and update the translation cache.
80 SDValue RecursivelyLegalizeResults(SDValue Op,
82
83 /// Wrapper to interface LowerOperation with a vector of Results.
84 /// Returns false if the target wants to use default expansion. Otherwise
85 /// returns true. If return is true and the Results are empty, then the
86 /// target wants to keep the input node as is.
87 bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);
88
89 /// Implements unrolling a VSETCC.
90 SDValue UnrollVSETCC(SDNode *Node);
91
92 /// Implement expand-based legalization of vector operations.
93 ///
94 /// This is just a high-level routine to dispatch to specific code paths for
95 /// operations to legalize them.
97
98 /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
99 /// FP_TO_SINT isn't legal.
100 void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
101
102 /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
103 /// SINT_TO_FLOAT and SHR on vectors isn't legal.
104 void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
105
106 /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
107 SDValue ExpandSEXTINREG(SDNode *Node);
108
109 /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
110 ///
111 /// Shuffles the low lanes of the operand into place and bitcasts to the proper
112 /// type. The contents of the bits in the extended part of each element are
113 /// undef.
114 SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
115
116 /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
117 ///
118 /// Shuffles the low lanes of the operand into place, bitcasts to the proper
119 /// type, then shifts left and arithmetic shifts right to introduce a sign
120 /// extension.
121 SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
122
123 /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
124 ///
125 /// Shuffles the low lanes of the operand into place and blends zeros into
126 /// the remaining lanes, finally bitcasting to the proper type.
127 SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
128
129 /// Expand bswap of vectors into a shuffle if legal.
130 SDValue ExpandBSWAP(SDNode *Node);
131
132 /// Implement vselect in terms of XOR, AND, OR when blend is not
133 /// supported by the target.
134 SDValue ExpandVSELECT(SDNode *Node);
 // Expansion helpers for VP (vector-predicated) ops and other vector nodes;
 // each returns the replacement value (or a null SDValue on failure where
 // the caller falls back to unrolling).
135 SDValue ExpandVP_SELECT(SDNode *Node);
136 SDValue ExpandVP_MERGE(SDNode *Node);
137 SDValue ExpandVP_REM(SDNode *Node);
138 SDValue ExpandVP_FNEG(SDNode *Node);
139 SDValue ExpandVP_FABS(SDNode *Node);
140 SDValue ExpandVP_FCOPYSIGN(SDNode *Node);
141 SDValue ExpandLOOP_DEPENDENCE_MASK(SDNode *N);
142 SDValue ExpandSELECT(SDNode *Node);
143 std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
144 SDValue ExpandStore(SDNode *N);
145 SDValue ExpandFNEG(SDNode *Node);
146 SDValue ExpandFABS(SDNode *Node);
147 SDValue ExpandFCOPYSIGN(SDNode *Node);
148 void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
149 void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
150 SDValue ExpandBITREVERSE(SDNode *Node);
151 void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
152 void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
153 void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
154 void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
155 void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
156 void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
157
158 bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
160
161 void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
162
163 /// Implements vector promotion.
164 ///
165 /// This is essentially just bitcasting the operands to a different type and
166 /// bitcasting the result back to the original type.
168
169 /// Implements [SU]INT_TO_FP vector promotion.
170 ///
171 /// This is a [zs]ext of the input operand to a larger integer type.
172 void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
173
174 /// Implements FP_TO_[SU]INT vector promotion of the result type.
175 ///
176 /// It is promoted to a larger integer type. The result is then
177 /// truncated back to the original type.
178 void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
179
180 /// Implements vector setcc operation promotion.
181 ///
182 /// All vector operands are promoted to a vector type with larger element
183 /// type.
184 void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
185
 /// Promotes a strict FP operation: vector operands are widened with
 /// STRICT_FP_EXTEND and the result rounded back with STRICT_FP_ROUND,
 /// keeping the chain result intact (see implementation below).
186 void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
187
188 /// Calculate the reduction using a type of higher precision and round the
189 /// result to match the original type. Setting NonArithmetic signifies the
190 /// rounding of the result does not affect its value.
191 void PromoteFloatVECREDUCE(SDNode *Node, SmallVectorImpl<SDValue> &Results,
192 bool NonArithmetic);
193
 /// Promotes VECTOR_COMPRESS: integer payloads are any-extended (and the
 /// result truncated back); FP payloads are bitcast to the promoted type
 /// (see implementation below).
194 void PromoteVECTOR_COMPRESS(SDNode *Node, SmallVectorImpl<SDValue> &Results);
195
196public:
 /// Construct a legalizer over \p dag, caching its TargetLowering.
197 VectorLegalizer(SelectionDAG& dag) :
198 DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
199
200 /// Begin legalizer the vector operations in the DAG.
201 bool Run();
202};
203
204} // end anonymous namespace
205
// Top-level driver: legalize every vector operation in the DAG.
// Returns true iff any node was replaced (i.e. Changed was set).
206bool VectorLegalizer::Run() {
207 // Before we start legalizing vector nodes, check if there are any vectors.
208 bool HasVectors = false;
 // Scan all nodes in the DAG for a vector-typed result.
 // (The loop header line is elided in this view.)
210 E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
211 // Check if the values of the nodes contain vectors. We don't need to check
212 // the operands because we are going to check their values at some point.
213 HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); });
214
215 // If we found a vector node we can start the legalization.
216 if (HasVectors)
217 break;
218 }
219
220 // If this basic block has no vectors then no need to legalize vectors.
221 if (!HasVectors)
222 return false;
223
224 // The legalize process is inherently a bottom-up recursive process (users
225 // legalize their uses before themselves). Given infinite stack space, we
226 // could just start legalizing on the root and traverse the whole graph. In
227 // practice however, this causes us to run out of stack space on large basic
228 // blocks. To avoid this problem, compute an ordering of the nodes where each
229 // node is only legalized after all of its operands are legalized.
 // (Ordering setup and loop header are elided in this view; the loop walks
 // every node and legalizes its first result.)
232 E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
233 LegalizeOp(SDValue(&*I, 0));
234
235 // Finally, it's possible the root changed. Get the new root.
236 SDValue OldRoot = DAG.getRoot();
237 assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
238 DAG.setRoot(LegalizedNodes[OldRoot]);
239
240 LegalizedNodes.clear();
241
242 // Remove dead nodes now.
243 DAG.RemoveDeadNodes();
244
245 return Changed;
246}
247
248SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
249 assert(Op->getNumValues() == Result->getNumValues() &&
250 "Unexpected number of results");
251 // Generic legalization: just pass the operand through.
252 for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
253 AddLegalizedOperand(Op.getValue(i), SDValue(Result, i));
254 return SDValue(Result, Op.getResNo());
255}
256
// Legalize each replacement value produced for Op, record each mapping in
// the LegalizedNodes cache, and return the value matching Op's result
// number. (The return-type line and the Results parameter line are elided
// in this view.)
258VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
260 assert(Results.size() == Op->getNumValues() &&
261 "Unexpected number of results");
262 // Make sure that the generated code is itself legal.
263 for (unsigned i = 0, e = Results.size(); i != e; ++i) {
264 Results[i] = LegalizeOp(Results[i]);
265 AddLegalizedOperand(Op.getValue(i), Results[i]);
266 }
267
268 return Results[Op.getResNo()];
269}
270
// Legalize a single node after recursively legalizing its operands.
// Classifies the node via TargetLowering, applies the chosen action
// (Promote / Expand / Custom / Legal), and memoizes the result.
271SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
272 // Note that LegalizeOp may be reentered even from single-use nodes, which
273 // means that we always must cache transformed nodes.
274 DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
275 if (I != LegalizedNodes.end()) return I->second;
276
277 // Legalize the operands
 // (The declaration of the operand vector `Ops` is elided in this view.)
279 for (const SDValue &Oper : Op->op_values())
280 Ops.push_back(LegalizeOp(Oper));
281
282 SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);
283
 // Pure-scalar nodes need no vector legalization: just pass them through.
284 bool HasVectorValueOrOp =
285 llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
286 llvm::any_of(Node->op_values(),
287 [](SDValue O) { return O.getValueType().isVector(); });
288 if (!HasVectorValueOrOp)
289 return TranslateLegalizeResults(Op, Node);
290
 // Ask the target which LegalizeAction applies, keyed on the opcode and the
 // type that matters for that opcode. (Several case labels in this switch
 // are elided in this view.)
291 TargetLowering::LegalizeAction Action = TargetLowering::Legal;
292 EVT ValVT;
293 switch (Op.getOpcode()) {
294 default:
295 return TranslateLegalizeResults(Op, Node);
296 case ISD::LOAD: {
297 LoadSDNode *LD = cast<LoadSDNode>(Node);
298 ISD::LoadExtType ExtType = LD->getExtensionType();
299 EVT LoadedVT = LD->getMemoryVT();
 // Only extending vector loads need a target decision here.
300 if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
301 Action = TLI.getLoadAction(LD->getValueType(0), LoadedVT, LD->getAlign(),
302 LD->getAddressSpace(), ExtType, false);
303 break;
304 }
305 case ISD::STORE: {
306 StoreSDNode *ST = cast<StoreSDNode>(Node);
307 EVT StVT = ST->getMemoryVT();
308 MVT ValVT = ST->getValue().getSimpleValueType();
 // Only truncating vector stores need a target decision here.
309 if (StVT.isVector() && ST->isTruncatingStore())
310 Action = TLI.getTruncStoreAction(ValVT, StVT, ST->getAlign(),
311 ST->getAddressSpace());
312 break;
313 }
 // (Case label elided in this view.)
315 Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
316 // This operation lies about being legal: when it claims to be legal,
317 // it should actually be expanded.
318 if (Action == TargetLowering::Legal)
319 Action = TargetLowering::Expand;
320 break;
 // All strict (constrained) FP opcodes share the handling below.
321#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
322 case ISD::STRICT_##DAGN:
323#include "llvm/IR/ConstrainedOps.def"
324 ValVT = Node->getValueType(0);
 // For strict int-to-fp the relevant type is the integer input, not the
 // FP result; operand 0 is the chain.
325 if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
326 Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
327 ValVT = Node->getOperand(1).getValueType();
328 if (Op.getOpcode() == ISD::STRICT_FSETCC ||
329 Op.getOpcode() == ISD::STRICT_FSETCCS) {
330 MVT OpVT = Node->getOperand(1).getSimpleValueType();
331 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(3))->get();
332 Action = TLI.getCondCodeAction(CCCode, OpVT);
333 if (Action == TargetLowering::Legal)
334 Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
335 } else {
336 Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
337 }
338 // If we're asked to expand a strict vector floating-point operation,
339 // by default we're going to simply unroll it. That is usually the
340 // best approach, except in the case where the resulting strict (scalar)
341 // operations would themselves use the fallback mutation to non-strict.
342 // In that specific case, just do the fallback on the vector op.
343 if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
344 TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
345 TargetLowering::Legal) {
346 EVT EltVT = ValVT.getVectorElementType();
347 if (TLI.getOperationAction(Node->getOpcode(), EltVT)
348 == TargetLowering::Expand &&
349 TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
350 == TargetLowering::Legal)
351 Action = TargetLowering::Legal;
352 }
353 break;
 // Large family of ops whose action is keyed on the result type.
354 case ISD::ADD:
355 case ISD::SUB:
356 case ISD::MUL:
357 case ISD::MULHS:
358 case ISD::MULHU:
359 case ISD::SDIV:
360 case ISD::UDIV:
361 case ISD::SREM:
362 case ISD::UREM:
363 case ISD::SDIVREM:
364 case ISD::UDIVREM:
365 case ISD::FADD:
366 case ISD::FSUB:
367 case ISD::FMUL:
368 case ISD::FDIV:
369 case ISD::FREM:
370 case ISD::AND:
371 case ISD::OR:
372 case ISD::XOR:
373 case ISD::SHL:
374 case ISD::SRA:
375 case ISD::SRL:
376 case ISD::FSHL:
377 case ISD::FSHR:
378 case ISD::ROTL:
379 case ISD::ROTR:
380 case ISD::ABS:
381 case ISD::ABDS:
382 case ISD::ABDU:
383 case ISD::AVGCEILS:
384 case ISD::AVGCEILU:
385 case ISD::AVGFLOORS:
386 case ISD::AVGFLOORU:
387 case ISD::BSWAP:
388 case ISD::BITREVERSE:
389 case ISD::CTLZ:
390 case ISD::CTTZ:
393 case ISD::CTPOP:
394 case ISD::CLMUL:
395 case ISD::CLMULH:
396 case ISD::CLMULR:
397 case ISD::SELECT:
398 case ISD::VSELECT:
399 case ISD::SELECT_CC:
400 case ISD::ZERO_EXTEND:
401 case ISD::ANY_EXTEND:
402 case ISD::TRUNCATE:
403 case ISD::SIGN_EXTEND:
404 case ISD::FP_TO_SINT:
405 case ISD::FP_TO_UINT:
406 case ISD::FNEG:
407 case ISD::FABS:
408 case ISD::FMINNUM:
409 case ISD::FMAXNUM:
412 case ISD::FMINIMUM:
413 case ISD::FMAXIMUM:
414 case ISD::FMINIMUMNUM:
415 case ISD::FMAXIMUMNUM:
416 case ISD::FCOPYSIGN:
417 case ISD::FSQRT:
418 case ISD::FSIN:
419 case ISD::FCOS:
420 case ISD::FTAN:
421 case ISD::FASIN:
422 case ISD::FACOS:
423 case ISD::FATAN:
424 case ISD::FATAN2:
425 case ISD::FSINH:
426 case ISD::FCOSH:
427 case ISD::FTANH:
428 case ISD::FLDEXP:
429 case ISD::FPOWI:
430 case ISD::FPOW:
431 case ISD::FCBRT:
432 case ISD::FLOG:
433 case ISD::FLOG2:
434 case ISD::FLOG10:
435 case ISD::FEXP:
436 case ISD::FEXP2:
437 case ISD::FEXP10:
438 case ISD::FCEIL:
439 case ISD::FTRUNC:
440 case ISD::FRINT:
441 case ISD::FNEARBYINT:
442 case ISD::FROUND:
443 case ISD::FROUNDEVEN:
444 case ISD::FFLOOR:
445 case ISD::FP_ROUND:
446 case ISD::FP_EXTEND:
448 case ISD::FMA:
453 case ISD::SMIN:
454 case ISD::SMAX:
455 case ISD::UMIN:
456 case ISD::UMAX:
457 case ISD::SMUL_LOHI:
458 case ISD::UMUL_LOHI:
459 case ISD::SADDO:
460 case ISD::UADDO:
461 case ISD::SSUBO:
462 case ISD::USUBO:
463 case ISD::SMULO:
464 case ISD::UMULO:
467 case ISD::FFREXP:
468 case ISD::FMODF:
469 case ISD::FSINCOS:
470 case ISD::FSINCOSPI:
471 case ISD::SADDSAT:
472 case ISD::UADDSAT:
473 case ISD::SSUBSAT:
474 case ISD::USUBSAT:
475 case ISD::SSHLSAT:
476 case ISD::USHLSAT:
479 case ISD::MGATHER:
481 case ISD::SCMP:
482 case ISD::UCMP:
485 Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
486 break;
 // Fixed-point ops additionally key the action on their scale operand.
487 case ISD::SMULFIX:
488 case ISD::SMULFIXSAT:
489 case ISD::UMULFIX:
490 case ISD::UMULFIXSAT:
491 case ISD::SDIVFIX:
492 case ISD::SDIVFIXSAT:
493 case ISD::UDIVFIX:
494 case ISD::UDIVFIXSAT: {
495 unsigned Scale = Node->getConstantOperandVal(2);
496 Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
497 Node->getValueType(0), Scale);
498 break;
499 }
 // These key the action on the type of operand 0 rather than the result.
500 case ISD::LROUND:
501 case ISD::LLROUND:
502 case ISD::LRINT:
503 case ISD::LLRINT:
504 case ISD::SINT_TO_FP:
505 case ISD::UINT_TO_FP:
521 case ISD::CTTZ_ELTS:
524 Action = TLI.getOperationAction(Node->getOpcode(),
525 Node->getOperand(0).getValueType());
526 break;
 // (Case labels elided in this view.) These key on operand 1's type.
529 Action = TLI.getOperationAction(Node->getOpcode(),
530 Node->getOperand(1).getValueType());
531 break;
532 case ISD::SETCC: {
 // SETCC first checks the condition-code action, then the op action.
533 MVT OpVT = Node->getOperand(0).getSimpleValueType();
534 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
535 Action = TLI.getCondCodeAction(CCCode, OpVT);
536 if (Action == TargetLowering::Legal)
537 Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
538 break;
539 }
 // (Case labels elided in this view: partial-reduce MLA opcodes.)
544 Action =
545 TLI.getPartialReduceMLAAction(Op.getOpcode(), Node->getValueType(0),
546 Node->getOperand(1).getValueType());
547 break;
548
 // Every VP opcode is classified via the type at its registered legal
 // position (negative values index results, non-negative index operands).
549#define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \
550 case ISD::VPID: { \
551 EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \
552 : Node->getOperand(LEGALPOS).getValueType(); \
553 if (ISD::VPID == ISD::VP_SETCC) { \
554 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
555 Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \
556 if (Action != TargetLowering::Legal) \
557 break; \
558 } \
559 /* Defer non-vector results to LegalizeDAG. */ \
560 if (!Node->getValueType(0).isVector() && \
561 Node->getValueType(0) != MVT::Other) { \
562 Action = TargetLowering::Legal; \
563 break; \
564 } \
565 Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \
566 } break;
567#include "llvm/IR/VPIntrinsics.def"
568 }
569
570 LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
571
 // Apply the chosen action; replacement values (if any) land in ResultVals.
572 SmallVector<SDValue, 8> ResultVals;
573 switch (Action) {
574 default: llvm_unreachable("This action is not supported yet!");
575 case TargetLowering::Promote:
576 assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
577 "This action is not supported yet!");
578 LLVM_DEBUG(dbgs() << "Promoting\n");
579 Promote(Node, ResultVals);
580 assert(!ResultVals.empty() && "No results for promotion?");
581 break;
582 case TargetLowering::Legal:
583 LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
584 break;
585 case TargetLowering::Custom:
586 LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
587 if (LowerOperationWrapper(Node, ResultVals))
588 break;
589 LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
 // Custom lowering declined: fall back to generic expansion.
590 [[fallthrough]];
591 case TargetLowering::Expand:
592 LLVM_DEBUG(dbgs() << "Expanding\n");
593 Expand(Node, ResultVals);
594 break;
595 }
596
 // Empty ResultVals means the node was kept as is.
597 if (ResultVals.empty())
598 return TranslateLegalizeResults(Op, Node);
599
600 Changed = true;
601 return RecursivelyLegalizeResults(Op, ResultVals);
602}
603
604// FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
605// merge them somehow?
606bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
607 SmallVectorImpl<SDValue> &Results) {
608 SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
609
610 if (!Res.getNode())
611 return false;
612
613 if (Res == SDValue(Node, 0))
614 return true;
615
616 // If the original node has one result, take the return value from
617 // LowerOperation as is. It might not be result number 0.
618 if (Node->getNumValues() == 1) {
619 Results.push_back(Res);
620 return true;
621 }
622
623 // If the original node has multiple results, then the return node should
624 // have the same number of results.
625 assert((Node->getNumValues() == Res->getNumValues()) &&
626 "Lowering returned the wrong number of results!");
627
628 // Places new result values base on N result number.
629 for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
630 Results.push_back(Res.getValue(I));
631
632 return true;
633}
634
635void VectorLegalizer::PromoteSETCC(SDNode *Node,
636 SmallVectorImpl<SDValue> &Results) {
637 MVT VecVT = Node->getOperand(0).getSimpleValueType();
638 MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
639
640 unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
641
642 SDLoc DL(Node);
643 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
644
645 Operands[0] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(0));
646 Operands[1] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(1));
647 Operands[2] = Node->getOperand(2);
648
649 if (Node->getOpcode() == ISD::VP_SETCC) {
650 Operands[3] = Node->getOperand(3); // mask
651 Operands[4] = Node->getOperand(4); // evl
652 }
653
654 SDValue Res = DAG.getNode(Node->getOpcode(), DL, Node->getSimpleValueType(0),
655 Operands, Node->getFlags());
656
657 Results.push_back(Res);
658}
659
// Promote a strict FP op to a wider vector FP type: STRICT_FP_EXTEND each
// vector operand (collecting the extension chains), evaluate the op in the
// wide type, then STRICT_FP_ROUND the value back, producing {value, chain}.
660void VectorLegalizer::PromoteSTRICT(SDNode *Node,
661 SmallVectorImpl<SDValue> &Results) {
 // Operand 0 is the chain, so operand 1 carries the vector type.
662 MVT VecVT = Node->getOperand(1).getSimpleValueType();
663 MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
664
665 assert(VecVT.isFloatingPoint());
666
667 SDLoc DL(Node);
668 SmallVector<SDValue, 5> Operands(Node->getNumOperands());
 // (The declaration of the `Chains` vector is elided in this view.)
670
 // Start at 1: operand 0 (the chain) is filled in after the loop.
671 for (unsigned j = 1; j != Node->getNumOperands(); ++j)
672 if (Node->getOperand(j).getValueType().isVector() &&
673 !(ISD::isVPOpcode(Node->getOpcode()) &&
674 ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
675 {
676 // promote the vector operand.
677 SDValue Ext =
678 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {NewVecVT, MVT::Other},
679 {Node->getOperand(0), Node->getOperand(j)});
680 Operands[j] = Ext.getValue(0);
681 Chains.push_back(Ext.getValue(1));
682 } else
683 Operands[j] = Node->getOperand(j); // Non-vector (or mask) operand: pass through.
684
685 SDVTList VTs = DAG.getVTList(NewVecVT, Node->getValueType(1));
686
 // Merge all extension chains into the op's incoming chain operand.
687 Operands[0] = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
688
689 SDValue Res =
690 DAG.getNode(Node->getOpcode(), DL, VTs, Operands, Node->getFlags());
691
 // Round the wide result back to the original type, chained after Res.
692 SDValue Round =
693 DAG.getNode(ISD::STRICT_FP_ROUND, DL, {VecVT, MVT::Other},
694 {Res.getValue(1), Res.getValue(0),
695 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
696
697 Results.push_back(Round.getValue(0));
698 Results.push_back(Round.getValue(1));
699}
700
701void VectorLegalizer::PromoteFloatVECREDUCE(SDNode *Node,
702 SmallVectorImpl<SDValue> &Results,
703 bool NonArithmetic) {
704 MVT OpVT = Node->getOperand(0).getSimpleValueType();
705 assert(OpVT.isFloatingPoint() && "Expected floating point reduction!");
706 MVT NewOpVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OpVT);
707
708 SDLoc DL(Node);
709 SDValue NewOp = DAG.getNode(ISD::FP_EXTEND, DL, NewOpVT, Node->getOperand(0));
710 SDValue Rdx =
711 DAG.getNode(Node->getOpcode(), DL, NewOpVT.getVectorElementType(), NewOp,
712 Node->getFlags());
713 SDValue Res =
714 DAG.getNode(ISD::FP_ROUND, DL, Node->getValueType(0), Rdx,
715 DAG.getIntPtrConstant(NonArithmetic, DL, /*isTarget=*/true));
716 Results.push_back(Res);
717}
718
// Promote VECTOR_COMPRESS to the target's preferred type: integer payloads
// are any-extended (with the mask promoted to match) and the result
// truncated back; FP payloads are bitcast in both directions.
719void VectorLegalizer::PromoteVECTOR_COMPRESS(
720 SDNode *Node, SmallVectorImpl<SDValue> &Results) {
721 SDLoc DL(Node);
722 EVT VT = Node->getValueType(0);
723 MVT PromotedVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT.getSimpleVT());
724 assert((VT.isInteger() || VT.getSizeInBits() == PromotedVT.getSizeInBits()) &&
725 "Only integer promotion or bitcasts between types is supported");
726
727 SDValue Vec = Node->getOperand(0);
728 SDValue Mask = Node->getOperand(1);
729 SDValue Passthru = Node->getOperand(2);
730 if (VT.isInteger()) {
 // Widen the payload and passthru; the boolean mask needs the target's
 // boolean-contents convention, hence promoteTargetBoolean.
731 Vec = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, Vec);
732 Mask = TLI.promoteTargetBoolean(DAG, Mask, PromotedVT);
733 Passthru = DAG.getNode(ISD::ANY_EXTEND, DL, PromotedVT, Passthru);
734 } else {
 // Same-size FP promotion: reinterpret the bits only.
735 Vec = DAG.getBitcast(PromotedVT, Vec);
736 Passthru = DAG.getBitcast(PromotedVT, Passthru);
737 }
738
 // (The `SDValue Result =` line is elided in this view.)
740 DAG.getNode(ISD::VECTOR_COMPRESS, DL, PromotedVT, Vec, Mask, Passthru);
 // Convert the promoted result back to the original type.
741 Result = VT.isInteger() ? DAG.getNode(ISD::TRUNCATE, DL, VT, Result)
742 : DAG.getBitcast(VT, Result);
743 Results.push_back(Result);
744}
745
// Dispatch promotion: a few opcode families get dedicated handlers; every
// other node takes the generic bitcast-or-fp-extend path at the bottom.
// (Several case labels in the switch are elided in this view.)
746void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
747 // For a few operations there is a specific concept for promotion based on
748 // the operand's type.
749 switch (Node->getOpcode()) {
750 case ISD::SINT_TO_FP:
751 case ISD::UINT_TO_FP:
754 // "Promote" the operation by extending the operand.
755 PromoteINT_TO_FP(Node, Results);
756 return;
757 case ISD::FP_TO_UINT:
758 case ISD::FP_TO_SINT:
761 // Promote the operation by extending the operand.
762 PromoteFP_TO_INT(Node, Results);
763 return;
764 case ISD::VP_SETCC:
765 case ISD::SETCC:
766 // Promote the operation by extending the operand.
767 PromoteSETCC(Node, Results);
768 return;
769 case ISD::STRICT_FADD:
770 case ISD::STRICT_FSUB:
771 case ISD::STRICT_FMUL:
772 case ISD::STRICT_FDIV:
774 case ISD::STRICT_FMA:
775 PromoteSTRICT(Node, Results);
776 return;
 // (Case labels elided: arithmetic FP reductions.)
778 PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/false);
779 return;
 // (Case labels elided: non-arithmetic FP reductions, e.g. min/max.)
784 PromoteFloatVECREDUCE(Node, Results, /*NonArithmetic=*/true);
785 return;
 // (Case label elided: ISD::VECTOR_COMPRESS.)
787 PromoteVECTOR_COMPRESS(Node, Results);
788 return;
789
790 case ISD::FP_ROUND:
791 case ISD::FP_EXTEND:
792 // These operations are used to do promotion so they can't be promoted
793 // themselves.
794 llvm_unreachable("Don't know how to promote this operation!");
795 case ISD::VP_FABS:
796 case ISD::VP_FCOPYSIGN:
797 case ISD::VP_FNEG:
798 // Promoting fabs, fneg, and fcopysign changes their semantics.
799 llvm_unreachable("These operations should not be promoted");
800 }
801
802 // There are currently two cases of vector promotion:
803 // 1) Bitcasting a vector of integers to a different type to a vector of the
804 // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
805 // 2) Extending a vector of floats to a vector of the same number of larger
806 // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
807 assert(Node->getNumValues() == 1 &&
808 "Can't promote a vector with multiple results!");
809 MVT VT = Node->getSimpleValueType(0);
810 MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
811 SDLoc dl(Node);
812 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
813
814 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
815 // Do not promote the mask operand of a VP OP.
816 bool SkipPromote = ISD::isVPOpcode(Node->getOpcode()) &&
817 ISD::getVPMaskIdx(Node->getOpcode()) == j;
818 if (Node->getOperand(j).getValueType().isVector() && !SkipPromote)
 // FP element types are genuinely extended (condition partly elided in
 // this view); everything else is bitcast to the promoted type.
819 if (Node->getOperand(j)
820 .getValueType()
821 .getVectorElementType()
822 .isFloatingPoint() &&
824 if (ISD::isVPOpcode(Node->getOpcode())) {
 // VP ops must extend under their own mask/EVL.
 // (The getVPExplicitVectorLengthIdx call is elided in this view.)
825 unsigned EVLIdx =
827 unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
828 Operands[j] =
829 DAG.getNode(ISD::VP_FP_EXTEND, dl, NVT, Node->getOperand(j),
830 Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
831 } else {
832 Operands[j] =
833 DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
834 }
835 else
836 Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
837 else
838 Operands[j] = Node->getOperand(j);
839 }
840
841 SDValue Res =
842 DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags());
843
 // Convert the promoted result back: FP->FP promotions are rounded down
 // (condition partly elided in this view); otherwise bitcast back.
844 if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
847 if (ISD::isVPOpcode(Node->getOpcode())) {
848 unsigned EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode());
849 unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode());
850 Res = DAG.getNode(ISD::VP_FP_ROUND, dl, VT, Res,
851 Node->getOperand(MaskIdx), Node->getOperand(EVLIdx));
852 } else {
853 Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
854 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
855 }
856 else
857 Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);
858
859 Results.push_back(Res);
860}
861
// Promote [SU]INT_TO_FP (strict or not) by extending the integer input
// vector to the promoted type before converting. The extension opcode is
// chosen by signedness (the ternary's arms — presumably ZERO_EXTEND vs
// SIGN_EXTEND — are elided in this view; TODO confirm against upstream).
862void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
863 SmallVectorImpl<SDValue> &Results) {
864 // INT_TO_FP operations may require the input operand be promoted even
865 // when the type is otherwise legal.
866 bool IsStrict = Node->isStrictFPOpcode();
 // For strict ops operand 0 is the chain, so the vector is operand 1.
867 MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType();
868 MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
 // (The assert's condition line is elided in this view.)
870 "Vectors have different number of elements!");
871
872 SDLoc dl(Node);
873 SmallVector<SDValue, 4> Operands(Node->getNumOperands());
874
875 unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
876 Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
 // Extend every vector operand; scalars/chain pass through unchanged.
879 for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
880 if (Node->getOperand(j).getValueType().isVector())
881 Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j));
882 else
883 Operands[j] = Node->getOperand(j);
884 }
885
886 if (IsStrict) {
 // Strict form yields {value, chain}.
887 SDValue Res = DAG.getNode(Node->getOpcode(), dl,
888 {Node->getValueType(0), MVT::Other}, Operands);
889 Results.push_back(Res);
890 Results.push_back(Res.getValue(1));
891 return;
892 }
893
894 SDValue Res =
895 DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands);
896 Results.push_back(Res);
897}
898
899// For FP_TO_INT we promote the result type to a vector type with wider
900// elements and then truncate the result. This is different from the default
901// PromoteVector which uses bitcast to promote thus assuming that the
902// promoted vector type has the same overall size.
903void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
904 SmallVectorImpl<SDValue> &Results) {
905 MVT VT = Node->getSimpleValueType(0);
906 MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
907 bool IsStrict = Node->isStrictFPOpcode();
 // (The assert's condition line is elided in this view.)
909 "Vectors have different number of elements!");
910
911 unsigned NewOpc = Node->getOpcode();
912 // Change FP_TO_UINT to FP_TO_SINT if possible.
913 // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
 // (The legality checks guarding both rewrites are elided in this view.)
914 if (NewOpc == ISD::FP_TO_UINT &&
916 NewOpc = ISD::FP_TO_SINT;
917
918 if (NewOpc == ISD::STRICT_FP_TO_UINT &&
920 NewOpc = ISD::STRICT_FP_TO_SINT;
921
922 SDLoc dl(Node);
923 SDValue Promoted, Chain;
924 if (IsStrict) {
925 Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
926 {Node->getOperand(0), Node->getOperand(1)});
927 Chain = Promoted.getValue(1);
928 } else
929 Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0));
930
931 // Assert that the converted value fits in the original type. If it doesn't
932 // (eg: because the value being converted is too big), then the result of the
933 // original operation was undefined anyway, so the assert is still correct.
 // NewOpc is reused below to select the assert node kind, based on the
 // ORIGINAL opcode's signedness (not the possibly-rewritten one).
934 if (Node->getOpcode() == ISD::FP_TO_UINT ||
935 Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
936 NewOpc = ISD::AssertZext;
937 else
938 NewOpc = ISD::AssertSext;
939
940 Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
941 DAG.getValueType(VT.getScalarType()));
942 Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
943 Results.push_back(Promoted);
944 if (IsStrict)
945 Results.push_back(Chain);
946}
947
948std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
949 LoadSDNode *LD = cast<LoadSDNode>(N);
950 return TLI.scalarizeVectorLoad(LD, DAG);
951}
952
953SDValue VectorLegalizer::ExpandStore(SDNode *N) {
954 StoreSDNode *ST = cast<StoreSDNode>(N);
955 SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
956 return TF;
957}
958
// Expand a vector operation the target cannot handle natively: dispatch to a
// dedicated expansion for the opcode where one exists, and fall back to
// unrolling the operation into scalar operations at the bottom.
// NOTE(review): a number of case labels appear to be missing in this copy of
// the file (extraction artifact) — verify against the upstream source.
void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  switch (Node->getOpcode()) {
  case ISD::LOAD: {
    std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
    Results.push_back(Tmp.first);
    Results.push_back(Tmp.second);
    return;
  }
  case ISD::STORE:
    Results.push_back(ExpandStore(Node));
    return;
    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
      Results.push_back(Node->getOperand(i));
    return;
    if (SDValue Expanded = ExpandSEXTINREG(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
    Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
    return;
    Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
    return;
    Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::BSWAP:
    if (SDValue Expanded = ExpandBSWAP(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_BSWAP:
    Results.push_back(TLI.expandVPBSWAP(Node, DAG));
    return;
  case ISD::VSELECT:
    if (SDValue Expanded = ExpandVSELECT(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_SELECT:
    if (SDValue Expanded = ExpandVP_SELECT(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_SREM:
  case ISD::VP_UREM:
    if (SDValue Expanded = ExpandVP_REM(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_FNEG:
    if (SDValue Expanded = ExpandVP_FNEG(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_FABS:
    if (SDValue Expanded = ExpandVP_FABS(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_FCOPYSIGN:
    if (SDValue Expanded = ExpandVP_FCOPYSIGN(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SELECT:
    if (SDValue Expanded = ExpandSELECT(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SELECT_CC: {
    // Scalable vectors can't be unrolled, so split SELECT_CC into a SETCC
    // plus a select on the resulting condition vector.
    if (Node->getValueType(0).isScalableVector()) {
      EVT CondVT = TLI.getSetCCResultType(
          DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
      SDValue SetCC =
          DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
                      Node->getOperand(1), Node->getOperand(4));
      Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
                                      Node->getOperand(2),
                                      Node->getOperand(3)));
      return;
    }
    break;
  }
  case ISD::FP_TO_UINT:
    ExpandFP_TO_UINT(Node, Results);
    return;
  case ISD::UINT_TO_FP:
    ExpandUINT_TO_FLOAT(Node, Results);
    return;
  case ISD::FNEG:
    if (SDValue Expanded = ExpandFNEG(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FABS:
    if (SDValue Expanded = ExpandFABS(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FCOPYSIGN:
    if (SDValue Expanded = ExpandFCOPYSIGN(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FCANONICALIZE: {
    // If the scalar element type has a
    // Legal/Custom FCANONICALIZE, don't
    // mess with the vector, fall back.
    EVT VT = Node->getValueType(0);
    EVT EltVT = VT.getVectorElementType();
        TargetLowering::Expand)
      break;
    // Otherwise canonicalize the whole vector.
    SDValue Mul = TLI.expandFCANONICALIZE(Node, DAG);
    Results.push_back(Mul);
    return;
  }
  case ISD::FSUB:
    ExpandFSUB(Node, Results);
    return;
  case ISD::SETCC:
  case ISD::VP_SETCC:
    ExpandSETCC(Node, Results);
    return;
  case ISD::ABS:
    if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::ABDS:
  case ISD::ABDU:
    if (SDValue Expanded = TLI.expandABD(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
    if (SDValue Expanded = TLI.expandAVG(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::BITREVERSE:
    if (SDValue Expanded = ExpandBITREVERSE(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_BITREVERSE:
    if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::CTPOP:
    if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_CTPOP:
    if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::CTLZ:
    if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_CTLZ:
  case ISD::VP_CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::CTTZ:
    if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_CTTZ:
  case ISD::VP_CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FSHL:
  case ISD::VP_FSHL:
  case ISD::FSHR:
  case ISD::VP_FSHR:
    if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::CLMUL:
  case ISD::CLMULR:
  case ISD::CLMULH:
    if (SDValue Expanded = TLI.expandCLMUL(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    if (SDValue Expanded = TLI.expandROT(Node, false /*AllowVectorOps*/, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
    if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
    Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG));
    return;
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM:
    Results.push_back(TLI.expandFMINIMUMNUM_FMAXIMUMNUM(Node, DAG));
    return;
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:
    if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::UADDO:
  case ISD::USUBO:
    ExpandUADDSUBO(Node, Results);
    return;
  case ISD::SADDO:
  case ISD::SSUBO:
    ExpandSADDSUBO(Node, Results);
    return;
  case ISD::UMULO:
  case ISD::SMULO:
    ExpandMULO(Node, Results);
    return;
  case ISD::USUBSAT:
  case ISD::SSUBSAT:
  case ISD::UADDSAT:
  case ISD::SADDSAT:
    if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::USHLSAT:
  case ISD::SSHLSAT:
    if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
    // Expand the fptosisat if it is scalable to prevent it from unrolling below.
    if (Node->getValueType(0).isScalableVector()) {
      if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
        Results.push_back(Expanded);
        return;
      }
    }
    break;
  case ISD::SMULFIX:
  case ISD::UMULFIX:
    if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SMULFIXSAT:
  case ISD::UMULFIXSAT:
    // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
    // why. Maybe it results in worse codegen compared to the unroll for some
    // targets? This should probably be investigated. And if we still prefer to
    // unroll an explanation could be helpful.
    break;
  case ISD::SDIVFIX:
  case ISD::UDIVFIX:
    ExpandFixedPointDiv(Node, Results);
    return;
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIXSAT:
    break;
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    ExpandStrictFPOp(Node, Results);
    return;
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
    Results.push_back(TLI.expandVecReduce(Node, DAG));
    return;
    Results.push_back(TLI.expandPartialReduceMLA(Node, DAG));
    return;
    Results.push_back(TLI.expandVecReduceSeq(Node, DAG));
    return;
  case ISD::SREM:
  case ISD::UREM:
    ExpandREM(Node, Results);
    return;
  case ISD::VP_MERGE:
    if (SDValue Expanded = ExpandVP_MERGE(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FREM: {
    RTLIB::Libcall LC = RTLIB::getREM(Node->getValueType(0));
    if (tryExpandVecMathCall(Node, LC, Results))
      return;

    break;
  }
  case ISD::FSINCOS:
  case ISD::FSINCOSPI: {
    EVT VT = Node->getValueType(0);
    RTLIB::Libcall LC = Node->getOpcode() == ISD::FSINCOS
                            ? RTLIB::getSINCOS(VT)
                            : RTLIB::getSINCOSPI(VT);
    if (LC != RTLIB::UNKNOWN_LIBCALL &&
        TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results))
      return;

    // TODO: Try to see if there's a narrower call available to use before
    // scalarizing.
    break;
  }
  case ISD::FPOW: {
    RTLIB::Libcall LC = RTLIB::getPOW(Node->getValueType(0));
    if (tryExpandVecMathCall(Node, LC, Results))
      return;

    // TODO: Try to see if there's a narrower call available to use before
    // scalarizing.
    break;
  }
  case ISD::FCBRT: {
    RTLIB::Libcall LC = RTLIB::getCBRT(Node->getValueType(0));
    if (tryExpandVecMathCall(Node, LC, Results))
      return;

    // TODO: Try to see if there's a narrower call available to use before
    // scalarizing.
    break;
  }
  case ISD::FMODF: {
    EVT VT = Node->getValueType(0);
    RTLIB::Libcall LC = RTLIB::getMODF(VT);
    if (LC != RTLIB::UNKNOWN_LIBCALL &&
        TLI.expandMultipleResultFPLibCall(DAG, LC, Node, Results,
                                          /*CallRetResNo=*/0))
      return;
    break;
  }
    Results.push_back(TLI.expandVECTOR_COMPRESS(Node, DAG));
    return;
  case ISD::CTTZ_ELTS:
    Results.push_back(TLI.expandCttzElts(Node, DAG));
    return;
    Results.push_back(TLI.expandVectorFindLastActive(Node, DAG));
    return;
  case ISD::SCMP:
  case ISD::UCMP:
    Results.push_back(TLI.expandCMP(Node, DAG));
    return;
    Results.push_back(ExpandLOOP_DEPENDENCE_MASK(Node));
    return;

  case ISD::FADD:
  case ISD::FMUL:
  case ISD::FMA:
  case ISD::FDIV:
  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FNEARBYINT:
  case ISD::FRINT:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::FTRUNC:
  case ISD::FSQRT:
    if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  }

  // No dedicated expansion applied: unroll into scalar operations, preserving
  // the node's full set of result values.
  SDValue Unrolled = DAG.UnrollVectorOp(Node);
  if (Node->getNumValues() == 1) {
    Results.push_back(Unrolled);
  } else {
    assert(Node->getNumValues() == Unrolled->getNumValues() &&
           "VectorLegalizer Expand returned wrong number of results!");
    for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
      Results.push_back(Unrolled.getValue(I));
  }
}
1419
SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
  // Lower a select instruction where the condition is a scalar and the
  // operands are vectors. Lower this select to VSELECT and implement it
  // using XOR AND OR. The selector bit is broadcasted.
  EVT VT = Node->getValueType(0);
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue Op2 = Node->getOperand(2);

  assert(VT.isVector() && !Mask.getValueType().isVector()
         && Op1.getValueType() == Op2.getValueType() && "Invalid type");

  // If we can't even use the basic vector operations of
  // AND,OR,XOR, we will have to scalarize the op.
  // Notice that the operation may be 'promoted' which means that it is
  // 'bitcasted' to another type which is handled.
  // Also, we need to be able to construct a splat vector using either
  // BUILD_VECTOR or SPLAT_VECTOR.
  // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
  // BUILD_VECTOR?
  // NOTE(review): the splat-construction legality operands of this condition
  // appear truncated in this copy of the file — verify upstream.
  if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
      TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
                            VT) == TargetLowering::Expand)
    return SDValue();

  // Generate a mask operand.
  EVT MaskTy = VT.changeVectorElementTypeToInteger();

  // What is the size of each element in the vector mask.
  EVT BitTy = MaskTy.getScalarType();

  // Turn the scalar condition into an all-ones / all-zeros scalar lane value.
  Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy),
                       DAG.getConstant(0, DL, BitTy));

  // Broadcast the mask so that the entire vector is all one or all zero.
  Mask = DAG.getSplat(MaskTy, DL, Mask);

  // Bitcast the operands to be the same type as the mask.
  // This is needed when we select between FP types because
  // the mask is a vector of integers.
  Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
  Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);

  SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);

  // Blend: (Op1 & Mask) | (Op2 & ~Mask), then cast back to the result type.
  Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
  Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
  SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
  return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
}
1475
1476SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
1477 EVT VT = Node->getValueType(0);
1478
1479 // Make sure that the SRA and SHL instructions are available.
1480 if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
1481 TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
1482 return SDValue();
1483
1484 SDLoc DL(Node);
1485 EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT();
1486
1487 unsigned BW = VT.getScalarSizeInBits();
1488 unsigned OrigBW = OrigTy.getScalarSizeInBits();
1489 SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
1490
1491 SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz);
1492 return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
1493}
1494
1495// Generically expand a vector anyext in register to a shuffle of the relevant
1496// lanes into the appropriate locations, with other lanes left undef.
1497SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
1498 SDLoc DL(Node);
1499 EVT VT = Node->getValueType(0);
1500 int NumElements = VT.getVectorNumElements();
1501 SDValue Src = Node->getOperand(0);
1502 EVT SrcVT = Src.getValueType();
1503 int NumSrcElements = SrcVT.getVectorNumElements();
1504
1505 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1506 // into a larger vector type.
1507 if (SrcVT.bitsLE(VT)) {
1508 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1509 "ANY_EXTEND_VECTOR_INREG vector size mismatch");
1510 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1511 SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
1512 NumSrcElements);
1513 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(SrcVT), Src, 0);
1514 }
1515
1516 // Build a base mask of undef shuffles.
1517 SmallVector<int, 16> ShuffleMask;
1518 ShuffleMask.resize(NumSrcElements, -1);
1519
1520 // Place the extended lanes into the correct locations.
1521 int ExtLaneScale = NumSrcElements / NumElements;
1522 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1523 for (int i = 0; i < NumElements; ++i)
1524 ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
1525
1526 return DAG.getNode(
1527 ISD::BITCAST, DL, VT,
1528 DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getPOISON(SrcVT), ShuffleMask));
1529}
1530
// Expand SIGN_EXTEND_VECTOR_INREG: any-extend first, then shift left/right to
// replicate the source's sign bit into the widened high bits.
// NOTE(review): the line building the any-extend node (defining `Op`) appears
// missing in this copy of the file — verify upstream.
SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();

  // First build an any-extend node which can be legalized above when we
  // recurse through it.

  // Now we need sign extend. Do this by shifting the elements. Even if these
  // aren't legal operations, they have a better chance of being legalized
  // without full scalarization than the sign extension does.
  unsigned EltWidth = VT.getScalarSizeInBits();
  unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
  SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
  return DAG.getNode(ISD::SRA, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
                     ShiftAmount);
}
1551
1552// Generically expand a vector zext in register to a shuffle of the relevant
1553// lanes into the appropriate locations, a blend of zero into the high bits,
1554// and a bitcast to the wider element type.
1555SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
1556 SDLoc DL(Node);
1557 EVT VT = Node->getValueType(0);
1558 int NumElements = VT.getVectorNumElements();
1559 SDValue Src = Node->getOperand(0);
1560 EVT SrcVT = Src.getValueType();
1561 int NumSrcElements = SrcVT.getVectorNumElements();
1562
1563 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1564 // into a larger vector type.
1565 if (SrcVT.bitsLE(VT)) {
1566 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1567 "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
1568 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1569 SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
1570 NumSrcElements);
1571 Src = DAG.getInsertSubvector(DL, DAG.getUNDEF(SrcVT), Src, 0);
1572 }
1573
1574 // Build up a zero vector to blend into this one.
1575 SDValue Zero = DAG.getConstant(0, DL, SrcVT);
1576
1577 // Shuffle the incoming lanes into the correct position, and pull all other
1578 // lanes from the zero vector.
1579 auto ShuffleMask = llvm::to_vector<16>(llvm::seq<int>(0, NumSrcElements));
1580
1581 int ExtLaneScale = NumSrcElements / NumElements;
1582 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1583 for (int i = 0; i < NumElements; ++i)
1584 ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
1585
1586 return DAG.getNode(ISD::BITCAST, DL, VT,
1587 DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
1588}
1589
1590static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
1591 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
1592 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
1593 for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
1594 ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
1595}
1596
// Expand vector BSWAP, preferring a byte shuffle, then a shift-based
// expansion, and finally returning SDValue() to request unrolling.
// NOTE(review): part of the shift-legality condition appears truncated in
// this copy of the file — verify upstream.
SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
  EVT VT = Node->getValueType(0);

  // Scalable vectors can't use shuffle expansion.
  if (VT.isScalableVector())
    return TLI.expandBSWAP(Node, DAG);

  // Generate a byte wise shuffle mask for the BSWAP.
  SmallVector<int, 16> ShuffleMask;
  createBSWAPShuffleMask(VT, ShuffleMask);
  EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());

  // Only emit a shuffle if the mask is legal.
  if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
    SDLoc DL(Node);
    // Reinterpret as bytes, reverse the bytes of each element, cast back.
    SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
    Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getPOISON(ByteVT),
                              ShuffleMask);
    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
  }

  // If we have the appropriate vector bit operations, it is better to use them
  // than unrolling and expanding each component.
  if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
    return TLI.expandBSWAP(Node, DAG);

  // Otherwise let the caller unroll.
  return SDValue();
}
1629
// Expand vector BITREVERSE. Tries, in order: generic scalable expansion, a
// BSWAP shuffle + byte-wise BITREVERSE, a shift-based expansion, and finally
// SDValue() to request unrolling.
// NOTE(review): several legality-check lines appear truncated in this copy of
// the file — verify upstream.
SDValue VectorLegalizer::ExpandBITREVERSE(SDNode *Node) {
  EVT VT = Node->getValueType(0);

  // We can't unroll or use shuffles for scalable vectors.
  if (VT.isScalableVector())
    return TLI.expandBITREVERSE(Node, DAG);

  // If we have the scalar operation, it's probably cheaper to unroll it.
    return SDValue();

  // If the vector element width is a whole number of bytes, test if its legal
  // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
  // vector. This greatly reduces the number of bit shifts necessary.
  unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
  if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
    SmallVector<int, 16> BSWAPMask;
    createBSWAPShuffleMask(VT, BSWAPMask);

    EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
    if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
        (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
         TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
      SDLoc DL(Node);
      // Byte-swap via shuffle, then reverse the bits within each byte.
      SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
      Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getPOISON(ByteVT),
                                BSWAPMask);
      Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
      Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
      return Op;
    }
  }

  // If we have the appropriate vector bit operations, it is better to use them
  // than unrolling and expanding each component.
  if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
    return TLI.expandBITREVERSE(Node, DAG);

  // Otherwise unroll.
  return SDValue();
}
1677
1678SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
1679 // Implement VSELECT in terms of XOR, AND, OR
1680 // on platforms which do not support blend natively.
1681 SDLoc DL(Node);
1682
1683 SDValue Mask = Node->getOperand(0);
1684 SDValue Op1 = Node->getOperand(1);
1685 SDValue Op2 = Node->getOperand(2);
1686
1687 EVT VT = Mask.getValueType();
1688
1689 // If we can't even use the basic vector operations of
1690 // AND,OR,XOR, we will have to scalarize the op.
1691 // Notice that the operation may be 'promoted' which means that it is
1692 // 'bitcasted' to another type which is handled.
1693 if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
1694 TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
1695 TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
1696 return SDValue();
1697
1698 // This operation also isn't safe with AND, OR, XOR when the boolean type is
1699 // 0/1 and the select operands aren't also booleans, as we need an all-ones
1700 // vector constant to mask with.
1701 // FIXME: Sign extend 1 to all ones if that's legal on the target.
1702 auto BoolContents = TLI.getBooleanContents(Op1.getValueType());
1703 if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
1704 !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
1705 Op1.getValueType().getVectorElementType() == MVT::i1))
1706 return SDValue();
1707
1708 // If the mask and the type are different sizes, unroll the vector op. This
1709 // can occur when getSetCCResultType returns something that is different in
1710 // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
1711 if (VT.getSizeInBits() != Op1.getValueSizeInBits())
1712 return SDValue();
1713
1714 // Bitcast the operands to be the same type as the mask.
1715 // This is needed when we select between FP types because
1716 // the mask is a vector of integers.
1717 Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
1718 Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
1719
1720 SDValue NotMask = DAG.getNOT(DL, Mask, VT);
1721
1722 Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
1723 Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
1724 SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
1725 return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
1726}
1727
1728SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
1729 // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
1730 // do not support it natively.
1731 SDLoc DL(Node);
1732
1733 SDValue Mask = Node->getOperand(0);
1734 SDValue Op1 = Node->getOperand(1);
1735 SDValue Op2 = Node->getOperand(2);
1736 SDValue EVL = Node->getOperand(3);
1737
1738 EVT VT = Mask.getValueType();
1739
1740 // If we can't even use the basic vector operations of
1741 // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
1742 if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
1743 TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
1744 TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
1745 return SDValue();
1746
1747 // This operation also isn't safe when the operands aren't also booleans.
1748 if (Op1.getValueType().getVectorElementType() != MVT::i1)
1749 return SDValue();
1750
1751 SDValue Ones = DAG.getAllOnesConstant(DL, VT);
1752 SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL);
1753
1754 Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL);
1755 Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL);
1756 return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL);
1757}
1758
SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
  // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
  // indices less than the EVL/pivot are true. Combine that with the original
  // mask for a full-length mask. Use a full-length VSELECT to select between
  // the true and false values.
  SDLoc DL(Node);

  SDValue Mask = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue Op2 = Node->getOperand(2);
  SDValue EVL = Node->getOperand(3);

  EVT MaskVT = Mask.getValueType();
  bool IsFixedLen = MaskVT.isFixedLengthVector();

  // Integer vector with the mask's element count, used to compare each lane
  // index against EVL.
  EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), EVL.getValueType(),
                                  MaskVT.getVectorElementCount());

  // If we can't construct the EVL mask efficiently, it's better to unroll.
  // NOTE(review): parts of this legality condition appear truncated in this
  // copy of the file — verify upstream.
  if ((IsFixedLen &&
      (!IsFixedLen &&
       (!TLI.isOperationLegalOrCustom(ISD::STEP_VECTOR, EVLVecVT) ||
    return SDValue();

  // If using a SETCC would result in a different type than the mask type,
  // unroll.
  if (TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                             EVLVecVT) != MaskVT)
    return SDValue();

  // EVLMask lane i is true iff i < EVL (unsigned compare of a step vector
  // against the splatted EVL).
  SDValue StepVec = DAG.getStepVector(DL, EVLVecVT);
  SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL);
  SDValue EVLMask =
      DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT);

  SDValue FullMask = DAG.getNode(ISD::AND, DL, MaskVT, Mask, EVLMask);
  return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2);
}
1799
1800SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
1801 // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
1802 EVT VT = Node->getValueType(0);
1803
1804 unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;
1805
1806 if (!TLI.isOperationLegalOrCustom(DivOpc, VT) ||
1807 !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) ||
1808 !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT))
1809 return SDValue();
1810
1811 SDLoc DL(Node);
1812
1813 SDValue Dividend = Node->getOperand(0);
1814 SDValue Divisor = Node->getOperand(1);
1815 SDValue Mask = Node->getOperand(2);
1816 SDValue EVL = Node->getOperand(3);
1817
1818 // X % Y -> X-X/Y*Y
1819 SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL);
1820 SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL);
1821 return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL);
1822}
1823
1824SDValue VectorLegalizer::ExpandVP_FNEG(SDNode *Node) {
1825 EVT VT = Node->getValueType(0);
1826 EVT IntVT = VT.changeVectorElementTypeToInteger();
1827
1828 if (!TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
1829 return SDValue();
1830
1831 SDValue Mask = Node->getOperand(1);
1832 SDValue EVL = Node->getOperand(2);
1833
1834 SDLoc DL(Node);
1835 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1836 SDValue SignMask = DAG.getConstant(
1837 APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
1838 SDValue Xor = DAG.getNode(ISD::VP_XOR, DL, IntVT, Cast, SignMask, Mask, EVL);
1839 return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
1840}
1841
// Expand VP_FABS by clearing the sign bit: bitcast to integer and VP_AND
// with a mask of all bits except the sign bit.
// NOTE(review): the constant operand of ClearSignMask appears truncated in
// this copy of the file — verify upstream.
SDValue VectorLegalizer::ExpandVP_FABS(SDNode *Node) {
  EVT VT = Node->getValueType(0);
  EVT IntVT = VT.changeVectorElementTypeToInteger();

  if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT))
    return SDValue();

  SDValue Mask = Node->getOperand(1);
  SDValue EVL = Node->getOperand(2);

  SDLoc DL(Node);
  SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
  SDValue ClearSignMask = DAG.getConstant(
  SDValue ClearSign =
      DAG.getNode(ISD::VP_AND, DL, IntVT, Cast, ClearSignMask, Mask, EVL);
  return DAG.getNode(ISD::BITCAST, DL, VT, ClearSign);
}
1860
// Expand VP_FCOPYSIGN via integer bit manipulation: isolate the sign bit of
// the sign operand, clear the sign bit of the magnitude operand, and OR the
// two disjoint pieces together.
// NOTE(review): the constant operand of ClearSignMask appears truncated in
// this copy of the file — verify upstream.
SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) {
  EVT VT = Node->getValueType(0);

  // Only handle same-typed magnitude and sign operands.
  if (VT != Node->getOperand(1).getValueType())
    return SDValue();

  EVT IntVT = VT.changeVectorElementTypeToInteger();
  if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT) ||
      !TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
    return SDValue();

  SDValue Mask = Node->getOperand(2);
  SDValue EVL = Node->getOperand(3);

  SDLoc DL(Node);
  // Work on the raw integer bit patterns of both operands.
  SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
  SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));

  // Isolate the sign bit of the sign operand.
  SDValue SignMask = DAG.getConstant(
      APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
  SDValue SignBit =
      DAG.getNode(ISD::VP_AND, DL, IntVT, Sign, SignMask, Mask, EVL);

  // Clear the sign bit of the magnitude operand.
  SDValue ClearSignMask = DAG.getConstant(
  SDValue ClearedSign =
      DAG.getNode(ISD::VP_AND, DL, IntVT, Mag, ClearSignMask, Mask, EVL);

  // Recombine; the pieces have no overlapping bits, hence Disjoint.
  SDValue CopiedSign = DAG.getNode(ISD::VP_OR, DL, IntVT, ClearedSign, SignBit,
                                   Mask, EVL, SDNodeFlags::Disjoint);

  return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
}
1894
1895SDValue VectorLegalizer::ExpandLOOP_DEPENDENCE_MASK(SDNode *N) {
1896 SDLoc DL(N);
1897 EVT VT = N->getValueType(0);
1898 SDValue SourceValue = N->getOperand(0);
1899 SDValue SinkValue = N->getOperand(1);
1900 SDValue EltSizeInBytes = N->getOperand(2);
1901
1902 // Note: The lane offset is scalable if the mask is scalable.
1903 ElementCount LaneOffsetEC =
1904 ElementCount::get(N->getConstantOperandVal(3), VT.isScalableVT());
1905
1906 EVT PtrVT = SourceValue->getValueType(0);
1907 bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK;
1908
1909 // Take the difference between the pointers and divided by the element size,
1910 // to see how many lanes separate them.
1911 SDValue Diff = DAG.getNode(ISD::SUB, DL, PtrVT, SinkValue, SourceValue);
1912 if (IsReadAfterWrite)
1913 Diff = DAG.getNode(ISD::ABS, DL, PtrVT, Diff);
1914 Diff = DAG.getNode(ISD::SDIV, DL, PtrVT, Diff, EltSizeInBytes);
1915
1916 // The pointers do not alias if:
1917 // * Diff <= 0 (WAR_MASK)
1918 // * Diff == 0 (RAW_MASK)
1919 EVT CmpVT =
1920 TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), PtrVT);
1921 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
1922 SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero,
1923 IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
1924
1925 // The pointers do not alias if:
1926 // Lane + LaneOffset < Diff (WAR/RAW_MASK)
1927 SDValue LaneOffset = DAG.getElementCount(DL, PtrVT, LaneOffsetEC);
1928 SDValue MaskN =
1929 DAG.getSelect(DL, PtrVT, Cmp, DAG.getConstant(-1, DL, PtrVT), Diff);
1930
1931 return DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, VT, LaneOffset, MaskN);
1932}
1933
1934void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
1935 SmallVectorImpl<SDValue> &Results) {
1936 // Attempt to expand using TargetLowering.
1937 SDValue Result, Chain;
1938 if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) {
1939 Results.push_back(Result);
1940 if (Node->isStrictFPOpcode())
1941 Results.push_back(Chain);
1942 return;
1943 }
1944
1945 // Otherwise go ahead and unroll.
1946 if (Node->isStrictFPOpcode()) {
1947 UnrollStrictFPOp(Node, Results);
1948 return;
1949 }
1950
1951 Results.push_back(DAG.UnrollVectorOp(Node));
1952}
1953
1954void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
1955 SmallVectorImpl<SDValue> &Results) {
1956 bool IsStrict = Node->isStrictFPOpcode();
1957 unsigned OpNo = IsStrict ? 1 : 0;
1958 SDValue Src = Node->getOperand(OpNo);
1959 EVT SrcVT = Src.getValueType();
1960 EVT DstVT = Node->getValueType(0);
1961 SDLoc DL(Node);
1962
1963 // Attempt to expand using TargetLowering.
1965 SDValue Chain;
1966 if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) {
1967 Results.push_back(Result);
1968 if (IsStrict)
1969 Results.push_back(Chain);
1970 return;
1971 }
1972
1973 // Make sure that the SINT_TO_FP and SRL instructions are available.
1974 if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) ==
1975 TargetLowering::Expand) ||
1976 (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, SrcVT) ==
1977 TargetLowering::Expand)) ||
1978 TLI.getOperationAction(ISD::SRL, SrcVT) == TargetLowering::Expand) {
1979 if (IsStrict) {
1980 UnrollStrictFPOp(Node, Results);
1981 return;
1982 }
1983
1984 Results.push_back(DAG.UnrollVectorOp(Node));
1985 return;
1986 }
1987
1988 unsigned BW = SrcVT.getScalarSizeInBits();
1989 assert((BW == 64 || BW == 32) &&
1990 "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
1991
1992 // If STRICT_/FMUL is not supported by the target (in case of f16) replace the
1993 // UINT_TO_FP with a larger float and round to the smaller type
1994 if ((!IsStrict && !TLI.isOperationLegalOrCustom(ISD::FMUL, DstVT)) ||
1995 (IsStrict && !TLI.isOperationLegalOrCustom(ISD::STRICT_FMUL, DstVT))) {
1996 EVT FPVT = BW == 32 ? MVT::f32 : MVT::f64;
1997 SDValue UIToFP;
1999 SDValue TargetZero = DAG.getIntPtrConstant(0, DL, /*isTarget=*/true);
2000 EVT FloatVecVT = SrcVT.changeVectorElementType(*DAG.getContext(), FPVT);
2001 if (IsStrict) {
2002 UIToFP = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {FloatVecVT, MVT::Other},
2003 {Node->getOperand(0), Src});
2004 Result = DAG.getNode(ISD::STRICT_FP_ROUND, DL, {DstVT, MVT::Other},
2005 {Node->getOperand(0), UIToFP, TargetZero});
2006 Results.push_back(Result);
2007 Results.push_back(Result.getValue(1));
2008 } else {
2009 UIToFP = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVecVT, Src);
2010 Result = DAG.getNode(ISD::FP_ROUND, DL, DstVT, UIToFP, TargetZero);
2011 Results.push_back(Result);
2012 }
2013
2014 return;
2015 }
2016
2017 SDValue HalfWord = DAG.getConstant(BW / 2, DL, SrcVT);
2018
2019 // Constants to clear the upper part of the word.
2020 // Notice that we can also use SHL+SHR, but using a constant is slightly
2021 // faster on x86.
2022 uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
2023 SDValue HalfWordMask = DAG.getConstant(HWMask, DL, SrcVT);
2024
2025 // Two to the power of half-word-size.
2026 SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, DstVT);
2027
2028 // Clear upper part of LO, lower HI
2029 SDValue HI = DAG.getNode(ISD::SRL, DL, SrcVT, Src, HalfWord);
2030 SDValue LO = DAG.getNode(ISD::AND, DL, SrcVT, Src, HalfWordMask);
2031
2032 if (IsStrict) {
2033 // Convert hi and lo to floats
2034 // Convert the hi part back to the upper values
2035 // TODO: Can any fast-math-flags be set on these nodes?
2036 SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other},
2037 {Node->getOperand(0), HI});
2038 fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {DstVT, MVT::Other},
2039 {fHI.getValue(1), fHI, TWOHW});
2040 SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other},
2041 {Node->getOperand(0), LO});
2042
2043 SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
2044 fLO.getValue(1));
2045
2046 // Add the two halves
2047 SDValue Result =
2048 DAG.getNode(ISD::STRICT_FADD, DL, {DstVT, MVT::Other}, {TF, fHI, fLO});
2049
2050 Results.push_back(Result);
2051 Results.push_back(Result.getValue(1));
2052 return;
2053 }
2054
2055 // Convert hi and lo to floats
2056 // Convert the hi part back to the upper values
2057 // TODO: Can any fast-math-flags be set on these nodes?
2058 SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, HI);
2059 fHI = DAG.getNode(ISD::FMUL, DL, DstVT, fHI, TWOHW);
2060 SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, LO);
2061
2062 // Add the two halves
2063 Results.push_back(DAG.getNode(ISD::FADD, DL, DstVT, fHI, fLO));
2064}
2065
2066SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
2067 EVT VT = Node->getValueType(0);
2068 EVT IntVT = VT.changeVectorElementTypeToInteger();
2069
2070 if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT))
2071 return SDValue();
2072
2073 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
2075 !VT.isScalableVector())
2076 return SDValue();
2077
2078 SDLoc DL(Node);
2079 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
2080 SDValue SignMask = DAG.getConstant(
2081 APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
2082 SDValue Xor = DAG.getNode(ISD::XOR, DL, IntVT, Cast, SignMask);
2083 return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
2084}
2085
2086SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
2087 EVT VT = Node->getValueType(0);
2088 EVT IntVT = VT.changeVectorElementTypeToInteger();
2089
2090 if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
2091 return SDValue();
2092
2093 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
2095 !VT.isScalableVector())
2096 return SDValue();
2097
2098 SDLoc DL(Node);
2099 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
2100 SDValue ClearSignMask = DAG.getConstant(
2102 SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Cast, ClearSignMask);
2103 return DAG.getNode(ISD::BITCAST, DL, VT, ClearedSign);
2104}
2105
2106SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
2107 EVT VT = Node->getValueType(0);
2108 EVT IntVT = VT.changeVectorElementTypeToInteger();
2109
2110 if (VT != Node->getOperand(1).getValueType() ||
2111 !TLI.isOperationLegalOrCustom(ISD::AND, IntVT) ||
2112 !TLI.isOperationLegalOrCustom(ISD::OR, IntVT))
2113 return SDValue();
2114
2115 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
2117 !VT.isScalableVector())
2118 return SDValue();
2119
2120 SDLoc DL(Node);
2121 SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
2122 SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
2123
2124 SDValue SignMask = DAG.getConstant(
2125 APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
2126 SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, Sign, SignMask);
2127
2128 SDValue ClearSignMask = DAG.getConstant(
2130 SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Mag, ClearSignMask);
2131
2132 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit,
2134
2135 return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
2136}
2137
2138void VectorLegalizer::ExpandFSUB(SDNode *Node,
2139 SmallVectorImpl<SDValue> &Results) {
2140 // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
2141 // we can defer this to operation legalization where it will be lowered as
2142 // a+(-b).
2143 EVT VT = Node->getValueType(0);
2144 if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
2146 return; // Defer to LegalizeDAG
2147
2148 if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
2149 Results.push_back(Expanded);
2150 return;
2151 }
2152
2153 SDValue Tmp = DAG.UnrollVectorOp(Node);
2154 Results.push_back(Tmp);
2155}
2156
2157void VectorLegalizer::ExpandSETCC(SDNode *Node,
2158 SmallVectorImpl<SDValue> &Results) {
2159 bool NeedInvert = false;
2160 bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
2161 bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
2162 Node->getOpcode() == ISD::STRICT_FSETCCS;
2163 bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
2164 unsigned Offset = IsStrict ? 1 : 0;
2165
2166 SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
2167 SDValue LHS = Node->getOperand(0 + Offset);
2168 SDValue RHS = Node->getOperand(1 + Offset);
2169 SDValue CC = Node->getOperand(2 + Offset);
2170
2171 MVT OpVT = LHS.getSimpleValueType();
2172 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
2173
2174 if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
2175 if (IsStrict) {
2176 UnrollStrictFPOp(Node, Results);
2177 return;
2178 }
2179 Results.push_back(UnrollVSETCC(Node));
2180 return;
2181 }
2182
2183 SDValue Mask, EVL;
2184 if (IsVP) {
2185 Mask = Node->getOperand(3 + Offset);
2186 EVL = Node->getOperand(4 + Offset);
2187 }
2188
2189 SDLoc dl(Node);
2190 bool Legalized =
2191 TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask,
2192 EVL, NeedInvert, dl, Chain, IsSignaling);
2193
2194 if (Legalized) {
2195 // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
2196 // condition code, create a new SETCC node.
2197 if (CC.getNode()) {
2198 if (IsStrict) {
2199 LHS = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
2200 {Chain, LHS, RHS, CC}, Node->getFlags());
2201 Chain = LHS.getValue(1);
2202 } else if (IsVP) {
2203 LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0),
2204 {LHS, RHS, CC, Mask, EVL}, Node->getFlags());
2205 } else {
2206 LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
2207 Node->getFlags());
2208 }
2209 }
2210
2211 // If we expanded the SETCC by inverting the condition code, then wrap
2212 // the existing SETCC in a NOT to restore the intended condition.
2213 if (NeedInvert) {
2214 if (!IsVP)
2215 LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
2216 else
2217 LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0));
2218 }
2219 } else {
2220 assert(!IsStrict && "Don't know how to expand for strict nodes.");
2221
2222 // Otherwise, SETCC for the given comparison type must be completely
2223 // illegal; expand it into a SELECT_CC.
2224 EVT VT = Node->getValueType(0);
2225 LHS = DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
2226 DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
2227 DAG.getBoolConstant(false, dl, VT, LHS.getValueType()),
2228 CC, Node->getFlags());
2229 }
2230
2231 Results.push_back(LHS);
2232 if (IsStrict)
2233 Results.push_back(Chain);
2234}
2235
2236void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
2237 SmallVectorImpl<SDValue> &Results) {
2238 SDValue Result, Overflow;
2239 TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
2240 Results.push_back(Result);
2241 Results.push_back(Overflow);
2242}
2243
2244void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
2245 SmallVectorImpl<SDValue> &Results) {
2246 SDValue Result, Overflow;
2247 TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
2248 Results.push_back(Result);
2249 Results.push_back(Overflow);
2250}
2251
2252void VectorLegalizer::ExpandMULO(SDNode *Node,
2253 SmallVectorImpl<SDValue> &Results) {
2254 SDValue Result, Overflow;
2255 if (!TLI.expandMULO(Node, Result, Overflow, DAG))
2256 std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node);
2257
2258 Results.push_back(Result);
2259 Results.push_back(Overflow);
2260}
2261
2262void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
2263 SmallVectorImpl<SDValue> &Results) {
2264 SDNode *N = Node;
2265 if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
2266 N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
2267 Results.push_back(Expanded);
2268}
2269
2270void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
2271 SmallVectorImpl<SDValue> &Results) {
2272 if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
2273 ExpandUINT_TO_FLOAT(Node, Results);
2274 return;
2275 }
2276 if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
2277 ExpandFP_TO_UINT(Node, Results);
2278 return;
2279 }
2280
2281 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2282 Node->getOpcode() == ISD::STRICT_FSETCCS) {
2283 ExpandSETCC(Node, Results);
2284 return;
2285 }
2286
2287 UnrollStrictFPOp(Node, Results);
2288}
2289
2290void VectorLegalizer::ExpandREM(SDNode *Node,
2291 SmallVectorImpl<SDValue> &Results) {
2292 assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
2293 "Expected REM node");
2294
2296 if (!TLI.expandREM(Node, Result, DAG))
2297 Result = DAG.UnrollVectorOp(Node);
2298 Results.push_back(Result);
2299}
2300
2301// Try to expand libm nodes into vector math routine calls. Callers provide the
2302// LibFunc equivalent of the passed in Node, which is used to lookup mappings
2303// within TargetLibraryInfo. The only mappings considered are those where the
2304// result and all operands are the same vector type. While predicated nodes are
2305// not supported, we will emit calls to masked routines by passing in an all
2306// true mask.
2307bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
2308 SmallVectorImpl<SDValue> &Results) {
2309 // Chain must be propagated but currently strict fp operations are down
2310 // converted to their none strict counterpart.
2311 assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!");
2312
2313 RTLIB::LibcallImpl LCImpl = DAG.getLibcalls().getLibcallImpl(LC);
2314 if (LCImpl == RTLIB::Unsupported)
2315 return false;
2316
2317 EVT VT = Node->getValueType(0);
2318 const RTLIB::RuntimeLibcallsInfo &RTLCI = TLI.getRuntimeLibcallsInfo();
2319 LLVMContext &Ctx = *DAG.getContext();
2320
2321 auto [FuncTy, FuncAttrs] = RTLCI.getFunctionTy(
2322 Ctx, DAG.getSubtarget().getTargetTriple(), DAG.getDataLayout(), LCImpl);
2323
2324 SDLoc DL(Node);
2325 TargetLowering::ArgListTy Args;
2326
2327 bool HasMaskArg = RTLCI.hasVectorMaskArgument(LCImpl);
2328
2329 // Sanity check just in case function has unexpected parameters.
2330 assert(FuncTy->getNumParams() == Node->getNumOperands() + HasMaskArg &&
2331 EVT::getEVT(FuncTy->getReturnType(), true) == VT &&
2332 "mismatch in value type and call signature type");
2333
2334 for (unsigned I = 0, E = FuncTy->getNumParams(); I != E; ++I) {
2335 Type *ParamTy = FuncTy->getParamType(I);
2336
2337 if (HasMaskArg && I == E - 1) {
2338 assert(cast<VectorType>(ParamTy)->getElementType()->isIntegerTy(1) &&
2339 "unexpected vector mask type");
2340 EVT MaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), Ctx, VT);
2341 Args.emplace_back(DAG.getBoolConstant(true, DL, MaskVT, VT),
2342 MaskVT.getTypeForEVT(Ctx));
2343
2344 } else {
2345 SDValue Op = Node->getOperand(I);
2346 assert(Op.getValueType() == EVT::getEVT(ParamTy, true) &&
2347 "mismatch in value type and call argument type");
2348 Args.emplace_back(Op, ParamTy);
2349 }
2350 }
2351
2352 // Emit a call to the vector function.
2353 SDValue Callee =
2354 DAG.getExternalSymbol(LCImpl, TLI.getPointerTy(DAG.getDataLayout()));
2355 CallingConv::ID CC = RTLCI.getLibcallImplCallingConv(LCImpl);
2356
2357 TargetLowering::CallLoweringInfo CLI(DAG);
2358 CLI.setDebugLoc(DL)
2359 .setChain(DAG.getEntryNode())
2360 .setLibCallee(CC, FuncTy->getReturnType(), Callee, std::move(Args));
2361
2362 std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
2363 Results.push_back(CallResult.first);
2364 return true;
2365}
2366
2367void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
2368 SmallVectorImpl<SDValue> &Results) {
2369 EVT VT = Node->getValueType(0);
2370 EVT EltVT = VT.getVectorElementType();
2371 unsigned NumElems = VT.getVectorNumElements();
2372 unsigned NumOpers = Node->getNumOperands();
2373 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2374
2375 EVT TmpEltVT = EltVT;
2376 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2377 Node->getOpcode() == ISD::STRICT_FSETCCS)
2378 TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
2379 *DAG.getContext(), TmpEltVT);
2380
2381 EVT ValueVTs[] = {TmpEltVT, MVT::Other};
2382 SDValue Chain = Node->getOperand(0);
2383 SDLoc dl(Node);
2384
2385 SmallVector<SDValue, 32> OpValues;
2386 SmallVector<SDValue, 32> OpChains;
2387 for (unsigned i = 0; i < NumElems; ++i) {
2389 SDValue Idx = DAG.getVectorIdxConstant(i, dl);
2390
2391 // The Chain is the first operand.
2392 Opers.push_back(Chain);
2393
2394 // Now process the remaining operands.
2395 for (unsigned j = 1; j < NumOpers; ++j) {
2396 SDValue Oper = Node->getOperand(j);
2397 EVT OperVT = Oper.getValueType();
2398
2399 if (OperVT.isVector())
2400 Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
2401 OperVT.getVectorElementType(), Oper, Idx);
2402
2403 Opers.push_back(Oper);
2404 }
2405
2406 SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
2407 SDValue ScalarResult = ScalarOp.getValue(0);
2408 SDValue ScalarChain = ScalarOp.getValue(1);
2409
2410 if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2411 Node->getOpcode() == ISD::STRICT_FSETCCS)
2412 ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
2413 DAG.getAllOnesConstant(dl, EltVT),
2414 DAG.getConstant(0, dl, EltVT));
2415
2416 OpValues.push_back(ScalarResult);
2417 OpChains.push_back(ScalarChain);
2418 }
2419
2420 SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
2421 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
2422
2423 Results.push_back(Result);
2424 Results.push_back(NewChain);
2425}
2426
2427SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
2428 EVT VT = Node->getValueType(0);
2429 unsigned NumElems = VT.getVectorNumElements();
2430 EVT EltVT = VT.getVectorElementType();
2431 SDValue LHS = Node->getOperand(0);
2432 SDValue RHS = Node->getOperand(1);
2433 SDValue CC = Node->getOperand(2);
2434 EVT TmpEltVT = LHS.getValueType().getVectorElementType();
2435 SDLoc dl(Node);
2436 SmallVector<SDValue, 8> Ops(NumElems);
2437 for (unsigned i = 0; i < NumElems; ++i) {
2438 SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
2439 DAG.getVectorIdxConstant(i, dl));
2440 SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
2441 DAG.getVectorIdxConstant(i, dl));
2442 // FIXME: We should use i1 setcc + boolext here, but it causes regressions.
2443 Ops[i] = DAG.getNode(ISD::SETCC, dl,
2445 *DAG.getContext(), TmpEltVT),
2446 LHSElem, RHSElem, CC);
2447 Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
2448 DAG.getBoolConstant(true, dl, EltVT, VT),
2449 DAG.getConstant(0, dl, EltVT));
2450 }
2451 return DAG.getBuildVector(VT, dl, Ops);
2452}
2453
2455 return VectorLegalizer(*this).Run();
2456}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file defines the DenseMap class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl< int > &ShuffleMask)
#define I(x, y, z)
Definition MD5.cpp:57
#define T
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
BinaryOperator * Mul
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
bool isBigEndian() const
Definition DataLayout.h:216
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
size_t size() const
Definition Function.h:858
LLVM_ABI RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
const Triple & getTargetTriple() const
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
MVT getVectorElementType() const
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition ArrayRef.h:298
Represents one node in the SelectionDAG.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getElementCount(const SDLoc &DL, EVT VT, ElementCount EC)
const SDValue & getRoot() const
Return the root tag of the SelectionDAG.
const TargetSubtargetInfo & getSubtarget() const
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI bool LegalizeVectors()
This transforms the SelectionDAG into a SelectionDAG that only uses vector math operations supported ...
LLVM_ABI SDValue UnrollVectorOp(SDNode *N, unsigned ResNE=0)
Utility function used by legalize and lowering to "unroll" a vector operation by splitting out the sc...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getInsertSubvector(const SDLoc &DL, SDValue Vec, SDValue SubVec, unsigned Idx)
Insert SubVec at the Idx element of Vec.
LLVM_ABI SDValue getStepVector(const SDLoc &DL, EVT ResVT, const APInt &StepVal)
Returns a vector of type ResVT whose elements contain the linear sequence <0, Step,...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false, SDNodeFlags Flags={})
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
const TargetLowering & getTargetLoweringInfo() const
LLVM_ABI std::pair< SDValue, SDValue > UnrollVectorOverflowOp(SDNode *N, unsigned ResNE=0)
Like UnrollVectorOp(), but for the [US](ADD|SUB|MUL)O family of opcodes.
allnodes_const_iterator allnodes_begin() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
allnodes_const_iterator allnodes_end() const
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build Select's if you just have operands and don't want to check...
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
LLVM_ABI SDValue getExternalSymbol(const char *Sym, EVT VT)
LLVM_ABI SDValue getVPLogicalNOT(const SDLoc &DL, SDValue Val, SDValue Mask, SDValue EVL, EVT VT)
Create a vector-predicated logical NOT operation as (VP_XOR Val, BooleanOne, Mask,...
const LibcallLoweringInfo & getLibcalls() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI unsigned AssignTopologicalOrder()
Topological-sort the AllNodes list and a assign a unique node id for each node in the DAG based on th...
LLVM_ABI SDValue getBoolConstant(bool V, const SDLoc &DL, EVT VT, EVT OpVT)
Create a true or false constant of type VT using the target's BooleanContent for type OpVT.
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
LLVMContext * getContext() const
const SDValue & setRoot(SDValue N)
Set the current root tag of the SelectionDAG.
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
SDValue getSplat(EVT VT, const SDLoc &DL, SDValue Op)
Returns a node representing a splat of one value into all lanes of the provided vector type.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
LLVM_ABI SDValue getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a logical NOT operation as (XOR Val, BooleanOne).
ilist< SDNode >::iterator allnodes_iterator
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void resize(size_type N)
void push_back(const T &Elt)
virtual bool isShuffleMaskLegal(ArrayRef< int >, EVT) const
Targets can use this to indicate that they only support some VECTOR_SHUFFLE operations,...
SDValue promoteTargetBoolean(SelectionDAG &DAG, SDValue Bool, EVT ValVT) const
Promote the given target boolean to a target boolean of the given type.
LegalizeAction getCondCodeAction(ISD::CondCode CC, MVT VT) const
Return how the condition code should be treated: either it is legal, needs to be expanded to some oth...
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace) const
Return how this store with truncation should be treated: either it is legal, needs to be promoted to ...
LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT, unsigned Scale) const
Some fixed point operations may be natively supported by the target but only for specific scales.
bool isStrictFPEnabled() const
Return true if the target support strict float operation.
virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const
Return the ValueType of the result of SETCC operations.
BooleanContent getBooleanContents(bool isVec, bool isFloat) const
For targets without i1 registers, this gives the nature of the high-bits of boolean values held in ty...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
LegalizeAction getPartialReduceMLAAction(unsigned Opc, EVT AccVT, EVT InputVT) const
Return how a PARTIAL_REDUCE_U/SMLA node with Acc type AccVT and Input type InputVT should be treated.
LegalizeAction getLoadAction(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return how this load with extension should be treated: either it is legal, needs to be promoted to a ...
LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const
LegalizeAction getOperationAction(unsigned Op, EVT VT) const
Return how this operation should be treated: either it is legal, needs to be promoted to a larger siz...
MVT getTypeToPromoteTo(unsigned Op, MVT VT) const
If the action for this operation is to promote, this method returns the ValueType to promote to.
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
const RTLIB::RuntimeLibcallsInfo & getRuntimeLibcallsInfo() const
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT.
bool expandMultipleResultFPLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, SDNode *Node, SmallVectorImpl< SDValue > &Results, std::optional< unsigned > CallRetResNo={}) const
Expands a node with multiple results to an FP or vector libcall.
SDValue expandVPCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTLZ/VP_CTLZ_ZERO_UNDEF nodes.
bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]MULO.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
SDValue expandVPBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand VP_BSWAP nodes.
SDValue expandVecReduceSeq(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_SEQ_* into an explicit ordered calculation.
SDValue expandFCANONICALIZE(SDNode *Node, SelectionDAG &DAG) const
Expand FCANONICALIZE to FMUL with 1.
SDValue expandCTLZ(SDNode *N, SelectionDAG &DAG) const
Expand CTLZ/CTLZ_ZERO_UNDEF nodes.
SDValue expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand BITREVERSE nodes.
SDValue expandCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand CTTZ/CTTZ_ZERO_UNDEF nodes.
SDValue expandABD(SDNode *N, SelectionDAG &DAG) const
Expand ABDS/ABDU nodes.
SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const
Expand carryless multiply.
SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]SHLSAT.
SDValue expandFP_TO_INT_SAT(SDNode *N, SelectionDAG &DAG) const
Expand FP_TO_[US]INT_SAT into FP_TO_[US]INT and selects or min/max.
SDValue expandCttzElts(SDNode *Node, SelectionDAG &DAG) const
Expand a CTTZ_ELTS or CTTZ_ELTS_ZERO_POISON by calculating (VL - i) for each active lane (i),...
void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::S(ADD|SUB)O.
SDValue expandVPBITREVERSE(SDNode *N, SelectionDAG &DAG) const
Expand VP_BITREVERSE nodes.
SDValue expandABS(SDNode *N, SelectionDAG &DAG, bool IsNegative=false) const
Expand ABS nodes.
SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const
Expand a VECREDUCE_* into an explicit calculation.
bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand float to UINT conversion.
bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const
Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
SDValue expandFMINIMUMNUM_FMAXIMUMNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimumnum/fmaximumnum into multiple comparison with selects.
SDValue expandCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand CTPOP nodes.
SDValue expandVectorNaryOpBySplitting(SDNode *Node, SelectionDAG &DAG) const
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
SDValue expandBSWAP(SDNode *N, SelectionDAG &DAG) const
Expand BSWAP nodes.
SDValue expandFMINIMUM_FMAXIMUM(SDNode *N, SelectionDAG &DAG) const
Expand fminimum/fmaximum into multiple comparison with selects.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const
Expand funnel shift.
bool LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, SDValue Mask, SDValue EVL, bool &NeedInvert, const SDLoc &dl, SDValue &Chain, bool IsSignaling=false) const
Legalize a SETCC or VP_SETCC with given LHS and RHS and condition code CC on the current target.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const
This callback is invoked for operations that are unsupported by the target, which are registered to u...
SDValue expandVPCTPOP(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTPOP nodes.
SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl, SDValue LHS, SDValue RHS, unsigned Scale, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]DIVFIX[SAT].
SDValue expandVPCTTZ(SDNode *N, SelectionDAG &DAG) const
Expand VP_CTTZ/VP_CTTZ_ZERO_UNDEF nodes.
SDValue expandVECTOR_COMPRESS(SDNode *Node, SelectionDAG &DAG) const
Expand a vector VECTOR_COMPRESS into a sequence of extract element, store temporarily,...
SDValue expandROT(SDNode *N, bool AllowVectorOps, SelectionDAG &DAG) const
Expand rotations.
SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const
Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
SDValue expandCMP(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US]CMP.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT].
SDValue expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::[US][MIN|MAX].
SDValue expandVectorFindLastActive(SDNode *N, SelectionDAG &DAG) const
Expand VECTOR_FIND_LAST_ACTIVE nodes.
SDValue expandPartialReduceMLA(SDNode *Node, SelectionDAG &DAG) const
Expands PARTIAL_REDUCE_S/UMLA nodes to a series of simpler operations, consisting of zext/sext,...
void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const
Method for building the DAG expansion of ISD::U(ADD|SUB)O.
bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const
Expand UINT(i64) to double(f64) conversion.
SDValue expandAVG(SDNode *N, SelectionDAG &DAG) const
Expand vector/scalar AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes.
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:788
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ LOOP_DEPENDENCE_RAW_MASK
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition ISDOpcodes.h:275
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:400
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ CTTZ_ELTS
Returns the number of trailing (least significant) zero elements in a vector.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ VECTOR_FIND_LAST_ACTIVE
Finds the index of the last active mask element Operands: Mask.
@ FMODF
FMODF - Decomposes the operand into integral and fractional parts, each having the same type and sign...
@ FATAN2
FATAN2 - atan2, inspired by libm.
@ FSINCOSPI
FSINCOSPI - Compute both the sine and cosine times pi more accurately than FSINCOS(pi*x),...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ FPTRUNC_ROUND
FPTRUNC_ROUND - This corresponds to the fptrunc_round intrinsic.
Definition ISDOpcodes.h:515
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:774
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:407
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition ISDOpcodes.h:438
@ CONVERT_FROM_ARBITRARY_FP
CONVERT_FROM_ARBITRARY_FP - This operator converts from an arbitrary floating-point represented as an...
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:787
@ PARTIAL_REDUCE_FMLA
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:691
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ GET_ACTIVE_LANE_MASK
GET_ACTIVE_LANE_MASK - this corresponds to the llvm.get.active.lane.mask intrinsic.
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:413
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ SCMP
[US]CMP - 3-way comparison of signed or unsigned integers.
Definition ISDOpcodes.h:735
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition ISDOpcodes.h:427
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:699
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ VECREDUCE_FMINIMUM
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VECREDUCE_SEQ_FMUL
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ PARTIAL_REDUCE_SUMLA
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ CTTZ_ELTS_ZERO_POISON
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
@ LOOP_DEPENDENCE_WAR_MASK
The llvm.loop.dependence.
LLVM_ABI std::optional< unsigned > getVPMaskIdx(unsigned Opcode)
The operand position of the vector mask.
LLVM_ABI std::optional< unsigned > getVPExplicitVectorLengthIdx(unsigned Opcode)
The operand position of the explicit vector length parameter.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI bool isVPOpcode(unsigned Opcode)
Whether this is a vector-predicated Opcode.
LLVM_ABI Libcall getREM(EVT VT)
LLVM_ABI Libcall getSINCOSPI(EVT RetVT)
getSINCOSPI - Return the SINCOSPI_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getMODF(EVT VT)
getMODF - Return the MODF_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getCBRT(EVT RetVT)
getCBRT - Return the CBRT_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getPOW(EVT RetVT)
getPOW - Return the POW_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getSINCOS(EVT RetVT)
getSINCOS - Return the SINCOS_* value for the given types, or UNKNOWN_LIBCALL if there is none.
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
@ Xor
Bitwise or logical XOR of integers.
DWARFExpression::Operation Op
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
#define N
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool isScalableVT() const
Return true if the type is a scalable type.
Definition ValueTypes.h:195
bool isFixedLengthVector() const
Definition ValueTypes.h:189
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:316
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
CallingConv::ID getLibcallImplCallingConv(RTLIB::LibcallImpl Call) const
Get the CallingConv that should be used for the specified libcall.
std::pair< FunctionType *, AttributeList > getFunctionTy(LLVMContext &Ctx, const Triple &TT, const DataLayout &DL, RTLIB::LibcallImpl LibcallImpl) const
static bool hasVectorMaskArgument(RTLIB::LibcallImpl Impl)
Returns true if the function has a vector mask argument, which is assumed to be the last argument.