LLVM 23.0.0git
LegalizeVectorTypes.cpp
Go to the documentation of this file.
1//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file performs vector type splitting and scalarization for LegalizeTypes.
10// Scalarization is the act of changing a computation in an illegal one-element
11// vector type to be a computation in its scalar element type. For example,
12// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
13// as a base case when scalarizing vector arithmetic like <4 x f32>, which
14// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
15// types.
16// Splitting is the act of changing a computation in an invalid vector type to
17// be a computation in two vectors of half the size. For example, implementing
18// <128 x f32> operations in terms of two <64 x f32> operations.
19//
20//===----------------------------------------------------------------------===//
21
22#include "LegalizeTypes.h"
27#include "llvm/IR/DataLayout.h"
31#include <numeric>
32
33using namespace llvm;
34
35#define DEBUG_TYPE "legalize-types"
36
37//===----------------------------------------------------------------------===//
38// Result Vector Scalarization: <1 x ty> -> ty.
39//===----------------------------------------------------------------------===//
40
/// Scalarize result number \p ResNo of node \p N, whose result type is a
/// one-element vector: dispatch on the opcode to the matching
/// ScalarizeVecRes_* helper and record the scalar replacement value.
///
/// NOTE(review): several `case` labels in this switch appear to have been
/// dropped during extraction (the LOOP_DEPENDENCE_MASK,
/// CONVERT_FROM_ARBITRARY_FP, ATOMIC_LOAD, VecInregOp, ADDRSPACECAST and
/// FP_TO_XINT_SAT handlers have no visible label) -- restore them from
/// upstream before compiling.
void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
  LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": ";
             N->dump(&DAG));
  // R is left null when a helper registers the result(s) itself.
  SDValue R = SDValue();

  switch (N->getOpcode()) {
  default:
#ifndef NDEBUG
    dbgs() << "ScalarizeVectorResult #" << ResNo << ": ";
    N->dump(&DAG);
    dbgs() << "\n";
#endif
    report_fatal_error("Do not know how to scalarize the result of this "
                       "operator!\n");

  // NOTE(review): missing case label(s) here (loop-dependence masks).
    R = ScalarizeVecRes_LOOP_DEPENDENCE_MASK(N);
    break;
  case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break;
  case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
  case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
  case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
  case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
  // NOTE(review): missing case label(s) here.
    R = ScalarizeVecRes_CONVERT_FROM_ARBITRARY_FP(N);
    break;
  case ISD::AssertZext:
  case ISD::AssertSext:
  case ISD::FPOWI:
  // NOTE(review): possibly a missing case label here.
    R = ScalarizeVecRes_UnaryOpWithExtraInput(N);
    break;
  case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
  // NOTE(review): missing case label here (atomic load).
    R = ScalarizeVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
    break;
  case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
  case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
  case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
  case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break;
  case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
  case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
  case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
  case ISD::POISON:
  case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break;
  case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break;
  case ISD::IS_FPCLASS: R = ScalarizeVecRes_IS_FPCLASS(N); break;
  // NOTE(review): missing *_EXTEND_VECTOR_INREG case label(s) here.
    R = ScalarizeVecRes_VecInregOp(N);
    break;
  case ISD::ABS:
  case ISD::ANY_EXTEND:
  case ISD::BITREVERSE:
  case ISD::BSWAP:
  case ISD::CTLZ:
  case ISD::CTPOP:
  case ISD::CTTZ:
  case ISD::FABS:
  case ISD::FACOS:
  case ISD::FASIN:
  case ISD::FATAN:
  case ISD::FCEIL:
  case ISD::FCOS:
  case ISD::FCOSH:
  case ISD::FEXP:
  case ISD::FEXP2:
  case ISD::FEXP10:
  case ISD::FFLOOR:
  case ISD::FLOG:
  case ISD::FLOG10:
  case ISD::FLOG2:
  case ISD::FNEARBYINT:
  case ISD::FNEG:
  case ISD::FREEZE:
  case ISD::ARITH_FENCE:
  case ISD::FP_EXTEND:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::FRINT:
  case ISD::LRINT:
  case ISD::LLRINT:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::LROUND:
  case ISD::LLROUND:
  case ISD::FSIN:
  case ISD::FSINH:
  case ISD::FSQRT:
  case ISD::FTAN:
  case ISD::FTANH:
  case ISD::FTRUNC:
  case ISD::SIGN_EXTEND:
  case ISD::SINT_TO_FP:
  case ISD::TRUNCATE:
  case ISD::UINT_TO_FP:
  case ISD::ZERO_EXTEND:
  // NOTE(review): possibly a missing case label here.
    R = ScalarizeVecRes_UnaryOp(N);
    break;
  // NOTE(review): missing ADDRSPACECAST case label here.
    R = ScalarizeVecRes_ADDRSPACECAST(N);
    break;
  case ISD::FMODF:
  case ISD::FFREXP:
  case ISD::FSINCOS:
  case ISD::FSINCOSPI:
    R = ScalarizeVecRes_UnaryOpWithTwoResults(N, ResNo);
    break;
  case ISD::ADD:
  case ISD::AND:
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
  case ISD::FADD:
  case ISD::FCOPYSIGN:
  case ISD::FDIV:
  case ISD::FMUL:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  // NOTE(review): possibly missing case label(s) here.
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM:
  case ISD::FLDEXP:
  case ISD::ABDS:
  case ISD::ABDU:
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:

  case ISD::SADDSAT:
  case ISD::UADDSAT:
  case ISD::SSUBSAT:
  case ISD::USUBSAT:
  case ISD::SSHLSAT:
  case ISD::USHLSAT:

  case ISD::FPOW:
  case ISD::FATAN2:
  case ISD::FREM:
  case ISD::FSUB:
  case ISD::MUL:
  case ISD::MULHS:
  case ISD::MULHU:
  case ISD::OR:
  case ISD::SDIV:
  case ISD::SREM:
  case ISD::SUB:
  case ISD::UDIV:
  case ISD::UREM:
  case ISD::XOR:
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
  case ISD::ROTL:
  case ISD::ROTR:
  case ISD::CLMUL:
  case ISD::CLMULR:
  case ISD::CLMULH:
    R = ScalarizeVecRes_BinOp(N);
    break;

  case ISD::MASKED_UDIV:
  case ISD::MASKED_SDIV:
  case ISD::MASKED_UREM:
  case ISD::MASKED_SREM:
    R = ScalarizeVecRes_MaskedBinOp(N);
    break;

  case ISD::SCMP:
  case ISD::UCMP:
    R = ScalarizeVecRes_CMP(N);
    break;

  case ISD::FMA:
  case ISD::FSHL:
  case ISD::FSHR:
    R = ScalarizeVecRes_TernaryOp(N);
    break;

#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    R = ScalarizeVecRes_StrictFPOp(N);
    break;

  // NOTE(review): missing FP_TO_{S,U}INT_SAT case labels here.
    R = ScalarizeVecRes_FP_TO_XINT_SAT(N);
    break;

  case ISD::UADDO:
  case ISD::SADDO:
  case ISD::USUBO:
  case ISD::SSUBO:
  case ISD::UMULO:
  case ISD::SMULO:
    R = ScalarizeVecRes_OverflowOp(N, ResNo);
    break;
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT:
  case ISD::SDIVFIX:
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIX:
  case ISD::UDIVFIXSAT:
    R = ScalarizeVecRes_FIX(N);
    break;
  }

  // If R is null, the sub-method took care of registering the result.
  if (R.getNode())
    SetScalarizedVector(SDValue(N, ResNo), R);
}
265
266SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
267 SDValue LHS = GetScalarizedVector(N->getOperand(0));
268 SDValue RHS = GetScalarizedVector(N->getOperand(1));
269 return DAG.getNode(N->getOpcode(), SDLoc(N),
270 LHS.getValueType(), LHS, RHS, N->getFlags());
271}
272
273SDValue DAGTypeLegalizer::ScalarizeVecRes_MaskedBinOp(SDNode *N) {
274 SDLoc DL(N);
275 SDValue LHS = GetScalarizedVector(N->getOperand(0));
276 SDValue RHS = GetScalarizedVector(N->getOperand(1));
277 SDValue Mask = N->getOperand(2);
278 EVT MaskVT = Mask.getValueType();
279 // The vselect result and input vectors need scalarizing, but it's
280 // not a given that the mask does. For instance, in AVX512 v1i1 is legal.
281 // See the similar logic in ScalarizeVecRes_SETCC.
282 if (getTypeAction(MaskVT) == TargetLowering::TypeScalarizeVector)
283 Mask = GetScalarizedVector(Mask);
284 else
285 Mask = DAG.getExtractVectorElt(DL, MaskVT.getVectorElementType(), Mask, 0);
286 // Vectors may have a different boolean contents to scalars, so truncate to i1
287 // and let type legalization promote appropriately.
288 Mask = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Mask);
289 // Masked binary ops don't have UB on disabled lanes but produce poison, so
290 // use 1 as the divisor to avoid division by zero and overflow.
291 SDValue Divisor = DAG.getSelect(DL, LHS.getValueType(), Mask, RHS,
292 DAG.getConstant(1, DL, LHS.getValueType()));
293 return DAG.getNode(ISD::getUnmaskedBinOpOpcode(N->getOpcode()), DL,
294 LHS.getValueType(), LHS, Divisor);
295}
296
297SDValue DAGTypeLegalizer::ScalarizeVecRes_CMP(SDNode *N) {
298 SDLoc DL(N);
299
300 SDValue LHS = N->getOperand(0);
301 SDValue RHS = N->getOperand(1);
302 if (getTypeAction(LHS.getValueType()) ==
304 LHS = GetScalarizedVector(LHS);
305 RHS = GetScalarizedVector(RHS);
306 } else {
307 EVT VT = LHS.getValueType().getVectorElementType();
308 LHS = DAG.getExtractVectorElt(DL, VT, LHS, 0);
309 RHS = DAG.getExtractVectorElt(DL, VT, RHS, 0);
310 }
311
312 return DAG.getNode(N->getOpcode(), SDLoc(N),
313 N->getValueType(0).getVectorElementType(), LHS, RHS);
314}
315
316SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
317 SDValue Op0 = GetScalarizedVector(N->getOperand(0));
318 SDValue Op1 = GetScalarizedVector(N->getOperand(1));
319 SDValue Op2 = GetScalarizedVector(N->getOperand(2));
320 return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
321 Op2, N->getFlags());
322}
323
324SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) {
325 SDValue Op0 = GetScalarizedVector(N->getOperand(0));
326 SDValue Op1 = GetScalarizedVector(N->getOperand(1));
327 SDValue Op2 = N->getOperand(2);
328 return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1,
329 Op2, N->getFlags());
330}
331
333DAGTypeLegalizer::ScalarizeVecRes_UnaryOpWithTwoResults(SDNode *N,
334 unsigned ResNo) {
335 assert(N->getValueType(0).getVectorNumElements() == 1 &&
336 "Unexpected vector type!");
337 SDValue Elt = GetScalarizedVector(N->getOperand(0));
338
339 EVT VT0 = N->getValueType(0);
340 EVT VT1 = N->getValueType(1);
341 SDLoc dl(N);
342
343 SDNode *ScalarNode =
344 DAG.getNode(N->getOpcode(), dl,
345 {VT0.getScalarType(), VT1.getScalarType()}, Elt)
346 .getNode();
347
348 // Replace the other vector result not being explicitly scalarized here.
349 unsigned OtherNo = 1 - ResNo;
350 EVT OtherVT = N->getValueType(OtherNo);
351 if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) {
352 SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo));
353 } else {
354 SDValue OtherVal = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, OtherVT,
355 SDValue(ScalarNode, OtherNo));
356 ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
357 }
358
359 return SDValue(ScalarNode, ResNo);
360}
361
362SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
363 EVT VT = N->getValueType(0).getVectorElementType();
364 unsigned NumOpers = N->getNumOperands();
365 SDValue Chain = N->getOperand(0);
366 EVT ValueVTs[] = {VT, MVT::Other};
367 SDLoc dl(N);
368
369 SmallVector<SDValue, 4> Opers(NumOpers);
370
371 // The Chain is the first operand.
372 Opers[0] = Chain;
373
374 // Now process the remaining operands.
375 for (unsigned i = 1; i < NumOpers; ++i) {
376 SDValue Oper = N->getOperand(i);
377 EVT OperVT = Oper.getValueType();
378
379 if (OperVT.isVector()) {
380 if (getTypeAction(OperVT) == TargetLowering::TypeScalarizeVector)
381 Oper = GetScalarizedVector(Oper);
382 else
383 Oper =
384 DAG.getExtractVectorElt(dl, OperVT.getVectorElementType(), Oper, 0);
385 }
386
387 Opers[i] = Oper;
388 }
389
390 SDValue Result = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(ValueVTs),
391 Opers, N->getFlags());
392
393 // Legalize the chain result - switch anything that used the old chain to
394 // use the new one.
395 ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
396 return Result;
397}
398
399SDValue DAGTypeLegalizer::ScalarizeVecRes_OverflowOp(SDNode *N,
400 unsigned ResNo) {
401 SDLoc DL(N);
402 EVT ResVT = N->getValueType(0);
403 EVT OvVT = N->getValueType(1);
404
405 SDValue ScalarLHS, ScalarRHS;
406 if (getTypeAction(ResVT) == TargetLowering::TypeScalarizeVector) {
407 ScalarLHS = GetScalarizedVector(N->getOperand(0));
408 ScalarRHS = GetScalarizedVector(N->getOperand(1));
409 } else {
410 SmallVector<SDValue, 1> ElemsLHS, ElemsRHS;
411 DAG.ExtractVectorElements(N->getOperand(0), ElemsLHS);
412 DAG.ExtractVectorElements(N->getOperand(1), ElemsRHS);
413 ScalarLHS = ElemsLHS[0];
414 ScalarRHS = ElemsRHS[0];
415 }
416
417 SDVTList ScalarVTs = DAG.getVTList(
419 SDNode *ScalarNode = DAG.getNode(N->getOpcode(), DL, ScalarVTs,
420 {ScalarLHS, ScalarRHS}, N->getFlags())
421 .getNode();
422
423 // Replace the other vector result not being explicitly scalarized here.
424 unsigned OtherNo = 1 - ResNo;
425 EVT OtherVT = N->getValueType(OtherNo);
426 if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) {
427 SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo));
428 } else {
429 SDValue OtherVal = DAG.getNode(
430 ISD::SCALAR_TO_VECTOR, DL, OtherVT, SDValue(ScalarNode, OtherNo));
431 ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
432 }
433
434 return SDValue(ScalarNode, ResNo);
435}
436
437SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
438 unsigned ResNo) {
439 SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
440 return GetScalarizedVector(Op);
441}
442
443SDValue DAGTypeLegalizer::ScalarizeVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) {
444 SDLoc DL(N);
445 SDValue SourceValue = N->getOperand(0);
446 SDValue SinkValue = N->getOperand(1);
447 SDValue EltSizeInBytes = N->getOperand(2);
448 SDValue LaneOffset = N->getOperand(3);
449
450 EVT PtrVT = SourceValue->getValueType(0);
451 bool IsReadAfterWrite = N->getOpcode() == ISD::LOOP_DEPENDENCE_RAW_MASK;
452
453 // Take the difference between the pointers and divided by the element size,
454 // to see how many lanes separate them.
455 SDValue Diff = DAG.getNode(ISD::SUB, DL, PtrVT, SinkValue, SourceValue);
456 if (IsReadAfterWrite)
457 Diff = DAG.getNode(ISD::ABS, DL, PtrVT, Diff);
458 Diff = DAG.getNode(ISD::SDIV, DL, PtrVT, Diff, EltSizeInBytes);
459
460 // The pointers do not alias if:
461 // * Diff <= 0 || LaneOffset < Diff (WAR_MASK)
462 // * Diff == 0 || LaneOffset < abs(Diff) (RAW_MASK)
463 // Note: If LaneOffset is zero, both cases will fold to "true".
464 EVT CmpVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
465 Diff.getValueType());
466 SDValue Zero = DAG.getConstant(0, DL, PtrVT);
467 SDValue Cmp = DAG.getSetCC(DL, CmpVT, Diff, Zero,
468 IsReadAfterWrite ? ISD::SETEQ : ISD::SETLE);
469 return DAG.getNode(ISD::OR, DL, CmpVT, Cmp,
470 DAG.getSetCC(DL, CmpVT, LaneOffset, Diff, ISD::SETULT));
471}
472
473SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
474 SDValue Op = N->getOperand(0);
475 if (getTypeAction(Op.getValueType()) == TargetLowering::TypeScalarizeVector)
476 Op = GetScalarizedVector(Op);
477 EVT NewVT = N->getValueType(0).getVectorElementType();
478 return DAG.getNode(ISD::BITCAST, SDLoc(N),
479 NewVT, Op);
480}
481
482SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) {
483 EVT EltVT = N->getValueType(0).getVectorElementType();
484 SDValue InOp = N->getOperand(0);
485 // The BUILD_VECTOR operands may be of wider element types and
486 // we may need to truncate them back to the requested return type.
487 if (EltVT.isInteger())
488 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
489 return InOp;
490}
491
492SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
493 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
494 N->getValueType(0).getVectorElementType(),
495 N->getOperand(0), N->getOperand(1));
496}
497
498SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
499 SDLoc DL(N);
500 SDValue Op = N->getOperand(0);
501 EVT OpVT = Op.getValueType();
502 // The result needs scalarizing, but it's not a given that the source does.
503 // See similar logic in ScalarizeVecRes_UnaryOp.
504 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
505 Op = GetScalarizedVector(Op);
506 } else {
507 EVT VT = OpVT.getVectorElementType();
508 Op = DAG.getExtractVectorElt(DL, VT, Op, 0);
509 }
510 return DAG.getNode(ISD::FP_ROUND, DL,
511 N->getValueType(0).getVectorElementType(), Op,
512 N->getOperand(1));
513}
514
515SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_FROM_ARBITRARY_FP(SDNode *N) {
516 SDLoc DL(N);
517 SDValue Op = N->getOperand(0);
518 EVT OpVT = Op.getValueType();
519 // The result needs scalarizing, but it's not a given that the source does.
520 // See similar logic in ScalarizeVecRes_UnaryOp.
521 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
522 Op = GetScalarizedVector(Op);
523 } else {
524 EVT VT = OpVT.getVectorElementType();
525 Op = DAG.getExtractVectorElt(DL, VT, Op, 0);
526 }
527 return DAG.getNode(ISD::CONVERT_FROM_ARBITRARY_FP, DL,
528 N->getValueType(0).getVectorElementType(), Op,
529 N->getOperand(1));
530}
531
532SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N) {
533 SDValue Op = GetScalarizedVector(N->getOperand(0));
534 return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op,
535 N->getOperand(1));
536}
537
538SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) {
539 // The value to insert may have a wider type than the vector element type,
540 // so be sure to truncate it to the element type if necessary.
541 SDValue Op = N->getOperand(1);
542 EVT EltVT = N->getValueType(0).getVectorElementType();
543 if (Op.getValueType() != EltVT)
544 // FIXME: Can this happen for floating point types?
545 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op);
546 return Op;
547}
548
549SDValue DAGTypeLegalizer::ScalarizeVecRes_ATOMIC_LOAD(AtomicSDNode *N) {
550 SDValue Result = DAG.getAtomicLoad(
551 N->getExtensionType(), SDLoc(N), N->getMemoryVT().getVectorElementType(),
552 N->getValueType(0).getVectorElementType(), N->getChain(), N->getBasePtr(),
553 N->getMemOperand());
554
555 // Legalize the chain result - switch anything that used the old chain to
556 // use the new one.
557 ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
558 return Result;
559}
560
561SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
562 assert(N->isUnindexed() && "Indexed vector load?");
563
564 SDValue Result = DAG.getLoad(
565 ISD::UNINDEXED, N->getExtensionType(),
566 N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(),
567 N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()),
568 N->getPointerInfo(), N->getMemoryVT().getVectorElementType(),
569 N->getBaseAlign(), N->getMemOperand()->getFlags(), N->getAAInfo());
570
571 // Legalize the chain result - switch anything that used the old chain to
572 // use the new one.
573 ReplaceValueWith(SDValue(N, 1), Result.getValue(1));
574 return Result;
575}
576
577SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) {
578 // Get the dest type - it doesn't always match the input type, e.g. int_to_fp.
579 EVT DestVT = N->getValueType(0).getVectorElementType();
580 SDValue Op = N->getOperand(0);
581 EVT OpVT = Op.getValueType();
582 SDLoc DL(N);
583 // The result needs scalarizing, but it's not a given that the source does.
584 // This is a workaround for targets where it's impossible to scalarize the
585 // result of a conversion, because the source type is legal.
586 // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
587 // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is
588 // legal and was not scalarized.
589 // See the similar logic in ScalarizeVecRes_SETCC
590 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
591 Op = GetScalarizedVector(Op);
592 } else {
593 EVT VT = OpVT.getVectorElementType();
594 Op = DAG.getExtractVectorElt(DL, VT, Op, 0);
595 }
596 return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op, N->getFlags());
597}
598
599SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) {
600 EVT EltVT = N->getValueType(0).getVectorElementType();
601 EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType();
602 SDValue LHS = GetScalarizedVector(N->getOperand(0));
603 return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT,
604 LHS, DAG.getValueType(ExtVT));
605}
606
607SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) {
608 SDLoc DL(N);
609 SDValue Op = N->getOperand(0);
610
611 EVT OpVT = Op.getValueType();
612 EVT OpEltVT = OpVT.getVectorElementType();
613 EVT EltVT = N->getValueType(0).getVectorElementType();
614
615 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
616 Op = GetScalarizedVector(Op);
617 } else {
618 Op = DAG.getExtractVectorElt(DL, OpEltVT, Op, 0);
619 }
620
621 switch (N->getOpcode()) {
623 return DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Op);
625 return DAG.getNode(ISD::SIGN_EXTEND, DL, EltVT, Op);
627 return DAG.getNode(ISD::ZERO_EXTEND, DL, EltVT, Op);
628 }
629
630 llvm_unreachable("Illegal extend_vector_inreg opcode");
631}
632
633SDValue DAGTypeLegalizer::ScalarizeVecRes_ADDRSPACECAST(SDNode *N) {
634 EVT DestVT = N->getValueType(0).getVectorElementType();
635 SDValue Op = N->getOperand(0);
636 EVT OpVT = Op.getValueType();
637 SDLoc DL(N);
638 // The result needs scalarizing, but it's not a given that the source does.
639 // This is a workaround for targets where it's impossible to scalarize the
640 // result of a conversion, because the source type is legal.
641 // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32}
642 // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is
643 // legal and was not scalarized.
644 // See the similar logic in ScalarizeVecRes_SETCC
645 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
646 Op = GetScalarizedVector(Op);
647 } else {
648 EVT VT = OpVT.getVectorElementType();
649 Op = DAG.getExtractVectorElt(DL, VT, Op, 0);
650 }
651 auto *AddrSpaceCastN = cast<AddrSpaceCastSDNode>(N);
652 unsigned SrcAS = AddrSpaceCastN->getSrcAddressSpace();
653 unsigned DestAS = AddrSpaceCastN->getDestAddressSpace();
654 return DAG.getAddrSpaceCast(DL, DestVT, Op, SrcAS, DestAS);
655}
656
657SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
658 // If the operand is wider than the vector element type then it is implicitly
659 // truncated. Make that explicit here.
660 EVT EltVT = N->getValueType(0).getVectorElementType();
661 SDValue InOp = N->getOperand(0);
662 if (InOp.getValueType() != EltVT)
663 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp);
664 return InOp;
665}
666
667SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) {
668 SDValue Cond = N->getOperand(0);
669 EVT OpVT = Cond.getValueType();
670 SDLoc DL(N);
671 // The vselect result and true/value operands needs scalarizing, but it's
672 // not a given that the Cond does. For instance, in AVX512 v1i1 is legal.
673 // See the similar logic in ScalarizeVecRes_SETCC
674 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
675 Cond = GetScalarizedVector(Cond);
676 } else {
677 EVT VT = OpVT.getVectorElementType();
678 Cond = DAG.getExtractVectorElt(DL, VT, Cond, 0);
679 }
680
681 SDValue LHS = GetScalarizedVector(N->getOperand(1));
683 TLI.getBooleanContents(false, false);
684 TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false);
685
686 // If integer and float booleans have different contents then we can't
687 // reliably optimize in all cases. There is a full explanation for this in
688 // DAGCombiner::visitSELECT() where the same issue affects folding
689 // (select C, 0, 1) to (xor C, 1).
690 if (TLI.getBooleanContents(false, false) !=
691 TLI.getBooleanContents(false, true)) {
692 // At least try the common case where the boolean is generated by a
693 // comparison.
694 if (Cond->getOpcode() == ISD::SETCC) {
695 EVT OpVT = Cond->getOperand(0).getValueType();
696 ScalarBool = TLI.getBooleanContents(OpVT.getScalarType());
697 VecBool = TLI.getBooleanContents(OpVT);
698 } else
700 }
701
702 EVT CondVT = Cond.getValueType();
703 if (ScalarBool != VecBool) {
704 switch (ScalarBool) {
706 break;
710 // Vector read from all ones, scalar expects a single 1 so mask.
711 Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT,
712 Cond, DAG.getConstant(1, SDLoc(N), CondVT));
713 break;
717 // Vector reads from a one, scalar from all ones so sign extend.
718 Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT,
719 Cond, DAG.getValueType(MVT::i1));
720 break;
721 }
722 }
723
724 // Truncate the condition if needed
725 auto BoolVT = getSetCCResultType(CondVT);
726 if (BoolVT.bitsLT(CondVT))
727 Cond = DAG.getNode(ISD::TRUNCATE, SDLoc(N), BoolVT, Cond);
728
729 return DAG.getSelect(SDLoc(N),
730 LHS.getValueType(), Cond, LHS,
731 GetScalarizedVector(N->getOperand(2)));
732}
733
734SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
735 SDValue LHS = GetScalarizedVector(N->getOperand(1));
736 return DAG.getSelect(SDLoc(N),
737 LHS.getValueType(), N->getOperand(0), LHS,
738 GetScalarizedVector(N->getOperand(2)));
739}
740
741SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
742 SDValue LHS = GetScalarizedVector(N->getOperand(2));
743 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(),
744 N->getOperand(0), N->getOperand(1),
745 LHS, GetScalarizedVector(N->getOperand(3)),
746 N->getOperand(4));
747}
748
749SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) {
750 return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
751}
752
753SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) {
754 // Figure out if the scalar is the LHS or RHS and return it.
755 SDValue Arg = N->getOperand(2).getOperand(0);
756 if (Arg.isUndef())
757 return DAG.getUNDEF(N->getValueType(0).getVectorElementType());
758 unsigned Op = !cast<ConstantSDNode>(Arg)->isZero();
759 return GetScalarizedVector(N->getOperand(Op));
760}
761
762SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N) {
763 SDValue Src = N->getOperand(0);
764 EVT SrcVT = Src.getValueType();
765 SDLoc dl(N);
766
767 // Handle case where result is scalarized but operand is not
768 if (getTypeAction(SrcVT) == TargetLowering::TypeScalarizeVector)
769 Src = GetScalarizedVector(Src);
770 else
771 Src = DAG.getNode(
773 DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
774
775 EVT DstVT = N->getValueType(0).getVectorElementType();
776 return DAG.getNode(N->getOpcode(), dl, DstVT, Src, N->getOperand(1));
777}
778
779SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) {
780 assert(N->getValueType(0).isVector() &&
781 N->getOperand(0).getValueType().isVector() &&
782 "Operand types must be vectors");
783 SDValue LHS = N->getOperand(0);
784 SDValue RHS = N->getOperand(1);
785 EVT OpVT = LHS.getValueType();
786 EVT NVT = N->getValueType(0).getVectorElementType();
787 SDLoc DL(N);
788
789 // The result needs scalarizing, but it's not a given that the source does.
790 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) {
791 LHS = GetScalarizedVector(LHS);
792 RHS = GetScalarizedVector(RHS);
793 } else {
794 EVT VT = OpVT.getVectorElementType();
795 LHS = DAG.getExtractVectorElt(DL, VT, LHS, 0);
796 RHS = DAG.getExtractVectorElt(DL, VT, RHS, 0);
797 }
798
799 // Turn it into a scalar SETCC.
800 SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
801 N->getOperand(2));
802 // Vectors may have a different boolean contents to scalars. Promote the
803 // value appropriately.
804 ISD::NodeType ExtendCode =
805 TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
806 return DAG.getNode(ExtendCode, DL, NVT, Res);
807}
808
809SDValue DAGTypeLegalizer::ScalarizeVecRes_IS_FPCLASS(SDNode *N) {
810 SDLoc DL(N);
811 SDValue Arg = N->getOperand(0);
812 SDValue Test = N->getOperand(1);
813 EVT ArgVT = Arg.getValueType();
814 EVT ResultVT = N->getValueType(0).getVectorElementType();
815
816 if (getTypeAction(ArgVT) == TargetLowering::TypeScalarizeVector) {
817 Arg = GetScalarizedVector(Arg);
818 } else {
819 EVT VT = ArgVT.getVectorElementType();
820 Arg = DAG.getExtractVectorElt(DL, VT, Arg, 0);
821 }
822
823 SDValue Res =
824 DAG.getNode(ISD::IS_FPCLASS, DL, MVT::i1, {Arg, Test}, N->getFlags());
825 // Vectors may have a different boolean contents to scalars. Promote the
826 // value appropriately.
827 ISD::NodeType ExtendCode =
828 TargetLowering::getExtendForContent(TLI.getBooleanContents(ArgVT));
829 return DAG.getNode(ExtendCode, DL, ResultVT, Res);
830}
831
832//===----------------------------------------------------------------------===//
833// Operand Vector Scalarization <1 x ty> -> ty.
834//===----------------------------------------------------------------------===//
835
/// Scalarize operand number \p OpNo of node \p N, which is a one-element
/// vector. Returns true when N was morphed in place, false when it was
/// replaced (or when the helper registered the replacement itself).
///
/// NOTE(review): several `case` labels in this switch appear to have been
/// dropped during extraction (the STRICT_*, CONCAT_VECTORS,
/// INSERT_SUBVECTOR, EXTRACT_VECTOR_ELT, VECREDUCE* and related handlers
/// have no visible label) -- restore them from upstream before compiling.
bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
  LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": ";
             N->dump(&DAG));
  // Res stays null when the helper registers the result(s) itself.
  SDValue Res = SDValue();

  switch (N->getOpcode()) {
  default:
#ifndef NDEBUG
    dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": ";
    N->dump(&DAG);
    dbgs() << "\n";
#endif
    report_fatal_error("Do not know how to scalarize this operator's "
                       "operand!\n");
  case ISD::BITCAST:
    Res = ScalarizeVecOp_BITCAST(N);
    break;
  case ISD::FAKE_USE:
    Res = ScalarizeVecOp_FAKE_USE(N);
    break;
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::TRUNCATE:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::LROUND:
  case ISD::LLROUND:
  case ISD::LRINT:
  case ISD::LLRINT:
    Res = ScalarizeVecOp_UnaryOp(N);
    break;
  // NOTE(review): missing case label(s) here.
    Res = ScalarizeVecOp_UnaryOpWithExtraInput(N);
    break;
  // NOTE(review): missing STRICT_* case label(s) here.
    Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
    break;
  // NOTE(review): missing CONCAT_VECTORS case label here.
    Res = ScalarizeVecOp_CONCAT_VECTORS(N);
    break;
  // NOTE(review): missing INSERT_SUBVECTOR case label here.
    Res = ScalarizeVecOp_INSERT_SUBVECTOR(N, OpNo);
    break;
  // NOTE(review): missing EXTRACT_VECTOR_ELT case label here.
    Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N);
    break;
  case ISD::VSELECT:
    Res = ScalarizeVecOp_VSELECT(N);
    break;
  case ISD::SETCC:
    Res = ScalarizeVecOp_VSETCC(N);
    break;
  // NOTE(review): missing STRICT_FSETCC(S) case label(s) here.
    Res = ScalarizeVecOp_VSTRICT_FSETCC(N, OpNo);
    break;
  case ISD::STORE:
    Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
    break;
  // NOTE(review): missing STRICT_FP_ROUND case label here.
    Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
    break;
  case ISD::FP_ROUND:
    Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
    break;
  // NOTE(review): missing STRICT_FP_EXTEND case label here.
    Res = ScalarizeVecOp_STRICT_FP_EXTEND(N);
    break;
  case ISD::FP_EXTEND:
    Res = ScalarizeVecOp_FP_EXTEND(N);
    break;
  // NOTE(review): missing VECREDUCE_* case labels here.
    Res = ScalarizeVecOp_VECREDUCE(N);
    break;
  // NOTE(review): missing VECREDUCE_SEQ_* case label(s) here.
    Res = ScalarizeVecOp_VECREDUCE_SEQ(N);
    break;
  case ISD::SCMP:
  case ISD::UCMP:
    Res = ScalarizeVecOp_CMP(N);
    break;
  // NOTE(review): missing VECTOR_FIND_LAST_ACTIVE case label here.
    Res = ScalarizeVecOp_VECTOR_FIND_LAST_ACTIVE(N);
    break;
  case ISD::CTTZ_ELTS:
  // NOTE(review): missing case label here (likely the ZERO_UNDEF variant).
    Res = ScalarizeVecOp_CTTZ_ELTS(N);
    break;
  case ISD::MASKED_UDIV:
  case ISD::MASKED_SDIV:
  case ISD::MASKED_UREM:
  case ISD::MASKED_SREM:
    Res = ScalarizeVecOp_MaskedBinOp(N, OpNo);
    break;
  }

  // If the result is null, the sub-method took care of registering results etc.
  if (!Res.getNode()) return false;

  // If the result is N, the sub-method updated N in place. Tell the legalizer
  // core about this.
  if (Res.getNode() == N)
    return true;

  assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
         "Invalid operand expansion");

  ReplaceValueWith(SDValue(N, 0), Res);
  return false;
}
969
970/// If the value to convert is a vector that needs to be scalarized, it must be
971/// <1 x ty>. Convert the element instead.
972SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
973 SDValue Elt = GetScalarizedVector(N->getOperand(0));
974 return DAG.getNode(ISD::BITCAST, SDLoc(N),
975 N->getValueType(0), Elt);
976}
977
978// Need to legalize vector operands of fake uses. Must be <1 x ty>.
979SDValue DAGTypeLegalizer::ScalarizeVecOp_FAKE_USE(SDNode *N) {
980 assert(N->getOperand(1).getValueType().getVectorNumElements() == 1 &&
981 "Fake Use: Unexpected vector type!");
982 SDValue Elt = GetScalarizedVector(N->getOperand(1));
983 return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), Elt);
984}
985
986/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
987/// Do the operation on the element instead.
988SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
989 assert(N->getValueType(0).getVectorNumElements() == 1 &&
990 "Unexpected vector type!");
991 SDValue Elt = GetScalarizedVector(N->getOperand(0));
992 SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N),
993 N->getValueType(0).getScalarType(), Elt);
994 // Revectorize the result so the types line up with what the uses of this
995 // expression expect.
996 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);
997}
998
999/// Same as ScalarizeVecOp_UnaryOp with an extra operand (for example a
1000/// typesize).
1001SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOpWithExtraInput(SDNode *N) {
1002 assert(N->getValueType(0).getVectorNumElements() == 1 &&
1003 "Unexpected vector type!");
1004 SDValue Elt = GetScalarizedVector(N->getOperand(0));
1005 SDValue Op =
1006 DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0).getScalarType(),
1007 Elt, N->getOperand(1));
1008 // Revectorize the result so the types line up with what the uses of this
1009 // expression expect.
1010 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);
1011}
1012
1013/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
1014/// Do the strict FP operation on the element instead.
1015SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) {
1016 assert(N->getValueType(0).getVectorNumElements() == 1 &&
1017 "Unexpected vector type!");
1018 SDValue Elt = GetScalarizedVector(N->getOperand(1));
1019 SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N),
1020 { N->getValueType(0).getScalarType(), MVT::Other },
1021 { N->getOperand(0), Elt });
1022 // Legalize the chain result - switch anything that used the old chain to
1023 // use the new one.
1024 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
1025 // Revectorize the result so the types line up with what the uses of this
1026 // expression expect.
1027 Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
1028
1029 // Do our own replacement and return SDValue() to tell the caller that we
1030 // handled all replacements since caller can only handle a single result.
1031 ReplaceValueWith(SDValue(N, 0), Res);
1032 return SDValue();
1033}
1034
1035/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
1036SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
1037 SmallVector<SDValue, 8> Ops(N->getNumOperands());
1038 for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i)
1039 Ops[i] = GetScalarizedVector(N->getOperand(i));
1040 return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops);
1041}
1042
1043/// The inserted subvector is to be scalarized - use insert vector element
1044/// instead.
1045SDValue DAGTypeLegalizer::ScalarizeVecOp_INSERT_SUBVECTOR(SDNode *N,
1046 unsigned OpNo) {
1047 // We should not be attempting to scalarize the containing vector
1048 assert(OpNo == 1);
1049 SDValue Elt = GetScalarizedVector(N->getOperand(1));
1050 SDValue ContainingVec = N->getOperand(0);
1051 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N),
1052 ContainingVec.getValueType(), ContainingVec, Elt,
1053 N->getOperand(2));
1054}
1055
1056/// If the input is a vector that needs to be scalarized, it must be <1 x ty>,
1057/// so just return the element, ignoring the index.
1058SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
1059 EVT VT = N->getValueType(0);
1060 SDValue Res = GetScalarizedVector(N->getOperand(0));
1061 if (Res.getValueType() != VT)
1062 Res = VT.isFloatingPoint()
1063 ? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res)
1064 : DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res);
1065 return Res;
1066}
1067
1068/// If the input condition is a vector that needs to be scalarized, it must be
1069/// <1 x i1>, so just convert to a normal ISD::SELECT
1070/// (still with vector output type since that was acceptable if we got here).
1071SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) {
1072 SDValue ScalarCond = GetScalarizedVector(N->getOperand(0));
1073 EVT VT = N->getValueType(0);
1074
1075 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1),
1076 N->getOperand(2));
1077}
1078
1079/// If the operand is a vector that needs to be scalarized then the
1080/// result must be v1i1, so just convert to a scalar SETCC and wrap
1081/// with a scalar_to_vector since the res type is legal if we got here
1082SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) {
1083 assert(N->getValueType(0).isVector() &&
1084 N->getOperand(0).getValueType().isVector() &&
1085 "Operand types must be vectors");
1086 assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type");
1087
1088 EVT VT = N->getValueType(0);
1089 SDValue LHS = GetScalarizedVector(N->getOperand(0));
1090 SDValue RHS = GetScalarizedVector(N->getOperand(1));
1091
1092 EVT OpVT = N->getOperand(0).getValueType();
1093 EVT NVT = VT.getVectorElementType();
1094 SDLoc DL(N);
1095 // Turn it into a scalar SETCC.
1096 SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS,
1097 N->getOperand(2));
1098
1099 // Vectors may have a different boolean contents to scalars. Promote the
1100 // value appropriately.
1101 ISD::NodeType ExtendCode =
1102 TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
1103
1104 Res = DAG.getNode(ExtendCode, DL, NVT, Res);
1105
1106 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);
1107}
1108
1109// Similiar to ScalarizeVecOp_VSETCC, with added logic to update chains.
1110SDValue DAGTypeLegalizer::ScalarizeVecOp_VSTRICT_FSETCC(SDNode *N,
1111 unsigned OpNo) {
1112 assert(OpNo == 1 && "Wrong operand for scalarization!");
1113 assert(N->getValueType(0).isVector() &&
1114 N->getOperand(1).getValueType().isVector() &&
1115 "Operand types must be vectors");
1116 assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type");
1117
1118 EVT VT = N->getValueType(0);
1119 SDValue Ch = N->getOperand(0);
1120 SDValue LHS = GetScalarizedVector(N->getOperand(1));
1121 SDValue RHS = GetScalarizedVector(N->getOperand(2));
1122 SDValue CC = N->getOperand(3);
1123
1124 EVT OpVT = N->getOperand(1).getValueType();
1125 EVT NVT = VT.getVectorElementType();
1126 SDLoc DL(N);
1127 SDValue Res = DAG.getNode(N->getOpcode(), DL, {MVT::i1, MVT::Other},
1128 {Ch, LHS, RHS, CC});
1129
1130 // Legalize the chain result - switch anything that used the old chain to
1131 // use the new one.
1132 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
1133
1134 ISD::NodeType ExtendCode =
1135 TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
1136
1137 Res = DAG.getNode(ExtendCode, DL, NVT, Res);
1138 Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);
1139
1140 // Do our own replacement and return SDValue() to tell the caller that we
1141 // handled all replacements since caller can only handle a single result.
1142 ReplaceValueWith(SDValue(N, 0), Res);
1143 return SDValue();
1144}
1145
1146/// If the value to store is a vector that needs to be scalarized, it must be
1147/// <1 x ty>. Just store the element.
1148SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
1149 assert(N->isUnindexed() && "Indexed store of one-element vector?");
1150 assert(OpNo == 1 && "Do not know how to scalarize this operand!");
1151 SDLoc dl(N);
1152
1153 if (N->isTruncatingStore())
1154 return DAG.getTruncStore(
1155 N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
1156 N->getBasePtr(), N->getPointerInfo(),
1157 N->getMemoryVT().getVectorElementType(), N->getBaseAlign(),
1158 N->getMemOperand()->getFlags(), N->getAAInfo());
1159
1160 return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
1161 N->getBasePtr(), N->getPointerInfo(), N->getBaseAlign(),
1162 N->getMemOperand()->getFlags(), N->getAAInfo());
1163}
1164
1165/// If the value to round is a vector that needs to be scalarized, it must be
1166/// <1 x ty>. Convert the element instead.
1167SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
1168 assert(OpNo == 0 && "Wrong operand for scalarization!");
1169 SDValue Elt = GetScalarizedVector(N->getOperand(0));
1170 SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N),
1171 N->getValueType(0).getVectorElementType(), Elt,
1172 N->getOperand(1));
1173 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
1174}
1175
1176SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N,
1177 unsigned OpNo) {
1178 assert(OpNo == 1 && "Wrong operand for scalarization!");
1179 SDValue Elt = GetScalarizedVector(N->getOperand(1));
1180 SDValue Res =
1181 DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
1182 {N->getValueType(0).getVectorElementType(), MVT::Other},
1183 {N->getOperand(0), Elt, N->getOperand(2)});
1184 // Legalize the chain result - switch anything that used the old chain to
1185 // use the new one.
1186 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
1187
1188 Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
1189
1190 // Do our own replacement and return SDValue() to tell the caller that we
1191 // handled all replacements since caller can only handle a single result.
1192 ReplaceValueWith(SDValue(N, 0), Res);
1193 return SDValue();
1194}
1195
1196/// If the value to extend is a vector that needs to be scalarized, it must be
1197/// <1 x ty>. Convert the element instead.
1198SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_EXTEND(SDNode *N) {
1199 SDValue Elt = GetScalarizedVector(N->getOperand(0));
1200 SDValue Res = DAG.getNode(ISD::FP_EXTEND, SDLoc(N),
1201 N->getValueType(0).getVectorElementType(), Elt);
1202 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
1203}
1204
1205/// If the value to extend is a vector that needs to be scalarized, it must be
1206/// <1 x ty>. Convert the element instead.
1207SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_EXTEND(SDNode *N) {
1208 SDValue Elt = GetScalarizedVector(N->getOperand(1));
1209 SDValue Res =
1210 DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N),
1211 {N->getValueType(0).getVectorElementType(), MVT::Other},
1212 {N->getOperand(0), Elt});
1213 // Legalize the chain result - switch anything that used the old chain to
1214 // use the new one.
1215 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
1216
1217 Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
1218
1219 // Do our own replacement and return SDValue() to tell the caller that we
1220 // handled all replacements since caller can only handle a single result.
1221 ReplaceValueWith(SDValue(N, 0), Res);
1222 return SDValue();
1223}
1224
1225SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
1226 SDValue Res = GetScalarizedVector(N->getOperand(0));
1227 // Result type may be wider than element type.
1228 if (Res.getValueType() != N->getValueType(0))
1229 Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Res);
1230 return Res;
1231}
1232
1233SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N) {
1234 SDValue AccOp = N->getOperand(0);
1235 SDValue VecOp = N->getOperand(1);
1236
1237 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(N->getOpcode());
1238
1239 SDValue Op = GetScalarizedVector(VecOp);
1240 return DAG.getNode(BaseOpc, SDLoc(N), N->getValueType(0),
1241 AccOp, Op, N->getFlags());
1242}
1243
1244SDValue DAGTypeLegalizer::ScalarizeVecOp_CMP(SDNode *N) {
1245 SDValue LHS = GetScalarizedVector(N->getOperand(0));
1246 SDValue RHS = GetScalarizedVector(N->getOperand(1));
1247
1248 EVT ResVT = N->getValueType(0).getVectorElementType();
1249 SDValue Cmp = DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, LHS, RHS);
1250 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Cmp);
1251}
1252
1253SDValue DAGTypeLegalizer::ScalarizeVecOp_VECTOR_FIND_LAST_ACTIVE(SDNode *N) {
1254 // Since there is no "none-active" result, the only valid return for <1 x ty>
1255 // is 0. Note: Since we check the high mask during splitting this is safe.
1256 // As e.g., a <2 x ty> operation would split to:
1257 // any_active(%hi_mask) ? (1 + last_active(%hi_mask))
1258 // : `last_active(%lo_mask)`
1259 // Which then scalarizes to:
1260 // %mask[1] ? 1 : 0
1261 EVT VT = N->getValueType(0);
1262 return DAG.getConstant(0, SDLoc(N), VT);
1263}
1264
1265SDValue DAGTypeLegalizer::ScalarizeVecOp_CTTZ_ELTS(SDNode *N) {
1266 // The number of trailing zero elements is 1 if the element is 0, and 0
1267 // otherwise.
1268 if (N->getOpcode() == ISD::CTTZ_ELTS_ZERO_POISON)
1269 return DAG.getConstant(0, SDLoc(N), N->getValueType(0));
1270 SDValue Op = GetScalarizedVector(N->getOperand(0));
1271 SDValue SetCC =
1272 DAG.getSetCC(SDLoc(N), MVT::i1, Op,
1273 DAG.getConstant(0, SDLoc(N), Op.getValueType()), ISD::SETEQ);
1274 return DAG.getZExtOrTrunc(SetCC, SDLoc(N), N->getValueType(0));
1275}
1276
1277SDValue DAGTypeLegalizer::ScalarizeVecOp_MaskedBinOp(SDNode *N, unsigned OpNo) {
1278 assert(OpNo == 2 && "Can only scalarize mask operand");
1279 SDLoc DL(N);
1280 EVT VT = N->getOperand(0).getValueType().getVectorElementType();
1281 SDValue LHS = DAG.getExtractVectorElt(DL, VT, N->getOperand(0), 0);
1282 SDValue RHS = DAG.getExtractVectorElt(DL, VT, N->getOperand(1), 0);
1283 SDValue Mask = GetScalarizedVector(N->getOperand(2));
1284 // Vectors may have a different boolean contents to scalars, so truncate to i1
1285 // and let type legalization promote appropriately.
1286 Mask = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Mask);
1287 // Masked binary ops don't have UB on disabled lanes but produce poison, so
1288 // use 1 as the divisor to avoid division by zero and overflow.
1289 SDValue BinOp =
1290 DAG.getNode(ISD::getUnmaskedBinOpOpcode(N->getOpcode()), DL, VT, LHS,
1291 DAG.getSelect(DL, VT, Mask, RHS, DAG.getConstant(1, DL, VT)));
1292 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, N->getValueType(0), BinOp);
1293}
1294
1295//===----------------------------------------------------------------------===//
1296// Result Vector Splitting
1297//===----------------------------------------------------------------------===//
1298
/// This method is called when the specified result of the specified node is
/// found to need vector splitting. At this point, the node may also have
/// invalid operands or may have other results that need legalization, we just
/// know that (at least) one result needs vector splitting.
void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
  LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG));
  SDValue Lo, Hi;

  // See if the target wants to custom expand this node.
  if (CustomLowerNode(N, N->getValueType(ResNo), true))
    return;

  // Dispatch on opcode. Each helper either fills in Lo/Hi or registers the
  // results itself and leaves Lo null (see the check after the switch).
  switch (N->getOpcode()) {
  default:
#ifndef NDEBUG
    dbgs() << "SplitVectorResult #" << ResNo << ": ";
    N->dump(&DAG);
    dbgs() << "\n";
#endif
    report_fatal_error("Do not know how to split the result of this "
                       "operator!\n");

    SplitVecRes_LOOP_DEPENDENCE_MASK(N, Lo, Hi);
    break;
  case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
  case ISD::AssertZext: SplitVecRes_AssertZext(N, Lo, Hi); break;
  case ISD::AssertSext: SplitVecRes_AssertSext(N, Lo, Hi); break;
  case ISD::VSELECT:
  case ISD::SELECT:
  case ISD::VP_MERGE:
  case ISD::VP_SELECT: SplitRes_Select(N, Lo, Hi); break;
  case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
  case ISD::POISON:
  case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
  case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
  case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
  case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
  case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
  case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
  case ISD::FPOWI:
  case ISD::FLDEXP:
  case ISD::FCOPYSIGN: SplitVecRes_FPOp_MultiType(N, Lo, Hi); break;
  case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break;
  case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
  case ISD::SPLAT_VECTOR:
    SplitVecRes_ScalarOp(N, Lo, Hi);
    break;
  case ISD::STEP_VECTOR:
    SplitVecRes_STEP_VECTOR(N, Lo, Hi);
    break;
  case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
  case ISD::ATOMIC_LOAD:
    SplitVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N), Lo, Hi);
    break;
  case ISD::LOAD:
    SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
    break;
  case ISD::VP_LOAD:
    SplitVecRes_VP_LOAD(cast<VPLoadSDNode>(N), Lo, Hi);
    break;
  case ISD::VP_LOAD_FF:
    SplitVecRes_VP_LOAD_FF(cast<VPLoadFFSDNode>(N), Lo, Hi);
    break;
  case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
    SplitVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N), Lo, Hi);
    break;
  case ISD::MLOAD:
    SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi);
    break;
  case ISD::MGATHER:
  case ISD::VP_GATHER:
    SplitVecRes_Gather(cast<MemSDNode>(N), Lo, Hi, /*SplitSETCC*/ true);
    break;
    SplitVecRes_VECTOR_COMPRESS(N, Lo, Hi);
    break;
  case ISD::SETCC:
  case ISD::VP_SETCC:
    SplitVecRes_SETCC(N, Lo, Hi);
    break;
    SplitVecRes_VECTOR_REVERSE(N, Lo, Hi);
    break;
    SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
    break;
    SplitVecRes_VECTOR_SPLICE(N, Lo, Hi);
    break;
    SplitVecRes_VECTOR_DEINTERLEAVE(N);
    return;
    SplitVecRes_VECTOR_INTERLEAVE(N);
    return;
  case ISD::VAARG:
    SplitVecRes_VAARG(N, Lo, Hi);
    break;

    SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
    break;

  case ISD::ABS:
  case ISD::VP_ABS:
  case ISD::BITREVERSE:
  case ISD::VP_BITREVERSE:
  case ISD::BSWAP:
  case ISD::VP_BSWAP:
  case ISD::CTLZ:
  case ISD::VP_CTLZ:
  case ISD::CTTZ:
  case ISD::VP_CTTZ:
  case ISD::VP_CTLZ_ZERO_POISON:
  case ISD::VP_CTTZ_ZERO_POISON:
  case ISD::CTPOP:
  case ISD::VP_CTPOP:
  case ISD::FABS: case ISD::VP_FABS:
  case ISD::FACOS:
  case ISD::FASIN:
  case ISD::FATAN:
  case ISD::FCEIL:
  case ISD::VP_FCEIL:
  case ISD::FCOS:
  case ISD::FCOSH:
  case ISD::FEXP:
  case ISD::FEXP2:
  case ISD::FEXP10:
  case ISD::FFLOOR:
  case ISD::VP_FFLOOR:
  case ISD::FLOG:
  case ISD::FLOG10:
  case ISD::FLOG2:
  case ISD::FNEARBYINT:
  case ISD::VP_FNEARBYINT:
  case ISD::FNEG: case ISD::VP_FNEG:
  case ISD::FREEZE:
  case ISD::ARITH_FENCE:
  case ISD::FP_EXTEND:
  case ISD::VP_FP_EXTEND:
  case ISD::FP_ROUND:
  case ISD::VP_FP_ROUND:
  case ISD::FP_TO_SINT:
  case ISD::VP_FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::VP_FP_TO_UINT:
  case ISD::FRINT:
  case ISD::VP_FRINT:
  case ISD::LRINT:
  case ISD::VP_LRINT:
  case ISD::LLRINT:
  case ISD::VP_LLRINT:
  case ISD::FROUND:
  case ISD::VP_FROUND:
  case ISD::FROUNDEVEN:
  case ISD::VP_FROUNDEVEN:
  case ISD::LROUND:
  case ISD::LLROUND:
  case ISD::FSIN:
  case ISD::FSINH:
  case ISD::FSQRT: case ISD::VP_SQRT:
  case ISD::FTAN:
  case ISD::FTANH:
  case ISD::FTRUNC:
  case ISD::VP_FROUNDTOZERO:
  case ISD::SINT_TO_FP:
  case ISD::VP_SINT_TO_FP:
  case ISD::TRUNCATE:
  case ISD::VP_TRUNCATE:
  case ISD::UINT_TO_FP:
  case ISD::VP_UINT_TO_FP:
  case ISD::FCANONICALIZE:
    SplitVecRes_UnaryOp(N, Lo, Hi);
    break;
  case ISD::ADDRSPACECAST:
    SplitVecRes_ADDRSPACECAST(N, Lo, Hi);
    break;
  case ISD::FMODF:
  case ISD::FFREXP:
  case ISD::FSINCOS:
  case ISD::FSINCOSPI:
    SplitVecRes_UnaryOpWithTwoResults(N, ResNo, Lo, Hi);
    break;

  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::VP_SIGN_EXTEND:
  case ISD::VP_ZERO_EXTEND:
    SplitVecRes_ExtendOp(N, Lo, Hi);
    break;

  case ISD::ADD: case ISD::VP_ADD:
  case ISD::SUB: case ISD::VP_SUB:
  case ISD::MUL: case ISD::VP_MUL:
  case ISD::CLMUL:
  case ISD::CLMULR:
  case ISD::CLMULH:
  case ISD::MULHS:
  case ISD::MULHU:
  case ISD::ABDS:
  case ISD::ABDU:
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
  case ISD::FADD: case ISD::VP_FADD:
  case ISD::FSUB: case ISD::VP_FSUB:
  case ISD::FMUL: case ISD::VP_FMUL:
  case ISD::FMINNUM:
  case ISD::FMINNUM_IEEE:
  case ISD::VP_FMINNUM:
  case ISD::FMAXNUM:
  case ISD::FMAXNUM_IEEE:
  case ISD::VP_FMAXNUM:
  case ISD::FMINIMUM:
  case ISD::VP_FMINIMUM:
  case ISD::FMAXIMUM:
  case ISD::VP_FMAXIMUM:
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM:
  case ISD::SDIV: case ISD::VP_SDIV:
  case ISD::UDIV: case ISD::VP_UDIV:
  case ISD::FDIV: case ISD::VP_FDIV:
  case ISD::FPOW:
  case ISD::FATAN2:
  case ISD::AND: case ISD::VP_AND:
  case ISD::OR: case ISD::VP_OR:
  case ISD::XOR: case ISD::VP_XOR:
  case ISD::SHL: case ISD::VP_SHL:
  case ISD::SRA: case ISD::VP_SRA:
  case ISD::SRL: case ISD::VP_SRL:
  case ISD::UREM: case ISD::VP_UREM:
  case ISD::SREM: case ISD::VP_SREM:
  case ISD::FREM: case ISD::VP_FREM:
  case ISD::SMIN: case ISD::VP_SMIN:
  case ISD::SMAX: case ISD::VP_SMAX:
  case ISD::UMIN: case ISD::VP_UMIN:
  case ISD::UMAX: case ISD::VP_UMAX:
  case ISD::SADDSAT: case ISD::VP_SADDSAT:
  case ISD::UADDSAT: case ISD::VP_UADDSAT:
  case ISD::SSUBSAT: case ISD::VP_SSUBSAT:
  case ISD::USUBSAT: case ISD::VP_USUBSAT:
  case ISD::SSHLSAT:
  case ISD::USHLSAT:
  case ISD::ROTL:
  case ISD::ROTR:
  case ISD::VP_FCOPYSIGN:
    SplitVecRes_BinOp(N, Lo, Hi);
    break;
  case ISD::MASKED_UDIV:
  case ISD::MASKED_SDIV:
  case ISD::MASKED_UREM:
  case ISD::MASKED_SREM:
    SplitVecRes_MaskedBinOp(N, Lo, Hi);
    break;
  case ISD::FMA: case ISD::VP_FMA:
  case ISD::FSHL:
  case ISD::VP_FSHL:
  case ISD::FSHR:
  case ISD::VP_FSHR:
    SplitVecRes_TernaryOp(N, Lo, Hi);
    break;

  case ISD::SCMP: case ISD::UCMP:
    SplitVecRes_CMP(N, Lo, Hi);
    break;

  // Generate one case label per constrained (strict) FP opcode.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    SplitVecRes_StrictFPOp(N, Lo, Hi);
    break;

    SplitVecRes_FP_TO_XINT_SAT(N, Lo, Hi);
    break;

  case ISD::UADDO:
  case ISD::SADDO:
  case ISD::USUBO:
  case ISD::SSUBO:
  case ISD::UMULO:
  case ISD::SMULO:
    SplitVecRes_OverflowOp(N, ResNo, Lo, Hi);
    break;
  case ISD::SMULFIX:
  case ISD::SMULFIXSAT:
  case ISD::UMULFIX:
  case ISD::UMULFIXSAT:
  case ISD::SDIVFIX:
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIX:
  case ISD::UDIVFIXSAT:
    SplitVecRes_FIX(N, Lo, Hi);
    break;
  case ISD::EXPERIMENTAL_VP_SPLICE:
    SplitVecRes_VP_SPLICE(N, Lo, Hi);
    break;
  case ISD::EXPERIMENTAL_VP_REVERSE:
    SplitVecRes_VP_REVERSE(N, Lo, Hi);
    break;
    SplitVecRes_PARTIAL_REDUCE_MLA(N, Lo, Hi);
    break;
    SplitVecRes_GET_ACTIVE_LANE_MASK(N, Lo, Hi);
    break;
  }

  // If Lo/Hi is null, the sub-method took care of registering results etc.
  if (Lo.getNode())
    SetSplitVector(SDValue(N, ResNo), Lo, Hi);
}
1627
/// Advance \p Ptr by the byte size of \p MemVT, updating \p MPI to describe
/// the new location. Used when splitting a memory access so the second half
/// can be addressed.
void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
                                        MachinePointerInfo &MPI, SDValue &Ptr,
                                        uint64_t *ScaledOffset) {
  SDLoc DL(N);
  // Known-minimum size of MemVT in bytes (exact for fixed-size types).
  unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinValue() / 8;

  if (MemVT.isScalableVector()) {
    // Scalable types advance by vscale * IncrementSize bytes.
    SDValue BytesIncrement = DAG.getVScale(
        DL, Ptr.getValueType(),
        APInt(Ptr.getValueSizeInBits().getFixedValue(), IncrementSize));
    // The static byte offset is unknown for scalable types, so only the
    // address space from the original pointer info is preserved.
    MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
    if (ScaledOffset)
      *ScaledOffset += IncrementSize;
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement,
  } else {
    MPI = N->getPointerInfo().getWithOffset(IncrementSize);
    // Increment the pointer to the other half.
    Ptr = DAG.getObjectPtrOffset(DL, Ptr, TypeSize::getFixed(IncrementSize));
  }
}
1649
/// Split a mask operand into Lo/Hi halves, defaulting the debug location to
/// the mask value itself.
std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask) {
  return SplitMask(Mask, SDLoc(Mask));
}
1653
1654std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask,
1655 const SDLoc &DL) {
1656 SDValue MaskLo, MaskHi;
1657 EVT MaskVT = Mask.getValueType();
1658 if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector)
1659 GetSplitVector(Mask, MaskLo, MaskHi);
1660 else
1661 std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
1662 return std::make_pair(MaskLo, MaskHi);
1663}
1664
1665void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi) {
1666 SDValue LHSLo, LHSHi;
1667 GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
1668 SDValue RHSLo, RHSHi;
1669 GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
1670 SDLoc dl(N);
1671
1672 const SDNodeFlags Flags = N->getFlags();
1673 unsigned Opcode = N->getOpcode();
1674 if (N->getNumOperands() == 2) {
1675 Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags);
1676 Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags);
1677 return;
1678 }
1679
1680 assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
1681 assert(N->isVPOpcode() && "Expected VP opcode");
1682
1683 SDValue MaskLo, MaskHi;
1684 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(2));
1685
1686 SDValue EVLLo, EVLHi;
1687 std::tie(EVLLo, EVLHi) =
1688 DAG.SplitEVL(N->getOperand(3), N->getValueType(0), dl);
1689
1690 Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(),
1691 {LHSLo, RHSLo, MaskLo, EVLLo}, Flags);
1692 Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(),
1693 {LHSHi, RHSHi, MaskHi, EVLHi}, Flags);
1694}
1695
1696void DAGTypeLegalizer::SplitVecRes_MaskedBinOp(SDNode *N, SDValue &Lo,
1697 SDValue &Hi) {
1698 SDValue LHSLo, LHSHi;
1699 GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
1700 SDValue RHSLo, RHSHi;
1701 GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
1702 auto [MaskLo, MaskHi] = SplitMask(N->getOperand(2));
1703 SDLoc dl(N);
1704
1705 const SDNodeFlags Flags = N->getFlags();
1706 unsigned Opcode = N->getOpcode();
1707 Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, MaskLo,
1708 Flags);
1709 Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, MaskHi,
1710 Flags);
1711}
1712
1713void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
1714 SDValue &Hi) {
1715 SDValue Op0Lo, Op0Hi;
1716 GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi);
1717 SDValue Op1Lo, Op1Hi;
1718 GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi);
1719 SDValue Op2Lo, Op2Hi;
1720 GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi);
1721 SDLoc dl(N);
1722
1723 const SDNodeFlags Flags = N->getFlags();
1724 unsigned Opcode = N->getOpcode();
1725 if (N->getNumOperands() == 3) {
1726 Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, Op2Lo, Flags);
1727 Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(), Op0Hi, Op1Hi, Op2Hi, Flags);
1728 return;
1729 }
1730
1731 assert(N->getNumOperands() == 5 && "Unexpected number of operands!");
1732 assert(N->isVPOpcode() && "Expected VP opcode");
1733
1734 SDValue MaskLo, MaskHi;
1735 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
1736
1737 SDValue EVLLo, EVLHi;
1738 std::tie(EVLLo, EVLHi) =
1739 DAG.SplitEVL(N->getOperand(4), N->getValueType(0), dl);
1740
1741 Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(),
1742 {Op0Lo, Op1Lo, Op2Lo, MaskLo, EVLLo}, Flags);
1743 Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(),
1744 {Op0Hi, Op1Hi, Op2Hi, MaskHi, EVLHi}, Flags);
1745}
1746
1747void DAGTypeLegalizer::SplitVecRes_CMP(SDNode *N, SDValue &Lo, SDValue &Hi) {
1748 LLVMContext &Ctxt = *DAG.getContext();
1749 SDLoc dl(N);
1750
1751 SDValue LHS = N->getOperand(0);
1752 SDValue RHS = N->getOperand(1);
1753
1754 SDValue LHSLo, LHSHi, RHSLo, RHSHi;
1755 if (getTypeAction(LHS.getValueType()) == TargetLowering::TypeSplitVector) {
1756 GetSplitVector(LHS, LHSLo, LHSHi);
1757 GetSplitVector(RHS, RHSLo, RHSHi);
1758 } else {
1759 std::tie(LHSLo, LHSHi) = DAG.SplitVector(LHS, dl);
1760 std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, dl);
1761 }
1762
1763 EVT SplitResVT = N->getValueType(0).getHalfNumVectorElementsVT(Ctxt);
1764 Lo = DAG.getNode(N->getOpcode(), dl, SplitResVT, LHSLo, RHSLo);
1765 Hi = DAG.getNode(N->getOpcode(), dl, SplitResVT, LHSHi, RHSHi);
1766}
1767
1768void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
1769 SDValue LHSLo, LHSHi;
1770 GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
1771 SDValue RHSLo, RHSHi;
1772 GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
1773 SDLoc dl(N);
1774 SDValue Op2 = N->getOperand(2);
1775
1776 unsigned Opcode = N->getOpcode();
1777 Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2,
1778 N->getFlags());
1779 Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2,
1780 N->getFlags());
1781}
1782
/// Split the result of a BITCAST whose destination is a vector.
void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
                                           SDValue &Hi) {
  // We know the result is a vector. The input may be either a vector or a
  // scalar value.
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
  SDLoc dl(N);

  SDValue InOp = N->getOperand(0);
  EVT InVT = InOp.getValueType();

  // Handle some special cases efficiently.
  switch (getTypeAction(InVT)) {
    break;
    // A scalar to vector conversion, where the scalar needs expansion.
    // If the vector is being split in two then we can just convert the
    // expanded pieces.
    if (LoVT == HiVT) {
      GetExpandedOp(InOp, Lo, Hi);
      // Expanded pieces are stored little-endian; fix the order on BE hosts.
      if (DAG.getDataLayout().isBigEndian())
        std::swap(Lo, Hi);
      Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
      Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
      return;
    }
    break;
    // If the input is a vector that needs to be split, convert each split
    // piece of the input now.
    GetSplitVector(InOp, Lo, Hi);
    Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
    Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
    return;
    report_fatal_error("Scalarization of scalable vectors is not supported.");
  }

  if (LoVT.isScalableVector()) {
    // For scalable results, split the input operand and bitcast the pieces.
    auto [InLo, InHi] = DAG.SplitVectorOperand(N, 0);
    Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, InLo);
    Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, InHi);
    return;
  }

  // In the general case, convert the input to an integer and split it by hand.
  EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
  EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
  if (DAG.getDataLayout().isBigEndian())
    std::swap(LoIntVT, HiIntVT);

  SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi);

  if (DAG.getDataLayout().isBigEndian())
    std::swap(Lo, Hi);
  Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
  Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
}
1848
/// Split a LOOP_DEPENDENCE_MASK result into Lo/Hi halves by emitting the same
/// operation on each half with an adjusted lane offset for the Hi half.
void DAGTypeLegalizer::SplitVecRes_LOOP_DEPENDENCE_MASK(SDNode *N, SDValue &Lo,
                                                        SDValue &Hi) {
  SDLoc DL(N);
  EVT LoVT, HiVT;
  SDValue PtrA = N->getOperand(0);
  SDValue PtrB = N->getOperand(1);
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // The lane offset for the "Lo" half of the mask is unchanged.
  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, PtrA, PtrB,
                   /*ElementSizeInBytes=*/N->getOperand(2),
                   /*LaneOffset=*/N->getOperand(3));
  // The lane offset for the "Hi" half of the mask is incremented by the number
  // of elements in the "Lo" half.
  unsigned LaneOffset =
  // Note: The lane offset is implicitly scalable for scalable masks.
  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, PtrA, PtrB,
                   /*ElementSizeInBytes=*/N->getOperand(2),
                   /*LaneOffset=*/DAG.getConstant(LaneOffset, DL, MVT::i64));
}
1870
1871void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
1872 SDValue &Hi) {
1873 EVT LoVT, HiVT;
1874 SDLoc dl(N);
1875 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
1876 unsigned LoNumElts = LoVT.getVectorNumElements();
1877 SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts);
1878 Lo = DAG.getBuildVector(LoVT, dl, LoOps);
1879
1880 SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end());
1881 Hi = DAG.getBuildVector(HiVT, dl, HiOps);
1882}
1883
1884void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo,
1885 SDValue &Hi) {
1886 assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS");
1887 SDLoc dl(N);
1888 unsigned NumSubvectors = N->getNumOperands() / 2;
1889 if (NumSubvectors == 1) {
1890 Lo = N->getOperand(0);
1891 Hi = N->getOperand(1);
1892 return;
1893 }
1894
1895 EVT LoVT, HiVT;
1896 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
1897
1898 SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors);
1899 Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps);
1900
1901 SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end());
1902 Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps);
1903}
1904
1905void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
1906 SDValue &Hi) {
1907 SDValue Vec = N->getOperand(0);
1908 SDValue Idx = N->getOperand(1);
1909 SDLoc dl(N);
1910
1911 EVT LoVT, HiVT;
1912 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
1913
1914 Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
1915 uint64_t IdxVal = Idx->getAsZExtVal();
1916 Hi = DAG.getNode(
1917 ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
1918 DAG.getVectorIdxConstant(IdxVal + LoVT.getVectorMinNumElements(), dl));
1919}
1920
void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
                                                    SDValue &Hi) {
  // Split the result of inserting SubVec into Vec at index Idx. Fast paths
  // insert directly into one of the split halves; the general case spills the
  // vector to a stack slot, stores the subvector over it, then reloads both
  // halves.
  SDValue Vec = N->getOperand(0);
  SDValue SubVec = N->getOperand(1);
  SDValue Idx = N->getOperand(2);
  SDLoc dl(N);
  GetSplitVector(Vec, Lo, Hi);

  EVT VecVT = Vec.getValueType();
  EVT LoVT = Lo.getValueType();
  EVT SubVecVT = SubVec.getValueType();
  unsigned VecElems = VecVT.getVectorMinNumElements();
  unsigned SubElems = SubVecVT.getVectorMinNumElements();
  unsigned LoElems = LoVT.getVectorMinNumElements();

  // If we know the index is in the first half, and we know the subvector
  // doesn't cross the boundary between the halves, we can avoid spilling the
  // vector, and insert into the lower half of the split vector directly.
  unsigned IdxVal = Idx->getAsZExtVal();
  if (IdxVal + SubElems <= LoElems) {
    Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
    return;
  }
  // Similarly if the subvector is fully in the high half, but mind that we
  // can't tell whether a fixed-length subvector is fully within the high half
  // of a scalable vector.
  if (VecVT.isScalableVector() == SubVecVT.isScalableVector() &&
      IdxVal >= LoElems && IdxVal + SubElems <= VecElems) {
    Hi = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, Hi.getValueType(), Hi, SubVec,
                     DAG.getVectorIdxConstant(IdxVal - LoElems, dl));
    return;
  }

  // Inserting an i1 subvector into an undef vector: if widening the subvector
  // yields exactly the destination type, just split the widened value.
  if (getTypeAction(SubVecVT) == TargetLowering::TypeWidenVector &&
      Vec.isUndef() && SubVecVT.getVectorElementType() == MVT::i1) {
    SDValue WideSubVec = GetWidenedVector(SubVec);
    if (WideSubVec.getValueType() == VecVT) {
      std::tie(Lo, Hi) = DAG.SplitVector(WideSubVec, SDLoc(WideSubVec));
      return;
    }
  }

  // Spill the vector to the stack.
  // In cases where the vector is illegal it will be broken down into parts
  // and stored in parts - we should use the alignment for the smallest part.
  Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
      DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
                               SmallestAlign);

  // Store the new subvector into the specified index.
  SDValue SubVecPtr =
      TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx);
  Store = DAG.getStore(Store, dl, SubVec, SubVecPtr,

  // Load the Lo part from the stack slot.
  Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, PtrInfo,
                   SmallestAlign);

  // Increment the pointer to the other part.
  auto *Load = cast<LoadSDNode>(Lo);
  MachinePointerInfo MPI = Load->getPointerInfo();
  IncrementPointer(Load, LoVT, MPI, StackPtr);

  // Load the Hi part from the stack slot.
  Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MPI, SmallestAlign);
}
1994
1995// Handle splitting an FP where the second operand does not match the first
1996// type. The second operand may be a scalar, or a vector that has exactly as
1997// many elements as the first
1998void DAGTypeLegalizer::SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo,
1999 SDValue &Hi) {
2000 SDValue LHSLo, LHSHi;
2001 GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
2002 SDLoc DL(N);
2003
2004 SDValue RHSLo, RHSHi;
2005 SDValue RHS = N->getOperand(1);
2006 EVT RHSVT = RHS.getValueType();
2007 if (RHSVT.isVector()) {
2008 if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector)
2009 GetSplitVector(RHS, RHSLo, RHSHi);
2010 else
2011 std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS));
2012
2013 Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHSLo);
2014 Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHSHi);
2015 } else {
2016 Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHS);
2017 Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHS);
2018 }
2019}
2020
2021void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo,
2022 SDValue &Hi) {
2023 SDLoc DL(N);
2024 SDValue ArgLo, ArgHi;
2025 SDValue Test = N->getOperand(1);
2026 SDValue FpValue = N->getOperand(0);
2027 if (getTypeAction(FpValue.getValueType()) == TargetLowering::TypeSplitVector)
2028 GetSplitVector(FpValue, ArgLo, ArgHi);
2029 else
2030 std::tie(ArgLo, ArgHi) = DAG.SplitVector(FpValue, SDLoc(FpValue));
2031 EVT LoVT, HiVT;
2032 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
2033
2034 Lo = DAG.getNode(ISD::IS_FPCLASS, DL, LoVT, ArgLo, Test, N->getFlags());
2035 Hi = DAG.getNode(ISD::IS_FPCLASS, DL, HiVT, ArgHi, Test, N->getFlags());
2036}
2037
2038void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo,
2039 SDValue &Hi) {
2040 SDValue LHSLo, LHSHi;
2041 GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
2042 SDLoc dl(N);
2043
2044 EVT LoVT, HiVT;
2045 std::tie(LoVT, HiVT) =
2046 DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT());
2047
2048 Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
2049 DAG.getValueType(LoVT));
2050 Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
2051 DAG.getValueType(HiVT));
2052}
2053
2054void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo,
2055 SDValue &Hi) {
2056 unsigned Opcode = N->getOpcode();
2057 SDValue N0 = N->getOperand(0);
2058
2059 SDLoc dl(N);
2060 SDValue InLo, InHi;
2061
2062 if (getTypeAction(N0.getValueType()) == TargetLowering::TypeSplitVector)
2063 GetSplitVector(N0, InLo, InHi);
2064 else
2065 std::tie(InLo, InHi) = DAG.SplitVectorOperand(N, 0);
2066
2067 EVT InLoVT = InLo.getValueType();
2068 unsigned InNumElements = InLoVT.getVectorNumElements();
2069
2070 EVT OutLoVT, OutHiVT;
2071 std::tie(OutLoVT, OutHiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
2072 unsigned OutNumElements = OutLoVT.getVectorNumElements();
2073 assert((2 * OutNumElements) <= InNumElements &&
2074 "Illegal extend vector in reg split");
2075
2076 // *_EXTEND_VECTOR_INREG instructions extend the lowest elements of the
2077 // input vector (i.e. we only use InLo):
2078 // OutLo will extend the first OutNumElements from InLo.
2079 // OutHi will extend the next OutNumElements from InLo.
2080
2081 // Shuffle the elements from InLo for OutHi into the bottom elements to
2082 // create a 'fake' InHi.
2083 SmallVector<int, 8> SplitHi(InNumElements, -1);
2084 for (unsigned i = 0; i != OutNumElements; ++i)
2085 SplitHi[i] = i + OutNumElements;
2086 InHi = DAG.getVectorShuffle(InLoVT, dl, InLo, DAG.getPOISON(InLoVT), SplitHi);
2087
2088 Lo = DAG.getNode(Opcode, dl, OutLoVT, InLo);
2089 Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi);
2090}
2091
void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
                                              SDValue &Hi) {
  // Split a strict (chained) FP operation: every vector operand is split and
  // two half-width strict nodes are built, each starting from the incoming
  // chain; the two result chains are then merged with a TokenFactor.
  unsigned NumOps = N->getNumOperands();
  SDValue Chain = N->getOperand(0);
  EVT LoVT, HiVT;
  SDLoc dl(N);
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // The Chain is the first operand.
  OpsLo[0] = Chain;
  OpsHi[0] = Chain;

  // Now process the remaining operands.
  for (unsigned i = 1; i < NumOps; ++i) {
    SDValue Op = N->getOperand(i);
    // Non-vector operands are passed through unchanged to both halves.
    SDValue OpLo = Op;
    SDValue OpHi = Op;

    EVT InVT = Op.getValueType();
    if (InVT.isVector()) {
      // If the input also splits, handle it directly for a
      // compile time speedup. Otherwise split it by hand.
      if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
        GetSplitVector(Op, OpLo, OpHi);
      else
        std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i);
    }

    OpsLo[i] = OpLo;
    OpsHi[i] = OpHi;
  }

  // Each half produces a value result plus a chain result.
  EVT LoValueVTs[] = {LoVT, MVT::Other};
  EVT HiValueVTs[] = {HiVT, MVT::Other};
  Lo = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(LoValueVTs), OpsLo,
                   N->getFlags());
  Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(HiValueVTs), OpsHi,
                   N->getFlags());

  // Build a factor node to remember that this Op is independent of the
  // other one.
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                      Lo.getValue(1), Hi.getValue(1));

  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(N, 1), Chain);
}
2143
SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) {
  // Unroll a strict FP vector op into one scalar strict op per element, each
  // started from the incoming chain, then rejoin the per-element chains with
  // a TokenFactor and rebuild the result as a BUILD_VECTOR.
  // ResNE selects the result element count (0 means "same as the input").
  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  unsigned NE = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc dl(N);

  SmallVector<SDValue, 4> Operands(N->getNumOperands());

  // If ResNE is 0, fully unroll the vector op.
  if (ResNE == 0)
    ResNE = NE;
  else if (NE > ResNE)
    NE = ResNE;

  //The results of each unrolled operation, including the chain.
  SDVTList ChainVTs = DAG.getVTList(EltVT, MVT::Other);

  unsigned i;
  for (i = 0; i != NE; ++i) {
    Operands[0] = Chain;
    // Extract element i from every vector operand; scalar operands are
    // reused as-is for each unrolled iteration.
    for (unsigned j = 1, e = N->getNumOperands(); j != e; ++j) {
      SDValue Operand = N->getOperand(j);
      EVT OperandVT = Operand.getValueType();
      if (OperandVT.isVector()) {
        EVT OperandEltVT = OperandVT.getVectorElementType();
        Operands[j] = DAG.getExtractVectorElt(dl, OperandEltVT, Operand, i);
      } else {
        Operands[j] = Operand;
      }
    }
    SDValue Scalar =
        DAG.getNode(N->getOpcode(), dl, ChainVTs, Operands, N->getFlags());

    //Add in the scalar as well as its chain value to the
    //result vectors.
    Scalars.push_back(Scalar);
    Chains.push_back(Scalar.getValue(1));
  }

  // Pad any remaining result elements with poison.
  for (; i < ResNE; ++i)
    Scalars.push_back(DAG.getPOISON(EltVT));

  // Build a new factor node to connect the chain back together.
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
  ReplaceValueWith(SDValue(N, 1), Chain);

  // Create a new BUILD_VECTOR node
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, ResNE);
  return DAG.getBuildVector(VecVT, dl, Scalars);
}
2197
2198void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
2199 SDValue &Lo, SDValue &Hi) {
2200 SDLoc dl(N);
2201 EVT ResVT = N->getValueType(0);
2202 EVT OvVT = N->getValueType(1);
2203 EVT LoResVT, HiResVT, LoOvVT, HiOvVT;
2204 std::tie(LoResVT, HiResVT) = DAG.GetSplitDestVTs(ResVT);
2205 std::tie(LoOvVT, HiOvVT) = DAG.GetSplitDestVTs(OvVT);
2206
2207 SDValue LoLHS, HiLHS, LoRHS, HiRHS;
2208 if (getTypeAction(ResVT) == TargetLowering::TypeSplitVector) {
2209 GetSplitVector(N->getOperand(0), LoLHS, HiLHS);
2210 GetSplitVector(N->getOperand(1), LoRHS, HiRHS);
2211 } else {
2212 std::tie(LoLHS, HiLHS) = DAG.SplitVectorOperand(N, 0);
2213 std::tie(LoRHS, HiRHS) = DAG.SplitVectorOperand(N, 1);
2214 }
2215
2216 unsigned Opcode = N->getOpcode();
2217 SDVTList LoVTs = DAG.getVTList(LoResVT, LoOvVT);
2218 SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT);
2219 SDNode *LoNode =
2220 DAG.getNode(Opcode, dl, LoVTs, {LoLHS, LoRHS}, N->getFlags()).getNode();
2221 SDNode *HiNode =
2222 DAG.getNode(Opcode, dl, HiVTs, {HiLHS, HiRHS}, N->getFlags()).getNode();
2223
2224 Lo = SDValue(LoNode, ResNo);
2225 Hi = SDValue(HiNode, ResNo);
2226
2227 // Replace the other vector result not being explicitly split here.
2228 unsigned OtherNo = 1 - ResNo;
2229 EVT OtherVT = N->getValueType(OtherNo);
2230 if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) {
2231 SetSplitVector(SDValue(N, OtherNo),
2232 SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo));
2233 } else {
2234 SDValue OtherVal = DAG.getNode(
2235 ISD::CONCAT_VECTORS, dl, OtherVT,
2236 SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo));
2237 ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
2238 }
2239}
2240
void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
                                                     SDValue &Hi) {
  // Split the result of inserting a scalar element into a vector. A constant
  // index can be routed straight into one split half; otherwise the vector is
  // spilled to a stack slot, the element is stored into it, and both halves
  // are reloaded.
  SDValue Vec = N->getOperand(0);
  SDValue Elt = N->getOperand(1);
  SDValue Idx = N->getOperand(2);
  SDLoc dl(N);
  GetSplitVector(Vec, Lo, Hi);

  if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
    unsigned IdxVal = CIdx->getZExtValue();
    unsigned LoNumElts = Lo.getValueType().getVectorMinNumElements();
    if (IdxVal < LoNumElts) {
      // The index is statically known to land in the low half.
      Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl,
                       Lo.getValueType(), Lo, Elt, Idx);
      return;
    } else if (!Vec.getValueType().isScalableVector()) {
      // For fixed-width vectors the index provably lands in the high half.
      // (For scalable vectors a fixed index may still fall in the low half.)
      Hi = DAG.getInsertVectorElt(dl, Hi, Elt, IdxVal - LoNumElts);
      return;
    }
  }

  // Make the vector elements byte-addressable if they aren't already.
  EVT VecVT = Vec.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  if (!EltVT.isByteSized()) {
    EltVT = EltVT.changeTypeToInteger().getRoundIntegerType(*DAG.getContext());
    VecVT = VecVT.changeElementType(*DAG.getContext(), EltVT);
    Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
    // Extend the element type to match if needed.
    if (EltVT.bitsGT(Elt.getValueType()))
      Elt = DAG.getNode(ISD::ANY_EXTEND, dl, EltVT, Elt);
  }

  // Spill the vector to the stack.
  // In cases where the vector is illegal it will be broken down into parts
  // and stored in parts - we should use the alignment for the smallest part.
  Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
      DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
                               SmallestAlign);

  // Store the new element. This may be larger than the vector element type,
  // so use a truncating store.
  SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);
  Store = DAG.getTruncStore(
      Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT,
      commonAlignment(SmallestAlign,
                      EltVT.getFixedSizeInBits() / 8));

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);

  // Load the Lo part from the stack slot.
  Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo, SmallestAlign);

  // Increment the pointer to the other part.
  auto Load = cast<LoadSDNode>(Lo);
  MachinePointerInfo MPI = Load->getPointerInfo();
  IncrementPointer(Load, LoVT, MPI, StackPtr);

  Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, MPI, SmallestAlign);

  // If we adjusted the original type, we need to truncate the results.
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
  if (LoVT != Lo.getValueType())
    Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Lo);
  if (HiVT != Hi.getValueType())
    Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi);
}
2315
2316void DAGTypeLegalizer::SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo,
2317 SDValue &Hi) {
2318 EVT LoVT, HiVT;
2319 SDLoc dl(N);
2320 assert(N->getValueType(0).isScalableVector() &&
2321 "Only scalable vectors are supported for STEP_VECTOR");
2322 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
2323 SDValue Step = N->getOperand(0);
2324
2325 Lo = DAG.getNode(ISD::STEP_VECTOR, dl, LoVT, Step);
2326
2327 // Hi = Lo + (EltCnt * Step)
2328 EVT EltVT = Step.getValueType();
2329 APInt StepVal = Step->getAsAPIntVal();
2330 SDValue StartOfHi =
2331 DAG.getVScale(dl, EltVT, StepVal * LoVT.getVectorMinNumElements());
2332 StartOfHi = DAG.getSExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType());
2333 StartOfHi = DAG.getNode(ISD::SPLAT_VECTOR, dl, HiVT, StartOfHi);
2334
2335 Hi = DAG.getNode(ISD::STEP_VECTOR, dl, HiVT, Step);
2336 Hi = DAG.getNode(ISD::ADD, dl, HiVT, Hi, StartOfHi);
2337}
2338
2339void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo,
2340 SDValue &Hi) {
2341 EVT LoVT, HiVT;
2342 SDLoc dl(N);
2343 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
2344 Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0));
2345 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) {
2346 Hi = DAG.getPOISON(HiVT);
2347 } else {
2348 assert(N->getOpcode() == ISD::SPLAT_VECTOR && "Unexpected opcode");
2349 Hi = Lo;
2350 }
2351}
2352
2353void DAGTypeLegalizer::SplitVecRes_ATOMIC_LOAD(AtomicSDNode *LD, SDValue &Lo,
2354 SDValue &Hi) {
2355 assert(LD->getExtensionType() == ISD::NON_EXTLOAD &&
2356 "Extended load during type legalization!");
2357 SDLoc dl(LD);
2358 EVT VT = LD->getValueType(0);
2359 EVT LoVT, HiVT;
2360 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
2361
2362 SDValue Ch = LD->getChain();
2363 SDValue Ptr = LD->getBasePtr();
2364
2365 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
2366 EVT MemIntVT =
2367 EVT::getIntegerVT(*DAG.getContext(), LD->getMemoryVT().getSizeInBits());
2368 SDValue ALD = DAG.getAtomicLoad(LD->getExtensionType(), dl, MemIntVT, IntVT,
2369 Ch, Ptr, LD->getMemOperand());
2370
2371 EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits());
2372 EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits());
2373 SDValue ExtractLo, ExtractHi;
2374 SplitInteger(ALD, LoIntVT, HiIntVT, ExtractLo, ExtractHi);
2375
2376 Lo = DAG.getBitcast(LoVT, ExtractLo);
2377 Hi = DAG.getBitcast(HiVT, ExtractHi);
2378
2379 // Legalize the chain result - switch anything that used the old chain to
2380 // use the new one.
2381 ReplaceValueWith(SDValue(LD, 1), ALD.getValue(1));
2382}
2383
2384void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
2385 SDValue &Hi) {
2386 assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
2387 EVT LoVT, HiVT;
2388 SDLoc dl(LD);
2389 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
2390
2391 ISD::LoadExtType ExtType = LD->getExtensionType();
2392 SDValue Ch = LD->getChain();
2393 SDValue Ptr = LD->getBasePtr();
2394 SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
2395 EVT MemoryVT = LD->getMemoryVT();
2396 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
2397 AAMDNodes AAInfo = LD->getAAInfo();
2398
2399 EVT LoMemVT, HiMemVT;
2400 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
2401
2402 if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) {
2403 SDValue Value, NewChain;
2404 std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
2405 std::tie(Lo, Hi) = DAG.SplitVector(Value, dl);
2406 ReplaceValueWith(SDValue(LD, 1), NewChain);
2407 return;
2408 }
2409
2410 Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
2411 LD->getPointerInfo(), LoMemVT, LD->getBaseAlign(), MMOFlags,
2412 AAInfo);
2413
2414 MachinePointerInfo MPI;
2415 IncrementPointer(LD, LoMemVT, MPI, Ptr);
2416
2417 Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, MPI,
2418 HiMemVT, LD->getBaseAlign(), MMOFlags, AAInfo);
2419
2420 // Build a factor node to remember that this load is independent of the
2421 // other one.
2422 Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
2423 Hi.getValue(1));
2424
2425 // Legalize the chain result - switch anything that used the old chain to
2426 // use the new one.
2427 ReplaceValueWith(SDValue(LD, 1), Ch);
2428}
2429
void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo,
                                           SDValue &Hi) {
  // Split a VP (predicated, explicit-vector-length) load in two, splitting
  // the mask and EVL operands to match, and merge the chains afterwards.
  assert(LD->isUnindexed() && "Indexed VP load during type legalization!");
  EVT LoVT, HiVT;
  SDLoc dl(LD);
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));

  ISD::LoadExtType ExtType = LD->getExtensionType();
  SDValue Ch = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  SDValue Offset = LD->getOffset();
  assert(Offset.isUndef() && "Unexpected indexed variable-length load offset");
  Align Alignment = LD->getBaseAlign();
  SDValue Mask = LD->getMask();
  SDValue EVL = LD->getVectorLength();
  EVT MemoryVT = LD->getMemoryVT();

  // HiIsEmpty is set when the upper half of the memory type has no storage.
  EVT LoMemVT, HiMemVT;
  bool HiIsEmpty = false;
  std::tie(LoMemVT, HiMemVT) =
      DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty);

  // Split Mask operand
  SDValue MaskLo, MaskHi;
  if (Mask.getOpcode() == ISD::SETCC) {
    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
  } else {
    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
      GetSplitVector(Mask, MaskLo, MaskHi);
    else
      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
  }

  // Split EVL operand
  SDValue EVLLo, EVLHi;
  std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, LD->getValueType(0), dl);

  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      LD->getPointerInfo(), MachineMemOperand::MOLoad,
      LocationSize::beforeOrAfterPointer(), Alignment, LD->getAAInfo(),
      LD->getRanges());

  Lo =
      DAG.getLoadVP(LD->getAddressingMode(), ExtType, LoVT, dl, Ch, Ptr, Offset,
                    MaskLo, EVLLo, LoMemVT, MMO, LD->isExpandingLoad());

  if (HiIsEmpty) {
    // The hi vp_load has zero storage size. We therefore simply set it to
    // the low vp_load and rely on subsequent removal from the chain.
    Hi = Lo;
  } else {
    // Generate hi vp_load.
    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
                                     LD->isExpandingLoad());

    // For scalable low halves the byte offset is not a compile-time constant,
    // so fall back to an address-space-only pointer info.
    MachinePointerInfo MPI;
    if (LoMemVT.isScalableVector())
      MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace());
    else
      MPI = LD->getPointerInfo().getWithOffset(
          LoMemVT.getStoreSize().getFixedValue());

    MMO = DAG.getMachineFunction().getMachineMemOperand(
        Alignment, LD->getAAInfo(), LD->getRanges());

    Hi = DAG.getLoadVP(LD->getAddressingMode(), ExtType, HiVT, dl, Ch, Ptr,
                       Offset, MaskHi, EVLHi, HiMemVT, MMO,
                       LD->isExpandingLoad());
  }

  // Build a factor node to remember that this load is independent of the
  // other one.
  Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                   Hi.getValue(1));

  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(LD, 1), Ch);
}
2510
2511void DAGTypeLegalizer::SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo,
2512 SDValue &Hi) {
2513 SDLoc dl(LD);
2514 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(LD->getValueType(0));
2515
2516 SDValue Ch = LD->getChain();
2517 SDValue Ptr = LD->getBasePtr();
2518 Align Alignment = LD->getBaseAlign();
2519 SDValue Mask = LD->getMask();
2520 SDValue EVL = LD->getVectorLength();
2521
2522 // Split Mask operand
2523 SDValue MaskLo, MaskHi;
2524 if (Mask.getOpcode() == ISD::SETCC) {
2525 SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
2526 } else {
2527 if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
2528 GetSplitVector(Mask, MaskLo, MaskHi);
2529 else
2530 std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
2531 }
2532
2533 // Split EVL operand
2534 auto [EVLLo, EVLHi] = DAG.SplitEVL(EVL, LD->getValueType(0), dl);
2535
2536 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
2537 LD->getPointerInfo(), MachineMemOperand::MOLoad,
2538 LocationSize::beforeOrAfterPointer(), Alignment, LD->getAAInfo(),
2539 LD->getRanges());
2540
2541 Lo = DAG.getLoadFFVP(LoVT, dl, Ch, Ptr, MaskLo, EVLLo, MMO);
2542
2543 // Fill the upper half with poison.
2544 Hi = DAG.getPOISON(HiVT);
2545
2546 ReplaceValueWith(SDValue(LD, 1), Lo.getValue(1));
2547 ReplaceValueWith(SDValue(LD, 2), Lo.getValue(2));
2548}
2549
void DAGTypeLegalizer::SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD,
                                                   SDValue &Lo, SDValue &Hi) {
  // Split a VP strided load in two. The mask and EVL are split to match; the
  // high half loads from the base address advanced by LoEVL * Stride bytes.
  assert(SLD->isUnindexed() &&
         "Indexed VP strided load during type legalization!");
  assert(SLD->getOffset().isUndef() &&
         "Unexpected indexed variable-length load offset");

  SDLoc DL(SLD);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(SLD->getValueType(0));

  // HiIsEmpty is set when the upper half of the memory type has no storage.
  EVT LoMemVT, HiMemVT;
  bool HiIsEmpty = false;
  std::tie(LoMemVT, HiMemVT) =
      DAG.GetDependentSplitDestVTs(SLD->getMemoryVT(), LoVT, &HiIsEmpty);

  // Split the mask, reusing an existing SETCC split when possible.
  SDValue Mask = SLD->getMask();
  SDValue LoMask, HiMask;
  if (Mask.getOpcode() == ISD::SETCC) {
    SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask);
  } else {
    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
      GetSplitVector(Mask, LoMask, HiMask);
    else
      std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
  }

  // Split the explicit vector length between the halves.
  SDValue LoEVL, HiEVL;
  std::tie(LoEVL, HiEVL) =
      DAG.SplitEVL(SLD->getVectorLength(), SLD->getValueType(0), DL);

  // Generate the low vp_strided_load
  Lo = DAG.getStridedLoadVP(
      SLD->getAddressingMode(), SLD->getExtensionType(), LoVT, DL,
      SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(), SLD->getStride(),
      LoMask, LoEVL, LoMemVT, SLD->getMemOperand(), SLD->isExpandingLoad());

  if (HiIsEmpty) {
    // The high vp_strided_load has zero storage size. We therefore simply set
    // it to the low vp_strided_load and rely on subsequent removal from the
    // chain.
    Hi = Lo;
  } else {
    // Generate the high vp_strided_load.
    // To calculate the high base address, we need to sum to the low base
    // address stride number of bytes for each element already loaded by low,
    // that is: Ptr = Ptr + (LoEVL * Stride)
    EVT PtrVT = SLD->getBasePtr().getValueType();
        DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL,
                    DAG.getSExtOrTrunc(SLD->getStride(), DL, PtrVT));
    SDValue Ptr =
        DAG.getNode(ISD::ADD, DL, PtrVT, SLD->getBasePtr(), Increment);

    // For a scalable low half, reduce the alignment to what its minimum byte
    // size still guarantees.
    Align Alignment = SLD->getBaseAlign();
    if (LoMemVT.isScalableVector())
      Alignment = commonAlignment(
          Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8);

    MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
        MachinePointerInfo(SLD->getPointerInfo().getAddrSpace()),
        Alignment, SLD->getAAInfo(), SLD->getRanges());

    Hi = DAG.getStridedLoadVP(SLD->getAddressingMode(), SLD->getExtensionType(),
                              HiVT, DL, SLD->getChain(), Ptr, SLD->getOffset(),
                              SLD->getStride(), HiMask, HiEVL, HiMemVT, MMO,
                              SLD->isExpandingLoad());
  }

  // Build a factor node to remember that this load is independent of the
  // other one.
  SDValue Ch = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
                           Hi.getValue(1));

  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(SLD, 1), Ch);
}
2630
void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
                                         SDValue &Lo, SDValue &Hi) {
  // Split a masked load in two, splitting the mask and pass-through operands
  // to match, then merge the two chains with a TokenFactor.
  assert(MLD->isUnindexed() && "Indexed masked load during type legalization!");
  EVT LoVT, HiVT;
  SDLoc dl(MLD);
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));

  SDValue Ch = MLD->getChain();
  SDValue Ptr = MLD->getBasePtr();
  SDValue Offset = MLD->getOffset();
  assert(Offset.isUndef() && "Unexpected indexed masked load offset");
  SDValue Mask = MLD->getMask();
  SDValue PassThru = MLD->getPassThru();
  Align Alignment = MLD->getBaseAlign();
  ISD::LoadExtType ExtType = MLD->getExtensionType();
  MachineMemOperand::Flags MMOFlags = MLD->getMemOperand()->getFlags();

  // Split Mask operand
  SDValue MaskLo, MaskHi;
  if (Mask.getOpcode() == ISD::SETCC) {
    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
  } else {
    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
      GetSplitVector(Mask, MaskLo, MaskHi);
    else
      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
  }

  // HiIsEmpty is set when the upper half of the memory type has no storage.
  EVT MemoryVT = MLD->getMemoryVT();
  EVT LoMemVT, HiMemVT;
  bool HiIsEmpty = false;
  std::tie(LoMemVT, HiMemVT) =
      DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty);

  // Split the pass-through value, reusing an existing split when recorded.
  SDValue PassThruLo, PassThruHi;
  if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector)
    GetSplitVector(PassThru, PassThruLo, PassThruHi);
  else
    std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);

  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      Alignment, MLD->getAAInfo(), MLD->getRanges());

  Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT,
                         MMO, MLD->getAddressingMode(), ExtType,
                         MLD->isExpandingLoad());

  if (HiIsEmpty) {
    // The hi masked load has zero storage size. We therefore simply set it to
    // the low masked load and rely on subsequent removal from the chain.
    Hi = Lo;
  } else {
    // Generate hi masked load.
    Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
                                     MLD->isExpandingLoad());

    // For scalable low halves the byte offset is not a compile-time constant,
    // so fall back to an address-space-only pointer info.
    MachinePointerInfo MPI;
    if (LoMemVT.isScalableVector())
      MPI = MachinePointerInfo(MLD->getPointerInfo().getAddrSpace());
    else
      MPI = MLD->getPointerInfo().getWithOffset(
          LoMemVT.getStoreSize().getFixedValue());

    MMO = DAG.getMachineFunction().getMachineMemOperand(
        MPI, MMOFlags, LocationSize::beforeOrAfterPointer(), Alignment,
        MLD->getAAInfo(), MLD->getRanges());

    Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi,
                           HiMemVT, MMO, MLD->getAddressingMode(), ExtType,
                           MLD->isExpandingLoad());
  }

  // Build a factor node to remember that this load is independent of the
  // other one.
  Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                   Hi.getValue(1));

  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(MLD, 1), Ch);

}
2714
/// Split the vector result of a gather-style load (MGATHER or VP_GATHER) into
/// Lo/Hi halves. The mask, index, and (for MGATHER) passthru operands are
/// split to match, one conservative MMO covers both halves, and the two
/// result chains are rejoined with a TokenFactor that replaces the original
/// chain result.
void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo,
                                          SDValue &Hi, bool SplitSETCC) {
  EVT LoVT, HiVT;
  SDLoc dl(N);
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  SDValue Ch = N->getChain();
  SDValue Ptr = N->getBasePtr();
  // Mask/index/scale live on different node classes for the masked and VP
  // flavors; normalize them into one struct up front.
  struct Operands {
    SDValue Mask;
    SDValue Index;
    SDValue Scale;
  } Ops = [&]() -> Operands {
    if (auto *MSC = dyn_cast<MaskedGatherSDNode>(N)) {
      return {MSC->getMask(), MSC->getIndex(), MSC->getScale()};
    }
    auto *VPSC = cast<VPGatherSDNode>(N);
    return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale()};
  }();

  EVT MemoryVT = N->getMemoryVT();
  Align Alignment = N->getBaseAlign();

  // Split Mask operand. If the mask is itself a SETCC, split the compare
  // directly instead of its (possibly illegal) boolean-vector result.
  SDValue MaskLo, MaskHi;
  if (SplitSETCC && Ops.Mask.getOpcode() == ISD::SETCC) {
    SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi);
  } else {
    std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, dl);
  }

  EVT LoMemVT, HiMemVT;
  // Split MemoryVT
  std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);

  SDValue IndexHi, IndexLo;
  // NOTE(review): a continuation line of this condition was lost in the
  // pasted source (presumably `TargetLowering::TypeSplitVector)`) -- verify
  // against upstream before building.
  if (getTypeAction(Ops.Index.getValueType()) ==
    GetSplitVector(Ops.Index, IndexLo, IndexHi);
  else
    std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, dl);

  // A single MMO with an unbounded location conservatively covers both
  // half-gathers.
  MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(),
      Alignment, N->getAAInfo(), N->getRanges());

  if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) {
    SDValue PassThru = MGT->getPassThru();
    SDValue PassThruLo, PassThruHi;
    // NOTE(review): same dropped continuation here (presumably
    // `TargetLowering::TypeSplitVector)`) -- verify against upstream.
    if (getTypeAction(PassThru.getValueType()) ==
      GetSplitVector(PassThru, PassThruLo, PassThruHi);
    else
      std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl);

    ISD::LoadExtType ExtType = MGT->getExtensionType();
    ISD::MemIndexType IndexTy = MGT->getIndexType();

    SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Ops.Scale};
    Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl,
                             OpsLo, MMO, IndexTy, ExtType);

    SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Ops.Scale};
    Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl,
                             OpsHi, MMO, IndexTy, ExtType);
  } else {
    auto *VPGT = cast<VPGatherSDNode>(N);
    // VP gathers additionally carry an explicit vector length to split.
    SDValue EVLLo, EVLHi;
    std::tie(EVLLo, EVLHi) =
        DAG.SplitEVL(VPGT->getVectorLength(), MemoryVT, dl);

    SDValue OpsLo[] = {Ch, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo};
    Lo = DAG.getGatherVP(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo,
                         MMO, VPGT->getIndexType());

    SDValue OpsHi[] = {Ch, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi};
    Hi = DAG.getGatherVP(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi,
                         MMO, VPGT->getIndexType());
  }

  // Build a factor node to remember that this load is independent of the
  // other one.
  Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                   Hi.getValue(1));

  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(N, 1), Ch);
}
2805
/// Split a VECTOR_COMPRESS result. Because the compressed elements of the Hi
/// half must be packed immediately after the Lo half's survivors, the halves
/// are not independent; see the strategy comment below.
void DAGTypeLegalizer::SplitVecRes_VECTOR_COMPRESS(SDNode *N, SDValue &Lo,
                                                   SDValue &Hi) {
  // This is not "trivial", as there is a dependency between the two subvectors.
  // Depending on the number of 1s in the mask, the elements from the Hi vector
  // need to be moved to the Lo vector. Passthru values make this even harder.
  // We try to use VECTOR_COMPRESS if the target has custom lowering with
  // smaller types and passthru is undef, as it is most likely faster than the
  // fully expand path. Otherwise, just do the full expansion as one "big"
  // operation and then extract the Lo and Hi vectors from that. This gets
  // rid of VECTOR_COMPRESS and all other operands can be legalized later.
  SDLoc DL(N);
  EVT VecVT = N->getValueType(0);

  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT);
  // Probe successively halved types for target support of VECTOR_COMPRESS.
  bool HasCustomLowering = false;
  EVT CheckVT = LoVT;
  while (CheckVT.getVectorMinNumElements() > 1) {
    // TLI.isOperationLegalOrCustom requires a legal type, but we could have a
    // custom lowering for illegal types. So we do the checks separately.
    if (TLI.isOperationLegal(ISD::VECTOR_COMPRESS, CheckVT) ||
        TLI.isOperationCustom(ISD::VECTOR_COMPRESS, CheckVT)) {
      HasCustomLowering = true;
      break;
    }
    CheckVT = CheckVT.getHalfNumVectorElementsVT(*DAG.getContext());
  }

  SDValue Passthru = N->getOperand(2);
  if (!HasCustomLowering) {
    // No target support anywhere down the chain: expand fully, then split.
    SDValue Compressed = TLI.expandVECTOR_COMPRESS(N, DAG);
    std::tie(Lo, Hi) = DAG.SplitVector(Compressed, DL, LoVT, HiVT);
    return;
  }

  // Try to VECTOR_COMPRESS smaller vectors and combine via a stack store+load.
  SDValue Mask = N->getOperand(1);
  SDValue LoMask, HiMask;
  std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
  std::tie(LoMask, HiMask) = SplitMask(Mask);

  // Compress each half with a poison passthru; the passthru is reapplied via
  // VSELECT after the halves are recombined below.
  SDValue UndefPassthru = DAG.getPOISON(LoVT);
  Lo = DAG.getNode(ISD::VECTOR_COMPRESS, DL, LoVT, Lo, LoMask, UndefPassthru);
  Hi = DAG.getNode(ISD::VECTOR_COMPRESS, DL, HiVT, Hi, HiMask, UndefPassthru);

  SDValue StackPtr = DAG.CreateStackTemporary(
      VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false));
  MachineFunction &MF = DAG.getMachineFunction();
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(
      MF, cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex());

  EVT MaskVT = LoMask.getValueType();
  assert(MaskVT.getScalarType() == MVT::i1 && "Expected vector of i1s");

  // We store LoVec and then insert HiVec starting at offset=|1s| in LoMask.
  EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
                                    MaskVT.getVectorElementCount());
  SDValue WideMask = DAG.getNode(ISD::ZERO_EXTEND, DL, WideMaskVT, LoMask);
  SDValue Offset = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, WideMask);
  Offset = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Offset);

  SDValue Chain = DAG.getEntryNode();
  Chain = DAG.getStore(Chain, DL, Lo, StackPtr, PtrInfo);
  // NOTE(review): the MachinePointerInfo argument of this store was lost in
  // the pasted source (presumably `MachinePointerInfo::getUnknownStack(MF));`)
  // -- verify against upstream.
  Chain = DAG.getStore(Chain, DL, Hi, Offset,

  SDValue Compressed = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo);
  if (!Passthru.isUndef()) {
    // Re-introduce the passthru lanes that the poison-passthru compresses
    // above discarded.
    Compressed =
        DAG.getNode(ISD::VSELECT, DL, VecVT, Mask, Compressed, Passthru);
  }
  std::tie(Lo, Hi) = DAG.SplitVector(Compressed, DL);
}
2878
/// Split a vector SETCC (or VP_SETCC) result: split both compare operands and
/// emit one compare per half, carrying the condition code (and, for VP, the
/// split mask and EVL) through unchanged.
void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) {
  assert(N->getValueType(0).isVector() &&
         N->getOperand(0).getValueType().isVector() &&
         "Operand types must be vectors");

  EVT LoVT, HiVT;
  SDLoc DL(N);
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // If the input also splits, handle it directly. Otherwise split it by hand.
  SDValue LL, LH, RL, RH;
  // NOTE(review): a continuation line of this condition was lost in the
  // pasted source (presumably `TargetLowering::TypeSplitVector)`) -- verify
  // against upstream.
  if (getTypeAction(N->getOperand(0).getValueType()) ==
    GetSplitVector(N->getOperand(0), LL, LH);
  else
    std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);

  // NOTE(review): same dropped continuation here -- verify against upstream.
  if (getTypeAction(N->getOperand(1).getValueType()) ==
    GetSplitVector(N->getOperand(1), RL, RH);
  else
    std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);

  if (N->getOpcode() == ISD::SETCC) {
    // Operand 2 is the condition code; it applies identically to both halves.
    Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
    Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
  } else {
    assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode");
    // VP form: also split the mask (operand 3) and EVL (operand 4).
    SDValue MaskLo, MaskHi, EVLLo, EVLHi;
    std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
    std::tie(EVLLo, EVLHi) =
        DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL);
    Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2), MaskLo,
                     EVLLo);
    Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2), MaskHi,
                     EVLHi);
  }
}
2917
/// Split the result of a unary vector operation by applying the operation to
/// each half of the (split) input. Handles plain one-operand nodes, nodes
/// with a trailing non-vector operand (e.g. FP_ROUND's flag), and 3-operand
/// VP nodes whose mask/EVL must themselves be split.
void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
                                           SDValue &Hi) {
  // Get the dest types - they may not match the input types, e.g. int_to_fp.
  EVT LoVT, HiVT;
  SDLoc dl(N);
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));

  // If the input also splits, handle it directly for a compile time speedup.
  // Otherwise split it by hand.
  EVT InVT = N->getOperand(0).getValueType();
  if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
    GetSplitVector(N->getOperand(0), Lo, Hi);
  else
    std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);

  const SDNodeFlags Flags = N->getFlags();
  unsigned Opcode = N->getOpcode();
  if (N->getNumOperands() <= 2) {
    // These opcodes carry a second, non-vector operand that must be
    // replicated onto both halves.
    // NOTE(review): a third disjunct of this condition was lost in the
    // pasted source (line dropped during extraction) -- verify against
    // upstream.
    if (Opcode == ISD::FP_ROUND || Opcode == ISD::AssertNoFPClass ||
      Lo = DAG.getNode(Opcode, dl, LoVT, Lo, N->getOperand(1), Flags);
      Hi = DAG.getNode(Opcode, dl, HiVT, Hi, N->getOperand(1), Flags);
    } else {
      Lo = DAG.getNode(Opcode, dl, LoVT, Lo, Flags);
      Hi = DAG.getNode(Opcode, dl, HiVT, Hi, Flags);
    }
    return;
  }

  assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
  assert(N->isVPOpcode() && "Expected VP opcode");

  // VP unary op: split the mask (operand 1) and EVL (operand 2) as well.
  SDValue MaskLo, MaskHi;
  std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));

  SDValue EVLLo, EVLHi;
  std::tie(EVLLo, EVLHi) =
      DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);

  Lo = DAG.getNode(Opcode, dl, LoVT, {Lo, MaskLo, EVLLo}, Flags);
  Hi = DAG.getNode(Opcode, dl, HiVT, {Hi, MaskHi, EVLHi}, Flags);
}
2960
2961void DAGTypeLegalizer::SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo,
2962 SDValue &Hi) {
2963 SDLoc dl(N);
2964 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0));
2965
2966 // If the input also splits, handle it directly for a compile time speedup.
2967 // Otherwise split it by hand.
2968 EVT InVT = N->getOperand(0).getValueType();
2969 if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
2970 GetSplitVector(N->getOperand(0), Lo, Hi);
2971 else
2972 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
2973
2974 auto *AddrSpaceCastN = cast<AddrSpaceCastSDNode>(N);
2975 unsigned SrcAS = AddrSpaceCastN->getSrcAddressSpace();
2976 unsigned DestAS = AddrSpaceCastN->getDestAddressSpace();
2977 Lo = DAG.getAddrSpaceCast(dl, LoVT, Lo, SrcAS, DestAS);
2978 Hi = DAG.getAddrSpaceCast(dl, HiVT, Hi, SrcAS, DestAS);
2979}
2980
2981void DAGTypeLegalizer::SplitVecRes_UnaryOpWithTwoResults(SDNode *N,
2982 unsigned ResNo,
2983 SDValue &Lo,
2984 SDValue &Hi) {
2985 SDLoc dl(N);
2986 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0));
2987 auto [LoVT1, HiVT1] = DAG.GetSplitDestVTs(N->getValueType(1));
2988
2989 // If the input also splits, handle it directly for a compile time speedup.
2990 // Otherwise split it by hand.
2991 EVT InVT = N->getOperand(0).getValueType();
2992 if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
2993 GetSplitVector(N->getOperand(0), Lo, Hi);
2994 else
2995 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
2996
2997 Lo = DAG.getNode(N->getOpcode(), dl, {LoVT, LoVT1}, Lo, N->getFlags());
2998 Hi = DAG.getNode(N->getOpcode(), dl, {HiVT, HiVT1}, Hi, N->getFlags());
2999
3000 SDNode *HiNode = Hi.getNode();
3001 SDNode *LoNode = Lo.getNode();
3002
3003 // Replace the other vector result not being explicitly split here.
3004 unsigned OtherNo = 1 - ResNo;
3005 EVT OtherVT = N->getValueType(OtherNo);
3006 if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) {
3007 SetSplitVector(SDValue(N, OtherNo), SDValue(LoNode, OtherNo),
3008 SDValue(HiNode, OtherNo));
3009 } else {
3010 SDValue OtherVal =
3011 DAG.getNode(ISD::CONCAT_VECTORS, dl, OtherVT, SDValue(LoNode, OtherNo),
3012 SDValue(HiNode, OtherNo));
3013 ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
3014 }
3015}
3016
/// Split the result of an extend operation. When the extend more than doubles
/// the element width, this prefers an incremental (one-step) extend before
/// splitting, to avoid fragmenting the source vector too far; otherwise it
/// falls back to the generic unary split.
void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
                                            SDValue &Hi) {
  SDLoc dl(N);
  EVT SrcVT = N->getOperand(0).getValueType();
  EVT DestVT = N->getValueType(0);
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT);

  // We can do better than a generic split operation if the extend is doing
  // more than just doubling the width of the elements and the following are
  // true:
  //   - The number of vector elements is even,
  //   - the source type is legal,
  //   - the type of a split source is illegal,
  //   - the type of an extended (by doubling element size) source is legal, and
  //   - the type of that extended source when split is legal.
  //
  // This won't necessarily completely legalize the operation, but it will
  // more effectively move in the right direction and prevent falling down
  // to scalarization in many cases due to the input vector being split too
  // far.
  if (SrcVT.getVectorElementCount().isKnownEven() &&
      SrcVT.getScalarSizeInBits() * 2 < DestVT.getScalarSizeInBits()) {
    LLVMContext &Ctx = *DAG.getContext();
    EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx);
    EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx);

    EVT SplitLoVT, SplitHiVT;
    std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT);
    if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) &&
        TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) {
      LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:";
                 N->dump(&DAG); dbgs() << "\n");
      if (!N->isVPOpcode()) {
        // Extend the source vector by one step.
        SDValue NewSrc =
            DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0));
        // Get the low and high halves of the new, extended one step, vector.
        std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);
        // Extend those vector halves the rest of the way.
        Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
        Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
        return;
      }

      // VP path: the mask (operand 1) and EVL (operand 2) ride along on the
      // one-step extend and are split for the final extends.
      // Extend the source vector by one step.
      SDValue NewSrc =
          DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0),
                      N->getOperand(1), N->getOperand(2));
      // Get the low and high halves of the new, extended one step, vector.
      std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl);

      SDValue MaskLo, MaskHi;
      std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));

      SDValue EVLLo, EVLHi;
      std::tie(EVLLo, EVLHi) =
          DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
      // Extend those vector halves the rest of the way.
      Lo = DAG.getNode(N->getOpcode(), dl, LoVT, {Lo, MaskLo, EVLLo});
      Hi = DAG.getNode(N->getOpcode(), dl, HiVT, {Hi, MaskHi, EVLHi});
      return;
    }
  }
  // Fall back to the generic unary operator splitting otherwise.
  SplitVecRes_UnaryOp(N, Lo, Hi);
}
3084
/// Split a VECTOR_SHUFFLE result. Splitting both shuffle operands yields four
/// candidate input vectors; each output half is then expressed either as a
/// two-input shuffle (after aggressively simplifying/deduplicating the
/// inputs) or, failing that, as a BUILD_VECTOR assembled element by element.
void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
                                                  SDValue &Lo, SDValue &Hi) {
  // The low and high parts of the original input give four input vectors.
  SDValue Inputs[4];
  SDLoc DL(N);
  GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]);
  GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]);
  EVT NewVT = Inputs[0].getValueType();
  unsigned NewElts = NewVT.getVectorNumElements();

  // True for inputs that are all-constant (splat or build_vector of
  // constants); such inputs are hoisted to the front by MakeUniqueInputs.
  // NOTE(review): the second disjunct of this return was lost in the pasted
  // source (line dropped during extraction) -- verify against upstream.
  auto &&IsConstant = [](const SDValue &N) {
    APInt SplatValue;
    return N.getResNo() == 0 &&
           (ISD::isConstantSplatVector(N.getNode(), SplatValue) ||
  };
  // Assemble one output half directly from the scalar operands of two
  // BUILD_VECTOR inputs, per the (local, 2*NewElts-wide) mask.
  auto &&BuildVector = [NewElts, &DAG = DAG, NewVT, &DL](SDValue &Input1,
                                                         SDValue &Input2,
                                                         ArrayRef<int> Mask) {
    assert(Input1->getOpcode() == ISD::BUILD_VECTOR &&
           Input2->getOpcode() == ISD::BUILD_VECTOR &&
           "Expected build vector node.");
    EVT EltVT = NewVT.getVectorElementType();
    SmallVector<SDValue> Ops(NewElts, DAG.getPOISON(EltVT));
    for (unsigned I = 0; I < NewElts; ++I) {
      if (Mask[I] == PoisonMaskElem)
        continue;
      unsigned Idx = Mask[I];
      if (Idx >= NewElts)
        Ops[I] = Input2.getOperand(Idx - NewElts);
      else
        Ops[I] = Input1.getOperand(Idx);
      // Make the type of all elements the same as the element type.
      if (Ops[I].getValueType().bitsGT(EltVT))
        Ops[I] = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Ops[I]);
    }
    return DAG.getBuildVector(NewVT, DL, Ops);
  };

  // If Lo or Hi uses elements from at most two of the four input vectors, then
  // express it as a vector shuffle of those two inputs. Otherwise extract the
  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
  SmallVector<int> OrigMask(N->getMask());
  // Try to pack incoming shuffles/inputs.
  auto &&TryPeekThroughShufflesInputs = [&Inputs, &NewVT, this, NewElts,
                                         &DL](SmallVectorImpl<int> &Mask) {
    // Check if all inputs are shuffles of the same operands or non-shuffles.
    MapVector<std::pair<SDValue, SDValue>, SmallVector<unsigned>> ShufflesIdxs;
    for (unsigned Idx = 0; Idx < std::size(Inputs); ++Idx) {
      SDValue Input = Inputs[Idx];
      auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Input.getNode());
      if (!Shuffle ||
          Input.getOperand(0).getValueType() != Input.getValueType())
        continue;
      // Register the shuffle under both operand orders so either order of a
      // later match finds it.
      ShufflesIdxs[std::make_pair(Input.getOperand(0), Input.getOperand(1))]
          .push_back(Idx);
      ShufflesIdxs[std::make_pair(Input.getOperand(1), Input.getOperand(0))]
          .push_back(Idx);
    }
    for (auto &P : ShufflesIdxs) {
      if (P.second.size() < 2)
        continue;
      // Use shuffles operands instead of shuffles themselves.
      // 1. Adjust mask.
      for (int &Idx : Mask) {
        if (Idx == PoisonMaskElem)
          continue;
        unsigned SrcRegIdx = Idx / NewElts;
        if (Inputs[SrcRegIdx].isUndef()) {
          Idx = PoisonMaskElem;
          continue;
        }
        auto *Shuffle =
            dyn_cast<ShuffleVectorSDNode>(Inputs[SrcRegIdx].getNode());
        if (!Shuffle || !is_contained(P.second, SrcRegIdx))
          continue;
        int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
        if (MaskElt == PoisonMaskElem) {
          Idx = PoisonMaskElem;
          continue;
        }
        // Re-point this mask element through the shuffle at its operand.
        Idx = MaskElt % NewElts +
              P.second[Shuffle->getOperand(MaskElt / NewElts) == P.first.first
                           ? 0
                           : 1] *
                  NewElts;
      }
      // 2. Update inputs.
      Inputs[P.second[0]] = P.first.first;
      Inputs[P.second[1]] = P.first.second;
      // Clear the pair data.
      P.second.clear();
      ShufflesIdxs[std::make_pair(P.first.second, P.first.first)].clear();
    }
    // Check if any concat_vectors can be simplified.
    SmallBitVector UsedSubVector(2 * std::size(Inputs));
    for (int &Idx : Mask) {
      if (Idx == PoisonMaskElem)
        continue;
      unsigned SrcRegIdx = Idx / NewElts;
      if (Inputs[SrcRegIdx].isUndef()) {
        Idx = PoisonMaskElem;
        continue;
      }
      // NOTE(review): the declaration introducing `TypeAction` was lost in
      // the pasted source (presumably
      // `TargetLowering::LegalizeTypeAction TypeAction =`) -- verify.
          getTypeAction(Inputs[SrcRegIdx].getValueType());
      if (Inputs[SrcRegIdx].getOpcode() == ISD::CONCAT_VECTORS &&
          Inputs[SrcRegIdx].getNumOperands() == 2 &&
          !Inputs[SrcRegIdx].getOperand(1).isUndef() &&
          (TypeAction == TargetLowering::TypeLegal ||
           TypeAction == TargetLowering::TypeWidenVector))
        UsedSubVector.set(2 * SrcRegIdx + (Idx % NewElts) / (NewElts / 2));
    }
    if (UsedSubVector.count() > 1) {
      // NOTE(review): the declaration of `Pairs` was lost in the pasted
      // source (a SmallVector of pairs of (input index, subvector index)) --
      // verify against upstream.
      for (unsigned I = 0; I < std::size(Inputs); ++I) {
        if (UsedSubVector.test(2 * I) == UsedSubVector.test(2 * I + 1))
          continue;
        if (Pairs.empty() || Pairs.back().size() == 2)
          Pairs.emplace_back();
        if (UsedSubVector.test(2 * I)) {
          Pairs.back().emplace_back(I, 0);
        } else {
          assert(UsedSubVector.test(2 * I + 1) &&
                 "Expected to be used one of the subvectors.");
          Pairs.back().emplace_back(I, 1);
        }
      }
      if (!Pairs.empty() && Pairs.front().size() > 1) {
        // Adjust mask.
        for (int &Idx : Mask) {
          if (Idx == PoisonMaskElem)
            continue;
          unsigned SrcRegIdx = Idx / NewElts;
          auto *It = find_if(
              Pairs, [SrcRegIdx](ArrayRef<std::pair<unsigned, int>> Idxs) {
                return Idxs.front().first == SrcRegIdx ||
                       Idxs.back().first == SrcRegIdx;
              });
          if (It == Pairs.end())
            continue;
          Idx = It->front().first * NewElts + (Idx % NewElts) % (NewElts / 2) +
                (SrcRegIdx == It->front().first ? 0 : (NewElts / 2));
        }
        // Adjust inputs.
        for (ArrayRef<std::pair<unsigned, int>> Idxs : Pairs) {
          // NOTE(review): the first getNode arguments were lost in the pasted
          // source (presumably `ISD::CONCAT_VECTORS, DL,`) -- verify.
          Inputs[Idxs.front().first] = DAG.getNode(
              Inputs[Idxs.front().first].getValueType(),
              Inputs[Idxs.front().first].getOperand(Idxs.front().second),
              Inputs[Idxs.back().first].getOperand(Idxs.back().second));
        }
      }
    }
    bool Changed;
    do {
      // Try to remove extra shuffles (except broadcasts) and shuffles with the
      // reused operands.
      Changed = false;
      for (unsigned I = 0; I < std::size(Inputs); ++I) {
        auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Inputs[I].getNode());
        if (!Shuffle)
          continue;
        if (Shuffle->getOperand(0).getValueType() != NewVT)
          continue;
        int Op = -1;
        if (!Inputs[I].hasOneUse() && Shuffle->getOperand(1).isUndef() &&
            !Shuffle->isSplat()) {
          Op = 0;
        } else if (!Inputs[I].hasOneUse() &&
                   !Shuffle->getOperand(1).isUndef()) {
          // Find the only used operand, if possible.
          for (int &Idx : Mask) {
            if (Idx == PoisonMaskElem)
              continue;
            unsigned SrcRegIdx = Idx / NewElts;
            if (SrcRegIdx != I)
              continue;
            int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
            if (MaskElt == PoisonMaskElem) {
              Idx = PoisonMaskElem;
              continue;
            }
            int OpIdx = MaskElt / NewElts;
            if (Op == -1) {
              Op = OpIdx;
              continue;
            }
            if (Op != OpIdx) {
              Op = -1;
              break;
            }
          }
        }
        if (Op < 0) {
          // Try to check if one of the shuffle operands is used already.
          for (int OpIdx = 0; OpIdx < 2; ++OpIdx) {
            if (Shuffle->getOperand(OpIdx).isUndef())
              continue;
            auto *It = find(Inputs, Shuffle->getOperand(OpIdx));
            if (It == std::end(Inputs))
              continue;
            int FoundOp = std::distance(std::begin(Inputs), It);
            // Found that operand is used already.
            // 1. Fix the mask for the reused operand.
            for (int &Idx : Mask) {
              if (Idx == PoisonMaskElem)
                continue;
              unsigned SrcRegIdx = Idx / NewElts;
              if (SrcRegIdx != I)
                continue;
              int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
              if (MaskElt == PoisonMaskElem) {
                Idx = PoisonMaskElem;
                continue;
              }
              int MaskIdx = MaskElt / NewElts;
              if (OpIdx == MaskIdx)
                Idx = MaskElt % NewElts + FoundOp * NewElts;
            }
            // 2. Set Op to the unused OpIdx.
            Op = (OpIdx + 1) % 2;
            break;
          }
        }
        if (Op >= 0) {
          Changed = true;
          Inputs[I] = Shuffle->getOperand(Op);
          // Adjust mask.
          for (int &Idx : Mask) {
            if (Idx == PoisonMaskElem)
              continue;
            unsigned SrcRegIdx = Idx / NewElts;
            if (SrcRegIdx != I)
              continue;
            int MaskElt = Shuffle->getMaskElt(Idx % NewElts);
            int OpIdx = MaskElt / NewElts;
            if (OpIdx != Op)
              continue;
            Idx = MaskElt % NewElts + SrcRegIdx * NewElts;
          }
        }
      }
    } while (Changed);
  };
  TryPeekThroughShufflesInputs(OrigMask);
  // Process unique inputs.
  auto &&MakeUniqueInputs = [&Inputs, &IsConstant,
                             NewElts](SmallVectorImpl<int> &Mask) {
    SetVector<SDValue> UniqueInputs;
    SetVector<SDValue> UniqueConstantInputs;
    for (const auto &I : Inputs) {
      if (IsConstant(I))
        UniqueConstantInputs.insert(I);
      else if (!I.isUndef())
        UniqueInputs.insert(I);
    }
    // Adjust mask in case of reused inputs. Also, need to insert constant
    // inputs at first, otherwise it affects the final outcome.
    if (UniqueInputs.size() != std::size(Inputs)) {
      auto &&UniqueVec = UniqueInputs.takeVector();
      auto &&UniqueConstantVec = UniqueConstantInputs.takeVector();
      unsigned ConstNum = UniqueConstantVec.size();
      for (int &Idx : Mask) {
        if (Idx == PoisonMaskElem)
          continue;
        unsigned SrcRegIdx = Idx / NewElts;
        if (Inputs[SrcRegIdx].isUndef()) {
          Idx = PoisonMaskElem;
          continue;
        }
        const auto It = find(UniqueConstantVec, Inputs[SrcRegIdx]);
        if (It != UniqueConstantVec.end()) {
          Idx = (Idx % NewElts) +
                NewElts * std::distance(UniqueConstantVec.begin(), It);
          assert(Idx >= 0 && "Expected defined mask idx.");
          continue;
        }
        const auto RegIt = find(UniqueVec, Inputs[SrcRegIdx]);
        assert(RegIt != UniqueVec.end() && "Cannot find non-const value.");
        Idx = (Idx % NewElts) +
              NewElts * (std::distance(UniqueVec.begin(), RegIt) + ConstNum);
        assert(Idx >= 0 && "Expected defined mask idx.");
      }
      copy(UniqueConstantVec, std::begin(Inputs));
      copy(UniqueVec, std::next(std::begin(Inputs), ConstNum));
    }
  };
  MakeUniqueInputs(OrigMask);
  SDValue OrigInputs[4];
  copy(Inputs, std::begin(OrigInputs));
  for (unsigned High = 0; High < 2; ++High) {
    SDValue &Output = High ? Hi : Lo;

    // Build a shuffle mask for the output, discovering on the fly which
    // input vectors to use as shuffle operands.
    unsigned FirstMaskIdx = High * NewElts;
    SmallVector<int> Mask(NewElts * std::size(Inputs), PoisonMaskElem);
    copy(ArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin());
    assert(!Output && "Expected default initialized initial value.");
    TryPeekThroughShufflesInputs(Mask);
    MakeUniqueInputs(Mask);
    SDValue TmpInputs[4];
    copy(Inputs, std::begin(TmpInputs));
    // Track changes in the output registers.
    int UsedIdx = -1;
    bool SecondIteration = false;
    auto &&AccumulateResults = [&UsedIdx, &SecondIteration](unsigned Idx) {
      if (UsedIdx < 0) {
        UsedIdx = Idx;
        return false;
      }
      if (UsedIdx >= 0 && static_cast<unsigned>(UsedIdx) == Idx)
        SecondIteration = true;
      return SecondIteration;
    };
    // NOTE(review): the callee of this invocation was lost in the pasted
    // source (presumably `processShuffleMasks(`) -- verify against upstream.
        Mask, std::size(Inputs), std::size(Inputs),
        /*NumOfUsedRegs=*/1,
        [&Output, &DAG = DAG, NewVT]() { Output = DAG.getPOISON(NewVT); },
        [&Output, &DAG = DAG, NewVT, &DL, &Inputs,
         &BuildVector](ArrayRef<int> Mask, unsigned Idx, unsigned /*Unused*/) {
          if (Inputs[Idx]->getOpcode() == ISD::BUILD_VECTOR)
            Output = BuildVector(Inputs[Idx], Inputs[Idx], Mask);
          else
            Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx],
                                          DAG.getPOISON(NewVT), Mask);
          Inputs[Idx] = Output;
        },
        [&AccumulateResults, &Output, &DAG = DAG, NewVT, &DL, &Inputs,
         &TmpInputs, &BuildVector](ArrayRef<int> Mask, unsigned Idx1,
                                   unsigned Idx2, bool /*Unused*/) {
          if (AccumulateResults(Idx1)) {
            if (Inputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR &&
                Inputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR)
              Output = BuildVector(Inputs[Idx1], Inputs[Idx2], Mask);
            else
              Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx1],
                                            Inputs[Idx2], Mask);
          } else {
            if (TmpInputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR &&
                TmpInputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR)
              Output = BuildVector(TmpInputs[Idx1], TmpInputs[Idx2], Mask);
            else
              Output = DAG.getVectorShuffle(NewVT, DL, TmpInputs[Idx1],
                                            TmpInputs[Idx2], Mask);
          }
          Inputs[Idx1] = Output;
        });
    // Restore the (deduplicated) inputs for the next half's independent pass.
    copy(OrigInputs, std::begin(Inputs));
  }
}
3437
3438void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
3439 EVT OVT = N->getValueType(0);
3440 EVT NVT = OVT.getHalfNumVectorElementsVT(*DAG.getContext());
3441 SDValue Chain = N->getOperand(0);
3442 SDValue Ptr = N->getOperand(1);
3443 SDValue SV = N->getOperand(2);
3444 SDLoc dl(N);
3445
3446 const Align Alignment =
3447 DAG.getDataLayout().getABITypeAlign(NVT.getTypeForEVT(*DAG.getContext()));
3448
3449 Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment.value());
3450 Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment.value());
3451 Chain = Hi.getValue(1);
3452
3453 // Modified the chain - switch anything that used the old chain to use
3454 // the new one.
3455 ReplaceValueWith(SDValue(N, 1), Chain);
3456}
3457
3458void DAGTypeLegalizer::SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo,
3459 SDValue &Hi) {
3460 EVT DstVTLo, DstVTHi;
3461 std::tie(DstVTLo, DstVTHi) = DAG.GetSplitDestVTs(N->getValueType(0));
3462 SDLoc dl(N);
3463
3464 SDValue SrcLo, SrcHi;
3465 EVT SrcVT = N->getOperand(0).getValueType();
3466 if (getTypeAction(SrcVT) == TargetLowering::TypeSplitVector)
3467 GetSplitVector(N->getOperand(0), SrcLo, SrcHi);
3468 else
3469 std::tie(SrcLo, SrcHi) = DAG.SplitVectorOperand(N, 0);
3470
3471 Lo = DAG.getNode(N->getOpcode(), dl, DstVTLo, SrcLo, N->getOperand(1));
3472 Hi = DAG.getNode(N->getOpcode(), dl, DstVTHi, SrcHi, N->getOperand(1));
3473}
3474
3475void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo,
3476 SDValue &Hi) {
3477 SDValue InLo, InHi;
3478 GetSplitVector(N->getOperand(0), InLo, InHi);
3479 SDLoc DL(N);
3480
3481 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi);
3482 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo);
3483}
3484
3485void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo,
3486 SDValue &Hi) {
3487 SDLoc DL(N);
3488
3489 SDValue Expanded = TLI.expandVectorSplice(N, DAG);
3490 std::tie(Lo, Hi) = DAG.SplitVector(Expanded, DL);
3491}
3492
/// Split a VP_REVERSE result by lowering it through the stack: a negatively
/// strided VP store writes the elements in reverse order starting from the
/// last active slot, and a VP load reads them back; the loaded value is then
/// split in two.
void DAGTypeLegalizer::SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo,
                                              SDValue &Hi) {
  EVT VT = N->getValueType(0);
  SDValue Val = N->getOperand(0);
  SDValue Mask = N->getOperand(1);
  SDValue EVL = N->getOperand(2);
  SDLoc DL(N);

  // Fallback to VP_STRIDED_STORE to stack followed by VP_LOAD.
  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // NOTE(review): the element-count argument of getVectorVT was lost in the
  // pasted source (presumably `VT.getVectorElementCount());`) -- verify
  // against upstream.
  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // NOTE(review): the first arguments of both getMachineMemOperand calls were
  // lost in the pasted source (presumably the PtrInfo / MOStore|MOLoad /
  // LocationSize arguments) -- verify against upstream.
  MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
      Alignment);
  MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
      Alignment);

  // Address of the last active element: base + (EVL - 1) * element bytes.
  unsigned EltWidth = VT.getScalarSizeInBits() / 8;
  SDValue NumElemMinus1 =
      DAG.getNode(ISD::SUB, DL, PtrVT, DAG.getZExtOrTrunc(EVL, DL, PtrVT),
                  DAG.getConstant(1, DL, PtrVT));
  SDValue StartOffset = DAG.getNode(ISD::MUL, DL, PtrVT, NumElemMinus1,
                                    DAG.getConstant(EltWidth, DL, PtrVT));
  SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, StartOffset);
  // Negative stride walks the slots backwards, reversing the element order.
  SDValue Stride = DAG.getConstant(-(int64_t)EltWidth, DL, PtrVT);

  // Store with an all-true mask; the original mask is applied on the load.
  SDValue TrueMask = DAG.getBoolConstant(true, DL, Mask.getValueType(), VT);
  SDValue Store = DAG.getStridedStoreVP(DAG.getEntryNode(), DL, Val, StorePtr,
                                        DAG.getPOISON(PtrVT), Stride, TrueMask,
                                        EVL, MemVT, StoreMMO, ISD::UNINDEXED);

  SDValue Load = DAG.getLoadVP(VT, DL, Store, StackPtr, Mask, EVL, LoadMMO);

  std::tie(Lo, Hi) = DAG.SplitVector(Load, DL);
}
3537
// Split the result of a VP_SPLICE. The splice is materialized through a stack
// temporary sized for both sources: V1 and V2 are stored back to back, the
// spliced value is loaded starting at the splice point, and the loaded
// (legal-typed) vector is split into Lo/Hi.
void DAGTypeLegalizer::SplitVecRes_VP_SPLICE(SDNode *N, SDValue &Lo,
                                             SDValue &Hi) {
  EVT VT = N->getValueType(0);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  int64_t Imm = cast<ConstantSDNode>(N->getOperand(2))->getSExtValue();
  SDValue Mask = N->getOperand(3);
  SDValue EVL1 = N->getOperand(4);
  SDValue EVL2 = N->getOperand(5);
  SDLoc DL(N);

  // Since EVL2 is considered the real VL it gets promoted during
  // SelectionDAGBuilder. Promote EVL1 here if needed.
  if (getTypeAction(EVL1.getValueType()) == TargetLowering::TypePromoteInteger)
    EVL1 = ZExtPromotedInteger(EVL1);

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  // Stack slot big enough to hold both source vectors contiguously.
  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
      Alignment);
  MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand(
      Alignment);

  // V2 is placed immediately after V1's EVL1 active elements.
  SDValue StackPtr2 = TLI.getVectorElementPointer(DAG, StackPtr, VT, EVL1);
  SDValue PoisonPtr = DAG.getPOISON(PtrVT);

  // Store with an all-true mask; the user's mask is applied on the load.
  SDValue TrueMask = DAG.getBoolConstant(true, DL, Mask.getValueType(), VT);
  SDValue StoreV1 =
      DAG.getStoreVP(DAG.getEntryNode(), DL, V1, StackPtr, PoisonPtr, TrueMask,
                     EVL1, V1.getValueType(), StoreMMO, ISD::UNINDEXED);

      DAG.getStoreVP(StoreV1, DL, V2, StackPtr2, PoisonPtr, TrueMask, EVL2,
                     V2.getValueType(), StoreMMO, ISD::UNINDEXED);

  SDValue Load;
  if (Imm >= 0) {
    // Non-negative splice index: the result starts Imm elements into V1.
    StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VT, N->getOperand(2));
    Load = DAG.getLoadVP(VT, DL, StoreV2, StackPtr, Mask, EVL2, LoadMMO);
  } else {
    // Negative index: the result starts -Imm elements before the end of V1.
    uint64_t TrailingElts = -Imm;
    unsigned EltWidth = VT.getScalarSizeInBits() / 8;
    SDValue TrailingBytes = DAG.getConstant(TrailingElts * EltWidth, DL, PtrVT);

    // Make sure TrailingBytes doesn't exceed the size of vec1.
    SDValue OffsetToV2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, StackPtr);
    TrailingBytes =
        DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, OffsetToV2);

    // Calculate the start address of the spliced result.
    StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);
    Load = DAG.getLoadVP(VT, DL, StoreV2, StackPtr2, Mask, EVL2, LoadMMO);
  }

  // Split the loaded result into the two requested halves.
  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
  Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, Load,
                   DAG.getVectorIdxConstant(0, DL));
  Hi =
      DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, Load,
                  DAG.getVectorIdxConstant(LoVT.getVectorMinNumElements(), DL));
}
3610
3611void DAGTypeLegalizer::SplitVecRes_PARTIAL_REDUCE_MLA(SDNode *N, SDValue &Lo,
3612 SDValue &Hi) {
3613 SDLoc DL(N);
3614 SDValue Acc = N->getOperand(0);
3615 SDValue Input1 = N->getOperand(1);
3616 SDValue Input2 = N->getOperand(2);
3617
3618 SDValue AccLo, AccHi;
3619 GetSplitVector(Acc, AccLo, AccHi);
3620 unsigned Opcode = N->getOpcode();
3621
3622 // If the input types don't need splitting, just accumulate into the
3623 // low part of the accumulator.
3624 if (getTypeAction(Input1.getValueType()) != TargetLowering::TypeSplitVector) {
3625 Lo = DAG.getNode(Opcode, DL, AccLo.getValueType(), AccLo, Input1, Input2);
3626 Hi = AccHi;
3627 return;
3628 }
3629
3630 SDValue Input1Lo, Input1Hi;
3631 SDValue Input2Lo, Input2Hi;
3632 GetSplitVector(Input1, Input1Lo, Input1Hi);
3633 GetSplitVector(Input2, Input2Lo, Input2Hi);
3634 EVT ResultVT = AccLo.getValueType();
3635
3636 Lo = DAG.getNode(Opcode, DL, ResultVT, AccLo, Input1Lo, Input2Lo);
3637 Hi = DAG.getNode(Opcode, DL, ResultVT, AccHi, Input1Hi, Input2Hi);
3638}
3639
3640void DAGTypeLegalizer::SplitVecRes_GET_ACTIVE_LANE_MASK(SDNode *N, SDValue &Lo,
3641 SDValue &Hi) {
3642 SDLoc DL(N);
3643 SDValue Op0 = N->getOperand(0);
3644 SDValue Op1 = N->getOperand(1);
3645 EVT OpVT = Op0.getValueType();
3646
3647 EVT LoVT, HiVT;
3648 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
3649
3650 Lo = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, LoVT, Op0, Op1);
3651 SDValue LoElts = DAG.getElementCount(DL, OpVT, LoVT.getVectorElementCount());
3652 SDValue HiStartVal = DAG.getNode(ISD::UADDSAT, DL, OpVT, Op0, LoElts);
3653 Hi = DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, DL, HiVT, HiStartVal, Op1);
3654}
3655
3656void DAGTypeLegalizer::SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N) {
3657 unsigned Factor = N->getNumOperands();
3658
3659 SmallVector<SDValue, 8> Ops(Factor * 2);
3660 for (unsigned i = 0; i != Factor; ++i) {
3661 SDValue OpLo, OpHi;
3662 GetSplitVector(N->getOperand(i), OpLo, OpHi);
3663 Ops[i * 2] = OpLo;
3664 Ops[i * 2 + 1] = OpHi;
3665 }
3666
3667 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
3668
3669 SDLoc DL(N);
3670 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs,
3671 ArrayRef(Ops).slice(0, Factor));
3672 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, VTs,
3673 ArrayRef(Ops).slice(Factor, Factor));
3674
3675 for (unsigned i = 0; i != Factor; ++i)
3676 SetSplitVector(SDValue(N, i), ResLo.getValue(i), ResHi.getValue(i));
3677}
3678
3679void DAGTypeLegalizer::SplitVecRes_VECTOR_INTERLEAVE(SDNode *N) {
3680 unsigned Factor = N->getNumOperands();
3681
3682 SmallVector<SDValue, 8> Ops(Factor * 2);
3683 for (unsigned i = 0; i != Factor; ++i) {
3684 SDValue OpLo, OpHi;
3685 GetSplitVector(N->getOperand(i), OpLo, OpHi);
3686 Ops[i] = OpLo;
3687 Ops[i + Factor] = OpHi;
3688 }
3689
3690 SmallVector<EVT, 8> VTs(Factor, Ops[0].getValueType());
3691
3692 SDLoc DL(N);
3693 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
3694 ArrayRef(Ops).slice(0, Factor)),
3695 DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, VTs,
3696 ArrayRef(Ops).slice(Factor, Factor))};
3697
3698 for (unsigned i = 0; i != Factor; ++i) {
3699 unsigned IdxLo = 2 * i;
3700 unsigned IdxHi = 2 * i + 1;
3701 SetSplitVector(SDValue(N, i), Res[IdxLo / Factor].getValue(IdxLo % Factor),
3702 Res[IdxHi / Factor].getValue(IdxHi % Factor));
3703 }
3704}
3705
3706//===----------------------------------------------------------------------===//
3707// Operand Vector Splitting
3708//===----------------------------------------------------------------------===//
3709
/// This method is called when the specified operand of the specified node is
/// found to need vector splitting. At this point, all of the result types of
/// the node are known to be legal, but other operands of the node may need
/// legalization as well as the specified one.
/// Returns true if the node was updated in place, false if it was replaced
/// (or if the handler already registered the results itself).
bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
  LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG));
  SDValue Res = SDValue();

  // See if the target wants to custom split this node.
  if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
    return false;

  // Dispatch to the per-opcode handler; each returns the replacement value
  // (or a null SDValue when it registered results itself).
  switch (N->getOpcode()) {
  default:
#ifndef NDEBUG
    dbgs() << "SplitVectorOperand Op #" << OpNo << ": ";
    N->dump(&DAG);
    dbgs() << "\n";
#endif
    report_fatal_error("Do not know how to split this operator's "
                       "operand!\n");

  case ISD::VP_SETCC:
  case ISD::STRICT_FSETCC:
  case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break;
  case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
  case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
  case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break;
  case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
  case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
    Res = SplitVecOp_VECTOR_FIND_LAST_ACTIVE(N);
    break;
  case ISD::VP_TRUNCATE:
  case ISD::TRUNCATE:
    Res = SplitVecOp_TruncateHelper(N);
    break;
  case ISD::VP_FP_ROUND:
  case ISD::FP_ROUND:
    Res = SplitVecOp_FP_ROUND(N);
    break;
  case ISD::FCOPYSIGN: Res = SplitVecOp_FPOpDifferentTypes(N); break;
  case ISD::STORE:
    Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
    break;
  case ISD::VP_STORE:
    Res = SplitVecOp_VP_STORE(cast<VPStoreSDNode>(N), OpNo);
    break;
  case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
    Res = SplitVecOp_VP_STRIDED_STORE(cast<VPStridedStoreSDNode>(N), OpNo);
    break;
  case ISD::MSTORE:
    Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo);
    break;
  case ISD::MSCATTER:
  case ISD::VP_SCATTER:
    Res = SplitVecOp_Scatter(cast<MemSDNode>(N), OpNo);
    break;
  case ISD::MGATHER:
  case ISD::VP_GATHER:
    Res = SplitVecOp_Gather(cast<MemSDNode>(N), OpNo);
    break;
  case ISD::VSELECT:
    Res = SplitVecOp_VSELECT(N, OpNo);
    break;
    Res = SplitVecOp_VECTOR_COMPRESS(N, OpNo);
    break;
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::VP_SINT_TO_FP:
  case ISD::VP_UINT_TO_FP:
    // A narrowing int-to-fp behaves like a truncate; otherwise treat it as a
    // plain unary conversion.
    if (N->getValueType(0).bitsLT(
            N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType()))
      Res = SplitVecOp_TruncateHelper(N);
    else
      Res = SplitVecOp_UnaryOp(N);
    break;
    Res = SplitVecOp_FP_TO_XINT_SAT(N);
    break;
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::VP_FP_TO_SINT:
  case ISD::VP_FP_TO_UINT:
  case ISD::FP_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
  case ISD::FTRUNC:
  case ISD::LROUND:
  case ISD::LLROUND:
  case ISD::LRINT:
  case ISD::LLRINT:
    Res = SplitVecOp_UnaryOp(N);
    break;
  case ISD::FLDEXP:
    Res = SplitVecOp_FPOpDifferentTypes(N);
    break;

  case ISD::SCMP:
  case ISD::UCMP:
    Res = SplitVecOp_CMP(N);
    break;

  case ISD::FAKE_USE:
    Res = SplitVecOp_FAKE_USE(N);
    break;
    Res = SplitVecOp_ExtVecInRegOp(N);
    break;

  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
    Res = SplitVecOp_VECREDUCE(N, OpNo);
    break;
    Res = SplitVecOp_VECREDUCE_SEQ(N);
    break;
  case ISD::VP_REDUCE_FADD:
  case ISD::VP_REDUCE_SEQ_FADD:
  case ISD::VP_REDUCE_FMUL:
  case ISD::VP_REDUCE_SEQ_FMUL:
  case ISD::VP_REDUCE_ADD:
  case ISD::VP_REDUCE_MUL:
  case ISD::VP_REDUCE_AND:
  case ISD::VP_REDUCE_OR:
  case ISD::VP_REDUCE_XOR:
  case ISD::VP_REDUCE_SMAX:
  case ISD::VP_REDUCE_SMIN:
  case ISD::VP_REDUCE_UMAX:
  case ISD::VP_REDUCE_UMIN:
  case ISD::VP_REDUCE_FMAX:
  case ISD::VP_REDUCE_FMIN:
  case ISD::VP_REDUCE_FMAXIMUM:
  case ISD::VP_REDUCE_FMINIMUM:
    Res = SplitVecOp_VP_REDUCE(N, OpNo);
    break;
  case ISD::CTTZ_ELTS:
    Res = SplitVecOp_CttzElts(N);
    break;
  case ISD::VP_CTTZ_ELTS:
  case ISD::VP_CTTZ_ELTS_ZERO_POISON:
    Res = SplitVecOp_VP_CttzElements(N);
    break;
    Res = SplitVecOp_VECTOR_HISTOGRAM(N);
    break;
    Res = SplitVecOp_PARTIAL_REDUCE_MLA(N);
    break;
  }

  // If the result is null, the sub-method took care of registering results etc.
  if (!Res.getNode()) return false;

  // If the result is N, the sub-method updated N in place. Tell the legalizer
  // core about this.
  if (Res.getNode() == N)
    return true;

  // Strict FP nodes carry an extra chain result; everything else is
  // single-valued here.
  if (N->isStrictFPOpcode())
    assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
           "Invalid operand expansion");
  else
    assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
           "Invalid operand expansion");

  ReplaceValueWith(SDValue(N, 0), Res);
  return false;
}
3910
3911SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_FIND_LAST_ACTIVE(SDNode *N) {
3912 SDLoc DL(N);
3913
3914 SDValue LoMask, HiMask;
3915 GetSplitVector(N->getOperand(0), LoMask, HiMask);
3916
3917 EVT VT = N->getValueType(0);
3918 EVT SplitVT = LoMask.getValueType();
3919 ElementCount SplitEC = SplitVT.getVectorElementCount();
3920
3921 // Find the last active in both the low and the high masks.
3922 SDValue LoFind = DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, VT, LoMask);
3923 SDValue HiFind = DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, VT, HiMask);
3924
3925 // Check if any lane is active in the high mask.
3926 // FIXME: This would not be necessary if VECTOR_FIND_LAST_ACTIVE returned a
3927 // sentinel value for "none active".
3928 SDValue AnyHiActive = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, HiMask);
3929 SDValue Cond = DAG.getBoolExtOrTrunc(AnyHiActive, DL,
3930 getSetCCResultType(MVT::i1), MVT::i1);
3931
3932 // Return: AnyHiActive ? (HiFind + SplitEC) : LoFind;
3933 return DAG.getNode(ISD::SELECT, DL, VT, Cond,
3934 DAG.getNode(ISD::ADD, DL, VT, HiFind,
3935 DAG.getElementCount(DL, VT, SplitEC)),
3936 LoFind);
3937}
3938
3939SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) {
3940 // The only possibility for an illegal operand is the mask, since result type
3941 // legalization would have handled this node already otherwise.
3942 assert(OpNo == 0 && "Illegal operand must be mask");
3943
3944 SDValue Mask = N->getOperand(0);
3945 SDValue Src0 = N->getOperand(1);
3946 SDValue Src1 = N->getOperand(2);
3947 EVT Src0VT = Src0.getValueType();
3948 SDLoc DL(N);
3949 assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?");
3950
3951 SDValue Lo, Hi;
3952 GetSplitVector(N->getOperand(0), Lo, Hi);
3953 assert(Lo.getValueType() == Hi.getValueType() &&
3954 "Lo and Hi have differing types");
3955
3956 EVT LoOpVT, HiOpVT;
3957 std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT);
3958 assert(LoOpVT == HiOpVT && "Asymmetric vector split?");
3959
3960 SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask;
3961 std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL);
3962 std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL);
3963 std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
3964
3965 SDValue LoSelect =
3966 DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1);
3967 SDValue HiSelect =
3968 DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1);
3969
3970 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect);
3971}
3972
3973SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_COMPRESS(SDNode *N, unsigned OpNo) {
3974 // The only possibility for an illegal operand is the mask, since result type
3975 // legalization would have handled this node already otherwise.
3976 assert(OpNo == 1 && "Illegal operand must be mask");
3977
3978 // To split the mask, we need to split the result type too, so we can just
3979 // reuse that logic here.
3980 SDValue Lo, Hi;
3981 SplitVecRes_VECTOR_COMPRESS(N, Lo, Hi);
3982
3983 EVT VecVT = N->getValueType(0);
3984 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VecVT, Lo, Hi);
3985}
3986
3987SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) {
3988 EVT ResVT = N->getValueType(0);
3989 SDValue Lo, Hi;
3990 SDLoc dl(N);
3991
3992 SDValue VecOp = N->getOperand(OpNo);
3993 EVT VecVT = VecOp.getValueType();
3994 assert(VecVT.isVector() && "Can only split reduce vector operand");
3995 GetSplitVector(VecOp, Lo, Hi);
3996 EVT LoOpVT, HiOpVT;
3997 std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);
3998
3999 // Use the appropriate scalar instruction on the split subvectors before
4000 // reducing the now partially reduced smaller vector.
4001 unsigned CombineOpc = ISD::getVecReduceBaseOpcode(N->getOpcode());
4002 SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags());
4003 return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags());
4004}
4005
4006SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE_SEQ(SDNode *N) {
4007 EVT ResVT = N->getValueType(0);
4008 SDValue Lo, Hi;
4009 SDLoc dl(N);
4010
4011 SDValue AccOp = N->getOperand(0);
4012 SDValue VecOp = N->getOperand(1);
4013 SDNodeFlags Flags = N->getFlags();
4014
4015 EVT VecVT = VecOp.getValueType();
4016 assert(VecVT.isVector() && "Can only split reduce vector operand");
4017 GetSplitVector(VecOp, Lo, Hi);
4018 EVT LoOpVT, HiOpVT;
4019 std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT);
4020
4021 // Reduce low half.
4022 SDValue Partial = DAG.getNode(N->getOpcode(), dl, ResVT, AccOp, Lo, Flags);
4023
4024 // Reduce high half, using low half result as initial value.
4025 return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, Hi, Flags);
4026}
4027
4028SDValue DAGTypeLegalizer::SplitVecOp_VP_REDUCE(SDNode *N, unsigned OpNo) {
4029 assert(N->isVPOpcode() && "Expected VP opcode");
4030 assert(OpNo == 1 && "Can only split reduce vector operand");
4031
4032 unsigned Opc = N->getOpcode();
4033 EVT ResVT = N->getValueType(0);
4034 SDValue Lo, Hi;
4035 SDLoc dl(N);
4036
4037 SDValue VecOp = N->getOperand(OpNo);
4038 EVT VecVT = VecOp.getValueType();
4039 assert(VecVT.isVector() && "Can only split reduce vector operand");
4040 GetSplitVector(VecOp, Lo, Hi);
4041
4042 SDValue MaskLo, MaskHi;
4043 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(2));
4044
4045 SDValue EVLLo, EVLHi;
4046 std::tie(EVLLo, EVLHi) = DAG.SplitEVL(N->getOperand(3), VecVT, dl);
4047
4048 const SDNodeFlags Flags = N->getFlags();
4049
4050 SDValue ResLo =
4051 DAG.getNode(Opc, dl, ResVT, {N->getOperand(0), Lo, MaskLo, EVLLo}, Flags);
4052 return DAG.getNode(Opc, dl, ResVT, {ResLo, Hi, MaskHi, EVLHi}, Flags);
4053}
4054
4055SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
4056 // The result has a legal vector type, but the input needs splitting.
4057 EVT ResVT = N->getValueType(0);
4058 SDValue Lo, Hi;
4059 SDLoc dl(N);
4060 GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
4061 EVT InVT = Lo.getValueType();
4062
4063 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
4064 InVT.getVectorElementCount());
4065
4066 if (N->isStrictFPOpcode()) {
4067 Lo = DAG.getNode(N->getOpcode(), dl, {OutVT, MVT::Other},
4068 {N->getOperand(0), Lo});
4069 Hi = DAG.getNode(N->getOpcode(), dl, {OutVT, MVT::Other},
4070 {N->getOperand(0), Hi});
4071
4072 // Build a factor node to remember that this operation is independent
4073 // of the other one.
4074 SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
4075 Hi.getValue(1));
4076
4077 // Legalize the chain result - switch anything that used the old chain to
4078 // use the new one.
4079 ReplaceValueWith(SDValue(N, 1), Ch);
4080 } else if (N->getNumOperands() == 3) {
4081 assert(N->isVPOpcode() && "Expected VP opcode");
4082 SDValue MaskLo, MaskHi, EVLLo, EVLHi;
4083 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
4084 std::tie(EVLLo, EVLHi) =
4085 DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl);
4086 Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo, MaskLo, EVLLo);
4087 Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi, MaskHi, EVLHi);
4088 } else {
4089 Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
4090 Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
4091 }
4092
4093 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
4094}
4095
4096// Split a FAKE_USE use of a vector into FAKE_USEs of hi and lo part.
4097SDValue DAGTypeLegalizer::SplitVecOp_FAKE_USE(SDNode *N) {
4098 SDValue Lo, Hi;
4099 GetSplitVector(N->getOperand(1), Lo, Hi);
4100 SDValue Chain =
4101 DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), Lo);
4102 return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, Chain, Hi);
4103}
4104
4105SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
4106 // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will
4107 // end up being split all the way down to individual components. Convert the
4108 // split pieces into integers and reassemble.
4109 EVT ResVT = N->getValueType(0);
4110 SDValue Lo, Hi;
4111 GetSplitVector(N->getOperand(0), Lo, Hi);
4112 SDLoc dl(N);
4113
4114 if (ResVT.isScalableVector()) {
4115 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(ResVT);
4116 Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
4117 Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
4118 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
4119 }
4120
4121 Lo = BitConvertToInteger(Lo);
4122 Hi = BitConvertToInteger(Hi);
4123
4124 if (DAG.getDataLayout().isBigEndian())
4125 std::swap(Lo, Hi);
4126
4127 return DAG.getNode(ISD::BITCAST, dl, ResVT, JoinIntegers(Lo, Hi));
4128}
4129
4130SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N,
4131 unsigned OpNo) {
4132 assert(OpNo == 1 && "Invalid OpNo; can only split SubVec.");
4133 // We know that the result type is legal.
4134 EVT ResVT = N->getValueType(0);
4135
4136 SDValue Vec = N->getOperand(0);
4137 SDValue SubVec = N->getOperand(1);
4138 SDValue Idx = N->getOperand(2);
4139 SDLoc dl(N);
4140
4141 SDValue Lo, Hi;
4142 GetSplitVector(SubVec, Lo, Hi);
4143
4144 uint64_t IdxVal = Idx->getAsZExtVal();
4145 uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();
4146
4147 SDValue FirstInsertion =
4148 DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Lo, Idx);
4149 SDValue SecondInsertion =
4150 DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, FirstInsertion, Hi,
4151 DAG.getVectorIdxConstant(IdxVal + LoElts, dl));
4152
4153 return SecondInsertion;
4154}
4155
// Split the source operand of an EXTRACT_SUBVECTOR. Tries, in order: a simple
// extract from one half, an element-wise blend for fixed vectors that
// straddle the split, a shuffle-then-extract for misaligned fixed indices,
// and finally a spill/reload through the stack.
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
  // We know that the extracted result type is legal.
  EVT SubVT = N->getValueType(0);
  SDValue Idx = N->getOperand(1);
  SDLoc dl(N);
  SDValue Lo, Hi;

  GetSplitVector(N->getOperand(0), Lo, Hi);

  ElementCount LoElts = Lo.getValueType().getVectorElementCount();
  // Note: For scalable vectors, the index is scaled by vscale.
  ElementCount IdxVal =
  uint64_t IdxValMin = IdxVal.getKnownMinValue();

  EVT SrcVT = N->getOperand(0).getValueType();
  ElementCount NumResultElts = SubVT.getVectorElementCount();

  // If the extracted elements are all in the low half, do a simple extract.
  if (ElementCount::isKnownLE(IdxVal + NumResultElts, LoElts))
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx);

  unsigned LoEltsMin = LoElts.getKnownMinValue();
  if (IdxValMin < LoEltsMin && SubVT.isFixedLengthVector() &&
      SrcVT.isFixedLengthVector()) {
    // Extracted subvector crosses vector split, so we need to blend the two
    // halves.
    // TODO: May be able to emit partial extract_subvector.
    Elts.reserve(NumResultElts.getFixedValue());

    // This is not valid for scalable vectors. If SubVT is scalable, this is the
    // same as unrolling a scalable dimension (invalid). If ScrVT is scalable,
    // `Lo[LoEltsMin]` may not be the last element of `Lo`.
    DAG.ExtractVectorElements(Lo, Elts, /*Start=*/IdxValMin,
                              /*Count=*/LoEltsMin - IdxValMin);
    DAG.ExtractVectorElements(Hi, Elts, /*Start=*/0,
                              /*Count=*/SubVT.getVectorNumElements() -
                                  Elts.size());
    return DAG.getBuildVector(SubVT, dl, Elts);
  }

  if (SubVT.isScalableVector() == SrcVT.isScalableVector()) {
    // The extract lies entirely in the high half; re-index relative to it.
    ElementCount ExtractIdx = IdxVal - LoElts;
    if (ExtractIdx.isKnownMultipleOf(NumResultElts))
      return DAG.getExtractSubvector(dl, SubVT, Hi,
                                     ExtractIdx.getKnownMinValue());

    EVT HiVT = Hi.getValueType();
    assert(HiVT.isFixedLengthVector() &&
           "Only fixed-vector extracts are supported in this case");

    // We cannot create an extract_subvector that isn't a multiple of the
    // result size, which may go out of bounds for the last elements. Shuffle
    // the desired elements down to 0 and do a simple 0 extract.
    SmallVector<int, 8> Mask(HiVT.getVectorNumElements(), -1);
    for (int I = 0; I != int(NumResultElts.getFixedValue()); ++I)
      Mask[I] = int(ExtractIdx.getFixedValue()) + I;

    SDValue Shuffle =
        DAG.getVectorShuffle(HiVT, dl, Hi, DAG.getPOISON(HiVT), Mask);
    return DAG.getExtractSubvector(dl, SubVT, Shuffle, 0);
  }

  // After this point the DAG node only permits extracting fixed-width
  // subvectors from scalable vectors.
  assert(SubVT.isFixedLengthVector() &&
         "Extracting scalable subvector from fixed-width unsupported");

  // If the element type is i1 and we're not promoting the result, then we may
  // end up loading the wrong data since the bits are packed tightly into
  // bytes. For example, if we extract a v4i1 (legal) from a nxv4i1 (legal)
  // type at index 4, then we will load a byte starting at index 0.
  if (SubVT.getScalarType() == MVT::i1)
    report_fatal_error("Don't know how to extract fixed-width predicate "
                       "subvector from a scalable predicate vector");

  // Spill the vector to the stack. We should use the alignment for
  // the smallest part.
  SDValue Vec = N->getOperand(0);
  EVT VecVT = Vec.getValueType();
  Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
      DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
                               SmallestAlign);

  // Extract the subvector by loading the correct part.
  StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVT, Idx);

  return DAG.getLoad(
      SubVT, dl, Store, StackPtr,
      MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
}
4254
// Split the vector operand of an EXTRACT_VECTOR_ELT. A constant index is
// forwarded to whichever half contains the element; otherwise fall back to
// custom lowering or a stack spill + element load.
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
  SDValue Vec = N->getOperand(0);
  SDValue Idx = N->getOperand(1);
  EVT VecVT = Vec.getValueType();

  if (const ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Idx)) {
    uint64_t IdxVal = Index->getZExtValue();

    SDValue Lo, Hi;
    GetSplitVector(Vec, Lo, Hi);

    uint64_t LoElts = Lo.getValueType().getVectorMinNumElements();

    // For scalable vectors an index >= the minimum low-half count may still
    // land in the low half at runtime, so only redirect to Hi when the
    // source is fixed-length.
    if (IdxVal < LoElts)
      return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0);
    else if (!Vec.getValueType().isScalableVector())
      return SDValue(DAG.UpdateNodeOperands(N, Hi,
                                    DAG.getConstant(IdxVal - LoElts, SDLoc(N),
                                                    Idx.getValueType())), 0);
  }

  // See if the target wants to custom expand this node.
  if (CustomLowerNode(N, N->getValueType(0), true))
    return SDValue();

  // Make the vector elements byte-addressable if they aren't already.
  SDLoc dl(N);
  EVT EltVT = VecVT.getVectorElementType();
  if (!EltVT.isByteSized()) {
    // Widen sub-byte elements to the next byte-sized integer type, extract,
    // then narrow back down to the requested result type.
    EltVT = EltVT.changeTypeToInteger().getRoundIntegerType(*DAG.getContext());
    VecVT = VecVT.changeElementType(*DAG.getContext(), EltVT);
    Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec);
    SDValue NewExtract =
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec, Idx);
    return DAG.getAnyExtOrTrunc(NewExtract, dl, N->getValueType(0));
  }

  // Store the vector to the stack.
  // In cases where the vector is illegal it will be broken down into parts
  // and stored in parts - we should use the alignment for the smallest part.
  Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false);
      DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign);
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
                               SmallestAlign);

  // Load back the required element.
  StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx);

  // EXTRACT_VECTOR_ELT can extend the element type to the width of the return
  // type, leaving the high bits undefined. But it can't truncate.
  assert(N->getValueType(0).bitsGE(EltVT) && "Illegal EXTRACT_VECTOR_ELT.");

  return DAG.getExtLoad(
      ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
      MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT,
      commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8));
}
4316
4317SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) {
4318 SDValue Lo, Hi;
4319
4320 // *_EXTEND_VECTOR_INREG only reference the lower half of the input, so
4321 // splitting the result has the same effect as splitting the input operand.
4322 SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
4323
4324 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi);
4325}
4326
4327SDValue DAGTypeLegalizer::SplitVecOp_Gather(MemSDNode *N, unsigned OpNo) {
4328 (void)OpNo;
4329 SDValue Lo, Hi;
4330 SplitVecRes_Gather(N, Lo, Hi);
4331
4332 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, N, N->getValueType(0), Lo, Hi);
4333 ReplaceValueWith(SDValue(N, 0), Res);
4334 return SDValue();
4335}
4336
// Split the data operand of a VP_STORE into two narrower VP stores; the
// mask and explicit vector length are split to match.
SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) {
  assert(N->isUnindexed() && "Indexed vp_store of vector?");
  SDValue Ch = N->getChain();
  SDValue Ptr = N->getBasePtr();
  SDValue Offset = N->getOffset();
  assert(Offset.isUndef() && "Unexpected VP store offset");
  SDValue Mask = N->getMask();
  SDValue EVL = N->getVectorLength();
  SDValue Data = N->getValue();
  Align Alignment = N->getBaseAlign();
  SDLoc DL(N);

  SDValue DataLo, DataHi;
  if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
    // Split Data operand
    GetSplitVector(Data, DataLo, DataHi);
  else
    std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  // Split Mask operand
  SDValue MaskLo, MaskHi;
  if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
    // A SETCC mask is split through the SETCC splitter so both halves are
    // produced consistently.
    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
  } else {
    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
      GetSplitVector(Mask, MaskLo, MaskHi);
    else
      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
  }

  EVT MemoryVT = N->getMemoryVT();
  EVT LoMemVT, HiMemVT;
  bool HiIsEmpty = false;
  std::tie(LoMemVT, HiMemVT) =
      DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);

  // Split EVL
  SDValue EVLLo, EVLHi;
  std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, Data.getValueType(), DL);

  SDValue Lo, Hi;
  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      N->getPointerInfo(), MachineMemOperand::MOStore,
      LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
      N->getRanges());

  Lo = DAG.getStoreVP(Ch, DL, DataLo, Ptr, Offset, MaskLo, EVLLo, LoMemVT, MMO,
                      N->getAddressingMode(), N->isTruncatingStore(),
                      N->isCompressingStore());

  // If the hi vp_store has zero storage size, only the lo vp_store is needed.
  if (HiIsEmpty)
    return Lo;

  // Advance the base pointer past the bytes covered by the low store.
  Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
                                   N->isCompressingStore());

  MachinePointerInfo MPI;
  if (LoMemVT.isScalableVector()) {
    // The high half's offset is not a compile-time constant for scalable
    // types: keep only the address space and conservatively reduce the
    // alignment to what the known-minimum low size guarantees.
    Alignment = commonAlignment(Alignment,
                                LoMemVT.getSizeInBits().getKnownMinValue() / 8);
    MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
  } else
    MPI = N->getPointerInfo().getWithOffset(
        LoMemVT.getStoreSize().getFixedValue());

  MMO = DAG.getMachineFunction().getMachineMemOperand(
      Alignment, N->getAAInfo(), N->getRanges());

  Hi = DAG.getStoreVP(Ch, DL, DataHi, Ptr, Offset, MaskHi, EVLHi, HiMemVT, MMO,
                      N->getAddressingMode(), N->isTruncatingStore(),
                      N->isCompressingStore());

  // Build a factor node to remember that this store is independent of the
  // other one.
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}
4415
4416SDValue DAGTypeLegalizer::SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N,
4417 unsigned OpNo) {
4418 assert(N->isUnindexed() && "Indexed vp_strided_store of a vector?");
4419 assert(N->getOffset().isUndef() && "Unexpected VP strided store offset");
4420
4421 SDLoc DL(N);
4422
4423 SDValue Data = N->getValue();
4424 SDValue LoData, HiData;
4425 if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
4426 GetSplitVector(Data, LoData, HiData);
4427 else
4428 std::tie(LoData, HiData) = DAG.SplitVector(Data, DL);
4429
4430 EVT LoMemVT, HiMemVT;
4431 bool HiIsEmpty = false;
4432 std::tie(LoMemVT, HiMemVT) = DAG.GetDependentSplitDestVTs(
4433 N->getMemoryVT(), LoData.getValueType(), &HiIsEmpty);
4434
4435 SDValue Mask = N->getMask();
4436 SDValue LoMask, HiMask;
4437 if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC)
4438 SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask);
4439 else if (getTypeAction(Mask.getValueType()) ==
4441 GetSplitVector(Mask, LoMask, HiMask);
4442 else
4443 std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL);
4444
4445 SDValue LoEVL, HiEVL;
4446 std::tie(LoEVL, HiEVL) =
4447 DAG.SplitEVL(N->getVectorLength(), Data.getValueType(), DL);
4448
4449 // Generate the low vp_strided_store
4450 SDValue Lo = DAG.getStridedStoreVP(
4451 N->getChain(), DL, LoData, N->getBasePtr(), N->getOffset(),
4452 N->getStride(), LoMask, LoEVL, LoMemVT, N->getMemOperand(),
4453 N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore());
4454
4455 // If the high vp_strided_store has zero storage size, only the low
4456 // vp_strided_store is needed.
4457 if (HiIsEmpty)
4458 return Lo;
4459
4460 // Generate the high vp_strided_store.
4461 // To calculate the high base address, we need to sum to the low base
4462 // address stride number of bytes for each element already stored by low,
4463 // that is: Ptr = Ptr + (LoEVL * Stride)
4464 EVT PtrVT = N->getBasePtr().getValueType();
4466 DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL,
4467 DAG.getSExtOrTrunc(N->getStride(), DL, PtrVT));
4468 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, N->getBasePtr(), Increment);
4469
4470 Align Alignment = N->getBaseAlign();
4471 if (LoMemVT.isScalableVector())
4472 Alignment = commonAlignment(Alignment,
4473 LoMemVT.getSizeInBits().getKnownMinValue() / 8);
4474
4475 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4476 MachinePointerInfo(N->getPointerInfo().getAddrSpace()),
4478 Alignment, N->getAAInfo(), N->getRanges());
4479
4480 SDValue Hi = DAG.getStridedStoreVP(
4481 N->getChain(), DL, HiData, Ptr, N->getOffset(), N->getStride(), HiMask,
4482 HiEVL, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(),
4483 N->isCompressingStore());
4484
4485 // Build a factor node to remember that this store is independent of the
4486 // other one.
4487 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
4488}
4489
4490SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
4491 unsigned OpNo) {
4492 assert(N->isUnindexed() && "Indexed masked store of vector?");
4493 SDValue Ch = N->getChain();
4494 SDValue Ptr = N->getBasePtr();
4495 SDValue Offset = N->getOffset();
4496 assert(Offset.isUndef() && "Unexpected indexed masked store offset");
4497 SDValue Mask = N->getMask();
4498 SDValue Data = N->getValue();
4499 Align Alignment = N->getBaseAlign();
4500 SDLoc DL(N);
4501
4502 SDValue DataLo, DataHi;
4503 if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
4504 // Split Data operand
4505 GetSplitVector(Data, DataLo, DataHi);
4506 else
4507 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
4508
4509 // Split Mask operand
4510 SDValue MaskLo, MaskHi;
4511 if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
4512 SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
4513 } else {
4514 if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
4515 GetSplitVector(Mask, MaskLo, MaskHi);
4516 else
4517 std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
4518 }
4519
4520 EVT MemoryVT = N->getMemoryVT();
4521 EVT LoMemVT, HiMemVT;
4522 bool HiIsEmpty = false;
4523 std::tie(LoMemVT, HiMemVT) =
4524 DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);
4525
4526 SDValue Lo, Hi, Res;
4527 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4528 N->getPointerInfo(), MachineMemOperand::MOStore,
4529 LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
4530 N->getRanges());
4531
4532 Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
4533 N->getAddressingMode(), N->isTruncatingStore(),
4534 N->isCompressingStore());
4535
4536 if (HiIsEmpty) {
4537 // The hi masked store has zero storage size.
4538 // Only the lo masked store is needed.
4539 Res = Lo;
4540 } else {
4541
4542 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
4543 N->isCompressingStore());
4544
4545 MachinePointerInfo MPI;
4546 if (LoMemVT.isScalableVector()) {
4547 Alignment = commonAlignment(
4548 Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8);
4549 MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
4550 } else
4551 MPI = N->getPointerInfo().getWithOffset(
4552 LoMemVT.getStoreSize().getFixedValue());
4553
4554 MMO = DAG.getMachineFunction().getMachineMemOperand(
4556 Alignment, N->getAAInfo(), N->getRanges());
4557
4558 Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO,
4559 N->getAddressingMode(), N->isTruncatingStore(),
4560 N->isCompressingStore());
4561
4562 // Build a factor node to remember that this store is independent of the
4563 // other one.
4564 Res = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
4565 }
4566
4567 return Res;
4568}
4569
4570SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) {
4571 SDValue Ch = N->getChain();
4572 SDValue Ptr = N->getBasePtr();
4573 EVT MemoryVT = N->getMemoryVT();
4574 Align Alignment = N->getBaseAlign();
4575 SDLoc DL(N);
4576 struct Operands {
4577 SDValue Mask;
4578 SDValue Index;
4579 SDValue Scale;
4580 SDValue Data;
4581 } Ops = [&]() -> Operands {
4582 if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
4583 return {MSC->getMask(), MSC->getIndex(), MSC->getScale(),
4584 MSC->getValue()};
4585 }
4586 auto *VPSC = cast<VPScatterSDNode>(N);
4587 return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale(),
4588 VPSC->getValue()};
4589 }();
4590 // Split all operands
4591
4592 EVT LoMemVT, HiMemVT;
4593 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
4594
4595 SDValue DataLo, DataHi;
4596 if (getTypeAction(Ops.Data.getValueType()) == TargetLowering::TypeSplitVector)
4597 // Split Data operand
4598 GetSplitVector(Ops.Data, DataLo, DataHi);
4599 else
4600 std::tie(DataLo, DataHi) = DAG.SplitVector(Ops.Data, DL);
4601
4602 // Split Mask operand
4603 SDValue MaskLo, MaskHi;
4604 if (OpNo == 1 && Ops.Mask.getOpcode() == ISD::SETCC) {
4605 SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi);
4606 } else {
4607 std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, DL);
4608 }
4609
4610 SDValue IndexHi, IndexLo;
4611 if (getTypeAction(Ops.Index.getValueType()) ==
4613 GetSplitVector(Ops.Index, IndexLo, IndexHi);
4614 else
4615 std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, DL);
4616
4617 SDValue Lo;
4618 MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
4619 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
4620 N->getPointerInfo(), MMOFlags, LocationSize::beforeOrAfterPointer(),
4621 Alignment, N->getAAInfo(), N->getRanges());
4622
4623 if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) {
4624 SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale};
4625 Lo =
4626 DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO,
4627 MSC->getIndexType(), MSC->isTruncatingStore());
4628
4629 // The order of the Scatter operation after split is well defined. The "Hi"
4630 // part comes after the "Lo". So these two operations should be chained one
4631 // after another.
4632 SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Ops.Scale};
4633 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi,
4634 MMO, MSC->getIndexType(),
4635 MSC->isTruncatingStore());
4636 }
4637 auto *VPSC = cast<VPScatterSDNode>(N);
4638 SDValue EVLLo, EVLHi;
4639 std::tie(EVLLo, EVLHi) =
4640 DAG.SplitEVL(VPSC->getVectorLength(), Ops.Data.getValueType(), DL);
4641
4642 SDValue OpsLo[] = {Ch, DataLo, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo};
4643 Lo = DAG.getScatterVP(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO,
4644 VPSC->getIndexType());
4645
4646 // The order of the Scatter operation after split is well defined. The "Hi"
4647 // part comes after the "Lo". So these two operations should be chained one
4648 // after another.
4649 SDValue OpsHi[] = {Lo, DataHi, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi};
4650 return DAG.getScatterVP(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi, MMO,
4651 VPSC->getIndexType());
4652}
4653
4654SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
4655 assert(N->isUnindexed() && "Indexed store of vector?");
4656 assert(OpNo == 1 && "Can only split the stored value");
4657 SDLoc DL(N);
4658
4659 bool isTruncating = N->isTruncatingStore();
4660 SDValue Ch = N->getChain();
4661 SDValue Ptr = N->getBasePtr();
4662 EVT MemoryVT = N->getMemoryVT();
4663 Align Alignment = N->getBaseAlign();
4664 MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags();
4665 AAMDNodes AAInfo = N->getAAInfo();
4666 SDValue Lo, Hi;
4667 GetSplitVector(N->getOperand(1), Lo, Hi);
4668
4669 EVT LoMemVT, HiMemVT;
4670 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
4671
4672 // Scalarize if the split halves are not byte-sized.
4673 if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized())
4674 return TLI.scalarizeVectorStore(N, DAG);
4675
4676 if (isTruncating)
4677 Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT,
4678 Alignment, MMOFlags, AAInfo);
4679 else
4680 Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags,
4681 AAInfo);
4682
4683 MachinePointerInfo MPI;
4684 IncrementPointer(N, LoMemVT, MPI, Ptr);
4685
4686 if (isTruncating)
4687 Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, MPI,
4688 HiMemVT, Alignment, MMOFlags, AAInfo);
4689 else
4690 Hi = DAG.getStore(Ch, DL, Hi, Ptr, MPI, Alignment, MMOFlags, AAInfo);
4691
4692 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
4693}
4694
4695SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
4696 SDLoc DL(N);
4697
4698 // The input operands all must have the same type, and we know the result
4699 // type is valid. Convert this to a buildvector which extracts all the
4700 // input elements.
4701 // TODO: If the input elements are power-two vectors, we could convert this to
4702 // a new CONCAT_VECTORS node with elements that are half-wide.
4704 EVT EltVT = N->getValueType(0).getVectorElementType();
4705 for (const SDValue &Op : N->op_values()) {
4706 for (unsigned i = 0, e = Op.getValueType().getVectorNumElements();
4707 i != e; ++i) {
4708 Elts.push_back(DAG.getExtractVectorElt(DL, EltVT, Op, i));
4709 }
4710 }
4711
4712 return DAG.getBuildVector(N->getValueType(0), DL, Elts);
4713}
4714
SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
  // The result type is legal, but the input type is illegal. If splitting
  // ends up with the result type of each half still being legal, just
  // do that. If, however, that would result in an illegal result type,
  // we can try to get more clever with power-two vectors. Specifically,
  // split the input type, but also widen the result element size, then
  // concatenate the halves and truncate again. For example, consider a target
  // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit
  // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do:
  //   %inlo = v4i32 extract_subvector %in, 0
  //   %inhi = v4i32 extract_subvector %in, 4
  //   %lo16 = v4i16 trunc v4i32 %inlo
  //   %hi16 = v4i16 trunc v4i32 %inhi
  //   %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16
  //   %res = v8i8 trunc v8i16 %in16
  //
  // Without this transform, the original truncate would end up being
  // scalarized, which is pretty much always a last resort.
  // For strict FP opcodes the chain is operand 0, so the vector is at 1.
  unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
  SDValue InVec = N->getOperand(OpNo);
  EVT InVT = InVec->getValueType(0);
  EVT OutVT = N->getValueType(0);
  ElementCount NumElements = OutVT.getVectorElementCount();
  bool IsFloat = OutVT.isFloatingPoint();

  unsigned InElementSize = InVT.getScalarSizeInBits();
  unsigned OutElementSize = OutVT.getScalarSizeInBits();

  // Determine the split output VT. If it's legal we can just split directly.
  EVT LoOutVT, HiOutVT;
  std::tie(LoOutVT, HiOutVT) = DAG.GetSplitDestVTs(OutVT);
  assert(LoOutVT == HiOutVT && "Unequal split?");

  // If the input elements are only 1/2 the width of the result elements,
  // just use the normal splitting. Our trick only works if there's room
  // to split more than once.
  if (isTypeLegal(LoOutVT) || InElementSize <= OutElementSize * 2 ||
      (IsFloat && !isPowerOf2_32(InElementSize)))
    return SplitVecOp_UnaryOp(N);
  SDLoc DL(N);

  // Don't touch if this will be scalarized.
  EVT FinalVT = InVT;
  while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
    FinalVT = FinalVT.getHalfNumVectorElementsVT(*DAG.getContext());

  if (getTypeAction(FinalVT) == TargetLowering::TypeScalarizeVector)
    return SplitVecOp_UnaryOp(N);

  // Get the split input vector.
  SDValue InLoVec, InHiVec;
  GetSplitVector(InVec, InLoVec, InHiVec);

  // Truncate them to 1/2 the element size.
  //
  // This assumes the number of elements is a power of two; any vector that
  // isn't should be widened, not split.
  EVT HalfElementVT = IsFloat ?
    EVT::getFloatingPointVT(InElementSize/2) :
    EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
                                NumElements.divideCoefficientBy(2));

  SDValue HalfLo;
  SDValue HalfHi;
  SDValue Chain;
  if (N->isStrictFPOpcode()) {
    HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
                         {N->getOperand(0), InLoVec});
    HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
                         {N->getOperand(0), InHiVec});
    // Legalize the chain result - switch anything that used the old chain to
    // use the new one.
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1),
                        HalfHi.getValue(1));
  } else {
    HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
    HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
  }

  // Concatenate them to get the full intermediate truncation result.
  EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements);
  SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo,
                                 HalfHi);
  // Now finish up by truncating all the way down to the original result
  // type. This should normally be something that ends up being legal directly,
  // but in theory if a target has very wide vectors and an annoyingly
  // restricted set of legal types, this split can chain to build things up.

  if (N->isStrictFPOpcode()) {
    SDValue Res = DAG.getNode(
        ISD::STRICT_FP_ROUND, DL, {OutVT, MVT::Other},
        {Chain, InterVec,
         DAG.getTargetConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()))});
    // Relink the chain
    ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1));
    return Res;
  }

  return IsFloat
             ? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec,
                           DAG.getTargetConstant(
                               0, DL, TLI.getPointerTy(DAG.getDataLayout())))
             : DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec);
}
4820
/// Split the vector operands of a comparison whose result type is legal.
/// Handles ISD::SETCC, the strict variants STRICT_FSETCC/STRICT_FSETCCS, and
/// ISD::VP_SETCC.
SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
  unsigned Opc = N->getOpcode();
  bool isStrict = Opc == ISD::STRICT_FSETCC || Opc == ISD::STRICT_FSETCCS;
  assert(N->getValueType(0).isVector() &&
         N->getOperand(isStrict ? 1 : 0).getValueType().isVector() &&
         "Operand types must be vectors");
  // The result has a legal vector type, but the input needs splitting.
  SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes;
  SDLoc DL(N);
  // Strict nodes carry the chain as operand 0, shifting the compared vectors
  // by one index.
  GetSplitVector(N->getOperand(isStrict ? 1 : 0), Lo0, Hi0);
  GetSplitVector(N->getOperand(isStrict ? 2 : 1), Lo1, Hi1);

  EVT VT = N->getValueType(0);
  EVT PartResVT = getSetCCResultType(Lo0.getValueType());

  if (Opc == ISD::SETCC) {
    LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
    HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
  } else if (isStrict) {
    LoRes = DAG.getNode(Opc, DL, DAG.getVTList(PartResVT, N->getValueType(1)),
                        N->getOperand(0), Lo0, Lo1, N->getOperand(3));
    HiRes = DAG.getNode(Opc, DL, DAG.getVTList(PartResVT, N->getValueType(1)),
                        N->getOperand(0), Hi0, Hi1, N->getOperand(3));
    // Legalize the chain result - switch anything that used the old chain to
    // use the new one.
    SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                                   LoRes.getValue(1), HiRes.getValue(1));
    ReplaceValueWith(SDValue(N, 1), NewChain);
  } else {
    assert(Opc == ISD::VP_SETCC && "Expected VP_SETCC opcode");
    // The VP form also needs its mask (operand 3) and EVL (operand 4) split.
    SDValue MaskLo, MaskHi, EVLLo, EVLHi;
    std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3));
    std::tie(EVLLo, EVLHi) =
        DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL);
    LoRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Lo0, Lo1,
                        N->getOperand(2), MaskLo, EVLLo);
    HiRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Hi0, Hi1,
                        N->getOperand(2), MaskHi, EVLHi);
  }

  // Reassemble the halves; if the concatenated type is not the expected
  // result type, extend or truncate according to the target's boolean
  // contents.
  EVT ConcatVT = PartResVT.getDoubleNumVectorElementsVT(*DAG.getContext());
  SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, LoRes, HiRes);
  if (VT == ConcatVT)
    return Con;

  EVT OpVT = N->getOperand(0).getValueType();
  ISD::NodeType ExtendCode =
      TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
  return DAG.getExtOrTrunc(Con, DL, VT, ExtendCode);
}
4869
4870
/// Split the vector operand of an FP_ROUND-family node (plain, strict, or
/// VP_FP_ROUND) and reassemble the rounded halves with CONCAT_VECTORS.
SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
  // The result has a legal vector type, but the input needs splitting.
  EVT ResVT = N->getValueType(0);
  SDValue Lo, Hi;
  SDLoc DL(N);
  // Strict nodes carry the chain as operand 0, so the vector operand is at
  // index 1 for them.
  GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
  EVT InVT = Lo.getValueType();

  // Each half rounds to the result element type with the halved count.
  EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
                               InVT.getVectorElementCount());

  if (N->isStrictFPOpcode()) {
    Lo = DAG.getNode(N->getOpcode(), DL, {OutVT, MVT::Other},
                     {N->getOperand(0), Lo, N->getOperand(2)});
    Hi = DAG.getNode(N->getOpcode(), DL, {OutVT, MVT::Other},
                     {N->getOperand(0), Hi, N->getOperand(2)});
    // Legalize the chain result - switch anything that used the old chain to
    // use the new one.
    SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
                                   Lo.getValue(1), Hi.getValue(1));
    ReplaceValueWith(SDValue(N, 1), NewChain);
  } else if (N->getOpcode() == ISD::VP_FP_ROUND) {
    // The VP form also needs its mask (operand 1) and EVL (operand 2) split.
    SDValue MaskLo, MaskHi, EVLLo, EVLHi;
    std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1));
    std::tie(EVLLo, EVLHi) =
        DAG.SplitEVL(N->getOperand(2), N->getValueType(0), DL);
    Lo = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Lo, MaskLo, EVLLo);
    Hi = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Hi, MaskHi, EVLHi);
  } else {
    Lo = DAG.getNode(N->getOpcode(), DL, OutVT, Lo, N->getOperand(1));
    Hi = DAG.getNode(N->getOpcode(), DL, OutVT, Hi, N->getOperand(1));
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
4906
4907// Split a vector type in an FP binary operation where the second operand has a
4908// different type from the first.
4909//
4910// The result (and the first input) has a legal vector type, but the second
4911// input needs splitting.
4912SDValue DAGTypeLegalizer::SplitVecOp_FPOpDifferentTypes(SDNode *N) {
4913 SDLoc DL(N);
4914
4915 EVT LHSLoVT, LHSHiVT;
4916 std::tie(LHSLoVT, LHSHiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
4917
4918 if (!isTypeLegal(LHSLoVT) || !isTypeLegal(LHSHiVT))
4919 return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements());
4920
4921 SDValue LHSLo, LHSHi;
4922 std::tie(LHSLo, LHSHi) =
4923 DAG.SplitVector(N->getOperand(0), DL, LHSLoVT, LHSHiVT);
4924
4925 SDValue RHSLo, RHSHi;
4926 std::tie(RHSLo, RHSHi) = DAG.SplitVector(N->getOperand(1), DL);
4927
4928 SDValue Lo = DAG.getNode(N->getOpcode(), DL, LHSLoVT, LHSLo, RHSLo);
4929 SDValue Hi = DAG.getNode(N->getOpcode(), DL, LHSHiVT, LHSHi, RHSHi);
4930
4931 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), Lo, Hi);
4932}
4933
4934SDValue DAGTypeLegalizer::SplitVecOp_CMP(SDNode *N) {
4935 LLVMContext &Ctxt = *DAG.getContext();
4936 SDLoc dl(N);
4937
4938 SDValue LHSLo, LHSHi, RHSLo, RHSHi;
4939 GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
4940 GetSplitVector(N->getOperand(1), RHSLo, RHSHi);
4941
4942 EVT ResVT = N->getValueType(0);
4943 ElementCount SplitOpEC = LHSLo.getValueType().getVectorElementCount();
4944 EVT NewResVT =
4945 EVT::getVectorVT(Ctxt, ResVT.getVectorElementType(), SplitOpEC);
4946
4947 SDValue Lo = DAG.getNode(N->getOpcode(), dl, NewResVT, LHSLo, RHSLo);
4948 SDValue Hi = DAG.getNode(N->getOpcode(), dl, NewResVT, LHSHi, RHSHi);
4949
4950 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
4951}
4952
4953SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) {
4954 EVT ResVT = N->getValueType(0);
4955 SDValue Lo, Hi;
4956 SDLoc dl(N);
4957 GetSplitVector(N->getOperand(0), Lo, Hi);
4958 EVT InVT = Lo.getValueType();
4959
4960 EVT NewResVT =
4961 EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
4962 InVT.getVectorElementCount());
4963
4964 Lo = DAG.getNode(N->getOpcode(), dl, NewResVT, Lo, N->getOperand(1));
4965 Hi = DAG.getNode(N->getOpcode(), dl, NewResVT, Hi, N->getOperand(1));
4966
4967 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
4968}
4969
/// Split the vector operand of a CTTZ_ELTS / CTTZ_ELTS_ZERO_UNDEF node:
/// count trailing zero elements in the low half first, and only add the high
/// half's count when the low half contained no set element.
SDValue DAGTypeLegalizer::SplitVecOp_CttzElts(SDNode *N) {
  SDLoc DL(N);
  EVT ResVT = N->getValueType(0);

  SDValue Lo, Hi;
  SDValue VecOp = N->getOperand(0);
  GetSplitVector(VecOp, Lo, Hi);

  // if CTTZ_ELTS(Lo) != VL => CTTZ_ELTS(Lo).
  // else => VL + (CTTZ_ELTS(Hi) or CTTZ_ELTS_ZERO_POISON(Hi)).
  // The low half always uses plain CTTZ_ELTS (it must yield VL, not poison,
  // when no element is set); the high half keeps N's original opcode.
  SDValue ResLo = DAG.getNode(ISD::CTTZ_ELTS, DL, ResVT, Lo);
  SDValue VL =
      DAG.getElementCount(DL, ResVT, Lo.getValueType().getVectorElementCount());
  SDValue ResLoNotVL =
      DAG.getSetCC(DL, getSetCCResultType(ResVT), ResLo, VL, ISD::SETNE);
  SDValue ResHi = DAG.getNode(N->getOpcode(), DL, ResVT, Hi);
  return DAG.getSelect(DL, ResVT, ResLoNotVL, ResLo,
                       DAG.getNode(ISD::ADD, DL, ResVT, VL, ResHi));
}
4989
/// VP analogue of SplitVecOp_CttzElts: split the vector, mask, and EVL, then
/// prefer the low half's count unless it equals EVLLo (no set element there),
/// in which case add the high half's count to EVLLo.
SDValue DAGTypeLegalizer::SplitVecOp_VP_CttzElements(SDNode *N) {
  SDLoc DL(N);
  EVT ResVT = N->getValueType(0);

  SDValue Lo, Hi;
  SDValue VecOp = N->getOperand(0);
  GetSplitVector(VecOp, Lo, Hi);

  // Split the mask (operand 1) and explicit vector length (operand 2).
  auto [MaskLo, MaskHi] = SplitMask(N->getOperand(1));
  auto [EVLLo, EVLHi] =
      DAG.SplitEVL(N->getOperand(2), VecOp.getValueType(), DL);
  // EVLLo in the result type, for comparison and addition below.
  SDValue VLo = DAG.getZExtOrTrunc(EVLLo, DL, ResVT);

  // if VP_CTTZ_ELTS(Lo) != EVLLo => VP_CTTZ_ELTS(Lo).
  // else => EVLLo + (VP_CTTZ_ELTS(Hi) or VP_CTTZ_ELTS_ZERO_POISON(Hi)).
  // The low half always uses plain VP_CTTZ_ELTS; the high half keeps N's
  // original opcode.
  SDValue ResLo = DAG.getNode(ISD::VP_CTTZ_ELTS, DL, ResVT, Lo, MaskLo, EVLLo);
  SDValue ResLoNotEVL =
      DAG.getSetCC(DL, getSetCCResultType(ResVT), ResLo, VLo, ISD::SETNE);
  SDValue ResHi = DAG.getNode(N->getOpcode(), DL, ResVT, Hi, MaskHi, EVLHi);
  return DAG.getSelect(DL, ResVT, ResLoNotEVL, ResLo,
                       DAG.getNode(ISD::ADD, DL, ResVT, VLo, ResHi));
}
5012
5013SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_HISTOGRAM(SDNode *N) {
5014 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N);
5015 SDLoc DL(HG);
5016 SDValue Inc = HG->getInc();
5017 SDValue Ptr = HG->getBasePtr();
5018 SDValue Scale = HG->getScale();
5019 SDValue IntID = HG->getIntID();
5020 EVT MemVT = HG->getMemoryVT();
5021 MachineMemOperand *MMO = HG->getMemOperand();
5022 ISD::MemIndexType IndexType = HG->getIndexType();
5023
5024 SDValue IndexLo, IndexHi, MaskLo, MaskHi;
5025 std::tie(IndexLo, IndexHi) = DAG.SplitVector(HG->getIndex(), DL);
5026 std::tie(MaskLo, MaskHi) = DAG.SplitVector(HG->getMask(), DL);
5027 SDValue OpsLo[] = {HG->getChain(), Inc, MaskLo, Ptr, IndexLo, Scale, IntID};
5028 SDValue Lo = DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL,
5029 OpsLo, MMO, IndexType);
5030 SDValue OpsHi[] = {Lo, Inc, MaskHi, Ptr, IndexHi, Scale, IntID};
5031 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, OpsHi,
5032 MMO, IndexType);
5033}
5034
5035SDValue DAGTypeLegalizer::SplitVecOp_PARTIAL_REDUCE_MLA(SDNode *N) {
5036 SDValue Acc = N->getOperand(0);
5037 assert(getTypeAction(Acc.getValueType()) != TargetLowering::TypeSplitVector &&
5038 "Accumulator should already be a legal type, and shouldn't need "
5039 "further splitting");
5040
5041 SDLoc DL(N);
5042 SDValue Input1Lo, Input1Hi, Input2Lo, Input2Hi;
5043 GetSplitVector(N->getOperand(1), Input1Lo, Input1Hi);
5044 GetSplitVector(N->getOperand(2), Input2Lo, Input2Hi);
5045 unsigned Opcode = N->getOpcode();
5046 EVT ResultVT = Acc.getValueType();
5047
5048 SDValue Lo = DAG.getNode(Opcode, DL, ResultVT, Acc, Input1Lo, Input2Lo);
5049 return DAG.getNode(Opcode, DL, ResultVT, Lo, Input1Hi, Input2Hi);
5050}
5051
5052//===----------------------------------------------------------------------===//
5053// Result Vector Widening
5054//===----------------------------------------------------------------------===//
5055
5056void DAGTypeLegalizer::ReplaceOtherWidenResults(SDNode *N, SDNode *WidenNode,
5057 unsigned WidenResNo) {
5058 unsigned NumResults = N->getNumValues();
5059 for (unsigned ResNo = 0; ResNo < NumResults; ResNo++) {
5060 if (ResNo == WidenResNo)
5061 continue;
5062 EVT ResVT = N->getValueType(ResNo);
5063 if (getTypeAction(ResVT) == TargetLowering::TypeWidenVector) {
5064 SetWidenedVector(SDValue(N, ResNo), SDValue(WidenNode, ResNo));
5065 } else {
5066 SDLoc DL(N);
5067 SDValue ResVal =
5068 DAG.getExtractSubvector(DL, ResVT, SDValue(WidenNode, ResNo), 0);
5069 ReplaceValueWith(SDValue(N, ResNo), ResVal);
5070 }
5071 }
5072}
5073
5074void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
5075 LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG));
5076
5077 // See if the target wants to custom widen this node.
5078 if (CustomWidenLowerNode(N, N->getValueType(ResNo)))
5079 return;
5080
5081 SDValue Res = SDValue();
5082
5083 auto unrollExpandedOp = [&]() {
5084 // We're going to widen this vector op to a legal type by padding with undef
5085 // elements. If the wide vector op is eventually going to be expanded to
5086 // scalar libcalls, then unroll into scalar ops now to avoid unnecessary
5087 // libcalls on the undef elements.
5088 EVT VT = N->getValueType(0);
5089 EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
5090 if (!TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), WideVecVT) &&
5091 TLI.isOperationExpandOrLibCall(N->getOpcode(), VT.getScalarType())) {
5092 Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
5093 if (N->getNumValues() > 1)
5094 ReplaceOtherWidenResults(N, Res.getNode(), ResNo);
5095 return true;
5096 }
5097 return false;
5098 };
5099
5100 switch (N->getOpcode()) {
5101 default:
5102#ifndef NDEBUG
5103 dbgs() << "WidenVectorResult #" << ResNo << ": ";
5104 N->dump(&DAG);
5105 dbgs() << "\n";
5106#endif
5107 report_fatal_error("Do not know how to widen the result of this operator!");
5108
5111 Res = WidenVecRes_LOOP_DEPENDENCE_MASK(N);
5112 break;
5113 case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break;
5114 case ISD::ADDRSPACECAST:
5115 Res = WidenVecRes_ADDRSPACECAST(N);
5116 break;
5117 case ISD::AssertZext: Res = WidenVecRes_AssertZext(N); break;
5118 case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
5119 case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
5120 case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
5122 Res = WidenVecRes_INSERT_SUBVECTOR(N);
5123 break;
5124 case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
5125 case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
5126 case ISD::ATOMIC_LOAD:
5127 Res = WidenVecRes_ATOMIC_LOAD(cast<AtomicSDNode>(N));
5128 break;
5129 case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
5130 case ISD::STEP_VECTOR:
5131 case ISD::SPLAT_VECTOR:
5133 Res = WidenVecRes_ScalarOp(N);
5134 break;
5135 case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
5136 case ISD::VSELECT:
5137 case ISD::SELECT:
5138 case ISD::VP_SELECT:
5139 case ISD::VP_MERGE:
5140 Res = WidenVecRes_Select(N);
5141 break;
5142 case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
5143 case ISD::VP_SETCC:
5144 case ISD::SETCC: Res = WidenVecRes_SETCC(N); break;
5145 case ISD::POISON:
5146 case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
5148 Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N));
5149 break;
5150 case ISD::VP_LOAD:
5151 Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N));
5152 break;
5153 case ISD::VP_LOAD_FF:
5154 Res = WidenVecRes_VP_LOAD_FF(cast<VPLoadFFSDNode>(N));
5155 break;
5156 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD:
5157 Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N));
5158 break;
5160 Res = WidenVecRes_VECTOR_COMPRESS(N);
5161 break;
5162 case ISD::MLOAD:
5163 Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N));
5164 break;
5165 case ISD::MGATHER:
5166 Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N));
5167 break;
5168 case ISD::VP_GATHER:
5169 Res = WidenVecRes_VP_GATHER(cast<VPGatherSDNode>(N));
5170 break;
5172 Res = WidenVecRes_VECTOR_REVERSE(N);
5173 break;
5175 Res = WidenVecRes_GET_ACTIVE_LANE_MASK(N);
5176 break;
5177
5178 case ISD::ADD: case ISD::VP_ADD:
5179 case ISD::AND: case ISD::VP_AND:
5180 case ISD::MUL: case ISD::VP_MUL:
5181 case ISD::MULHS:
5182 case ISD::MULHU:
5183 case ISD::ABDS:
5184 case ISD::ABDU:
5185 case ISD::OR: case ISD::VP_OR:
5186 case ISD::SUB: case ISD::VP_SUB:
5187 case ISD::XOR: case ISD::VP_XOR:
5188 case ISD::SHL: case ISD::VP_SHL:
5189 case ISD::SRA: case ISD::VP_SRA:
5190 case ISD::SRL: case ISD::VP_SRL:
5191 case ISD::CLMUL:
5192 case ISD::CLMULR:
5193 case ISD::CLMULH:
5194 case ISD::FMINNUM:
5195 case ISD::FMINNUM_IEEE:
5196 case ISD::VP_FMINNUM:
5197 case ISD::FMAXNUM:
5198 case ISD::FMAXNUM_IEEE:
5199 case ISD::VP_FMAXNUM:
5200 case ISD::FMINIMUM:
5201 case ISD::VP_FMINIMUM:
5202 case ISD::FMAXIMUM:
5203 case ISD::VP_FMAXIMUM:
5204 case ISD::FMINIMUMNUM:
5205 case ISD::FMAXIMUMNUM:
5206 case ISD::SMIN: case ISD::VP_SMIN:
5207 case ISD::SMAX: case ISD::VP_SMAX:
5208 case ISD::UMIN: case ISD::VP_UMIN:
5209 case ISD::UMAX: case ISD::VP_UMAX:
5210 case ISD::UADDSAT: case ISD::VP_UADDSAT:
5211 case ISD::SADDSAT: case ISD::VP_SADDSAT:
5212 case ISD::USUBSAT: case ISD::VP_USUBSAT:
5213 case ISD::SSUBSAT: case ISD::VP_SSUBSAT:
5214 case ISD::SSHLSAT:
5215 case ISD::USHLSAT:
5216 case ISD::ROTL:
5217 case ISD::ROTR:
5218 case ISD::AVGFLOORS:
5219 case ISD::AVGFLOORU:
5220 case ISD::AVGCEILS:
5221 case ISD::AVGCEILU:
5222 // Vector-predicated binary op widening. Note that -- unlike the
5223 // unpredicated versions -- we don't have to worry about trapping on
5224 // operations like UDIV, FADD, etc., as we pass on the original vector
5225 // length parameter. This means the widened elements containing garbage
5226 // aren't active.
5227 case ISD::VP_SDIV:
5228 case ISD::VP_UDIV:
5229 case ISD::VP_SREM:
5230 case ISD::VP_UREM:
5231 case ISD::VP_FADD:
5232 case ISD::VP_FSUB:
5233 case ISD::VP_FMUL:
5234 case ISD::VP_FDIV:
5235 case ISD::VP_FREM:
5236 case ISD::VP_FCOPYSIGN:
5237 Res = WidenVecRes_Binary(N);
5238 break;
5239
5240 case ISD::MASKED_UDIV:
5241 case ISD::MASKED_SDIV:
5242 case ISD::MASKED_UREM:
5243 case ISD::MASKED_SREM:
5244 Res = WidenVecRes_MaskedBinary(N);
5245 break;
5246
5247 case ISD::SCMP:
5248 case ISD::UCMP:
5249 Res = WidenVecRes_CMP(N);
5250 break;
5251
5252 case ISD::FPOW:
5253 case ISD::FATAN2:
5254 case ISD::FREM:
5255 if (unrollExpandedOp())
5256 break;
5257 // If the target has custom/legal support for the scalar FP intrinsic ops
5258 // (they are probably not destined to become libcalls), then widen those
5259 // like any other binary ops.
5260 [[fallthrough]];
5261
5262 case ISD::FADD:
5263 case ISD::FMUL:
5264 case ISD::FSUB:
5265 case ISD::FDIV:
5266 case ISD::SDIV:
5267 case ISD::UDIV:
5268 case ISD::SREM:
5269 case ISD::UREM:
5270 Res = WidenVecRes_BinaryCanTrap(N);
5271 break;
5272
5273 case ISD::SMULFIX:
5274 case ISD::SMULFIXSAT:
5275 case ISD::UMULFIX:
5276 case ISD::UMULFIXSAT:
5277 // These are binary operations, but with an extra operand that shouldn't
5278 // be widened (the scale).
5279 Res = WidenVecRes_BinaryWithExtraScalarOp(N);
5280 break;
5281
5282#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
5283 case ISD::STRICT_##DAGN:
5284#include "llvm/IR/ConstrainedOps.def"
5285 Res = WidenVecRes_StrictFP(N);
5286 break;
5287
5288 case ISD::UADDO:
5289 case ISD::SADDO:
5290 case ISD::USUBO:
5291 case ISD::SSUBO:
5292 case ISD::UMULO:
5293 case ISD::SMULO:
5294 Res = WidenVecRes_OverflowOp(N, ResNo);
5295 break;
5296
5297 case ISD::FCOPYSIGN:
5298 Res = WidenVecRes_FCOPYSIGN(N);
5299 break;
5300
5301 case ISD::IS_FPCLASS:
5302 case ISD::FPTRUNC_ROUND:
5303 Res = WidenVecRes_UnarySameEltsWithScalarArg(N);
5304 break;
5305
5306 case ISD::FLDEXP:
5307 case ISD::FPOWI:
5308 if (!unrollExpandedOp())
5309 Res = WidenVecRes_ExpOp(N);
5310 break;
5311
5315 Res = WidenVecRes_EXTEND_VECTOR_INREG(N);
5316 break;
5317
5318 case ISD::ANY_EXTEND:
5319 case ISD::FP_EXTEND:
5320 case ISD::VP_FP_EXTEND:
5321 case ISD::FP_ROUND:
5322 case ISD::VP_FP_ROUND:
5323 case ISD::FP_TO_SINT:
5324 case ISD::VP_FP_TO_SINT:
5325 case ISD::FP_TO_UINT:
5326 case ISD::VP_FP_TO_UINT:
5327 case ISD::SIGN_EXTEND:
5328 case ISD::VP_SIGN_EXTEND:
5329 case ISD::SINT_TO_FP:
5330 case ISD::VP_SINT_TO_FP:
5331 case ISD::VP_TRUNCATE:
5332 case ISD::TRUNCATE:
5333 case ISD::UINT_TO_FP:
5334 case ISD::VP_UINT_TO_FP:
5335 case ISD::ZERO_EXTEND:
5336 case ISD::VP_ZERO_EXTEND:
5338 Res = WidenVecRes_Convert(N);
5339 break;
5340
5343 Res = WidenVecRes_FP_TO_XINT_SAT(N);
5344 break;
5345
5346 case ISD::LRINT:
5347 case ISD::LLRINT:
5348 case ISD::VP_LRINT:
5349 case ISD::VP_LLRINT:
5350 case ISD::LROUND:
5351 case ISD::LLROUND:
5352 Res = WidenVecRes_XROUND(N);
5353 break;
5354
5355 case ISD::FACOS:
5356 case ISD::FASIN:
5357 case ISD::FATAN:
5358 case ISD::FCEIL:
5359 case ISD::FCOS:
5360 case ISD::FCOSH:
5361 case ISD::FEXP:
5362 case ISD::FEXP2:
5363 case ISD::FEXP10:
5364 case ISD::FFLOOR:
5365 case ISD::FLOG:
5366 case ISD::FLOG10:
5367 case ISD::FLOG2:
5368 case ISD::FNEARBYINT:
5369 case ISD::FRINT:
5370 case ISD::FROUND:
5371 case ISD::FROUNDEVEN:
5372 case ISD::FSIN:
5373 case ISD::FSINH:
5374 case ISD::FSQRT:
5375 case ISD::FTAN:
5376 case ISD::FTANH:
5377 case ISD::FTRUNC:
5378 if (unrollExpandedOp())
5379 break;
5380 // If the target has custom/legal support for the scalar FP intrinsic ops
5381 // (they are probably not destined to become libcalls), then widen those
5382 // like any other unary ops.
5383 [[fallthrough]];
5384
5385 case ISD::ABS:
5386 case ISD::VP_ABS:
5387 case ISD::BITREVERSE:
5388 case ISD::VP_BITREVERSE:
5389 case ISD::BSWAP:
5390 case ISD::VP_BSWAP:
5391 case ISD::CTLZ:
5392 case ISD::VP_CTLZ:
5394 case ISD::VP_CTLZ_ZERO_POISON:
5395 case ISD::CTPOP:
5396 case ISD::VP_CTPOP:
5397 case ISD::CTTZ:
5398 case ISD::VP_CTTZ:
5400 case ISD::VP_CTTZ_ZERO_POISON:
5401 case ISD::FNEG: case ISD::VP_FNEG:
5402 case ISD::FABS: case ISD::VP_FABS:
5403 case ISD::VP_SQRT:
5404 case ISD::VP_FCEIL:
5405 case ISD::VP_FFLOOR:
5406 case ISD::VP_FRINT:
5407 case ISD::VP_FNEARBYINT:
5408 case ISD::VP_FROUND:
5409 case ISD::VP_FROUNDEVEN:
5410 case ISD::VP_FROUNDTOZERO:
5411 case ISD::FREEZE:
5412 case ISD::ARITH_FENCE:
5413 case ISD::FCANONICALIZE:
5415 Res = WidenVecRes_Unary(N);
5416 break;
5417 case ISD::FMA: case ISD::VP_FMA:
5418 case ISD::FSHL:
5419 case ISD::VP_FSHL:
5420 case ISD::FSHR:
5421 case ISD::VP_FSHR:
5422 Res = WidenVecRes_Ternary(N);
5423 break;
5424 case ISD::FMODF:
5425 case ISD::FFREXP:
5426 case ISD::FSINCOS:
5427 case ISD::FSINCOSPI: {
5428 if (!unrollExpandedOp())
5429 Res = WidenVecRes_UnaryOpWithTwoResults(N, ResNo);
5430 break;
5431 }
5432 }
5433
5434 // If Res is null, the sub-method took care of registering the result.
5435 if (Res.getNode())
5436 SetWidenedVector(SDValue(N, ResNo), Res);
5437}
5438
5439SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) {
5440 // Ternary op widening.
5441 SDLoc dl(N);
5442 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
5443 SDValue InOp1 = GetWidenedVector(N->getOperand(0));
5444 SDValue InOp2 = GetWidenedVector(N->getOperand(1));
5445 SDValue InOp3 = GetWidenedVector(N->getOperand(2));
5446 if (N->getNumOperands() == 3)
5447 return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3);
5448
5449 assert(N->getNumOperands() == 5 && "Unexpected number of operands!");
5450 assert(N->isVPOpcode() && "Expected VP opcode");
5451
5452 SDValue Mask =
5453 GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount());
5454 return DAG.getNode(N->getOpcode(), dl, WidenVT,
5455 {InOp1, InOp2, InOp3, Mask, N->getOperand(4)});
5456}
5457
5458SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
5459 // Binary op widening.
5460 SDLoc dl(N);
5461 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
5462 SDValue InOp1 = GetWidenedVector(N->getOperand(0));
5463 SDValue InOp2 = GetWidenedVector(N->getOperand(1));
5464 if (N->getNumOperands() == 2)
5465 return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2,
5466 N->getFlags());
5467
5468 assert(N->getNumOperands() == 4 && "Unexpected number of operands!");
5469 assert(N->isVPOpcode() && "Expected VP opcode");
5470
5471 SDValue Mask =
5472 GetWidenedMask(N->getOperand(2), WidenVT.getVectorElementCount());
5473 return DAG.getNode(N->getOpcode(), dl, WidenVT,
5474 {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags());
5475}
5476
5477SDValue DAGTypeLegalizer::WidenVecRes_MaskedBinary(SDNode *N) {
5478 SDLoc dl(N);
5479 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
5480 SDValue InOp1 = GetWidenedVector(N->getOperand(0));
5481 SDValue InOp2 = GetWidenedVector(N->getOperand(1));
5482 SDValue Mask = N->getOperand(2);
5483 EVT WideMaskVT = WidenVT.changeVectorElementType(
5484 *DAG.getContext(), Mask.getValueType().getVectorElementType());
5485 Mask = ModifyToType(Mask, WideMaskVT, /*FillWithZeros=*/true);
5486 return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Mask,
5487 N->getFlags());
5488}
5489
5490SDValue DAGTypeLegalizer::WidenVecRes_CMP(SDNode *N) {
5491 LLVMContext &Ctxt = *DAG.getContext();
5492 SDLoc dl(N);
5493
5494 SDValue LHS = N->getOperand(0);
5495 SDValue RHS = N->getOperand(1);
5496 EVT OpVT = LHS.getValueType();
5497 if (getTypeAction(OpVT) == TargetLowering::TypeWidenVector) {
5498 LHS = GetWidenedVector(LHS);
5499 RHS = GetWidenedVector(RHS);
5500 OpVT = LHS.getValueType();
5501 }
5502
5503 EVT WidenResVT = TLI.getTypeToTransformTo(Ctxt, N->getValueType(0));
5504 ElementCount WidenResEC = WidenResVT.getVectorElementCount();
5505 if (WidenResEC == OpVT.getVectorElementCount()) {
5506 return DAG.getNode(N->getOpcode(), dl, WidenResVT, LHS, RHS);
5507 }
5508
5509 return DAG.UnrollVectorOp(N, WidenResVT.getVectorNumElements());
5510}
5511
5512SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) {
5513 // Binary op widening, but with an extra operand that shouldn't be widened.
5514 SDLoc dl(N);
5515 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
5516 SDValue InOp1 = GetWidenedVector(N->getOperand(0));
5517 SDValue InOp2 = GetWidenedVector(N->getOperand(1));
5518 SDValue InOp3 = N->getOperand(2);
5519 return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3,
5520 N->getFlags());
5521}
5522
5523// Given a vector of operations that have been broken up to widen, see
5524// if we can collect them together into the next widest legal VT. This
5525// implementation is trap-safe.
5527 SmallVectorImpl<SDValue> &ConcatOps,
5528 unsigned ConcatEnd, EVT VT, EVT MaxVT,
5529 EVT WidenVT) {
5530 // Check to see if we have a single operation with the widen type.
5531 if (ConcatEnd == 1) {
5532 VT = ConcatOps[0].getValueType();
5533 if (VT == WidenVT)
5534 return ConcatOps[0];
5535 }
5536
5537 SDLoc dl(ConcatOps[0]);
5538 EVT WidenEltVT = WidenVT.getVectorElementType();
5539
5540 // while (Some element of ConcatOps is not of type MaxVT) {
5541 // From the end of ConcatOps, collect elements of the same type and put
5542 // them into an op of the next larger supported type
5543 // }
5544 while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
5545 int Idx = ConcatEnd - 1;
5546 VT = ConcatOps[Idx--].getValueType();
5547 while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
5548 Idx--;
5549
5550 int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1;
5551 EVT NextVT;
5552 do {
5553 NextSize *= 2;
5554 NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
5555 } while (!TLI.isTypeLegal(NextVT));
5556
5557 if (!VT.isVector()) {
5558 // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
5559 SDValue VecOp = DAG.getPOISON(NextVT);
5560 unsigned NumToInsert = ConcatEnd - Idx - 1;
5561 for (unsigned i = 0, OpIdx = Idx + 1; i < NumToInsert; i++, OpIdx++)
5562 VecOp = DAG.getInsertVectorElt(dl, VecOp, ConcatOps[OpIdx], i);
5563 ConcatOps[Idx+1] = VecOp;
5564 ConcatEnd = Idx + 2;
5565 } else {
5566 // Vector type, create a CONCAT_VECTORS of type NextVT
5567 SDValue undefVec = DAG.getPOISON(VT);
5568 unsigned OpsToConcat = NextSize/VT.getVectorNumElements();
5569 SmallVector<SDValue, 16> SubConcatOps(OpsToConcat);
5570 unsigned RealVals = ConcatEnd - Idx - 1;
5571 unsigned SubConcatEnd = 0;
5572 unsigned SubConcatIdx = Idx + 1;
5573 while (SubConcatEnd < RealVals)
5574 SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx];
5575 while (SubConcatEnd < OpsToConcat)
5576 SubConcatOps[SubConcatEnd++] = undefVec;
5577 ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl,
5578 NextVT, SubConcatOps);
5579 ConcatEnd = SubConcatIdx + 1;
5580 }
5581 }
5582
5583 // Check to see if we have a single operation with the widen type.
5584 if (ConcatEnd == 1) {
5585 VT = ConcatOps[0].getValueType();
5586 if (VT == WidenVT)
5587 return ConcatOps[0];
5588 }
5589
5590 // add undefs of size MaxVT until ConcatOps grows to length of WidenVT
5591 unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
5592 if (NumOps != ConcatEnd ) {
5593 SDValue UndefVal = DAG.getPOISON(MaxVT);
5594 for (unsigned j = ConcatEnd; j < NumOps; ++j)
5595 ConcatOps[j] = UndefVal;
5596 }
5597 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
5598 ArrayRef(ConcatOps.data(), NumOps));
5599}
5600
SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) {
  // Binary op widening for operations that can trap.
  // Widening would introduce garbage lanes; for a trapping op (e.g. integer
  // division by a garbage zero) those lanes must never execute, so the op is
  // applied only to the original elements, tiled over the largest legal
  // sub-vector types, and the pieces are re-collected afterwards.
  unsigned Opcode = N->getOpcode();
  SDLoc dl(N);
  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
  EVT WidenEltVT = WidenVT.getVectorElementType();
  EVT VT = WidenVT;
  unsigned NumElts = VT.getVectorMinNumElements();
  const SDNodeFlags Flags = N->getFlags();
  // Find the largest legal vector type (same element type) no wider than
  // WidenVT by repeatedly halving the element count.
  while (!TLI.isTypeLegal(VT) && NumElts != 1) {
    NumElts = NumElts / 2;
    VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
  }

  if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) {
    // Operation doesn't trap so just widen as normal.
    SDValue InOp1 = GetWidenedVector(N->getOperand(0));
    SDValue InOp2 = GetWidenedVector(N->getOperand(1));
    return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags);
  }

  // Generate a vp.op if it is custom/legal for the target. This avoids need
  // to split and tile the subvectors (below), because the inactive lanes can
  // simply be disabled. To avoid possible recursion, only do this if the
  // widened mask type is legal.
  if (auto VPOpcode = ISD::getVPForBaseOpcode(Opcode);
      VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WidenVT)) {
    if (EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                          WidenVT.getVectorElementCount());
        TLI.isTypeLegal(WideMaskVT)) {
      SDValue InOp1 = GetWidenedVector(N->getOperand(0));
      SDValue InOp2 = GetWidenedVector(N->getOperand(1));
      SDValue Mask = DAG.getAllOnesConstant(dl, WideMaskVT);
      // The EVL is the ORIGINAL element count, so the garbage tail lanes of
      // the widened operands are never evaluated.
      SDValue EVL =
          DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
                              N->getValueType(0).getVectorElementCount());
      return DAG.getNode(*VPOpcode, dl, WidenVT, InOp1, InOp2, Mask, EVL,
                         Flags);
    }
  }

  // FIXME: Improve support for scalable vectors.
  assert(!VT.isScalableVector() && "Scalable vectors not handled yet.");

  // No legal vector version so unroll the vector operation and then widen.
  if (NumElts == 1)
    return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());

  // Since the operation can trap, apply operation on the original vector.
  EVT MaxVT = VT;
  SDValue InOp1 = GetWidenedVector(N->getOperand(0));
  SDValue InOp2 = GetWidenedVector(N->getOperand(1));
  unsigned CurNumElts = N->getValueType(0).getVectorNumElements();

  SmallVector<SDValue, 16> ConcatOps(CurNumElts);
  unsigned ConcatEnd = 0; // Current ConcatOps index.
  int Idx = 0; // Current Idx into input vectors.

  // NumElts := greatest legal vector size (at most WidenVT)
  // while (orig. vector has unhandled elements) {
  //   take munches of size NumElts from the beginning and add to ConcatOps
  //   NumElts := next smaller supported vector size or 1
  // }
  while (CurNumElts != 0) {
    while (CurNumElts >= NumElts) {
      SDValue EOp1 = DAG.getExtractSubvector(dl, VT, InOp1, Idx);
      SDValue EOp2 = DAG.getExtractSubvector(dl, VT, InOp2, Idx);
      ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags);
      Idx += NumElts;
      CurNumElts -= NumElts;
    }
    // Step down to the next smaller legal vector size (or scalar).
    do {
      NumElts = NumElts / 2;
      VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
    } while (!TLI.isTypeLegal(VT) && NumElts != 1);

    if (NumElts == 1) {
      // Remaining tail: apply the op element by element.
      for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
        SDValue EOp1 = DAG.getExtractVectorElt(dl, WidenEltVT, InOp1, Idx);
        SDValue EOp2 = DAG.getExtractVectorElt(dl, WidenEltVT, InOp2, Idx);
        ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
                                             EOp1, EOp2, Flags);
      }
      CurNumElts = 0;
    }
  }

  return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
}
5690
5691SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
5692 switch (N->getOpcode()) {
5693 case ISD::STRICT_FSETCC:
5695 return WidenVecRes_STRICT_FSETCC(N);
5702 return WidenVecRes_Convert_StrictFP(N);
5703 default:
5704 break;
5705 }
5706
5707 // StrictFP op widening for operations that can trap.
5708 unsigned NumOpers = N->getNumOperands();
5709 unsigned Opcode = N->getOpcode();
5710 SDLoc dl(N);
5711 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
5712 EVT WidenEltVT = WidenVT.getVectorElementType();
5713 EVT VT = WidenVT;
5714 unsigned NumElts = VT.getVectorNumElements();
5715 while (!TLI.isTypeLegal(VT) && NumElts != 1) {
5716 NumElts = NumElts / 2;
5717 VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
5718 }
5719
5720 // No legal vector version so unroll the vector operation and then widen.
5721 if (NumElts == 1)
5722 return UnrollVectorOp_StrictFP(N, WidenVT.getVectorNumElements());
5723
5724 // Since the operation can trap, apply operation on the original vector.
5725 EVT MaxVT = VT;
5727 unsigned CurNumElts = N->getValueType(0).getVectorNumElements();
5728
5729 SmallVector<SDValue, 16> ConcatOps(CurNumElts);
5731 unsigned ConcatEnd = 0; // Current ConcatOps index.
5732 int Idx = 0; // Current Idx into input vectors.
5733
5734 // The Chain is the first operand.
5735 InOps.push_back(N->getOperand(0));
5736
5737 // Now process the remaining operands.
5738 for (unsigned i = 1; i < NumOpers; ++i) {
5739 SDValue Oper = N->getOperand(i);
5740
5741 EVT OpVT = Oper.getValueType();
5742 if (OpVT.isVector()) {
5743 if (getTypeAction(OpVT) == TargetLowering::TypeWidenVector)
5744 Oper = GetWidenedVector(Oper);
5745 else {
5746 EVT WideOpVT =
5747 EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(),
5748 WidenVT.getVectorElementCount());
5749 Oper = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5750 DAG.getPOISON(WideOpVT), Oper,
5751 DAG.getVectorIdxConstant(0, dl));
5752 }
5753 }
5754
5755 InOps.push_back(Oper);
5756 }
5757
5758 // NumElts := greatest legal vector size (at most WidenVT)
5759 // while (orig. vector has unhandled elements) {
5760 // take munches of size NumElts from the beginning and add to ConcatOps
5761 // NumElts := next smaller supported vector size or 1
5762 // }
5763 while (CurNumElts != 0) {
5764 while (CurNumElts >= NumElts) {
5766
5767 for (unsigned i = 0; i < NumOpers; ++i) {
5768 SDValue Op = InOps[i];
5769
5770 EVT OpVT = Op.getValueType();
5771 if (OpVT.isVector()) {
5772 EVT OpExtractVT =
5773 EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(),
5775 Op = DAG.getExtractSubvector(dl, OpExtractVT, Op, Idx);
5776 }
5777
5778 EOps.push_back(Op);
5779 }
5780
5781 EVT OperVT[] = {VT, MVT::Other};
5782 SDValue Oper = DAG.getNode(Opcode, dl, OperVT, EOps);
5783 ConcatOps[ConcatEnd++] = Oper;
5784 Chains.push_back(Oper.getValue(1));
5785 Idx += NumElts;
5786 CurNumElts -= NumElts;
5787 }
5788 do {
5789 NumElts = NumElts / 2;
5790 VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
5791 } while (!TLI.isTypeLegal(VT) && NumElts != 1);
5792
5793 if (NumElts == 1) {
5794 for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
5796
5797 for (unsigned i = 0; i < NumOpers; ++i) {
5798 SDValue Op = InOps[i];
5799
5800 EVT OpVT = Op.getValueType();
5801 if (OpVT.isVector())
5802 Op = DAG.getExtractVectorElt(dl, OpVT.getVectorElementType(), Op,
5803 Idx);
5804
5805 EOps.push_back(Op);
5806 }
5807
5808 EVT WidenVT[] = {WidenEltVT, MVT::Other};
5809 SDValue Oper = DAG.getNode(Opcode, dl, WidenVT, EOps);
5810 ConcatOps[ConcatEnd++] = Oper;
5811 Chains.push_back(Oper.getValue(1));
5812 }
5813 CurNumElts = 0;
5814 }
5815 }
5816
5817 // Build a factor node to remember all the Ops that have been created.
5818 SDValue NewChain;
5819 if (Chains.size() == 1)
5820 NewChain = Chains[0];
5821 else
5822 NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
5823 ReplaceValueWith(SDValue(N, 1), NewChain);
5824
5825 return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
5826}
5827
5828SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) {
5829 SDLoc DL(N);
5830 EVT ResVT = N->getValueType(0);
5831 EVT OvVT = N->getValueType(1);
5832 EVT WideResVT, WideOvVT;
5833 SDValue WideLHS, WideRHS;
5834
5835 // TODO: This might result in a widen/split loop.
5836 if (ResNo == 0) {
5837 WideResVT = TLI.getTypeToTransformTo(*DAG.getContext(), ResVT);
5838 WideOvVT = EVT::getVectorVT(
5839 *DAG.getContext(), OvVT.getVectorElementType(),
5840 WideResVT.getVectorNumElements());
5841
5842 WideLHS = GetWidenedVector(N->getOperand(0));
5843 WideRHS = GetWidenedVector(N->getOperand(1));
5844 } else {
5845 WideOvVT = TLI.getTypeToTransformTo(*DAG.getContext(), OvVT);
5846 WideResVT = EVT::getVectorVT(
5847 *DAG.getContext(), ResVT.getVectorElementType(),
5848 WideOvVT.getVectorNumElements());
5849
5850 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
5851 SDValue Poison = DAG.getPOISON(WideResVT);
5852
5853 WideLHS = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideResVT, Poison,
5854 N->getOperand(0), Zero);
5855 WideRHS = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideResVT, Poison,
5856 N->getOperand(1), Zero);
5857 }
5858
5859 SDVTList WideVTs = DAG.getVTList(WideResVT, WideOvVT);
5860 SDNode *WideNode = DAG.getNode(
5861 N->getOpcode(), DL, WideVTs, WideLHS, WideRHS).getNode();
5862
5863 // Replace the other vector result not being explicitly widened here.
5864 unsigned OtherNo = 1 - ResNo;
5865 EVT OtherVT = N->getValueType(OtherNo);
5866 if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) {
5867 SetWidenedVector(SDValue(N, OtherNo), SDValue(WideNode, OtherNo));
5868 } else {
5869 SDValue Zero = DAG.getVectorIdxConstant(0, DL);
5870 SDValue OtherVal = DAG.getNode(
5871 ISD::EXTRACT_SUBVECTOR, DL, OtherVT, SDValue(WideNode, OtherNo), Zero);
5872 ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
5873 }
5874
5875 return SDValue(WideNode, ResNo);
5876}
5877
// Widen the result of a conversion-style node (extend/truncate/FP<->int/
// FP round-extend, plus their VP forms). Tries, in order: emitting the op on
// already element-count-matched widened inputs, *_EXTEND_VECTOR_INREG for
// same-width cases, a narrower intermediate truncate, concatenating or
// extracting the input to a legal widened input type, and finally full
// scalarization.
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
  LLVMContext &Ctx = *DAG.getContext();
  SDValue InOp = N->getOperand(0);
  SDLoc DL(N);

  EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0));
  ElementCount WidenEC = WidenVT.getVectorElementCount();

  EVT InVT = InOp.getValueType();

  unsigned Opcode = N->getOpcode();
  const SDNodeFlags Flags = N->getFlags();

  // Handle the case of ZERO_EXTEND where the promoted InVT element size does
  // not equal that of WidenVT.
  if (N->getOpcode() == ISD::ZERO_EXTEND &&
      getTypeAction(InVT) == TargetLowering::TypePromoteInteger &&
      TLI.getTypeToTransformTo(Ctx, InVT).getScalarSizeInBits() !=
      WidenVT.getScalarSizeInBits()) {
    InOp = ZExtPromotedInteger(InOp);
    InVT = InOp.getValueType();
    // If promotion made the input scalar wider than the result, the zext
    // becomes a truncate of the promoted value.
    if (WidenVT.getScalarSizeInBits() < InVT.getScalarSizeInBits())
      Opcode = ISD::TRUNCATE;
  }

  EVT InEltVT = InVT.getVectorElementType();
  EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenEC);
  ElementCount InVTEC = InVT.getVectorElementCount();

  if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
    InOp = GetWidenedVector(N->getOperand(0));
    InVT = InOp.getValueType();
    InVTEC = InVT.getVectorElementCount();
    if (InVTEC == WidenEC) {
      // Input and result widen to the same element count: emit directly.
      if (N->getNumOperands() == 1)
        return DAG.getNode(Opcode, DL, WidenVT, InOp, Flags);
      if (N->getNumOperands() == 3) {
        assert(N->isVPOpcode() && "Expected VP opcode");
        SDValue Mask =
            GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
        return DAG.getNode(Opcode, DL, WidenVT, InOp, Mask, N->getOperand(2));
      }
      return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
    }
    if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) {
      // If both input and result vector types are of same width, extend
      // operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which
      // accepts fewer elements in the result than in the input.
      if (Opcode == ISD::ANY_EXTEND)
        return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
      if (Opcode == ISD::SIGN_EXTEND)
        return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
      if (Opcode == ISD::ZERO_EXTEND)
        return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, WidenVT, InOp);
    }

    // For TRUNCATE, try to widen using the legal EC of the input type instead
    // if the legalisation action for that intermediate type is not widening.
    // E.g. for trunc nxv1i64 -> nxv1i8 where
    // - nxv1i64 input gets widened to nxv2i64
    // - nxv1i8 output gets widened to nxv16i8
    // Then one can try widening the result to nxv2i8 (instead of going all the
    // way to nxv16i8) if this later allows type promotion.
    EVT MidResVT =
        EVT::getVectorVT(Ctx, WidenVT.getVectorElementType(), InVTEC);
    if (N->getOpcode() == ISD::TRUNCATE &&
        getTypeAction(MidResVT) == TargetLowering::TypePromoteInteger) {
      SDValue MidRes = DAG.getNode(ISD::TRUNCATE, DL, MidResVT, InOp, Flags);
      return DAG.getInsertSubvector(DL, DAG.getPOISON(WidenVT), MidRes, 0);
    }
  }

  if (TLI.isTypeLegal(InWidenVT)) {
    // Because the result and the input are different vector types, widening
    // the result could create a legal type but widening the input might make
    // it an illegal type that might lead to repeatedly splitting the input
    // and then widening it. To avoid this, we widen the input only if
    // it results in a legal type.
    if (WidenEC.isKnownMultipleOf(InVTEC.getKnownMinValue())) {
      // Widen the input and call convert on the widened input vector.
      unsigned NumConcat =
          WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
      SmallVector<SDValue, 16> Ops(NumConcat, DAG.getPOISON(InVT));
      Ops[0] = InOp;
      SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
      if (N->getNumOperands() == 1)
        return DAG.getNode(Opcode, DL, WidenVT, InVec, Flags);
      return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
    }

    if (InVTEC.isKnownMultipleOf(WidenEC.getKnownMinValue())) {
      SDValue InVal = DAG.getExtractSubvector(DL, InWidenVT, InOp, 0);
      // Extract the input and convert the shorten input vector.
      if (N->getNumOperands() == 1)
        return DAG.getNode(Opcode, DL, WidenVT, InVal, Flags);
      return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags);
    }
  }

  // Otherwise unroll into some nasty scalar code and rebuild the vector.
  EVT EltVT = WidenVT.getVectorElementType();
  SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getPOISON(EltVT));
  // Use the original element count so we don't do more scalar opts than
  // necessary.
  unsigned MinElts = N->getValueType(0).getVectorNumElements();
  for (unsigned i=0; i < MinElts; ++i) {
    SDValue Val = DAG.getExtractVectorElt(DL, InEltVT, InOp, i);
    if (N->getNumOperands() == 1)
      Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, Flags);
    else
      Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags);
  }

  return DAG.getBuildVector(WidenVT, DL, Ops);
}
5993
5994SDValue DAGTypeLegalizer::WidenVecRes_FP_TO_XINT_SAT(SDNode *N) {
5995 SDLoc dl(N);
5996 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
5997 ElementCount WidenNumElts = WidenVT.getVectorElementCount();
5998
5999 SDValue Src = N->getOperand(0);
6000 EVT SrcVT = Src.getValueType();
6001
6002 // Also widen the input.
6003 if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) {
6004 Src = GetWidenedVector(Src);
6005 SrcVT = Src.getValueType();
6006 }
6007
6008 // Input and output not widened to the same size, give up.
6009 if (WidenNumElts != SrcVT.getVectorElementCount())
6010 return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
6011
6012 return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, N->getOperand(1));
6013}
6014
6015SDValue DAGTypeLegalizer::WidenVecRes_XROUND(SDNode *N) {
6016 SDLoc dl(N);
6017 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6018 ElementCount WidenNumElts = WidenVT.getVectorElementCount();
6019
6020 SDValue Src = N->getOperand(0);
6021 EVT SrcVT = Src.getValueType();
6022
6023 // Also widen the input.
6024 if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) {
6025 Src = GetWidenedVector(Src);
6026 SrcVT = Src.getValueType();
6027 }
6028
6029 // Input and output not widened to the same size, give up.
6030 if (WidenNumElts != SrcVT.getVectorElementCount())
6031 return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue());
6032
6033 if (N->getNumOperands() == 1)
6034 return DAG.getNode(N->getOpcode(), dl, WidenVT, Src);
6035
6036 assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
6037 assert(N->isVPOpcode() && "Expected VP opcode");
6038
6039 SDValue Mask =
6040 GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
6041 return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, Mask, N->getOperand(2));
6042}
6043
6044SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
6045 SDValue InOp = N->getOperand(1);
6046 SDLoc DL(N);
6047 SmallVector<SDValue, 4> NewOps(N->ops());
6048
6049 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6050 unsigned WidenNumElts = WidenVT.getVectorNumElements();
6051
6052 EVT InVT = InOp.getValueType();
6053 EVT InEltVT = InVT.getVectorElementType();
6054
6055 unsigned Opcode = N->getOpcode();
6056
6057 // FIXME: Optimizations need to be implemented here.
6058
6059 // Otherwise unroll into some nasty scalar code and rebuild the vector.
6060 EVT EltVT = WidenVT.getVectorElementType();
6061 std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}};
6062 SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getPOISON(EltVT));
6063 SmallVector<SDValue, 32> OpChains;
6064 // Use the original element count so we don't do more scalar opts than
6065 // necessary.
6066 unsigned MinElts = N->getValueType(0).getVectorNumElements();
6067 for (unsigned i=0; i < MinElts; ++i) {
6068 NewOps[1] = DAG.getExtractVectorElt(DL, InEltVT, InOp, i);
6069 Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps);
6070 OpChains.push_back(Ops[i].getValue(1));
6071 }
6072 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OpChains);
6073 ReplaceValueWith(SDValue(N, 1), NewChain);
6074
6075 return DAG.getBuildVector(WidenVT, DL, Ops);
6076}
6077
6078SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
6079 unsigned Opcode = N->getOpcode();
6080 SDValue InOp = N->getOperand(0);
6081 SDLoc DL(N);
6082
6083 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6084 EVT WidenSVT = WidenVT.getVectorElementType();
6085 unsigned WidenNumElts = WidenVT.getVectorNumElements();
6086
6087 EVT InVT = InOp.getValueType();
6088 EVT InSVT = InVT.getVectorElementType();
6089 unsigned InVTNumElts = InVT.getVectorNumElements();
6090
6091 if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
6092 InOp = GetWidenedVector(InOp);
6093 InVT = InOp.getValueType();
6094 if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) {
6095 switch (Opcode) {
6099 return DAG.getNode(Opcode, DL, WidenVT, InOp);
6100 }
6101 }
6102 }
6103
6104 // Unroll, extend the scalars and rebuild the vector.
6106 for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) {
6107 SDValue Val = DAG.getExtractVectorElt(DL, InSVT, InOp, i);
6108 switch (Opcode) {
6110 Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val);
6111 break;
6113 Val = DAG.getNode(ISD::SIGN_EXTEND, DL, WidenSVT, Val);
6114 break;
6116 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenSVT, Val);
6117 break;
6118 default:
6119 llvm_unreachable("A *_EXTEND_VECTOR_INREG node was expected");
6120 }
6121 Ops.push_back(Val);
6122 }
6123
6124 while (Ops.size() != WidenNumElts)
6125 Ops.push_back(DAG.getPOISON(WidenSVT));
6126
6127 return DAG.getBuildVector(WidenVT, DL, Ops);
6128}
6129
6130SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) {
6131 // If this is an FCOPYSIGN with same input types, we can treat it as a
6132 // normal (can trap) binary op.
6133 if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType())
6134 return WidenVecRes_BinaryCanTrap(N);
6135
6136 // If the types are different, fall back to unrolling.
6137 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6138 return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
6139}
6140
6141/// Result and first source operand are different scalar types, but must have
6142/// the same number of elements. There is an additional control argument which
6143/// should be passed through unchanged.
6144SDValue DAGTypeLegalizer::WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N) {
6145 SDValue FpValue = N->getOperand(0);
6146 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6147 if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector)
6148 return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
6149 SDValue Arg = GetWidenedVector(FpValue);
6150 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, {Arg, N->getOperand(1)},
6151 N->getFlags());
6152}
6153
6154SDValue DAGTypeLegalizer::WidenVecRes_ExpOp(SDNode *N) {
6155 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6156 SDValue InOp = GetWidenedVector(N->getOperand(0));
6157 SDValue RHS = N->getOperand(1);
6158 EVT ExpVT = RHS.getValueType();
6159 SDValue ExpOp = RHS;
6160 if (ExpVT.isVector()) {
6161 EVT WideExpVT = WidenVT.changeVectorElementType(
6162 *DAG.getContext(), ExpVT.getVectorElementType());
6163 ExpOp = ModifyToType(RHS, WideExpVT);
6164 }
6165
6166 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ExpOp);
6167}
6168
6169SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) {
6170 // Unary op widening.
6171 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6172 SDValue InOp = GetWidenedVector(N->getOperand(0));
6173 if (N->getNumOperands() == 1)
6174 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, N->getFlags());
6175 if (N->getOpcode() == ISD::AssertNoFPClass)
6176 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp,
6177 N->getOperand(1), N->getFlags());
6178
6179 assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
6180 assert(N->isVPOpcode() && "Expected VP opcode");
6181
6182 SDValue Mask =
6183 GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount());
6184 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT,
6185 {InOp, Mask, N->getOperand(2)});
6186}
6187
6188SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
6189 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6190 EVT ExtVT = EVT::getVectorVT(
6191 *DAG.getContext(),
6192 cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType(),
6193 WidenVT.getVectorElementCount());
6194 SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
6195 return DAG.getNode(N->getOpcode(), SDLoc(N),
6196 WidenVT, WidenLHS, DAG.getValueType(ExtVT));
6197}
6198
// Widen a unary operation that produces two vector results. Both results are
// widened to the element count of result ResNo; the other widened result is
// handed to the users of N's other result via ReplaceOtherWidenResults.
SDValue DAGTypeLegalizer::WidenVecRes_UnaryOpWithTwoResults(SDNode *N,
                                                            unsigned ResNo) {
  EVT VT0 = N->getValueType(0);
  EVT VT1 = N->getValueType(1);

  assert(VT0.isVector() && VT1.isVector() &&
         "expected both results to be vectors of matching element count");

  LLVMContext &Ctx = *DAG.getContext();
  SDValue InOp = GetWidenedVector(N->getOperand(0));

  // Element count taken from the widened form of result ResNo; both result
  // types are rebuilt with this count but keep their own element types.
  EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(ResNo));
  ElementCount WidenEC = WidenVT.getVectorElementCount();

  EVT WidenVT0 = EVT::getVectorVT(Ctx, VT0.getVectorElementType(), WidenEC);
  EVT WidenVT1 = EVT::getVectorVT(Ctx, VT1.getVectorElementType(), WidenEC);

  SDNode *WidenNode =
      DAG.getNode(N->getOpcode(), SDLoc(N), {WidenVT0, WidenVT1}, InOp)
          .getNode();

  ReplaceOtherWidenResults(N, WidenNode, ResNo);
  return SDValue(WidenNode, ResNo);
}
6224
6225SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) {
6226 SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo);
6227 return GetWidenedVector(WidenVec);
6228}
6229
6230SDValue DAGTypeLegalizer::WidenVecRes_ADDRSPACECAST(SDNode *N) {
6231 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6232 SDValue InOp = GetWidenedVector(N->getOperand(0));
6233 auto *AddrSpaceCastN = cast<AddrSpaceCastSDNode>(N);
6234
6235 return DAG.getAddrSpaceCast(SDLoc(N), WidenVT, InOp,
6236 AddrSpaceCastN->getSrcAddressSpace(),
6237 AddrSpaceCastN->getDestAddressSpace());
6238}
6239
// Widen the result of a BITCAST. Depending on how the input is legalized this
// either bitcasts a same-sized legalized input directly, rebuilds a larger
// input vector out of the pieces and bitcasts that, or falls back to a stack
// store/load round trip.
SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
  SDValue InOp = N->getOperand(0);
  EVT InVT = InOp.getValueType();
  EVT VT = N->getValueType(0);
  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  SDLoc dl(N);

  switch (getTypeAction(InVT)) {
    break;
    report_fatal_error("Scalarization of scalable vectors is not supported.");
    // If the incoming type is a vector that is being promoted, then
    // we know that the elements are arranged differently and that we
    // must perform the conversion using a stack slot.
    if (InVT.isVector())
      break;

    // If the InOp is promoted to the same size, convert it. Otherwise,
    // fall out of the switch and widen the promoted input.
    SDValue NInOp = GetPromotedInteger(InOp);
    EVT NInVT = NInOp.getValueType();
    if (WidenVT.bitsEq(NInVT)) {
      // For big endian targets we need to shift the input integer or the
      // interesting bits will end up at the wrong place.
      if (DAG.getDataLayout().isBigEndian()) {
        unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits();
        NInOp = DAG.getNode(ISD::SHL, dl, NInVT, NInOp,
                            DAG.getShiftAmountConstant(ShiftAmt, NInVT, dl));
      }
      return DAG.getNode(ISD::BITCAST, dl, WidenVT, NInOp);
    }
    // Not the same size: continue below with the promoted value.
    InOp = NInOp;
    InVT = NInVT;
    break;
  }
    break;
    // If the InOp is widened to the same size, convert it. Otherwise, fall
    // out of the switch and widen the widened input.
    InOp = GetWidenedVector(InOp);
    InVT = InOp.getValueType();
    if (WidenVT.bitsEq(InVT))
      // The input widens to the same size. Convert to the widen value.
      return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
    break;
  }

  unsigned WidenSize = WidenVT.getSizeInBits();
  unsigned InSize = InVT.getSizeInBits();
  unsigned InScalarSize = InVT.getScalarSizeInBits();
  // x86mmx is not an acceptable vector element type, so don't try.
  if (WidenSize % InScalarSize == 0 && InVT != MVT::x86mmx) {
    // Determine new input vector type. The new input vector type will use
    // the same element type (if its a vector) or use the input type as a
    // vector. It is the same size as the type to widen to.
    EVT NewInVT;
    unsigned NewNumParts = WidenSize / InSize;
    if (InVT.isVector()) {
      EVT InEltVT = InVT.getVectorElementType();
      NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT,
                                 WidenSize / InEltVT.getSizeInBits());
    } else {
      // For big endian systems, using the promoted input scalar type
      // to produce the scalar_to_vector would put the desired bits into
      // the least significant byte(s) of the wider element zero. This
      // will mean that the users of the result vector are using incorrect
      // bits. Use the original input type instead. Although either input
      // type can be used on little endian systems, for consistency we
      // use the original type there as well.
      EVT OrigInVT = N->getOperand(0).getValueType();
      NewNumParts = WidenSize / OrigInVT.getSizeInBits();
      NewInVT = EVT::getVectorVT(*DAG.getContext(), OrigInVT, NewNumParts);
    }

    if (TLI.isTypeLegal(NewInVT)) {
      SDValue NewVec;
      if (InVT.isVector()) {
        // Because the result and the input are different vector types, widening
        // the result could create a legal type but widening the input might
        // make it an illegal type that might lead to repeatedly splitting the
        // input and then widening it. To avoid this, we widen the input only if
        // it results in a legal type.
        if (WidenSize % InSize == 0) {
          // Whole multiple: concat the input with poison padding vectors.
          SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getPOISON(InVT));
          Ops[0] = InOp;

          NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops);
        } else {
          // Otherwise rebuild element-by-element, padding with poison scalars.
          DAG.ExtractVectorElements(InOp, Ops);
          Ops.append(WidenSize / InScalarSize - Ops.size(),
                     DAG.getPOISON(InVT.getVectorElementType()));

          NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops);
        }
      } else {
        NewVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewInVT, InOp);
      }
      return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
    }
  }

  // Last resort: round-trip the value through a stack slot.
  return CreateStackStoreLoad(InOp, WidenVT);
}
6352
6353SDValue DAGTypeLegalizer::WidenVecRes_LOOP_DEPENDENCE_MASK(SDNode *N) {
6354 return DAG.getNode(
6355 N->getOpcode(), SDLoc(N),
6356 TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)),
6357 N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3));
6358}
6359
6360SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) {
6361 SDLoc dl(N);
6362 // Build a vector with poison for the new nodes.
6363 EVT VT = N->getValueType(0);
6364
6365 // Integer BUILD_VECTOR operands may be larger than the node's vector element
6366 // type. The POISONs need to have the same type as the existing operands.
6367 EVT EltVT = N->getOperand(0).getValueType();
6368 unsigned NumElts = VT.getVectorNumElements();
6369
6370 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
6371 unsigned WidenNumElts = WidenVT.getVectorNumElements();
6372
6373 SmallVector<SDValue, 16> NewOps(N->ops());
6374 assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!");
6375 NewOps.append(WidenNumElts - NumElts, DAG.getPOISON(EltVT));
6376
6377 return DAG.getBuildVector(WidenVT, dl, NewOps);
6378}
6379
// Widen the result of a CONCAT_VECTORS. Several strategies are tried in
// order: pad with extra poison input vectors, return the single non-undef
// widened operand, turn a two-operand concat into a shuffle, and finally a
// generic extract/build_vector rebuild.
SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) {
  EVT InVT = N->getOperand(0).getValueType();
  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
  SDLoc dl(N);
  unsigned NumOperands = N->getNumOperands();

  bool InputWidened = false; // Indicates we need to widen the input.
  if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) {
    unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
    unsigned NumInElts = InVT.getVectorMinNumElements();
    if (WidenNumElts % NumInElts == 0) {
      // Add undef vectors to widen to correct length.
      unsigned NumConcat = WidenNumElts / NumInElts;
      SDValue UndefVal = DAG.getPOISON(InVT);
      SmallVector<SDValue, 16> Ops(NumConcat);
      for (unsigned i=0; i < NumOperands; ++i)
        Ops[i] = N->getOperand(i);
      for (unsigned i = NumOperands; i != NumConcat; ++i)
        Ops[i] = UndefVal;
      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops);
    }
  } else {
    InputWidened = true;
    if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
      // The inputs and the result are widen to the same value.
      unsigned i;
      for (i=1; i < NumOperands; ++i)
        if (!N->getOperand(i).isUndef())
          break;

      if (i == NumOperands)
        // Everything but the first operand is an UNDEF so just return the
        // widened first operand.
        return GetWidenedVector(N->getOperand(0));

      if (NumOperands == 2) {
        assert(!WidenVT.isScalableVector() &&
               "Cannot use vector shuffles to widen CONCAT_VECTOR result");
        unsigned WidenNumElts = WidenVT.getVectorNumElements();
        unsigned NumInElts = InVT.getVectorNumElements();

        // Replace concat of two operands with a shuffle.
        // Lane i takes element i of the first widened operand; lane
        // i+NumInElts takes element i of the second; all other lanes are
        // don't-care (-1).
        SmallVector<int, 16> MaskOps(WidenNumElts, -1);
        for (unsigned i = 0; i < NumInElts; ++i) {
          MaskOps[i] = i;
          MaskOps[i + NumInElts] = i + WidenNumElts;
        }
        return DAG.getVectorShuffle(WidenVT, dl,
                                    GetWidenedVector(N->getOperand(0)),
                                    GetWidenedVector(N->getOperand(1)),
                                    MaskOps);
      }
    }
  }

  assert(!WidenVT.isScalableVector() &&
         "Cannot use build vectors to widen CONCAT_VECTOR result");
  unsigned WidenNumElts = WidenVT.getVectorNumElements();
  unsigned NumInElts = InVT.getVectorNumElements();

  // Fall back to use extracts and build vector.
  EVT EltVT = WidenVT.getVectorElementType();
  SmallVector<SDValue, 16> Ops(WidenNumElts);
  unsigned Idx = 0;
  for (unsigned i=0; i < NumOperands; ++i) {
    SDValue InOp = N->getOperand(i);
    if (InputWidened)
      InOp = GetWidenedVector(InOp);
    for (unsigned j = 0; j < NumInElts; ++j)
      Ops[Idx++] = DAG.getExtractVectorElt(dl, EltVT, InOp, j);
  }
  SDValue UndefVal = DAG.getPOISON(EltVT);
  for (; Idx < WidenNumElts; ++Idx)
    Ops[Idx] = UndefVal;
  return DAG.getBuildVector(WidenVT, dl, Ops);
}
6456
6457SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) {
6458 EVT VT = N->getValueType(0);
6459 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
6460 SDValue InOp1 = GetWidenedVector(N->getOperand(0));
6461 SDValue InOp2 = N->getOperand(1);
6462 SDValue Idx = N->getOperand(2);
6463 SDLoc dl(N);
6464 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WidenVT, InOp1, InOp2, Idx);
6465}
6466
// Widen the result of an EXTRACT_SUBVECTOR. Tries, in order: returning the
// widened input directly, extracting a widened subvector, splitting a
// scalable extract into smaller extracts + concat, a store/masked-load
// round trip through the stack (scalable), and finally an element-by-element
// rebuild (fixed-width).
SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
  EVT VT = N->getValueType(0);
  EVT EltVT = VT.getVectorElementType();
  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  SDValue InOp = N->getOperand(0);
  SDValue Idx = N->getOperand(1);
  SDLoc dl(N);

  auto InOpTypeAction = getTypeAction(InOp.getValueType());
  if (InOpTypeAction == TargetLowering::TypeWidenVector)
    InOp = GetWidenedVector(InOp);

  EVT InVT = InOp.getValueType();

  // Check if we can just return the input vector after widening.
  uint64_t IdxVal = Idx->getAsZExtVal();
  if (IdxVal == 0 && InVT == WidenVT)
    return InOp;

  // Check if we can extract from the vector.
  unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
  unsigned InNumElts = InVT.getVectorMinNumElements();
  unsigned VTNumElts = VT.getVectorMinNumElements();
  assert(IdxVal % VTNumElts == 0 &&
         "Expected Idx to be a multiple of subvector minimum vector length");
  if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);

  if (VT.isScalableVector()) {
    // Try to split the operation up into smaller extracts and concat the
    // results together, e.g.
    //   nxv6i64 extract_subvector(nxv12i64, 6)
    // <->
    //   nxv8i64 concat(
    //     nxv2i64 extract_subvector(nxv16i64, 6)
    //     nxv2i64 extract_subvector(nxv16i64, 8)
    //     nxv2i64 extract_subvector(nxv16i64, 10)
    //     undef)
    unsigned GCD = std::gcd(VTNumElts, WidenNumElts);
    assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
                                  "down type's element count");
    EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
    // Avoid recursion around e.g. nxv1i8.
    if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) {
      // Extract the live pieces, then pad with poison parts up to the
      // widened element count.
      unsigned I = 0;
      for (; I < VTNumElts / GCD; ++I)
        Parts.push_back(
            DAG.getExtractSubvector(dl, PartVT, InOp, IdxVal + I * GCD));
      for (; I < WidenNumElts / GCD; ++I)
        Parts.push_back(DAG.getPOISON(PartVT));

      return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
    }

    // Fallback to extracting through memory.

    Align Alignment = DAG.getReducedAlign(InVT, /*UseABI=*/false);
    SDValue StackPtr = DAG.CreateStackTemporary(InVT.getStoreSize(), Alignment);
    MachineFunction &MF = DAG.getMachineFunction();
    int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
    auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

    MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
    MachineMemOperand *LoadMMO = MF.getMachineMemOperand(

    // Write out the input vector.
    SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, StoreMMO);

    // Build a mask to match the length of the non-widened result.
    SDValue Mask =
        DAG.getMaskFromElementCount(dl, WidenVT, VT.getVectorElementCount());

    // Read back the sub-vector setting the remaining lanes to poison.
    StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, InVT, VT, Idx);
    return DAG.getMaskedLoad(
        WidenVT, dl, Ch, StackPtr, DAG.getPOISON(StackPtr.getValueType()), Mask,
        DAG.getPOISON(WidenVT), VT, LoadMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD);
  }

  // We could try widening the input to the right length but for now, extract
  // the original elements, fill the rest with undefs and build a vector.
  SmallVector<SDValue, 16> Ops(WidenNumElts);
  unsigned i;
  for (i = 0; i < VTNumElts; ++i)
    Ops[i] = DAG.getExtractVectorElt(dl, EltVT, InOp, IdxVal + i);

  SDValue UndefVal = DAG.getPOISON(EltVT);
  for (; i < WidenNumElts; ++i)
    Ops[i] = UndefVal;
  return DAG.getBuildVector(WidenVT, dl, Ops);
}
6564
6565SDValue DAGTypeLegalizer::WidenVecRes_AssertZext(SDNode *N) {
6566 SDValue InOp = ModifyToType(
6567 N->getOperand(0),
6568 TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), true);
6569 return DAG.getNode(ISD::AssertZext, SDLoc(N), InOp.getValueType(), InOp,
6570 N->getOperand(1));
6571}
6572
6573SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) {
6574 SDValue InOp = GetWidenedVector(N->getOperand(0));
6575 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N),
6576 InOp.getValueType(), InOp,
6577 N->getOperand(1), N->getOperand(2));
6578}
6579
6580/// Either return the same load or provide appropriate casts
6581/// from the load and return that.
6582static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT,
6583 TypeSize LdWidth, TypeSize FirstVTWidth,
6584 SDLoc dl, SelectionDAG &DAG) {
6585 assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth) &&
6586 "Load width must be less than or equal to first value type width");
6587 TypeSize WidenWidth = WidenVT.getSizeInBits();
6588 if (!FirstVT.isVector()) {
6589 unsigned NumElts =
6590 WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
6591 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), FirstVT, NumElts);
6592 SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
6593 return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
6594 }
6595 assert(FirstVT == WidenVT && "First value type must equal widen value type");
6596 return LdOp;
6597}
6598
6599static std::optional<EVT> findMemType(SelectionDAG &DAG,
6600 const TargetLowering &TLI, unsigned Width,
6601 EVT WidenVT, unsigned Align,
6602 unsigned WidenEx);
6603
6604SDValue DAGTypeLegalizer::WidenVecRes_ATOMIC_LOAD(AtomicSDNode *LD) {
6605 EVT WidenVT =
6606 TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
6607 EVT LdVT = LD->getMemoryVT();
6608 SDLoc dl(LD);
6609 assert(LdVT.isVector() && WidenVT.isVector() && "Expected vectors");
6610 assert(LdVT.isScalableVector() == WidenVT.isScalableVector() &&
6611 "Must be scalable");
6612 assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType() &&
6613 "Expected equivalent element types");
6614
6615 // Load information
6616 SDValue Chain = LD->getChain();
6617 SDValue BasePtr = LD->getBasePtr();
6618
6619 TypeSize LdWidth = LdVT.getSizeInBits();
6620 TypeSize WidenWidth = WidenVT.getSizeInBits();
6621 TypeSize WidthDiff = WidenWidth - LdWidth;
6622
6623 // Find the vector type that can load from.
6624 std::optional<EVT> FirstVT =
6625 findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, /*LdAlign=*/0,
6626 WidthDiff.getKnownMinValue());
6627
6628 if (!FirstVT)
6629 return SDValue();
6630
6631 SmallVector<EVT, 8> MemVTs;
6632 TypeSize FirstVTWidth = FirstVT->getSizeInBits();
6633
6634 SDValue LdOp = DAG.getAtomicLoad(ISD::NON_EXTLOAD, dl, *FirstVT, *FirstVT,
6635 Chain, BasePtr, LD->getMemOperand());
6636
6637 // Load the element with one instruction.
6638 SDValue Result = coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth,
6639 FirstVTWidth, dl, DAG);
6640
6641 // Modified the chain - switch anything that used the old chain to use
6642 // the new one.
6643 ReplaceValueWith(SDValue(LD, 1), LdOp.getValue(1));
6644 return Result;
6645}
6646
// Widen the result of a vector LOAD. Non-byte-sized memory types are
// scalarized; otherwise a VP load, a series of widened loads, or a wide
// masked load is emitted, in that order of preference.
SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  ISD::LoadExtType ExtType = LD->getExtensionType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depend on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!LD->getMemoryVT().isByteSized()) {
    SDValue Value, NewChain;
    std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
    ReplaceValueWith(SDValue(LD, 0), Value);
    ReplaceValueWith(SDValue(LD, 1), NewChain);
    return SDValue();
  }

  // Generate a vector-predicated load if it is custom/legal on the target. To
  // avoid possible recursion, only do this if the widened mask type is legal.
  // FIXME: Not all targets may support EVL in VP_LOAD. These will have been
  // removed from the IR by the ExpandVectorPredication pass but we're
  // reintroducing them here.
  EVT VT = LD->getValueType(0);
  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  EVT WideMaskVT = getSetCCResultType(WideVT);

  if (ExtType == ISD::NON_EXTLOAD &&
      TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) &&
      TLI.isTypeLegal(WideMaskVT)) {
    SDLoc DL(N);
    // All-ones mask; the EVL operand limits the load to the original length.
    SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
    SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
    SDValue NewLoad =
        DAG.getLoadVP(LD->getAddressingMode(), ISD::NON_EXTLOAD, WideVT, DL,
                      LD->getChain(), LD->getBasePtr(), LD->getOffset(), Mask,
                      EVL, LD->getMemoryVT(), LD->getMemOperand());

    // Modified the chain - switch anything that used the old chain to use
    // the new one.
    ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));

    return NewLoad;
  }

  SmallVector<SDValue, 16> LdChain; // Chain for the series of load
  if (ExtType != ISD::NON_EXTLOAD)
    Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
  else
    Result = GenWidenVectorLoads(LdChain, LD);

  if (Result) {
    // If we generate a single load, we can use that for the chain. Otherwise,
    // build a factor node to remember the multiple loads are independent and
    // chain to that.
    SDValue NewChain;
    if (LdChain.size() == 1)
      NewChain = LdChain[0];
    else
      NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);

    // Modified the chain - switch anything that used the old chain to use
    // the new one.
    ReplaceValueWith(SDValue(N, 1), NewChain);

    return Result;
  }

  if (VT.isVector()) {
    // If all else fails replace the load with a wide masked load.
    SDLoc DL(N);
    // The mask enables only the lanes of the original (non-widened) type.
    SDValue Mask =
        DAG.getMaskFromElementCount(DL, WideVT, VT.getVectorElementCount());

    SDValue NewLoad = DAG.getMaskedLoad(
        WideVT, DL, LD->getChain(), LD->getBasePtr(), LD->getOffset(), Mask,
        DAG.getPOISON(WideVT), LD->getMemoryVT(), LD->getMemOperand(),
        LD->getAddressingMode(), LD->getExtensionType());

    ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));
    return NewLoad;
  }

  report_fatal_error("Unable to widen vector load");
}
6734
// Widen the result of a VP_LOAD. The data type is widened while the mask and
// EVL keep limiting the operation to the original lanes.
SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) {
  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
  SDValue Mask = N->getMask();
  SDValue EVL = N->getVectorLength();
  ISD::LoadExtType ExtType = N->getExtensionType();
  SDLoc dl(N);

  // The mask should be widened as well
  assert(getTypeAction(Mask.getValueType()) ==
         "Unable to widen binary VP op");
  Mask = GetWidenedVector(Mask);
  assert(Mask.getValueType().getVectorElementCount() ==
             TLI.getTypeToTransformTo(*DAG.getContext(), Mask.getValueType())
                 .getVectorElementCount() &&
         "Unable to widen vector load");

  SDValue Res =
      DAG.getLoadVP(N->getAddressingMode(), ExtType, WidenVT, dl, N->getChain(),
                    N->getBasePtr(), N->getOffset(), Mask, EVL,
                    N->getMemoryVT(), N->getMemOperand(), N->isExpandingLoad());
  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
  return Res;
}
6761
// Widen the result of a fault-first VP load. The node has three results
// (data, faulting length, chain); the extra results are forwarded to the
// original node's users.
SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD_FF(VPLoadFFSDNode *N) {
  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
  SDValue Mask = N->getMask();
  SDValue EVL = N->getVectorLength();
  SDLoc dl(N);

  // The mask should be widened as well
  assert(getTypeAction(Mask.getValueType()) ==
         "Unable to widen binary VP op");
  Mask = GetWidenedVector(Mask);
  assert(Mask.getValueType().getVectorElementCount() ==
             TLI.getTypeToTransformTo(*DAG.getContext(), Mask.getValueType())
                 .getVectorElementCount() &&
         "Unable to widen vector load");

  SDValue Res = DAG.getLoadFFVP(WidenVT, dl, N->getChain(), N->getBasePtr(),
                                Mask, EVL, N->getMemOperand());
  // Forward both the faulting-length result and the chain to the old users.
  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
  ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
  return Res;
}
6784
// Widen the result of a VP strided load; the mask is widened to match the
// widened data type while stride, EVL and addressing stay unchanged.
SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) {
  SDLoc DL(N);

  // The mask should be widened as well
  SDValue Mask = N->getMask();
  assert(getTypeAction(Mask.getValueType()) ==
         "Unable to widen VP strided load");
  Mask = GetWidenedVector(Mask);

  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
  assert(Mask.getValueType().getVectorElementCount() ==
             WidenVT.getVectorElementCount() &&
         "Data and mask vectors should have the same number of elements");

  SDValue Res = DAG.getStridedLoadVP(
      N->getAddressingMode(), N->getExtensionType(), WidenVT, DL, N->getChain(),
      N->getBasePtr(), N->getOffset(), N->getStride(), Mask,
      N->getVectorLength(), N->getMemoryVT(), N->getMemOperand(),
      N->isExpandingLoad());

  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
  return Res;
}
6811
6812SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_COMPRESS(SDNode *N) {
6813 SDValue Vec = N->getOperand(0);
6814 SDValue Mask = N->getOperand(1);
6815 SDValue Passthru = N->getOperand(2);
6816 EVT WideVecVT =
6817 TLI.getTypeToTransformTo(*DAG.getContext(), Vec.getValueType());
6818 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
6819 Mask.getValueType().getVectorElementType(),
6820 WideVecVT.getVectorElementCount());
6821
6822 SDValue WideVec = ModifyToType(Vec, WideVecVT);
6823 SDValue WideMask = ModifyToType(Mask, WideMaskVT, /*FillWithZeroes=*/true);
6824 SDValue WidePassthru = ModifyToType(Passthru, WideVecVT);
6825 return DAG.getNode(ISD::VECTOR_COMPRESS, SDLoc(N), WideVecVT, WideVec,
6826 WideMask, WidePassthru);
6827}
6828
// Widen the result of a masked load, either by rewriting it as a VP load
// (merging the passthru manually when needed) or by widening the mask and
// emitting a wider masked load.
SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
  EVT VT = N->getValueType(0);
  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  SDValue Mask = N->getMask();
  EVT MaskVT = Mask.getValueType();
  SDValue PassThru = GetWidenedVector(N->getPassThru());
  ISD::LoadExtType ExtType = N->getExtensionType();
  SDLoc dl(N);

  // Mask type with the original mask element type but the widened count.
  EVT WideMaskVT =
      EVT::getVectorVT(*DAG.getContext(), MaskVT.getVectorElementType(),
                       WidenVT.getVectorElementCount());

  if (ExtType == ISD::NON_EXTLOAD && !N->isExpandingLoad() &&
      TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WidenVT) &&
      TLI.isTypeLegal(WideMaskVT) &&
      // If there is a passthru, we shouldn't use vp.load. However,
      // type legalizer will struggle on masked.load with
      // scalable vectors, so for scalable vectors, we still use vp.load
      // but manually merge the load result with the passthru using vp.select.
      (N->getPassThru()->isUndef() || VT.isScalableVector())) {
    Mask = DAG.getInsertSubvector(dl, DAG.getPOISON(WideMaskVT), Mask, 0);
    SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
    SDValue NewLoad =
        DAG.getLoadVP(N->getAddressingMode(), ISD::NON_EXTLOAD, WidenVT, dl,
                      N->getChain(), N->getBasePtr(), N->getOffset(), Mask, EVL,
                      N->getMemoryVT(), N->getMemOperand());
    SDValue NewVal = NewLoad;

    // Manually merge with vselect
    if (!N->getPassThru()->isUndef()) {
      assert(WidenVT.isScalableVector());
      NewVal = DAG.getNode(ISD::VSELECT, dl, WidenVT, Mask, NewVal, PassThru);
      // The lanes past EVL are poison.
      NewVal = DAG.getNode(ISD::VP_MERGE, dl, WidenVT,
                           DAG.getAllOnesConstant(dl, WideMaskVT), NewVal,
                           DAG.getPOISON(WidenVT), EVL);
    }

    // Modified the chain - switch anything that used the old chain to use
    // the new one.
    ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1));

    return NewVal;
  }

  // The mask should be widened as well
  Mask = ModifyToType(Mask, WideMaskVT, true);

  SDValue Res = DAG.getMaskedLoad(
      WidenVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,
      PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
      ExtType, N->isExpandingLoad());
  // Legalize the chain result - switch anything that used the old chain to
  // use the new one.
  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
  return Res;
}
6888
6889SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
6890
6891 EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6892 SDValue Mask = N->getMask();
6893 EVT MaskVT = Mask.getValueType();
6894 SDValue PassThru = GetWidenedVector(N->getPassThru());
6895 SDValue Scale = N->getScale();
6896 unsigned NumElts = WideVT.getVectorNumElements();
6897 SDLoc dl(N);
6898
6899 // The mask should be widened as well
6900 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
6901 MaskVT.getVectorElementType(),
6902 WideVT.getVectorNumElements());
6903 Mask = ModifyToType(Mask, WideMaskVT, true);
6904
6905 // Widen the Index operand
6906 SDValue Index = N->getIndex();
6907 EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
6908 Index.getValueType().getScalarType(),
6909 NumElts);
6910 Index = ModifyToType(Index, WideIndexVT);
6911 SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
6912 Scale };
6913
6914 // Widen the MemoryType
6915 EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(),
6916 N->getMemoryVT().getScalarType(), NumElts);
6917 SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
6918 WideMemVT, dl, Ops, N->getMemOperand(),
6919 N->getIndexType(), N->getExtensionType());
6920
6921 // Legalize the chain result - switch anything that used the old chain to
6922 // use the new one.
6923 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
6924 return Res;
6925}
6926
6927SDValue DAGTypeLegalizer::WidenVecRes_VP_GATHER(VPGatherSDNode *N) {
6928 EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6929 SDValue Mask = N->getMask();
6930 SDValue Scale = N->getScale();
6931 ElementCount WideEC = WideVT.getVectorElementCount();
6932 SDLoc dl(N);
6933
6934 SDValue Index = GetWidenedVector(N->getIndex());
6935 EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(),
6936 N->getMemoryVT().getScalarType(), WideEC);
6937 Mask = GetWidenedMask(Mask, WideEC);
6938
6939 SDValue Ops[] = {N->getChain(), N->getBasePtr(), Index, Scale,
6940 Mask, N->getVectorLength()};
6941 SDValue Res = DAG.getGatherVP(DAG.getVTList(WideVT, MVT::Other), WideMemVT,
6942 dl, Ops, N->getMemOperand(), N->getIndexType());
6943
6944 // Legalize the chain result - switch anything that used the old chain to
6945 // use the new one.
6946 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
6947 return Res;
6948}
6949
6950SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) {
6951 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
6952 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0));
6953}
6954
6955// Return true is this is a SETCC node or a strict version of it.
6956static inline bool isSETCCOp(unsigned Opcode) {
6957 switch (Opcode) {
6958 case ISD::SETCC:
6959 case ISD::STRICT_FSETCC:
6961 return true;
6962 }
6963 return false;
6964}
6965
6966// Return true if this is a node that could have two SETCCs as operands.
6967static inline bool isLogicalMaskOp(unsigned Opcode) {
6968 switch (Opcode) {
6969 case ISD::AND:
6970 case ISD::OR:
6971 case ISD::XOR:
6972 return true;
6973 }
6974 return false;
6975}
6976
6977// If N is a SETCC or a strict variant of it, return the type
6978// of the compare operands.
6980 unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
6981 return N->getOperand(OpNo).getValueType();
6982}
6983
6984// This is used just for the assert in convertMask(). Check that this either
6985// a SETCC or a previously handled SETCC by convertMask().
6986#ifndef NDEBUG
6987static inline bool isSETCCorConvertedSETCC(SDValue N) {
6988 if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
6989 N = N.getOperand(0);
6990 else if (N.getOpcode() == ISD::CONCAT_VECTORS) {
6991 for (unsigned i = 1; i < N->getNumOperands(); ++i)
6992 if (!N->getOperand(i)->isUndef())
6993 return false;
6994 N = N.getOperand(0);
6995 }
6996
6997 if (N.getOpcode() == ISD::TRUNCATE)
6998 N = N.getOperand(0);
6999 else if (N.getOpcode() == ISD::SIGN_EXTEND)
7000 N = N.getOperand(0);
7001
7002 if (isLogicalMaskOp(N.getOpcode()))
7003 return isSETCCorConvertedSETCC(N.getOperand(0)) &&
7004 isSETCCorConvertedSETCC(N.getOperand(1));
7005
7006 return (isSETCCOp(N.getOpcode()) ||
7008}
7009#endif
7010
7011// Return a mask of vector type MaskVT to replace InMask. Also adjust MaskVT
7012// to ToMaskVT if needed with vector extension or truncation.
7013SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
7014 EVT ToMaskVT) {
7015 // Currently a SETCC or a AND/OR/XOR with two SETCCs are handled.
7016 // FIXME: This code seems to be too restrictive, we might consider
7017 // generalizing it or dropping it.
7018 assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument.");
7019
7020 // Make a new Mask node, with a legal result VT.
7021 SDValue Mask;
7023 for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i)
7024 Ops.push_back(InMask->getOperand(i));
7025 if (InMask->isStrictFPOpcode()) {
7026 Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask),
7027 { MaskVT, MVT::Other }, Ops);
7028 ReplaceValueWith(InMask.getValue(1), Mask.getValue(1));
7029 }
7030 else
7031 Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops,
7032 InMask->getFlags());
7033
7034 // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
7035 // extend or truncate is needed.
7036 LLVMContext &Ctx = *DAG.getContext();
7037 unsigned MaskScalarBits = MaskVT.getScalarSizeInBits();
7038 unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits();
7039 if (MaskScalarBits < ToMaskScalBits) {
7040 EVT ExtVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
7041 MaskVT.getVectorNumElements());
7042 Mask = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Mask), ExtVT, Mask);
7043 } else if (MaskScalarBits > ToMaskScalBits) {
7044 EVT TruncVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
7045 MaskVT.getVectorNumElements());
7046 Mask = DAG.getNode(ISD::TRUNCATE, SDLoc(Mask), TruncVT, Mask);
7047 }
7048
7049 assert(Mask->getValueType(0).getScalarSizeInBits() ==
7050 ToMaskVT.getScalarSizeInBits() &&
7051 "Mask should have the right element size by now.");
7052
7053 // Adjust Mask to the right number of elements.
7054 unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements();
7055 if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) {
7056 Mask = DAG.getExtractSubvector(SDLoc(Mask), ToMaskVT, Mask, 0);
7057 } else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
7058 unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls);
7059 EVT SubVT = Mask->getValueType(0);
7060 SmallVector<SDValue, 16> SubOps(NumSubVecs, DAG.getPOISON(SubVT));
7061 SubOps[0] = Mask;
7062 Mask = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubOps);
7063 }
7064
7065 assert((Mask->getValueType(0) == ToMaskVT) &&
7066 "A mask of ToMaskVT should have been produced by now.");
7067
7068 return Mask;
7069}
7070
// This method tries to handle some special cases for the vselect mask
// and if needed adjusting the mask vector type to match that of the VSELECT.
// Without it, many cases end up with scalarization of the SETCC, with many
// unnecessary instructions.
// Returns the converted mask, or an empty SDValue() if no special handling
// applies and the generic widening path should be used instead.
SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) {
  LLVMContext &Ctx = *DAG.getContext();
  SDValue Cond = N->getOperand(0);

  // Only VSELECT whose condition is a SETCC (or a logical combination of
  // SETCCs) is handled here.
  if (N->getOpcode() != ISD::VSELECT)
    return SDValue();

  if (!isSETCCOp(Cond->getOpcode()) && !isLogicalMaskOp(Cond->getOpcode()))
    return SDValue();

  // If this is a split VSELECT that was previously already handled, do
  // nothing.
  EVT CondVT = Cond->getValueType(0);
  if (CondVT.getScalarSizeInBits() != 1)
    return SDValue();

  EVT VSelVT = N->getValueType(0);

  // This method can't handle scalable vector types.
  // FIXME: This support could be added in the future.
  if (VSelVT.isScalableVector())
    return SDValue();

  // Only handle vector types which are a power of 2.
  if (!isPowerOf2_64(VSelVT.getSizeInBits()))
    return SDValue();

  // Don't touch if this will be scalarized.
  EVT FinalVT = VSelVT;
  while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
    FinalVT = FinalVT.getHalfNumVectorElementsVT(Ctx);

  if (FinalVT.getVectorNumElements() == 1)
    return SDValue();

  // If there is support for an i1 vector mask, don't touch.
  if (isSETCCOp(Cond.getOpcode())) {
    // Legalize the SETCC operand type first so we query the result type the
    // target will actually see.
    EVT SetCCOpVT = getSETCCOperandType(Cond);
    while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal)
      SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT);
    EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
    if (SetCCResVT.getScalarSizeInBits() == 1)
      return SDValue();
  } else if (CondVT.getScalarType() == MVT::i1) {
    // If there is support for an i1 vector mask (or only scalar i1 conditions),
    // don't touch.
    while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal)
      CondVT = TLI.getTypeToTransformTo(Ctx, CondVT);

    if (CondVT.getScalarType() == MVT::i1)
      return SDValue();
  }

  // Widen the vselect result type if needed.
  if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector)
    VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT);

  // The mask of the VSELECT should have integer elements.
  EVT ToMaskVT = VSelVT;
  if (!ToMaskVT.getScalarType().isInteger())
    ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger();

  SDValue Mask;
  if (isSETCCOp(Cond->getOpcode())) {
    // Single SETCC: convert it directly to a ToMaskVT mask.
    EVT MaskVT = getSetCCResultType(getSETCCOperandType(Cond));
    Mask = convertMask(Cond, MaskVT, ToMaskVT);
  } else if (isLogicalMaskOp(Cond->getOpcode()) &&
             isSETCCOp(Cond->getOperand(0).getOpcode()) &&
             isSETCCOp(Cond->getOperand(1).getOpcode())) {
    // Cond is (AND/OR/XOR (SETCC, SETCC))
    SDValue SETCC0 = Cond->getOperand(0);
    SDValue SETCC1 = Cond->getOperand(1);
    EVT VT0 = getSetCCResultType(getSETCCOperandType(SETCC0));
    EVT VT1 = getSetCCResultType(getSETCCOperandType(SETCC1));
    unsigned ScalarBits0 = VT0.getScalarSizeInBits();
    unsigned ScalarBits1 = VT1.getScalarSizeInBits();
    unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits();
    EVT MaskVT;
    // If the two SETCCs have different VTs, either extend/truncate one of
    // them to the other "towards" ToMaskVT, or truncate one and extend the
    // other to ToMaskVT.
    if (ScalarBits0 != ScalarBits1) {
      EVT NarrowVT = ((ScalarBits0 < ScalarBits1) ? VT0 : VT1);
      EVT WideVT = ((NarrowVT == VT0) ? VT1 : VT0);
      if (ScalarBits_ToMask >= WideVT.getScalarSizeInBits())
        MaskVT = WideVT;
      else if (ScalarBits_ToMask <= NarrowVT.getScalarSizeInBits())
        MaskVT = NarrowVT;
      else
        MaskVT = ToMaskVT;
    } else
      // If the two SETCCs have the same VT, don't change it.
      MaskVT = VT0;

    // Make new SETCCs and logical nodes.
    SETCC0 = convertMask(SETCC0, VT0, MaskVT);
    SETCC1 = convertMask(SETCC1, VT1, MaskVT);
    Cond = DAG.getNode(Cond->getOpcode(), SDLoc(Cond), MaskVT, SETCC0, SETCC1);

    // Convert the logical op for VSELECT if needed.
    Mask = convertMask(Cond, MaskVT, ToMaskVT);
  } else
    return SDValue();

  return Mask;
}
7181
// Widen the result of a SELECT/VSELECT/VP_SELECT/VP_MERGE. The value
// operands are widened; a vector condition must be brought to the widened
// element count as well.
SDValue DAGTypeLegalizer::WidenVecRes_Select(SDNode *N) {
  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
  ElementCount WidenEC = WidenVT.getVectorElementCount();

  SDValue Cond1 = N->getOperand(0);
  EVT CondVT = Cond1.getValueType();
  unsigned Opcode = N->getOpcode();
  if (CondVT.isVector()) {
    // First try the SETCC-based mask special-casing (see WidenVSELECTMask);
    // it avoids scalarizing the SETCC feeding the mask.
    if (SDValue WideCond = WidenVSELECTMask(N)) {
      SDValue InOp1 = GetWidenedVector(N->getOperand(1));
      SDValue InOp2 = GetWidenedVector(N->getOperand(2));
      assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
      return DAG.getNode(Opcode, SDLoc(N), WidenVT, WideCond, InOp1, InOp2);
    }

    EVT CondEltVT = CondVT.getVectorElementType();
    EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenEC);
    if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector)
      Cond1 = GetWidenedVector(Cond1);

    // If we have to split the condition there is no point in widening the
    // select. This would result in a cycle of widening the select ->
    // widening the condition operand -> splitting the condition operand ->
    // splitting the select -> widening the select. Instead split this select
    // further and widen the resulting type.
    if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) {
      SDValue SplitSelect = SplitVecOp_VSELECT(N, 0);
      SDValue Res = ModifyToType(SplitSelect, WidenVT);
      return Res;
    }

    if (Cond1.getValueType() != CondWidenVT)
      Cond1 = ModifyToType(Cond1, CondWidenVT);
  }

  SDValue InOp1 = GetWidenedVector(N->getOperand(1));
  SDValue InOp2 = GetWidenedVector(N->getOperand(2));
  assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT);
  // VP nodes carry an extra vector-length operand (operand 3) which is
  // passed through unchanged.
  if (Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE)
    return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2,
                       N->getOperand(3));
  return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2);
}
7225
7226SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) {
7227 SDValue InOp1 = GetWidenedVector(N->getOperand(2));
7228 SDValue InOp2 = GetWidenedVector(N->getOperand(3));
7229 return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
7230 InOp1.getValueType(), N->getOperand(0),
7231 N->getOperand(1), InOp1, InOp2, N->getOperand(4));
7232}
7233
7234SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) {
7235 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
7236 return DAG.getUNDEF(WidenVT);
7237}
7238
7239SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) {
7240 EVT VT = N->getValueType(0);
7241 SDLoc dl(N);
7242
7243 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
7244 unsigned NumElts = VT.getVectorNumElements();
7245 unsigned WidenNumElts = WidenVT.getVectorNumElements();
7246
7247 SDValue InOp1 = GetWidenedVector(N->getOperand(0));
7248 SDValue InOp2 = GetWidenedVector(N->getOperand(1));
7249
7250 // Adjust mask based on new input vector length.
7251 SmallVector<int, 16> NewMask(WidenNumElts, -1);
7252 for (unsigned i = 0; i != NumElts; ++i) {
7253 int Idx = N->getMaskElt(i);
7254 if (Idx < (int)NumElts)
7255 NewMask[i] = Idx;
7256 else
7257 NewMask[i] = Idx - NumElts + WidenNumElts;
7258 }
7259 return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask);
7260}
7261
7262SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) {
7263 EVT VT = N->getValueType(0);
7264 EVT EltVT = VT.getVectorElementType();
7265 SDLoc dl(N);
7266
7267 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
7268 SDValue OpValue = GetWidenedVector(N->getOperand(0));
7269 assert(WidenVT == OpValue.getValueType() && "Unexpected widened vector type");
7270
7271 SDValue ReverseVal = DAG.getNode(ISD::VECTOR_REVERSE, dl, WidenVT, OpValue);
7272 unsigned WidenNumElts = WidenVT.getVectorMinNumElements();
7273 unsigned VTNumElts = VT.getVectorMinNumElements();
7274 unsigned IdxVal = WidenNumElts - VTNumElts;
7275
7276 if (VT.isScalableVector()) {
7277 // Try to split the 'Widen ReverseVal' into smaller extracts and concat the
7278 // results together, e.g.(nxv6i64 -> nxv8i64)
7279 // nxv8i64 vector_reverse
7280 // <->
7281 // nxv8i64 concat(
7282 // nxv2i64 extract_subvector(nxv8i64, 2)
7283 // nxv2i64 extract_subvector(nxv8i64, 4)
7284 // nxv2i64 extract_subvector(nxv8i64, 6)
7285 // nxv2i64 undef)
7286
7287 unsigned GCD = std::gcd(VTNumElts, WidenNumElts);
7288 EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
7290 assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken "
7291 "down type's element count");
7293 unsigned i = 0;
7294 for (; i < VTNumElts / GCD; ++i)
7295 Parts.push_back(
7296 DAG.getExtractSubvector(dl, PartVT, ReverseVal, IdxVal + i * GCD));
7297 for (; i < WidenNumElts / GCD; ++i)
7298 Parts.push_back(DAG.getPOISON(PartVT));
7299
7300 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts);
7301 }
7302
7303 // Use VECTOR_SHUFFLE to combine new vector from 'ReverseVal' for
7304 // fixed-vectors.
7305 SmallVector<int, 16> Mask(WidenNumElts, -1);
7306 std::iota(Mask.begin(), Mask.begin() + VTNumElts, IdxVal);
7307
7308 return DAG.getVectorShuffle(WidenVT, dl, ReverseVal, DAG.getPOISON(WidenVT),
7309 Mask);
7310}
7311
7312SDValue DAGTypeLegalizer::WidenVecRes_GET_ACTIVE_LANE_MASK(SDNode *N) {
7313 EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
7314 return DAG.getNode(ISD::GET_ACTIVE_LANE_MASK, SDLoc(N), NVT, N->ops());
7315}
7316
// Widen the result of a vector SETCC/VP_SETCC. The compare operands must be
// widened to the same element count as the widened result.
SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         N->getOperand(0).getValueType().isVector() &&
         "Operands must be vectors");
  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
  ElementCount WidenEC = WidenVT.getVectorElementCount();

  SDValue InOp1 = N->getOperand(0);
  EVT InVT = InOp1.getValueType();
  assert(InVT.isVector() && "can not widen non-vector type");
  EVT WidenInVT =
      EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenEC);

  // The input and output types often differ here, and it could be that while
  // we'd prefer to widen the result type, the input operands have been split.
  // In this case, we also need to split the result of this node as well.
  if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) {
    SDValue SplitVSetCC = SplitVecOp_VSETCC(N);
    SDValue Res = ModifyToType(SplitVSetCC, WidenVT);
    return Res;
  }

  // If the inputs also widen, handle them directly. Otherwise widen by hand.
  SDValue InOp2 = N->getOperand(1);
  if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
    InOp1 = GetWidenedVector(InOp1);
    InOp2 = GetWidenedVector(InOp2);
  } else {
    // Pad both operands with poison lanes up to the widened input type.
    SDValue Poison = DAG.getPOISON(WidenInVT);
    SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(N));
    InOp1 = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), WidenInVT, Poison,
                        InOp1, ZeroIdx);
    InOp2 = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), WidenInVT, Poison,
                        InOp2, ZeroIdx);
  }

  // Assume that the input and output will be widen appropriately. If not,
  // we will have to unroll it at some point.
  assert(InOp1.getValueType() == WidenInVT &&
         InOp2.getValueType() == WidenInVT &&
         "Input not widened to expected type!");
  (void)WidenInVT;
  // VP_SETCC additionally carries a mask (operand 3) and a vector length
  // (operand 4); the mask is widened to match the result.
  if (N->getOpcode() == ISD::VP_SETCC) {
    SDValue Mask =
        GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount());
    return DAG.getNode(ISD::VP_SETCC, SDLoc(N), WidenVT, InOp1, InOp2,
                       N->getOperand(2), Mask, N->getOperand(4));
  }
  return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, InOp1, InOp2,
                     N->getOperand(2));
}
7368
// Widen the result of a strict FP vector compare by fully unrolling: each
// lane becomes a scalar strict compare whose chains are token-factored
// together, then the scalar results are reassembled into a build_vector.
SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         N->getOperand(1).getValueType().isVector() &&
         "Operands must be vectors");
  EVT VT = N->getValueType(0);
  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
  unsigned WidenNumElts = WidenVT.getVectorNumElements();
  unsigned NumElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();

  SDLoc dl(N);
  SDValue Chain = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  SDValue CC = N->getOperand(3);
  EVT TmpEltVT = LHS.getValueType().getVectorElementType();

  // Fully unroll and reassemble. Lanes beyond the original width are poison.
  SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getPOISON(EltVT));
  SmallVector<SDValue, 8> Chains(NumElts);
  for (unsigned i = 0; i != NumElts; ++i) {
    SDValue LHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, LHS, i);
    SDValue RHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, RHS, i);

    // The scalar strict compare yields an i1 plus a chain.
    Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
                             {Chain, LHSElem, RHSElem, CC});
    Chains[i] = Scalars[i].getValue(1);
    // Map the i1 to the target's true/false representation for EltVT.
    Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i],
                               DAG.getBoolConstant(true, dl, EltVT, VT),
                               DAG.getBoolConstant(false, dl, EltVT, VT));
  }

  // Merge all per-lane chains and rewire users of the old chain.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
  ReplaceValueWith(SDValue(N, 1), NewChain);

  return DAG.getBuildVector(WidenVT, dl, Scalars);
}
7406
7407//===----------------------------------------------------------------------===//
7408// Widen Vector Operand
7409//===----------------------------------------------------------------------===//
7410bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
7411 LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG));
7412 SDValue Res = SDValue();
7413
7414 // See if the target wants to custom widen this node.
7415 if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false))
7416 return false;
7417
7418 switch (N->getOpcode()) {
7419 default:
7420#ifndef NDEBUG
7421 dbgs() << "WidenVectorOperand op #" << OpNo << ": ";
7422 N->dump(&DAG);
7423 dbgs() << "\n";
7424#endif
7425 report_fatal_error("Do not know how to widen this operator's operand!");
7426
7427 case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
7428 case ISD::FAKE_USE:
7429 Res = WidenVecOp_FAKE_USE(N);
7430 break;
7431 case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
7432 case ISD::INSERT_SUBVECTOR: Res = WidenVecOp_INSERT_SUBVECTOR(N); break;
7433 case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
7434 case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
7435 case ISD::STORE: Res = WidenVecOp_STORE(N); break;
7436 case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break;
7437 case ISD::EXPERIMENTAL_VP_STRIDED_STORE:
7438 Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo);
7439 break;
7443 Res = WidenVecOp_EXTEND_VECTOR_INREG(N);
7444 break;
7445 case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break;
7446 case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
7447 case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
7448 case ISD::VP_SCATTER: Res = WidenVecOp_VP_SCATTER(N, OpNo); break;
7449 case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
7450 case ISD::STRICT_FSETCC:
7451 case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break;
7452 case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
7453 case ISD::FLDEXP:
7454 case ISD::FCOPYSIGN:
7455 case ISD::LROUND:
7456 case ISD::LLROUND:
7457 case ISD::LRINT:
7458 case ISD::LLRINT:
7459 Res = WidenVecOp_UnrollVectorOp(N);
7460 break;
7461 case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break;
7462
7463 case ISD::ANY_EXTEND:
7464 case ISD::SIGN_EXTEND:
7465 case ISD::ZERO_EXTEND:
7466 Res = WidenVecOp_EXTEND(N);
7467 break;
7468
7469 case ISD::SCMP:
7470 case ISD::UCMP:
7471 Res = WidenVecOp_CMP(N);
7472 break;
7473
7474 case ISD::FP_EXTEND:
7476 case ISD::FP_ROUND:
7478 case ISD::FP_TO_SINT:
7480 case ISD::FP_TO_UINT:
7482 case ISD::SINT_TO_FP:
7484 case ISD::UINT_TO_FP:
7486 case ISD::TRUNCATE:
7488 Res = WidenVecOp_Convert(N);
7489 break;
7490
7493 Res = WidenVecOp_FP_TO_XINT_SAT(N);
7494 break;
7495
7498 case ISD::VECREDUCE_ADD:
7499 case ISD::VECREDUCE_MUL:
7500 case ISD::VECREDUCE_AND:
7501 case ISD::VECREDUCE_OR:
7502 case ISD::VECREDUCE_XOR:
7511 Res = WidenVecOp_VECREDUCE(N);
7512 break;
7515 Res = WidenVecOp_VECREDUCE_SEQ(N);
7516 break;
7517 case ISD::VP_REDUCE_FADD:
7518 case ISD::VP_REDUCE_SEQ_FADD:
7519 case ISD::VP_REDUCE_FMUL:
7520 case ISD::VP_REDUCE_SEQ_FMUL:
7521 case ISD::VP_REDUCE_ADD:
7522 case ISD::VP_REDUCE_MUL:
7523 case ISD::VP_REDUCE_AND:
7524 case ISD::VP_REDUCE_OR:
7525 case ISD::VP_REDUCE_XOR:
7526 case ISD::VP_REDUCE_SMAX:
7527 case ISD::VP_REDUCE_SMIN:
7528 case ISD::VP_REDUCE_UMAX:
7529 case ISD::VP_REDUCE_UMIN:
7530 case ISD::VP_REDUCE_FMAX:
7531 case ISD::VP_REDUCE_FMIN:
7532 case ISD::VP_REDUCE_FMAXIMUM:
7533 case ISD::VP_REDUCE_FMINIMUM:
7534 Res = WidenVecOp_VP_REDUCE(N);
7535 break;
7536 case ISD::VP_CTTZ_ELTS:
7537 case ISD::VP_CTTZ_ELTS_ZERO_POISON:
7538 Res = WidenVecOp_VP_CttzElements(N);
7539 break;
7541 Res = WidenVecOp_VECTOR_FIND_LAST_ACTIVE(N);
7542 break;
7543 }
7544
7545 // If Res is null, the sub-method took care of registering the result.
7546 if (!Res.getNode()) return false;
7547
7548 // If the result is N, the sub-method updated N in place. Tell the legalizer
7549 // core about this.
7550 if (Res.getNode() == N)
7551 return true;
7552
7553
7554 if (N->isStrictFPOpcode())
7555 assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
7556 "Invalid operand expansion");
7557 else
7558 assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
7559 "Invalid operand expansion");
7560
7561 ReplaceValueWith(SDValue(N, 0), Res);
7562 return false;
7563}
7564
7565SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) {
7566 SDLoc DL(N);
7567 EVT VT = N->getValueType(0);
7568
7569 SDValue InOp = N->getOperand(0);
7570 assert(getTypeAction(InOp.getValueType()) ==
7572 "Unexpected type action");
7573 InOp = GetWidenedVector(InOp);
7576 "Input wasn't widened!");
7577
7578 // We may need to further widen the operand until it has the same total
7579 // vector size as the result.
7580 EVT InVT = InOp.getValueType();
7581 if (InVT.getSizeInBits() != VT.getSizeInBits()) {
7582 EVT InEltVT = InVT.getVectorElementType();
7583 for (EVT FixedVT : MVT::vector_valuetypes()) {
7584 EVT FixedEltVT = FixedVT.getVectorElementType();
7585 if (TLI.isTypeLegal(FixedVT) &&
7586 FixedVT.getSizeInBits() == VT.getSizeInBits() &&
7587 FixedEltVT == InEltVT) {
7588 assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() &&
7589 "Not enough elements in the fixed type for the operand!");
7590 assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() &&
7591 "We can't have the same type as we started with!");
7592 if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements())
7593 InOp = DAG.getInsertSubvector(DL, DAG.getPOISON(FixedVT), InOp, 0);
7594 else
7595 InOp = DAG.getExtractSubvector(DL, FixedVT, InOp, 0);
7596 break;
7597 }
7598 }
7599 InVT = InOp.getValueType();
7600 if (InVT.getSizeInBits() != VT.getSizeInBits())
7601 // We couldn't find a legal vector type that was a widening of the input
7602 // and could be extended in-register to the result type, so we have to
7603 // scalarize.
7604 return WidenVecOp_Convert(N);
7605 }
7606
7607 // Use special DAG nodes to represent the operation of extending the
7608 // low lanes.
7609 switch (N->getOpcode()) {
7610 default:
7611 llvm_unreachable("Extend legalization on extend operation!");
7612 case ISD::ANY_EXTEND:
7613 return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, InOp);
7614 case ISD::SIGN_EXTEND:
7615 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, InOp);
7616 case ISD::ZERO_EXTEND:
7617 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, InOp);
7618 }
7619}
7620
7621SDValue DAGTypeLegalizer::WidenVecOp_CMP(SDNode *N) {
7622 SDLoc dl(N);
7623
7624 EVT OpVT = N->getOperand(0).getValueType();
7625 EVT ResVT = N->getValueType(0);
7626 SDValue LHS = GetWidenedVector(N->getOperand(0));
7627 SDValue RHS = GetWidenedVector(N->getOperand(1));
7628
7629 // 1. EXTRACT_SUBVECTOR
7630 // 2. SIGN_EXTEND/ZERO_EXTEND
7631 // 3. CMP
7632 LHS = DAG.getExtractSubvector(dl, OpVT, LHS, 0);
7633 RHS = DAG.getExtractSubvector(dl, OpVT, RHS, 0);
7634
7635 // At this point the result type is guaranteed to be valid, so we can use it
7636 // as the operand type by extending it appropriately
7637 ISD::NodeType ExtendOpcode =
7638 N->getOpcode() == ISD::SCMP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
7639 LHS = DAG.getNode(ExtendOpcode, dl, ResVT, LHS);
7640 RHS = DAG.getNode(ExtendOpcode, dl, ResVT, RHS);
7641
7642 return DAG.getNode(N->getOpcode(), dl, ResVT, LHS, RHS);
7643}
7644
SDValue DAGTypeLegalizer::WidenVecOp_UnrollVectorOp(SDNode *N) {
  // The result (and first input) is legal, but the second input is illegal.
  // We can't do much to fix that, so just unroll and let the extracts off of
  // the second input be widened as needed later.
  // Used from the operand-widening switch for FLDEXP, FCOPYSIGN and the
  // LROUND/LLROUND/LRINT/LLRINT family.
  return DAG.UnrollVectorOp(N);
}
7651
7652SDValue DAGTypeLegalizer::WidenVecOp_IS_FPCLASS(SDNode *N) {
7653 SDLoc DL(N);
7654 EVT ResultVT = N->getValueType(0);
7655 SDValue Test = N->getOperand(1);
7656 SDValue WideArg = GetWidenedVector(N->getOperand(0));
7657
7658 // Process this node similarly to SETCC.
7659 EVT WideResultVT = getSetCCResultType(WideArg.getValueType());
7660 if (ResultVT.getScalarType() == MVT::i1)
7661 WideResultVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
7662 WideResultVT.getVectorNumElements());
7663
7664 SDValue WideNode = DAG.getNode(ISD::IS_FPCLASS, DL, WideResultVT,
7665 {WideArg, Test}, N->getFlags());
7666
7667 // Extract the needed results from the result vector.
7668 EVT ResVT =
7669 EVT::getVectorVT(*DAG.getContext(), WideResultVT.getVectorElementType(),
7670 ResultVT.getVectorNumElements());
7671 SDValue CC = DAG.getExtractSubvector(DL, ResVT, WideNode, 0);
7672
7673 EVT OpVT = N->getOperand(0).getValueType();
7674 ISD::NodeType ExtendCode =
7675 TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
7676 return DAG.getNode(ExtendCode, DL, ResultVT, CC);
7677}
7678
7679SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
7680 // Since the result is legal and the input is illegal.
7681 EVT VT = N->getValueType(0);
7682 EVT EltVT = VT.getVectorElementType();
7683 SDLoc dl(N);
7684 SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
7685 assert(getTypeAction(InOp.getValueType()) ==
7687 "Unexpected type action");
7688 InOp = GetWidenedVector(InOp);
7689 EVT InVT = InOp.getValueType();
7690 unsigned Opcode = N->getOpcode();
7691
7692 // See if a widened result type would be legal, if so widen the node.
7693 // FIXME: This isn't safe for StrictFP. Other optimization here is needed.
7694 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
7695 InVT.getVectorElementCount());
7696 if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
7697 SDValue Res;
7698 if (N->isStrictFPOpcode()) {
7699 if (Opcode == ISD::STRICT_FP_ROUND)
7700 Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
7701 { N->getOperand(0), InOp, N->getOperand(2) });
7702 else
7703 Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other },
7704 { N->getOperand(0), InOp });
7705 // Legalize the chain result - switch anything that used the old chain to
7706 // use the new one.
7707 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
7708 } else {
7709 if (Opcode == ISD::FP_ROUND || Opcode == ISD::CONVERT_FROM_ARBITRARY_FP)
7710 Res = DAG.getNode(Opcode, dl, WideVT, InOp, N->getOperand(1));
7711 else
7712 Res = DAG.getNode(Opcode, dl, WideVT, InOp);
7713 }
7714 return DAG.getExtractSubvector(dl, VT, Res, 0);
7715 }
7716
7717 EVT InEltVT = InVT.getVectorElementType();
7718
7719 // Unroll the convert into some scalar code and create a nasty build vector.
7720 unsigned NumElts = VT.getVectorNumElements();
7722 if (N->isStrictFPOpcode()) {
7723 SmallVector<SDValue, 4> NewOps(N->ops());
7724 SmallVector<SDValue, 32> OpChains;
7725 for (unsigned i=0; i < NumElts; ++i) {
7726 NewOps[1] = DAG.getExtractVectorElt(dl, InEltVT, InOp, i);
7727 Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps);
7728 OpChains.push_back(Ops[i].getValue(1));
7729 }
7730 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
7731 ReplaceValueWith(SDValue(N, 1), NewChain);
7732 } else {
7733 for (unsigned i = 0; i < NumElts; ++i) {
7734 SDValue Elt = DAG.getExtractVectorElt(dl, InEltVT, InOp, i);
7735 if (Opcode == ISD::FP_ROUND || Opcode == ISD::CONVERT_FROM_ARBITRARY_FP)
7736 Ops[i] = DAG.getNode(Opcode, dl, EltVT, Elt, N->getOperand(1));
7737 else
7738 Ops[i] = DAG.getNode(Opcode, dl, EltVT, Elt);
7739 }
7740 }
7741
7742 return DAG.getBuildVector(VT, dl, Ops);
7743}
7744
7745SDValue DAGTypeLegalizer::WidenVecOp_FP_TO_XINT_SAT(SDNode *N) {
7746 EVT DstVT = N->getValueType(0);
7747 SDValue Src = GetWidenedVector(N->getOperand(0));
7748 EVT SrcVT = Src.getValueType();
7749 ElementCount WideNumElts = SrcVT.getVectorElementCount();
7750 SDLoc dl(N);
7751
7752 // See if a widened result type would be legal, if so widen the node.
7753 EVT WideDstVT = EVT::getVectorVT(*DAG.getContext(),
7754 DstVT.getVectorElementType(), WideNumElts);
7755 if (TLI.isTypeLegal(WideDstVT)) {
7756 SDValue Res =
7757 DAG.getNode(N->getOpcode(), dl, WideDstVT, Src, N->getOperand(1));
7758 return DAG.getNode(
7759 ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res,
7760 DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
7761 }
7762
7763 // Give up and unroll.
7764 return DAG.UnrollVectorOp(N);
7765}
7766
// Widen the operand of a BITCAST whose result type is legal. Tries to avoid
// a round-trip through memory by bitcasting to a legal "container" type and
// extracting; falls back to a stack store/load otherwise.
SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue InOp = GetWidenedVector(N->getOperand(0));
  EVT InWidenVT = InOp.getValueType();
  SDLoc dl(N);

  // Check if we can convert between two legal vector types and extract.
  TypeSize InWidenSize = InWidenVT.getSizeInBits();
  TypeSize Size = VT.getSizeInBits();
  // x86mmx is not an acceptable vector element type, so don't try.
  if (!VT.isVector() && VT != MVT::x86mmx &&
      InWidenSize.hasKnownScalarFactor(Size)) {
    // Scalar result: view the widened input as a vector of VT and take
    // element 0.
    unsigned NewNumElts = InWidenSize.getKnownScalarFactor(Size);
    EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
    if (TLI.isTypeLegal(NewVT)) {
      SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
      return DAG.getExtractVectorElt(dl, VT, BitOp, 0);
    }
  }

  // Handle a case like bitcast v12i8 -> v3i32. Normally that would get widened
  // to v16i8 -> v4i32, but for a target where v3i32 is legal but v12i8 is not,
  // we end up here. Handling the case here with EXTRACT_SUBVECTOR avoids
  // having to copy via memory.
  if (VT.isVector()) {
    EVT EltVT = VT.getVectorElementType();
    unsigned EltSize = EltVT.getFixedSizeInBits();
    if (InWidenSize.isKnownMultipleOf(EltSize)) {
      ElementCount NewNumElts =
          (InWidenVT.getVectorElementCount() * InWidenVT.getScalarSizeInBits())
              .divideCoefficientBy(EltSize);
      EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts);
      if (TLI.isTypeLegal(NewVT)) {
        SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
        return DAG.getExtractSubvector(dl, VT, BitOp, 0);
      }
    }
  }

  // Last resort: go through a stack slot.
  return CreateStackStoreLoad(InOp, VT);
}
7808
7809// Vectors with sizes that are not powers of 2 need to be widened to the
7810// next largest power of 2. For example, we may get a vector of 3 32-bit
7811// integers or of 6 16-bit integers, both of which have to be widened to a
7812// 128-bit vector.
7813SDValue DAGTypeLegalizer::WidenVecOp_FAKE_USE(SDNode *N) {
7814 SDValue WidenedOp = GetWidenedVector(N->getOperand(1));
7815 return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0),
7816 WidenedOp);
7817}
7818
// Widen the operands of a CONCAT_VECTORS whose result type is already what we
// want: either reuse the widened first operand (when it alone supplies the
// whole result and all other operands are undef), or rebuild the result
// element by element.
// NOTE(review): this listing is a numbered extraction with some source lines
// elided (the element-buffer declaration after line 7839 and the expected
// value of the assert); comments describe only what is visible here.
7819SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) {
7820 EVT VT = N->getValueType(0);
7821 EVT EltVT = VT.getVectorElementType();
7822 EVT InVT = N->getOperand(0).getValueType();
7823 SDLoc dl(N);
7824
7825 // If the widen width for this operand is the same as the width of the concat
7826 // and all but the first operand is undef, just use the widened operand.
7827 unsigned NumOperands = N->getNumOperands();
7828 if (VT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) {
7829 unsigned i;
// Find the first non-undef operand after operand 0, if any.
7830 for (i = 1; i < NumOperands; ++i)
7831 if (!N->getOperand(i).isUndef())
7832 break;
7833
7834 if (i == NumOperands)
7835 return GetWidenedVector(N->getOperand(0));
7836 }
7837
7838 // Otherwise, fall back to a nasty build vector.
7839 unsigned NumElts = VT.getVectorNumElements();
// (elided line: presumably declares the 'Ops' buffer of NumElts SDValues —
// confirm against the full source)
7841
7842 unsigned NumInElts = InVT.getVectorNumElements();
7843
7844 unsigned Idx = 0;
// Widen each operand and copy its first NumInElts elements into 'Ops'.
7845 for (unsigned i=0; i < NumOperands; ++i) {
7846 SDValue InOp = N->getOperand(i);
// The assert's expected value sits on an elided line; presumably it checks
// that this operand's type action is "widen vector" — confirm upstream.
7847 assert(getTypeAction(InOp.getValueType()) ==
7849 "Unexpected type action");
7850 InOp = GetWidenedVector(InOp);
7851 for (unsigned j = 0; j < NumInElts; ++j)
7852 Ops[Idx++] = DAG.getExtractVectorElt(dl, EltVT, InOp, j);
7853 }
7854 return DAG.getBuildVector(VT, dl, Ops);
7855}
7856
// Widen the sub-vector operand of an INSERT_SUBVECTOR, choosing between
// (a) re-emitting the insert when the extra lanes are provably harmless,
// (b) a vselect-based merge for scalable types, (c) a stack round trip with a
// masked store, or (d) an element-by-element insert sequence.
// NOTE(review): numbered extraction with elided lines (the report_fatal_error
// call site before line 7887 and the memory-operand construction around lines
// 7914-7918); comments describe only what is visible here.
7857SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
7858 EVT VT = N->getValueType(0);
7859 SDValue SubVec = N->getOperand(1);
7860 SDValue InVec = N->getOperand(0);
7861
7862 EVT OrigVT = SubVec.getValueType();
7863 SubVec = GetWidenedVector(SubVec);
7864 EVT SubVT = SubVec.getValueType();
7865
7866 // Whether or not all the elements of the widened SubVec will be inserted into
7867 // valid indices of VT.
7868 bool IndicesValid = false;
7869 // If we statically know that VT can fit SubVT, the indices are valid.
7870 if (VT.knownBitsGE(SubVT))
7871 IndicesValid = true;
7872 else if (VT.isScalableVector() && SubVT.isFixedLengthVector()) {
7873 // Otherwise, if we're inserting a fixed vector into a scalable vector and
7874 // we know the minimum vscale we can work out if it's valid ourselves.
7875 Attribute Attr = DAG.getMachineFunction().getFunction().getFnAttribute(
7876 Attribute::VScaleRange);
7877 if (Attr.isValid()) {
7878 unsigned VScaleMin = Attr.getVScaleRangeMin();
// Scale the minimum (vscale-relative) size of VT by the known lower bound on
// vscale to compare against the fixed size of the widened sub-vector.
7879 if (VT.getSizeInBits().getKnownMinValue() * VScaleMin >=
7880 SubVT.getFixedSizeInBits())
7881 IndicesValid = true;
7882 }
7883 }
7884
// (elided line: presumably a report_fatal_error invocation — confirm against
// the full source)
7885 if (!IndicesValid)
7887 "Don't know how to widen the operands for INSERT_SUBVECTOR");
7888
7889 SDLoc DL(N);
7890
7891 // We need to make sure that the indices are still valid, otherwise we might
7892 // widen what was previously well-defined to something undefined.
7893 if (InVec.isUndef() && N->getConstantOperandVal(2) == 0)
7894 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, InVec, SubVec,
7895 N->getOperand(2));
7896
7897 if (OrigVT.isScalableVector()) {
7898 // When the widened types match, overwriting the start of a vector is
7899 // effectively a merge operation that can be implement as a vselect.
7900 if (SubVT == VT && N->getConstantOperandVal(2) == 0) {
7901 SDValue Mask =
7902 DAG.getMaskFromElementCount(DL, VT, OrigVT.getVectorElementCount());
7903 return DAG.getNode(ISD::VSELECT, DL, VT, Mask, SubVec, InVec);
7904 }
7905
7906 // Fallback to inserting through memory.
7907 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);
7908 SDValue StackPtr = DAG.CreateStackTemporary(VT.getStoreSize(), Alignment);
7909 MachineFunction &MF = DAG.getMachineFunction();
7910 int FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
7911 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);
7912
// (elided lines: the MachineMemOperand constructor arguments — presumably
// PtrInfo, store/load flags, size and alignment — confirm upstream)
7913 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
7916 MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
7919
7920 // Write out the vector being inserting into.
7921 SDValue Ch =
7922 DAG.getStore(DAG.getEntryNode(), DL, InVec, StackPtr, StoreMMO);
7923
7924 // Build a mask to match the length of the sub-vector.
7925 SDValue Mask =
7926 DAG.getMaskFromElementCount(DL, SubVT, OrigVT.getVectorElementCount());
7927
7928 // Overwrite the sub-vector at the required offset.
7929 SDValue SubVecPtr =
7930 TLI.getVectorSubVecPointer(DAG, StackPtr, VT, OrigVT, N->getOperand(2));
7931 Ch = DAG.getMaskedStore(Ch, DL, SubVec, SubVecPtr,
7932 DAG.getPOISON(SubVecPtr.getValueType()), Mask, VT,
7933 StoreMMO, ISD::UNINDEXED, ISD::NON_EXTLOAD);
7934
7935 // Read back the result.
7936 return DAG.getLoad(VT, DL, Ch, StackPtr, LoadMMO);
7937 }
7938
7939 // If the operands can't be widened legally, just replace the INSERT_SUBVECTOR
7940 // with a series of INSERT_VECTOR_ELT
7941 unsigned Idx = N->getConstantOperandVal(2);
7942
7943 SDValue InsertElt = InVec;
// Copy only the original sub-vector's elements; the widened padding lanes are
// deliberately not inserted.
7944 for (unsigned I = 0, E = OrigVT.getVectorNumElements(); I != E; ++I) {
7945 SDValue ExtractElt =
7946 DAG.getExtractVectorElt(DL, VT.getVectorElementType(), SubVec, I);
7947 InsertElt = DAG.getInsertVectorElt(DL, InsertElt, ExtractElt, I + Idx);
7948 }
7949
7950 return InsertElt;
7951}
7952
7953SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
7954 SDValue InOp = GetWidenedVector(N->getOperand(0));
7955 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N),
7956 N->getValueType(0), InOp, N->getOperand(1));
7957}
7958
7959SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
7960 SDValue InOp = GetWidenedVector(N->getOperand(0));
7961 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
7962 N->getValueType(0), InOp, N->getOperand(1));
7963}
7964
7965SDValue DAGTypeLegalizer::WidenVecOp_EXTEND_VECTOR_INREG(SDNode *N) {
7966 SDLoc DL(N);
7967 EVT ResVT = N->getValueType(0);
7968
7969 // Widen the input as requested by the legalizer.
7970 SDValue WideInOp = GetWidenedVector(N->getOperand(0));
7971 EVT WideInVT = WideInOp.getValueType();
7972
7973 // Simple case: if widened input is still smaller than or equal to result,
7974 // just use it directly.
7975 if (WideInVT.getSizeInBits() <= ResVT.getSizeInBits())
7976 return DAG.getNode(N->getOpcode(), DL, ResVT, WideInOp);
7977
7978 // EXTEND_VECTOR_INREG requires input bits <= result bits.
7979 // If widening makes the input larger than the original result, widen the
7980 // result to match, then extract back down.
7981 EVT ResEltVT = ResVT.getVectorElementType();
7982 unsigned EltBits = ResEltVT.getSizeInBits();
7983 assert((WideInVT.getSizeInBits() % EltBits) == 0 &&
7984 "Widened input size must be a multiple of result element size");
7985
7986 unsigned WideNumElts = WideInVT.getSizeInBits() / EltBits;
7987 EVT WideResVT = EVT::getVectorVT(*DAG.getContext(), ResEltVT, WideNumElts);
7988
7989 SDValue WideRes = DAG.getNode(N->getOpcode(), DL, WideResVT, WideInOp);
7990 return DAG.getExtractSubvector(DL, ResVT, WideRes, 0);
7991}
7992
// Widen the value operand of a plain vector store while still storing only the
// original memory type. Strategies, in order: scalarize (sub-byte or
// truncating stores), VP_STORE with an explicit vector length, a chain of
// narrower stores, and finally a wide masked store.
// NOTE(review): numbered extraction; the declaration of 'StChain' (original
// line 8028) is elided — presumably a SmallVector of store chains filled in by
// GenWidenVectorStores. Confirm against the full source.
7993SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
7994 // We have to widen the value, but we want only to store the original
7995 // vector type.
7996 StoreSDNode *ST = cast<StoreSDNode>(N);
7997
// Sub-byte-sized elements cannot be chopped into byte-addressed pieces.
7998 if (!ST->getMemoryVT().getScalarType().isByteSized())
7999 return TLI.scalarizeVectorStore(ST, DAG);
8000
8001 if (ST->isTruncatingStore())
8002 return TLI.scalarizeVectorStore(ST, DAG);
8003
8004 // Generate a vector-predicated store if it is custom/legal on the target.
8005 // To avoid possible recursion, only do this if the widened mask type is
8006 // legal.
8007 // FIXME: Not all targets may support EVL in VP_STORE. These will have been
8008 // removed from the IR by the ExpandVectorPredication pass but we're
8009 // reintroducing them here.
8010 SDValue StVal = ST->getValue();
8011 EVT StVT = StVal.getValueType();
8012 EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT);
8013 EVT WideMaskVT = getSetCCResultType(WideVT);
8014
8015 if (TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
8016 TLI.isTypeLegal(WideMaskVT)) {
8017 // Widen the value.
8018 SDLoc DL(N);
8019 StVal = GetWidenedVector(StVal);
8020 SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
// The EVL limits the store to the original element count, so the widened
// padding lanes are never written to memory.
8021 SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
8022 StVT.getVectorElementCount());
8023 return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(),
8024 ST->getOffset(), Mask, EVL, StVT, ST->getMemOperand(),
8025 ST->getAddressingMode());
8026 }
8027
// (elided line: presumably declares 'StChain' — confirm upstream)
8029 if (GenWidenVectorStores(StChain, ST)) {
8030 if (StChain.size() == 1)
8031 return StChain[0];
8032
// Merge the chains of the individual narrower stores.
8033 return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
8034 }
8035
8036 if (StVT.isVector()) {
8037 // If all else fails replace the store with a wide masked store.
8038 SDLoc DL(N);
8039 SDValue WideStVal = GetWidenedVector(StVal);
// Mask enables only the original StVT lanes of the widened value.
8040 SDValue Mask =
8041 DAG.getMaskFromElementCount(DL, WideVT, StVT.getVectorElementCount());
8042
8043 return DAG.getMaskedStore(ST->getChain(), DL, WideStVal, ST->getBasePtr(),
8044 ST->getOffset(), Mask, ST->getMemoryVT(),
8045 ST->getMemOperand(), ST->getAddressingMode(),
8046 ST->isTruncatingStore());
8047 }
8048
8049 report_fatal_error("Unable to widen vector store");
8050}
8051
// Widen the data (OpNo == 1) or mask (OpNo == 3) operand of a VP_STORE. The
// other of the two is widened alongside so they keep matching element counts;
// the explicit vector length already restricts which lanes are stored.
// NOTE(review): numbered extraction; the expected values of the three asserts
// (original lines 8067, 8076, 8082) are elided — confirm against the full
// source.
8052SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) {
8053 assert((OpNo == 1 || OpNo == 3) &&
8054 "Can widen only data or mask operand of vp_store");
8055 VPStoreSDNode *ST = cast<VPStoreSDNode>(N);
8056 SDValue Mask = ST->getMask();
8057 SDValue StVal = ST->getValue();
8058 SDLoc dl(N);
8059
8060 if (OpNo == 1) {
8061 // Widen the value.
8062 StVal = GetWidenedVector(StVal);
8063
8064 // We only handle the case where the mask needs widening to an
8065 // identically-sized type as the vector inputs.
// (assert expected value elided)
8066 assert(getTypeAction(Mask.getValueType()) ==
8068 "Unable to widen VP store");
8069 Mask = GetWidenedVector(Mask);
8070 } else {
8071 Mask = GetWidenedVector(Mask);
8072
8073 // We only handle the case where the stored value needs widening to an
8074 // identically-sized type as the mask.
// (assert expected value elided)
8075 assert(getTypeAction(StVal.getValueType()) ==
8077 "Unable to widen VP store");
8078 StVal = GetWidenedVector(StVal);
8079 }
8080
// (assert right-hand side elided — presumably the data's element count)
8081 assert(Mask.getValueType().getVectorElementCount() ==
8083 "Mask and data vectors should have the same number of elements");
8084 return DAG.getStoreVP(ST->getChain(), dl, StVal, ST->getBasePtr(),
8085 ST->getOffset(), Mask, ST->getVectorLength(),
8086 ST->getMemoryVT(), ST->getMemOperand(),
8087 ST->getAddressingMode(), ST->isTruncatingStore(),
8088 ST->isCompressingStore());
8089}
8090
// Widen the data (OpNo == 1) or mask (OpNo == 4) operand of a VP strided
// store; both are widened together so their element counts stay equal.
// NOTE(review): numbered extraction; the asserts' expected values (original
// lines 8102, 8106) and the left-hand side of the element-count assert (line
// 8112) are elided — confirm against the full source.
8091SDValue DAGTypeLegalizer::WidenVecOp_VP_STRIDED_STORE(SDNode *N,
8092 unsigned OpNo) {
8093 assert((OpNo == 1 || OpNo == 4) &&
8094 "Can widen only data or mask operand of vp_strided_store");
8095 VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N);
8096 SDValue Mask = SST->getMask();
8097 SDValue StVal = SST->getValue();
8098 SDLoc DL(N);
8099
// Sanity-check that the operand *not* being legalized also widens; its
// expected type action sits on elided lines.
8100 if (OpNo == 1)
8101 assert(getTypeAction(Mask.getValueType()) ==
8103 "Unable to widen VP strided store");
8104 else
8105 assert(getTypeAction(StVal.getValueType()) ==
8107 "Unable to widen VP strided store");
8108
8109 StVal = GetWidenedVector(StVal);
8110 Mask = GetWidenedVector(Mask);
8111
// (assert left-hand side elided — presumably the data's element count)
8113 Mask.getValueType().getVectorElementCount() &&
8114 "Data and mask vectors should have the same number of elements");
8115
8116 return DAG.getStridedStoreVP(
8117 SST->getChain(), DL, StVal, SST->getBasePtr(), SST->getOffset(),
8118 SST->getStride(), Mask, SST->getVectorLength(), SST->getMemoryVT(),
8119 SST->getMemOperand(), SST->getAddressingMode(), SST->isTruncatingStore(),
8120 SST->isCompressingStore());
8121}
8122
// Widen the data (OpNo == 1) or mask (OpNo == 4) operand of a masked store.
// Prefers converting to a VP_STORE with an explicit vector length when the
// target supports it; otherwise pads both operands to matching widths.
// NOTE(review): numbered extraction; the EVL element-count argument (original
// line 8154) and the right-hand side of the final assert (line 8171) are
// elided — confirm against the full source.
8123SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
8124 assert((OpNo == 1 || OpNo == 4) &&
8125 "Can widen only data or mask operand of mstore");
8126 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
8127 SDValue Mask = MST->getMask();
8128 EVT MaskVT = Mask.getValueType();
8129 SDValue StVal = MST->getValue();
8130 EVT VT = StVal.getValueType();
8131 SDLoc dl(N);
8132
// Derive the widened data/mask types from whichever operand drives the
// legalization request.
8133 EVT WideVT, WideMaskVT;
8134 if (OpNo == 1) {
8135 // Widen the value.
8136 StVal = GetWidenedVector(StVal);
8137
8138 WideVT = StVal.getValueType();
8139 WideMaskVT =
8140 EVT::getVectorVT(*DAG.getContext(), MaskVT.getVectorElementType(),
8141 WideVT.getVectorElementCount());
8142 } else {
8143 WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT);
8144
8145 EVT ValueVT = StVal.getValueType();
8146 WideVT = EVT::getVectorVT(*DAG.getContext(), ValueVT.getVectorElementType(),
8147 WideMaskVT.getVectorElementCount());
8148 }
8149
// VP_STORE path: the original mask is placed at the front of a poison-padded
// widened mask; the (elided) EVL keeps the padding lanes inactive.
8150 if (TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
8151 TLI.isTypeLegal(WideMaskVT) && !MST->isCompressingStore()) {
8152 Mask = DAG.getInsertSubvector(dl, DAG.getPOISON(WideMaskVT), Mask, 0);
8153 SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
8155 return DAG.getStoreVP(MST->getChain(), dl, StVal, MST->getBasePtr(),
8156 MST->getOffset(), Mask, EVL, MST->getMemoryVT(),
8157 MST->getMemOperand(), MST->getAddressingMode());
8158 }
8159
8160 if (OpNo == 1) {
8161 // The mask should be widened as well.
8162 Mask = ModifyToType(Mask, WideMaskVT, true);
8163 } else {
8164 // Widen the mask.
8165 Mask = ModifyToType(Mask, WideMaskVT, true);
8166
8167 StVal = ModifyToType(StVal, WideVT);
8168 }
8169
// (assert right-hand side elided — presumably the data's element count)
8170 assert(Mask.getValueType().getVectorElementCount() ==
8172 "Mask and data vectors should have the same number of elements");
8173 return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(),
8174 MST->getOffset(), Mask, MST->getMemoryVT(),
8175 MST->getMemOperand(), MST->getAddressingMode(),
8176 false, MST->isCompressingStore());
8177}
8178
8179SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) {
8180 assert(OpNo == 4 && "Can widen only the index of mgather");
8181 auto *MG = cast<MaskedGatherSDNode>(N);
8182 SDValue DataOp = MG->getPassThru();
8183 SDValue Mask = MG->getMask();
8184 SDValue Scale = MG->getScale();
8185
8186 // Just widen the index. It's allowed to have extra elements.
8187 SDValue Index = GetWidenedVector(MG->getIndex());
8188
8189 SDLoc dl(N);
8190 SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index,
8191 Scale};
8192 SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops,
8193 MG->getMemOperand(), MG->getIndexType(),
8194 MG->getExtensionType());
8195 ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
8196 ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
8197 return SDValue();
8198}
8199
8200SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
8201 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
8202 SDValue DataOp = MSC->getValue();
8203 SDValue Mask = MSC->getMask();
8204 SDValue Index = MSC->getIndex();
8205 SDValue Scale = MSC->getScale();
8206 EVT WideMemVT = MSC->getMemoryVT();
8207
8208 if (OpNo == 1) {
8209 DataOp = GetWidenedVector(DataOp);
8210 unsigned NumElts = DataOp.getValueType().getVectorNumElements();
8211
8212 // Widen index.
8213 EVT IndexVT = Index.getValueType();
8214 EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(),
8215 IndexVT.getVectorElementType(), NumElts);
8216 Index = ModifyToType(Index, WideIndexVT);
8217
8218 // The mask should be widened as well.
8219 EVT MaskVT = Mask.getValueType();
8220 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(),
8221 MaskVT.getVectorElementType(), NumElts);
8222 Mask = ModifyToType(Mask, WideMaskVT, true);
8223
8224 // Widen the MemoryType
8225 WideMemVT = EVT::getVectorVT(*DAG.getContext(),
8226 MSC->getMemoryVT().getScalarType(), NumElts);
8227 } else if (OpNo == 4) {
8228 // Just widen the index. It's allowed to have extra elements.
8229 Index = GetWidenedVector(Index);
8230 } else
8231 llvm_unreachable("Can't widen this operand of mscatter");
8232
8233 SDValue Ops[] = {MSC->getChain(), DataOp, Mask, MSC->getBasePtr(), Index,
8234 Scale};
8235 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N),
8236 Ops, MSC->getMemOperand(), MSC->getIndexType(),
8237 MSC->isTruncatingStore());
8238}
8239
8240SDValue DAGTypeLegalizer::WidenVecOp_VP_SCATTER(SDNode *N, unsigned OpNo) {
8241 VPScatterSDNode *VPSC = cast<VPScatterSDNode>(N);
8242 SDValue DataOp = VPSC->getValue();
8243 SDValue Mask = VPSC->getMask();
8244 SDValue Index = VPSC->getIndex();
8245 SDValue Scale = VPSC->getScale();
8246 EVT WideMemVT = VPSC->getMemoryVT();
8247
8248 if (OpNo == 1) {
8249 DataOp = GetWidenedVector(DataOp);
8250 Index = GetWidenedVector(Index);
8251 const auto WideEC = DataOp.getValueType().getVectorElementCount();
8252 Mask = GetWidenedMask(Mask, WideEC);
8253 WideMemVT = EVT::getVectorVT(*DAG.getContext(),
8254 VPSC->getMemoryVT().getScalarType(), WideEC);
8255 } else if (OpNo == 3) {
8256 // Just widen the index. It's allowed to have extra elements.
8257 Index = GetWidenedVector(Index);
8258 } else
8259 llvm_unreachable("Can't widen this operand of VP_SCATTER");
8260
8261 SDValue Ops[] = {
8262 VPSC->getChain(), DataOp, VPSC->getBasePtr(), Index, Scale, Mask,
8263 VPSC->getVectorLength()};
8264 return DAG.getScatterVP(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N), Ops,
8265 VPSC->getMemOperand(), VPSC->getIndexType());
8266}
8267
// Widen both operands of a SETCC whose result type is already legal: compare
// at the widened width, extract the originally-requested lanes, then extend
// the boolean lanes to the result's element type.
// NOTE(review): numbered extraction; the arguments that complete the ResVT
// construction (original lines 8291-8292 — presumably the SETCC result's
// element type and the original element count) are elided; confirm against
// the full source.
8268SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
8269 SDValue InOp0 = GetWidenedVector(N->getOperand(0));
8270 SDValue InOp1 = GetWidenedVector(N->getOperand(1));
8271 SDLoc dl(N);
8272 EVT VT = N->getValueType(0);
8273
8274 // WARNING: In this code we widen the compare instruction with garbage.
8275 // This garbage may contain denormal floats which may be slow. Is this a real
8276 // concern ? Should we zero the unused lanes if this is a float compare ?
8277
8278 // Get a new SETCC node to compare the newly widened operands.
8279 // Only some of the compared elements are legal.
8280 EVT SVT = getSetCCResultType(InOp0.getValueType());
8281 // The result type is legal, if its vXi1, keep vXi1 for the new SETCC.
8282 if (VT.getScalarType() == MVT::i1)
8283 SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
8284 SVT.getVectorElementCount());
8285
8286 SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N),
8287 SVT, InOp0, InOp1, N->getOperand(2));
8288
8289 // Extract the needed results from the result vector.
8290 EVT ResVT = EVT::getVectorVT(*DAG.getContext(),
8293 SDValue CC = DAG.getExtractSubvector(dl, ResVT, WideSETCC, 0);
8294
// Extend per the target's boolean-contents convention for the operand type.
8295 EVT OpVT = N->getOperand(0).getValueType();
8296 ISD::NodeType ExtendCode =
8297 TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
8298 return DAG.getNode(ExtendCode, dl, VT, CC);
8299}
8300
8301SDValue DAGTypeLegalizer::WidenVecOp_STRICT_FSETCC(SDNode *N) {
8302 SDValue Chain = N->getOperand(0);
8303 SDValue LHS = GetWidenedVector(N->getOperand(1));
8304 SDValue RHS = GetWidenedVector(N->getOperand(2));
8305 SDValue CC = N->getOperand(3);
8306 SDLoc dl(N);
8307
8308 EVT VT = N->getValueType(0);
8309 EVT EltVT = VT.getVectorElementType();
8310 EVT TmpEltVT = LHS.getValueType().getVectorElementType();
8311 unsigned NumElts = VT.getVectorNumElements();
8312
8313 // Unroll into a build vector.
8314 SmallVector<SDValue, 8> Scalars(NumElts);
8315 SmallVector<SDValue, 8> Chains(NumElts);
8316
8317 for (unsigned i = 0; i != NumElts; ++i) {
8318 SDValue LHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, LHS, i);
8319 SDValue RHSElem = DAG.getExtractVectorElt(dl, TmpEltVT, RHS, i);
8320
8321 Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other},
8322 {Chain, LHSElem, RHSElem, CC});
8323 Chains[i] = Scalars[i].getValue(1);
8324 Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i],
8325 DAG.getBoolConstant(true, dl, EltVT, VT),
8326 DAG.getBoolConstant(false, dl, EltVT, VT));
8327 }
8328
8329 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
8330 ReplaceValueWith(SDValue(N, 1), NewChain);
8331
8332 return DAG.getBuildVector(VT, dl, Scalars);
8333}
8334
// Map an integer VECREDUCE_* opcode to the extension used to widen its scalar
// start value to the reduction's result type.
// NOTE(review): numbered extraction; the case labels guarding the
// SIGN_EXTEND and ZERO_EXTEND returns (original lines 8345-8346 and
// 8348-8349 — presumably the signed and unsigned min/max reductions) are
// elided; confirm against the full source.
8335static unsigned getExtendForIntVecReduction(unsigned Opc) {
8336 switch (Opc) {
8337 default:
8338 llvm_unreachable("Expected integer vector reduction");
8339 case ISD::VECREDUCE_ADD:
8340 case ISD::VECREDUCE_MUL:
8341 case ISD::VECREDUCE_AND:
8342 case ISD::VECREDUCE_OR:
8343 case ISD::VECREDUCE_XOR:
// High bits of the start value are irrelevant for these operations.
8344 return ISD::ANY_EXTEND;
// (elided case labels)
8347 return ISD::SIGN_EXTEND;
// (elided case labels)
8350 return ISD::ZERO_EXTEND;
8351 }
8352}
8353
// Widen the vector operand of a VECREDUCE_* node. Either emit the matching
// vp.reduce with an EVL that masks off the padding lanes, or pad the widened
// vector with the reduction's neutral element before reducing.
// NOTE(review): numbered extraction; the second line of the SplatVT
// construction (original line 8392 — presumably the GCD element count) is
// elided; confirm against the full source.
8354SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
8355 SDLoc dl(N);
8356 SDValue Op = GetWidenedVector(N->getOperand(0));
8357 EVT VT = N->getValueType(0);
8358 EVT OrigVT = N->getOperand(0).getValueType();
8359 EVT WideVT = Op.getValueType();
8360 EVT ElemVT = OrigVT.getVectorElementType();
8361 SDNodeFlags Flags = N->getFlags();
8362
8363 unsigned Opc = N->getOpcode();
8364 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Opc);
// The identity of the base binary op; padding with it leaves the reduction
// result unchanged.
8365 SDValue NeutralElem = DAG.getIdentityElement(BaseOpc, dl, ElemVT, Flags);
8366 assert(NeutralElem && "Neutral element must exist");
8367
8368 // Pad the vector with the neutral element.
8369 unsigned OrigElts = OrigVT.getVectorMinNumElements();
8370 unsigned WideElts = WideVT.getVectorMinNumElements();
8371
8372 // Generate a vp.reduce_op if it is custom/legal for the target. This avoids
8373 // needing to pad the source vector, because the inactive lanes can simply be
8374 // disabled and not contribute to the result.
8375 if (auto VPOpcode = ISD::getVPForBaseOpcode(Opc);
8376 VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WideVT)) {
8377 SDValue Start = NeutralElem;
// vp.reduce takes a scalar start value of the result type, so integer
// neutral elements must be extended to VT first.
8378 if (VT.isInteger())
8379 Start = DAG.getNode(getExtendForIntVecReduction(Opc), dl, VT, Start);
8380 assert(Start.getValueType() == VT);
8381 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
8382 WideVT.getVectorElementCount());
8383 SDValue Mask = DAG.getAllOnesConstant(dl, WideMaskVT);
// The EVL restricts the reduction to the original element count.
8384 SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
8385 OrigVT.getVectorElementCount());
8386 return DAG.getNode(*VPOpcode, dl, VT, {Start, Op, Mask, EVL}, Flags);
8387 }
8388
// Scalable vectors can't be padded element-by-element, so insert splat
// sub-vectors of GCD(OrigElts, WideElts) neutral elements instead.
8389 if (WideVT.isScalableVector()) {
8390 unsigned GCD = std::gcd(OrigElts, WideElts);
// (elided line: presumably the element count, GCD, completing SplatVT)
8391 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
8393 SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
8394 for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
8395 Op = DAG.getInsertSubvector(dl, Op, SplatNeutral, Idx);
8396 return DAG.getNode(Opc, dl, VT, Op, Flags);
8397 }
8398
8399 for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
8400 Op = DAG.getInsertVectorElt(dl, Op, NeutralElem, Idx);
8401
8402 return DAG.getNode(Opc, dl, VT, Op, Flags);
8403}
8404
// Widen the vector operand of a sequential (ordered) VECREDUCE_SEQ_* node.
// Same strategy as WidenVecOp_VECREDUCE, but the explicit accumulator operand
// AccOp is threaded through instead of a synthesized start value.
// NOTE(review): numbered extraction; the second line of the SplatVT
// construction (original line 8441 — presumably the GCD element count) is
// elided; confirm against the full source.
8405SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) {
8406 SDLoc dl(N);
8407 SDValue AccOp = N->getOperand(0);
8408 SDValue VecOp = N->getOperand(1);
8409 SDValue Op = GetWidenedVector(VecOp);
8410
8411 EVT VT = N->getValueType(0);
8412 EVT OrigVT = VecOp.getValueType();
8413 EVT WideVT = Op.getValueType();
8414 EVT ElemVT = OrigVT.getVectorElementType();
8415 SDNodeFlags Flags = N->getFlags();
8416
8417 unsigned Opc = N->getOpcode();
8418 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Opc);
// Identity of the base op — padding with it preserves the ordered result.
8419 SDValue NeutralElem = DAG.getIdentityElement(BaseOpc, dl, ElemVT, Flags);
8420
8421 // Pad the vector with the neutral element.
8422 unsigned OrigElts = OrigVT.getVectorMinNumElements();
8423 unsigned WideElts = WideVT.getVectorMinNumElements();
8424
8425 // Generate a vp.reduce_op if it is custom/legal for the target. This avoids
8426 // needing to pad the source vector, because the inactive lanes can simply be
8427 // disabled and not contribute to the result.
8428 if (auto VPOpcode = ISD::getVPForBaseOpcode(Opc);
8429 VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WideVT)) {
8430 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
8431 WideVT.getVectorElementCount());
8432 SDValue Mask = DAG.getAllOnesConstant(dl, WideMaskVT);
// EVL limits the reduction to the original lanes.
8433 SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
8434 OrigVT.getVectorElementCount());
8435 return DAG.getNode(*VPOpcode, dl, VT, {AccOp, Op, Mask, EVL}, Flags);
8436 }
8437
// Scalable vectors are padded with splat sub-vectors of the neutral element.
8438 if (WideVT.isScalableVector()) {
8439 unsigned GCD = std::gcd(OrigElts, WideElts);
// (elided line: presumably the element count, GCD, completing SplatVT)
8440 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
8442 SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
8443 for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
8444 Op = DAG.getInsertSubvector(dl, Op, SplatNeutral, Idx);
8445 return DAG.getNode(Opc, dl, VT, AccOp, Op, Flags);
8446 }
8447
8448 for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
8449 Op = DAG.getInsertVectorElt(dl, Op, NeutralElem, Idx);
8450
8451 return DAG.getNode(Opc, dl, VT, AccOp, Op, Flags);
8452}
8453
8454SDValue DAGTypeLegalizer::WidenVecOp_VP_REDUCE(SDNode *N) {
8455 assert(N->isVPOpcode() && "Expected VP opcode");
8456
8457 SDLoc dl(N);
8458 SDValue Op = GetWidenedVector(N->getOperand(1));
8459 SDValue Mask = GetWidenedMask(N->getOperand(2),
8460 Op.getValueType().getVectorElementCount());
8461
8462 return DAG.getNode(N->getOpcode(), dl, N->getValueType(0),
8463 {N->getOperand(0), Op, Mask, N->getOperand(3)},
8464 N->getFlags());
8465}
8466
8467SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
8468 // This only gets called in the case that the left and right inputs and
8469 // result are of a legal odd vector type, and the condition is illegal i1 of
8470 // the same odd width that needs widening.
8471 EVT VT = N->getValueType(0);
8472 assert(VT.isVector() && !VT.isPow2VectorType() && isTypeLegal(VT));
8473
8474 SDValue Cond = GetWidenedVector(N->getOperand(0));
8475 SDValue LeftIn = DAG.WidenVector(N->getOperand(1), SDLoc(N));
8476 SDValue RightIn = DAG.WidenVector(N->getOperand(2), SDLoc(N));
8477 SDLoc DL(N);
8478
8479 SDValue Select = DAG.getNode(N->getOpcode(), DL, LeftIn.getValueType(), Cond,
8480 LeftIn, RightIn);
8481 return DAG.getExtractSubvector(DL, VT, Select, 0);
8482}
8483
8484SDValue DAGTypeLegalizer::WidenVecOp_VP_CttzElements(SDNode *N) {
8485 SDLoc DL(N);
8486 SDValue Source = GetWidenedVector(N->getOperand(0));
8487 EVT SrcVT = Source.getValueType();
8488 SDValue Mask =
8489 GetWidenedMask(N->getOperand(1), SrcVT.getVectorElementCount());
8490
8491 return DAG.getNode(N->getOpcode(), DL, N->getValueType(0),
8492 {Source, Mask, N->getOperand(2)}, N->getFlags());
8493}
8494
8495SDValue DAGTypeLegalizer::WidenVecOp_VECTOR_FIND_LAST_ACTIVE(SDNode *N) {
8496 SDLoc DL(N);
8497 SDValue Mask = N->getOperand(0);
8498 EVT OrigMaskVT = Mask.getValueType();
8499 SDValue WideMask = GetWidenedVector(Mask);
8500 EVT WideMaskVT = WideMask.getValueType();
8501
8502 // Pad the mask with zeros to ensure inactive lanes don't affect the result.
8503 unsigned OrigElts = OrigMaskVT.getVectorNumElements();
8504 unsigned WideElts = WideMaskVT.getVectorNumElements();
8505 if (OrigElts != WideElts) {
8506 SDValue ZeroMask = DAG.getConstant(0, DL, WideMaskVT);
8507 WideMask = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideMaskVT, ZeroMask,
8508 Mask, DAG.getVectorIdxConstant(0, DL));
8509 }
8510
8511 return DAG.getNode(ISD::VECTOR_FIND_LAST_ACTIVE, DL, N->getValueType(0),
8512 WideMask);
8513}
8514
8515//===----------------------------------------------------------------------===//
8516// Vector Widening Utilities
8517//===----------------------------------------------------------------------===//
8518
8519// Utility function to find the type to chop up a widen vector for load/store
8520// TLI: Target lowering used to determine legal types.
8521// Width: Width left need to load/store.
8522// WidenVT: The widen vector type to load to/store from
8523// Align: If 0, don't allow use of a wider type
8524// WidenEx: If Align is not 0, the amount additional we can load/store from.
8525
// Pick the widest type usable to chop up a widened vector for a load/store of
// 'Width' bits: first a legal scalar integer (fixed-width only), then a legal
// vector with the same element type that divides WidenVT evenly; returns
// nullopt for scalable vectors when no suitable vector type exists.
// NOTE(review): numbered extraction; one alternative of each type-action
// disjunction (original lines 8551 and 8573 — presumably allowing promoted
// types as well as legal ones) is elided; confirm against the full source.
8526static std::optional<EVT> findMemType(SelectionDAG &DAG,
8527 const TargetLowering &TLI, unsigned Width,
8528 EVT WidenVT, unsigned Align = 0,
8529 unsigned WidenEx = 0) {
8530 EVT WidenEltVT = WidenVT.getVectorElementType();
8531 const bool Scalable = WidenVT.isScalableVector();
8532 unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinValue();
8533 unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
8534 unsigned AlignInBits = Align*8;
8535
// Fallback answer: a single element.
8536 EVT RetVT = WidenEltVT;
8537 // Don't bother looking for an integer type if the vector is scalable, skip
8538 // to vector types.
8539 if (!Scalable) {
8540 // If we have one element to load/store, return it.
8541 if (Width == WidenEltWidth)
8542 return RetVT;
8543
8544 // See if there is larger legal integer than the element type to load/store.
// Iterate from widest to narrowest; the first acceptable type wins.
8545 for (EVT MemVT : reverse(MVT::integer_valuetypes())) {
8546 unsigned MemVTWidth = MemVT.getSizeInBits();
8547 if (MemVT.getSizeInBits() <= WidenEltWidth)
8548 break;
8549 auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
// (second alternative of the condition elided)
// Accept only widths that tile WidenVT a power-of-2 number of times and
// either fit in the remaining width or are covered by alignment + WidenEx.
8550 if ((Action == TargetLowering::TypeLegal ||
8552 (WidenWidth % MemVTWidth) == 0 &&
8553 isPowerOf2_32(WidenWidth / MemVTWidth) &&
8554 (MemVTWidth <= Width ||
8555 (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
8556 if (MemVTWidth == WidenWidth)
8557 return MemVT;
8558 RetVT = MemVT;
8559 break;
8560 }
8561 }
8562 }
8563
8564 // See if there is a larger vector type to load/store that has the same vector
8565 // element type and is evenly divisible with the WidenVT.
8566 for (EVT MemVT : reverse(MVT::vector_valuetypes())) {
8567 // Skip vector MVTs which don't match the scalable property of WidenVT.
8568 if (Scalable != MemVT.isScalableVector())
8569 continue;
8570 unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinValue();
8571 auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
// (second alternative of the condition elided)
8572 if ((Action == TargetLowering::TypeLegal ||
8574 WidenEltVT == MemVT.getVectorElementType() &&
8575 (WidenWidth % MemVTWidth) == 0 &&
8576 isPowerOf2_32(WidenWidth / MemVTWidth) &&
8577 (MemVTWidth <= Width ||
8578 (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
// Prefer the vector type if it beats the integer candidate found above.
8579 if (RetVT.getFixedSizeInBits() < MemVTWidth || MemVT == WidenVT)
8580 return MemVT;
8581 }
8582 }
8583
8584 // Using element-wise loads and stores for widening operations is not
8585 // supported for scalable vectors
8586 if (Scalable)
8587 return std::nullopt;
8588
8589 return RetVT;
8590}
8591
8592// Builds a vector type from scalar loads
8593// VecTy: Resulting Vector type
8594// LDOps: Load operators to build a vector type
8595// [Start,End) the list of loads to use.
// NOTE(review): numbered extraction; the start of this function's signature
// (original lines 8596-8597 — presumably its name and the SelectionDAG, EVT
// and load-list parameters) is elided; confirm against the full source.
8598 unsigned Start, unsigned End) {
8599 SDLoc dl(LdOps[Start]);
8600 EVT LdTy = LdOps[Start].getValueType();
8601 unsigned Width = VecTy.getSizeInBits();
8602 unsigned NumElts = Width / LdTy.getSizeInBits();
8603 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);
8604
// Seed the vector with the first load; subsequent loads are inserted after.
8605 unsigned Idx = 1;
8606 SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT,LdOps[Start]);
8607
8608 for (unsigned i = Start + 1; i != End; ++i) {
8609 EVT NewLdTy = LdOps[i].getValueType();
// When the load width changes, re-view the partial vector at the new
// element width and rescale the insertion index accordingly.
8610 if (NewLdTy != LdTy) {
8611 NumElts = Width / NewLdTy.getSizeInBits();
8612 NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
8613 VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
8614 // Readjust position and vector position based on new load type.
8615 Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
8616 LdTy = NewLdTy;
8617 }
8618 VecOp = DAG.getInsertVectorElt(dl, VecOp, LdOps[i], Idx++);
8619 }
// Finally view the assembled bits as the requested vector type.
8620 return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
8621}
8622
8623SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
8624 LoadSDNode *LD) {
8625 // The strategy assumes that we can efficiently load power-of-two widths.
8626 // The routine chops the vector into the largest vector loads with the same
8627 // element type or scalar loads and then recombines it to the widen vector
8628 // type.
8629 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
8630 EVT LdVT = LD->getMemoryVT();
8631 SDLoc dl(LD);
8632 assert(LdVT.isVector() && WidenVT.isVector());
8633 assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
8635
8636 // Load information
8637 SDValue Chain = LD->getChain();
8638 SDValue BasePtr = LD->getBasePtr();
8639 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
8640 AAMDNodes AAInfo = LD->getAAInfo();
8641
8642 TypeSize LdWidth = LdVT.getSizeInBits();
8643 TypeSize WidenWidth = WidenVT.getSizeInBits();
8644 TypeSize WidthDiff = WidenWidth - LdWidth;
8645 // Allow wider loads if they are sufficiently aligned to avoid memory faults
8646 // and if the original load is simple.
8647 unsigned LdAlign =
8648 (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value();
8649
8650 // Find the vector type that can load from.
8651 std::optional<EVT> FirstVT =
8652 findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, LdAlign,
8653 WidthDiff.getKnownMinValue());
8654
8655 if (!FirstVT)
8656 return SDValue();
8657
8658 SmallVector<EVT, 8> MemVTs;
8659 TypeSize FirstVTWidth = FirstVT->getSizeInBits();
8660
8661 // Unless we're able to load in one instruction we must work out how to load
8662 // the remainder.
8663 if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) {
8664 std::optional<EVT> NewVT = FirstVT;
8665 TypeSize RemainingWidth = LdWidth;
8666 TypeSize NewVTWidth = FirstVTWidth;
8667 do {
8668 RemainingWidth -= NewVTWidth;
8669 if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) {
8670 // The current type we are using is too large. Find a better size.
8671 NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinValue(),
8672 WidenVT, LdAlign, WidthDiff.getKnownMinValue());
8673 if (!NewVT)
8674 return SDValue();
8675 NewVTWidth = NewVT->getSizeInBits();
8676 }
8677 MemVTs.push_back(*NewVT);
8678 } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
8679 }
8680
8681 SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
8682 LD->getBaseAlign(), MMOFlags, AAInfo);
8683 LdChain.push_back(LdOp.getValue(1));
8684
8685 // Check if we can load the element with one instruction.
8686 if (MemVTs.empty())
8687 return coerceLoadedValue(LdOp, *FirstVT, WidenVT, LdWidth, FirstVTWidth, dl,
8688 DAG);
8689
8690 // Load vector by using multiple loads from largest vector to scalar.
8692 LdOps.push_back(LdOp);
8693
8694 uint64_t ScaledOffset = 0;
8695 MachinePointerInfo MPI = LD->getPointerInfo();
8696
8697 // First incremement past the first load.
8698 IncrementPointer(cast<LoadSDNode>(LdOp), *FirstVT, MPI, BasePtr,
8699 &ScaledOffset);
8700
8701 for (EVT MemVT : MemVTs) {
8702 Align NewAlign = ScaledOffset == 0
8703 ? LD->getBaseAlign()
8704 : commonAlignment(LD->getAlign(), ScaledOffset);
8705 SDValue L =
8706 DAG.getLoad(MemVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);
8707
8708 LdOps.push_back(L);
8709 LdChain.push_back(L.getValue(1));
8710 IncrementPointer(cast<LoadSDNode>(L), MemVT, MPI, BasePtr, &ScaledOffset);
8711 }
8712
8713 // Build the vector from the load operations.
8714 unsigned End = LdOps.size();
8715 if (!LdOps[0].getValueType().isVector())
8716 // All the loads are scalar loads.
8717 return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
8718
8719 // If the load contains vectors, build the vector using concat vector.
8720 // All of the vectors used to load are power-of-2, and the scalar loads can be
8721 // combined to make a power-of-2 vector.
8722 SmallVector<SDValue, 16> ConcatOps(End);
8723 int i = End - 1;
8724 int Idx = End;
8725 EVT LdTy = LdOps[i].getValueType();
8726 // First, combine the scalar loads to a vector.
8727 if (!LdTy.isVector()) {
8728 for (--i; i >= 0; --i) {
8729 LdTy = LdOps[i].getValueType();
8730 if (LdTy.isVector())
8731 break;
8732 }
8733 ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End);
8734 }
8735
8736 ConcatOps[--Idx] = LdOps[i];
8737 for (--i; i >= 0; --i) {
8738 EVT NewLdTy = LdOps[i].getValueType();
8739 if (NewLdTy != LdTy) {
8740 // Create a larger vector.
8741 TypeSize LdTySize = LdTy.getSizeInBits();
8742 TypeSize NewLdTySize = NewLdTy.getSizeInBits();
8743 assert(NewLdTySize.isScalable() == LdTySize.isScalable() &&
8744 NewLdTySize.isKnownMultipleOf(LdTySize.getKnownMinValue()));
8745 unsigned NumOps =
8746 NewLdTySize.getKnownMinValue() / LdTySize.getKnownMinValue();
8748 unsigned j = 0;
8749 for (; j != End-Idx; ++j)
8750 WidenOps[j] = ConcatOps[Idx+j];
8751 for (; j != NumOps; ++j)
8752 WidenOps[j] = DAG.getPOISON(LdTy);
8753
8754 ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
8755 WidenOps);
8756 Idx = End - 1;
8757 LdTy = NewLdTy;
8758 }
8759 ConcatOps[--Idx] = LdOps[i];
8760 }
8761
8762 if (WidenWidth == LdTy.getSizeInBits() * (End - Idx))
8763 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
8764 ArrayRef(&ConcatOps[Idx], End - Idx));
8765
8766 // We need to fill the rest with undefs to build the vector.
8767 unsigned NumOps =
8768 WidenWidth.getKnownMinValue() / LdTy.getSizeInBits().getKnownMinValue();
8770 SDValue UndefVal = DAG.getPOISON(LdTy);
8771 {
8772 unsigned i = 0;
8773 for (; i != End-Idx; ++i)
8774 WidenOps[i] = ConcatOps[Idx+i];
8775 for (; i != NumOps; ++i)
8776 WidenOps[i] = UndefVal;
8777 }
8778 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps);
8779}
8780
8781SDValue
8782DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
8783 LoadSDNode *LD,
8784 ISD::LoadExtType ExtType) {
8785 // For extension loads, it may not be more efficient to chop up the vector
8786 // and then extend it. Instead, we unroll the load and build a new vector.
8787 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0));
8788 EVT LdVT = LD->getMemoryVT();
8789 SDLoc dl(LD);
8790 assert(LdVT.isVector() && WidenVT.isVector());
8791 assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
8792
8793 // Load information
8794 SDValue Chain = LD->getChain();
8795 SDValue BasePtr = LD->getBasePtr();
8796 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
8797 AAMDNodes AAInfo = LD->getAAInfo();
8798
8799 if (LdVT.isScalableVector())
8800 return SDValue();
8801
8802 EVT EltVT = WidenVT.getVectorElementType();
8803 EVT LdEltVT = LdVT.getVectorElementType();
8804 unsigned NumElts = LdVT.getVectorNumElements();
8805
8806 // Load each element and widen.
8807 unsigned WidenNumElts = WidenVT.getVectorNumElements();
8808 SmallVector<SDValue, 16> Ops(WidenNumElts);
8809 unsigned Increment = LdEltVT.getSizeInBits() / 8;
8810 Ops[0] =
8811 DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(),
8812 LdEltVT, LD->getBaseAlign(), MMOFlags, AAInfo);
8813 LdChain.push_back(Ops[0].getValue(1));
8814 unsigned i = 0, Offset = Increment;
8815 for (i=1; i < NumElts; ++i, Offset += Increment) {
8816 SDValue NewBasePtr =
8817 DAG.getObjectPtrOffset(dl, BasePtr, TypeSize::getFixed(Offset));
8818 Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
8819 LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
8820 LD->getBaseAlign(), MMOFlags, AAInfo);
8821 LdChain.push_back(Ops[i].getValue(1));
8822 }
8823
8824 // Fill the rest with undefs.
8825 SDValue UndefVal = DAG.getPOISON(EltVT);
8826 for (; i != WidenNumElts; ++i)
8827 Ops[i] = UndefVal;
8828
8829 return DAG.getBuildVector(WidenVT, dl, Ops);
8830}
8831
8832bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
8833 StoreSDNode *ST) {
8834 // The strategy assumes that we can efficiently store power-of-two widths.
8835 // The routine chops the vector into the largest vector stores with the same
8836 // element type or scalar stores.
8837 SDValue Chain = ST->getChain();
8838 SDValue BasePtr = ST->getBasePtr();
8839 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
8840 AAMDNodes AAInfo = ST->getAAInfo();
8841 SDValue ValOp = GetWidenedVector(ST->getValue());
8842 SDLoc dl(ST);
8843
8844 EVT StVT = ST->getMemoryVT();
8845 TypeSize StWidth = StVT.getSizeInBits();
8846 EVT ValVT = ValOp.getValueType();
8847 TypeSize ValWidth = ValVT.getSizeInBits();
8848 EVT ValEltVT = ValVT.getVectorElementType();
8849 unsigned ValEltWidth = ValEltVT.getFixedSizeInBits();
8850 assert(StVT.getVectorElementType() == ValEltVT);
8851 assert(StVT.isScalableVector() == ValVT.isScalableVector() &&
8852 "Mismatch between store and value types");
8853
8854 int Idx = 0; // current index to store
8855
8856 MachinePointerInfo MPI = ST->getPointerInfo();
8857 uint64_t ScaledOffset = 0;
8858
8859 // A breakdown of how to widen this vector store. Each element of the vector
8860 // is a memory VT combined with the number of times it is to be stored to,
8861 // e,g., v5i32 -> {{v2i32,2},{i32,1}}
8863
8864 while (StWidth.isNonZero()) {
8865 // Find the largest vector type we can store with.
8866 std::optional<EVT> NewVT =
8867 findMemType(DAG, TLI, StWidth.getKnownMinValue(), ValVT);
8868 if (!NewVT)
8869 return false;
8870 MemVTs.push_back({*NewVT, 0});
8871 TypeSize NewVTWidth = NewVT->getSizeInBits();
8872
8873 do {
8874 StWidth -= NewVTWidth;
8875 MemVTs.back().second++;
8876 } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
8877 }
8878
8879 for (const auto &Pair : MemVTs) {
8880 EVT NewVT = Pair.first;
8881 unsigned Count = Pair.second;
8882 TypeSize NewVTWidth = NewVT.getSizeInBits();
8883
8884 if (NewVT.isVector()) {
8885 unsigned NumVTElts = NewVT.getVectorMinNumElements();
8886 do {
8887 Align NewAlign = ScaledOffset == 0
8888 ? ST->getBaseAlign()
8889 : commonAlignment(ST->getAlign(), ScaledOffset);
8890 SDValue EOp = DAG.getExtractSubvector(dl, NewVT, ValOp, Idx);
8891 SDValue PartStore = DAG.getStore(Chain, dl, EOp, BasePtr, MPI, NewAlign,
8892 MMOFlags, AAInfo);
8893 StChain.push_back(PartStore);
8894
8895 Idx += NumVTElts;
8896 IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr,
8897 &ScaledOffset);
8898 } while (--Count);
8899 } else {
8900 // Cast the vector to the scalar type we can store.
8901 unsigned NumElts = ValWidth.getFixedValue() / NewVTWidth.getFixedValue();
8902 EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
8903 SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
8904 // Readjust index position based on new vector type.
8905 Idx = Idx * ValEltWidth / NewVTWidth.getFixedValue();
8906 do {
8907 SDValue EOp = DAG.getExtractVectorElt(dl, NewVT, VecOp, Idx++);
8908 SDValue PartStore = DAG.getStore(Chain, dl, EOp, BasePtr, MPI,
8909 ST->getBaseAlign(), MMOFlags, AAInfo);
8910 StChain.push_back(PartStore);
8911
8912 IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr);
8913 } while (--Count);
8914 // Restore index back to be relative to the original widen element type.
8915 Idx = Idx * NewVTWidth.getFixedValue() / ValEltWidth;
8916 }
8917 }
8918
8919 return true;
8920}
8921
8922/// Modifies a vector input (widen or narrows) to a vector of NVT. The
8923/// input vector must have the same element type as NVT.
8924/// FillWithZeroes specifies that the vector should be widened with zeroes.
8925SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
8926 bool FillWithZeroes) {
8927 // Note that InOp might have been widened so it might already have
8928 // the right width or it might need be narrowed.
8929 EVT InVT = InOp.getValueType();
8931 "input and widen element type must match");
8932 assert(InVT.isScalableVector() == NVT.isScalableVector() &&
8933 "cannot modify scalable vectors in this way");
8934 SDLoc dl(InOp);
8935
8936 // Check if InOp already has the right width.
8937 if (InVT == NVT)
8938 return InOp;
8939
8940 ElementCount InEC = InVT.getVectorElementCount();
8941 ElementCount WidenEC = NVT.getVectorElementCount();
8942 if (WidenEC.hasKnownScalarFactor(InEC)) {
8943 unsigned NumConcat = WidenEC.getKnownScalarFactor(InEC);
8944 SmallVector<SDValue, 16> Ops(NumConcat);
8945 SDValue FillVal =
8946 FillWithZeroes ? DAG.getConstant(0, dl, InVT) : DAG.getPOISON(InVT);
8947 Ops[0] = InOp;
8948 for (unsigned i = 1; i != NumConcat; ++i)
8949 Ops[i] = FillVal;
8950
8951 return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
8952 }
8953
8954 if (InEC.hasKnownScalarFactor(WidenEC))
8955 return DAG.getExtractSubvector(dl, NVT, InOp, 0);
8956
8957 assert(!InVT.isScalableVector() && !NVT.isScalableVector() &&
8958 "Scalable vectors should have been handled already.");
8959
8960 unsigned InNumElts = InEC.getFixedValue();
8961 unsigned WidenNumElts = WidenEC.getFixedValue();
8962
8963 // Fall back to extract and build (+ mask, if padding with zeros).
8964 SmallVector<SDValue, 16> Ops(WidenNumElts);
8965 EVT EltVT = NVT.getVectorElementType();
8966 unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
8967 unsigned Idx;
8968 for (Idx = 0; Idx < MinNumElts; ++Idx)
8969 Ops[Idx] = DAG.getExtractVectorElt(dl, EltVT, InOp, Idx);
8970
8971 SDValue UndefVal = DAG.getPOISON(EltVT);
8972 for (; Idx < WidenNumElts; ++Idx)
8973 Ops[Idx] = UndefVal;
8974
8975 SDValue Widened = DAG.getBuildVector(NVT, dl, Ops);
8976 if (!FillWithZeroes)
8977 return Widened;
8978
8979 assert(NVT.isInteger() &&
8980 "We expect to never want to FillWithZeroes for non-integral types.");
8981
8983 MaskOps.append(MinNumElts, DAG.getAllOnesConstant(dl, EltVT));
8984 MaskOps.append(WidenNumElts - MinNumElts, DAG.getConstant(0, dl, EltVT));
8985
8986 return DAG.getNode(ISD::AND, dl, NVT, Widened,
8987 DAG.getBuildVector(NVT, dl, MaskOps));
8988}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static unsigned getExtendForIntVecReduction(SDNode *N)
static SDValue BuildVectorFromScalar(SelectionDAG &DAG, EVT VecTy, SmallVectorImpl< SDValue > &LdOps, unsigned Start, unsigned End)
static std::optional< EVT > findMemType(SelectionDAG &DAG, const TargetLowering &TLI, unsigned Width, EVT WidenVT, unsigned Align, unsigned WidenEx)
static EVT getSETCCOperandType(SDValue N)
static bool isSETCCOp(unsigned Opcode)
static bool isLogicalMaskOp(unsigned Opcode)
static bool isSETCCorConvertedSETCC(SDValue N)
static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI, SmallVectorImpl< SDValue > &ConcatOps, unsigned ConcatEnd, EVT VT, EVT MaxVT, EVT WidenVT)
static SDValue coerceLoadedValue(SDValue LdOp, EVT FirstVT, EVT WidenVT, TypeSize LdWidth, TypeSize FirstVTWidth, SDLoc dl, SelectionDAG &DAG)
Either return the same load or provide appropriate casts from the load and return that.
#define I(x, y, z)
Definition MD5.cpp:57
static bool isUndef(const MachineInstr &MI)
This file provides utility analysis objects describing memory locations.
MachineInstr unsigned OpIdx
uint64_t High
#define P(N)
const SmallVectorImpl< MachineOperand > & Cond
static Type * getValueType(Value *V, bool LookThroughCmp=false)
Returns the "element type" of the given value/instruction V.
This file implements the SmallBitVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
Value * RHS
Value * LHS
This is an SDNode representing atomic operations.
LLVM_ABI unsigned getVScaleRangeMin() const
Returns the minimum value for the vscale_range attribute.
bool isValid() const
Return true if the attribute is any kind of attribute.
Definition Attributes.h:261
static constexpr ElementCount getScalable(ScalarTy MinVal)
Definition TypeSize.h:312
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
This class is used to represent ISD::LOAD nodes.
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
static auto integer_valuetypes()
static auto vector_valuetypes()
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
This class is used to represent an MGATHER node.
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getInc() const
const SDValue & getScale() const
const SDValue & getMask() const
const SDValue & getIntID() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
ISD::MemIndexType getIndexType() const
This class is used to represent an MLOAD node.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
const SDValue & getMask() const
const SDValue & getPassThru() const
const SDValue & getOffset() const
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
This class is used to represent an MSTORE node.
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
const SDValue & getOffset() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
This is an abstract virtual class for memory operations.
Align getBaseAlign() const
Returns alignment and volatility of the memory access.
const MDNode * getRanges() const
Returns the Ranges that describes the dereference.
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isStrictFPOpcode()
Test if this node is a strict floating point pseudo-op.
const APInt & getAsAPIntVal() const
Helper method returns the APInt value of a ConstantSDNode.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
SDNodeFlags getFlags() const
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getNumOperands() const
Return the number of values used by this operation.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getInsertVectorElt(const SDLoc &DL, SDValue Vec, SDValue Elt, unsigned Idx)
Insert Elt into Vec at offset Idx.
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getPOISON(EVT VT)
Return a POISON node. POISON does not have a useful SDLoc.
LLVMContext * getContext() const
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
Vector takeVector()
Clear the SetVector and return the underlying vector.
Definition SetVector.h:94
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
pointer data()
Return a pointer to the vector's buffer, even if empty().
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
LegalizeTypeAction
This enum indicates whether a types are legal for a target, and if not, what action should be used to...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
BooleanContent
Enum that describes how the target represents true/false values.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
static ISD::NodeType getExtendForContent(BooleanContent Content)
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc,...
bool isUnindexed() const
Return true if this is NOT a pre/post inc/dec load/store.
This class is used to represent an VP_GATHER node.
const SDValue & getScale() const
ISD::MemIndexType getIndexType() const
How is Index applied to BasePtr when computing addresses.
const SDValue & getVectorLength() const
const SDValue & getIndex() const
const SDValue & getBasePtr() const
const SDValue & getMask() const
This class is used to represent a VP_LOAD node.
const SDValue & getValue() const
This class is used to represent a VP_STORE node.
This class is used to represent an EXPERIMENTAL_VP_STRIDED_LOAD node.
const SDValue & getMask() const
ISD::LoadExtType getExtensionType() const
const SDValue & getStride() const
const SDValue & getOffset() const
const SDValue & getVectorLength() const
const SDValue & getBasePtr() const
This class is used to represent an EXPERIMENTAL_VP_STRIDED_STORE node.
const SDValue & getBasePtr() const
const SDValue & getMask() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if this is a truncating store.
const SDValue & getOffset() const
const SDValue & getVectorLength() const
const SDValue & getStride() const
bool isCompressingStore() const
Returns true if the op does a compression to the vector before storing.
constexpr bool isKnownMultipleOf(ScalarTy RHS) const
This function tells the caller whether the element count is known at compile time to be a multiple of...
Definition TypeSize.h:180
constexpr bool hasKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns true if there exists a value X where RHS.multiplyCoefficientBy(X) will result in a value whos...
Definition TypeSize.h:269
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
static constexpr bool isKnownLE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:230
constexpr bool isNonZero() const
Definition TypeSize.h:155
constexpr ScalarTy getKnownScalarFactor(const FixedOrScalableQuantity &RHS) const
Returns a value X where RHS.multiplyCoefficientBy(X) will result in a value whose quantity matches ou...
Definition TypeSize.h:277
static constexpr bool isKnownLT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:216
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr bool isKnownEven() const
A return value of true indicates we know at compile time that the number of elements (vscale * Min) i...
Definition TypeSize.h:176
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
static constexpr bool isKnownGT(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:223
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
Definition TypeSize.h:237
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition ISDOpcodes.h:511
@ POISON
POISON - A poison node.
Definition ISDOpcodes.h:236
@ PARTIAL_REDUCE_SMLA
PARTIAL_REDUCE_[U|S]MLA(Accumulator, Input1, Input2) The partial reduction nodes sign or zero extend ...
@ LOOP_DEPENDENCE_RAW_MASK
@ VECREDUCE_SEQ_FADD
Generic reduction nodes.
@ MLOAD
Masked load and store - consecutive vector load and store operations with additional mask operand tha...
@ INSERT_SUBVECTOR
INSERT_SUBVECTOR(VECTOR1, VECTOR2, IDX) - Returns a vector with VECTOR2 inserted into VECTOR1.
Definition ISDOpcodes.h:600
@ BSWAP
Byte Swap and Counting operators.
Definition ISDOpcodes.h:779
@ SMULFIX
RESULT = [US]MULFIX(LHS, RHS, SCALE) - Perform fixed point multiplication on 2 integers with the same...
Definition ISDOpcodes.h:394
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ SMULFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:400
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ CTTZ_ELTS
Returns the number of number of trailing (least significant) zero elements in a vector.
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ VECTOR_FIND_LAST_ACTIVE
Finds the index of the last active mask element Operands: Mask.
@ FMODF
FMODF - Decomposes the operand into integral and fractional parts, each having the same type and sign...
@ FATAN2
FATAN2 - atan2, inspired by libm.
@ FSINCOSPI
FSINCOSPI - Compute both the sine and cosine times pi more accurately than FSINCOS(pi*x),...
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition ISDOpcodes.h:880
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ VECREDUCE_FMAX
FMIN/FMAX nodes can have flags, for NaN/NoNaN variants.
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ VECREDUCE_FMAXIMUM
FMINIMUM/FMAXIMUM nodes propagate NaNs and signed zeroes using the llvm.minimum and llvm....
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition ISDOpcodes.h:747
@ SIGN_EXTEND_VECTOR_INREG
SIGN_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register sign-extension of the low ...
Definition ISDOpcodes.h:910
@ FPTRUNC_ROUND
FPTRUNC_ROUND - This corresponds to the fptrunc_round intrinsic.
Definition ISDOpcodes.h:515
@ FAKE_USE
FAKE_USE represents a use of the operand but does not do anything.
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ CLMUL
Carry-less multiplication operations.
Definition ISDOpcodes.h:774
@ FLDEXP
FLDEXP - ldexp, inspired by libm (op0 * 2**op1).
@ SDIVFIX
RESULT = [US]DIVFIX(LHS, RHS, SCALE) - Perform fixed point division on 2 integers with the same width...
Definition ISDOpcodes.h:407
@ CONVERT_FROM_ARBITRARY_FP
CONVERT_FROM_ARBITRARY_FP - This operator converts from an arbitrary floating-point represented as an...
@ CTLZ_ZERO_POISON
Definition ISDOpcodes.h:788
@ PARTIAL_REDUCE_UMLA
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ AVGCEILS
AVGCEILS/AVGCEILU - Rounding averaging add - Add two integers using an integer of type i[N+2],...
Definition ISDOpcodes.h:715
@ STRICT_UINT_TO_FP
Definition ISDOpcodes.h:485
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ VECREDUCE_FADD
These reductions have relaxed evaluation order semantics, and have a single vector operand.
@ PARTIAL_REDUCE_FMLA
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ SSUBO
Same for subtraction.
Definition ISDOpcodes.h:352
@ VECTOR_INTERLEAVE
VECTOR_INTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor to...
Definition ISDOpcodes.h:635
@ STEP_VECTOR
STEP_VECTOR(IMM) - Returns a scalable vector whose lanes are comprised of a linear sequence of unsign...
Definition ISDOpcodes.h:691
@ FCANONICALIZE
Returns platform specific canonical encoding of a floating point number.
Definition ISDOpcodes.h:541
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition ISDOpcodes.h:374
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ UNDEF
UNDEF - An undefined node.
Definition ISDOpcodes.h:233
@ SPLAT_VECTOR
SPLAT_VECTOR(VAL) - Returns a vector with the scalar value VAL duplicated in all lanes.
Definition ISDOpcodes.h:672
@ GET_ACTIVE_LANE_MASK
GET_ACTIVE_LANE_MASK - this corresponds to the llvm.get.active.lane.mask intrinsic.
@ SADDO
RESULT, BOOL = [SU]ADDO(LHS, RHS) - Overflow-aware nodes for addition.
Definition ISDOpcodes.h:348
@ ARITH_FENCE
ARITH_FENCE - This corresponds to an arithmetic fence intrinsic.
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition ISDOpcodes.h:704
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ AssertNoFPClass
AssertNoFPClass - These nodes record if a register contains a float value that is known to be not som...
Definition ISDOpcodes.h:78
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimumNumber or maximumNumber on two values,...
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum maximum on two values, following IEEE-754 definition...
@ SSHLSAT
RESULT = [US]SHLSAT(LHS, RHS) - Perform saturation left shift.
Definition ISDOpcodes.h:386
@ SMULO
Same for multiplication.
Definition ISDOpcodes.h:356
@ VECTOR_SPLICE_LEFT
VECTOR_SPLICE_LEFT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1, VEC2) left by OFFSET elements an...
Definition ISDOpcodes.h:653
@ ANY_EXTEND_VECTOR_INREG
ANY_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register any-extension of the low la...
Definition ISDOpcodes.h:899
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition ISDOpcodes.h:727
@ MASKED_UDIV
Masked vector arithmetic that returns poison on disabled lanes.
@ VECTOR_REVERSE
VECTOR_REVERSE(VECTOR) - Returns a vector, of the same type as VECTOR, whose elements are shuffled us...
Definition ISDOpcodes.h:640
@ SDIVFIXSAT
Same as the corresponding unsaturated fixed point instructions, but the result is clamped between the...
Definition ISDOpcodes.h:413
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition ISDOpcodes.h:805
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition ISDOpcodes.h:484
@ MGATHER
Masked gather and scatter - load and store operations for a vector of random addresses with additiona...
@ STRICT_FP_TO_UINT
Definition ISDOpcodes.h:478
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition ISDOpcodes.h:500
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:477
@ FMINIMUM
FMINIMUM/FMAXIMUM - NaN-propagating minimum/maximum that also treat -0.0 as less than 0....
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:505
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ SCMP
[US]CMP - 3-way comparison of signed or unsigned integers.
Definition ISDOpcodes.h:735
@ AVGFLOORS
AVGFLOORS/AVGFLOORU - Averaging add - Add two integers using an integer of type i[N+1],...
Definition ISDOpcodes.h:710
@ VECTOR_SPLICE_RIGHT
VECTOR_SPLICE_RIGHT(VEC1, VEC2, OFFSET) - Shifts CONCAT_VECTORS(VEC1,VEC2) right by OFFSET elements a...
Definition ISDOpcodes.h:657
@ FREEZE
FREEZE - FREEZE(VAL) returns an arbitrary value if VAL is UNDEF (or is evaluated to UNDEF),...
Definition ISDOpcodes.h:241
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ CTTZ_ZERO_POISON
Bit counting operators with a poisoned result for zero inputs.
Definition ISDOpcodes.h:787
@ FFREXP
FFREXP - frexp, extract fractional and exponent component of a floating-point value.
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ VECTOR_COMPRESS
VECTOR_COMPRESS(Vec, Mask, Passthru) consecutively place vector elements based on mask e....
Definition ISDOpcodes.h:699
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition ISDOpcodes.h:921
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition ISDOpcodes.h:997
@ EXPERIMENTAL_VECTOR_HISTOGRAM
Experimental vector histogram intrinsic Operands: Input Chain, Inc, Mask, Base, Index,...
@ FP_TO_SINT_SAT
FP_TO_[US]INT_SAT - Convert floating point value in operand 0 to a signed or unsigned scalar integer ...
Definition ISDOpcodes.h:945
@ VECREDUCE_FMINIMUM
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
@ VECREDUCE_SEQ_FMUL
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ PARTIAL_REDUCE_SUMLA
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition ISDOpcodes.h:365
@ VECTOR_DEINTERLEAVE
VECTOR_DEINTERLEAVE(VEC1, VEC2, ...) - Returns N vectors from N input vectors, where N is the factor ...
Definition ISDOpcodes.h:624
@ CTTZ_ELTS_ZERO_POISON
@ FMINIMUMNUM
FMINIMUMNUM/FMAXIMUMNUM - minimumnum/maximumnum that is same with FMINNUM_IEEE and FMAXNUM_IEEE besid...
@ ABDS
ABDS/ABDU - Absolute difference - Return the absolute difference between two numbers interpreted as s...
Definition ISDOpcodes.h:722
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
@ LOOP_DEPENDENCE_WAR_MASK
The llvm.loop.dependence.
LLVM_ABI bool isBuildVectorOfConstantSDNodes(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR node of all ConstantSDNode or undef.
LLVM_ABI NodeType getUnmaskedBinOpOpcode(unsigned MaskedOpc)
Given a MaskedOpc of ISD::MASKED_(U|S)(DIV|REM), returns the unmasked ISD::(U|S)(DIV|REM).
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
LLVM_ABI std::optional< unsigned > getVPForBaseOpcode(unsigned Opcode)
Translate this non-VP Opcode to its corresponding VP Opcode.
MemIndexType
MemIndexType enum - This enum defines how to interpret MGATHER/SCATTER's index parameter when calcula...
LLVM_ABI bool isConstantSplatVector(const SDNode *N, APInt &SplatValue)
Node predicates.
LLVM_ABI NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode)
Get underlying scalar opcode for VECREDUCE opcode.
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
constexpr double e
Context & getContext() const
Definition BasicBlock.h:99
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1764
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
constexpr int PoisonMaskElem
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1884
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI void processShuffleMasks(ArrayRef< int > Mask, unsigned NumOfSrcRegs, unsigned NumOfDestRegs, unsigned NumOfUsedRegs, function_ref< void()> NoInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned)> SingleInputAction, function_ref< void(ArrayRef< int >, unsigned, unsigned, bool)> ManyInputsAction)
Splits and processes shuffle mask depending on the number of input and output registers.
@ Increment
Incrementally increasing token ID.
Definition AllocToken.h:26
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
EVT getDoubleNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:479
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
bool isByteSized() const
Return true if the bit size is a multiple of 8.
Definition ValueTypes.h:251
unsigned getVectorMinNumElements() const
Given a vector type, return the minimum number of elements it contains.
Definition ValueTypes.h:367
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
bool isPow2VectorType() const
Returns true if the given vector is a power of 2.
Definition ValueTypes.h:486
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition ValueTypes.h:61
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition ValueTypes.h:389
EVT widenIntegerVectorElementType(LLVMContext &Context) const
Return a VT for an integer vector type with the size of the elements doubled.
Definition ValueTypes.h:460
bool isFixedLengthVector() const
Definition ValueTypes.h:189
static EVT getFloatingPointVT(unsigned BitWidth)
Returns the EVT that represents a floating-point type with the given number of bits.
Definition ValueTypes.h:55
EVT getRoundIntegerType(LLVMContext &Context) const
Rounds the bit-width of the given integer EVT up to the nearest power of two (and at least to eight),...
Definition ValueTypes.h:427
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool bitsEq(EVT VT) const
Return true if this has the same number of bits as VT.
Definition ValueTypes.h:264
LLVM_ABI Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
bool isScalableVector() const
Return true if this is a vector type where the runtime length is machine dependent.
Definition ValueTypes.h:182
bool knownBitsGE(EVT VT) const
Return true if we know at compile time this has more than or the same bits as VT.
Definition ValueTypes.h:276
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
EVT changeElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a type whose attributes match ourselves with the exception of the element type that i...
Definition ValueTypes.h:121
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition ValueTypes.h:469
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.