LLVM 18.0.0git
LegalizerHelper.cpp
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Instructions.h"
34#include "llvm/Support/Debug.h"
38#include <numeric>
39#include <optional>
40
41#define DEBUG_TYPE "legalizer"
42
43using namespace llvm;
44using namespace LegalizeActions;
45using namespace MIPatternMatch;
46
47/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
48///
49/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
50/// with any leftover piece as type \p LeftoverTy
51///
52/// Returns -1 in the first element of the pair if the breakdown is not
53/// satisfiable.
54static std::pair<int, int>
55getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
56 assert(!LeftoverTy.isValid() && "this is an out argument");
57
58 unsigned Size = OrigTy.getSizeInBits();
59 unsigned NarrowSize = NarrowTy.getSizeInBits();
60 unsigned NumParts = Size / NarrowSize;
61 unsigned LeftoverSize = Size - NumParts * NarrowSize;
62 assert(Size > NarrowSize);
63
64 if (LeftoverSize == 0)
65 return {NumParts, 0};
66
67 if (NarrowTy.isVector()) {
68 unsigned EltSize = OrigTy.getScalarSizeInBits();
69 if (LeftoverSize % EltSize != 0)
70 return {-1, -1};
71 LeftoverTy = LLT::scalarOrVector(
72 ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
73 } else {
74 LeftoverTy = LLT::scalar(LeftoverSize);
75 }
76
77 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
78 return std::make_pair(NumParts, NumLeftover);
79}
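// For example (illustrative): OrigTy = s88 with NarrowTy = s32 gives
// NumParts = 2 and LeftoverSize = 24, so the function returns {2, 1} and sets
// LeftoverTy to s24.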
80
81static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
82
83 if (!Ty.isScalar())
84 return nullptr;
85
86 switch (Ty.getSizeInBits()) {
87 case 16:
88 return Type::getHalfTy(Ctx);
89 case 32:
90 return Type::getFloatTy(Ctx);
91 case 64:
92 return Type::getDoubleTy(Ctx);
93 case 80:
94 return Type::getX86_FP80Ty(Ctx);
95 case 128:
96 return Type::getFP128Ty(Ctx);
97 default:
98 return nullptr;
99 }
100}
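// For example (illustrative): an s32 LLT maps to Type::getFloatTy(Ctx) and an
// s64 LLT to Type::getDoubleTy(Ctx); vectors and scalars of any other width
// (e.g. s20) come back as nullptr, meaning no float libcall type is available.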
101
102LegalizerHelper::LegalizerHelper(MachineFunction &MF,
103 GISelChangeObserver &Observer,
104 MachineIRBuilder &Builder)
105 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
106 LI(*MF.getSubtarget().getLegalizerInfo()),
107 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
108
109LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
110 GISelChangeObserver &Observer,
111 MachineIRBuilder &B, GISelKnownBits *KB)
112 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
113 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
114
115LegalizerHelper::LegalizeResult
116LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
117 LostDebugLocObserver &LocObserver) {
118 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
119
121
122 if (isa<GIntrinsic>(MI))
123 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
124 auto Step = LI.getAction(MI, MRI);
125 switch (Step.Action) {
126 case Legal:
127 LLVM_DEBUG(dbgs() << ".. Already legal\n");
128 return AlreadyLegal;
129 case Libcall:
130 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
131 return libcall(MI, LocObserver);
132 case NarrowScalar:
133 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
134 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
135 case WidenScalar:
136 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
137 return widenScalar(MI, Step.TypeIdx, Step.NewType);
138 case Bitcast:
139 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
140 return bitcast(MI, Step.TypeIdx, Step.NewType);
141 case Lower:
142 LLVM_DEBUG(dbgs() << ".. Lower\n");
143 return lower(MI, Step.TypeIdx, Step.NewType);
144 case FewerElements:
145 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
146 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
147 case MoreElements:
148 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
149 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
150 case Custom:
151 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
152 return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
153 default:
154 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
155 return UnableToLegalize;
156 }
157}
158
159void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
160 SmallVectorImpl<Register> &VRegs) {
161 for (int i = 0; i < NumParts; ++i)
162 VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
163 MIRBuilder.buildUnmerge(VRegs, Reg);
164}
165
166bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
167 LLT MainTy, LLT &LeftoverTy,
168 SmallVectorImpl<Register> &VRegs,
169 SmallVectorImpl<Register> &LeftoverRegs) {
170 assert(!LeftoverTy.isValid() && "this is an out argument");
171
172 unsigned RegSize = RegTy.getSizeInBits();
173 unsigned MainSize = MainTy.getSizeInBits();
174 unsigned NumParts = RegSize / MainSize;
175 unsigned LeftoverSize = RegSize - NumParts * MainSize;
176
177 // Use an unmerge when possible.
178 if (LeftoverSize == 0) {
179 for (unsigned I = 0; I < NumParts; ++I)
180 VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
181 MIRBuilder.buildUnmerge(VRegs, Reg);
182 return true;
183 }
184
185 // Perform irregular split. Leftover is last element of RegPieces.
186 if (MainTy.isVector()) {
187 SmallVector<Register, 8> RegPieces;
188 extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
189 for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
190 VRegs.push_back(RegPieces[i]);
191 LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
192 LeftoverTy = MRI.getType(LeftoverRegs[0]);
193 return true;
194 }
195
196 LeftoverTy = LLT::scalar(LeftoverSize);
197 // For irregular sizes, extract the individual parts.
198 for (unsigned I = 0; I != NumParts; ++I) {
199 Register NewReg = MRI.createGenericVirtualRegister(MainTy);
200 VRegs.push_back(NewReg);
201 MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
202 }
203
204 for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
205 Offset += LeftoverSize) {
206 Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
207 LeftoverRegs.push_back(NewReg);
208 MIRBuilder.buildExtract(NewReg, Reg, Offset);
209 }
210
211 return true;
212}
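// For example (illustrative): splitting an s88 register with MainTy = s32
// emits two s32 G_EXTRACTs into VRegs and one s24 G_EXTRACT into LeftoverRegs,
// setting LeftoverTy = s24; when the split is exact, a single G_UNMERGE_VALUES
// is used instead.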
213
214void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
215 SmallVectorImpl<Register> &VRegs) {
216 LLT RegTy = MRI.getType(Reg);
217 assert(RegTy.isVector() && "Expected a vector type");
218
219 LLT EltTy = RegTy.getElementType();
220 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
221 unsigned RegNumElts = RegTy.getNumElements();
222 unsigned LeftoverNumElts = RegNumElts % NumElts;
223 unsigned NumNarrowTyPieces = RegNumElts / NumElts;
224
225 // Perfect split without leftover
226 if (LeftoverNumElts == 0)
227 return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);
228
229 // Irregular split. Provide direct access to all elements for artifact
230 // combiner using unmerge to elements. Then build vectors with NumElts
231 // elements. The remaining element(s) will be used to build the leftover vector.
232 SmallVector<Register, 8> Elts;
233 extractParts(Reg, EltTy, RegNumElts, Elts);
234
235 unsigned Offset = 0;
236 // Requested sub-vectors of NarrowTy.
237 for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
238 ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
239 VRegs.push_back(MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
240 }
241
242 // Leftover element(s).
243 if (LeftoverNumElts == 1) {
244 VRegs.push_back(Elts[Offset]);
245 } else {
246 LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
247 ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
248 VRegs.push_back(
249 MIRBuilder.buildMergeLikeInstr(LeftoverTy, Pieces).getReg(0));
250 }
251}
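// For example (illustrative): a <7 x s8> register with NumElts = 2 is first
// unmerged into seven s8 values, three <2 x s8> sub-vectors are rebuilt from
// them, and the single remaining s8 element becomes the last (leftover) entry
// in VRegs.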
252
253void LegalizerHelper::insertParts(Register DstReg,
254 LLT ResultTy, LLT PartTy,
255 ArrayRef<Register> PartRegs,
256 LLT LeftoverTy,
257 ArrayRef<Register> LeftoverRegs) {
258 if (!LeftoverTy.isValid()) {
259 assert(LeftoverRegs.empty());
260
261 if (!ResultTy.isVector()) {
262 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
263 return;
264 }
265
266 if (PartTy.isVector())
267 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
268 else
269 MIRBuilder.buildBuildVector(DstReg, PartRegs);
270 return;
271 }
272
273 // Merge sub-vectors with different numbers of elements and insert into DstReg.
274 if (ResultTy.isVector()) {
275 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
276 SmallVector<Register, 8> AllRegs;
277 for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
278 AllRegs.push_back(Reg);
279 return mergeMixedSubvectors(DstReg, AllRegs);
280 }
281
282 SmallVector<Register> GCDRegs;
283 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
284 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
285 extractGCDType(GCDRegs, GCDTy, PartReg);
286 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
287 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
288}
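// For example (illustrative): rebuilding an s88 destination from two s32 parts
// and one s24 leftover goes through the GCD type s8: each part is unmerged to
// s8 pieces, the pieces are remerged to the LCM type, and the result is
// truncated back to s88.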
289
290void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
291 Register Reg) {
292 LLT Ty = MRI.getType(Reg);
293 SmallVector<Register, 8> RegElts;
294 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
295 Elts.append(RegElts);
296}
297
298/// Merge \p PartRegs with different types into \p DstReg.
299void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
300 ArrayRef<Register> PartRegs) {
301 SmallVector<Register, 32> AllElts;
302 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
303 appendVectorElts(AllElts, PartRegs[i]);
304
305 Register Leftover = PartRegs[PartRegs.size() - 1];
306 if (MRI.getType(Leftover).isScalar())
307 AllElts.push_back(Leftover);
308 else
309 appendVectorElts(AllElts, Leftover);
310
311 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
312}
313
314/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
315static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
316 const MachineInstr &MI) {
317 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
318
319 const int StartIdx = Regs.size();
320 const int NumResults = MI.getNumOperands() - 1;
321 Regs.resize(Regs.size() + NumResults);
322 for (int I = 0; I != NumResults; ++I)
323 Regs[StartIdx + I] = MI.getOperand(I).getReg();
324}
325
326void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
327 LLT GCDTy, Register SrcReg) {
328 LLT SrcTy = MRI.getType(SrcReg);
329 if (SrcTy == GCDTy) {
330 // If the source already evenly divides the result type, we don't need to do
331 // anything.
332 Parts.push_back(SrcReg);
333 } else {
334 // Need to split into common type sized pieces.
335 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
336 getUnmergeResults(Parts, *Unmerge);
337 }
338}
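// For example (illustrative): with GCDTy = s16, an s64 source register is
// unmerged into four s16 pieces that are appended to Parts, while an s16
// source is appended unchanged.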
339
340LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
341 LLT NarrowTy, Register SrcReg) {
342 LLT SrcTy = MRI.getType(SrcReg);
343 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
344 extractGCDType(Parts, GCDTy, SrcReg);
345 return GCDTy;
346}
347
348LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
349 SmallVectorImpl<Register> &VRegs,
350 unsigned PadStrategy) {
351 LLT LCMTy = getLCMType(DstTy, NarrowTy);
352
353 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
354 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
355 int NumOrigSrc = VRegs.size();
356
357 Register PadReg;
358
359 // Get a value we can use to pad the source value if the sources won't evenly
360 // cover the result type.
361 if (NumOrigSrc < NumParts * NumSubParts) {
362 if (PadStrategy == TargetOpcode::G_ZEXT)
363 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
364 else if (PadStrategy == TargetOpcode::G_ANYEXT)
365 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
366 else {
367 assert(PadStrategy == TargetOpcode::G_SEXT);
368
369 // Shift the sign bit of the low register through the high register.
370 auto ShiftAmt =
371 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
372 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
373 }
374 }
375
376 // Registers for the final merge to be produced.
377 SmallVector<Register, 4> Remerge(NumParts);
378
379 // Registers needed for intermediate merges, which will be merged into a
380 // source for Remerge.
381 SmallVector<Register, 4> SubMerge(NumSubParts);
382
383 // Once we've fully read off the end of the original source bits, we can reuse
384 // the same high bits for remaining padding elements.
385 Register AllPadReg;
386
387 // Build merges to the LCM type to cover the original result type.
388 for (int I = 0; I != NumParts; ++I) {
389 bool AllMergePartsArePadding = true;
390
391 // Build the requested merges to the requested type.
392 for (int J = 0; J != NumSubParts; ++J) {
393 int Idx = I * NumSubParts + J;
394 if (Idx >= NumOrigSrc) {
395 SubMerge[J] = PadReg;
396 continue;
397 }
398
399 SubMerge[J] = VRegs[Idx];
400
401 // There are meaningful bits here we can't reuse later.
402 AllMergePartsArePadding = false;
403 }
404
405 // If we've filled up a complete piece with padding bits, we can directly
406 // emit the natural sized constant if applicable, rather than a merge of
407 // smaller constants.
408 if (AllMergePartsArePadding && !AllPadReg) {
409 if (PadStrategy == TargetOpcode::G_ANYEXT)
410 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
411 else if (PadStrategy == TargetOpcode::G_ZEXT)
412 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
413
414 // If this is a sign extension, we can't materialize a trivial constant
415 // with the right type and have to produce a merge.
416 }
417
418 if (AllPadReg) {
419 // Avoid creating additional instructions if we're just adding additional
420 // copies of padding bits.
421 Remerge[I] = AllPadReg;
422 continue;
423 }
424
425 if (NumSubParts == 1)
426 Remerge[I] = SubMerge[0];
427 else
428 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
429
430 // In the sign extend padding case, re-use the first all-signbit merge.
431 if (AllMergePartsArePadding && !AllPadReg)
432 AllPadReg = Remerge[I];
433 }
434
435 VRegs = std::move(Remerge);
436 return LCMTy;
437}
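// For example (illustrative): remerging the four s16 pieces of an s64 value to
// NarrowTy = s48 uses LCMTy = s192, so four s48 registers are produced; slots
// past the original bits are filled with the requested pad value (zero, undef,
// or a sign-bit splat) and the function returns s192.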
438
439void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
440 ArrayRef<Register> RemergeRegs) {
441 LLT DstTy = MRI.getType(DstReg);
442
443 // Create the merge to the widened source, and extract the relevant bits into
444 // the result.
445
446 if (DstTy == LCMTy) {
447 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
448 return;
449 }
450
451 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
452 if (DstTy.isScalar() && LCMTy.isScalar()) {
453 MIRBuilder.buildTrunc(DstReg, Remerge);
454 return;
455 }
456
457 if (LCMTy.isVector()) {
458 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
459 SmallVector<Register, 8> UnmergeDefs(NumDefs);
460 UnmergeDefs[0] = DstReg;
461 for (unsigned I = 1; I != NumDefs; ++I)
462 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
463
464 MIRBuilder.buildUnmerge(UnmergeDefs,
465 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
466 return;
467 }
468
469 llvm_unreachable("unhandled case");
470}
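// For example (illustrative): with a scalar destination such as DstReg = s64
// and LCMTy = s192, the merged value is simply truncated back to s64; with a
// vector LCM type such as <6 x s16> feeding a <2 x s16> destination, the value
// is unmerged into three <2 x s16> pieces and only the first def is kept.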
471
472static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
473#define RTLIBCASE_INT(LibcallPrefix) \
474 do { \
475 switch (Size) { \
476 case 32: \
477 return RTLIB::LibcallPrefix##32; \
478 case 64: \
479 return RTLIB::LibcallPrefix##64; \
480 case 128: \
481 return RTLIB::LibcallPrefix##128; \
482 default: \
483 llvm_unreachable("unexpected size"); \
484 } \
485 } while (0)
486
487#define RTLIBCASE(LibcallPrefix) \
488 do { \
489 switch (Size) { \
490 case 32: \
491 return RTLIB::LibcallPrefix##32; \
492 case 64: \
493 return RTLIB::LibcallPrefix##64; \
494 case 80: \
495 return RTLIB::LibcallPrefix##80; \
496 case 128: \
497 return RTLIB::LibcallPrefix##128; \
498 default: \
499 llvm_unreachable("unexpected size"); \
500 } \
501 } while (0)
502
503 switch (Opcode) {
504 case TargetOpcode::G_MUL:
505 RTLIBCASE_INT(MUL_I);
506 case TargetOpcode::G_SDIV:
507 RTLIBCASE_INT(SDIV_I);
508 case TargetOpcode::G_UDIV:
509 RTLIBCASE_INT(UDIV_I);
510 case TargetOpcode::G_SREM:
511 RTLIBCASE_INT(SREM_I);
512 case TargetOpcode::G_UREM:
513 RTLIBCASE_INT(UREM_I);
514 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
515 RTLIBCASE_INT(CTLZ_I);
516 case TargetOpcode::G_FADD:
517 RTLIBCASE(ADD_F);
518 case TargetOpcode::G_FSUB:
519 RTLIBCASE(SUB_F);
520 case TargetOpcode::G_FMUL:
521 RTLIBCASE(MUL_F);
522 case TargetOpcode::G_FDIV:
523 RTLIBCASE(DIV_F);
524 case TargetOpcode::G_FEXP:
525 RTLIBCASE(EXP_F);
526 case TargetOpcode::G_FEXP2:
527 RTLIBCASE(EXP2_F);
528 case TargetOpcode::G_FEXP10:
529 RTLIBCASE(EXP10_F);
530 case TargetOpcode::G_FREM:
531 RTLIBCASE(REM_F);
532 case TargetOpcode::G_FPOW:
533 RTLIBCASE(POW_F);
534 case TargetOpcode::G_FMA:
535 RTLIBCASE(FMA_F);
536 case TargetOpcode::G_FSIN:
537 RTLIBCASE(SIN_F);
538 case TargetOpcode::G_FCOS:
539 RTLIBCASE(COS_F);
540 case TargetOpcode::G_FLOG10:
541 RTLIBCASE(LOG10_F);
542 case TargetOpcode::G_FLOG:
543 RTLIBCASE(LOG_F);
544 case TargetOpcode::G_FLOG2:
545 RTLIBCASE(LOG2_F);
546 case TargetOpcode::G_FLDEXP:
547 RTLIBCASE(LDEXP_F);
548 case TargetOpcode::G_FCEIL:
549 RTLIBCASE(CEIL_F);
550 case TargetOpcode::G_FFLOOR:
551 RTLIBCASE(FLOOR_F);
552 case TargetOpcode::G_FMINNUM:
553 RTLIBCASE(FMIN_F);
554 case TargetOpcode::G_FMAXNUM:
555 RTLIBCASE(FMAX_F);
556 case TargetOpcode::G_FSQRT:
557 RTLIBCASE(SQRT_F);
558 case TargetOpcode::G_FRINT:
559 RTLIBCASE(RINT_F);
560 case TargetOpcode::G_FNEARBYINT:
561 RTLIBCASE(NEARBYINT_F);
562 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
563 RTLIBCASE(ROUNDEVEN_F);
564 }
565 llvm_unreachable("Unknown libcall function");
566}
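// For example (illustrative): getRTLibDesc(TargetOpcode::G_FADD, 32) returns
// RTLIB::ADD_F32 and getRTLibDesc(TargetOpcode::G_SDIV, 64) returns
// RTLIB::SDIV_I64; an unexpected size or opcode reaches llvm_unreachable.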
567
568/// True if an instruction is in tail position in its caller. Intended for
569/// legalizing libcalls as tail calls when possible.
570static bool isLibCallInTailPosition(MachineInstr &MI,
571 const TargetInstrInfo &TII,
572 MachineRegisterInfo &MRI) {
573 MachineBasicBlock &MBB = *MI.getParent();
574 const Function &F = MBB.getParent()->getFunction();
575
576 // Conservatively require the attributes of the call to match those of
577 // the return. Ignore NoAlias and NonNull because they don't affect the
578 // call sequence.
579 AttributeList CallerAttrs = F.getAttributes();
580 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
581 .removeAttribute(Attribute::NoAlias)
582 .removeAttribute(Attribute::NonNull)
583 .hasAttributes())
584 return false;
585
586 // It's not safe to eliminate the sign / zero extension of the return value.
587 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
588 CallerAttrs.hasRetAttr(Attribute::SExt))
589 return false;
590
591 // Only tail call if the following instruction is a standard return or if we
592 // have a `thisreturn` callee, and a sequence like:
593 //
594 // G_MEMCPY %0, %1, %2
595 // $x0 = COPY %0
596 // RET_ReallyLR implicit $x0
597 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
598 if (Next != MBB.instr_end() && Next->isCopy()) {
599 switch (MI.getOpcode()) {
600 default:
601 llvm_unreachable("unsupported opcode");
602 case TargetOpcode::G_BZERO:
603 return false;
604 case TargetOpcode::G_MEMCPY:
605 case TargetOpcode::G_MEMMOVE:
606 case TargetOpcode::G_MEMSET:
607 break;
608 }
609
610 Register VReg = MI.getOperand(0).getReg();
611 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
612 return false;
613
614 Register PReg = Next->getOperand(0).getReg();
615 if (!PReg.isPhysical())
616 return false;
617
618 auto Ret = next_nodbg(Next, MBB.instr_end());
619 if (Ret == MBB.instr_end() || !Ret->isReturn())
620 return false;
621
622 if (Ret->getNumImplicitOperands() != 1)
623 return false;
624
625 if (PReg != Ret->getOperand(0).getReg())
626 return false;
627
628 // Skip over the COPY that we just validated.
629 Next = Ret;
630 }
631
632 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
633 return false;
634
635 return true;
636}
637
638LegalizerHelper::LegalizeResult
639llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
640 const CallLowering::ArgInfo &Result,
641 ArrayRef<CallLowering::ArgInfo> Args,
642 const CallingConv::ID CC) {
643 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
644
645 CallLowering::CallLoweringInfo Info;
646 Info.CallConv = CC;
647 Info.Callee = MachineOperand::CreateES(Name);
648 Info.OrigRet = Result;
649 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
650 if (!CLI.lowerCall(MIRBuilder, Info))
651 return LegalizerHelper::UnableToLegalize;
652
653 return LegalizerHelper::Legalized;
654}
655
656LegalizerHelper::LegalizeResult
657llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
658 const CallLowering::ArgInfo &Result,
659 ArrayRef<CallLowering::ArgInfo> Args) {
660 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
661 const char *Name = TLI.getLibcallName(Libcall);
662 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
663 return createLibcall(MIRBuilder, Name, Result, Args, CC);
664}
665
666// Useful for libcalls where all operands have the same type.
667static LegalizerHelper::LegalizeResult
668simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
669 Type *OpType) {
670 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
671
672 // FIXME: What does the original arg index mean here?
673 SmallVector<CallLowering::ArgInfo, 3> Args;
674 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
675 Args.push_back({MO.getReg(), OpType, 0});
676 return createLibcall(MIRBuilder, Libcall,
677 {MI.getOperand(0).getReg(), OpType, 0}, Args);
678}
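// For example (illustrative): legalizing a 64-bit G_FREM this way resolves to
// RTLIB::REM_F64 (typically the "fmod" routine), passing both source registers
// as double and returning the result into the original destination register.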
679
680LegalizerHelper::LegalizeResult
681llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
682 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
683 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
684
685 SmallVector<CallLowering::ArgInfo, 3> Args;
686 // Add all the args, except for the last which is an imm denoting 'tail'.
687 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
688 Register Reg = MI.getOperand(i).getReg();
689
690 // Need to derive an IR type for call lowering.
691 LLT OpLLT = MRI.getType(Reg);
692 Type *OpTy = nullptr;
693 if (OpLLT.isPointer())
694 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
695 else
696 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
697 Args.push_back({Reg, OpTy, 0});
698 }
699
700 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
701 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
702 RTLIB::Libcall RTLibcall;
703 unsigned Opc = MI.getOpcode();
704 switch (Opc) {
705 case TargetOpcode::G_BZERO:
706 RTLibcall = RTLIB::BZERO;
707 break;
708 case TargetOpcode::G_MEMCPY:
709 RTLibcall = RTLIB::MEMCPY;
710 Args[0].Flags[0].setReturned();
711 break;
712 case TargetOpcode::G_MEMMOVE:
713 RTLibcall = RTLIB::MEMMOVE;
714 Args[0].Flags[0].setReturned();
715 break;
716 case TargetOpcode::G_MEMSET:
717 RTLibcall = RTLIB::MEMSET;
718 Args[0].Flags[0].setReturned();
719 break;
720 default:
721 llvm_unreachable("unsupported opcode");
722 }
723 const char *Name = TLI.getLibcallName(RTLibcall);
724
725 // Unsupported libcall on the target.
726 if (!Name) {
727 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
728 << MIRBuilder.getTII().getName(Opc) << "\n");
729 return LegalizerHelper::UnableToLegalize;
730 }
731
732 CallLowering::CallLoweringInfo Info;
733 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
734 Info.Callee = MachineOperand::CreateES(Name);
735 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
736 Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
737 isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);
738
739 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
740 if (!CLI.lowerCall(MIRBuilder, Info))
741 return LegalizerHelper::UnableToLegalize;
742
743 if (Info.LoweredTailCall) {
744 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
745
746 // Check debug locations before removing the return.
747 LocObserver.checkpoint(true);
748
749 // We must have a return following the call (or debug insts) to get past
750 // isLibCallInTailPosition.
751 do {
752 MachineInstr *Next = MI.getNextNode();
753 assert(Next &&
754 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
755 "Expected instr following MI to be return or debug inst?");
756 // We lowered a tail call, so the call is now the return from the block.
757 // Delete the old return.
758 Next->eraseFromParent();
759 } while (MI.getNextNode());
760
761 // We expect to lose the debug location from the return.
762 LocObserver.checkpoint(false);
763 }
764
765 return LegalizerHelper::Legalized;
766}
767
768static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
769 Type *FromType) {
770 auto ToMVT = MVT::getVT(ToType);
771 auto FromMVT = MVT::getVT(FromType);
772
773 switch (Opcode) {
774 case TargetOpcode::G_FPEXT:
775 return RTLIB::getFPEXT(FromMVT, ToMVT);
776 case TargetOpcode::G_FPTRUNC:
777 return RTLIB::getFPROUND(FromMVT, ToMVT);
778 case TargetOpcode::G_FPTOSI:
779 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
780 case TargetOpcode::G_FPTOUI:
781 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
782 case TargetOpcode::G_SITOFP:
783 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
784 case TargetOpcode::G_UITOFP:
785 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
786 }
787 llvm_unreachable("Unsupported libcall function");
788}
789
790static LegalizerHelper::LegalizeResult
791conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
792 Type *FromType) {
793 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
794 return createLibcall(MIRBuilder, Libcall,
795 {MI.getOperand(0).getReg(), ToType, 0},
796 {{MI.getOperand(1).getReg(), FromType, 0}});
797}
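// For example (illustrative): a G_FPEXT from float to double resolves to
// RTLIB::getFPEXT(f32, f64), which on most targets is the __extendsfdf2
// routine taking the f32 source and returning the widened f64 result.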
798
799LegalizerHelper::LegalizeResult
800LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
801 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
802 unsigned Size = LLTy.getSizeInBits();
803 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
804
805 switch (MI.getOpcode()) {
806 default:
807 return UnableToLegalize;
808 case TargetOpcode::G_MUL:
809 case TargetOpcode::G_SDIV:
810 case TargetOpcode::G_UDIV:
811 case TargetOpcode::G_SREM:
812 case TargetOpcode::G_UREM:
813 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
814 Type *HLTy = IntegerType::get(Ctx, Size);
815 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
816 if (Status != Legalized)
817 return Status;
818 break;
819 }
820 case TargetOpcode::G_FADD:
821 case TargetOpcode::G_FSUB:
822 case TargetOpcode::G_FMUL:
823 case TargetOpcode::G_FDIV:
824 case TargetOpcode::G_FMA:
825 case TargetOpcode::G_FPOW:
826 case TargetOpcode::G_FREM:
827 case TargetOpcode::G_FCOS:
828 case TargetOpcode::G_FSIN:
829 case TargetOpcode::G_FLOG10:
830 case TargetOpcode::G_FLOG:
831 case TargetOpcode::G_FLOG2:
832 case TargetOpcode::G_FLDEXP:
833 case TargetOpcode::G_FEXP:
834 case TargetOpcode::G_FEXP2:
835 case TargetOpcode::G_FEXP10:
836 case TargetOpcode::G_FCEIL:
837 case TargetOpcode::G_FFLOOR:
838 case TargetOpcode::G_FMINNUM:
839 case TargetOpcode::G_FMAXNUM:
840 case TargetOpcode::G_FSQRT:
841 case TargetOpcode::G_FRINT:
842 case TargetOpcode::G_FNEARBYINT:
843 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
844 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
845 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
846 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
847 return UnableToLegalize;
848 }
849 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
850 if (Status != Legalized)
851 return Status;
852 break;
853 }
854 case TargetOpcode::G_FPEXT:
855 case TargetOpcode::G_FPTRUNC: {
856 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
857 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
858 if (!FromTy || !ToTy)
859 return UnableToLegalize;
860 LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
861 if (Status != Legalized)
862 return Status;
863 break;
864 }
865 case TargetOpcode::G_FPTOSI:
866 case TargetOpcode::G_FPTOUI: {
867 // FIXME: Support other types
868 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
869 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
870 if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
871 return UnableToLegalize;
872 LegalizeResult Status = conversionLibcall(
873 MI, MIRBuilder,
874 ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
875 FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
876 if (Status != Legalized)
877 return Status;
878 break;
879 }
880 case TargetOpcode::G_SITOFP:
881 case TargetOpcode::G_UITOFP: {
882 // FIXME: Support other types
883 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
884 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
885 if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
886 return UnableToLegalize;
887 LegalizeResult Status = conversionLibcall(
888 MI, MIRBuilder,
889 ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
890 FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
891 if (Status != Legalized)
892 return Status;
893 break;
894 }
895 case TargetOpcode::G_BZERO:
896 case TargetOpcode::G_MEMCPY:
897 case TargetOpcode::G_MEMMOVE:
898 case TargetOpcode::G_MEMSET: {
899 LegalizeResult Result =
900 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
901 if (Result != Legalized)
902 return Result;
903 MI.eraseFromParent();
904 return Result;
905 }
906 }
907
908 MI.eraseFromParent();
909 return Legalized;
910}
911
912LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
913 unsigned TypeIdx,
914 LLT NarrowTy) {
915 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
916 uint64_t NarrowSize = NarrowTy.getSizeInBits();
917
918 switch (MI.getOpcode()) {
919 default:
920 return UnableToLegalize;
921 case TargetOpcode::G_IMPLICIT_DEF: {
922 Register DstReg = MI.getOperand(0).getReg();
923 LLT DstTy = MRI.getType(DstReg);
924
925 // If SizeOp0 is not an exact multiple of NarrowSize, emit
926 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
927 // FIXME: Although this would also be legal for the general case, it causes
928 // a lot of regressions in the emitted code (superfluous COPYs, artifact
929 // combines not being hit). This seems to be a problem related to the
930 // artifact combiner.
931 if (SizeOp0 % NarrowSize != 0) {
932 LLT ImplicitTy = NarrowTy;
933 if (DstTy.isVector())
934 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
935
936 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
937 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
938
939 MI.eraseFromParent();
940 return Legalized;
941 }
942
943 int NumParts = SizeOp0 / NarrowSize;
944
945 SmallVector<Register, 2> DstRegs;
946 for (int i = 0; i < NumParts; ++i)
947 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
948
949 if (DstTy.isVector())
950 MIRBuilder.buildBuildVector(DstReg, DstRegs);
951 else
952 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
953 MI.eraseFromParent();
954 return Legalized;
955 }
956 case TargetOpcode::G_CONSTANT: {
957 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
958 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
959 unsigned TotalSize = Ty.getSizeInBits();
960 unsigned NarrowSize = NarrowTy.getSizeInBits();
961 int NumParts = TotalSize / NarrowSize;
962
963 SmallVector<Register, 4> PartRegs;
964 for (int I = 0; I != NumParts; ++I) {
965 unsigned Offset = I * NarrowSize;
966 auto K = MIRBuilder.buildConstant(NarrowTy,
967 Val.lshr(Offset).trunc(NarrowSize));
968 PartRegs.push_back(K.getReg(0));
969 }
970
971 LLT LeftoverTy;
972 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
973 SmallVector<Register, 1> LeftoverRegs;
974 if (LeftoverBits != 0) {
975 LeftoverTy = LLT::scalar(LeftoverBits);
976 auto K = MIRBuilder.buildConstant(
977 LeftoverTy,
978 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
979 LeftoverRegs.push_back(K.getReg(0));
980 }
981
982 insertParts(MI.getOperand(0).getReg(),
983 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
984
985 MI.eraseFromParent();
986 return Legalized;
987 }
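 // For example (illustrative): narrowing an s88 G_CONSTANT with NarrowTy = s32
 // emits two s32 constants for bits [0, 64) plus an s24 leftover constant, and
 // insertParts() merges them back into the original s88 destination.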
988 case TargetOpcode::G_SEXT:
989 case TargetOpcode::G_ZEXT:
990 case TargetOpcode::G_ANYEXT:
991 return narrowScalarExt(MI, TypeIdx, NarrowTy);
992 case TargetOpcode::G_TRUNC: {
993 if (TypeIdx != 1)
994 return UnableToLegalize;
995
996 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
997 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
998 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
999 return UnableToLegalize;
1000 }
1001
1002 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1003 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1004 MI.eraseFromParent();
1005 return Legalized;
1006 }
1007
1008 case TargetOpcode::G_FREEZE: {
1009 if (TypeIdx != 0)
1010 return UnableToLegalize;
1011
1012 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1013 // Should widen scalar first
1014 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1015 return UnableToLegalize;
1016
1017 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1018 SmallVector<Register, 8> Parts;
1019 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1020 Parts.push_back(
1021 MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
1022 }
1023
1024 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1025 MI.eraseFromParent();
1026 return Legalized;
1027 }
1028 case TargetOpcode::G_ADD:
1029 case TargetOpcode::G_SUB:
1030 case TargetOpcode::G_SADDO:
1031 case TargetOpcode::G_SSUBO:
1032 case TargetOpcode::G_SADDE:
1033 case TargetOpcode::G_SSUBE:
1034 case TargetOpcode::G_UADDO:
1035 case TargetOpcode::G_USUBO:
1036 case TargetOpcode::G_UADDE:
1037 case TargetOpcode::G_USUBE:
1038 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1039 case TargetOpcode::G_MUL:
1040 case TargetOpcode::G_UMULH:
1041 return narrowScalarMul(MI, NarrowTy);
1042 case TargetOpcode::G_EXTRACT:
1043 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1044 case TargetOpcode::G_INSERT:
1045 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1046 case TargetOpcode::G_LOAD: {
1047 auto &LoadMI = cast<GLoad>(MI);
1048 Register DstReg = LoadMI.getDstReg();
1049 LLT DstTy = MRI.getType(DstReg);
1050 if (DstTy.isVector())
1051 return UnableToLegalize;
1052
1053 if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
1054 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1055 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1056 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1057 LoadMI.eraseFromParent();
1058 return Legalized;
1059 }
1060
1061 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1062 }
1063 case TargetOpcode::G_ZEXTLOAD:
1064 case TargetOpcode::G_SEXTLOAD: {
1065 auto &LoadMI = cast<GExtLoad>(MI);
1066 Register DstReg = LoadMI.getDstReg();
1067 Register PtrReg = LoadMI.getPointerReg();
1068
1069 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1070 auto &MMO = LoadMI.getMMO();
1071 unsigned MemSize = MMO.getSizeInBits();
1072
1073 if (MemSize == NarrowSize) {
1074 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1075 } else if (MemSize < NarrowSize) {
1076 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1077 } else if (MemSize > NarrowSize) {
1078 // FIXME: Need to split the load.
1079 return UnableToLegalize;
1080 }
1081
1082 if (isa<GZExtLoad>(LoadMI))
1083 MIRBuilder.buildZExt(DstReg, TmpReg);
1084 else
1085 MIRBuilder.buildSExt(DstReg, TmpReg);
1086
1087 LoadMI.eraseFromParent();
1088 return Legalized;
1089 }
1090 case TargetOpcode::G_STORE: {
1091 auto &StoreMI = cast<GStore>(MI);
1092
1093 Register SrcReg = StoreMI.getValueReg();
1094 LLT SrcTy = MRI.getType(SrcReg);
1095 if (SrcTy.isVector())
1096 return UnableToLegalize;
1097
1098 int NumParts = SizeOp0 / NarrowSize;
1099 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1100 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1101 if (SrcTy.isVector() && LeftoverBits != 0)
1102 return UnableToLegalize;
1103
1104 if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
1105 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1106 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1107 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1108 StoreMI.eraseFromParent();
1109 return Legalized;
1110 }
1111
1112 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1113 }
1114 case TargetOpcode::G_SELECT:
1115 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1116 case TargetOpcode::G_AND:
1117 case TargetOpcode::G_OR:
1118 case TargetOpcode::G_XOR: {
1119 // Legalize bitwise operation:
1120 // A = BinOp<Ty> B, C
1121 // into:
1122 // B1, ..., BN = G_UNMERGE_VALUES B
1123 // C1, ..., CN = G_UNMERGE_VALUES C
1124 // A1 = BinOp<Ty/N> B1, C2
1125 // ...
1126 // AN = BinOp<Ty/N> BN, CN
1127 // A = G_MERGE_VALUES A1, ..., AN
1128 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1129 }
1130 case TargetOpcode::G_SHL:
1131 case TargetOpcode::G_LSHR:
1132 case TargetOpcode::G_ASHR:
1133 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1134 case TargetOpcode::G_CTLZ:
1135 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1136 case TargetOpcode::G_CTTZ:
1137 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1138 case TargetOpcode::G_CTPOP:
1139 if (TypeIdx == 1)
1140 switch (MI.getOpcode()) {
1141 case TargetOpcode::G_CTLZ:
1142 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1143 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1144 case TargetOpcode::G_CTTZ:
1145 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1146 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1147 case TargetOpcode::G_CTPOP:
1148 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1149 default:
1150 return UnableToLegalize;
1151 }
1152
1153 Observer.changingInstr(MI);
1154 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1155 Observer.changedInstr(MI);
1156 return Legalized;
1157 case TargetOpcode::G_INTTOPTR:
1158 if (TypeIdx != 1)
1159 return UnableToLegalize;
1160
1161 Observer.changingInstr(MI);
1162 narrowScalarSrc(MI, NarrowTy, 1);
1163 Observer.changedInstr(MI);
1164 return Legalized;
1165 case TargetOpcode::G_PTRTOINT:
1166 if (TypeIdx != 0)
1167 return UnableToLegalize;
1168
1169 Observer.changingInstr(MI);
1170 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1171 Observer.changedInstr(MI);
1172 return Legalized;
1173 case TargetOpcode::G_PHI: {
1174 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1175 // NarrowSize.
1176 if (SizeOp0 % NarrowSize != 0)
1177 return UnableToLegalize;
1178
1179 unsigned NumParts = SizeOp0 / NarrowSize;
1180 SmallVector<Register, 2> DstRegs(NumParts);
1181 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1182 Observer.changingInstr(MI);
1183 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1184 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1185 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1186 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1187 SrcRegs[i / 2]);
1188 }
1189 MachineBasicBlock &MBB = *MI.getParent();
1190 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1191 for (unsigned i = 0; i < NumParts; ++i) {
1192 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1193 MachineInstrBuilder MIB =
1194 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1195 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1196 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1197 }
1198 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1199 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1200 Observer.changedInstr(MI);
1201 MI.eraseFromParent();
1202 return Legalized;
1203 }
1204 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1205 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1206 if (TypeIdx != 2)
1207 return UnableToLegalize;
1208
1209 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1210 Observer.changingInstr(MI);
1211 narrowScalarSrc(MI, NarrowTy, OpIdx);
1212 Observer.changedInstr(MI);
1213 return Legalized;
1214 }
1215 case TargetOpcode::G_ICMP: {
1216 Register LHS = MI.getOperand(2).getReg();
1217 LLT SrcTy = MRI.getType(LHS);
1218 uint64_t SrcSize = SrcTy.getSizeInBits();
1219 CmpInst::Predicate Pred =
1220 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1221
1222 // TODO: Handle the non-equality case for weird sizes.
1223 if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1224 return UnableToLegalize;
1225
1226 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1227 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1228 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1229 LHSLeftoverRegs))
1230 return UnableToLegalize;
1231
1232 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1233 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1234 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1235 RHSPartRegs, RHSLeftoverRegs))
1236 return UnableToLegalize;
1237
1238 // We now have the LHS and RHS of the compare split into narrow-type
1239 // registers, plus potentially some leftover type.
1240 Register Dst = MI.getOperand(0).getReg();
1241 LLT ResTy = MRI.getType(Dst);
1242 if (ICmpInst::isEquality(Pred)) {
1243 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1244 // them together. For each equal part, the result should be all 0s. For
1245 // each non-equal part, we'll get at least one 1.
1246 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1247 SmallVector<Register, 4> Xors;
1248 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1249 auto LHS = std::get<0>(LHSAndRHS);
1250 auto RHS = std::get<1>(LHSAndRHS);
1251 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1252 Xors.push_back(Xor);
1253 }
1254
1255 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1256 // to the desired narrow type so that we can OR them together later.
1257 SmallVector<Register, 4> WidenedXors;
1258 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1259 auto LHS = std::get<0>(LHSAndRHS);
1260 auto RHS = std::get<1>(LHSAndRHS);
1261 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1262 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1263 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1264 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1265 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1266 }
1267
1268 // Now, for each part we broke up, we know if they are equal/not equal
1269 // based off the G_XOR. We can OR these all together and compare against
1270 // 0 to get the result.
1271 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1272 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1273 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1274 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1275 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1276 } else {
1277 // TODO: Handle non-power-of-two types.
1278 assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1279 assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1280 Register LHSL = LHSPartRegs[0];
1281 Register LHSH = LHSPartRegs[1];
1282 Register RHSL = RHSPartRegs[0];
1283 Register RHSH = RHSPartRegs[1];
1284 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1285 MachineInstrBuilder CmpHEQ =
1286 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1287 MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1288 ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1289 MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
1290 }
1291 MI.eraseFromParent();
1292 return Legalized;
1293 }
1294 case TargetOpcode::G_SEXT_INREG: {
1295 if (TypeIdx != 0)
1296 return UnableToLegalize;
1297
1298 int64_t SizeInBits = MI.getOperand(2).getImm();
1299
1300 // So long as the new type has more bits than the bits we're extending we
1301 // don't need to break it apart.
1302 if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1303 Observer.changingInstr(MI);
1304 // We don't lose any non-extension bits by truncating the src and
1305 // sign-extending the dst.
1306 MachineOperand &MO1 = MI.getOperand(1);
1307 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1308 MO1.setReg(TruncMIB.getReg(0));
1309
1310 MachineOperand &MO2 = MI.getOperand(0);
1311 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1312 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1313 MIRBuilder.buildSExt(MO2, DstExt);
1314 MO2.setReg(DstExt);
1315 Observer.changedInstr(MI);
1316 return Legalized;
1317 }
1318
1319 // Break it apart. Components below the extension point are unmodified. The
1320 // component containing the extension point becomes a narrower SEXT_INREG.
1321 // Components above it are ashr'd from the component containing the
1322 // extension point.
1323 if (SizeOp0 % NarrowSize != 0)
1324 return UnableToLegalize;
1325 int NumParts = SizeOp0 / NarrowSize;
1326
1327 // List the registers where the destination will be scattered.
1328 SmallVector<Register, 2> DstRegs;
1329 // List the registers where the source will be split.
1330 SmallVector<Register, 2> SrcRegs;
1331
1332 // Create all the temporary registers.
1333 for (int i = 0; i < NumParts; ++i) {
1334 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1335
1336 SrcRegs.push_back(SrcReg);
1337 }
1338
1339 // Explode the big arguments into smaller chunks.
1340 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1341
1342 Register AshrCstReg =
1343 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1344 .getReg(0);
1345 Register FullExtensionReg = 0;
1346 Register PartialExtensionReg = 0;
1347
1348 // Do the operation on each small part.
1349 for (int i = 0; i < NumParts; ++i) {
1350 if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1351 DstRegs.push_back(SrcRegs[i]);
1352 else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1353 assert(PartialExtensionReg &&
1354 "Expected to visit partial extension before full");
1355 if (FullExtensionReg) {
1356 DstRegs.push_back(FullExtensionReg);
1357 continue;
1358 }
1359 DstRegs.push_back(
1360 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1361 .getReg(0));
1362 FullExtensionReg = DstRegs.back();
1363 } else {
1364 DstRegs.push_back(
1365 MIRBuilder
1366 .buildInstr(
1367 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1368 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1369 .getReg(0));
1370 PartialExtensionReg = DstRegs.back();
1371 }
1372 }
1373
1374 // Gather the destination registers into the final destination.
1375 Register DstReg = MI.getOperand(0).getReg();
1376 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1377 MI.eraseFromParent();
1378 return Legalized;
1379 }
1380 case TargetOpcode::G_BSWAP:
1381 case TargetOpcode::G_BITREVERSE: {
1382 if (SizeOp0 % NarrowSize != 0)
1383 return UnableToLegalize;
1384
1385 Observer.changingInstr(MI);
1386 SmallVector<Register, 2> SrcRegs, DstRegs;
1387 unsigned NumParts = SizeOp0 / NarrowSize;
1388 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1389
1390 for (unsigned i = 0; i < NumParts; ++i) {
1391 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1392 {SrcRegs[NumParts - 1 - i]});
1393 DstRegs.push_back(DstPart.getReg(0));
1394 }
1395
1396 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1397
1398 Observer.changedInstr(MI);
1399 MI.eraseFromParent();
1400 return Legalized;
1401 }
1402 case TargetOpcode::G_PTR_ADD:
1403 case TargetOpcode::G_PTRMASK: {
1404 if (TypeIdx != 1)
1405 return UnableToLegalize;
1406 Observer.changingInstr(MI);
1407 narrowScalarSrc(MI, NarrowTy, 2);
1408 Observer.changedInstr(MI);
1409 return Legalized;
1410 }
1411 case TargetOpcode::G_FPTOUI:
1412 case TargetOpcode::G_FPTOSI:
1413 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1414 case TargetOpcode::G_FPEXT:
1415 if (TypeIdx != 0)
1416 return UnableToLegalize;
1417 Observer.changingInstr(MI);
1418 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1419 Observer.changedInstr(MI);
1420 return Legalized;
1421 case TargetOpcode::G_FLDEXP:
1422 case TargetOpcode::G_STRICT_FLDEXP:
1423 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
1424 }
1425}
1426
1427Register LegalizerHelper::coerceToScalar(Register Val) {
1428 LLT Ty = MRI.getType(Val);
1429 if (Ty.isScalar())
1430 return Val;
1431
1432 const DataLayout &DL = MIRBuilder.getDataLayout();
1433 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1434 if (Ty.isPointer()) {
1435 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1436 return Register();
1437 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1438 }
1439
1440 Register NewVal = Val;
1441
1442 assert(Ty.isVector());
1443 LLT EltTy = Ty.getElementType();
1444 if (EltTy.isPointer())
1445 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1446 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1447}
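// For example (illustrative): a p0 pointer in a 64-bit address space is
// coerced with G_PTRTOINT to s64, and a <4 x s16> vector with G_BITCAST to
// s64; pointers in non-integral address spaces yield an invalid Register().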
1448
1449void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1450 unsigned OpIdx, unsigned ExtOpcode) {
1451 MachineOperand &MO = MI.getOperand(OpIdx);
1452 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1453 MO.setReg(ExtB.getReg(0));
1454}
1455
1456void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1457 unsigned OpIdx) {
1458 MachineOperand &MO = MI.getOperand(OpIdx);
1459 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1460 MO.setReg(ExtB.getReg(0));
1461}
1462
1463void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1464 unsigned OpIdx, unsigned TruncOpcode) {
1465 MachineOperand &MO = MI.getOperand(OpIdx);
1466 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1467 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1468 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1469 MO.setReg(DstExt);
1470}
1471
1472void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1473 unsigned OpIdx, unsigned ExtOpcode) {
1474 MachineOperand &MO = MI.getOperand(OpIdx);
1475 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1476 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1477 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1478 MO.setReg(DstTrunc);
1479}
1480
1481void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1482 unsigned OpIdx) {
1483 MachineOperand &MO = MI.getOperand(OpIdx);
1484 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1485 Register Dst = MO.getReg();
1486 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1487 MO.setReg(DstExt);
1488 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
1489}
1490
1491void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1492 unsigned OpIdx) {
1493 MachineOperand &MO = MI.getOperand(OpIdx);
1494 SmallVector<Register, 8> Regs;
1495 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
1496}
1497
1498void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1499 MachineOperand &Op = MI.getOperand(OpIdx);
1500 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1501}
1502
1503void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1504 MachineOperand &MO = MI.getOperand(OpIdx);
1505 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1506 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1507 MIRBuilder.buildBitcast(MO, CastDst);
1508 MO.setReg(CastDst);
1509}
1510
1511LegalizerHelper::LegalizeResult
1512LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1513 LLT WideTy) {
1514 if (TypeIdx != 1)
1515 return UnableToLegalize;
1516
1517 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
1518 if (DstTy.isVector())
1519 return UnableToLegalize;
1520
1521 LLT SrcTy = MRI.getType(Src1Reg);
1522 const int DstSize = DstTy.getSizeInBits();
1523 const int SrcSize = SrcTy.getSizeInBits();
1524 const int WideSize = WideTy.getSizeInBits();
1525 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1526
1527 unsigned NumOps = MI.getNumOperands();
1528 unsigned NumSrc = MI.getNumOperands() - 1;
1529 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1530
1531 if (WideSize >= DstSize) {
1532 // Directly pack the bits in the target type.
1533 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
1534
1535 for (unsigned I = 2; I != NumOps; ++I) {
1536 const unsigned Offset = (I - 1) * PartSize;
1537
1538 Register SrcReg = MI.getOperand(I).getReg();
1539 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1540
1541 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1542
1543 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1544 MRI.createGenericVirtualRegister(WideTy);
1545
1546 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1547 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1548 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1549 ResultReg = NextResult;
1550 }
1551
1552 if (WideSize > DstSize)
1553 MIRBuilder.buildTrunc(DstReg, ResultReg);
1554 else if (DstTy.isPointer())
1555 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1556
1557 MI.eraseFromParent();
1558 return Legalized;
1559 }
1560
1561 // Unmerge the original values to the GCD type, and recombine to the next
1562 // multiple greater than the original type.
1563 //
1564 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1565 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1566 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1567 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1568 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1569 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1570 // %12:_(s12) = G_MERGE_VALUES %10, %11
1571 //
1572 // Padding with undef if necessary:
1573 //
1574 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1575 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1576 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1577 // %7:_(s2) = G_IMPLICIT_DEF
1578 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1579 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1580 // %10:_(s12) = G_MERGE_VALUES %8, %9
1581
1582 const int GCD = std::gcd(SrcSize, WideSize);
1583 LLT GCDTy = LLT::scalar(GCD);
1584
1586 SmallVector<Register, 8> NewMergeRegs;
1587 SmallVector<Register, 8> Unmerges;
1588 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1589
1590 // Decompose the original operands if they don't evenly divide.
1591 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
1592 Register SrcReg = MO.getReg();
1593 if (GCD == SrcSize) {
1594 Unmerges.push_back(SrcReg);
1595 } else {
1596 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1597 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1598 Unmerges.push_back(Unmerge.getReg(J));
1599 }
1600 }
1601
1602 // Pad with undef to the next size that is a multiple of the requested size.
1603 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1604 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1605 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1606 Unmerges.push_back(UndefReg);
1607 }
1608
1609 const int PartsPerGCD = WideSize / GCD;
1610
1611 // Build merges of each piece.
1612 ArrayRef<Register> Slicer(Unmerges);
1613 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1614 auto Merge =
1615 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
1616 NewMergeRegs.push_back(Merge.getReg(0));
1617 }
1618
1619 // A truncate may be necessary if the requested type doesn't evenly divide the
1620 // original result type.
1621 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1622 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
1623 } else {
1624 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
1625 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1626 }
1627
1628 MI.eraseFromParent();
1629 return Legalized;
1630}
1631
1632LegalizerHelper::LegalizeResult
1633LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1634 LLT WideTy) {
1635 if (TypeIdx != 0)
1636 return UnableToLegalize;
1637
1638 int NumDst = MI.getNumOperands() - 1;
1639 Register SrcReg = MI.getOperand(NumDst).getReg();
1640 LLT SrcTy = MRI.getType(SrcReg);
1641 if (SrcTy.isVector())
1642 return UnableToLegalize;
1643
1644 Register Dst0Reg = MI.getOperand(0).getReg();
1645 LLT DstTy = MRI.getType(Dst0Reg);
1646 if (!DstTy.isScalar())
1647 return UnableToLegalize;
1648
1649 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1650 if (SrcTy.isPointer()) {
1651 const DataLayout &DL = MIRBuilder.getDataLayout();
1652 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1653 LLVM_DEBUG(
1654 dbgs() << "Not casting non-integral address space integer\n");
1655 return UnableToLegalize;
1656 }
1657
1658 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1659 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1660 }
1661
1662 // Widen SrcTy to WideTy. This does not affect the result, but since the
1663 // user requested this size, it is probably better handled than SrcTy and
1664 // should reduce the total number of legalization artifacts.
1665 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1666 SrcTy = WideTy;
1667 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1668 }
1669
1670 // There's no unmerge type to target. Directly extract the bits from the
1671 // source type.
1672 unsigned DstSize = DstTy.getSizeInBits();
1673
1674 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1675 for (int I = 1; I != NumDst; ++I) {
1676 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1677 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1678 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1679 }
1680
1681 MI.eraseFromParent();
1682 return Legalized;
1683 }
1684
1685 // Extend the source to a wider type.
1686 LLT LCMTy = getLCMType(SrcTy, WideTy);
1687
1688 Register WideSrc = SrcReg;
1689 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
1690 // TODO: If this is an integral address space, cast to integer and anyext.
1691 if (SrcTy.isPointer()) {
1692 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
1693 return UnableToLegalize;
1694 }
1695
1696 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
1697 }
1698
1699 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
1700
1701 // Create a sequence of unmerges and merges to the original results. Since we
1702 // may have widened the source, we will need to pad the results with dead defs
1703 // to cover the source register.
1704 // e.g. widen s48 to s64:
1705 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
1706 //
1707 // =>
1708 // %4:_(s192) = G_ANYEXT %0:_(s96)
1709 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
1710 // ; unpack to GCD type, with extra dead defs
1711 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
1712 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
1713 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
1714 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
1715 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
1716 const LLT GCDTy = getGCDType(WideTy, DstTy);
1717 const int NumUnmerge = Unmerge->getNumOperands() - 1;
1718 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
1719
1720 // Directly unmerge to the destination without going through a GCD type
1721 // if possible
1722 if (PartsPerRemerge == 1) {
1723 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
1724
1725 for (int I = 0; I != NumUnmerge; ++I) {
1726 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
1727
1728 for (int J = 0; J != PartsPerUnmerge; ++J) {
1729 int Idx = I * PartsPerUnmerge + J;
1730 if (Idx < NumDst)
1731 MIB.addDef(MI.getOperand(Idx).getReg());
1732 else {
1733 // Create dead def for excess components.
1734 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
1735 }
1736 }
1737
1738 MIB.addUse(Unmerge.getReg(I));
1739 }
1740 } else {
1741 SmallVector<Register, 16> Parts;
1742 for (int J = 0; J != NumUnmerge; ++J)
1743 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
1744
1745 SmallVector<Register, 8> RemergeParts;
1746 for (int I = 0; I != NumDst; ++I) {
1747 for (int J = 0; J < PartsPerRemerge; ++J) {
1748 const int Idx = I * PartsPerRemerge + J;
1749 RemergeParts.emplace_back(Parts[Idx]);
1750 }
1751
1752 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
1753 RemergeParts.clear();
1754 }
1755 }
1756
1757 MI.eraseFromParent();
1758 return Legalized;
1759}
1760
1762LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1763 LLT WideTy) {
1764 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
1765 unsigned Offset = MI.getOperand(2).getImm();
1766
1767 if (TypeIdx == 0) {
1768 if (SrcTy.isVector() || DstTy.isVector())
1769 return UnableToLegalize;
1770
1771 SrcOp Src(SrcReg);
1772 if (SrcTy.isPointer()) {
1773 // Extracts from pointers can be handled only if they are really just
1774 // simple integers.
1776 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1777 return UnableToLegalize;
1778
1779 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1780 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1781 SrcTy = SrcAsIntTy;
1782 }
1783
1784 if (DstTy.isPointer())
1785 return UnableToLegalize;
1786
1787 if (Offset == 0) {
1788 // Avoid a shift in the degenerate case.
1789 MIRBuilder.buildTrunc(DstReg,
1790 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1791 MI.eraseFromParent();
1792 return Legalized;
1793 }
1794
1795 // Do a shift in the source type.
1796 LLT ShiftTy = SrcTy;
1797 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1798 Src = MIRBuilder.buildAnyExt(WideTy, Src);
1799 ShiftTy = WideTy;
1800 }
1801
1802 auto LShr = MIRBuilder.buildLShr(
1803 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1804 MIRBuilder.buildTrunc(DstReg, LShr);
1805 MI.eraseFromParent();
1806 return Legalized;
1807 }
1808
1809 if (SrcTy.isScalar()) {
1811 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1813 return Legalized;
1814 }
1815
1816 if (!SrcTy.isVector())
1817 return UnableToLegalize;
1818
1819 if (DstTy != SrcTy.getElementType())
1820 return UnableToLegalize;
1821
1822 if (Offset % SrcTy.getScalarSizeInBits() != 0)
1823 return UnableToLegalize;
1824
1826 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1827
1828 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1829 Offset);
1830 widenScalarDst(MI, WideTy.getScalarType(), 0);
1832 return Legalized;
1833}
1834
1836LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1837 LLT WideTy) {
1838 if (TypeIdx != 0 || WideTy.isVector())
1839 return UnableToLegalize;
1841 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1842 widenScalarDst(MI, WideTy);
1844 return Legalized;
1845}
1846
1848LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
1849 LLT WideTy) {
1850 unsigned Opcode;
1851 unsigned ExtOpcode;
1852 std::optional<Register> CarryIn;
1853 switch (MI.getOpcode()) {
1854 default:
1855 llvm_unreachable("Unexpected opcode!");
1856 case TargetOpcode::G_SADDO:
1857 Opcode = TargetOpcode::G_ADD;
1858 ExtOpcode = TargetOpcode::G_SEXT;
1859 break;
1860 case TargetOpcode::G_SSUBO:
1861 Opcode = TargetOpcode::G_SUB;
1862 ExtOpcode = TargetOpcode::G_SEXT;
1863 break;
1864 case TargetOpcode::G_UADDO:
1865 Opcode = TargetOpcode::G_ADD;
1866 ExtOpcode = TargetOpcode::G_ZEXT;
1867 break;
1868 case TargetOpcode::G_USUBO:
1869 Opcode = TargetOpcode::G_SUB;
1870 ExtOpcode = TargetOpcode::G_ZEXT;
1871 break;
1872 case TargetOpcode::G_SADDE:
1873 Opcode = TargetOpcode::G_UADDE;
1874 ExtOpcode = TargetOpcode::G_SEXT;
1875 CarryIn = MI.getOperand(4).getReg();
1876 break;
1877 case TargetOpcode::G_SSUBE:
1878 Opcode = TargetOpcode::G_USUBE;
1879 ExtOpcode = TargetOpcode::G_SEXT;
1880 CarryIn = MI.getOperand(4).getReg();
1881 break;
1882 case TargetOpcode::G_UADDE:
1883 Opcode = TargetOpcode::G_UADDE;
1884 ExtOpcode = TargetOpcode::G_ZEXT;
1885 CarryIn = MI.getOperand(4).getReg();
1886 break;
1887 case TargetOpcode::G_USUBE:
1888 Opcode = TargetOpcode::G_USUBE;
1889 ExtOpcode = TargetOpcode::G_ZEXT;
1890 CarryIn = MI.getOperand(4).getReg();
1891 break;
1892 }
1893
1894 if (TypeIdx == 1) {
1895 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
1896
1898 if (CarryIn)
1899 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
1900 widenScalarDst(MI, WideTy, 1);
1901
1903 return Legalized;
1904 }
1905
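 // The overflow check below redoes the narrow operation in the wide type and
 // compares against a re-extended truncation of the wide result. As an
 // illustrative example, for an s8 G_SADDO widened to s32: sext(100) +
 // sext(100) = 200; truncating to s8 gives -56, and sign-extending -56 back
 // to s32 yields -56 != 200, so the overflow bit is set.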
1906 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
1907 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
1908 // Do the arithmetic in the larger type.
1909 Register NewOp;
1910 if (CarryIn) {
1911 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
1912 NewOp = MIRBuilder
1913 .buildInstr(Opcode, {WideTy, CarryOutTy},
1914 {LHSExt, RHSExt, *CarryIn})
1915 .getReg(0);
1916 } else {
1917 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
1918 }
1919 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1920 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
1921 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
1922 // There is no overflow if the ExtOp is the same as NewOp.
1923 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
1924 // Now trunc the NewOp to the original result.
1925 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1926 MI.eraseFromParent();
1927 return Legalized;
1928}
1929
1931LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
1932 LLT WideTy) {
1933 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
1934 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
1935 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
1936 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
1937 MI.getOpcode() == TargetOpcode::G_USHLSAT;
1938 // We can convert this to:
1939 // 1. Any extend iN to iM
1940 // 2. SHL by M-N
1941 // 3. [US][ADD|SUB|SHL]SAT
1942 // 4. L/ASHR by M-N
1943 //
1944 // It may be more efficient to lower this to a min and a max operation in
1945 // the higher precision arithmetic if the promoted operation isn't legal,
1946 // but this decision is up to the target's lowering request.
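 // As an illustrative sketch (hypothetical register names), widening an s8
 // G_SADDSAT to s32 becomes:
 //   %l:_(s32) = G_ANYEXT %lhs:_(s8)
 //   %r:_(s32) = G_ANYEXT %rhs:_(s8)
 //   %k:_(s32) = G_CONSTANT i32 24
 //   %ls:_(s32) = G_SHL %l, %k
 //   %rs:_(s32) = G_SHL %r, %k
 //   %sat:_(s32) = G_SADDSAT %ls, %rs
 //   %shr:_(s32) = G_ASHR %sat, %k
 //   %res:_(s8) = G_TRUNC %shr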
1947 Register DstReg = MI.getOperand(0).getReg();
1948
1949 unsigned NewBits = WideTy.getScalarSizeInBits();
1950 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
1951
1952 // For the shift opcodes, the RHS must be zero-extended to preserve its
1953 // unsigned value, and must not be left-shifted, so the shift amount itself is
1954 // preserved.
1954 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
1955 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
1956 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
1957 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
1958 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
1959 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
1960
1961 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
1962 {ShiftL, ShiftR}, MI.getFlags());
1963
1964 // Use a shift that will preserve the number of sign bits when the trunc is
1965 // folded away.
1966 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
1967 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
1968
1969 MIRBuilder.buildTrunc(DstReg, Result);
1970 MI.eraseFromParent();
1971 return Legalized;
1972}
1973
1975LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
1976 LLT WideTy) {
1977 if (TypeIdx == 1) {
1979 widenScalarDst(MI, WideTy, 1);
1981 return Legalized;
1982 }
1983
1984 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
1985 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
1986 LLT SrcTy = MRI.getType(LHS);
1987 LLT OverflowTy = MRI.getType(OriginalOverflow);
1988 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
1989
1990 // To determine if the result overflowed in the larger type, we extend the
1991 // input to the larger type, do the multiply (checking if it overflows),
1992 // then also check the high bits of the result to see if overflow happened
1993 // there.
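 // As an illustrative sketch (hypothetical register names), widening an s16
 // G_UMULO to s32 (where 32 >= 2 * 16, so only the high half needs checking):
 //   %a32:_(s32) = G_ZEXT %a:_(s16)
 //   %b32:_(s32) = G_ZEXT %b:_(s16)
 //   %mul:_(s32), %deadov:_(s1) = G_UMULO %a32, %b32
 //   %res:_(s16) = G_TRUNC %mul
 //   %mask:_(s32) = G_CONSTANT i32 65535
 //   %lo:_(s32) = G_AND %mul, %mask
 //   %ovf:_(s1) = G_ICMP intpred(ne), %mul, %lo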
1994 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1995 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
1996 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
1997
1998 auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
1999 {LeftOperand, RightOperand});
2000 auto Mul = Mulo->getOperand(0);
2001 MIRBuilder.buildTrunc(Result, Mul);
2002
2003 MachineInstrBuilder ExtResult;
2004 // Overflow occurred if it occurred in the larger type, or if the high part
2005 // of the result does not zero/sign-extend the low part. Check this second
2006 // possibility first.
2007 if (IsSigned) {
2008 // For signed, overflow occurred when the high part does not sign-extend
2009 // the low part.
2010 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2011 } else {
2012 // Unsigned overflow occurred when the high part does not zero-extend the
2013 // low part.
2014 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2015 }
2016
2017 // Multiplication cannot overflow if WideTy is at least twice the original
2018 // width, so we don't need to check the overflow result of the wider Mulo.
2019 if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
2020 auto Overflow =
2021 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2022 // Finally check if the multiplication in the larger type itself overflowed.
2023 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2024 } else {
2025 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2026 }
2027 MI.eraseFromParent();
2028 return Legalized;
2029}
2030
2033 switch (MI.getOpcode()) {
2034 default:
2035 return UnableToLegalize;
2036 case TargetOpcode::G_ATOMICRMW_XCHG:
2037 case TargetOpcode::G_ATOMICRMW_ADD:
2038 case TargetOpcode::G_ATOMICRMW_SUB:
2039 case TargetOpcode::G_ATOMICRMW_AND:
2040 case TargetOpcode::G_ATOMICRMW_OR:
2041 case TargetOpcode::G_ATOMICRMW_XOR:
2042 case TargetOpcode::G_ATOMICRMW_MIN:
2043 case TargetOpcode::G_ATOMICRMW_MAX:
2044 case TargetOpcode::G_ATOMICRMW_UMIN:
2045 case TargetOpcode::G_ATOMICRMW_UMAX:
2046 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2048 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2049 widenScalarDst(MI, WideTy, 0);
2051 return Legalized;
2052 case TargetOpcode::G_ATOMIC_CMPXCHG:
2053 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2055 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2056 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2057 widenScalarDst(MI, WideTy, 0);
2059 return Legalized;
2060 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2061 if (TypeIdx == 0) {
2063 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2064 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2065 widenScalarDst(MI, WideTy, 0);
2067 return Legalized;
2068 }
2069 assert(TypeIdx == 1 &&
2070 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2072 widenScalarDst(MI, WideTy, 1);
2074 return Legalized;
2075 case TargetOpcode::G_EXTRACT:
2076 return widenScalarExtract(MI, TypeIdx, WideTy);
2077 case TargetOpcode::G_INSERT:
2078 return widenScalarInsert(MI, TypeIdx, WideTy);
2079 case TargetOpcode::G_MERGE_VALUES:
2080 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2081 case TargetOpcode::G_UNMERGE_VALUES:
2082 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2083 case TargetOpcode::G_SADDO:
2084 case TargetOpcode::G_SSUBO:
2085 case TargetOpcode::G_UADDO:
2086 case TargetOpcode::G_USUBO:
2087 case TargetOpcode::G_SADDE:
2088 case TargetOpcode::G_SSUBE:
2089 case TargetOpcode::G_UADDE:
2090 case TargetOpcode::G_USUBE:
2091 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2092 case TargetOpcode::G_UMULO:
2093 case TargetOpcode::G_SMULO:
2094 return widenScalarMulo(MI, TypeIdx, WideTy);
2095 case TargetOpcode::G_SADDSAT:
2096 case TargetOpcode::G_SSUBSAT:
2097 case TargetOpcode::G_SSHLSAT:
2098 case TargetOpcode::G_UADDSAT:
2099 case TargetOpcode::G_USUBSAT:
2100 case TargetOpcode::G_USHLSAT:
2101 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2102 case TargetOpcode::G_CTTZ:
2103 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2104 case TargetOpcode::G_CTLZ:
2105 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2106 case TargetOpcode::G_CTPOP: {
2107 if (TypeIdx == 0) {
2109 widenScalarDst(MI, WideTy, 0);
2111 return Legalized;
2112 }
2113
2114 Register SrcReg = MI.getOperand(1).getReg();
2115
2116 // First extend the input.
2117 unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2118 MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2119 ? TargetOpcode::G_ANYEXT
2120 : TargetOpcode::G_ZEXT;
2121 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2122 LLT CurTy = MRI.getType(SrcReg);
2123 unsigned NewOpc = MI.getOpcode();
2124 if (NewOpc == TargetOpcode::G_CTTZ) {
2125 // The count is the same in the larger type except if the original
2126 // value was zero. This can be handled by setting the bit just off
2127 // the top of the original type.
2128 auto TopBit =
2130 MIBSrc = MIRBuilder.buildOr(
2131 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2132 // Now we know the operand is non-zero, use the more relaxed opcode.
2133 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2134 }
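 // As an illustrative sketch, the G_CTTZ adjustment above for an s8 source
 // widened to s32 (hypothetical register names):
 //   %ext:_(s32) = G_ANYEXT %x:_(s8)
 //   %bit:_(s32) = G_CONSTANT i32 256   ; 1 << 8, just above the original width
 //   %nz:_(s32) = G_OR %ext, %bit
 //   %cnt:_(s32) = G_CTTZ_ZERO_UNDEF %nz
 //   %res:_(s8) = G_TRUNC %cnt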
2135
2136 // Perform the operation at the larger size.
2137 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2138 // This is already the correct result for CTPOP and the CTTZ variants.
2139 if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
2140 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2141 // The correct result is NewOp - (difference between WideTy and CurTy).
2142 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2143 MIBNewOp = MIRBuilder.buildSub(
2144 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2145 }
2146
2147 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2148 MI.eraseFromParent();
2149 return Legalized;
2150 }
2151 case TargetOpcode::G_BSWAP: {
2153 Register DstReg = MI.getOperand(0).getReg();
2154
2155 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2156 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2157 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2158 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2159
2160 MI.getOperand(0).setReg(DstExt);
2161
2163
2164 LLT Ty = MRI.getType(DstReg);
2165 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2166 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2167 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2168
2169 MIRBuilder.buildTrunc(DstReg, ShrReg);
2171 return Legalized;
2172 }
2173 case TargetOpcode::G_BITREVERSE: {
2175
2176 Register DstReg = MI.getOperand(0).getReg();
2177 LLT Ty = MRI.getType(DstReg);
2178 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2179
2180 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2181 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2182 MI.getOperand(0).setReg(DstExt);
2184
2185 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2186 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2187 MIRBuilder.buildTrunc(DstReg, Shift);
2189 return Legalized;
2190 }
2191 case TargetOpcode::G_FREEZE:
2193 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2194 widenScalarDst(MI, WideTy);
2196 return Legalized;
2197
2198 case TargetOpcode::G_ABS:
2200 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2201 widenScalarDst(MI, WideTy);
2203 return Legalized;
2204
2205 case TargetOpcode::G_ADD:
2206 case TargetOpcode::G_AND:
2207 case TargetOpcode::G_MUL:
2208 case TargetOpcode::G_OR:
2209 case TargetOpcode::G_XOR:
2210 case TargetOpcode::G_SUB:
2211 // Perform the operation at the larger width (any extension is fine here, high
2212 // bits don't affect the result) and then truncate the result back to the
2213 // original type.
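 // As an illustrative sketch, widening an s8 G_ADD to s32:
 //   %a32:_(s32) = G_ANYEXT %a:_(s8)
 //   %b32:_(s32) = G_ANYEXT %b:_(s8)
 //   %sum:_(s32) = G_ADD %a32, %b32
 //   %res:_(s8) = G_TRUNC %sum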
2215 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2216 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2217 widenScalarDst(MI, WideTy);
2219 return Legalized;
2220
2221 case TargetOpcode::G_SBFX:
2222 case TargetOpcode::G_UBFX:
2224
2225 if (TypeIdx == 0) {
2226 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2227 widenScalarDst(MI, WideTy);
2228 } else {
2229 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2230 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2231 }
2232
2234 return Legalized;
2235
2236 case TargetOpcode::G_SHL:
2238
2239 if (TypeIdx == 0) {
2240 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2241 widenScalarDst(MI, WideTy);
2242 } else {
2243 assert(TypeIdx == 1);
2244 // The "number of bits to shift" operand must preserve its value as an
2245 // unsigned integer:
2246 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2247 }
2248
2250 return Legalized;
2251
2252 case TargetOpcode::G_SDIV:
2253 case TargetOpcode::G_SREM:
2254 case TargetOpcode::G_SMIN:
2255 case TargetOpcode::G_SMAX:
2257 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2258 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2259 widenScalarDst(MI, WideTy);
2261 return Legalized;
2262
2263 case TargetOpcode::G_SDIVREM:
2265 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2266 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2267 widenScalarDst(MI, WideTy);
2268 widenScalarDst(MI, WideTy, 1);
2270 return Legalized;
2271
2272 case TargetOpcode::G_ASHR:
2273 case TargetOpcode::G_LSHR:
2275
2276 if (TypeIdx == 0) {
2277 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2278 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2279
2280 widenScalarSrc(MI, WideTy, 1, CvtOp);
2281 widenScalarDst(MI, WideTy);
2282 } else {
2283 assert(TypeIdx == 1);
2284 // The "number of bits to shift" operand must preserve its value as an
2285 // unsigned integer:
2286 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2287 }
2288
2290 return Legalized;
2291 case TargetOpcode::G_UDIV:
2292 case TargetOpcode::G_UREM:
2293 case TargetOpcode::G_UMIN:
2294 case TargetOpcode::G_UMAX:
2296 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2297 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2298 widenScalarDst(MI, WideTy);
2300 return Legalized;
2301
2302 case TargetOpcode::G_UDIVREM:
2304 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2305 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2306 widenScalarDst(MI, WideTy);
2307 widenScalarDst(MI, WideTy, 1);
2309 return Legalized;
2310
2311 case TargetOpcode::G_SELECT:
2313 if (TypeIdx == 0) {
2314 // Perform operation at larger width (any extension is fine here, high
2315 // bits don't affect the result) and then truncate the result back to the
2316 // original type.
2317 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2318 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2319 widenScalarDst(MI, WideTy);
2320 } else {
2321 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2322 // Explicit extension is required here since high bits affect the result.
2323 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2324 }
2326 return Legalized;
2327
2328 case TargetOpcode::G_FPTOSI:
2329 case TargetOpcode::G_FPTOUI:
2331
2332 if (TypeIdx == 0)
2333 widenScalarDst(MI, WideTy);
2334 else
2335 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2336
2338 return Legalized;
2339 case TargetOpcode::G_SITOFP:
2341
2342 if (TypeIdx == 0)
2343 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2344 else
2345 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2346
2348 return Legalized;
2349 case TargetOpcode::G_UITOFP:
2351
2352 if (TypeIdx == 0)
2353 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2354 else
2355 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2356
2358 return Legalized;
2359 case TargetOpcode::G_LOAD:
2360 case TargetOpcode::G_SEXTLOAD:
2361 case TargetOpcode::G_ZEXTLOAD:
2363 widenScalarDst(MI, WideTy);
2365 return Legalized;
2366
2367 case TargetOpcode::G_STORE: {
2368 if (TypeIdx != 0)
2369 return UnableToLegalize;
2370
2371 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2372 if (!Ty.isScalar())
2373 return UnableToLegalize;
2374
2376
2377 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2378 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2379 widenScalarSrc(MI, WideTy, 0, ExtType);
2380
2382 return Legalized;
2383 }
2384 case TargetOpcode::G_CONSTANT: {
2385 MachineOperand &SrcMO = MI.getOperand(1);
2387 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2388 MRI.getType(MI.getOperand(0).getReg()));
2389 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2390 ExtOpc == TargetOpcode::G_ANYEXT) &&
2391 "Illegal Extend");
2392 const APInt &SrcVal = SrcMO.getCImm()->getValue();
2393 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2394 ? SrcVal.sext(WideTy.getSizeInBits())
2395 : SrcVal.zext(WideTy.getSizeInBits());
2397 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2398
2399 widenScalarDst(MI, WideTy);
2401 return Legalized;
2402 }
2403 case TargetOpcode::G_FCONSTANT: {
2404 // To avoid changing the bits of the constant due to extension to a larger
2405 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
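 // As an illustrative sketch, an s16 G_FCONSTANT of half 1.0 widened to s32
 // becomes (hypothetical register names):
 //   %wide:_(s32) = G_CONSTANT i32 15360   ; 0x3C00, the bit pattern of 1.0
 //   %res:_(s16) = G_TRUNC %wide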
2406 MachineOperand &SrcMO = MI.getOperand(1);
2407 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
2409 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
2410 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
2411 MI.eraseFromParent();
2412 return Legalized;
2413 }
2414 case TargetOpcode::G_IMPLICIT_DEF: {
2416 widenScalarDst(MI, WideTy);
2418 return Legalized;
2419 }
2420 case TargetOpcode::G_BRCOND:
2422 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2424 return Legalized;
2425
2426 case TargetOpcode::G_FCMP:
2428 if (TypeIdx == 0)
2429 widenScalarDst(MI, WideTy);
2430 else {
2431 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2432 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2433 }
2435 return Legalized;
2436
2437 case TargetOpcode::G_ICMP:
2439 if (TypeIdx == 0)
2440 widenScalarDst(MI, WideTy);
2441 else {
2442 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2443 MI.getOperand(1).getPredicate()))
2444 ? TargetOpcode::G_SEXT
2445 : TargetOpcode::G_ZEXT;
2446 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2447 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2448 }
2450 return Legalized;
2451
2452 case TargetOpcode::G_PTR_ADD:
2453 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2455 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2457 return Legalized;
2458
2459 case TargetOpcode::G_PHI: {
2460 assert(TypeIdx == 0 && "Expecting only Idx 0");
2461
2463 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2464 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2466 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2467 }
2468
2469 MachineBasicBlock &MBB = *MI.getParent();
2471 widenScalarDst(MI, WideTy);
2473 return Legalized;
2474 }
2475 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2476 if (TypeIdx == 0) {
2477 Register VecReg = MI.getOperand(1).getReg();
2478 LLT VecTy = MRI.getType(VecReg);
2480
2482 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2483 TargetOpcode::G_ANYEXT);
2484
2485 widenScalarDst(MI, WideTy, 0);
2487 return Legalized;
2488 }
2489
2490 if (TypeIdx != 2)
2491 return UnableToLegalize;
2493 // TODO: Probably should be zext
2494 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2496 return Legalized;
2497 }
2498 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2499 if (TypeIdx == 0) {
2501 const LLT WideEltTy = WideTy.getElementType();
2502
2503 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2504 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
2505 widenScalarDst(MI, WideTy, 0);
2507 return Legalized;
2508 }
2509
2510 if (TypeIdx == 1) {
2512
2513 Register VecReg = MI.getOperand(1).getReg();
2514 LLT VecTy = MRI.getType(VecReg);
2515 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2516
2517 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2518 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2519 widenScalarDst(MI, WideVecTy, 0);
2521 return Legalized;
2522 }
2523
2524 if (TypeIdx == 2) {
2526 // TODO: Probably should be zext
2527 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2529 return Legalized;
2530 }
2531
2532 return UnableToLegalize;
2533 }
2534 case TargetOpcode::G_FADD:
2535 case TargetOpcode::G_FMUL:
2536 case TargetOpcode::G_FSUB:
2537 case TargetOpcode::G_FMA:
2538 case TargetOpcode::G_FMAD:
2539 case TargetOpcode::G_FNEG:
2540 case TargetOpcode::G_FABS:
2541 case TargetOpcode::G_FCANONICALIZE:
2542 case TargetOpcode::G_FMINNUM:
2543 case TargetOpcode::G_FMAXNUM:
2544 case TargetOpcode::G_FMINNUM_IEEE:
2545 case TargetOpcode::G_FMAXNUM_IEEE:
2546 case TargetOpcode::G_FMINIMUM:
2547 case TargetOpcode::G_FMAXIMUM:
2548 case TargetOpcode::G_FDIV:
2549 case TargetOpcode::G_FREM:
2550 case TargetOpcode::G_FCEIL:
2551 case TargetOpcode::G_FFLOOR:
2552 case TargetOpcode::G_FCOS:
2553 case TargetOpcode::G_FSIN:
2554 case TargetOpcode::G_FLOG10:
2555 case TargetOpcode::G_FLOG:
2556 case TargetOpcode::G_FLOG2:
2557 case TargetOpcode::G_FRINT:
2558 case TargetOpcode::G_FNEARBYINT:
2559 case TargetOpcode::G_FSQRT:
2560 case TargetOpcode::G_FEXP:
2561 case TargetOpcode::G_FEXP2:
2562 case TargetOpcode::G_FEXP10:
2563 case TargetOpcode::G_FPOW:
2564 case TargetOpcode::G_INTRINSIC_TRUNC:
2565 case TargetOpcode::G_INTRINSIC_ROUND:
2566 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2567 assert(TypeIdx == 0);
2569
2570 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2571 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2572
2573 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2575 return Legalized;
2576 case TargetOpcode::G_FPOWI:
2577 case TargetOpcode::G_FLDEXP:
2578 case TargetOpcode::G_STRICT_FLDEXP: {
2579 if (TypeIdx == 0) {
2580 if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2581 return UnableToLegalize;
2582
2584 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2585 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2587 return Legalized;
2588 }
2589
2590 if (TypeIdx == 1) {
2591 // For some reason SelectionDAG tries to promote to a libcall without
2592 // actually changing the integer type for promotion.
2594 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2596 return Legalized;
2597 }
2598
2599 return UnableToLegalize;
2600 }
2601 case TargetOpcode::G_FFREXP: {
2603
2604 if (TypeIdx == 0) {
2605 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2606 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2607 } else {
2608 widenScalarDst(MI, WideTy, 1);
2609 }
2610
2612 return Legalized;
2613 }
2614 case TargetOpcode::G_INTTOPTR:
2615 if (TypeIdx != 1)
2616 return UnableToLegalize;
2617
2619 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2621 return Legalized;
2622 case TargetOpcode::G_PTRTOINT:
2623 if (TypeIdx != 0)
2624 return UnableToLegalize;
2625
2627 widenScalarDst(MI, WideTy, 0);
2629 return Legalized;
2630 case TargetOpcode::G_BUILD_VECTOR: {
2632
2633 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2634 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2635 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2636
2637 // Avoid changing the result vector type if the source element type was
2638 // requested.
2639 if (TypeIdx == 1) {
2640 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2641 } else {
2642 widenScalarDst(MI, WideTy, 0);
2643 }
2644
2646 return Legalized;
2647 }
2648 case TargetOpcode::G_SEXT_INREG:
2649 if (TypeIdx != 0)
2650 return UnableToLegalize;
2651
2653 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2654 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2656 return Legalized;
2657 case TargetOpcode::G_PTRMASK: {
2658 if (TypeIdx != 1)
2659 return UnableToLegalize;
2661 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2663 return Legalized;
2664 }
2665 case TargetOpcode::G_VECREDUCE_FMIN:
2666 case TargetOpcode::G_VECREDUCE_FMAX:
2667 case TargetOpcode::G_VECREDUCE_FMINIMUM:
2668 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
2669 if (TypeIdx != 0)
2670 return UnableToLegalize;
2672 Register VecReg = MI.getOperand(1).getReg();
2673 LLT VecTy = MRI.getType(VecReg);
2674 LLT WideVecTy = VecTy.isVector()
2675 ? LLT::vector(VecTy.getElementCount(), WideTy)
2676 : WideTy;
2677 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
2678 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2680 return Legalized;
2681 }
2682}
2683
2685 MachineIRBuilder &B, Register Src, LLT Ty) {
2686 auto Unmerge = B.buildUnmerge(Ty, Src);
2687 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2688 Pieces.push_back(Unmerge.getReg(I));
2689}
2690
2693 Register Dst = MI.getOperand(0).getReg();
2694
2697
2698 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
2699 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
2700 Align Alignment = Align(DL.getABITypeAlign(
2702
2704 AddrPtrTy, MF.getConstantPool()->getConstantPoolIndex(
2705 MI.getOperand(1).getFPImm(), Alignment));
2706
2709 MRI.getType(Dst), Alignment);
2710
2711 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Addr, *MMO);
2712 MI.eraseFromParent();
2713
2714 return Legalized;
2715}
2716
2719 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
2720 if (SrcTy.isVector()) {
2721 LLT SrcEltTy = SrcTy.getElementType();
2723
2724 if (DstTy.isVector()) {
2725 int NumDstElt = DstTy.getNumElements();
2726 int NumSrcElt = SrcTy.getNumElements();
2727
2728 LLT DstEltTy = DstTy.getElementType();
2729 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
2730 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
2731
2732 // If there's an element size mismatch, insert intermediate casts to match
2733 // the result element type.
2734 if (NumSrcElt < NumDstElt) { // Source element type is larger.
2735 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
2736 //
2737 // =>
2738 //
2739 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
2740 // %4:_(<2 x s8>) = G_BITCAST %2
2741 // %5:_(<2 x s8>) = G_BITCAST %3
2742 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
2743 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
2744 SrcPartTy = SrcEltTy;
2745 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
2746 //
2747 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
2748 //
2749 // =>
2750 //
2751 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
2752 // %4:_(s16) = G_BITCAST %2
2753 // %5:_(s16) = G_BITCAST %3
2754 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
2755 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
2756 DstCastTy = DstEltTy;
2757 }
2758
2759 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
2760 for (Register &SrcReg : SrcRegs)
2761 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
2762 } else
2763 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
2764
2765 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
2766 MI.eraseFromParent();
2767 return Legalized;
2768 }
2769
2770 if (DstTy.isVector()) {
2772 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2773 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
2774 MI.eraseFromParent();
2775 return Legalized;
2776 }
2777
2778 return UnableToLegalize;
2779}
2780
2781/// Figure out the bit offset into a register when coercing a vector index for
2782/// the wide element type. This is only for the case when promoting vector to
2783/// one with larger elements.
2784///
2785///
2786/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2787/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
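/// As an illustrative example, with 32-bit wide elements built from 8-bit
/// source elements, Log2(32 / 8) = 2, so %offset_idx = %idx & 3 selects a
/// byte within the wide element and %offset_bits = %offset_idx << 3 is the
/// bit offset of that byte.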
2789 Register Idx,
2790 unsigned NewEltSize,
2791 unsigned OldEltSize) {
2792 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2793 LLT IdxTy = B.getMRI()->getType(Idx);
2794
2795 // Now figure out the amount we need to shift to get the target bits.
2796 auto OffsetMask = B.buildConstant(
2797 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
2798 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
2799 return B.buildShl(IdxTy, OffsetIdx,
2800 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
2801}
2802
2803/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
2804/// is casting to a vector with a smaller element size, perform multiple element
2805/// extracts and merge the results. If this is coercing to a vector with larger
2806/// elements, index the bitcasted vector and extract the target element with bit
2807/// operations. This is intended to force the indexing in the native register
2808/// size for architectures that can dynamically index the register file.
2811 LLT CastTy) {
2812 if (TypeIdx != 1)
2813 return UnableToLegalize;
2814
2815 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
2816
2817 LLT SrcEltTy = SrcVecTy.getElementType();
2818 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2819 unsigned OldNumElts = SrcVecTy.getNumElements();
2820
2821 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2822 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2823
2824 const unsigned NewEltSize = NewEltTy.getSizeInBits();
2825 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
2826 if (NewNumElts > OldNumElts) {
2827 // Decreasing the vector element size
2828 //
2829 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
2830 // =>
2831 // v4i32:castx = bitcast x:v2i64
2832 //
2833 // i64 = bitcast
2834 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
2835 // (i32 (extract_vector_elt castx, (2 * y + 1)))
2836 //
2837 if (NewNumElts % OldNumElts != 0)
2838 return UnableToLegalize;
2839
2840 // Type of the intermediate result vector.
2841 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
2842 LLT MidTy =
2843 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
2844
2845 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
2846
2847 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
2848 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
2849
2850 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
2851 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
2852 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
2853 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
2854 NewOps[I] = Elt.getReg(0);
2855 }
2856
2857 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
2858 MIRBuilder.buildBitcast(Dst, NewVec);
2859 MI.eraseFromParent();
2860 return Legalized;
2861 }
2862
2863 if (NewNumElts < OldNumElts) {
2864 if (NewEltSize % OldEltSize != 0)
2865 return UnableToLegalize;
2866
2867 // This only depends on powers of 2 because we use bit tricks to figure out
2868 // the bit offset we need to shift to get the target element. A general
2869 // expansion could emit division/multiply.
2870 if (!isPowerOf2_32(NewEltSize / OldEltSize))
2871 return UnableToLegalize;
2872
2873 // Increasing the vector element size.
2874 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
2875 //
2876 // =>
2877 //
2878 // %cast = G_BITCAST %vec
2879 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
2880 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
2881 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2882 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2883 // %elt_bits = G_LSHR %wide_elt, %offset_bits
2884 // %elt = G_TRUNC %elt_bits
2885
2886 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2887 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2888
2889 // Divide to get the index in the wider element type.
2890 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2891
2892 Register WideElt = CastVec;
2893 if (CastTy.isVector()) {
2894 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2895 ScaledIdx).getReg(0);
2896 }
2897
2898 // Compute the bit offset into the register of the target element.
2900 MIRBuilder, Idx, NewEltSize, OldEltSize);
2901
2902 // Shift the wide element to get the target element.
2903 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
2904 MIRBuilder.buildTrunc(Dst, ExtractedBits);
2905 MI.eraseFromParent();
2906 return Legalized;
2907 }
2908
2909 return UnableToLegalize;
2910}
2911
2912/// Emit code to insert \p InsertReg into \p TargetReg at bit offset \p
2913/// OffsetBits, while preserving the other bits in \p TargetReg:
2914///
2915/// (ZExt(InsertReg) << OffsetBits) | (TargetReg & ~(((1 << InsertReg.size()) - 1) << OffsetBits))
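/// As an illustrative example, inserting an s8 value at bit offset 16 of an
/// s32 target uses the mask 0xFF << 16 = 0x00FF0000:
///   result = (TargetReg & ~0x00FF0000) | (ZExt(InsertReg) << 16)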
2917 Register TargetReg, Register InsertReg,
2918 Register OffsetBits) {
2919 LLT TargetTy = B.getMRI()->getType(TargetReg);
2920 LLT InsertTy = B.getMRI()->getType(InsertReg);
2921 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
2922 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
2923
2924 // Produce a bitmask of the value to insert
2925 auto EltMask = B.buildConstant(
2926 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
2927 InsertTy.getSizeInBits()));
2928 // Shift it into position
2929 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
2930 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
2931
2932 // Clear out the bits in the wide element
2933 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
2934
2935 // The zero-extended value to insert already has zeros in its high bits, so
2936 // OR it into the masked wide element.
2937 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
2938}
2939
2940/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
2941/// is increasing the element size, perform the indexing in the target element
2942/// type, and use bit operations to insert at the element position. This is
2943/// intended for architectures that can dynamically index the register file and
2944/// want to force indexing in the native register size.
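/// As an illustrative sketch (hypothetical register names, assuming an s32
/// index type), inserting %val:_(s8) into %vec:_(<16 x s8>) at %idx via a
/// <4 x s32> cast:
///   %cast:_(<4 x s32>) = G_BITCAST %vec
///   %c2:_(s32) = G_CONSTANT i32 2
///   %scaled:_(s32) = G_LSHR %idx, %c2
///   %wide:_(s32) = G_EXTRACT_VECTOR_ELT %cast, %scaled
///   ; bit-field insert of %val into %wide at ((%idx & 3) << 3) gives %ins
///   %new:_(<4 x s32>) = G_INSERT_VECTOR_ELT %cast, %ins, %scaled
///   %dst:_(<16 x s8>) = G_BITCAST %new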
2947 LLT CastTy) {
2948 if (TypeIdx != 0)
2949 return UnableToLegalize;
2950
2951 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
2952 MI.getFirst4RegLLTs();
2953 LLT VecTy = DstTy;
2954
2955 LLT VecEltTy = VecTy.getElementType();
2956 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2957 const unsigned NewEltSize = NewEltTy.getSizeInBits();
2958 const unsigned OldEltSize = VecEltTy.getSizeInBits();
2959
2960 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2961 unsigned OldNumElts = VecTy.getNumElements();
2962
2963 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2964 if (NewNumElts < OldNumElts) {
2965 if (NewEltSize % OldEltSize != 0)
2966 return UnableToLegalize;
2967
2968 // This only depends on powers of 2 because we use bit tricks to figure out
2969 // the bit offset we need to shift to get the target element. A general
2970 // expansion could emit division/multiply.
2971 if (!isPowerOf2_32(NewEltSize / OldEltSize))
2972 return UnableToLegalize;
2973
2974 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2975 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2976
2977 // Divide to get the index in the wider element type.
2978 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2979
2980 Register ExtractedElt = CastVec;
2981 if (CastTy.isVector()) {
2982 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2983 ScaledIdx).getReg(0);
2984 }
2985
2986 // Compute the bit offset into the register of the target element.
2988 MIRBuilder, Idx, NewEltSize, OldEltSize);
2989
2990 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
2991 Val, OffsetBits);
2992 if (CastTy.isVector()) {
2994 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
2995 }
2996
2997 MIRBuilder.buildBitcast(Dst, InsertedElt);
2998 MI.eraseFromParent();
2999 return Legalized;
3000 }
3001
3002 return UnableToLegalize;
3003}
3004
3006 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3007 Register DstReg = LoadMI.getDstReg();
3008 Register PtrReg = LoadMI.getPointerReg();
3009 LLT DstTy = MRI.getType(DstReg);
3010 MachineMemOperand &MMO = LoadMI.getMMO();
3011 LLT MemTy = MMO.getMemoryType();
3013
3014 unsigned MemSizeInBits = MemTy.getSizeInBits();
3015 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3016
3017 if (MemSizeInBits != MemStoreSizeInBits) {
3018 if (MemTy.isVector())
3019 return UnableToLegalize;
3020
3021 // Promote to a byte-sized load if not loading an integral number of
3022 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3023 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
3024 MachineMemOperand *NewMMO =
3025 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
3026
3027 Register LoadReg = DstReg;
3028 LLT LoadTy = DstTy;
3029
3030 // If this wasn't already an extending load, we need to widen the result
3031 // register to avoid creating a load with a narrower result than the source.
3032 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3033 LoadTy = WideMemTy;
3034 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3035 }
3036
3037 if (isa<GSExtLoad>(LoadMI)) {
3038 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3039 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
3040 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3041 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3042 // The extra bits are guaranteed to be zero, since we stored them that way. A
3043 // zext load of the wide type therefore gives a zext from the memory type.
3044 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3045 } else {
3046 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3047 }
3048
3049 if (DstTy != LoadTy)
3050 MIRBuilder.buildTrunc(DstReg, LoadReg);
3051
3052 LoadMI.eraseFromParent();
3053 return Legalized;
3054 }
3055
3056 // Big endian lowering not implemented.
3058 return UnableToLegalize;
3059
3060 // This load needs splitting into power of 2 sized loads.
3061 //
3062 // Our strategy here is to generate anyextending loads for the smaller
3063 // types up to next power-2 result type, and then combine the two larger
3064 // result values together, before truncating back down to the non-pow-2
3065 // type.
3066 // E.g. v1 = i24 load =>
3067 // v2 = i32 zextload (2 byte)
3068 // v3 = i32 load (1 byte)
3069 // v4 = i32 shl v3, 16
3070 // v5 = i32 or v4, v2
3071 // v1 = i24 trunc v5
3072 // By doing this we generate the correct truncate which should get
3073 // combined away as an artifact with a matching extend.
3074
3075 uint64_t LargeSplitSize, SmallSplitSize;
3076
3077 if (!isPowerOf2_32(MemSizeInBits)) {
3078 // This load needs splitting into power of 2 sized loads.
3079 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
3080 SmallSplitSize = MemSizeInBits - LargeSplitSize;
3081 } else {
3082 // This is already a power of 2, but we still need to split this in half.
3083 //
3084 // Assume we're being asked to decompose an unaligned load.
3085 // TODO: If this requires multiple splits, handle them all at once.
3086 auto &Ctx = MF.getFunction().getContext();
3087 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3088 return UnableToLegalize;
3089
3090 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3091 }
3092
3093 if (MemTy.isVector()) {
3094 // TODO: Handle vector extloads
3095 if (MemTy != DstTy)
3096 return UnableToLegalize;
3097
3098 // TODO: We can do better than scalarizing the vector; we could at least split
3099 // it in half.
3100 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3101 }
3102
3103 MachineMemOperand *LargeMMO =
3104 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3105 MachineMemOperand *SmallMMO =
3106 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3107
3108 LLT PtrTy = MRI.getType(PtrReg);
3109 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
3110 LLT AnyExtTy = LLT::scalar(AnyExtSize);
3111 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3112 PtrReg, *LargeMMO);
3113
3114 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3115 LargeSplitSize / 8);
3116 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3117 auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3118 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3119 SmallPtr, *SmallMMO);
3120
3121 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
3122 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3123
3124 if (AnyExtTy == DstTy)
3125 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3126 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3127 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3128 MIRBuilder.buildTrunc(DstReg, {Or});
3129 } else {
3130 assert(DstTy.isPointer() && "expected pointer");
3131 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3132
3133 // FIXME: We currently consider this to be illegal for non-integral address
3134 // spaces, but we still need a way to reinterpret the bits.
3135 MIRBuilder.buildIntToPtr(DstReg, Or);
3136 }
3137
3138 LoadMI.eraseFromParent();
3139 return Legalized;
3140}
3141
3143 // Lower a non-power of 2 store into multiple pow-2 stores.
3144 // E.g. split an i24 store into an i16 store + i8 store.
3145 // We do this by first extending the stored value to the next largest power
3146 // of 2 type, and then using truncating stores to store the components.
3147 // As with G_LOAD, this generates an extend that can be artifact-combined
3148 // away instead of leaving behind extracts.
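 // As an illustrative sketch (hypothetical register names, assuming a 64-bit
 // pointer), an s24 G_STORE becomes:
 //   %ext:_(s32) = G_ANYEXT %val:_(s24)
 //   %c16:_(s32) = G_CONSTANT i32 16
 //   %hi:_(s32) = G_LSHR %ext, %c16
 //   G_STORE %ext(s32), %ptr(p0) :: (store (s16))   ; low two bytes
 //   %off:_(s64) = G_CONSTANT i64 2
 //   %ptr2:_(p0) = G_PTR_ADD %ptr, %off
 //   G_STORE %hi(s32), %ptr2(p0) :: (store (s8))    ; remaining byte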
3149 Register SrcReg = StoreMI.getValueReg();
3150 Register PtrReg = StoreMI.getPointerReg();
3151 LLT SrcTy = MRI.getType(SrcReg);
3153 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3154 LLT MemTy = MMO.getMemoryType();
3155
3156 unsigned StoreWidth = MemTy.getSizeInBits();
3157 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3158
3159 if (StoreWidth != StoreSizeInBits) {
3160 if (SrcTy.isVector())
3161 return UnableToLegalize;
3162
3163 // Promote to a byte-sized store with upper bits zero if not
3164 // storing an integral number of bytes. For example, promote
3165 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3166 LLT WideTy = LLT::scalar(StoreSizeInBits);
3167
3168 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3169 // Avoid creating a store with a narrower source than result.
3170 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3171 SrcTy = WideTy;
3172 }
3173
3174 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3175
3176 MachineMemOperand *NewMMO =
3177 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3178 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3179 StoreMI.eraseFromParent();
3180 return Legalized;
3181 }
3182
3183 if (MemTy.isVector()) {
3184 // TODO: Handle vector trunc stores
3185 if (MemTy != SrcTy)
3186 return UnableToLegalize;
3187
3188 // TODO: We can do better than scalarizing the vector; we could at least split
3189 // it in half.
3190 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3191 }
3192
3193 unsigned MemSizeInBits = MemTy.getSizeInBits();
3194 uint64_t LargeSplitSize, SmallSplitSize;
3195
3196 if (!isPowerOf2_32(MemSizeInBits)) {
3197 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
3198 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3199 } else {
3200 auto &Ctx = MF.getFunction().getContext();
3201 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3202 return UnableToLegalize; // Don't know what we're being asked to do.
3203
3204 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3205 }
3206
3207 // Extend to the next pow-2. If this store was itself the result of lowering,
3208 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3209 // that's wider than the stored size.
3210 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3211 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3212
3213 if (SrcTy.isPointer()) {
3214 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3215 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3216 }
3217
3218 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3219
3220 // Obtain the smaller value by shifting away the larger value.
3221 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3222 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
3223
3224 // Generate the PtrAdd and truncating stores.
3225 LLT PtrTy = MRI.getType(PtrReg);
3226 auto OffsetCst = MIRBuilder.buildConstant(
3227 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3228 auto SmallPtr =
3229 MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
3230
3231 MachineMemOperand *LargeMMO =
3232 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3233 MachineMemOperand *SmallMMO =
3234 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3235 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3236 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3237 StoreMI.eraseFromParent();
3238 return Legalized;
3239}
3240
3242LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3243 switch (MI.getOpcode()) {
3244 case TargetOpcode::G_LOAD: {
3245 if (TypeIdx != 0)
3246 return UnableToLegalize;
3247 MachineMemOperand &MMO = **MI.memoperands_begin();
3248
3249 // Not sure how to interpret a bitcast of an extending load.
3250 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3251 return UnableToLegalize;
3252
3254 bitcastDst(MI, CastTy, 0);
3255 MMO.setType(CastTy);
3257 return Legalized;
3258 }
3259 case TargetOpcode::G_STORE: {
3260 if (TypeIdx != 0)
3261 return UnableToLegalize;
3262
3263 MachineMemOperand &MMO = **MI.memoperands_begin();
3264
3265 // Not sure how to interpret a bitcast of a truncating store.
3266 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3267 return UnableToLegalize;
3268
3270 bitcastSrc(MI, CastTy, 0);
3271 MMO.setType(CastTy);
3273 return Legalized;
3274 }
3275 case TargetOpcode::G_SELECT: {
3276 if (TypeIdx != 0)
3277 return UnableToLegalize;
3278
3279 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3280 LLVM_DEBUG(
3281 dbgs() << "bitcast action not implemented for vector select\n");
3282 return UnableToLegalize;
3283 }
3284
3286 bitcastSrc(MI, CastTy, 2);
3287 bitcastSrc(MI, CastTy, 3);
3288 bitcastDst(MI, CastTy, 0);
3290 return Legalized;
3291 }
3292 case TargetOpcode::G_AND:
3293 case TargetOpcode::G_OR:
3294 case TargetOpcode::G_XOR: {
3296 bitcastSrc(MI, CastTy, 1);
3297 bitcastSrc(MI, CastTy, 2);
3298 bitcastDst(MI, CastTy, 0);
3300 return Legalized;
3301 }
3302 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3303 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3304 case TargetOpcode::G_INSERT_VECTOR_ELT:
3305 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3306 default:
3307 return UnableToLegalize;
3308 }
3309}
3310
3311// Legalize an instruction by changing the opcode in place.
3312void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3314 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3316}
3317
3319LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3320 using namespace TargetOpcode;
3321
3322 switch(MI.getOpcode()) {
3323 default:
3324 return UnableToLegalize;
3325 case TargetOpcode::G_FCONSTANT:
3326 return lowerFConstant(MI);
3327 case TargetOpcode::G_BITCAST:
3328 return lowerBitcast(MI);
3329 case TargetOpcode::G_SREM:
3330 case TargetOpcode::G_UREM: {
3331 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3332 auto Quot =
3333 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3334 {MI.getOperand(1), MI.getOperand(2)});
3335
3336 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3337 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3338 MI.eraseFromParent();
3339 return Legalized;
3340 }
3341 case TargetOpcode::G_SADDO:
3342 case TargetOpcode::G_SSUBO:
3343 return lowerSADDO_SSUBO(MI);
3344 case TargetOpcode::G_UMULH:
3345 case TargetOpcode::G_SMULH:
3346 return lowerSMULH_UMULH(MI);
3347 case TargetOpcode::G_SMULO:
3348 case TargetOpcode::G_UMULO: {
3349 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3350 // result.
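 // As an illustrative sketch (hypothetical register names), lowering an s32
 // G_SMULO:
 //   %lo:_(s32) = G_MUL %a, %b
 //   %hi:_(s32) = G_SMULH %a, %b
 //   %c31:_(s32) = G_CONSTANT i32 31
 //   %sign:_(s32) = G_ASHR %lo, %c31
 //   %ovf:_(s1) = G_ICMP intpred(ne), %hi, %sign
 // For G_UMULO the overflow check is simply %hi != 0.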
3351 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
3352 LLT Ty = MRI.getType(Res);
3353
3354 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3355 ? TargetOpcode::G_SMULH
3356 : TargetOpcode::G_UMULH;
3357
3359 const auto &TII = MIRBuilder.getTII();
3360 MI.setDesc(TII.get(TargetOpcode::G_MUL));
3361 MI.removeOperand(1);
3363
3364 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3365 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3366
3367 // Move insert point forward so we can use the Res register if needed.
3369
3370 // For *signed* multiply, overflow is detected by checking:
3371 // (hi != (lo >> bitwidth-1))
3372 if (Opcode == TargetOpcode::G_SMULH) {
3373 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3374 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3375 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3376 } else {
3377 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3378 }
3379 return Legalized;
3380 }
3381 case TargetOpcode::G_FNEG: {
3382 auto [Res, SubByReg] = MI.getFirst2Regs();
3383 LLT Ty = MRI.getType(Res);
3384
3385 // TODO: Handle vector types once we are able to
3386 // represent them.
3387 if (Ty.isVector())
3388 return UnableToLegalize;
3389 auto SignMask =
3391 MIRBuilder.buildXor(Res, SubByReg, SignMask);
3392 MI.eraseFromParent();
3393 return Legalized;
3394 }
3395 case TargetOpcode::G_FSUB:
3396 case TargetOpcode::G_STRICT_FSUB: {
3397 auto [Res, LHS, RHS] = MI.getFirst3Regs();
3398 LLT Ty = MRI.getType(Res);
3399
3400 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3401 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
3402
3403 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3404 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
3405 else
3406 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3407
3408 MI.eraseFromParent();
3409 return Legalized;
3410 }
3411 case TargetOpcode::G_FMAD:
3412 return lowerFMad(MI);
3413 case TargetOpcode::G_FFLOOR:
3414 return lowerFFloor(MI);
3415 case TargetOpcode::G_INTRINSIC_ROUND:
3416 return lowerIntrinsicRound(MI);
3417 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
3418 // Since round even is the assumed rounding mode for unconstrained FP
3419 // operations, rint and roundeven are the same operation.
3420 changeOpcode(MI, TargetOpcode::G_FRINT);
3421 return Legalized;
3422 }
3423 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3424 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
3425 MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3426 **MI.memoperands_begin());
3427 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3428 MI.eraseFromParent();
3429 return Legalized;
3430 }
3431 case TargetOpcode::G_LOAD:
3432 case TargetOpcode::G_SEXTLOAD:
3433 case TargetOpcode::G_ZEXTLOAD:
3434 return lowerLoad(cast<GAnyLoad>(MI));
3435 case TargetOpcode::G_STORE:
3436 return lowerStore(cast<GStore>(MI));
3437 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3438 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3439 case TargetOpcode::G_CTLZ:
3440 case TargetOpcode::G_CTTZ:
3441 case TargetOpcode::G_CTPOP:
3442 return lowerBitCount(MI);
3443 case G_UADDO: {
3444 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
3445
3446 MIRBuilder.buildAdd(Res, LHS, RHS);
3447 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
3448
3449 MI.eraseFromParent();
3450 return Legalized;
3451 }
3452 case G_UADDE: {
3453 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
3454 const LLT CondTy = MRI.getType(CarryOut);
3455 const LLT Ty = MRI.getType(Res);
3456
3457 // Initial add of the two operands.
3458 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3459
3460 // Initial check for carry.
3461 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
3462
3463 // Add the sum and the carry.
3464 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3465 MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3466
3467 // Second check for carry. We can only carry if the initial sum is all 1s
3468 // and the carry is set, resulting in a new sum of 0.
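 // As an illustrative example with s8 values: LHS = 0xFF, RHS = 0x00,
 // CarryIn = 1 gives TmpRes = 0xFF (no carry yet), Res = 0x00, and since
 // Res == 0 with CarryIn set, the carry-out is 1.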
3469 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3470 auto ResEqZero = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, Res, Zero);
3471 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
3472 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
3473
3474 MI.eraseFromParent();
3475 return Legalized;
3476 }
3477 case G_USUBO: {
3478 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
3479
3480 MIRBuilder.buildSub(Res, LHS, RHS);
3481 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3482
3483 MI.eraseFromParent();
3484 return Legalized;
3485 }
3486 case G_USUBE: {
3487 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
3488 const LLT CondTy = MRI.getType(BorrowOut);
3489 const LLT Ty = MRI.getType(Res);
3490
3491 // Initial subtract of the two operands.
3492 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3493
3494 // Initial check for borrow.
3495 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
3496
3497 // Subtract the borrow from the first subtract.
3498 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3499 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3500
3501 // Second check for borrow. We can only borrow if the initial difference is
3502 // 0 and the borrow is set, resulting in a new difference of all 1s.
3503 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3504 auto TmpResEqZero =
3505 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
3506 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
3507 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
3508
3509 MI.eraseFromParent();
3510 return Legalized;
3511 }
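
The borrow case mirrors the carry case: a second borrow can only occur when the intermediate difference is zero and the borrow-in is set. A sketch under the same assumptions as above:

#include <cstdint>

// Model of the G_USUBE lowering: borrow-out = borrow of (LHS - RHS) OR the
// borrow from subtracting the borrow-in, which only happens when the
// intermediate difference was 0 (the final result wraps to all ones).
bool usube(uint32_t LHS, uint32_t RHS, bool BorrowIn, uint32_t &Res) {
  uint32_t Tmp = LHS - RHS;              // initial subtract
  bool Borrow1 = Tmp > LHS;              // first borrow check (ugt)
  Res = Tmp - (BorrowIn ? 1u : 0u);      // subtract the zero-extended borrow-in
  bool Borrow2 = (Tmp == 0) && BorrowIn; // second borrow check
  return Borrow1 || Borrow2;
}
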
3512 case G_UITOFP:
3513 return lowerUITOFP(MI);
3514 case G_SITOFP:
3515 return lowerSITOFP(MI);
3516 case G_FPTOUI:
3517 return lowerFPTOUI(MI);
3518 case G_FPTOSI:
3519 return lowerFPTOSI(MI);
3520 case G_FPTRUNC:
3521 return lowerFPTRUNC(MI);
3522 case G_FPOWI:
3523 return lowerFPOWI(MI);
3524 case G_SMIN:
3525 case G_SMAX:
3526 case G_UMIN:
3527 case G_UMAX:
3528 return lowerMinMax(MI);
3529 case G_FCOPYSIGN:
3530 return lowerFCopySign(MI);
3531 case G_FMINNUM:
3532 case G_FMAXNUM:
3533 return lowerFMinNumMaxNum(MI);
3534 case G_MERGE_VALUES:
3535 return lowerMergeValues(MI);
3536 case G_UNMERGE_VALUES:
3537 return lowerUnmergeValues(MI);
3538 case TargetOpcode::G_SEXT_INREG: {
3539 assert(MI.getOperand(2).isImm() && "Expected immediate");
3540 int64_t SizeInBits = MI.getOperand(2).getImm();
3541
3542 auto [DstReg, SrcReg] = MI.getFirst2Regs();
3543 LLT DstTy = MRI.getType(DstReg);
3544 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3545
3546 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3547 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3548 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3549 MI.eraseFromParent();
3550 return Legalized;
3551 }
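
The shl/ashr pair moves the field to the top of the register and shifts it back arithmetically so its sign bit is replicated into the upper bits. A scalar sketch for a 32-bit destination (hypothetical helper; assumes two's-complement arithmetic shifts, as C++20 guarantees):

#include <cstdint>

// Model of the G_SEXT_INREG lowering for a 32-bit register.
int32_t sextInReg(int32_t Src, unsigned SizeInBits) {
  unsigned Shift = 32 - SizeInBits; // DstTy.getScalarSizeInBits() - SizeInBits
  return (int32_t)((uint32_t)Src << Shift) >> Shift;
}
// sextInReg(0x80, 8) == -128, sextInReg(0x7f, 8) == 127.
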
3552 case G_EXTRACT_VECTOR_ELT:
3553 case G_INSERT_VECTOR_ELT:
3554 return lowerExtractInsertVectorElt(MI);
3555 case G_SHUFFLE_VECTOR:
3556 return lowerShuffleVector(MI);
3557 case G_DYN_STACKALLOC:
3558 return lowerDynStackAlloc(MI);
3559 case G_STACKSAVE:
3560 return lowerStackSave(MI);
3561 case G_STACKRESTORE:
3562 return lowerStackRestore(MI);
3563 case G_EXTRACT:
3564 return lowerExtract(MI);
3565 case G_INSERT:
3566 return lowerInsert(MI);
3567 case G_BSWAP:
3568 return lowerBswap(MI);
3569 case G_BITREVERSE:
3570 return lowerBitreverse(MI);
3571 case G_READ_REGISTER:
3572 case G_WRITE_REGISTER:
3573 return lowerReadWriteRegister(MI);
3574 case G_UADDSAT:
3575 case G_USUBSAT: {
3576 // Try to make a reasonable guess about which lowering strategy to use. The
3577 // target can override this with custom lowering and by calling the
3578 // implementation functions.
3579 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3580 if (LI.isLegalOrCustom({G_UMIN, Ty}))
3581 return lowerAddSubSatToMinMax(MI);
3582 return lowerAddSubSatToAddoSubo(MI);
3583 }
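
For reference, one common min-based formulation of unsigned saturating add/sub is shown below. This is only a sketch of why a legal G_UMIN makes the min/max strategy attractive; it is not necessarily the exact sequence lowerAddSubSatToMinMax emits.

#include <algorithm>
#include <cstdint>

uint32_t uaddsat(uint32_t A, uint32_t B) {
  return A + std::min(B, ~A); // clamp the addend so the sum cannot wrap
}
uint32_t usubsat(uint32_t A, uint32_t B) {
  return A - std::min(A, B);  // clamp the subtrahend so the result stays >= 0
}
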
3584 case G_SADDSAT:
3585 case G_SSUBSAT: {
3586 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3587
3588 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3589 // since it's a shorter expansion. However, we would need to figure out the
3590 // preferred boolean type for the carry out for the query.
3591 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3592 return lowerAddSubSatToMinMax(MI);
3593 return lowerAddSubSatToAddoSubo(MI);
3594 }
3595 case G_SSHLSAT:
3596 case G_USHLSAT:
3597 return lowerShlSat(MI);
3598 case G_ABS:
3599 return lowerAbsToAddXor(MI);
3600 case G_SELECT:
3601 return lowerSelect(MI);
3602 case G_IS_FPCLASS:
3603 return lowerISFPCLASS(MI);
3604 case G_SDIVREM:
3605 case G_UDIVREM:
3606 return lowerDIVREM(MI);
3607 case G_FSHL:
3608 case G_FSHR:
3609 return lowerFunnelShift(MI);
3610 case G_ROTL:
3611 case G_ROTR:
3612 return lowerRotate(MI);
3613 case G_MEMSET:
3614 case G_MEMCPY:
3615 case G_MEMMOVE:
3616 return lowerMemCpyFamily(MI);
3617 case G_MEMCPY_INLINE:
3618 return lowerMemcpyInline(MI);
3619 case G_ZEXT:
3620 case G_SEXT:
3621 case G_ANYEXT:
3622 return lowerEXT(MI);
3623 GISEL_VECREDUCE_CASES_NONSEQ
3624 return lowerVectorReduction(MI);
3625 }
3626}
3627
3628Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
3629 Align MinAlign) const {
3630 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3631 // datalayout for the preferred alignment. Also there should be a target hook
3632 // for this to allow targets to reduce the alignment and ignore the
3633 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3634 // the type.
3635 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
3636}
3637
3638MachineInstrBuilder
3639LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
3640 MachinePointerInfo &PtrInfo) {
3641 MachineFunction &MF = MIRBuilder.getMF();
3642 const DataLayout &DL = MIRBuilder.getDataLayout();
3643 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3644
3645 unsigned AddrSpace = DL.getAllocaAddrSpace();
3646 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3647
3648 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3649 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3650}
3651
3652static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
3653 LLT VecTy) {
3654 int64_t IdxVal;
3655 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3656 return IdxReg;
3657
3658 LLT IdxTy = B.getMRI()->getType(IdxReg);
3659 unsigned NElts = VecTy.getNumElements();
3660 if (isPowerOf2_32(NElts)) {
3661 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3662 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3663 }
3664
3665 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3666 .getReg(0);
3667}
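
The clamping above keeps a variable index inside the vector: for a power-of-two element count the low bits are masked off, otherwise the index is capped with an unsigned min. A standalone sketch of the same arithmetic (hypothetical helper):

#include <algorithm>
#include <cstdint>

// Model of the dynamic index clamp: AND mask for power-of-two lengths,
// unsigned min against NElts - 1 otherwise.
uint32_t clampIndex(uint32_t Idx, uint32_t NElts) {
  if ((NElts & (NElts - 1)) == 0)  // isPowerOf2_32(NElts)
    return Idx & (NElts - 1);      // keep only the low Log2(NElts) bits
  return std::min(Idx, NElts - 1); // buildUMin equivalent
}
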
3668
3669Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
3670 Register Index) {
3671 LLT EltTy = VecTy.getElementType();
3672
3673 // Calculate the element offset and add it to the pointer.
3674 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3675 assert(EltSize * 8 == EltTy.getSizeInBits() &&
3676 "Converting bits to bytes lost precision");
3677
3678 Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
3679
3680 LLT IdxTy = MRI.getType(Index);
3681 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3682 MIRBuilder.buildConstant(IdxTy, EltSize));
3683
3684 LLT PtrTy = MRI.getType(VecPtr);
3685 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
3686}
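
The address computation itself is just base plus index times the element store size (with the FIXME above noting this should really be the ABI size). A minimal model in plain pointer arithmetic (hypothetical helper):

#include <cstdint>

// Model of getVectorElementPointer: byte address of element Idx for elements
// that are EltSizeInBits wide (assumes a byte-multiple element size, as the
// assert above requires).
uintptr_t vectorElementAddr(uintptr_t VecPtr, uint64_t Idx, unsigned EltSizeInBits) {
  unsigned EltSize = EltSizeInBits / 8;
  return VecPtr + Idx * EltSize; // G_MUL followed by G_PTR_ADD
}
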
3687
3688#ifndef NDEBUG
3689/// Check that all vector operands have the same number of elements. Other
3690/// operands should be listed in NonVecOpIndices.
3691static bool hasSameNumEltsOnAllVectorOperands(
3692 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
3693 std::initializer_list<unsigned> NonVecOpIndices) {
3694 if (MI.getNumMemOperands() != 0)
3695 return false;
3696
3697 LLT VecTy = MRI.getType(MI.getReg(0));
3698 if (!VecTy.isVector())
3699 return false;
3700 unsigned NumElts = VecTy.getNumElements();
3701
3702 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
3703 MachineOperand &Op = MI.getOperand(OpIdx);
3704 if (!Op.isReg()) {
3705 if (!is_contained(NonVecOpIndices, OpIdx))
3706 return false;
3707 continue;
3708 }
3709
3710 LLT Ty = MRI.getType(Op.getReg());
3711 if (!Ty.isVector()) {
3712 if (!is_contained(NonVecOpIndices, OpIdx))
3713 return false;
3714 continue;
3715 }
3716
3717 if (Ty.getNumElements() != NumElts)
3718 return false;
3719 }
3720
3721 return true;
3722}
3723#endif
3724
3725/// Fill \p DstOps with DstOps that, combined, cover the same number of elements
3726/// as \p Ty. These DstOps have either scalar type when \p NumElts = 1 or are
3727/// vectors with \p NumElts elements. When Ty.getNumElements() is not a multiple
3728/// of \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
3729static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
3730 unsigned NumElts) {
3731 LLT LeftoverTy;
3732 assert(Ty.isVector() && "Expected vector type");
3733 LLT EltTy = Ty.getElementType();
3734 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
3735 int NumParts, NumLeftover;
3736 std::tie(NumParts, NumLeftover) =
3737 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
3738
3739 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
3740 for (int i = 0; i < NumParts; ++i) {
3741 DstOps.push_back(NarrowTy);
3742 }
3743
3744 if (LeftoverTy.isValid()) {
3745 assert(NumLeftover == 1 && "expected exactly one leftover");
3746 DstOps.push_back(LeftoverTy);
3747 }
3748}
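
The count arithmetic behind makeDstOps is simple to state on its own: a vector of OrigNumElts elements yields OrigNumElts / NumElts full pieces plus, when the division is not exact, one leftover piece with the remaining elements. A small sketch (hypothetical helper):

#include <utility>

// Returns {number of full NumElts-wide pieces, leftover element count};
// a leftover count of 0 means no extra DstOp is appended.
std::pair<unsigned, unsigned> splitCounts(unsigned OrigNumElts, unsigned NumElts) {
  return {OrigNumElts / NumElts, OrigNumElts % NumElts};
}
// splitCounts(7, 2) == {3, 1}: three <2 x EltTy> pieces plus one scalar EltTy.
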
3749
3750/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
3751/// made from \p Op depending on operand type.
3752static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
3753 MachineOperand &Op) {
3754 for (unsigned i = 0; i < N; ++i) {
3755 if (Op.isReg())
3756 Ops.push_back(Op.getReg());
3757 else if (Op.isImm())
3758 Ops.push_back(Op.getImm());
3759 else if (Op.isPredicate())
3760 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
3761 else
3762 llvm_unreachable("Unsupported type");
3763 }
3764}
3765
3766// Handle splitting vector operations which need to have the same number of
3767// elements in each type index, but each type index may have a different element
3768// type.
3769//
3770// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
3771// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3772// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3773//
3774// Also handles some irregular breakdown cases, e.g.
3775 // <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
3776// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3777// s64 = G_SHL s64, s32
3778LegalizerHelper::LegalizeResult
3779LegalizerHelper::fewerElementsVectorMultiEltType(
3780 GenericMachineInstr &MI, unsigned NumElts,
3781 std::initializer_list<unsigned> NonVecOpIndices) {
3782 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
3783 "Non-compatible opcode or not specified non-vector operands");
3784 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
3785
3786 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
3787 unsigned NumDefs = MI.getNumDefs();
3788
3789 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
3790 // Build instructions with DstOps to use instruction found by CSE directly.
3791 // CSE copies found instruction into given vreg when building with vreg dest.
3792 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
3793 // Output registers will be taken from created instructions.
3794 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
3795 for (unsigned i = 0; i < NumDefs; ++i) {
3796 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
3797 }
3798
3799 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
3800 // Operands listed in NonVecOpIndices will be used as is without splitting;
3801 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
3802 // scalar condition (op 1), immediate in sext_inreg (op 2).
3803 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
3804 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
3805 ++UseIdx, ++UseNo) {
3806 if (is_contained(NonVecOpIndices, UseIdx)) {
3807 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
3808 MI.getOperand(UseIdx));
3809 } else {
3810 SmallVector<Register, 8> SplitPieces;
3811 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
3812 for (auto Reg : SplitPieces)
3813 InputOpsPieces[UseNo].push_back(Reg);
3814 }
3815 }
3816
3817 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
3818
3819 // Take i-th piece of each input operand split and build sub-vector/scalar
3820 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
3821 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
3822 SmallVector<DstOp, 2> Defs;
3823 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
3824 Defs.push_back(OutputOpsPieces[DstNo][i]);
3825
3826 SmallVector<SrcOp, 3> Uses;
3827 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
3828 Uses.push_back(InputOpsPieces[InputNo][i]);
3829
3830 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
3831 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
3832 OutputRegs[DstNo].push_back(I.getReg(DstNo));
3833 }
3834
3835 // Merge small outputs into MI's output for each def operand.
3836 if (NumLeftovers) {
3837 for (unsigned i = 0; i < NumDefs; ++i)
3838 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
3839 } else {
3840 for (unsigned i = 0; i < NumDefs; ++i)
3841 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
3842 }
3843
3844 MI.eraseFromParent();
3845 return Legalized;
3846}
3847
3848LegalizerHelper::LegalizeResult
3849LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
3850 unsigned NumElts) {
3851 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
3852
3853 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
3854 unsigned NumDefs = MI.getNumDefs();
3855
3856 SmallVector<DstOp, 8> OutputOpsPieces;
3857 SmallVector<Register, 8> OutputRegs;
3858 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
3859
3860 // Instructions that perform the register split will be inserted in the basic
3861 // block where the register is defined (the basic block is in the next operand).
3862 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
3863 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
3864 UseIdx += 2, ++UseNo) {
3865 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
3866 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3867 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
3868 }
3869
3870 // Build PHIs with fewer elements.
3871 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
3872 MIRBuilder.setInsertPt(*MI.getParent(), MI);
3873 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
3874 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
3875 Phi.addDef(
3876 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
3877 OutputRegs.push_back(Phi.getReg(0));
3878
3879 for (unsigned j = 0; j < NumInputs / 2; ++j) {
3880 Phi.addUse(InputOpsPieces[j][i]);
3881 Phi.add(MI.getOperand(1 + j * 2 + 1));
3882 }
3883 }
3884
3885 // Merge small outputs into MI's def.
3886 if (NumLeftovers) {
3887 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
3888 } else {
3889 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
3890 }
3891
3892 MI.eraseFromParent();
3893 return Legalized;
3894}
3895
3896LegalizerHelper::LegalizeResult
3897LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
3898 unsigned TypeIdx,
3899 LLT NarrowTy) {
3900 const int NumDst = MI.getNumOperands() - 1;
3901 const Register SrcReg = MI.getOperand(NumDst).getReg();
3902 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3903 LLT SrcTy = MRI.getType(SrcReg);
3904
3905 if (TypeIdx != 1 || NarrowTy == DstTy)
3906 return UnableToLegalize;
3907
3908 // Requires compatible types. Otherwise, SrcReg should have been defined by a
3909 // merge-like instruction that would get artifact-combined. Most likely the
3910 // instruction that defines SrcReg has to perform more/fewer elements
3911 // legalization compatible with NarrowTy.
3912 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
3913 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
3914
3915 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
3916 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
3917 return UnableToLegalize;
3918
3919 // This is most likely DstTy (smaller than register size) packed in SrcTy
3920 // (larger than register size), and since the unmerge was not combined it will
3921 // be lowered to bit-sequence extracts from a register. Unpack SrcTy to NarrowTy
3922 // (register size) pieces first, then unpack each NarrowTy piece to DstTy.
3923
3924 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
3925 //
3926 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
3927 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
3928 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
3929 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
3930 const int NumUnmerge = Unmerge->getNumOperands() - 1;
3931 const int PartsPerUnmerge = NumDst / NumUnmerge;
3932
3933 for (int I = 0; I != NumUnmerge; ++I) {
3934 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3935
3936 for (int J = 0; J != PartsPerUnmerge; ++J)
3937 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3938 MIB.addUse(Unmerge.getReg(I));
3939 }
3940
3941 MI.eraseFromParent();
3942 return Legalized;
3943}
3944
3945LegalizerHelper::LegalizeResult
3946LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
3947 LLT NarrowTy) {
3948 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3949 // Requires compatible types. Otherwise, the user of DstReg did not perform an
3950 // unmerge that should have been artifact-combined. Most likely the instruction
3951 // that uses DstReg has to do more/fewer elements legalization compatible with NarrowTy.
3952 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
3953 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
3954 if (NarrowTy == SrcTy)
3955 return UnableToLegalize;
3956
3957 // This attempts to lower part of an LCMTy merge/unmerge sequence. It is
3958 // intended for old MIR tests. Since the more/fewer elements changes, it should
3959 // no longer be possible to generate MIR like this when starting from llvm-ir,
3960 // because the LCMTy approach was replaced with merge/unmerge to vector elements.
3961 if (TypeIdx == 1) {
3962 assert(SrcTy.isVector() && "Expected vector types");
3963 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
3964 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
3965 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
3966 return UnableToLegalize;
3967 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
3968 //
3969 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
3970 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
3971 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
3972 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
3973 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
3974 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
3975
3976 SmallVector<Register, 8> Elts;
3977 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
3978 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
3979 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
3980 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
3981 Elts.push_back(Unmerge.getReg(j));
3982 }
3983
3984 SmallVector<Register, 8> NarrowTyElts;
3985 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
3986 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
3987 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
3988 ++i, Offset += NumNarrowTyElts) {
3989 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
3990 NarrowTyElts.push_back(
3991 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
3992 }
3993
3994 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
3995 MI.eraseFromParent();
3996 return Legalized;
3997 }
3998
3999 assert(TypeIdx == 0 && "Bad type index");
4000 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
4001 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
4002 return UnableToLegalize;
4003
4004 // This is most likely SrcTy (smaller than register size) packed in DstTy
4005 // (larger than register size), and since the merge was not combined it will
4006 // be lowered to bit-sequence packing into a register. Merge SrcTy to NarrowTy
4007 // (register size) pieces first, then merge each NarrowTy piece to DstTy.
4008
4009 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
4010 //
4011 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
4012 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
4013 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
4014 SmallVector<Register, 8> NarrowTyElts;
4015 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
4016 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
4017 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
4018 for (unsigned i = 0; i < NumParts; ++i) {
4019 SmallVector<Register, 8> Sources;
4020 for (unsigned j = 0; j < NumElts; ++j)
4021 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
4022 NarrowTyElts.push_back(
4023 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
4024 }
4025
4026 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4027 MI.eraseFromParent();
4028 return Legalized;
4029}
4030
4031LegalizerHelper::LegalizeResult
4032LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
4033 unsigned TypeIdx,
4034 LLT NarrowVecTy) {
4035 auto [DstReg, SrcVec] = MI.getFirst2Regs();
4036 Register InsertVal;
4037 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4038
4039 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
4040 if (IsInsert)
4041 InsertVal = MI.getOperand(2).getReg();
4042
4043 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
4044
4045 // TODO: Handle total scalarization case.
4046 if (!NarrowVecTy.isVector())
4047 return UnableToLegalize;
4048
4049 LLT VecTy = MRI.getType(SrcVec);
4050
4051 // If the index is a constant, we can really break this down as you would
4052 // expect, and index into the target size pieces.
4053 int64_t IdxVal;
4054 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
4055 if (MaybeCst) {
4056 IdxVal = MaybeCst->Value.getSExtValue();
4057 // Avoid out of bounds indexing the pieces.
4058 if (IdxVal >= VecTy.getNumElements()) {
4059 MIRBuilder.buildUndef(DstReg);
4060 MI.eraseFromParent();
4061 return Legalized;
4062 }
4063
4064 SmallVector<Register, 8> VecParts;
4065 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4066
4067 // Build a sequence of NarrowTy pieces in VecParts for this operand.
4068 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4069 TargetOpcode::G_ANYEXT);
4070
4071 unsigned NewNumElts = NarrowVecTy.getNumElements();
4072
4073 LLT IdxTy = MRI.getType(Idx);
4074 int64_t PartIdx = IdxVal / NewNumElts;
4075 auto NewIdx =
4076 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4077
4078 if (IsInsert) {
4079 LLT PartTy = MRI.getType(VecParts[PartIdx]);
4080
4081 // Use the adjusted index to insert into one of the subvectors.
4082 auto InsertPart = MIRBuilder.buildInsertVectorElement(
4083 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4084 VecParts[PartIdx] = InsertPart.getReg(0);
4085
4086 // Recombine the inserted subvector with the others to reform the result
4087 // vector.
4088 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4089 } else {
4090 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
4091 }
4092
4093 MI.eraseFromParent();
4094 return Legalized;
4095 }
4096
4097 // With a variable index, we can't perform the operation in a smaller type, so
4098 // we're forced to expand this.
4099 //
4100 // TODO: We could emit a chain of compare/select to figure out which piece to
4101 // index.
4102 return UnableToLegalize;
4103}
4104
4105LegalizerHelper::LegalizeResult
4106LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
4107 LLT NarrowTy) {
4108 // FIXME: Don't know how to handle secondary types yet.
4109 if (TypeIdx != 0)
4110 return UnableToLegalize;
4111
4112 // This implementation doesn't work for atomics. Give up instead of doing
4113 // something invalid.
4114 if (LdStMI.isAtomic())
4115 return UnableToLegalize;
4116
4117 bool IsLoad = isa<GLoad>(LdStMI);
4118 Register ValReg = LdStMI.getReg(0);
4119 Register AddrReg = LdStMI.getPointerReg();
4120 LLT ValTy = MRI.getType(ValReg);
4121
4122 // FIXME: Do we need a distinct NarrowMemory legalize action?
4123 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
4124 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
4125 return UnableToLegalize;
4126 }
4127
4128 int NumParts = -1;
4129 int NumLeftover = -1;
4130 LLT LeftoverTy;
4131 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
4132 if (IsLoad) {
4133 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
4134 } else {
4135 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4136 NarrowLeftoverRegs)) {
4137 NumParts = NarrowRegs.size();
4138 NumLeftover = NarrowLeftoverRegs.size();
4139 }
4140 }
4141
4142 if (NumParts == -1)
4143 return UnableToLegalize;
4144
4145 LLT PtrTy = MRI.getType(AddrReg);
4146 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
4147
4148 unsigned TotalSize = ValTy.getSizeInBits();
4149
4150 // Split the load/store into PartTy sized pieces starting at Offset. If this
4151 // is a load, return the new registers in ValRegs. For a store, each element
4152 // of ValRegs should be PartTy. Returns the next offset that needs to be
4153 // handled.
4154 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
4155 auto MMO = LdStMI.getMMO();
4156 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
4157 unsigned NumParts, unsigned Offset) -> unsigned {
4158 MachineFunction &MF = MIRBuilder.getMF();
4159 unsigned PartSize = PartTy.getSizeInBits();
4160 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
4161 ++Idx) {
4162 unsigned ByteOffset = Offset / 8;
4163 Register NewAddrReg;
4164
4165 MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
4166
4167 MachineMemOperand *NewMMO =
4168 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
4169
4170 if (IsLoad) {
4171 Register Dst = MRI.createGenericVirtualRegister(PartTy);
4172 ValRegs.push_back(Dst);
4173 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
4174 } else {
4175 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
4176 }
4177 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
4178 }
4179
4180 return Offset;
4181 };
4182
4183 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
4184 unsigned HandledOffset =
4185 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
4186
4187 // Handle the rest of the register if this isn't an even type breakdown.
4188 if (LeftoverTy.isValid())
4189 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
4190
4191 if (IsLoad) {
4192 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4193 LeftoverTy, NarrowLeftoverRegs);
4194 }
4195
4196 LdStMI.eraseFromParent();
4197 return Legalized;
4198}
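
The splitting walks the value at increasing byte offsets (decreasing for big-endian targets), loading or storing one NarrowTy piece per step plus one leftover-sized access. A little-endian model of, for example, a 96-bit load split into a 64-bit piece and a 32-bit leftover, using an ordinary byte buffer (types and names hypothetical):

#include <cstdint>
#include <cstring>

// Little-endian model of the splitting: the 64-bit piece is read at byte
// offset 0 and the 32-bit leftover at byte offset 8, matching the
// ByteOffset = Offset / 8 pointer adjustments above.
struct U96 { uint64_t Lo; uint32_t Hi; };

U96 load96(const unsigned char *Addr) {
  U96 Val;
  std::memcpy(&Val.Lo, Addr, sizeof(Val.Lo));     // NarrowTy (s64) piece
  std::memcpy(&Val.Hi, Addr + 8, sizeof(Val.Hi)); // leftover (s32) piece
  return Val;
}
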
4199
4200LegalizerHelper::LegalizeResult
4201LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4202 LLT NarrowTy) {
4203 using namespace TargetOpcode;
4204 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
4205 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
4206
4207 switch (MI.getOpcode()) {
4208 case G_IMPLICIT_DEF:
4209 case G_TRUNC:
4210 case G_AND:
4211 case G_OR:
4212 case G_XOR:
4213 case G_ADD:
4214 case G_SUB:
4215 case G_MUL:
4216 case G_PTR_ADD:
4217 case G_SMULH:
4218 case G_UMULH:
4219 case G_FADD:
4220 case G_FMUL:
4221 case G_FSUB:
4222 case G_FNEG:
4223 case G_FABS:
4224 case G_FCANONICALIZE:
4225 case G_FDIV:
4226 case G_FREM:
4227 case G_FMA:
4228 case G_FMAD:
4229 case G_FPOW:
4230 case G_FEXP:
4231 case G_FEXP2:
4232 case G_FEXP10:
4233 case G_FLOG:
4234 case G_FLOG2:
4235 case G_FLOG10:
4236 case G_FLDEXP:
4237 case G_FNEARBYINT:
4238 case G_FCEIL:
4239 case G_FFLOOR:
4240 case G_FRINT:
4241 case G_INTRINSIC_ROUND:
4242 case G_INTRINSIC_ROUNDEVEN:
4243 case G_INTRINSIC_TRUNC:
4244 case G_FCOS:
4245 case G_FSIN:
4246 case G_FSQRT:
4247 case G_BSWAP:
4248 case G_BITREVERSE:
4249 case G_SDIV:
4250 case G_UDIV:
4251 case G_SREM:
4252 case G_UREM:
4253 case G_SDIVREM:
4254 case G_UDIVREM:
4255 case G_SMIN:
4256 case G_SMAX:
4257 case G_UMIN:
4258 case G_UMAX:
4259 case G_ABS:
4260 case G_FMINNUM:
4261 case G_FMAXNUM:
4262 case G_FMINNUM_IEEE:
4263 case G_FMAXNUM_IEEE:
4264 case G_FMINIMUM:
4265 case G_FMAXIMUM:
4266 case G_FSHL:
4267 case G_FSHR:
4268 case G_ROTL:
4269 case G_ROTR:
4270 case G_FREEZE:
4271 case G_SADDSAT:
4272 case G_SSUBSAT:
4273 case G_UADDSAT:
4274 case G_USUBSAT:
4275 case G_UMULO:
4276 case G_SMULO:
4277 case G_SHL:
4278 case G_LSHR:
4279 case G_ASHR:
4280 case G_SSHLSAT:
4281 case G_USHLSAT:
4282 case G_CTLZ:
4283 case G_CTLZ_ZERO_UNDEF:
4284 case G_CTTZ:
4285 case G_CTTZ_ZERO_UNDEF:
4286 case G_CTPOP:
4287 case G_FCOPYSIGN:
4288 case G_ZEXT:
4289 case G_SEXT:
4290 case G_ANYEXT:
4291 case G_FPEXT:
4292 case G_FPTRUNC:
4293 case G_SITOFP:
4294 case G_UITOFP:
4295 case G_FPTOSI:
4296 case G_FPTOUI:
4297 case G_INTTOPTR:
4298 case G_PTRTOINT:
4299 case G_ADDRSPACE_CAST:
4300 case G_UADDO:
4301 case G_USUBO:
4302 case G_UADDE:
4303 case G_USUBE:
4304 case G_SADDO:
4305 case G_SSUBO:
4306 case G_SADDE:
4307 case G_SSUBE:
4308 case G_STRICT_FADD:
4309 case G_STRICT_FSUB:
4310 case G_STRICT_FMUL:
4311 case G_STRICT_FMA:
4312 case G_STRICT_FLDEXP:
4313 case G_FFREXP:
4314 return fewerElementsVectorMultiEltType(GMI, NumElts);
4315 case G_ICMP:
4316 case G_FCMP:
4317 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
4318 case G_IS_FPCLASS:
4319 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
4320 case G_SELECT:
4321 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4322 return fewerElementsVectorMultiEltType(GMI, NumElts);
4323 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
4324 case G_PHI:
4325 return fewerElementsVectorPhi(GMI, NumElts);
4326 case G_UNMERGE_VALUES:
4327 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4328 case G_BUILD_VECTOR:
4329 assert(TypeIdx == 0 && "not a vector type index");
4330 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4331 case G_CONCAT_VECTORS:
4332 if (TypeIdx != 1) // TODO: This probably does work as expected already.
4333 return UnableToLegalize;
4334 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4335 case G_EXTRACT_VECTOR_ELT:
4336 case G_INSERT_VECTOR_ELT:
4337 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4338 case G_LOAD:
4339 case G_STORE:
4340 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
4341 case G_SEXT_INREG:
4342 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
4343 GISEL_VECREDUCE_CASES_NONSEQ
4344 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4345 case G_SHUFFLE_VECTOR:
4346 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4347 default:
4348 return UnableToLegalize;
4349 }
4350}
4351
4352LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4353 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4354 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4355 if (TypeIdx != 0)
4356 return UnableToLegalize;
4357
4358 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
4359 MI.getFirst3RegLLTs();
4360 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4361 // The shuffle should be canonicalized by now.
4362 if (DstTy != Src1Ty)
4363 return UnableToLegalize;
4364 if (DstTy != Src2Ty)
4365 return UnableToLegalize;
4366
4367 if (!isPowerOf2_32(DstTy.getNumElements()))
4368 return UnableToLegalize;
4369
4370 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4371 // Further legalization attempts will be needed to split further.
4372 NarrowTy =
4373 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
4374 unsigned NewElts = NarrowTy.getNumElements();
4375
4376 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4377 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
4378 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
4379 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4380 SplitSrc2Regs[1]};
4381
4382 Register Hi, Lo;
4383
4384 // If Lo or Hi uses elements from at most two of the four input vectors, then
4385 // express it as a vector shuffle of those two inputs. Otherwise extract the
4386 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4387 SmallVector<int, 16> Ops;
4388 for (unsigned High = 0; High < 2; ++High) {
4389 Register &Output = High ? Hi : Lo;
4390
4391 // Build a shuffle mask for the output, discovering on the fly which
4392 // input vectors to use as shuffle operands (recorded in InputUsed).
4393 // If building a suitable shuffle vector proves too hard, then bail
4394 // out with UseBuildVector set.
4395 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4396 unsigned FirstMaskIdx = High * NewElts;
4397 bool UseBuildVector = false;
4398 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4399 // The mask element. This indexes into the input.
4400 int Idx = Mask[FirstMaskIdx + MaskOffset];
4401
4402 // The input vector this mask element indexes into.
4403 unsigned Input = (unsigned)Idx / NewElts;
4404
4405 if (Input >= std::size(Inputs)) {
4406 // The mask element does not index into any input vector.
4407 Ops.push_back(-1);
4408 continue;
4409 }
4410
4411 // Turn the index into an offset from the start of the input vector.
4412 Idx -= Input * NewElts;
4413
4414 // Find or create a shuffle vector operand to hold this input.
4415 unsigned OpNo;
4416 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
4417 if (InputUsed[OpNo] == Input) {
4418 // This input vector is already an operand.
4419 break;
4420 } else if (InputUsed[OpNo] == -1U) {
4421 // Create a new operand for this input vector.
4422 InputUsed[OpNo] = Input;
4423 break;
4424 }
4425 }
4426
4427 if (OpNo >= std::size(InputUsed)) {
4428 // More than two input vectors used! Give up on trying to create a
4429 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
4430 UseBuildVector = true;
4431 break;
4432 }
4433
4434 // Add the mask index for the new shuffle vector.
4435 Ops.push_back(Idx + OpNo * NewElts);
4436 }
4437
4438 if (UseBuildVector) {
4439 LLT EltTy = NarrowTy.getElementType();
4440 SmallVector<Register, 16> SVOps;
4441
4442 // Extract the input elements by hand.
4443 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4444 // The mask element. This indexes into the input.
4445 int Idx = Mask[FirstMaskIdx + MaskOffset];
4446
4447 // The input vector this mask element indexes into.
4448 unsigned Input = (unsigned)Idx / NewElts;
4449
4450 if (Input >= std::size(Inputs)) {
4451 // The mask element is "undef" or indexes off the end of the input.
4452 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4453 continue;
4454 }
4455
4456 // Turn the index into an offset from the start of the input vector.
4457 Idx -= Input * NewElts;
4458
4459 // Extract the vector element by hand.
4460 SVOps.push_back(MIRBuilder
4461 .buildExtractVectorElement(
4462 EltTy, Inputs[Input],
4463 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
4464 .getReg(0));
4465 }
4466
4467 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4468 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4469 } else if (InputUsed[0] == -1U) {
4470 // No input vectors were used! The result is undefined.
4471 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4472 } else {
4473 Register Op0 = Inputs[InputUsed[0]];
4474 // If only one input was used, use an undefined vector for the other.
4475 Register Op1 = InputUsed[1] == -1U
4476 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4477 : Inputs[InputUsed[1]];
4478 // At least one input vector was used. Create a new shuffle vector.
4479 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4480 }
4481
4482 Ops.clear();
4483 }
4484
4485 MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4486 MI.eraseFromParent();
4487 return