1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
36#include "llvm/Support/Debug.h"
40#include <numeric>
41#include <optional>
42
43#define DEBUG_TYPE "legalizer"
44
45using namespace llvm;
46using namespace LegalizeActions;
47using namespace MIPatternMatch;
48
49/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
50///
51/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
52/// with any leftover piece as type \p LeftoverTy
53///
54/// Returns -1 in the first element of the pair if the breakdown is not
55/// satisfiable.
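///
/// For example (values derived from the computation below): breaking
/// OrigTy = s96 into NarrowTy = s32 pieces returns {3, 0} and leaves
/// \p LeftoverTy invalid, while OrigTy = s100 returns {3, 1} with
/// \p LeftoverTy = s4.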
56static std::pair<int, int>
57getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
58 assert(!LeftoverTy.isValid() && "this is an out argument");
59
60 unsigned Size = OrigTy.getSizeInBits();
61 unsigned NarrowSize = NarrowTy.getSizeInBits();
62 unsigned NumParts = Size / NarrowSize;
63 unsigned LeftoverSize = Size - NumParts * NarrowSize;
64 assert(Size > NarrowSize);
65
66 if (LeftoverSize == 0)
67 return {NumParts, 0};
68
69 if (NarrowTy.isVector()) {
70 unsigned EltSize = OrigTy.getScalarSizeInBits();
71 if (LeftoverSize % EltSize != 0)
72 return {-1, -1};
73 LeftoverTy = OrigTy.changeElementCount(
74 ElementCount::getFixed(LeftoverSize / EltSize));
75 } else {
76 LeftoverTy = LLT::scalar(LeftoverSize);
77 }
78
79 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
80 return std::make_pair(NumParts, NumLeftover);
81}
82
83static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
84
85 if (!Ty.isScalar())
86 return nullptr;
87
88 switch (Ty.getSizeInBits()) {
89 case 16:
90 return Type::getHalfTy(Ctx);
91 case 32:
92 return Type::getFloatTy(Ctx);
93 case 64:
94 return Type::getDoubleTy(Ctx);
95 case 80:
96 return Type::getX86_FP80Ty(Ctx);
97 case 128:
98 return Type::getFP128Ty(Ctx);
99 default:
100 return nullptr;
101 }
102}
103
104LegalizerHelper::LegalizerHelper(MachineFunction &MF,
105 GISelChangeObserver &Observer,
106 MachineIRBuilder &Builder)
107 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
108 LI(*MF.getSubtarget().getLegalizerInfo()),
109 TLI(*MF.getSubtarget().getTargetLowering()), VT(nullptr) {}
110
111LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
112 GISelChangeObserver &Observer,
113 MachineIRBuilder &B, GISelValueTracking *VT)
114 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
115 TLI(*MF.getSubtarget().getTargetLowering()), VT(VT) {}
116
117LegalizerHelper::LegalizeResult
118LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
119 LostDebugLocObserver &LocObserver) {
120 LLVM_DEBUG(dbgs() << "\nLegalizing: " << MI);
121
122 MIRBuilder.setInstrAndDebugLoc(MI);
123
124 if (isa<GIntrinsic>(MI))
125 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
126 auto Step = LI.getAction(MI, MRI);
127 switch (Step.Action) {
128 case Legal:
129 LLVM_DEBUG(dbgs() << ".. Already legal\n");
130 return AlreadyLegal;
131 case Libcall:
132 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
133 return libcall(MI, LocObserver);
134 case NarrowScalar:
135 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
136 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
137 case WidenScalar:
138 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
139 return widenScalar(MI, Step.TypeIdx, Step.NewType);
140 case Bitcast:
141 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
142 return bitcast(MI, Step.TypeIdx, Step.NewType);
143 case Lower:
144 LLVM_DEBUG(dbgs() << ".. Lower\n");
145 return lower(MI, Step.TypeIdx, Step.NewType);
146 case FewerElements:
147 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
148 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
149 case MoreElements:
150 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
151 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
152 case Custom:
153 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
154 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
155 : UnableToLegalize;
156 default:
157 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
158 return UnableToLegalize;
159 }
160}
161
162void LegalizerHelper::insertParts(Register DstReg,
163 LLT ResultTy, LLT PartTy,
164 ArrayRef<Register> PartRegs,
165 LLT LeftoverTy,
166 ArrayRef<Register> LeftoverRegs) {
167 if (!LeftoverTy.isValid()) {
168 assert(LeftoverRegs.empty());
169
170 if (!ResultTy.isVector()) {
171 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
172 return;
173 }
174
175 if (PartTy.isVector())
176 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
177 else
178 MIRBuilder.buildBuildVector(DstReg, PartRegs);
179 return;
180 }
181
182 // Merge sub-vectors with different numbers of elements and insert into DstReg.
183 if (ResultTy.isVector()) {
184 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
185 SmallVector<Register, 8> AllRegs(PartRegs);
186 AllRegs.append(LeftoverRegs.begin(), LeftoverRegs.end());
187 return mergeMixedSubvectors(DstReg, AllRegs);
188 }
189
190 SmallVector<Register> GCDRegs;
191 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
192 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
193 extractGCDType(GCDRegs, GCDTy, PartReg);
194 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
195 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
196}
197
198void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
199 Register Reg) {
200 LLT Ty = MRI.getType(Reg);
201 SmallVector<Register, 8> RegElts;
202 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
203 MIRBuilder, MRI);
204 Elts.append(RegElts);
205}
206
207/// Merge \p PartRegs with different types into \p DstReg.
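///
/// For example (an illustrative case): with DstReg of type <5 x s16> and
/// PartRegs = { <2 x s16>, <2 x s16>, s16 }, the two vector parts are
/// unmerged into four scalar s16 values, the trailing s16 leftover is
/// appended directly, and the five elements are re-merged into DstReg.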
208void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
209 ArrayRef<Register> PartRegs) {
210 SmallVector<Register, 32> AllElts;
211 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
212 appendVectorElts(AllElts, PartRegs[i]);
213
214 Register Leftover = PartRegs[PartRegs.size() - 1];
215 if (!MRI.getType(Leftover).isVector())
216 AllElts.push_back(Leftover);
217 else
218 appendVectorElts(AllElts, Leftover);
219
220 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
221}
222
223/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
224static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
225 const MachineInstr &MI) {
226 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
227
228 const int StartIdx = Regs.size();
229 const int NumResults = MI.getNumOperands() - 1;
230 Regs.resize(Regs.size() + NumResults);
231 for (int I = 0; I != NumResults; ++I)
232 Regs[StartIdx + I] = MI.getOperand(I).getReg();
233}
234
235void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
236 LLT GCDTy, Register SrcReg) {
237 LLT SrcTy = MRI.getType(SrcReg);
238 if (SrcTy == GCDTy) {
239 // If the source already evenly divides the result type, we don't need to do
240 // anything.
241 Parts.push_back(SrcReg);
242 } else {
243 // Need to split into common type sized pieces.
244 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
245 getUnmergeResults(Parts, *Unmerge);
246 }
247}
248
249LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
250 LLT NarrowTy, Register SrcReg) {
251 LLT SrcTy = MRI.getType(SrcReg);
252 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
253 extractGCDType(Parts, GCDTy, SrcReg);
254 return GCDTy;
255}
256
257LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
258 SmallVectorImpl<Register> &VRegs,
259 unsigned PadStrategy) {
260 LLT LCMTy = getLCMType(DstTy, NarrowTy);
261
262 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
263 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
264 int NumOrigSrc = VRegs.size();
265
266 Register PadReg;
267
268 // Get a value we can use to pad the source value if the sources won't evenly
269 // cover the result type.
270 if (NumOrigSrc < NumParts * NumSubParts) {
271 if (PadStrategy == TargetOpcode::G_ZEXT)
272 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
273 else if (PadStrategy == TargetOpcode::G_ANYEXT)
274 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
275 else {
276 assert(PadStrategy == TargetOpcode::G_SEXT);
277
278 // Shift the sign bit of the low register through the high register.
279 auto ShiftAmt =
280 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
281 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
282 }
283 }
284
285 // Registers for the final merge to be produced.
286 SmallVector<Register, 4> Remerge(NumParts);
287
288 // Registers needed for intermediate merges, which will be merged into a
289 // source for Remerge.
290 SmallVector<Register, 4> SubMerge(NumSubParts);
291
292 // Once we've fully read off the end of the original source bits, we can reuse
293 // the same high bits for remaining padding elements.
294 Register AllPadReg;
295
296 // Build merges to the LCM type to cover the original result type.
297 for (int I = 0; I != NumParts; ++I) {
298 bool AllMergePartsArePadding = true;
299
300 // Build the requested merges to the requested type.
301 for (int J = 0; J != NumSubParts; ++J) {
302 int Idx = I * NumSubParts + J;
303 if (Idx >= NumOrigSrc) {
304 SubMerge[J] = PadReg;
305 continue;
306 }
307
308 SubMerge[J] = VRegs[Idx];
309
310 // There are meaningful bits here we can't reuse later.
311 AllMergePartsArePadding = false;
312 }
313
314 // If we've filled up a complete piece with padding bits, we can directly
315 // emit the natural sized constant if applicable, rather than a merge of
316 // smaller constants.
317 if (AllMergePartsArePadding && !AllPadReg) {
318 if (PadStrategy == TargetOpcode::G_ANYEXT)
319 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
320 else if (PadStrategy == TargetOpcode::G_ZEXT)
321 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
322
323 // If this is a sign extension, we can't materialize a trivial constant
324 // with the right type and have to produce a merge.
325 }
326
327 if (AllPadReg) {
328 // Avoid creating additional instructions if we're just adding additional
329 // copies of padding bits.
330 Remerge[I] = AllPadReg;
331 continue;
332 }
333
334 if (NumSubParts == 1)
335 Remerge[I] = SubMerge[0];
336 else
337 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
338
339 // In the sign extend padding case, re-use the first all-signbit merge.
340 if (AllMergePartsArePadding && !AllPadReg)
341 AllPadReg = Remerge[I];
342 }
343
344 VRegs = std::move(Remerge);
345 return LCMTy;
346}
347
348void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
349 ArrayRef<Register> RemergeRegs) {
350 LLT DstTy = MRI.getType(DstReg);
351
352 // Create the merge to the widened source, and extract the relevant bits into
353 // the result.
354
355 if (DstTy == LCMTy) {
356 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
357 return;
358 }
359
360 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
361 if (DstTy.isScalar() && LCMTy.isScalar()) {
362 MIRBuilder.buildTrunc(DstReg, Remerge);
363 return;
364 }
365
366 if (LCMTy.isVector()) {
367 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
368 SmallVector<Register, 8> UnmergeDefs(NumDefs);
369 UnmergeDefs[0] = DstReg;
370 for (unsigned I = 1; I != NumDefs; ++I)
371 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
372
373 MIRBuilder.buildUnmerge(UnmergeDefs,
374 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
375 return;
376 }
377
378 llvm_unreachable("unhandled case");
379}
380
381static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
382#define RTLIBCASE_INT(LibcallPrefix) \
383 do { \
384 switch (Size) { \
385 case 32: \
386 return RTLIB::LibcallPrefix##32; \
387 case 64: \
388 return RTLIB::LibcallPrefix##64; \
389 case 128: \
390 return RTLIB::LibcallPrefix##128; \
391 default: \
392 llvm_unreachable("unexpected size"); \
393 } \
394 } while (0)
395
396#define RTLIBCASE(LibcallPrefix) \
397 do { \
398 switch (Size) { \
399 case 32: \
400 return RTLIB::LibcallPrefix##32; \
401 case 64: \
402 return RTLIB::LibcallPrefix##64; \
403 case 80: \
404 return RTLIB::LibcallPrefix##80; \
405 case 128: \
406 return RTLIB::LibcallPrefix##128; \
407 default: \
408 llvm_unreachable("unexpected size"); \
409 } \
410 } while (0)
411
412 switch (Opcode) {
413 case TargetOpcode::G_LROUND:
414 RTLIBCASE(LROUND_F);
415 case TargetOpcode::G_LLROUND:
416 RTLIBCASE(LLROUND_F);
417 case TargetOpcode::G_MUL:
418 RTLIBCASE_INT(MUL_I);
419 case TargetOpcode::G_SDIV:
420 RTLIBCASE_INT(SDIV_I);
421 case TargetOpcode::G_UDIV:
422 RTLIBCASE_INT(UDIV_I);
423 case TargetOpcode::G_SREM:
424 RTLIBCASE_INT(SREM_I);
425 case TargetOpcode::G_UREM:
426 RTLIBCASE_INT(UREM_I);
427 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
428 RTLIBCASE_INT(CTLZ_I);
429 case TargetOpcode::G_FADD:
430 RTLIBCASE(ADD_F);
431 case TargetOpcode::G_FSUB:
432 RTLIBCASE(SUB_F);
433 case TargetOpcode::G_FMUL:
434 RTLIBCASE(MUL_F);
435 case TargetOpcode::G_FDIV:
436 RTLIBCASE(DIV_F);
437 case TargetOpcode::G_FEXP:
438 RTLIBCASE(EXP_F);
439 case TargetOpcode::G_FEXP2:
440 RTLIBCASE(EXP2_F);
441 case TargetOpcode::G_FEXP10:
442 RTLIBCASE(EXP10_F);
443 case TargetOpcode::G_FREM:
444 RTLIBCASE(REM_F);
445 case TargetOpcode::G_FPOW:
446 RTLIBCASE(POW_F);
447 case TargetOpcode::G_FPOWI:
448 RTLIBCASE(POWI_F);
449 case TargetOpcode::G_FMA:
450 RTLIBCASE(FMA_F);
451 case TargetOpcode::G_FSIN:
452 RTLIBCASE(SIN_F);
453 case TargetOpcode::G_FCOS:
454 RTLIBCASE(COS_F);
455 case TargetOpcode::G_FTAN:
456 RTLIBCASE(TAN_F);
457 case TargetOpcode::G_FASIN:
458 RTLIBCASE(ASIN_F);
459 case TargetOpcode::G_FACOS:
460 RTLIBCASE(ACOS_F);
461 case TargetOpcode::G_FATAN:
462 RTLIBCASE(ATAN_F);
463 case TargetOpcode::G_FATAN2:
464 RTLIBCASE(ATAN2_F);
465 case TargetOpcode::G_FSINH:
466 RTLIBCASE(SINH_F);
467 case TargetOpcode::G_FCOSH:
468 RTLIBCASE(COSH_F);
469 case TargetOpcode::G_FTANH:
470 RTLIBCASE(TANH_F);
471 case TargetOpcode::G_FSINCOS:
472 RTLIBCASE(SINCOS_F);
473 case TargetOpcode::G_FMODF:
474 RTLIBCASE(MODF_F);
475 case TargetOpcode::G_FLOG10:
476 RTLIBCASE(LOG10_F);
477 case TargetOpcode::G_FLOG:
478 RTLIBCASE(LOG_F);
479 case TargetOpcode::G_FLOG2:
480 RTLIBCASE(LOG2_F);
481 case TargetOpcode::G_FLDEXP:
482 RTLIBCASE(LDEXP_F);
483 case TargetOpcode::G_FCEIL:
484 RTLIBCASE(CEIL_F);
485 case TargetOpcode::G_FFLOOR:
486 RTLIBCASE(FLOOR_F);
487 case TargetOpcode::G_FMINNUM:
488 RTLIBCASE(FMIN_F);
489 case TargetOpcode::G_FMAXNUM:
490 RTLIBCASE(FMAX_F);
491 case TargetOpcode::G_FMINIMUMNUM:
492 RTLIBCASE(FMINIMUM_NUM_F);
493 case TargetOpcode::G_FMAXIMUMNUM:
494 RTLIBCASE(FMAXIMUM_NUM_F);
495 case TargetOpcode::G_FSQRT:
496 RTLIBCASE(SQRT_F);
497 case TargetOpcode::G_FRINT:
498 RTLIBCASE(RINT_F);
499 case TargetOpcode::G_FNEARBYINT:
500 RTLIBCASE(NEARBYINT_F);
501 case TargetOpcode::G_INTRINSIC_TRUNC:
502 RTLIBCASE(TRUNC_F);
503 case TargetOpcode::G_INTRINSIC_ROUND:
504 RTLIBCASE(ROUND_F);
505 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
506 RTLIBCASE(ROUNDEVEN_F);
507 case TargetOpcode::G_INTRINSIC_LRINT:
508 RTLIBCASE(LRINT_F);
509 case TargetOpcode::G_INTRINSIC_LLRINT:
510 RTLIBCASE(LLRINT_F);
511 }
512 llvm_unreachable("Unknown libcall function");
513#undef RTLIBCASE_INT
514#undef RTLIBCASE
515}
516
517/// True if an instruction is in tail position in its caller. Intended for
518/// legalizing libcalls as tail calls when possible.
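///
/// For example (derived from the checks below), a libcall cannot be emitted
/// as a tail call when the caller's return value carries zeroext/signext
/// attributes, since eliminating the return would drop the required
/// extension of the returned value.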
519static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
520 MachineInstr &MI,
521 const TargetInstrInfo &TII,
522 MachineRegisterInfo &MRI) {
523 MachineBasicBlock &MBB = *MI.getParent();
524 const Function &F = MBB.getParent()->getFunction();
525
526 // Conservatively require the attributes of the call to match those of
527 // the return. Ignore NoAlias and NonNull because they don't affect the
528 // call sequence.
529 AttributeList CallerAttrs = F.getAttributes();
530 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
531 .removeAttribute(Attribute::NoAlias)
532 .removeAttribute(Attribute::NonNull)
533 .hasAttributes())
534 return false;
535
536 // It's not safe to eliminate the sign / zero extension of the return value.
537 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
538 CallerAttrs.hasRetAttr(Attribute::SExt))
539 return false;
540
541 // Only tail call if the following instruction is a standard return or if we
542 // have a `thisreturn` callee, and a sequence like:
543 //
544 // G_MEMCPY %0, %1, %2
545 // $x0 = COPY %0
546 // RET_ReallyLR implicit $x0
547 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
548 if (Next != MBB.instr_end() && Next->isCopy()) {
549 if (MI.getOpcode() == TargetOpcode::G_BZERO)
550 return false;
551
552 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
553 // memcpy/etc routines return the same parameter. For others it will be the
554 // returned value.
555 Register VReg = MI.getOperand(0).getReg();
556 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
557 return false;
558
559 Register PReg = Next->getOperand(0).getReg();
560 if (!PReg.isPhysical())
561 return false;
562
563 auto Ret = next_nodbg(Next, MBB.instr_end());
564 if (Ret == MBB.instr_end() || !Ret->isReturn())
565 return false;
566
567 if (Ret->getNumImplicitOperands() != 1)
568 return false;
569
570 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
571 return false;
572
573 // Skip over the COPY that we just validated.
574 Next = Ret;
575 }
576
577 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
578 return false;
579
580 return true;
581}
582
583LegalizerHelper::LegalizeResult
584llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
585 const CallLowering::ArgInfo &Result,
586 ArrayRef<CallLowering::ArgInfo> Args,
587 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
588 MachineInstr *MI) {
589 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
590
591 CallLowering::CallLoweringInfo Info;
592 Info.CallConv = CC;
593 Info.Callee = MachineOperand::CreateES(Name);
594 Info.OrigRet = Result;
595 if (MI)
596 Info.IsTailCall =
597 (Result.Ty->isVoidTy() ||
598 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
599 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
600 *MIRBuilder.getMRI());
601
602 llvm::append_range(Info.OrigArgs, Args);
603 if (!CLI.lowerCall(MIRBuilder, Info))
604 return LegalizerHelper::UnableToLegalize;
605
606 if (MI && Info.LoweredTailCall) {
607 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
608
609 // Check debug locations before removing the return.
610 LocObserver.checkpoint(true);
611
612 // We must have a return following the call (or debug insts) to get past
613 // isLibCallInTailPosition.
614 do {
615 MachineInstr *Next = MI->getNextNode();
616 assert(Next &&
617 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
618 "Expected instr following MI to be return or debug inst?");
619 // We lowered a tail call, so the call is now the return from the block.
620 // Delete the old return.
621 Next->eraseFromParent();
622 } while (MI->getNextNode());
623
624 // We expect to lose the debug location from the return.
625 LocObserver.checkpoint(false);
626 }
627 return LegalizerHelper::Legalized;
628}
629
630LegalizerHelper::LegalizeResult
631llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
632 const CallLowering::ArgInfo &Result,
633 ArrayRef<CallLowering::ArgInfo> Args,
634 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
635 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
636 const char *Name = TLI.getLibcallName(Libcall);
637 if (!Name)
638 return LegalizerHelper::UnableToLegalize;
639 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
640 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
641}
642
643// Useful for libcalls where all operands have the same type.
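// For example (assuming the usual RTLIB mapping), a 64-bit G_FREM maps to
// REM_F_64 and becomes a call to fmod, with the result and both operands
// given the IR type double.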
644static LegalizerHelper::LegalizeResult
645simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
646 Type *OpType, LostDebugLocObserver &LocObserver) {
647 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
648
649 // FIXME: What does the original arg index mean here?
650 SmallVector<CallLowering::ArgInfo, 3> Args;
651 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
652 Args.push_back({MO.getReg(), OpType, 0});
653 return createLibcall(MIRBuilder, Libcall,
654 {MI.getOperand(0).getReg(), OpType, 0}, Args,
655 LocObserver, &MI);
656}
657
658LegalizerHelper::LegalizeResult LegalizerHelper::emitSincosLibcall(
659 MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType,
660 LostDebugLocObserver &LocObserver) {
661 MachineFunction &MF = *MI.getMF();
662 MachineRegisterInfo &MRI = MF.getRegInfo();
663
664 Register DstSin = MI.getOperand(0).getReg();
665 Register DstCos = MI.getOperand(1).getReg();
666 Register Src = MI.getOperand(2).getReg();
667 LLT DstTy = MRI.getType(DstSin);
668
669 int MemSize = DstTy.getSizeInBytes();
670 Align Alignment = getStackTemporaryAlignment(DstTy);
671 const DataLayout &DL = MIRBuilder.getDataLayout();
672 unsigned AddrSpace = DL.getAllocaAddrSpace();
673 MachinePointerInfo PtrInfo;
674
675 Register StackPtrSin =
676 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
677 .getReg(0);
678 Register StackPtrCos =
679 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
680 .getReg(0);
681
682 auto &Ctx = MF.getFunction().getContext();
683 auto LibcallResult =
684 createLibcall(MIRBuilder, getRTLibDesc(MI.getOpcode(), Size),
685 {{0}, Type::getVoidTy(Ctx), 0},
686 {{Src, OpType, 0},
687 {StackPtrSin, PointerType::get(Ctx, AddrSpace), 1},
688 {StackPtrCos, PointerType::get(Ctx, AddrSpace), 2}},
689 LocObserver, &MI);
690
691 if (LibcallResult != LegalizeResult::Legalized)
692 return LegalizerHelper::UnableToLegalize;
693
694 MachineMemOperand *LoadMMOSin = MF.getMachineMemOperand(
695 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
696 MachineMemOperand *LoadMMOCos = MF.getMachineMemOperand(
697 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
698
699 MIRBuilder.buildLoad(DstSin, StackPtrSin, *LoadMMOSin);
700 MIRBuilder.buildLoad(DstCos, StackPtrCos, *LoadMMOCos);
701 MI.eraseFromParent();
702
703 return LegalizeResult::Legalized;
704}
705
706LegalizerHelper::LegalizeResult
707LegalizerHelper::emitModfLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder,
708 unsigned Size, Type *OpType,
709 LostDebugLocObserver &LocObserver) {
710 MachineFunction &MF = MIRBuilder.getMF();
711 MachineRegisterInfo &MRI = MF.getRegInfo();
712
713 Register DstFrac = MI.getOperand(0).getReg();
714 Register DstInt = MI.getOperand(1).getReg();
715 Register Src = MI.getOperand(2).getReg();
716 LLT DstTy = MRI.getType(DstFrac);
717
718 int MemSize = DstTy.getSizeInBytes();
719 Align Alignment = getStackTemporaryAlignment(DstTy);
720 const DataLayout &DL = MIRBuilder.getDataLayout();
721 unsigned AddrSpace = DL.getAllocaAddrSpace();
722 MachinePointerInfo PtrInfo;
723
724 Register StackPtrInt =
725 createStackTemporary(TypeSize::getFixed(MemSize), Alignment, PtrInfo)
726 .getReg(0);
727
728 auto &Ctx = MF.getFunction().getContext();
729 auto LibcallResult = createLibcall(
730 MIRBuilder, getRTLibDesc(MI.getOpcode(), Size), {DstFrac, OpType, 0},
731 {{Src, OpType, 0}, {StackPtrInt, PointerType::get(Ctx, AddrSpace), 1}},
732 LocObserver, &MI);
733
734 if (LibcallResult != LegalizeResult::Legalized)
735 return LegalizerHelper::UnableToLegalize;
736
737 MachineMemOperand *LoadMMOInt = MF.getMachineMemOperand(
738 PtrInfo, MachineMemOperand::MOLoad, MemSize, Alignment);
739
740 MIRBuilder.buildLoad(DstInt, StackPtrInt, *LoadMMOInt);
741 MI.eraseFromParent();
742
743 return LegalizeResult::Legalized;
744}
745
746LegalizerHelper::LegalizeResult
747llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
748 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
749 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
750
751 SmallVector<CallLowering::ArgInfo, 3> Args;
752 // Add all the args, except for the last which is an imm denoting 'tail'.
753 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
754 Register Reg = MI.getOperand(i).getReg();
755
756 // Need to derive an IR type for call lowering.
757 LLT OpLLT = MRI.getType(Reg);
758 Type *OpTy = nullptr;
759 if (OpLLT.isPointer())
760 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
761 else
762 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
763 Args.push_back({Reg, OpTy, 0});
764 }
765
766 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
767 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
768 RTLIB::Libcall RTLibcall;
769 unsigned Opc = MI.getOpcode();
770 const char *Name;
771 switch (Opc) {
772 case TargetOpcode::G_BZERO:
773 RTLibcall = RTLIB::BZERO;
774 Name = TLI.getLibcallName(RTLibcall);
775 break;
776 case TargetOpcode::G_MEMCPY:
777 RTLibcall = RTLIB::MEMCPY;
778 Name = TLI.getLibcallImplName(TLI.getMemcpyImpl()).data();
779 Args[0].Flags[0].setReturned();
780 break;
781 case TargetOpcode::G_MEMMOVE:
782 RTLibcall = RTLIB::MEMMOVE;
783 Name = TLI.getLibcallName(RTLibcall);
784 Args[0].Flags[0].setReturned();
785 break;
786 case TargetOpcode::G_MEMSET:
787 RTLibcall = RTLIB::MEMSET;
788 Name = TLI.getLibcallName(RTLibcall);
789 Args[0].Flags[0].setReturned();
790 break;
791 default:
792 llvm_unreachable("unsupported opcode");
793 }
794
795 // Unsupported libcall on the target.
796 if (!Name) {
797 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
798 << MIRBuilder.getTII().getName(Opc) << "\n");
799 return LegalizerHelper::UnableToLegalize;
800 }
801
802 CallLowering::CallLoweringInfo Info;
803 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
804 Info.Callee = MachineOperand::CreateES(Name);
805 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
806 Info.IsTailCall =
807 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
808 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
809
810 llvm::append_range(Info.OrigArgs, Args);
811 if (!CLI.lowerCall(MIRBuilder, Info))
812 return LegalizerHelper::UnableToLegalize;
813
814 if (Info.LoweredTailCall) {
815 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
816
817 // Check debug locations before removing the return.
818 LocObserver.checkpoint(true);
819
820 // We must have a return following the call (or debug insts) to get past
821 // isLibCallInTailPosition.
822 do {
823 MachineInstr *Next = MI.getNextNode();
824 assert(Next &&
825 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
826 "Expected instr following MI to be return or debug inst?");
827 // We lowered a tail call, so the call is now the return from the block.
828 // Delete the old return.
829 Next->eraseFromParent();
830 } while (MI.getNextNode());
831
832 // We expect to lose the debug location from the return.
833 LocObserver.checkpoint(false);
834 }
835
836 return LegalizerHelper::Legalized;
837}
838
839static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
840 unsigned Opc = MI.getOpcode();
841 auto &AtomicMI = cast<GMemOperation>(MI);
842 auto &MMO = AtomicMI.getMMO();
843 auto Ordering = MMO.getMergedOrdering();
844 LLT MemType = MMO.getMemoryType();
845 uint64_t MemSize = MemType.getSizeInBytes();
846 if (MemType.isVector())
847 return RTLIB::UNKNOWN_LIBCALL;
848
849#define LCALLS(A, B) {A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL}
850#define LCALL5(A) \
851 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
852 switch (Opc) {
853 case TargetOpcode::G_ATOMIC_CMPXCHG:
854 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
855 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
856 return getOutlineAtomicHelper(LC, Ordering, MemSize);
857 }
858 case TargetOpcode::G_ATOMICRMW_XCHG: {
859 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
860 return getOutlineAtomicHelper(LC, Ordering, MemSize);
861 }
862 case TargetOpcode::G_ATOMICRMW_ADD:
863 case TargetOpcode::G_ATOMICRMW_SUB: {
864 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
865 return getOutlineAtomicHelper(LC, Ordering, MemSize);
866 }
867 case TargetOpcode::G_ATOMICRMW_AND: {
868 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
869 return getOutlineAtomicHelper(LC, Ordering, MemSize);
870 }
871 case TargetOpcode::G_ATOMICRMW_OR: {
872 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
873 return getOutlineAtomicHelper(LC, Ordering, MemSize);
874 }
875 case TargetOpcode::G_ATOMICRMW_XOR: {
876 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
877 return getOutlineAtomicHelper(LC, Ordering, MemSize);
878 }
879 default:
880 return RTLIB::UNKNOWN_LIBCALL;
881 }
882#undef LCALLS
883#undef LCALL5
884}
885
886static LegalizerHelper::LegalizeResult
887createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
888 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
889
890 Type *RetTy;
891 SmallVector<Register> RetRegs;
892 SmallVector<CallLowering::ArgInfo, 3> Args;
893 unsigned Opc = MI.getOpcode();
894 switch (Opc) {
895 case TargetOpcode::G_ATOMIC_CMPXCHG:
896 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
897 Register Success;
898 LLT SuccessLLT;
899 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
900 MI.getFirst4RegLLTs();
901 RetRegs.push_back(Ret);
902 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
903 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
904 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
905 NewLLT) = MI.getFirst5RegLLTs();
906 RetRegs.push_back(Success);
907 RetTy = StructType::get(
908 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
909 }
910 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
911 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
912 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
913 break;
914 }
915 case TargetOpcode::G_ATOMICRMW_XCHG:
916 case TargetOpcode::G_ATOMICRMW_ADD:
917 case TargetOpcode::G_ATOMICRMW_SUB:
918 case TargetOpcode::G_ATOMICRMW_AND:
919 case TargetOpcode::G_ATOMICRMW_OR:
920 case TargetOpcode::G_ATOMICRMW_XOR: {
921 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
922 RetRegs.push_back(Ret);
923 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
924 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
925 Val =
926 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
927 .getReg(0);
928 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
929 Val =
930 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
931 .getReg(0);
932 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
933 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
934 break;
935 }
936 default:
937 llvm_unreachable("unsupported opcode");
938 }
939
940 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
941 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
942 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
943 const char *Name = TLI.getLibcallName(RTLibcall);
944
945 // Unsupported libcall on the target.
946 if (!Name) {
947 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
948 << MIRBuilder.getTII().getName(Opc) << "\n");
949 return LegalizerHelper::UnableToLegalize;
950 }
951
952 CallLowering::CallLoweringInfo Info;
953 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
954 Info.Callee = MachineOperand::CreateES(Name);
955 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
956
957 llvm::append_range(Info.OrigArgs, Args);
958 if (!CLI.lowerCall(MIRBuilder, Info))
959 return LegalizerHelper::UnableToLegalize;
960
961 return LegalizerHelper::Legalized;
962}
963
964static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
965 Type *FromType) {
966 auto ToMVT = MVT::getVT(ToType);
967 auto FromMVT = MVT::getVT(FromType);
968
969 switch (Opcode) {
970 case TargetOpcode::G_FPEXT:
971 return RTLIB::getFPEXT(FromMVT, ToMVT);
972 case TargetOpcode::G_FPTRUNC:
973 return RTLIB::getFPROUND(FromMVT, ToMVT);
974 case TargetOpcode::G_FPTOSI:
975 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
976 case TargetOpcode::G_FPTOUI:
977 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
978 case TargetOpcode::G_SITOFP:
979 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
980 case TargetOpcode::G_UITOFP:
981 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
982 }
983 llvm_unreachable("Unsupported libcall function");
984}
985
986static LegalizerHelper::LegalizeResult
987conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
988 Type *FromType, LostDebugLocObserver &LocObserver,
989 const TargetLowering &TLI, bool IsSigned = false) {
990 CallLowering::ArgInfo Arg = {MI.getOperand(1).getReg(), FromType, 0};
991 if (FromType->isIntegerTy()) {
992 if (TLI.shouldSignExtendTypeInLibCall(FromType, IsSigned))
993 Arg.Flags[0].setSExt();
994 else
995 Arg.Flags[0].setZExt();
996 }
997
998 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
999 return createLibcall(MIRBuilder, Libcall,
1000 {MI.getOperand(0).getReg(), ToType, 0}, Arg, LocObserver,
1001 &MI);
1002}
1003
1004static RTLIB::Libcall
1005getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
1006 RTLIB::Libcall RTLibcall;
1007 switch (MI.getOpcode()) {
1008 case TargetOpcode::G_GET_FPENV:
1009 RTLibcall = RTLIB::FEGETENV;
1010 break;
1011 case TargetOpcode::G_SET_FPENV:
1012 case TargetOpcode::G_RESET_FPENV:
1013 RTLibcall = RTLIB::FESETENV;
1014 break;
1015 case TargetOpcode::G_GET_FPMODE:
1016 RTLibcall = RTLIB::FEGETMODE;
1017 break;
1018 case TargetOpcode::G_SET_FPMODE:
1019 case TargetOpcode::G_RESET_FPMODE:
1020 RTLibcall = RTLIB::FESETMODE;
1021 break;
1022 default:
1023 llvm_unreachable("Unexpected opcode");
1024 }
1025 return RTLibcall;
1026}
1027
1028// Some library functions that read FP state (fegetmode, fegetenv) write the
1029// state into a region in memory. IR intrinsics that do the same operations
1030// (get_fpmode, get_fpenv) return the state as an integer value. To implement
1031// these intrinsics via the library functions, we need to use a temporary variable,
1032// for example:
1033//
1034// %0:_(s32) = G_GET_FPMODE
1035//
1036// is transformed to:
1037//
1038// %1:_(p0) = G_FRAME_INDEX %stack.0
1039// BL &fegetmode
1040// %0:_(s32) = G_LOAD %1
1041//
1042LegalizerHelper::LegalizeResult
1043LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
1044 MachineInstr &MI,
1045 LostDebugLocObserver &LocObserver) {
1046 const DataLayout &DL = MIRBuilder.getDataLayout();
1047 auto &MF = MIRBuilder.getMF();
1048 auto &MRI = *MIRBuilder.getMRI();
1049 auto &Ctx = MF.getFunction().getContext();
1050
1051 // Create a temporary where the library function will put the read state.
1052 Register Dst = MI.getOperand(0).getReg();
1053 LLT StateTy = MRI.getType(Dst);
1054 TypeSize StateSize = StateTy.getSizeInBytes();
1055 Align TempAlign = getStackTemporaryAlignment(StateTy);
1056 MachinePointerInfo TempPtrInfo;
1057 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1058
1059 // Create a call to library function, with the temporary as an argument.
1060 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1061 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1062 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1063 auto Res =
1064 createLibcall(MIRBuilder, RTLibcall,
1065 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1066 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1067 LocObserver, nullptr);
1068 if (Res != LegalizerHelper::Legalized)
1069 return Res;
1070
1071 // Create a load from the temporary.
1072 MachineMemOperand *MMO = MF.getMachineMemOperand(
1073 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
1074 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
1075
1076 return LegalizerHelper::Legalized;
1077}
1078
1079// Similar to `createGetStateLibcall`, this function calls a library function
1080// using transient space on the stack. In this case the library function reads
1081// the content of the memory region.
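//
// A sketch of the transformation (mirroring the example above):
//
// G_SET_FPMODE %0:_(s32)
//
// is transformed to:
//
// %1:_(p0) = G_FRAME_INDEX %stack.0
// G_STORE %0:_(s32), %1:_(p0)
// BL &fesetmode
//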
1082LegalizerHelper::LegalizeResult
1083LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
1084 MachineInstr &MI,
1085 LostDebugLocObserver &LocObserver) {
1086 const DataLayout &DL = MIRBuilder.getDataLayout();
1087 auto &MF = MIRBuilder.getMF();
1088 auto &MRI = *MIRBuilder.getMRI();
1089 auto &Ctx = MF.getFunction().getContext();
1090
1091 // Create a temporary where the library function will get the new state.
1092 Register Src = MI.getOperand(0).getReg();
1093 LLT StateTy = MRI.getType(Src);
1094 TypeSize StateSize = StateTy.getSizeInBytes();
1095 Align TempAlign = getStackTemporaryAlignment(StateTy);
1096 MachinePointerInfo TempPtrInfo;
1097 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
1098
1099 // Put the new state into the temporary.
1100 MachineMemOperand *MMO = MF.getMachineMemOperand(
1101 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
1102 MIRBuilder.buildStore(Src, Temp, *MMO);
1103
1104 // Create a call to library function, with the temporary as an argument.
1105 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
1106 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
1107 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1108 return createLibcall(MIRBuilder, RTLibcall,
1109 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1110 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
1111 LocObserver, nullptr);
1112}
1113
1114/// Returns the corresponding libcall for the given Pred and
1115/// the ICMP predicate that should be generated to compare with #0
1116/// after the libcall.
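///
/// For example, a 32-bit FCMP_OEQ maps to the RTLIB::OEQ_F32 libcall
/// (typically __eqsf2 in libgcc/compiler-rt) together with ICMP_EQ, i.e. the
/// i32 result of the call is compared for equality against zero.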
1117static std::pair<RTLIB::Libcall, CmpInst::Predicate>
1118getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size) {
1119#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred) \
1120 do { \
1121 switch (Size) { \
1122 case 32: \
1123 return {RTLIB::LibcallPrefix##32, ICmpPred}; \
1124 case 64: \
1125 return {RTLIB::LibcallPrefix##64, ICmpPred}; \
1126 case 128: \
1127 return {RTLIB::LibcallPrefix##128, ICmpPred}; \
1128 default: \
1129 llvm_unreachable("unexpected size"); \
1130 } \
1131 } while (0)
1132
1133 switch (Pred) {
1134 case CmpInst::FCMP_OEQ:
1135 RTLIBCASE_CMP(OEQ_F, CmpInst::ICMP_EQ);
1136 case CmpInst::FCMP_UNE:
1137 RTLIBCASE_CMP(UNE_F, CmpInst::ICMP_NE);
1138 case CmpInst::FCMP_OGE:
1139 RTLIBCASE_CMP(OGE_F, CmpInst::ICMP_SGE);
1140 case CmpInst::FCMP_OLT:
1141 RTLIBCASE_CMP(OLT_F, CmpInst::ICMP_SLT);
1142 case CmpInst::FCMP_OLE:
1143 RTLIBCASE_CMP(OLE_F, CmpInst::ICMP_SLE);
1144 case CmpInst::FCMP_OGT:
1145 RTLIBCASE_CMP(OGT_F, CmpInst::ICMP_SGT);
1146 case CmpInst::FCMP_UNO:
1147 RTLIBCASE_CMP(UO_F, CmpInst::ICMP_NE);
1148 default:
1149 return {RTLIB::UNKNOWN_LIBCALL, CmpInst::BAD_ICMP_PREDICATE};
1150 }
1151}
1152
1153LegalizerHelper::LegalizeResult
1154LegalizerHelper::createFCMPLibcall(MachineIRBuilder &MIRBuilder,
1155 MachineInstr &MI,
1156 LostDebugLocObserver &LocObserver) {
1157 auto &MF = MIRBuilder.getMF();
1158 auto &Ctx = MF.getFunction().getContext();
1159 const GFCmp *Cmp = cast<GFCmp>(&MI);
1160
1161 LLT OpLLT = MRI.getType(Cmp->getLHSReg());
1162 unsigned Size = OpLLT.getSizeInBits();
1163 if ((Size != 32 && Size != 64 && Size != 128) ||
1164 OpLLT != MRI.getType(Cmp->getRHSReg()))
1165 return UnableToLegalize;
1166
1167 Type *OpType = getFloatTypeForLLT(Ctx, OpLLT);
1168
1169 // DstReg type is s32
1170 const Register DstReg = Cmp->getReg(0);
1171 LLT DstTy = MRI.getType(DstReg);
1172 const auto Cond = Cmp->getCond();
1173
1174 // Reference:
1175 // https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
1176 // Generates a libcall followed by ICMP.
1177 const auto BuildLibcall = [&](const RTLIB::Libcall Libcall,
1178 const CmpInst::Predicate ICmpPred,
1179 const DstOp &Res) -> Register {
1180 // FCMP libcall always returns an i32, and needs an ICMP with #0.
1181 constexpr LLT TempLLT = LLT::scalar(32);
1182 Register Temp = MRI.createGenericVirtualRegister(TempLLT);
1183 // Generate libcall, holding result in Temp
1184 const auto Status = createLibcall(
1185 MIRBuilder, Libcall, {Temp, Type::getInt32Ty(Ctx), 0},
1186 {{Cmp->getLHSReg(), OpType, 0}, {Cmp->getRHSReg(), OpType, 1}},
1187 LocObserver, &MI);
1188 if (!Status)
1189 return {};
1190
1191 // Compare temp with #0 to get the final result.
1192 return MIRBuilder
1193 .buildICmp(ICmpPred, Res, Temp, MIRBuilder.buildConstant(TempLLT, 0))
1194 .getReg(0);
1195 };
1196
1197 // Simple case if we have a direct mapping from predicate to libcall
1198 if (const auto [Libcall, ICmpPred] = getFCMPLibcallDesc(Cond, Size);
1199 Libcall != RTLIB::UNKNOWN_LIBCALL &&
1200 ICmpPred != CmpInst::BAD_ICMP_PREDICATE) {
1201 if (BuildLibcall(Libcall, ICmpPred, DstReg)) {
1202 return Legalized;
1203 }
1204 return UnableToLegalize;
1205 }
1206
1207 // No direct mapping found, should be generated as combination of libcalls.
1208
1209 switch (Cond) {
1210 case CmpInst::FCMP_UEQ: {
1211 // FCMP_UEQ: unordered or equal
1212 // Convert into (FCMP_OEQ || FCMP_UNO).
1213
1214 const auto [OeqLibcall, OeqPred] =
1215 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1216 const auto Oeq = BuildLibcall(OeqLibcall, OeqPred, DstTy);
1217
1218 const auto [UnoLibcall, UnoPred] =
1219 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1220 const auto Uno = BuildLibcall(UnoLibcall, UnoPred, DstTy);
1221 if (Oeq && Uno)
1222 MIRBuilder.buildOr(DstReg, Oeq, Uno);
1223 else
1224 return UnableToLegalize;
1225
1226 break;
1227 }
1228 case CmpInst::FCMP_ONE: {
1229 // FCMP_ONE: ordered and operands are unequal
1230 // Convert into (!FCMP_OEQ && !FCMP_UNO).
1231
1232 // We invert the predicate instead of generating a NOT
1233 // to save one instruction.
1234 // On AArch64 isel can even select two cmp into a single ccmp.
1235 const auto [OeqLibcall, OeqPred] =
1236 getFCMPLibcallDesc(CmpInst::FCMP_OEQ, Size);
1237 const auto NotOeq =
1238 BuildLibcall(OeqLibcall, CmpInst::getInversePredicate(OeqPred), DstTy);
1239
1240 const auto [UnoLibcall, UnoPred] =
1241 getFCMPLibcallDesc(CmpInst::FCMP_UNO, Size);
1242 const auto NotUno =
1243 BuildLibcall(UnoLibcall, CmpInst::getInversePredicate(UnoPred), DstTy);
1244
1245 if (NotOeq && NotUno)
1246 MIRBuilder.buildAnd(DstReg, NotOeq, NotUno);
1247 else
1248 return UnableToLegalize;
1249
1250 break;
1251 }
1252 case CmpInst::FCMP_ULT:
1253 case CmpInst::FCMP_UGE:
1254 case CmpInst::FCMP_UGT:
1255 case CmpInst::FCMP_ULE:
1256 case CmpInst::FCMP_ORD: {
1257 // Convert into: !(inverse(Pred))
1258 // E.g. FCMP_ULT becomes !FCMP_OGE
1259 // This is equivalent to the following, but saves some instructions.
1260 // MIRBuilder.buildNot(
1261 // PredTy,
1262 // MIRBuilder.buildFCmp(CmpInst::getInversePredicate(Pred), PredTy,
1263 // Op1, Op2));
1264 const auto [InversedLibcall, InversedPred] =
1265 getFCMPLibcallDesc(CmpInst::getInversePredicate(Cond), Size);
1266 if (!BuildLibcall(InversedLibcall,
1267 CmpInst::getInversePredicate(InversedPred), DstReg))
1268 return UnableToLegalize;
1269 break;
1270 }
1271 default:
1272 return UnableToLegalize;
1273 }
1274
1275 return Legalized;
1276}
1277
1278// The function is used to legalize operations that set default environment
1279// state. In C library a call like `fesetmode(FE_DFL_MODE)` is used for that.
1280// On most targets supported in glibc FE_DFL_MODE is defined as
1281// `((const femode_t *) -1)`. That assumption is used here. If it does not hold
1282// for some target, the target must provide custom lowering.
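//
// A sketch of the resulting MIR on a target with 64-bit pointers:
//
// %0:_(s64) = G_CONSTANT i64 -1
// %1:_(p0) = G_INTTOPTR %0:_(s64)
// BL &fesetmode (or &fesetenv for G_RESET_FPENV)
//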
1283LegalizerHelper::LegalizeResult
1284LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
1285 MachineInstr &MI,
1286 LostDebugLocObserver &LocObserver) {
1287 const DataLayout &DL = MIRBuilder.getDataLayout();
1288 auto &MF = MIRBuilder.getMF();
1289 auto &Ctx = MF.getFunction().getContext();
1290
1291 // Create an argument for the library function.
1292 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
1293 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
1294 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
1295 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
1296 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
1297 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
1298 MIRBuilder.buildIntToPtr(Dest, DefValue);
1299
1300 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
1301 return createLibcall(MIRBuilder, RTLibcall,
1302 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1303 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1304 LocObserver, &MI);
1305}
1306
1307LegalizerHelper::LegalizeResult
1308LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1309 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1310
1311 switch (MI.getOpcode()) {
1312 default:
1313 return UnableToLegalize;
1314 case TargetOpcode::G_MUL:
1315 case TargetOpcode::G_SDIV:
1316 case TargetOpcode::G_UDIV:
1317 case TargetOpcode::G_SREM:
1318 case TargetOpcode::G_UREM:
1319 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1320 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1321 unsigned Size = LLTy.getSizeInBits();
1322 Type *HLTy = IntegerType::get(Ctx, Size);
1323 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1324 if (Status != Legalized)
1325 return Status;
1326 break;
1327 }
1328 case TargetOpcode::G_FADD:
1329 case TargetOpcode::G_FSUB:
1330 case TargetOpcode::G_FMUL:
1331 case TargetOpcode::G_FDIV:
1332 case TargetOpcode::G_FMA:
1333 case TargetOpcode::G_FPOW:
1334 case TargetOpcode::G_FREM:
1335 case TargetOpcode::G_FCOS:
1336 case TargetOpcode::G_FSIN:
1337 case TargetOpcode::G_FTAN:
1338 case TargetOpcode::G_FACOS:
1339 case TargetOpcode::G_FASIN:
1340 case TargetOpcode::G_FATAN:
1341 case TargetOpcode::G_FATAN2:
1342 case TargetOpcode::G_FCOSH:
1343 case TargetOpcode::G_FSINH:
1344 case TargetOpcode::G_FTANH:
1345 case TargetOpcode::G_FLOG10:
1346 case TargetOpcode::G_FLOG:
1347 case TargetOpcode::G_FLOG2:
1348 case TargetOpcode::G_FEXP:
1349 case TargetOpcode::G_FEXP2:
1350 case TargetOpcode::G_FEXP10:
1351 case TargetOpcode::G_FCEIL:
1352 case TargetOpcode::G_FFLOOR:
1353 case TargetOpcode::G_FMINNUM:
1354 case TargetOpcode::G_FMAXNUM:
1355 case TargetOpcode::G_FMINIMUMNUM:
1356 case TargetOpcode::G_FMAXIMUMNUM:
1357 case TargetOpcode::G_FSQRT:
1358 case TargetOpcode::G_FRINT:
1359 case TargetOpcode::G_FNEARBYINT:
1360 case TargetOpcode::G_INTRINSIC_TRUNC:
1361 case TargetOpcode::G_INTRINSIC_ROUND:
1362 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1363 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1364 unsigned Size = LLTy.getSizeInBits();
1365 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1366 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1367 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1368 return UnableToLegalize;
1369 }
1370 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1371 if (Status != Legalized)
1372 return Status;
1373 break;
1374 }
1375 case TargetOpcode::G_FSINCOS: {
1376 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1377 unsigned Size = LLTy.getSizeInBits();
1378 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1379 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1380 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1381 return UnableToLegalize;
1382 }
1383 return emitSincosLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1384 }
1385 case TargetOpcode::G_FMODF: {
1386 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1387 unsigned Size = LLTy.getSizeInBits();
1388 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1389 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1390 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1391 return UnableToLegalize;
1392 }
1393 return emitModfLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1394 }
1395 case TargetOpcode::G_LROUND:
1396 case TargetOpcode::G_LLROUND:
1397 case TargetOpcode::G_INTRINSIC_LRINT:
1398 case TargetOpcode::G_INTRINSIC_LLRINT: {
1399 LLT LLTy = MRI.getType(MI.getOperand(1).getReg());
1400 unsigned Size = LLTy.getSizeInBits();
1401 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1402 Type *ITy = IntegerType::get(
1403 Ctx, MRI.getType(MI.getOperand(0).getReg()).getSizeInBits());
1404 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1405 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1406 return UnableToLegalize;
1407 }
1408 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1409 LegalizeResult Status =
1410 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ITy, 0},
1411 {{MI.getOperand(1).getReg(), HLTy, 0}}, LocObserver, &MI);
1412 if (Status != Legalized)
1413 return Status;
1414 MI.eraseFromParent();
1415 return Legalized;
1416 }
1417 case TargetOpcode::G_FPOWI:
1418 case TargetOpcode::G_FLDEXP: {
1419 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1420 unsigned Size = LLTy.getSizeInBits();
1421 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1422 Type *ITy = IntegerType::get(
1423 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1424 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1425 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1426 return UnableToLegalize;
1427 }
1428 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1429 SmallVector<CallLowering::ArgInfo, 2> Args = {
1430 {MI.getOperand(1).getReg(), HLTy, 0},
1431 {MI.getOperand(2).getReg(), ITy, 1}};
1432 Args[1].Flags[0].setSExt();
1433 LegalizeResult Status =
1434 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1435 Args, LocObserver, &MI);
1436 if (Status != Legalized)
1437 return Status;
1438 break;
1439 }
1440 case TargetOpcode::G_FPEXT:
1441 case TargetOpcode::G_FPTRUNC: {
1442 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1443 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1444 if (!FromTy || !ToTy)
1445 return UnableToLegalize;
1446 LegalizeResult Status =
1447 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver, TLI);
1448 if (Status != Legalized)
1449 return Status;
1450 break;
1451 }
1452 case TargetOpcode::G_FCMP: {
1453 LegalizeResult Status = createFCMPLibcall(MIRBuilder, MI, LocObserver);
1454 if (Status != Legalized)
1455 return Status;
1456 MI.eraseFromParent();
1457 return Status;
1458 }
1459 case TargetOpcode::G_FPTOSI:
1460 case TargetOpcode::G_FPTOUI: {
1461 // FIXME: Support other types
1462 Type *FromTy =
1463 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1464 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1465 if ((ToSize != 32 && ToSize != 64 && ToSize != 128) || !FromTy)
1466 return UnableToLegalize;
1467 LegalizeResult Status = conversionLibcall(
1468 MI, MIRBuilder, Type::getIntNTy(Ctx, ToSize), FromTy, LocObserver, TLI);
1469 if (Status != Legalized)
1470 return Status;
1471 break;
1472 }
1473 case TargetOpcode::G_SITOFP:
1474 case TargetOpcode::G_UITOFP: {
1475 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1476 Type *ToTy =
1477 getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1478 if ((FromSize != 32 && FromSize != 64 && FromSize != 128) || !ToTy)
1479 return UnableToLegalize;
1480 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SITOFP;
1481 LegalizeResult Status =
1482 conversionLibcall(MI, MIRBuilder, ToTy, Type::getIntNTy(Ctx, FromSize),
1483 LocObserver, TLI, IsSigned);
1484 if (Status != Legalized)
1485 return Status;
1486 break;
1487 }
1488 case TargetOpcode::G_ATOMICRMW_XCHG:
1489 case TargetOpcode::G_ATOMICRMW_ADD:
1490 case TargetOpcode::G_ATOMICRMW_SUB:
1491 case TargetOpcode::G_ATOMICRMW_AND:
1492 case TargetOpcode::G_ATOMICRMW_OR:
1493 case TargetOpcode::G_ATOMICRMW_XOR:
1494 case TargetOpcode::G_ATOMIC_CMPXCHG:
1495 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1496 auto Status = createAtomicLibcall(MIRBuilder, MI);
1497 if (Status != Legalized)
1498 return Status;
1499 break;
1500 }
1501 case TargetOpcode::G_BZERO:
1502 case TargetOpcode::G_MEMCPY:
1503 case TargetOpcode::G_MEMMOVE:
1504 case TargetOpcode::G_MEMSET: {
1505 LegalizeResult Result =
1506 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1507 if (Result != Legalized)
1508 return Result;
1509 MI.eraseFromParent();
1510 return Result;
1511 }
1512 case TargetOpcode::G_GET_FPENV:
1513 case TargetOpcode::G_GET_FPMODE: {
1514 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1515 if (Result != Legalized)
1516 return Result;
1517 break;
1518 }
1519 case TargetOpcode::G_SET_FPENV:
1520 case TargetOpcode::G_SET_FPMODE: {
1521 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1522 if (Result != Legalized)
1523 return Result;
1524 break;
1525 }
1526 case TargetOpcode::G_RESET_FPENV:
1527 case TargetOpcode::G_RESET_FPMODE: {
1528 LegalizeResult Result =
1529 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1530 if (Result != Legalized)
1531 return Result;
1532 break;
1533 }
1534 }
1535
1536 MI.eraseFromParent();
1537 return Legalized;
1538}
1539
1540LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1541 unsigned TypeIdx,
1542 LLT NarrowTy) {
1543 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1544 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1545
1546 switch (MI.getOpcode()) {
1547 default:
1548 return UnableToLegalize;
1549 case TargetOpcode::G_IMPLICIT_DEF: {
1550 Register DstReg = MI.getOperand(0).getReg();
1551 LLT DstTy = MRI.getType(DstReg);
1552
1553 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1554 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1555 // FIXME: Although this would also be legal for the general case, it causes
1556 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1557 // combines not being hit). This seems to be a problem related to the
1558 // artifact combiner.
1559 if (SizeOp0 % NarrowSize != 0) {
1560 LLT ImplicitTy = DstTy.changeElementType(NarrowTy);
1561 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1562 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1563
1564 MI.eraseFromParent();
1565 return Legalized;
1566 }
1567
1568 int NumParts = SizeOp0 / NarrowSize;
1569
1570 SmallVector<Register, 2> DstRegs;
1571 for (int i = 0; i < NumParts; ++i)
1572 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1573
1574 if (DstTy.isVector())
1575 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1576 else
1577 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1578 MI.eraseFromParent();
1579 return Legalized;
1580 }
1581 case TargetOpcode::G_CONSTANT: {
1582 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1583 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1584 unsigned TotalSize = Ty.getSizeInBits();
1585 unsigned NarrowSize = NarrowTy.getSizeInBits();
1586 int NumParts = TotalSize / NarrowSize;
1587
1588 SmallVector<Register, 4> PartRegs;
1589 for (int I = 0; I != NumParts; ++I) {
1590 unsigned Offset = I * NarrowSize;
1591 auto K = MIRBuilder.buildConstant(NarrowTy,
1592 Val.lshr(Offset).trunc(NarrowSize));
1593 PartRegs.push_back(K.getReg(0));
1594 }
1595
1596 LLT LeftoverTy;
1597 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1598 SmallVector<Register, 1> LeftoverRegs;
1599 if (LeftoverBits != 0) {
1600 LeftoverTy = LLT::scalar(LeftoverBits);
1601 auto K = MIRBuilder.buildConstant(
1602 LeftoverTy,
1603 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1604 LeftoverRegs.push_back(K.getReg(0));
1605 }
1606
1607 insertParts(MI.getOperand(0).getReg(),
1608 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1609
1610 MI.eraseFromParent();
1611 return Legalized;
1612 }
1613 case TargetOpcode::G_SEXT:
1614 case TargetOpcode::G_ZEXT:
1615 case TargetOpcode::G_ANYEXT:
1616 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1617 case TargetOpcode::G_TRUNC: {
1618 if (TypeIdx != 1)
1619 return UnableToLegalize;
1620
1621 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1622 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1623 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1624 return UnableToLegalize;
1625 }
1626
1627 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1628 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1629 MI.eraseFromParent();
1630 return Legalized;
1631 }
1632 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
1633 case TargetOpcode::G_FREEZE: {
1634 if (TypeIdx != 0)
1635 return UnableToLegalize;
1636
1637 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1638 // Should widen scalar first
1639 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1640 return UnableToLegalize;
1641
1642 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1643 SmallVector<Register, 8> Parts;
1644 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1645 Parts.push_back(
1646 MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy}, {Unmerge.getReg(i)})
1647 .getReg(0));
1648 }
1649
1650 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1651 MI.eraseFromParent();
1652 return Legalized;
1653 }
1654 case TargetOpcode::G_ADD:
1655 case TargetOpcode::G_SUB:
1656 case TargetOpcode::G_SADDO:
1657 case TargetOpcode::G_SSUBO:
1658 case TargetOpcode::G_SADDE:
1659 case TargetOpcode::G_SSUBE:
1660 case TargetOpcode::G_UADDO:
1661 case TargetOpcode::G_USUBO:
1662 case TargetOpcode::G_UADDE:
1663 case TargetOpcode::G_USUBE:
1664 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1665 case TargetOpcode::G_MUL:
1666 case TargetOpcode::G_UMULH:
1667 return narrowScalarMul(MI, NarrowTy);
1668 case TargetOpcode::G_EXTRACT:
1669 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1670 case TargetOpcode::G_INSERT:
1671 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1672 case TargetOpcode::G_LOAD: {
1673 auto &LoadMI = cast<GLoad>(MI);
1674 Register DstReg = LoadMI.getDstReg();
1675 LLT DstTy = MRI.getType(DstReg);
1676 if (DstTy.isVector())
1677 return UnableToLegalize;
1678
1679 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1680 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1681 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1682 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1683 LoadMI.eraseFromParent();
1684 return Legalized;
1685 }
1686
1687 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1688 }
1689 case TargetOpcode::G_ZEXTLOAD:
1690 case TargetOpcode::G_SEXTLOAD: {
1691 auto &LoadMI = cast<GExtLoad>(MI);
1692 Register DstReg = LoadMI.getDstReg();
1693 Register PtrReg = LoadMI.getPointerReg();
1694
1695 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1696 auto &MMO = LoadMI.getMMO();
1697 unsigned MemSize = MMO.getSizeInBits().getValue();
1698
1699 if (MemSize == NarrowSize) {
1700 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1701 } else if (MemSize < NarrowSize) {
1702 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1703 } else if (MemSize > NarrowSize) {
1704 // FIXME: Need to split the load.
1705 return UnableToLegalize;
1706 }
1707
1708 if (isa<GZExtLoad>(LoadMI))
1709 MIRBuilder.buildZExt(DstReg, TmpReg);
1710 else
1711 MIRBuilder.buildSExt(DstReg, TmpReg);
1712
1713 LoadMI.eraseFromParent();
1714 return Legalized;
1715 }
1716 case TargetOpcode::G_STORE: {
1717 auto &StoreMI = cast<GStore>(MI);
1718
1719 Register SrcReg = StoreMI.getValueReg();
1720 LLT SrcTy = MRI.getType(SrcReg);
1721 if (SrcTy.isVector())
1722 return UnableToLegalize;
1723
1724 int NumParts = SizeOp0 / NarrowSize;
1725 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1726 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1727 if (SrcTy.isVector() && LeftoverBits != 0)
1728 return UnableToLegalize;
1729
1730 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1731 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1732 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1733 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1734 StoreMI.eraseFromParent();
1735 return Legalized;
1736 }
1737
1738 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1739 }
1740 case TargetOpcode::G_SELECT:
1741 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1742 case TargetOpcode::G_AND:
1743 case TargetOpcode::G_OR:
1744 case TargetOpcode::G_XOR: {
1745 // Legalize bitwise operation:
1746 // A = BinOp<Ty> B, C
1747 // into:
1748 // B1, ..., BN = G_UNMERGE_VALUES B
1749 // C1, ..., CN = G_UNMERGE_VALUES C
1750 // A1 = BinOp<Ty/N> B1, C1
1751 // ...
1752 // AN = BinOp<Ty/N> BN, CN
1753 // A = G_MERGE_VALUES A1, ..., AN
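 // For example (illustrative only; the virtual register names below are
 // hypothetical), narrowing a s64 G_AND with NarrowTy = s32 gives:
 //   %b0:_(s32), %b1:_(s32) = G_UNMERGE_VALUES %b(s64)
 //   %c0:_(s32), %c1:_(s32) = G_UNMERGE_VALUES %c(s64)
 //   %a0:_(s32) = G_AND %b0, %c0
 //   %a1:_(s32) = G_AND %b1, %c1
 //   %a:_(s64) = G_MERGE_VALUES %a0, %a1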
1754 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1755 }
1756 case TargetOpcode::G_SHL:
1757 case TargetOpcode::G_LSHR:
1758 case TargetOpcode::G_ASHR:
1759 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1760 case TargetOpcode::G_CTLZ:
1761 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1762 case TargetOpcode::G_CTTZ:
1763 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1764 case TargetOpcode::G_CTPOP:
1765 if (TypeIdx == 1)
1766 switch (MI.getOpcode()) {
1767 case TargetOpcode::G_CTLZ:
1768 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1769 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1770 case TargetOpcode::G_CTTZ:
1771 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1772 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1773 case TargetOpcode::G_CTPOP:
1774 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1775 default:
1776 return UnableToLegalize;
1777 }
1778
1779 Observer.changingInstr(MI);
1780 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1781 Observer.changedInstr(MI);
1782 return Legalized;
1783 case TargetOpcode::G_INTTOPTR:
1784 if (TypeIdx != 1)
1785 return UnableToLegalize;
1786
1787 Observer.changingInstr(MI);
1788 narrowScalarSrc(MI, NarrowTy, 1);
1789 Observer.changedInstr(MI);
1790 return Legalized;
1791 case TargetOpcode::G_PTRTOINT:
1792 if (TypeIdx != 0)
1793 return UnableToLegalize;
1794
1795 Observer.changingInstr(MI);
1796 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1797 Observer.changedInstr(MI);
1798 return Legalized;
1799 case TargetOpcode::G_PHI: {
1800 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1801 // NarrowSize.
1802 if (SizeOp0 % NarrowSize != 0)
1803 return UnableToLegalize;
1804
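 // The lowering below: split every incoming value into NarrowTy pieces in
 // its predecessor block, build one G_PHI per piece, and re-merge the pieces
 // at the start of this block.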
1805 unsigned NumParts = SizeOp0 / NarrowSize;
1806 SmallVector<Register, 2> DstRegs(NumParts);
1807 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1808 Observer.changingInstr(MI);
1809 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1810 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1811 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1812 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1813 SrcRegs[i / 2], MIRBuilder, MRI);
1814 }
1815 MachineBasicBlock &MBB = *MI.getParent();
1816 MIRBuilder.setInsertPt(MBB, MI);
1817 for (unsigned i = 0; i < NumParts; ++i) {
1818 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1819 MachineInstrBuilder MIB =
1820 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1821 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1822 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1823 }
1824 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1825 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1826 Observer.changedInstr(MI);
1827 MI.eraseFromParent();
1828 return Legalized;
1829 }
1830 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1831 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1832 if (TypeIdx != 2)
1833 return UnableToLegalize;
1834
1835 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1836 Observer.changingInstr(MI);
1837 narrowScalarSrc(MI, NarrowTy, OpIdx);
1838 Observer.changedInstr(MI);
1839 return Legalized;
1840 }
1841 case TargetOpcode::G_ICMP: {
1842 Register LHS = MI.getOperand(2).getReg();
1843 LLT SrcTy = MRI.getType(LHS);
1844 CmpInst::Predicate Pred =
1845 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1846
1847 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1848 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1849 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1850 LHSLeftoverRegs, MIRBuilder, MRI))
1851 return UnableToLegalize;
1852
1853 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1854 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1855 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1856 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1857 return UnableToLegalize;
1858
1859 // We now have the LHS and RHS of the compare split into narrow-type
1860 // registers, plus potentially some leftover type.
1861 Register Dst = MI.getOperand(0).getReg();
1862 LLT ResTy = MRI.getType(Dst);
1863 if (ICmpInst::isEquality(Pred)) {
1864 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1865 // them together. For each equal part, the result should be all 0s. For
1866 // each non-equal part, we'll get at least one 1.
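 // For example (illustrative; register names are hypothetical), an s128
 // equality compare split into s64 parts reduces to:
 //   %x0:_(s64) = G_XOR %lhs0, %rhs0
 //   %x1:_(s64) = G_XOR %lhs1, %rhs1
 //   %or:_(s64) = G_OR %x0, %x1
 //   %dst:_(s1) = G_ICMP intpred(eq), %or(s64), %zero(s64)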
1867 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1868 SmallVector<Register, 4> Xors;
1869 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1870 auto LHS = std::get<0>(LHSAndRHS);
1871 auto RHS = std::get<1>(LHSAndRHS);
1872 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1873 Xors.push_back(Xor);
1874 }
1875
1876 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1877 // to the desired narrow type so that we can OR them together later.
1878 SmallVector<Register, 4> WidenedXors;
1879 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1880 auto LHS = std::get<0>(LHSAndRHS);
1881 auto RHS = std::get<1>(LHSAndRHS);
1882 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1883 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1884 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1885 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1886 llvm::append_range(Xors, WidenedXors);
1887 }
1888
1889 // Now, for each part we broke up, we know if they are equal/not equal
1890 // based off the G_XOR. We can OR these all together and compare against
1891 // 0 to get the result.
1892 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1893 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1894 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1895 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1896 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1897 } else {
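 // Relational predicates are handled part by part, from least to most
 // significant: every part except the last is compared with the unsigned
 // form of the predicate, and a select keyed on equality of the current
 // parts decides whether the current comparison or the result carried in
 // from the lower parts wins.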
1898 Register CmpIn;
1899 for (unsigned I = 0, E = LHSPartRegs.size(); I != E; ++I) {
1900 Register CmpOut;
1901 CmpInst::Predicate PartPred;
1902
1903 if (I == E - 1 && LHSLeftoverRegs.empty()) {
1904 PartPred = Pred;
1905 CmpOut = Dst;
1906 } else {
1907 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1908 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1909 }
1910
1911 if (!CmpIn) {
1912 MIRBuilder.buildICmp(PartPred, CmpOut, LHSPartRegs[I],
1913 RHSPartRegs[I]);
1914 } else {
1915 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSPartRegs[I],
1916 RHSPartRegs[I]);
1917 auto CmpEq = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1918 LHSPartRegs[I], RHSPartRegs[I]);
1919 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1920 }
1921
1922 CmpIn = CmpOut;
1923 }
1924
1925 for (unsigned I = 0, E = LHSLeftoverRegs.size(); I != E; ++I) {
1926 Register CmpOut;
1927 CmpInst::Predicate PartPred;
1928
1929 if (I == E - 1) {
1930 PartPred = Pred;
1931 CmpOut = Dst;
1932 } else {
1933 PartPred = ICmpInst::getUnsignedPredicate(Pred);
1934 CmpOut = MRI.createGenericVirtualRegister(ResTy);
1935 }
1936
1937 if (!CmpIn) {
1938 MIRBuilder.buildICmp(PartPred, CmpOut, LHSLeftoverRegs[I],
1939 RHSLeftoverRegs[I]);
1940 } else {
1941 auto Cmp = MIRBuilder.buildICmp(PartPred, ResTy, LHSLeftoverRegs[I],
1942 RHSLeftoverRegs[I]);
1943 auto CmpEq =
1944 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy,
1945 LHSLeftoverRegs[I], RHSLeftoverRegs[I]);
1946 MIRBuilder.buildSelect(CmpOut, CmpEq, CmpIn, Cmp);
1947 }
1948
1949 CmpIn = CmpOut;
1950 }
1951 }
1952 MI.eraseFromParent();
1953 return Legalized;
1954 }
1955 case TargetOpcode::G_FCMP:
1956 if (TypeIdx != 0)
1957 return UnableToLegalize;
1958
1959 Observer.changingInstr(MI);
1960 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1961 Observer.changedInstr(MI);
1962 return Legalized;
1963
1964 case TargetOpcode::G_SEXT_INREG: {
1965 if (TypeIdx != 0)
1966 return UnableToLegalize;
1967
1968 int64_t SizeInBits = MI.getOperand(2).getImm();
1969
1970 // So long as the new type has more bits than the bits we're extending, we
1971 // don't need to break it apart.
1972 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1973 Observer.changingInstr(MI);
1974 // We don't lose any non-extension bits by truncating the src and
1975 // sign-extending the dst.
1976 MachineOperand &MO1 = MI.getOperand(1);
1977 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1978 MO1.setReg(TruncMIB.getReg(0));
1979
1980 MachineOperand &MO2 = MI.getOperand(0);
1981 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1982 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1983 MIRBuilder.buildSExt(MO2, DstExt);
1984 MO2.setReg(DstExt);
1985 Observer.changedInstr(MI);
1986 return Legalized;
1987 }
1988
1989 // Break it apart. Components below the extension point are unmodified. The
1990 // component containing the extension point becomes a narrower SEXT_INREG.
1991 // Components above it are ashr'd from the component containing the
1992 // extension point.
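 // For example (illustrative), a s64 G_SEXT_INREG with 8 significant bits
 // narrowed to s32 keeps part 0 as a s32 G_SEXT_INREG of width 8 and derives
 // part 1 as G_ASHR(part 0, 31), replicating the sign bit into the high half.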
1993 if (SizeOp0 % NarrowSize != 0)
1994 return UnableToLegalize;
1995 int NumParts = SizeOp0 / NarrowSize;
1996
1997 // List the registers where the destination will be scattered.
1998 SmallVector<Register, 2> DstRegs;
1999 // List the registers where the source will be split.
2000 SmallVector<Register, 2> SrcRegs;
2001
2002 // Create all the temporary registers.
2003 for (int i = 0; i < NumParts; ++i) {
2004 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
2005
2006 SrcRegs.push_back(SrcReg);
2007 }
2008
2009 // Explode the big arguments into smaller chunks.
2010 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
2011
2012 Register AshrCstReg =
2013 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
2014 .getReg(0);
2015 Register FullExtensionReg;
2016 Register PartialExtensionReg;
2017
2018 // Do the operation on each small part.
2019 for (int i = 0; i < NumParts; ++i) {
2020 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
2021 DstRegs.push_back(SrcRegs[i]);
2022 PartialExtensionReg = DstRegs.back();
2023 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
2024 assert(PartialExtensionReg &&
2025 "Expected to visit partial extension before full");
2026 if (FullExtensionReg) {
2027 DstRegs.push_back(FullExtensionReg);
2028 continue;
2029 }
2030 DstRegs.push_back(
2031 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
2032 .getReg(0));
2033 FullExtensionReg = DstRegs.back();
2034 } else {
2035 DstRegs.push_back(
2036 MIRBuilder
2037 .buildInstr(
2038 TargetOpcode::G_SEXT_INREG, {NarrowTy},
2039 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
2040 .getReg(0));
2041 PartialExtensionReg = DstRegs.back();
2042 }
2043 }
2044
2045 // Gather the destination registers into the final destination.
2046 Register DstReg = MI.getOperand(0).getReg();
2047 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
2048 MI.eraseFromParent();
2049 return Legalized;
2050 }
2051 case TargetOpcode::G_BSWAP:
2052 case TargetOpcode::G_BITREVERSE: {
2053 if (SizeOp0 % NarrowSize != 0)
2054 return UnableToLegalize;
2055
2056 Observer.changingInstr(MI);
2057 SmallVector<Register, 2> SrcRegs, DstRegs;
2058 unsigned NumParts = SizeOp0 / NarrowSize;
2059 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
2060 MIRBuilder, MRI);
2061
2062 for (unsigned i = 0; i < NumParts; ++i) {
2063 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2064 {SrcRegs[NumParts - 1 - i]});
2065 DstRegs.push_back(DstPart.getReg(0));
2066 }
2067
2068 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
2069
2070 Observer.changedInstr(MI);
2071 MI.eraseFromParent();
2072 return Legalized;
2073 }
2074 case TargetOpcode::G_PTR_ADD:
2075 case TargetOpcode::G_PTRMASK: {
2076 if (TypeIdx != 1)
2077 return UnableToLegalize;
2078 Observer.changingInstr(MI);
2079 narrowScalarSrc(MI, NarrowTy, 2);
2080 Observer.changedInstr(MI);
2081 return Legalized;
2082 }
2083 case TargetOpcode::G_FPTOUI:
2084 case TargetOpcode::G_FPTOSI:
2085 case TargetOpcode::G_FPTOUI_SAT:
2086 case TargetOpcode::G_FPTOSI_SAT:
2087 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
2088 case TargetOpcode::G_FPEXT:
2089 if (TypeIdx != 0)
2090 return UnableToLegalize;
2091 Observer.changingInstr(MI);
2092 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
2093 Observer.changedInstr(MI);
2094 return Legalized;
2095 case TargetOpcode::G_FLDEXP:
2096 case TargetOpcode::G_STRICT_FLDEXP:
2097 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
2098 case TargetOpcode::G_VSCALE: {
2099 Register Dst = MI.getOperand(0).getReg();
2100 LLT Ty = MRI.getType(Dst);
2101
2102 // Assume VSCALE(1) fits into a legal integer
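 // The wide G_VSCALE is rebuilt as ZExt(G_VSCALE 1 in NarrowTy) multiplied
 // by the original constant amount in the destination type.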
2103 const APInt One(NarrowTy.getSizeInBits(), 1);
2104 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
2105 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
2106 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
2107 MIRBuilder.buildMul(Dst, ZExt, C);
2108
2109 MI.eraseFromParent();
2110 return Legalized;
2111 }
2112 }
2113}
2114
2115 Register LegalizerHelper::coerceToScalar(Register Val) {
2116 LLT Ty = MRI.getType(Val);
2117 if (Ty.isScalar())
2118 return Val;
2119
2120 const DataLayout &DL = MIRBuilder.getDataLayout();
2121 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
2122 if (Ty.isPointer()) {
2123 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
2124 return Register();
2125 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
2126 }
2127
2128 Register NewVal = Val;
2129
2130 assert(Ty.isVector());
2131 if (Ty.isPointerVector())
2132 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
2133 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
2134}
2135
2136 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
2137 unsigned OpIdx, unsigned ExtOpcode) {
2138 MachineOperand &MO = MI.getOperand(OpIdx);
2139 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
2140 MO.setReg(ExtB.getReg(0));
2141}
2142
2143 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
2144 unsigned OpIdx) {
2145 MachineOperand &MO = MI.getOperand(OpIdx);
2146 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
2147 MO.setReg(ExtB.getReg(0));
2148}
2149
2150 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
2151 unsigned OpIdx, unsigned TruncOpcode) {
2152 MachineOperand &MO = MI.getOperand(OpIdx);
2153 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2154 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2155 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
2156 MO.setReg(DstExt);
2157}
2158
2159 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
2160 unsigned OpIdx, unsigned ExtOpcode) {
2161 MachineOperand &MO = MI.getOperand(OpIdx);
2162 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
2163 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2164 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
2165 MO.setReg(DstTrunc);
2166}
2167
2168 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
2169 unsigned OpIdx) {
2170 MachineOperand &MO = MI.getOperand(OpIdx);
2171 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2172 Register Dst = MO.getReg();
2173 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2174 MO.setReg(DstExt);
2175 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
2176}
2177
2178 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
2179 unsigned OpIdx) {
2180 MachineOperand &MO = MI.getOperand(OpIdx);
2181 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
2182}
2183
2184 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2185 MachineOperand &Op = MI.getOperand(OpIdx);
2186 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
2187}
2188
2189 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
2190 MachineOperand &MO = MI.getOperand(OpIdx);
2191 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
2192 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2193 MIRBuilder.buildBitcast(MO, CastDst);
2194 MO.setReg(CastDst);
2195}
2196
2197 LegalizerHelper::LegalizeResult
2198 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
2199 LLT WideTy) {
2200 if (TypeIdx != 1)
2201 return UnableToLegalize;
2202
2203 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
2204 if (DstTy.isVector())
2205 return UnableToLegalize;
2206
2207 LLT SrcTy = MRI.getType(Src1Reg);
2208 const int DstSize = DstTy.getSizeInBits();
2209 const int SrcSize = SrcTy.getSizeInBits();
2210 const int WideSize = WideTy.getSizeInBits();
2211 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
2212
2213 unsigned NumOps = MI.getNumOperands();
2214 unsigned NumSrc = MI.getNumOperands() - 1;
2215 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
2216
2217 if (WideSize >= DstSize) {
2218 // Directly pack the bits in the target type.
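 // For example (illustrative), widening
 //   %d:_(s32) = G_MERGE_VALUES %a:_(s16), %b:_(s16)
 // to s64 zero-extends both sources to s64, shifts the second one left by 16,
 // ORs the pieces together, and truncates the s64 result back to s32.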
2219 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
2220
2221 for (unsigned I = 2; I != NumOps; ++I) {
2222 const unsigned Offset = (I - 1) * PartSize;
2223
2224 Register SrcReg = MI.getOperand(I).getReg();
2225 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
2226
2227 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
2228
2229 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
2230 MRI.createGenericVirtualRegister(WideTy);
2231
2232 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
2233 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
2234 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
2235 ResultReg = NextResult;
2236 }
2237
2238 if (WideSize > DstSize)
2239 MIRBuilder.buildTrunc(DstReg, ResultReg);
2240 else if (DstTy.isPointer())
2241 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
2242
2243 MI.eraseFromParent();
2244 return Legalized;
2245 }
2246
2247 // Unmerge the original values to the GCD type, and recombine to the next
2248 // multiple greater than the original type.
2249 //
2250 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
2251 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
2252 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
2253 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
2254 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
2255 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
2256 // %12:_(s12) = G_MERGE_VALUES %10, %11
2257 //
2258 // Padding with undef if necessary:
2259 //
2260 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
2261 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
2262 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
2263 // %7:_(s2) = G_IMPLICIT_DEF
2264 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
2265 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
2266 // %10:_(s12) = G_MERGE_VALUES %8, %9
2267
2268 const int GCD = std::gcd(SrcSize, WideSize);
2269 LLT GCDTy = LLT::scalar(GCD);
2270
2271 SmallVector<Register, 8> NewMergeRegs;
2272 SmallVector<Register, 8> Unmerges;
2273 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
2274
2275 // Decompose the original operands if they don't evenly divide.
2276 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
2277 Register SrcReg = MO.getReg();
2278 if (GCD == SrcSize) {
2279 Unmerges.push_back(SrcReg);
2280 } else {
2281 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
2282 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
2283 Unmerges.push_back(Unmerge.getReg(J));
2284 }
2285 }
2286
2287 // Pad with undef to the next size that is a multiple of the requested size.
2288 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
2289 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
2290 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
2291 Unmerges.push_back(UndefReg);
2292 }
2293
2294 const int PartsPerGCD = WideSize / GCD;
2295
2296 // Build merges of each piece.
2297 ArrayRef<Register> Slicer(Unmerges);
2298 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
2299 auto Merge =
2300 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
2301 NewMergeRegs.push_back(Merge.getReg(0));
2302 }
2303
2304 // A truncate may be necessary if the requested type doesn't evenly divide the
2305 // original result type.
2306 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
2307 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
2308 } else {
2309 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
2310 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
2311 }
2312
2313 MI.eraseFromParent();
2314 return Legalized;
2315}
2316
2317 LegalizerHelper::LegalizeResult
2318 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
2319 LLT WideTy) {
2320 if (TypeIdx != 0)
2321 return UnableToLegalize;
2322
2323 int NumDst = MI.getNumOperands() - 1;
2324 Register SrcReg = MI.getOperand(NumDst).getReg();
2325 LLT SrcTy = MRI.getType(SrcReg);
2326 if (SrcTy.isVector())
2327 return UnableToLegalize;
2328
2329 Register Dst0Reg = MI.getOperand(0).getReg();
2330 LLT DstTy = MRI.getType(Dst0Reg);
2331 if (!DstTy.isScalar())
2332 return UnableToLegalize;
2333
2334 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
2335 if (SrcTy.isPointer()) {
2336 const DataLayout &DL = MIRBuilder.getDataLayout();
2337 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
2338 LLVM_DEBUG(
2339 dbgs() << "Not casting non-integral address space integer\n");
2340 return UnableToLegalize;
2341 }
2342
2343 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
2344 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
2345 }
2346
2347 // Widen SrcTy to WideTy. This does not affect the result, but since the
2348 // user requested this size, it is probably better handled than SrcTy and
2349 // should reduce the total number of legalization artifacts.
2350 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2351 SrcTy = WideTy;
2352 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
2353 }
2354
2355 // There's no unmerge type to target. Directly extract the bits from the
2356 // source type.
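 // For example (illustrative), widening
 //   %a:_(s8), %b:_(s8) = G_UNMERGE_VALUES %x:_(s16)
 // to s32 any-extends %x to s32, truncates it for %a, and truncates
 // (%x >> 8) for %b.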
2357 unsigned DstSize = DstTy.getSizeInBits();
2358
2359 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
2360 for (int I = 1; I != NumDst; ++I) {
2361 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
2362 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
2363 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
2364 }
2365
2366 MI.eraseFromParent();
2367 return Legalized;
2368 }
2369
2370 // Extend the source to a wider type.
2371 LLT LCMTy = getLCMType(SrcTy, WideTy);
2372
2373 Register WideSrc = SrcReg;
2374 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
2375 // TODO: If this is an integral address space, cast to integer and anyext.
2376 if (SrcTy.isPointer()) {
2377 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
2378 return UnableToLegalize;
2379 }
2380
2381 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
2382 }
2383
2384 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
2385
2386 // Create a sequence of unmerges and merges to the original results. Since we
2387 // may have widened the source, we will need to pad the results with dead defs
2388 // to cover the source register.
2389 // e.g. widen s48 to s64:
2390 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
2391 //
2392 // =>
2393 // %4:_(s192) = G_ANYEXT %0:_(s96)
2394 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2395 // ; unpack to GCD type, with extra dead defs
2396 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2397 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2398 // dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
2399 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2400 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2401 const LLT GCDTy = getGCDType(WideTy, DstTy);
2402 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2403 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2404
2405 // Directly unmerge to the destination without going through a GCD type
2406 // if possible
2407 if (PartsPerRemerge == 1) {
2408 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2409
2410 for (int I = 0; I != NumUnmerge; ++I) {
2411 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2412
2413 for (int J = 0; J != PartsPerUnmerge; ++J) {
2414 int Idx = I * PartsPerUnmerge + J;
2415 if (Idx < NumDst)
2416 MIB.addDef(MI.getOperand(Idx).getReg());
2417 else {
2418 // Create dead def for excess components.
2419 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2420 }
2421 }
2422
2423 MIB.addUse(Unmerge.getReg(I));
2424 }
2425 } else {
2426 SmallVector<Register, 16> Parts;
2427 for (int J = 0; J != NumUnmerge; ++J)
2428 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2429
2430 SmallVector<Register, 8> RemergeParts;
2431 for (int I = 0; I != NumDst; ++I) {
2432 for (int J = 0; J < PartsPerRemerge; ++J) {
2433 const int Idx = I * PartsPerRemerge + J;
2434 RemergeParts.emplace_back(Parts[Idx]);
2435 }
2436
2437 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2438 RemergeParts.clear();
2439 }
2440 }
2441
2442 MI.eraseFromParent();
2443 return Legalized;
2444}
2445
2446 LegalizerHelper::LegalizeResult
2447 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2448 LLT WideTy) {
2449 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2450 unsigned Offset = MI.getOperand(2).getImm();
2451
2452 if (TypeIdx == 0) {
2453 if (SrcTy.isVector() || DstTy.isVector())
2454 return UnableToLegalize;
2455
2456 SrcOp Src(SrcReg);
2457 if (SrcTy.isPointer()) {
2458 // Extracts from pointers can be handled only if they are really just
2459 // simple integers.
2460 const DataLayout &DL = MIRBuilder.getDataLayout();
2461 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2462 return UnableToLegalize;
2463
2464 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2465 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2466 SrcTy = SrcAsIntTy;
2467 }
2468
2469 if (DstTy.isPointer())
2470 return UnableToLegalize;
2471
2472 if (Offset == 0) {
2473 // Avoid a shift in the degenerate case.
2474 MIRBuilder.buildTrunc(DstReg,
2475 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2476 MI.eraseFromParent();
2477 return Legalized;
2478 }
2479
2480 // Do a shift in the source type.
2481 LLT ShiftTy = SrcTy;
2482 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2483 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2484 ShiftTy = WideTy;
2485 }
2486
2487 auto LShr = MIRBuilder.buildLShr(
2488 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2489 MIRBuilder.buildTrunc(DstReg, LShr);
2490 MI.eraseFromParent();
2491 return Legalized;
2492 }
2493
2494 if (SrcTy.isScalar()) {
2495 Observer.changingInstr(MI);
2496 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2497 Observer.changedInstr(MI);
2498 return Legalized;
2499 }
2500
2501 if (!SrcTy.isVector())
2502 return UnableToLegalize;
2503
2504 if (DstTy != SrcTy.getElementType())
2505 return UnableToLegalize;
2506
2507 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2508 return UnableToLegalize;
2509
2510 Observer.changingInstr(MI);
2511 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2512
2513 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2514 Offset);
2515 widenScalarDst(MI, WideTy.getScalarType(), 0);
2516 Observer.changedInstr(MI);
2517 return Legalized;
2518}
2519
2520 LegalizerHelper::LegalizeResult
2521 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2522 LLT WideTy) {
2523 if (TypeIdx != 0 || WideTy.isVector())
2524 return UnableToLegalize;
2525 Observer.changingInstr(MI);
2526 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2527 widenScalarDst(MI, WideTy);
2528 Observer.changedInstr(MI);
2529 return Legalized;
2530}
2531
2532 LegalizerHelper::LegalizeResult
2533 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2534 LLT WideTy) {
2535 unsigned Opcode;
2536 unsigned ExtOpcode;
2537 std::optional<Register> CarryIn;
2538 switch (MI.getOpcode()) {
2539 default:
2540 llvm_unreachable("Unexpected opcode!");
2541 case TargetOpcode::G_SADDO:
2542 Opcode = TargetOpcode::G_ADD;
2543 ExtOpcode = TargetOpcode::G_SEXT;
2544 break;
2545 case TargetOpcode::G_SSUBO:
2546 Opcode = TargetOpcode::G_SUB;
2547 ExtOpcode = TargetOpcode::G_SEXT;
2548 break;
2549 case TargetOpcode::G_UADDO:
2550 Opcode = TargetOpcode::G_ADD;
2551 ExtOpcode = TargetOpcode::G_ZEXT;
2552 break;
2553 case TargetOpcode::G_USUBO:
2554 Opcode = TargetOpcode::G_SUB;
2555 ExtOpcode = TargetOpcode::G_ZEXT;
2556 break;
2557 case TargetOpcode::G_SADDE:
2558 Opcode = TargetOpcode::G_UADDE;
2559 ExtOpcode = TargetOpcode::G_SEXT;
2560 CarryIn = MI.getOperand(4).getReg();
2561 break;
2562 case TargetOpcode::G_SSUBE:
2563 Opcode = TargetOpcode::G_USUBE;
2564 ExtOpcode = TargetOpcode::G_SEXT;
2565 CarryIn = MI.getOperand(4).getReg();
2566 break;
2567 case TargetOpcode::G_UADDE:
2568 Opcode = TargetOpcode::G_UADDE;
2569 ExtOpcode = TargetOpcode::G_ZEXT;
2570 CarryIn = MI.getOperand(4).getReg();
2571 break;
2572 case TargetOpcode::G_USUBE:
2573 Opcode = TargetOpcode::G_USUBE;
2574 ExtOpcode = TargetOpcode::G_ZEXT;
2575 CarryIn = MI.getOperand(4).getReg();
2576 break;
2577 }
2578
2579 if (TypeIdx == 1) {
2580 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2581
2582 Observer.changingInstr(MI);
2583 if (CarryIn)
2584 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2585 widenScalarDst(MI, WideTy, 1);
2586
2587 Observer.changedInstr(MI);
2588 return Legalized;
2589 }
2590
2591 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2592 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2593 // Do the arithmetic in the larger type.
2594 Register NewOp;
2595 if (CarryIn) {
2596 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2597 NewOp = MIRBuilder
2598 .buildInstr(Opcode, {WideTy, CarryOutTy},
2599 {LHSExt, RHSExt, *CarryIn})
2600 .getReg(0);
2601 } else {
2602 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2603 }
2604 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2605 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2606 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2607 // There is no overflow if the ExtOp is the same as NewOp.
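 // For example (illustrative): for an s8 G_SADDO widened to s32, the sum of
 // the sign-extended operands overflows exactly when
 // G_SEXT(G_TRUNC(sum, s8), s32) differs from the sum, i.e. when the result
 // does not fit in a signed 8-bit value.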
2608 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2609 // Now trunc the NewOp to the original result.
2610 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2611 MI.eraseFromParent();
2612 return Legalized;
2613}
2614
2615 LegalizerHelper::LegalizeResult
2616 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2617 LLT WideTy) {
2618 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2619 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2620 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2621 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2622 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2623 // We can convert this to:
2624 // 1. Any extend iN to iM
2625 // 2. SHL by M-N
2626 // 3. [US][ADD|SUB|SHL]SAT
2627 // 4. L/ASHR by M-N
2628 //
2629 // It may be more efficient to lower this to a min and a max operation in
2630 // the higher precision arithmetic if the promoted operation isn't legal,
2631 // but this decision is up to the target's lowering request.
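 // For example (illustrative), an s8 G_UADDSAT widened to s32 any-extends
 // both operands, shifts them left by 24 so saturation happens at the top of
 // the 32-bit value, performs the s32 G_UADDSAT, shifts the result right by
 // 24, and truncates back to s8.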
2632 Register DstReg = MI.getOperand(0).getReg();
2633
2634 unsigned NewBits = WideTy.getScalarSizeInBits();
2635 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2636
2637 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2638 // must not left shift the RHS to preserve the shift amount.
2639 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2640 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2641 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2642 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2643 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2644 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2645
2646 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2647 {ShiftL, ShiftR}, MI.getFlags());
2648
2649 // Use a shift that will preserve the number of sign bits when the trunc is
2650 // folded away.
2651 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2652 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2653
2654 MIRBuilder.buildTrunc(DstReg, Result);
2655 MI.eraseFromParent();
2656 return Legalized;
2657}
2658
2659 LegalizerHelper::LegalizeResult
2660 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2661 LLT WideTy) {
2662 if (TypeIdx == 1) {
2663 Observer.changingInstr(MI);
2664 widenScalarDst(MI, WideTy, 1);
2665 Observer.changedInstr(MI);
2666 return Legalized;
2667 }
2668
2669 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2670 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2671 LLT SrcTy = MRI.getType(LHS);
2672 LLT OverflowTy = MRI.getType(OriginalOverflow);
2673 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2674
2675 // To determine if the result overflowed in the larger type, we extend the
2676 // input to the larger type, do the multiply (checking if it overflows),
2677 // then also check the high bits of the result to see if overflow happened
2678 // there.
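 // For example (illustrative), an s16 G_UMULO widened to s32 zero-extends
 // both operands and multiplies in 32 bits; since 32 >= 2 * 16 the wide
 // multiply itself cannot overflow, so a plain G_MUL is used and overflow is
 // reported only when the product does not zero-extend its low 16 bits
 // (i.e. the high half is non-zero).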
2679 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2680 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2681 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2682
2683 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2684 // so we don't need to check the overflow result of larger type Mulo.
2685 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2686
2687 unsigned MulOpc =
2688 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2689
2690 MachineInstrBuilder Mulo;
2691 if (WideMulCanOverflow)
2692 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2693 {LeftOperand, RightOperand});
2694 else
2695 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2696
2697 auto Mul = Mulo->getOperand(0);
2698 MIRBuilder.buildTrunc(Result, Mul);
2699
2700 MachineInstrBuilder ExtResult;
2701 // Overflow occurred if it occurred in the larger type, or if the high part
2702 // of the result does not zero/sign-extend the low part. Check this second
2703 // possibility first.
2704 if (IsSigned) {
2705 // For signed, overflow occurred when the high part does not sign-extend
2706 // the low part.
2707 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2708 } else {
2709 // Unsigned overflow occurred when the high part does not zero-extend the
2710 // low part.
2711 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2712 }
2713
2714 if (WideMulCanOverflow) {
2715 auto Overflow =
2716 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2717 // Finally check if the multiplication in the larger type itself overflowed.
2718 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2719 } else {
2720 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2721 }
2722 MI.eraseFromParent();
2723 return Legalized;
2724}
2725
2726 LegalizerHelper::LegalizeResult
2727 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2728 unsigned Opcode = MI.getOpcode();
2729 switch (Opcode) {
2730 default:
2731 return UnableToLegalize;
2732 case TargetOpcode::G_ATOMICRMW_XCHG:
2733 case TargetOpcode::G_ATOMICRMW_ADD:
2734 case TargetOpcode::G_ATOMICRMW_SUB:
2735 case TargetOpcode::G_ATOMICRMW_AND:
2736 case TargetOpcode::G_ATOMICRMW_OR:
2737 case TargetOpcode::G_ATOMICRMW_XOR:
2738 case TargetOpcode::G_ATOMICRMW_MIN:
2739 case TargetOpcode::G_ATOMICRMW_MAX:
2740 case TargetOpcode::G_ATOMICRMW_UMIN:
2741 case TargetOpcode::G_ATOMICRMW_UMAX:
2742 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2743 Observer.changingInstr(MI);
2744 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2745 widenScalarDst(MI, WideTy, 0);
2746 Observer.changedInstr(MI);
2747 return Legalized;
2748 case TargetOpcode::G_ATOMIC_CMPXCHG:
2749 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2750 Observer.changingInstr(MI);
2751 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2752 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2753 widenScalarDst(MI, WideTy, 0);
2754 Observer.changedInstr(MI);
2755 return Legalized;
2756 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2757 if (TypeIdx == 0) {
2758 Observer.changingInstr(MI);
2759 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2760 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2761 widenScalarDst(MI, WideTy, 0);
2762 Observer.changedInstr(MI);
2763 return Legalized;
2764 }
2765 assert(TypeIdx == 1 &&
2766 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2767 Observer.changingInstr(MI);
2768 widenScalarDst(MI, WideTy, 1);
2769 Observer.changedInstr(MI);
2770 return Legalized;
2771 case TargetOpcode::G_EXTRACT:
2772 return widenScalarExtract(MI, TypeIdx, WideTy);
2773 case TargetOpcode::G_INSERT:
2774 return widenScalarInsert(MI, TypeIdx, WideTy);
2775 case TargetOpcode::G_MERGE_VALUES:
2776 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2777 case TargetOpcode::G_UNMERGE_VALUES:
2778 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2779 case TargetOpcode::G_SADDO:
2780 case TargetOpcode::G_SSUBO:
2781 case TargetOpcode::G_UADDO:
2782 case TargetOpcode::G_USUBO:
2783 case TargetOpcode::G_SADDE:
2784 case TargetOpcode::G_SSUBE:
2785 case TargetOpcode::G_UADDE:
2786 case TargetOpcode::G_USUBE:
2787 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2788 case TargetOpcode::G_UMULO:
2789 case TargetOpcode::G_SMULO:
2790 return widenScalarMulo(MI, TypeIdx, WideTy);
2791 case TargetOpcode::G_SADDSAT:
2792 case TargetOpcode::G_SSUBSAT:
2793 case TargetOpcode::G_SSHLSAT:
2794 case TargetOpcode::G_UADDSAT:
2795 case TargetOpcode::G_USUBSAT:
2796 case TargetOpcode::G_USHLSAT:
2797 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2798 case TargetOpcode::G_CTTZ:
2799 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2800 case TargetOpcode::G_CTLZ:
2801 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2802 case TargetOpcode::G_CTPOP: {
2803 if (TypeIdx == 0) {
2804 Observer.changingInstr(MI);
2805 widenScalarDst(MI, WideTy, 0);
2806 Observer.changedInstr(MI);
2807 return Legalized;
2808 }
2809
2810 Register SrcReg = MI.getOperand(1).getReg();
2811
2812 // First extend the input.
2813 unsigned ExtOpc = Opcode == TargetOpcode::G_CTTZ ||
2814 Opcode == TargetOpcode::G_CTTZ_ZERO_UNDEF
2815 ? TargetOpcode::G_ANYEXT
2816 : TargetOpcode::G_ZEXT;
2817 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2818 LLT CurTy = MRI.getType(SrcReg);
2819 unsigned NewOpc = Opcode;
2820 if (NewOpc == TargetOpcode::G_CTTZ) {
2821 // The count is the same in the larger type except if the original
2822 // value was zero. This can be handled by setting the bit just off
2823 // the top of the original type.
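 // For example (illustrative), an s8 G_CTTZ widened to s32 ORs the widened
 // source with 0x100; a zero s8 input then has its lowest set bit at
 // position 8, so the relaxed G_CTTZ_ZERO_UNDEF still returns the correct
 // count of 8.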
2824 auto TopBit =
2825 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2826 MIBSrc = MIRBuilder.buildOr(
2827 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2828 // Now we know the operand is non-zero, use the more relaxed opcode.
2829 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2830 }
2831
2832 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2833
2834 if (Opcode == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2835 // An optimization where the result is the CTLZ after the left shift by
2836 // (difference between WideTy and CurTy), that is,
2837 // MIBSrc = MIBSrc << (sizeinbits(WideTy) - sizeinbits(CurTy))
2838 // Result = ctlz MIBSrc
2839 MIBSrc = MIRBuilder.buildShl(WideTy, MIBSrc,
2840 MIRBuilder.buildConstant(WideTy, SizeDiff));
2841 }
2842
2843 // Perform the operation at the larger size.
2844 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2845 // This is already the correct result for CTPOP and CTTZs
2846 if (Opcode == TargetOpcode::G_CTLZ) {
2847 // The correct result is NewOp - (difference between WideTy and CurTy).
2848 MIBNewOp = MIRBuilder.buildSub(
2849 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2850 }
2851
2852 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2853 MI.eraseFromParent();
2854 return Legalized;
2855 }
2856 case TargetOpcode::G_BSWAP: {
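 // Byte-swapping at the wider width moves the interesting bytes to the top,
 // so the narrow result is recovered by shifting right by the width
 // difference and truncating (e.g. for s16 widened to s32,
 // bswap16(x) == trunc(bswap32(anyext(x)) >> 16)).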
2857 Observer.changingInstr(MI);
2858 Register DstReg = MI.getOperand(0).getReg();
2859
2860 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2861 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2862 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2863 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2864
2865 MI.getOperand(0).setReg(DstExt);
2866
2867 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2868
2869 LLT Ty = MRI.getType(DstReg);
2870 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2871 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2872 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2873
2874 MIRBuilder.buildTrunc(DstReg, ShrReg);
2875 Observer.changedInstr(MI);
2876 return Legalized;
2877 }
2878 case TargetOpcode::G_BITREVERSE: {
2879 Observer.changingInstr(MI);
2880
2881 Register DstReg = MI.getOperand(0).getReg();
2882 LLT Ty = MRI.getType(DstReg);
2883 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2884
2885 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2886 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2887 MI.getOperand(0).setReg(DstExt);
2888 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2889
2890 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2891 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2892 MIRBuilder.buildTrunc(DstReg, Shift);
2893 Observer.changedInstr(MI);
2894 return Legalized;
2895 }
2896 case TargetOpcode::G_FREEZE:
2897 case TargetOpcode::G_CONSTANT_FOLD_BARRIER:
2898 Observer.changingInstr(MI);
2899 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2900 widenScalarDst(MI, WideTy);
2901 Observer.changedInstr(MI);
2902 return Legalized;
2903
2904 case TargetOpcode::G_ABS:
2905 Observer.changingInstr(MI);
2906 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2907 widenScalarDst(MI, WideTy);
2908 Observer.changedInstr(MI);
2909 return Legalized;
2910
2911 case TargetOpcode::G_ADD:
2912 case TargetOpcode::G_AND:
2913 case TargetOpcode::G_MUL:
2914 case TargetOpcode::G_OR:
2915 case TargetOpcode::G_XOR:
2916 case TargetOpcode::G_SUB:
2917 case TargetOpcode::G_SHUFFLE_VECTOR:
2918 // Perform operation at larger width (any extension is fine here, high bits
2919 // don't affect the result) and then truncate the result back to the
2920 // original type.
2921 Observer.changingInstr(MI);
2922 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2923 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2924 widenScalarDst(MI, WideTy);
2925 Observer.changedInstr(MI);
2926 return Legalized;
2927
2928 case TargetOpcode::G_SBFX:
2929 case TargetOpcode::G_UBFX:
2930 Observer.changingInstr(MI);
2931
2932 if (TypeIdx == 0) {
2933 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2934 widenScalarDst(MI, WideTy);
2935 } else {
2936 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2937 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2938 }
2939
2940 Observer.changedInstr(MI);
2941 return Legalized;
2942
2943 case TargetOpcode::G_SHL:
2944 Observer.changingInstr(MI);
2945
2946 if (TypeIdx == 0) {
2947 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2948 widenScalarDst(MI, WideTy);
2949 } else {
2950 assert(TypeIdx == 1);
2951 // The "number of bits to shift" operand must preserve its value as an
2952 // unsigned integer:
2953 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2954 }
2955
2956 Observer.changedInstr(MI);
2957 return Legalized;
2958
2959 case TargetOpcode::G_ROTR:
2960 case TargetOpcode::G_ROTL:
2961 if (TypeIdx != 1)
2962 return UnableToLegalize;
2963
2964 Observer.changingInstr(MI);
2965 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2966 Observer.changedInstr(MI);
2967 return Legalized;
2968
2969 case TargetOpcode::G_SDIV:
2970 case TargetOpcode::G_SREM:
2971 case TargetOpcode::G_SMIN:
2972 case TargetOpcode::G_SMAX:
2973 case TargetOpcode::G_ABDS:
2974 Observer.changingInstr(MI);
2975 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2976 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2977 widenScalarDst(MI, WideTy);
2978 Observer.changedInstr(MI);
2979 return Legalized;
2980
2981 case TargetOpcode::G_SDIVREM:
2982 Observer.changingInstr(MI);
2983 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2984 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2985 widenScalarDst(MI, WideTy);
2986 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
2987 widenScalarDst(MI, WideTy, 1);
2988 Observer.changedInstr(MI);
2989 return Legalized;
2990
2991 case TargetOpcode::G_ASHR:
2992 case TargetOpcode::G_LSHR:
2993 Observer.changingInstr(MI);
2994
2995 if (TypeIdx == 0) {
2996 unsigned CvtOp = Opcode == TargetOpcode::G_ASHR ? TargetOpcode::G_SEXT
2997 : TargetOpcode::G_ZEXT;
2998
2999 widenScalarSrc(MI, WideTy, 1, CvtOp);
3000 widenScalarDst(MI, WideTy);
3001 } else {
3002 assert(TypeIdx == 1);
3003 // The "number of bits to shift" operand must preserve its value as an
3004 // unsigned integer:
3005 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3006 }
3007
3008 Observer.changedInstr(MI);
3009 return Legalized;
3010 case TargetOpcode::G_UDIV:
3011 case TargetOpcode::G_UREM:
3012 case TargetOpcode::G_ABDU:
3013 Observer.changingInstr(MI);
3014 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3015 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3016 widenScalarDst(MI, WideTy);
3017 Observer.changedInstr(MI);
3018 return Legalized;
3019 case TargetOpcode::G_UDIVREM:
3020 Observer.changingInstr(MI);
3021 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3022 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3023 widenScalarDst(MI, WideTy);
3024 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3025 widenScalarDst(MI, WideTy, 1);
3026 Observer.changedInstr(MI);
3027 return Legalized;
3028 case TargetOpcode::G_UMIN:
3029 case TargetOpcode::G_UMAX: {
3030 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3031
3032 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3033 unsigned ExtOpc =
3034 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(Ty, Ctx),
3035 getApproximateEVTForLLT(WideTy, Ctx))
3036 ? TargetOpcode::G_SEXT
3037 : TargetOpcode::G_ZEXT;
3038
3039 Observer.changingInstr(MI);
3040 widenScalarSrc(MI, WideTy, 1, ExtOpc);
3041 widenScalarSrc(MI, WideTy, 2, ExtOpc);
3042 widenScalarDst(MI, WideTy);
3043 Observer.changedInstr(MI);
3044 return Legalized;
3045 }
3046
3047 case TargetOpcode::G_SELECT:
3048 Observer.changingInstr(MI);
3049 if (TypeIdx == 0) {
3050 // Perform operation at larger width (any extension is fine here, high
3051 // bits don't affect the result) and then truncate the result back to the
3052 // original type.
3053 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3054 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
3055 widenScalarDst(MI, WideTy);
3056 } else {
3057 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
3058 // Explicit extension is required here since high bits affect the result.
3059 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
3060 }
3061 Observer.changedInstr(MI);
3062 return Legalized;
3063
3064 case TargetOpcode::G_FPEXT:
3065 if (TypeIdx != 1)
3066 return UnableToLegalize;
3067
3068 Observer.changingInstr(MI);
3069 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3070 Observer.changedInstr(MI);
3071 return Legalized;
3072 case TargetOpcode::G_FPTOSI:
3073 case TargetOpcode::G_FPTOUI:
3074 case TargetOpcode::G_INTRINSIC_LRINT:
3075 case TargetOpcode::G_INTRINSIC_LLRINT:
3076 case TargetOpcode::G_IS_FPCLASS:
3077 Observer.changingInstr(MI);
3078
3079 if (TypeIdx == 0)
3080 widenScalarDst(MI, WideTy);
3081 else
3082 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3083
3084 Observer.changedInstr(MI);
3085 return Legalized;
3086 case TargetOpcode::G_SITOFP:
3087 Observer.changingInstr(MI);
3088
3089 if (TypeIdx == 0)
3090 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3091 else
3092 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
3093
3094 Observer.changedInstr(MI);
3095 return Legalized;
3096 case TargetOpcode::G_UITOFP:
3097 Observer.changingInstr(MI);
3098
3099 if (TypeIdx == 0)
3100 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3101 else
3102 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3103
3104 Observer.changedInstr(MI);
3105 return Legalized;
3106 case TargetOpcode::G_FPTOSI_SAT:
3107 case TargetOpcode::G_FPTOUI_SAT:
3108 Observer.changingInstr(MI);
3109
3110 if (TypeIdx == 0) {
3111 Register OldDst = MI.getOperand(0).getReg();
3112 LLT Ty = MRI.getType(OldDst);
3113 Register ExtReg = MRI.createGenericVirtualRegister(WideTy);
3114 Register NewDst;
3115 MI.getOperand(0).setReg(ExtReg);
3116 uint64_t ShortBits = Ty.getScalarSizeInBits();
3117 uint64_t WideBits = WideTy.getScalarSizeInBits();
3118 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3119 if (Opcode == TargetOpcode::G_FPTOSI_SAT) {
3120 // z = i16 fptosi_sat(a)
3121 // ->
3122 // x = i32 fptosi_sat(a)
3123 // y = smin(x, 32767)
3124 // z = smax(y, -32768)
3125 auto MaxVal = MIRBuilder.buildConstant(
3126 WideTy, APInt::getSignedMaxValue(ShortBits).sext(WideBits));
3127 auto MinVal = MIRBuilder.buildConstant(
3128 WideTy, APInt::getSignedMinValue(ShortBits).sext(WideBits));
3129 Register MidReg =
3130 MIRBuilder.buildSMin(WideTy, ExtReg, MaxVal).getReg(0);
3131 NewDst = MIRBuilder.buildSMax(WideTy, MidReg, MinVal).getReg(0);
3132 } else {
3133 // z = i16 fptoui_sat(a)
3134 // ->
3135 // x = i32 fptoui_sat(a)
3136 // y = smin(x, 65535)
3137 auto MaxVal = MIRBuilder.buildConstant(
3138 WideTy, APInt::getAllOnes(ShortBits).zext(WideBits));
3139 NewDst = MIRBuilder.buildUMin(WideTy, ExtReg, MaxVal).getReg(0);
3140 }
3141 MIRBuilder.buildTrunc(OldDst, NewDst);
3142 } else
3143 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3144
3145 Observer.changedInstr(MI);
3146 return Legalized;
3147 case TargetOpcode::G_LOAD:
3148 case TargetOpcode::G_SEXTLOAD:
3149 case TargetOpcode::G_ZEXTLOAD:
3150 Observer.changingInstr(MI);
3151 widenScalarDst(MI, WideTy);
3152 Observer.changedInstr(MI);
3153 return Legalized;
3154
3155 case TargetOpcode::G_STORE: {
3156 if (TypeIdx != 0)
3157 return UnableToLegalize;
3158
3159 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3160 assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
3161 if (!Ty.isScalar()) {
3162 // We need to widen the vector element type.
3163 Observer.changingInstr(MI);
3164 widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
3165 // We also need to adjust the MMO to turn this into a truncating store.
3166 MachineMemOperand &MMO = **MI.memoperands_begin();
3167 MachineFunction &MF = MIRBuilder.getMF();
3168 auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
3169 MI.setMemRefs(MF, {NewMMO});
3170 Observer.changedInstr(MI);
3171 return Legalized;
3172 }
3173
3174 Observer.changingInstr(MI);
3175
3176 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
3177 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
3178 widenScalarSrc(MI, WideTy, 0, ExtType);
3179
3180 Observer.changedInstr(MI);
3181 return Legalized;
3182 }
3183 case TargetOpcode::G_CONSTANT: {
3184 MachineOperand &SrcMO = MI.getOperand(1);
3185 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3186 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
3187 MRI.getType(MI.getOperand(0).getReg()));
3188 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
3189 ExtOpc == TargetOpcode::G_ANYEXT) &&
3190 "Illegal Extend");
3191 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3192 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
3193 ? SrcVal.sext(WideTy.getSizeInBits())
3194 : SrcVal.zext(WideTy.getSizeInBits());
3195 Observer.changingInstr(MI);
3196 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3197
3198 widenScalarDst(MI, WideTy);
3199 Observer.changedInstr(MI);
3200 return Legalized;
3201 }
3202 case TargetOpcode::G_FCONSTANT: {
3203 // To avoid changing the bits of the constant due to extension to a larger
3204 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
3205 MachineOperand &SrcMO = MI.getOperand(1);
3206 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
3207 MIRBuilder.setInstrAndDebugLoc(MI);
3208 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
3209 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
3210 MI.eraseFromParent();
3211 return Legalized;
3212 }
3213 case TargetOpcode::G_IMPLICIT_DEF: {
3214 Observer.changingInstr(MI);
3215 widenScalarDst(MI, WideTy);
3216 Observer.changedInstr(MI);
3217 return Legalized;
3218 }
3219 case TargetOpcode::G_BRCOND:
3220 Observer.changingInstr(MI);
3221 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
3222 Observer.changedInstr(MI);
3223 return Legalized;
3224
3225 case TargetOpcode::G_FCMP:
3226 Observer.changingInstr(MI);
3227 if (TypeIdx == 0)
3228 widenScalarDst(MI, WideTy);
3229 else {
3230 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3231 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
3232 }
3233 Observer.changedInstr(MI);
3234 return Legalized;
3235
3236 case TargetOpcode::G_ICMP:
3237 Observer.changingInstr(MI);
3238 if (TypeIdx == 0)
3239 widenScalarDst(MI, WideTy);
3240 else {
3241 LLT SrcTy = MRI.getType(MI.getOperand(2).getReg());
3242 CmpInst::Predicate Pred =
3243 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3244
3245 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
3246 unsigned ExtOpcode =
3247 (CmpInst::isSigned(Pred) ||
3248 TLI.isSExtCheaperThanZExt(getApproximateEVTForLLT(SrcTy, Ctx),
3249 getApproximateEVTForLLT(WideTy, Ctx)))
3250 ? TargetOpcode::G_SEXT
3251 : TargetOpcode::G_ZEXT;
3252 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
3253 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
3254 }
3255 Observer.changedInstr(MI);
3256 return Legalized;
3257
3258 case TargetOpcode::G_PTR_ADD:
3259 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
3260 Observer.changingInstr(MI);
3261 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3262 Observer.changedInstr(MI);
3263 return Legalized;
3264
3265 case TargetOpcode::G_PHI: {
3266 assert(TypeIdx == 0 && "Expecting only Idx 0");
3267
3268 Observer.changingInstr(MI);
3269 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
3270 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3271 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
3272 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
3273 }
3274
3275 MachineBasicBlock &MBB = *MI.getParent();
3276 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
3277 widenScalarDst(MI, WideTy);
3278 Observer.changedInstr(MI);
3279 return Legalized;
3280 }
3281 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
3282 if (TypeIdx == 0) {
3283 Register VecReg = MI.getOperand(1).getReg();
3284 LLT VecTy = MRI.getType(VecReg);
3285 Observer.changingInstr(MI);
3286
3288 MI,
3290 TargetOpcode::G_ANYEXT);
3291
3292 widenScalarDst(MI, WideTy, 0);
3293 Observer.changedInstr(MI);
3294 return Legalized;
3295 }
3296
3297 if (TypeIdx != 2)
3298 return UnableToLegalize;
3299 Observer.changingInstr(MI);
3300 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3301 Observer.changedInstr(MI);
3302 return Legalized;
3303 }
3304 case TargetOpcode::G_INSERT_VECTOR_ELT: {
3305 if (TypeIdx == 0) {
3306 Observer.changingInstr(MI);
3307 const LLT WideEltTy = WideTy.getElementType();
3308
3309 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3310 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
3311 widenScalarDst(MI, WideTy, 0);
3312 Observer.changedInstr(MI);
3313 return Legalized;
3314 }
3315
3316 if (TypeIdx == 1) {
3317 Observer.changingInstr(MI);
3318
3319 Register VecReg = MI.getOperand(1).getReg();
3320 LLT VecTy = MRI.getType(VecReg);
3321 LLT WideVecTy = VecTy.changeVectorElementType(WideTy);
3322
3323 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
3324 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
3325 widenScalarDst(MI, WideVecTy, 0);
3326 Observer.changedInstr(MI);
3327 return Legalized;
3328 }
3329
3330 if (TypeIdx == 2) {
3331 Observer.changingInstr(MI);
3332 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
3333 Observer.changedInstr(MI);
3334 return Legalized;
3335 }
3336
3337 return UnableToLegalize;
3338 }
3339 case TargetOpcode::G_FADD:
3340 case TargetOpcode::G_FMUL:
3341 case TargetOpcode::G_FSUB:
3342 case TargetOpcode::G_FMA:
3343 case TargetOpcode::G_FMAD:
3344 case TargetOpcode::G_FNEG:
3345 case TargetOpcode::G_FABS:
3346 case TargetOpcode::G_FCANONICALIZE:
3347 case TargetOpcode::G_FMINNUM:
3348 case TargetOpcode::G_FMAXNUM:
3349 case TargetOpcode::G_FMINNUM_IEEE:
3350 case TargetOpcode::G_FMAXNUM_IEEE:
3351 case TargetOpcode::G_FMINIMUM:
3352 case TargetOpcode::G_FMAXIMUM:
3353 case TargetOpcode::G_FMINIMUMNUM:
3354 case TargetOpcode::G_FMAXIMUMNUM:
3355 case TargetOpcode::G_FDIV:
3356 case TargetOpcode::G_FREM:
3357 case TargetOpcode::G_FCEIL:
3358 case TargetOpcode::G_FFLOOR:
3359 case TargetOpcode::G_FCOS:
3360 case TargetOpcode::G_FSIN:
3361 case TargetOpcode::G_FTAN:
3362 case TargetOpcode::G_FACOS:
3363 case TargetOpcode::G_FASIN:
3364 case TargetOpcode::G_FATAN:
3365 case TargetOpcode::G_FATAN2:
3366 case TargetOpcode::G_FCOSH:
3367 case TargetOpcode::G_FSINH:
3368 case TargetOpcode::G_FTANH:
3369 case TargetOpcode::G_FLOG10:
3370 case TargetOpcode::G_FLOG:
3371 case TargetOpcode::G_FLOG2:
3372 case TargetOpcode::G_FRINT:
3373 case TargetOpcode::G_FNEARBYINT:
3374 case TargetOpcode::G_FSQRT:
3375 case TargetOpcode::G_FEXP:
3376 case TargetOpcode::G_FEXP2:
3377 case TargetOpcode::G_FEXP10:
3378 case TargetOpcode::G_FPOW:
3379 case TargetOpcode::G_INTRINSIC_TRUNC:
3380 case TargetOpcode::G_INTRINSIC_ROUND:
3381 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
3382 assert(TypeIdx == 0);
3383 Observer.changingInstr(MI);
3384
3385 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3386 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
3387
3388 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3389 Observer.changedInstr(MI);
3390 return Legalized;
3391 case TargetOpcode::G_FMODF: {
3392 Observer.changingInstr(MI);
3393 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3394
3395 widenScalarDst(MI, WideTy, 1, TargetOpcode::G_FPTRUNC);
3396 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), --MIRBuilder.getInsertPt());
3397 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3398 Observer.changedInstr(MI);
3399 return Legalized;
3400 }
3401 case TargetOpcode::G_FPOWI:
3402 case TargetOpcode::G_FLDEXP:
3403 case TargetOpcode::G_STRICT_FLDEXP: {
3404 if (TypeIdx == 0) {
3405 if (Opcode == TargetOpcode::G_STRICT_FLDEXP)
3406 return UnableToLegalize;
3407
3408 Observer.changingInstr(MI);
3409 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3410 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3411 Observer.changedInstr(MI);
3412 return Legalized;
3413 }
3414
3415 if (TypeIdx == 1) {
3416 // For some reason SelectionDAG tries to promote to a libcall without
3417 // actually changing the integer type for promotion.
3418 Observer.changingInstr(MI);
3419 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
3420 Observer.changedInstr(MI);
3421 return Legalized;
3422 }
3423
3424 return UnableToLegalize;
3425 }
3426 case TargetOpcode::G_FFREXP: {
3427 Observer.changingInstr(MI);
3428
3429 if (TypeIdx == 0) {
3430 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
3431 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3432 } else {
3433 widenScalarDst(MI, WideTy, 1);
3434 }
3435
3436 Observer.changedInstr(MI);
3437 return Legalized;
3438 }
3439 case TargetOpcode::G_LROUND:
3440 case TargetOpcode::G_LLROUND:
3441 Observer.changingInstr(MI);
3442
3443 if (TypeIdx == 0)
3444 widenScalarDst(MI, WideTy);
3445 else
3446 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
3447
3448 Observer.changedInstr(MI);
3449 return Legalized;
3450
3451 case TargetOpcode::G_INTTOPTR:
3452 if (TypeIdx != 1)
3453 return UnableToLegalize;
3454
3455 Observer.changingInstr(MI);
3456 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
3457 Observer.changedInstr(MI);
3458 return Legalized;
3459 case TargetOpcode::G_PTRTOINT:
3460 if (TypeIdx != 0)
3461 return UnableToLegalize;
3462
3463 Observer.changingInstr(MI);
3464 widenScalarDst(MI, WideTy, 0);
3465 Observer.changedInstr(MI);
3466 return Legalized;
3467 case TargetOpcode::G_BUILD_VECTOR: {
3468 Observer.changingInstr(MI);
3469
3470 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
3471 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
3472 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
3473
3474 // Avoid changing the result vector type if the source element type was
3475 // requested.
3476 if (TypeIdx == 1) {
3477 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
3478 } else {
3479 widenScalarDst(MI, WideTy, 0);
3480 }
3481
3482 Observer.changedInstr(MI);
3483 return Legalized;
3484 }
3485 case TargetOpcode::G_SEXT_INREG:
3486 if (TypeIdx != 0)
3487 return UnableToLegalize;
3488
3489 Observer.changingInstr(MI);
3490 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3491 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
3492 Observer.changedInstr(MI);
3493 return Legalized;
3494 case TargetOpcode::G_PTRMASK: {
3495 if (TypeIdx != 1)
3496 return UnableToLegalize;
3497 Observer.changingInstr(MI);
3498 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
3499 Observer.changedInstr(MI);
3500 return Legalized;
3501 }
3502 case TargetOpcode::G_VECREDUCE_ADD: {
3503 if (TypeIdx != 1)
3504 return UnableToLegalize;
3505 Observer.changingInstr(MI);
3506 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3507 widenScalarDst(MI, WideTy.getScalarType(), 0, TargetOpcode::G_TRUNC);
3508 Observer.changedInstr(MI);
3509 return Legalized;
3510 }
3511 case TargetOpcode::G_VECREDUCE_FADD:
3512 case TargetOpcode::G_VECREDUCE_FMUL:
3513 case TargetOpcode::G_VECREDUCE_FMIN:
3514 case TargetOpcode::G_VECREDUCE_FMAX:
3515 case TargetOpcode::G_VECREDUCE_FMINIMUM:
3516 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
3517 if (TypeIdx != 0)
3518 return UnableToLegalize;
3519 Observer.changingInstr(MI);
3520 Register VecReg = MI.getOperand(1).getReg();
3521 LLT VecTy = MRI.getType(VecReg);
3522 LLT WideVecTy = VecTy.changeElementType(WideTy);
3523 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
3524 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
3525 Observer.changedInstr(MI);
3526 return Legalized;
3527 }
3528 case TargetOpcode::G_VSCALE: {
3529 MachineOperand &SrcMO = MI.getOperand(1);
3530 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3531 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3532 // The CImm is always a signed value
3533 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3534 Observer.changingInstr(MI);
3535 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3536 widenScalarDst(MI, WideTy);
3537 Observer.changedInstr(MI);
3538 return Legalized;
3539 }
3540 case TargetOpcode::G_SPLAT_VECTOR: {
3541 if (TypeIdx != 1)
3542 return UnableToLegalize;
3543
3544 Observer.changingInstr(MI);
3545 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
3546 Observer.changedInstr(MI);
3547 return Legalized;
3548 }
3549 case TargetOpcode::G_INSERT_SUBVECTOR: {
3550 if (TypeIdx != 0)
3551 return UnableToLegalize;
3552
3553 GInsertSubvector &IS = cast<GInsertSubvector>(MI);
3554 Register BigVec = IS.getBigVec();
3555 Register SubVec = IS.getSubVec();
3556
3557 LLT SubVecTy = MRI.getType(SubVec);
3558 LLT SubVecWideTy = SubVecTy.changeElementType(WideTy.getElementType());
3559
3560 // Widen the G_INSERT_SUBVECTOR
3561 auto BigZExt = MIRBuilder.buildZExt(WideTy, BigVec);
3562 auto SubZExt = MIRBuilder.buildZExt(SubVecWideTy, SubVec);
3563 auto WideInsert = MIRBuilder.buildInsertSubvector(WideTy, BigZExt, SubZExt,
3564 IS.getIndexImm());
3565
3566 // Truncate back down
3567 auto SplatZero = MIRBuilder.buildSplatVector(
3568 WideTy, MIRBuilder.buildConstant(WideTy.getElementType(), 0));
3569 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, IS.getReg(0), WideInsert,
3570 SplatZero);
3571
3572 MI.eraseFromParent();
3573
3574 return Legalized;
3575 }
3576 }
3577}
3578
3579static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3580 MachineIRBuilder &B, Register Src, LLT Ty) {
3581 auto Unmerge = B.buildUnmerge(Ty, Src);
3582 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3583 Pieces.push_back(Unmerge.getReg(I));
3584}
3585
3586static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3587 MachineIRBuilder &MIRBuilder) {
3588 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3589 MachineFunction &MF = MIRBuilder.getMF();
3590 const DataLayout &DL = MIRBuilder.getDataLayout();
3591 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3592 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3593 LLT DstLLT = MRI.getType(DstReg);
3594
3595 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3596
3597 auto Addr = MIRBuilder.buildConstantPool(
3598 AddrPtrTy,
3599 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3600
3601 MachineMemOperand *MMO =
3602 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3603 MachineMemOperand::MOLoad, DstLLT, Alignment);
3604
3605 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3606}
3607
3608LegalizerHelper::LegalizeResult
3609LegalizerHelper::lowerConstant(MachineInstr &MI) {
3610 const MachineOperand &ConstOperand = MI.getOperand(1);
3611 const Constant *ConstantVal = ConstOperand.getCImm();
3612
3613 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3614 MI.eraseFromParent();
3615
3616 return Legalized;
3617}
3618
3619LegalizerHelper::LegalizeResult
3620LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3621 const MachineOperand &ConstOperand = MI.getOperand(1);
3622 const Constant *ConstantVal = ConstOperand.getFPImm();
3623
3624 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3625 MI.eraseFromParent();
3626
3627 return Legalized;
3628}
3629
3630LegalizerHelper::LegalizeResult
3631LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3632 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3633 if (SrcTy.isVector()) {
3634 LLT SrcEltTy = SrcTy.getElementType();
3635 SmallVector<Register, 8> SrcRegs;
3636
3637 if (DstTy.isVector()) {
3638 int NumDstElt = DstTy.getNumElements();
3639 int NumSrcElt = SrcTy.getNumElements();
3640
3641 LLT DstEltTy = DstTy.getElementType();
3642 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3643 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3644
3645 // If there's an element size mismatch, insert intermediate casts to match
3646 // the result element type.
3647 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3648 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3649 //
3650 // =>
3651 //
3652 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3653 // %4:_(<2 x s8>) = G_BITCAST %2
3654 // %5:_(<2 x s8>) = G_BITCAST %3
3655 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3656 DstCastTy = DstTy.changeVectorElementCount(
3657 ElementCount::getFixed(NumDstElt / NumSrcElt));
3658 SrcPartTy = SrcEltTy;
3659 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3660 //
3661 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3662 //
3663 // =>
3664 //
3665 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3666 // %4:_(s16) = G_BITCAST %2
3667 // %5:_(s16) = G_BITCAST %3
3668 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3669 SrcPartTy = SrcTy.changeVectorElementCount(
3670 ElementCount::getFixed(NumSrcElt / NumDstElt));
3671 DstCastTy = DstEltTy;
3672 }
3673
3674 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3675 for (Register &SrcReg : SrcRegs)
3676 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3677 } else
3678 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3679
3680 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3681 MI.eraseFromParent();
3682 return Legalized;
3683 }
3684
3685 if (DstTy.isVector()) {
3686 SmallVector<Register, 8> SrcRegs;
3687 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3688 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3689 MI.eraseFromParent();
3690 return Legalized;
3691 }
3692
3693 return UnableToLegalize;
3694}
3695
3696/// Figure out the bit offset into a register when coercing a vector index for
3697/// the wide element type. This is only for the case when promoting a vector
3698/// to one with larger elements.
3699///
3700///
3701/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3702/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
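///
/// Worked example (added for illustration): when viewing <8 x s8> as
/// <2 x s32>, DstEltSize = 32 and SrcEltSize = 8, so the ratio's Log2 is 2.
/// For %idx = 5 this gives %offset_idx = 5 & 3 = 1 and
/// %offset_bits = 1 << Log2(8) = 8, i.e. the s8 element starts at bit 8 of
/// its containing s32 element.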
3703static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3704 Register Idx,
3705 unsigned NewEltSize,
3706 unsigned OldEltSize) {
3707 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3708 LLT IdxTy = B.getMRI()->getType(Idx);
3709
3710 // Now figure out the amount we need to shift to get the target bits.
3711 auto OffsetMask = B.buildConstant(
3712 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3713 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3714 return B.buildShl(IdxTy, OffsetIdx,
3715 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3716}
3717
3718/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3719/// is casting to a vector with a smaller element size, perform multiple element
3720/// extracts and merge the results. If this is coercing to a vector with larger
3721/// elements, index the bitcasted vector and extract the target element with bit
3722/// operations. This is intended to force the indexing in the native register
3723/// size for architectures that can dynamically index the register file.
3724LegalizerHelper::LegalizeResult
3725LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3726 LLT CastTy) {
3727 if (TypeIdx != 1)
3728 return UnableToLegalize;
3729
3730 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3731
3732 LLT SrcEltTy = SrcVecTy.getElementType();
3733 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3734 unsigned OldNumElts = SrcVecTy.getNumElements();
3735
3736 LLT NewEltTy = CastTy.getScalarType();
3737 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3738
3739 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3740 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3741 if (NewNumElts > OldNumElts) {
3742 // Decreasing the vector element size
3743 //
3744 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3745 // =>
3746 // v4i32:castx = bitcast x:v2i64
3747 //
3748 // i64 = bitcast
3749 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3750 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3751 //
3752 if (NewNumElts % OldNumElts != 0)
3753 return UnableToLegalize;
3754
3755 // Type of the intermediate result vector.
3756 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3757 LLT MidTy =
3758 CastTy.changeElementCount(ElementCount::getFixed(NewEltsPerOldElt));
3759
3760 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3761
3762 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3763 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3764
3765 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3766 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3767 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3768 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3769 NewOps[I] = Elt.getReg(0);
3770 }
3771
3772 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3773 MIRBuilder.buildBitcast(Dst, NewVec);
3774 MI.eraseFromParent();
3775 return Legalized;
3776 }
3777
3778 if (NewNumElts < OldNumElts) {
3779 if (NewEltSize % OldEltSize != 0)
3780 return UnableToLegalize;
3781
3782 // This only depends on powers of 2 because we use bit tricks to figure out
3783 // the bit offset we need to shift to get the target element. A general
3784 // expansion could emit division/multiply.
3785 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3786 return UnableToLegalize;
3787
3788 // Increasing the vector element size.
3789 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3790 //
3791 // =>
3792 //
3793 // %cast = G_BITCAST %vec
3794 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3795 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3796 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3797 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3798 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3799 // %elt = G_TRUNC %elt_bits
3800
3801 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3802 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3803
3804 // Divide to get the index in the wider element type.
3805 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3806
3807 Register WideElt = CastVec;
3808 if (CastTy.isVector()) {
3809 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3810 ScaledIdx).getReg(0);
3811 }
3812
3813 // Compute the bit offset into the register of the target element.
3814 Register OffsetBits = getBitcastWiderVectorElementOffset(
3815 MIRBuilder, Idx, NewEltSize, OldEltSize);
3816
3817 // Shift the wide element to get the target element.
3818 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3819 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3820 MI.eraseFromParent();
3821 return Legalized;
3822 }
3823
3824 return UnableToLegalize;
3825}
3826
3827/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits,
3828/// while preserving the other bits in \p TargetReg.
3829///
3830/// (InsertReg << Offset) | (TargetReg & ~(((1 << InsertReg.size()) - 1) << Offset))
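///
/// Worked example (added for illustration): inserting an s8 value into an s32
/// \p TargetReg at OffsetBits = 16 builds the mask 0x000000ff, shifts it to
/// 0x00ff0000, clears those bits of \p TargetReg with the inverted mask, and
/// ORs in the zero-extended value shifted left by 16.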
3831static Register buildBitFieldInsert(MachineIRBuilder &B,
3832 Register TargetReg, Register InsertReg,
3833 Register OffsetBits) {
3834 LLT TargetTy = B.getMRI()->getType(TargetReg);
3835 LLT InsertTy = B.getMRI()->getType(InsertReg);
3836 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3837 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3838
3839 // Produce a bitmask of the value to insert
3840 auto EltMask = B.buildConstant(
3841 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3842 InsertTy.getSizeInBits()));
3843 // Shift it into position
3844 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3845 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3846
3847 // Clear out the bits in the wide element
3848 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3849
3850 // The value to insert has all zeros already, so stick it into the masked
3851 // wide element.
3852 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3853}
3854
3855/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3856/// is increasing the element size, perform the indexing in the target element
3857/// type, and use bit operations to insert at the element position. This is
3858/// intended for architectures that can dynamically index the register file and
3859/// want to force indexing in the native register size.
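///
/// Illustrative sketch (added; mirrors the extract case above) with CastTy
/// <2 x s32>:
///
///   %res:_(<8 x s8>) = G_INSERT_VECTOR_ELT %vec:_(<8 x s8>), %val:_(s8), %idx
///
/// becomes roughly
///
///   %cast = G_BITCAST %vec
///   %scaled_idx = G_LSHR %idx, 2
///   %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
///   %new_wide = (bitfield insert of %val into %wide_elt at the bit offset)
///   %new_vec = G_INSERT_VECTOR_ELT %cast, %new_wide, %scaled_idx
///   %res = G_BITCAST %new_vec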
3860LegalizerHelper::LegalizeResult
3861LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3862 LLT CastTy) {
3863 if (TypeIdx != 0)
3864 return UnableToLegalize;
3865
3866 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3867 MI.getFirst4RegLLTs();
3868 LLT VecTy = DstTy;
3869
3870 LLT VecEltTy = VecTy.getElementType();
3871 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3872 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3873 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3874
3875 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3876 unsigned OldNumElts = VecTy.getNumElements();
3877
3878 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3879 if (NewNumElts < OldNumElts) {
3880 if (NewEltSize % OldEltSize != 0)
3881 return UnableToLegalize;
3882
3883 // This only depends on powers of 2 because we use bit tricks to figure out
3884 // the bit offset we need to shift to get the target element. A general
3885 // expansion could emit division/multiply.
3886 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3887 return UnableToLegalize;
3888
3889 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3890 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3891
3892 // Divide to get the index in the wider element type.
3893 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3894
3895 Register ExtractedElt = CastVec;
3896 if (CastTy.isVector()) {
3897 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3898 ScaledIdx).getReg(0);
3899 }
3900
3901 // Compute the bit offset into the register of the target element.
3902 Register OffsetBits = getBitcastWiderVectorElementOffset(
3903 MIRBuilder, Idx, NewEltSize, OldEltSize);
3904
3905 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3906 Val, OffsetBits);
3907 if (CastTy.isVector()) {
3908 InsertedElt = MIRBuilder.buildInsertVectorElement(
3909 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3910 }
3911
3912 MIRBuilder.buildBitcast(Dst, InsertedElt);
3913 MI.eraseFromParent();
3914 return Legalized;
3915 }
3916
3917 return UnableToLegalize;
3918}
3919
3920// This attempts to handle G_CONCAT_VECTORS with illegal operands, particularly
3921// those whose operands are smaller than the legal vector type.
3922//
3923// <16 x s8> = G_CONCAT_VECTORS <4 x s8>, <4 x s8>, <4 x s8>, <4 x s8>
3924//
3925// ===>
3926//
3927// s32 = G_BITCAST <4 x s8>
3928// s32 = G_BITCAST <4 x s8>
3929// s32 = G_BITCAST <4 x s8>
3930// s32 = G_BITCAST <4 x s8>
3931// <4 x s32> = G_BUILD_VECTOR s32, s32, s32, s32
3932// <16 x s8> = G_BITCAST <4 x s32>
3933LegalizerHelper::LegalizeResult
3934LegalizerHelper::bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx,
3935 LLT CastTy) {
3936 // Convert it to CONCAT instruction
3937 auto ConcatMI = dyn_cast<GConcatVectors>(&MI);
3938 if (!ConcatMI) {
3939 return UnableToLegalize;
3940 }
3941
3942 // Check if bitcast is Legal
3943 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
3944 LLT SrcScalTy = LLT::scalar(SrcTy.getSizeInBits());
3945
3946 // Check if the build vector is Legal
3947 if (!LI.isLegal({TargetOpcode::G_BUILD_VECTOR, {CastTy, SrcScalTy}})) {
3948 return UnableToLegalize;
3949 }
3950
3951 // Bitcast the sources
3952 SmallVector<Register> BitcastRegs;
3953 for (unsigned i = 0; i < ConcatMI->getNumSources(); i++) {
3954 BitcastRegs.push_back(
3955 MIRBuilder.buildBitcast(SrcScalTy, ConcatMI->getSourceReg(i))
3956 .getReg(0));
3957 }
3958
3959 // Build the scalar values into a vector
3960 Register BuildReg =
3961 MIRBuilder.buildBuildVector(CastTy, BitcastRegs).getReg(0);
3962 MIRBuilder.buildBitcast(DstReg, BuildReg);
3963
3964 MI.eraseFromParent();
3965 return Legalized;
3966}
3967
3968// This bitcasts a shuffle vector to a different type, currently one with the
3969// same element size. Mostly used to legalize ptr vectors, where
3970// ptrtoint/inttoptr will be used instead.
3971//
3972// <16 x p0> = G_SHUFFLE_VECTOR <4 x p0>, <4 x p0>, mask
3973// ===>
3974// <4 x s64> = G_PTRTOINT <4 x p0>
3975// <4 x s64> = G_PTRTOINT <4 x p0>
3976// <16 x s64> = G_SHUFFLE_VECTOR <4 x s64>, <4 x s64>, mask
3977// <16 x p0> = G_INTTOPTR <16 x s64>
3978LegalizerHelper::LegalizeResult
3979LegalizerHelper::bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx,
3980 LLT CastTy) {
3981 auto ShuffleMI = cast<GShuffleVector>(&MI);
3982 LLT DstTy = MRI.getType(ShuffleMI->getReg(0));
3983 LLT SrcTy = MRI.getType(ShuffleMI->getReg(1));
3984
3985 // We currently only handle vectors of the same size.
3986 if (TypeIdx != 0 ||
3987 CastTy.getScalarSizeInBits() != DstTy.getScalarSizeInBits() ||
3988 CastTy.getElementCount() != DstTy.getElementCount())
3989 return UnableToLegalize;
3990
3991 LLT NewSrcTy = SrcTy.changeElementType(CastTy.getScalarType());
3992
3993 auto Inp1 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(1));
3994 auto Inp2 = MIRBuilder.buildCast(NewSrcTy, ShuffleMI->getReg(2));
3995 auto Shuf =
3996 MIRBuilder.buildShuffleVector(CastTy, Inp1, Inp2, ShuffleMI->getMask());
3997 MIRBuilder.buildCast(ShuffleMI->getReg(0), Shuf);
3998
3999 MI.eraseFromParent();
4000 return Legalized;
4001}
4002
4003/// This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
4004///
4005/// <vscale x 8 x i1> = G_EXTRACT_SUBVECTOR <vscale x 16 x i1>, N
4006///
4007/// ===>
4008///
4009/// <vscale x 2 x i1> = G_BITCAST <vscale x 16 x i1>
4010/// <vscale x 1 x i8> = G_EXTRACT_SUBVECTOR <vscale x 2 x i1>, N / 8
4011/// <vscale x 8 x i1> = G_BITCAST <vscale x 1 x i8>
4012LegalizerHelper::LegalizeResult
4013LegalizerHelper::bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx,
4014 LLT CastTy) {
4015 auto ES = cast<GExtractSubvector>(&MI);
4016
4017 if (!CastTy.isVector())
4018 return UnableToLegalize;
4019
4020 if (TypeIdx != 0)
4021 return UnableToLegalize;
4022
4023 Register Dst = ES->getReg(0);
4024 Register Src = ES->getSrcVec();
4025 uint64_t Idx = ES->getIndexImm();
4026
4027 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4028
4029 LLT DstTy = MRI.getType(Dst);
4030 LLT SrcTy = MRI.getType(Src);
4031 ElementCount DstTyEC = DstTy.getElementCount();
4032 ElementCount SrcTyEC = SrcTy.getElementCount();
4033 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4034 auto SrcTyMinElts = SrcTyEC.getKnownMinValue();
4035
4036 if (DstTy == CastTy)
4037 return Legalized;
4038
4039 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4040 return UnableToLegalize;
4041
4042 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4043 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4044 if (CastEltSize < DstEltSize)
4045 return UnableToLegalize;
4046
4047 auto AdjustAmt = CastEltSize / DstEltSize;
4048 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4049 SrcTyMinElts % AdjustAmt != 0)
4050 return UnableToLegalize;
4051
4052 Idx /= AdjustAmt;
4053 SrcTy = LLT::vector(SrcTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4054 auto CastVec = MIRBuilder.buildBitcast(SrcTy, Src);
4055 auto PromotedES = MIRBuilder.buildExtractSubvector(CastTy, CastVec, Idx);
4056 MIRBuilder.buildBitcast(Dst, PromotedES);
4057
4058 ES->eraseFromParent();
4059 return Legalized;
4060}
4061
4062/// This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
4063///
4064/// <vscale x 16 x i1> = G_INSERT_SUBVECTOR <vscale x 16 x i1>,
4065/// <vscale x 8 x i1>,
4066/// N
4067///
4068/// ===>
4069///
4070/// <vscale x 2 x i8> = G_BITCAST <vscale x 16 x i1>
4071/// <vscale x 1 x i8> = G_BITCAST <vscale x 8 x i1>
4072/// <vscale x 2 x i8> = G_INSERT_SUBVECTOR <vscale x 2 x i8>,
4073/// <vscale x 1 x i8>, N / 8
4074/// <vscale x 16 x i1> = G_BITCAST <vscale x 2 x i8>
4075LegalizerHelper::LegalizeResult
4076LegalizerHelper::bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx,
4077 LLT CastTy) {
4078 auto ES = cast<GInsertSubvector>(&MI);
4079
4080 if (!CastTy.isVector())
4081 return UnableToLegalize;
4082
4083 if (TypeIdx != 0)
4084 return UnableToLegalize;
4085
4086 Register Dst = ES->getReg(0);
4087 Register BigVec = ES->getBigVec();
4088 Register SubVec = ES->getSubVec();
4089 uint64_t Idx = ES->getIndexImm();
4090
4091 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
4092
4093 LLT DstTy = MRI.getType(Dst);
4094 LLT BigVecTy = MRI.getType(BigVec);
4095 LLT SubVecTy = MRI.getType(SubVec);
4096
4097 if (DstTy == CastTy)
4098 return Legalized;
4099
4100 if (DstTy.getSizeInBits() != CastTy.getSizeInBits())
4101 return UnableToLegalize;
4102
4103 ElementCount DstTyEC = DstTy.getElementCount();
4104 ElementCount BigVecTyEC = BigVecTy.getElementCount();
4105 ElementCount SubVecTyEC = SubVecTy.getElementCount();
4106 auto DstTyMinElts = DstTyEC.getKnownMinValue();
4107 auto BigVecTyMinElts = BigVecTyEC.getKnownMinValue();
4108 auto SubVecTyMinElts = SubVecTyEC.getKnownMinValue();
4109
4110 unsigned CastEltSize = CastTy.getElementType().getSizeInBits();
4111 unsigned DstEltSize = DstTy.getElementType().getSizeInBits();
4112 if (CastEltSize < DstEltSize)
4113 return UnableToLegalize;
4114
4115 auto AdjustAmt = CastEltSize / DstEltSize;
4116 if (Idx % AdjustAmt != 0 || DstTyMinElts % AdjustAmt != 0 ||
4117 BigVecTyMinElts % AdjustAmt != 0 || SubVecTyMinElts % AdjustAmt != 0)
4118 return UnableToLegalize;
4119
4120 Idx /= AdjustAmt;
4121 BigVecTy = LLT::vector(BigVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4122 SubVecTy = LLT::vector(SubVecTyEC.divideCoefficientBy(AdjustAmt), AdjustAmt);
4123 auto CastBigVec = MIRBuilder.buildBitcast(BigVecTy, BigVec);
4124 auto CastSubVec = MIRBuilder.buildBitcast(SubVecTy, SubVec);
4125 auto PromotedIS =
4126 MIRBuilder.buildInsertSubvector(CastTy, CastBigVec, CastSubVec, Idx);
4127 MIRBuilder.buildBitcast(Dst, PromotedIS);
4128
4129 ES->eraseFromParent();
4130 return Legalized;
4131}
4132
4133LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
4134 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
4135 Register DstReg = LoadMI.getDstReg();
4136 Register PtrReg = LoadMI.getPointerReg();
4137 LLT DstTy = MRI.getType(DstReg);
4138 MachineMemOperand &MMO = LoadMI.getMMO();
4139 LLT MemTy = MMO.getMemoryType();
4140 MachineFunction &MF = MIRBuilder.getMF();
4141
4142 unsigned MemSizeInBits = MemTy.getSizeInBits();
4143 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
4144
4145 if (MemSizeInBits != MemStoreSizeInBits) {
4146 if (MemTy.isVector())
4147 return UnableToLegalize;
4148
4149 // Promote to a byte-sized load if not loading an integral number of
4150 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
4151 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
4152 MachineMemOperand *NewMMO =
4153 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
4154
4155 Register LoadReg = DstReg;
4156 LLT LoadTy = DstTy;
4157
4158 // If this wasn't already an extending load, we need to widen the result
4159 // register to avoid creating a load with a narrower result than the source.
4160 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
4161 LoadTy = WideMemTy;
4162 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
4163 }
4164
4165 if (isa<GSExtLoad>(LoadMI)) {
4166 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4167 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
4168 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
4169 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
4170 // The extra bits are guaranteed to be zero, since we stored them that
4171 // way. A zext load from Wide thus automatically gives zext from MemVT.
4172 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
4173 } else {
4174 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
4175 }
4176
4177 if (DstTy != LoadTy)
4178 MIRBuilder.buildTrunc(DstReg, LoadReg);
4179
4180 LoadMI.eraseFromParent();
4181 return Legalized;
4182 }
4183
4184 // Big endian lowering not implemented.
4185 if (MIRBuilder.getDataLayout().isBigEndian())
4186 return UnableToLegalize;
4187
4188 // This load needs splitting into power of 2 sized loads.
4189 //
4190 // Our strategy here is to generate anyextending loads for the smaller
4191 // types up to next power-2 result type, and then combine the two larger
4192 // result values together, before truncating back down to the non-pow-2
4193 // type.
4194 // E.g. v1 = i24 load =>
4195 // v2 = i32 zextload (2 byte)
4196 // v3 = i32 load (1 byte)
4197 // v4 = i32 shl v3, 16
4198 // v5 = i32 or v4, v2
4199 // v1 = i24 trunc v5
4200 // By doing this we generate the correct truncate which should get
4201 // combined away as an artifact with a matching extend.
4202
4203 uint64_t LargeSplitSize, SmallSplitSize;
4204
4205 if (!isPowerOf2_32(MemSizeInBits)) {
4206 // This load needs splitting into power of 2 sized loads.
4207 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
4208 SmallSplitSize = MemSizeInBits - LargeSplitSize;
4209 } else {
4210 // This is already a power of 2, but we still need to split this in half.
4211 //
4212 // Assume we're being asked to decompose an unaligned load.
4213 // TODO: If this requires multiple splits, handle them all at once.
4214 auto &Ctx = MF.getFunction().getContext();
4215 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4216 return UnableToLegalize;
4217
4218 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4219 }
4220
4221 if (MemTy.isVector()) {
4222 // TODO: Handle vector extloads
4223 if (MemTy != DstTy)
4224 return UnableToLegalize;
4225
4226 Align Alignment = LoadMI.getAlign();
4227 // Given an alignment larger than the size of the memory, we can increase
4228 // the size of the load without needing to scalarize it.
4229 if (Alignment.value() * 8 > MemSizeInBits &&
4231 LLT MoreTy = DstTy.changeVectorElementCount(
4233 MachineMemOperand *NewMMO = MF.getMachineMemOperand(&MMO, 0, MoreTy);
4234 auto NewLoad = MIRBuilder.buildLoad(MoreTy, PtrReg, *NewMMO);
4235 MIRBuilder.buildDeleteTrailingVectorElements(LoadMI.getReg(0),
4236 NewLoad.getReg(0));
4237 LoadMI.eraseFromParent();
4238 return Legalized;
4239 }
4240
4241 // TODO: We can do better than scalarizing the vector and at least split it
4242 // in half.
4243 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
4244 }
4245
4246 MachineMemOperand *LargeMMO =
4247 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4248 MachineMemOperand *SmallMMO =
4249 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4250
4251 LLT PtrTy = MRI.getType(PtrReg);
4252 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
4253 LLT AnyExtTy = LLT::scalar(AnyExtSize);
4254 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
4255 PtrReg, *LargeMMO);
4256
4257 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
4258 LargeSplitSize / 8);
4259 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
4260 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrAddReg, PtrReg, OffsetCst);
4261 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
4262 SmallPtr, *SmallMMO);
4263
4264 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
4265 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
4266
4267 if (AnyExtTy == DstTy)
4268 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
4269 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
4270 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4271 MIRBuilder.buildTrunc(DstReg, {Or});
4272 } else {
4273 assert(DstTy.isPointer() && "expected pointer");
4274 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
4275
4276 // FIXME: We currently consider this to be illegal for non-integral address
4277 // spaces, but we still need a way to reinterpret the bits.
4278 MIRBuilder.buildIntToPtr(DstReg, Or);
4279 }
4280
4281 LoadMI.eraseFromParent();
4282 return Legalized;
4283}
4284
4285LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
4286 // Lower a non-power of 2 store into multiple pow-2 stores.
4287 // E.g. split an i24 store into an i16 store + i8 store.
4288 // We do this by first extending the stored value to the next largest power
4289 // of 2 type, and then using truncating stores to store the components.
4290 // By doing this, likewise with G_LOAD, generate an extend that can be
4291 // artifact-combined away instead of leaving behind extracts.
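  // Illustrative sketch (added), for an i24 store as emitted below:
  //   %ext:_(s32) = G_ANYEXT %val:_(s24)
  //   %hi:_(s32)  = G_LSHR %ext, 16
  //   G_STORE %ext, %ptr          ; 2-byte truncating store of the low half
  //   G_STORE %hi, %ptr + 2       ; 1-byte truncating store of the remainder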
4292 Register SrcReg = StoreMI.getValueReg();
4293 Register PtrReg = StoreMI.getPointerReg();
4294 LLT SrcTy = MRI.getType(SrcReg);
4295 MachineFunction &MF = MIRBuilder.getMF();
4296 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4297 LLT MemTy = MMO.getMemoryType();
4298
4299 unsigned StoreWidth = MemTy.getSizeInBits();
4300 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
4301
4302 if (StoreWidth != StoreSizeInBits && !SrcTy.isVector()) {
4303 // Promote to a byte-sized store with upper bits zero if not
4304 // storing an integral number of bytes. For example, promote
4305 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
4306 LLT WideTy = LLT::scalar(StoreSizeInBits);
4307
4308 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
4309 // Avoid creating a store with a narrower source than result.
4310 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
4311 SrcTy = WideTy;
4312 }
4313
4314 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
4315
4316 MachineMemOperand *NewMMO =
4317 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
4318 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
4319 StoreMI.eraseFromParent();
4320 return Legalized;
4321 }
4322
4323 if (MemTy.isVector()) {
4324 if (MemTy != SrcTy)
4325 return scalarizeVectorBooleanStore(StoreMI);
4326
4327 // TODO: We can do better than scalarizing the vector and at least split it
4328 // in half.
4329 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
4330 }
4331
4332 unsigned MemSizeInBits = MemTy.getSizeInBits();
4333 uint64_t LargeSplitSize, SmallSplitSize;
4334
4335 if (!isPowerOf2_32(MemSizeInBits)) {
4336 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
4337 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
4338 } else {
4339 auto &Ctx = MF.getFunction().getContext();
4340 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
4341 return UnableToLegalize; // Don't know what we're being asked to do.
4342
4343 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
4344 }
4345
4346 // Extend to the next pow-2. If this store was itself the result of lowering,
4347 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
4348 // that's wider than the stored size.
4349 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
4350 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
4351
4352 if (SrcTy.isPointer()) {
4353 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
4354 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
4355 }
4356
4357 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
4358
4359 // Obtain the smaller value by shifting away the larger value.
4360 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
4361 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
4362
4363 // Generate the PtrAdd and truncating stores.
4364 LLT PtrTy = MRI.getType(PtrReg);
4365 auto OffsetCst = MIRBuilder.buildConstant(
4366 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
4367 auto SmallPtr = MIRBuilder.buildObjectPtrOffset(PtrTy, PtrReg, OffsetCst);
4368
4369 MachineMemOperand *LargeMMO =
4370 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
4371 MachineMemOperand *SmallMMO =
4372 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
4373 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
4374 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
4375 StoreMI.eraseFromParent();
4376 return Legalized;
4377}
4378
4379LegalizerHelper::LegalizeResult
4380LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
4381 Register SrcReg = StoreMI.getValueReg();
4382 Register PtrReg = StoreMI.getPointerReg();
4383 LLT SrcTy = MRI.getType(SrcReg);
4384 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
4385 LLT MemTy = MMO.getMemoryType();
4386 LLT MemScalarTy = MemTy.getElementType();
4387 MachineFunction &MF = MIRBuilder.getMF();
4388
4389 assert(SrcTy.isVector() && "Expect a vector store type");
4390
4391 if (!MemScalarTy.isByteSized()) {
4392 // We need to build an integer scalar of the vector bit pattern.
4393 // It's not legal for us to add padding when storing a vector.
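    // Illustrative example (added): for a <4 x s1> memory type this packs
    // element I into bit I of an s4 value (or bit 3 - I on big-endian), i.e.
    //   %b0 | (%b1 << 1) | (%b2 << 2) | (%b3 << 3)
    // and emits a single s4-typed G_STORE of the packed value.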
4394 unsigned NumBits = MemTy.getSizeInBits();
4395 LLT IntTy = LLT::scalar(NumBits);
4396 auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
4397 LLT IdxTy = TLI.getVectorIdxLLT(MF.getDataLayout());
4398
4399 for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
4400 auto Elt = MIRBuilder.buildExtractVectorElement(
4401 SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
4402 auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
4403 auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
4404 unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
4405 ? (MemTy.getNumElements() - 1) - I
4406 : I;
4407 auto ShiftAmt = MIRBuilder.buildConstant(
4408 IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
4409 auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
4410 CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
4411 }
4412 auto PtrInfo = MMO.getPointerInfo();
4413 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
4414 MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
4415 StoreMI.eraseFromParent();
4416 return Legalized;
4417 }
4418
4419 // TODO: implement simple scalarization.
4420 return UnableToLegalize;
4421}
4422
4423LegalizerHelper::LegalizeResult
4424LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
4425 switch (MI.getOpcode()) {
4426 case TargetOpcode::G_LOAD: {
4427 if (TypeIdx != 0)
4428 return UnableToLegalize;
4429 MachineMemOperand &MMO = **MI.memoperands_begin();
4430
4431 // Not sure how to interpret a bitcast of an extending load.
4432 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4433 return UnableToLegalize;
4434
4435 Observer.changingInstr(MI);
4436 bitcastDst(MI, CastTy, 0);
4437 MMO.setType(CastTy);
4438 // The range metadata is no longer valid when reinterpreted as a different
4439 // type.
4440 MMO.clearRanges();
4441 Observer.changedInstr(MI);
4442 return Legalized;
4443 }
4444 case TargetOpcode::G_STORE: {
4445 if (TypeIdx != 0)
4446 return UnableToLegalize;
4447
4448 MachineMemOperand &MMO = **MI.memoperands_begin();
4449
4450 // Not sure how to interpret a bitcast of a truncating store.
4451 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
4452 return UnableToLegalize;
4453
4454 Observer.changingInstr(MI);
4455 bitcastSrc(MI, CastTy, 0);
4456 MMO.setType(CastTy);
4457 Observer.changedInstr(MI);
4458 return Legalized;
4459 }
4460 case TargetOpcode::G_SELECT: {
4461 if (TypeIdx != 0)
4462 return UnableToLegalize;
4463
4464 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
4465 LLVM_DEBUG(
4466 dbgs() << "bitcast action not implemented for vector select\n");
4467 return UnableToLegalize;
4468 }
4469
4470 Observer.changingInstr(MI);
4471 bitcastSrc(MI, CastTy, 2);
4472 bitcastSrc(MI, CastTy, 3);
4473 bitcastDst(MI, CastTy, 0);
4474 Observer.changedInstr(MI);
4475 return Legalized;
4476 }
4477 case TargetOpcode::G_AND:
4478 case TargetOpcode::G_OR:
4479 case TargetOpcode::G_XOR: {
4480 Observer.changingInstr(MI);
4481 bitcastSrc(MI, CastTy, 1);
4482 bitcastSrc(MI, CastTy, 2);
4483 bitcastDst(MI, CastTy, 0);
4484 Observer.changedInstr(MI);
4485 return Legalized;
4486 }
4487 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
4488 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
4489 case TargetOpcode::G_INSERT_VECTOR_ELT:
4490 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
4491 case TargetOpcode::G_CONCAT_VECTORS:
4492 return bitcastConcatVector(MI, TypeIdx, CastTy);
4493 case TargetOpcode::G_SHUFFLE_VECTOR:
4494 return bitcastShuffleVector(MI, TypeIdx, CastTy);
4495 case TargetOpcode::G_EXTRACT_SUBVECTOR:
4496 return bitcastExtractSubvector(MI, TypeIdx, CastTy);
4497 case TargetOpcode::G_INSERT_SUBVECTOR:
4498 return bitcastInsertSubvector(MI, TypeIdx, CastTy);
4499 default:
4500 return UnableToLegalize;
4501 }
4502}
4503
4504// Legalize an instruction by changing the opcode in place.
4505void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
4506 Observer.changingInstr(MI);
4507 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
4508 Observer.changedInstr(MI);
4509}
4510
4511LegalizerHelper::LegalizeResult
4512LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
4513 using namespace TargetOpcode;
4514
4515 switch(MI.getOpcode()) {
4516 default:
4517 return UnableToLegalize;
4518 case TargetOpcode::G_FCONSTANT:
4519 return lowerFConstant(MI);
4520 case TargetOpcode::G_BITCAST:
4521 return lowerBitcast(MI);
4522 case TargetOpcode::G_SREM:
4523 case TargetOpcode::G_UREM: {
4524 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4525 auto Quot =
4526 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
4527 {MI.getOperand(1), MI.getOperand(2)});
4528
4529 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
4530 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
4531 MI.eraseFromParent();
4532 return Legalized;
4533 }
4534 case TargetOpcode::G_SADDO:
4535 case TargetOpcode::G_SSUBO:
4536 return lowerSADDO_SSUBO(MI);
4537 case TargetOpcode::G_SADDE:
4538 return lowerSADDE(MI);
4539 case TargetOpcode::G_SSUBE:
4540 return lowerSSUBE(MI);
4541 case TargetOpcode::G_UMULH:
4542 case TargetOpcode::G_SMULH:
4543 return lowerSMULH_UMULH(MI);
4544 case TargetOpcode::G_SMULO:
4545 case TargetOpcode::G_UMULO: {
4546 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
4547 // result.
4548 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
4549 LLT Ty = MRI.getType(Res);
4550
4551 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
4552 ? TargetOpcode::G_SMULH
4553 : TargetOpcode::G_UMULH;
4554
4555 Observer.changingInstr(MI);
4556 const auto &TII = MIRBuilder.getTII();
4557 MI.setDesc(TII.get(TargetOpcode::G_MUL));
4558 MI.removeOperand(1);
4559 Observer.changedInstr(MI);
4560
4561 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
4562 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4563
4564 // Move insert point forward so we can use the Res register if needed.
4565 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
4566
4567 // For *signed* multiply, overflow is detected by checking:
4568 // (hi != (lo >> bitwidth-1))
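    // Illustrative example (added), for s8: 100 * 3 = 300 truncates to
    // Res = 44 (0x2C) while G_SMULH yields HiPart = 1; the replicated sign bit
    // of Res is (44 >> 7) = 0, which differs from HiPart, so overflow is set.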
4569 if (Opcode == TargetOpcode::G_SMULH) {
4570 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
4571 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
4572 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
4573 } else {
4574 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
4575 }
4576 return Legalized;
4577 }
4578 case TargetOpcode::G_FNEG: {
4579 auto [Res, SubByReg] = MI.getFirst2Regs();
4580 LLT Ty = MRI.getType(Res);
4581
4582 auto SignMask = MIRBuilder.buildConstant(
4583 Ty, APInt::getSignMask(Ty.getScalarSizeInBits()));
4584 MIRBuilder.buildXor(Res, SubByReg, SignMask);
4585 MI.eraseFromParent();
4586 return Legalized;
4587 }
4588 case TargetOpcode::G_FSUB:
4589 case TargetOpcode::G_STRICT_FSUB: {
4590 auto [Res, LHS, RHS] = MI.getFirst3Regs();
4591 LLT Ty = MRI.getType(Res);
4592
4593 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
4594 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
4595
4596 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
4597 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
4598 else
4599 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
4600
4601 MI.eraseFromParent();
4602 return Legalized;
4603 }
4604 case TargetOpcode::G_FMAD:
4605 return lowerFMad(MI);
4606 case TargetOpcode::G_FFLOOR:
4607 return lowerFFloor(MI);
4608 case TargetOpcode::G_LROUND:
4609 case TargetOpcode::G_LLROUND: {
4610 Register DstReg = MI.getOperand(0).getReg();
4611 Register SrcReg = MI.getOperand(1).getReg();
4612 LLT SrcTy = MRI.getType(SrcReg);
4613 auto Round = MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND, {SrcTy},
4614 {SrcReg});
4615 MIRBuilder.buildFPTOSI(DstReg, Round);
4616 MI.eraseFromParent();
4617 return Legalized;
4618 }
4619 case TargetOpcode::G_INTRINSIC_ROUND:
4620 return lowerIntrinsicRound(MI);
4621 case TargetOpcode::G_FRINT: {
4622 // Since round even is the assumed rounding mode for unconstrained FP
4623 // operations, rint and roundeven are the same operation.
4624 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
4625 return Legalized;
4626 }
4627 case TargetOpcode::G_INTRINSIC_LRINT:
4628 case TargetOpcode::G_INTRINSIC_LLRINT: {
4629 Register DstReg = MI.getOperand(0).getReg();
4630 Register SrcReg = MI.getOperand(1).getReg();
4631 LLT SrcTy = MRI.getType(SrcReg);
4632 auto Round =
4633 MIRBuilder.buildInstr(TargetOpcode::G_FRINT, {SrcTy}, {SrcReg});
4634 MIRBuilder.buildFPTOSI(DstReg, Round);
4635 MI.eraseFromParent();
4636 return Legalized;
4637 }
4638 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
4639 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
4640 Register NewOldValRes = MRI.cloneVirtualRegister(OldValRes);
4641 MIRBuilder.buildAtomicCmpXchg(NewOldValRes, Addr, CmpVal, NewVal,
4642 **MI.memoperands_begin());
4643 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, NewOldValRes, CmpVal);
4644 MIRBuilder.buildCopy(OldValRes, NewOldValRes);
4645 MI.eraseFromParent();
4646 return Legalized;
4647 }
4648 case TargetOpcode::G_LOAD:
4649 case TargetOpcode::G_SEXTLOAD:
4650 case TargetOpcode::G_ZEXTLOAD:
4651 return lowerLoad(cast<GAnyLoad>(MI));
4652 case TargetOpcode::G_STORE:
4653 return lowerStore(cast<GStore>(MI));
4654 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
4655 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
4656 case TargetOpcode::G_CTLZ:
4657 case TargetOpcode::G_CTTZ:
4658 case TargetOpcode::G_CTPOP:
4659 return lowerBitCount(MI);
4660 case G_UADDO: {
4661 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
4662
4663 Register NewRes = MRI.cloneVirtualRegister(Res);
4664
4665 MIRBuilder.buildAdd(NewRes, LHS, RHS);
4666 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, NewRes, RHS);
4667
4668 MIRBuilder.buildCopy(Res, NewRes);
4669
4670 MI.eraseFromParent();
4671 return Legalized;
4672 }
4673 case G_UADDE: {
4674 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
4675 const LLT CondTy = MRI.getType(CarryOut);
4676 const LLT Ty = MRI.getType(Res);
4677
4678 Register NewRes = MRI.cloneVirtualRegister(Res);
4679
4680 // Initial add of the two operands.
4681 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
4682
4683 // Initial check for carry.
4684 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
4685
4686 // Add the sum and the carry.
4687 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
4688 MIRBuilder.buildAdd(NewRes, TmpRes, ZExtCarryIn);
4689
4690 // Second check for carry. We can only carry if the initial sum is all 1s
4691 // and the carry is set, resulting in a new sum of 0.
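    // Illustrative example (added), for s8: LHS = 0xFF, RHS = 0x00,
    // CarryIn = 1 gives TmpRes = 0xFF with no initial carry; adding the
    // carry-in wraps NewRes to 0x00, so (NewRes == 0) & CarryIn produces the
    // second carry and CarryOut becomes 1, matching 0xFF + 0x00 + 1 = 0x100.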
4692 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4693 auto ResEqZero =
4694 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, NewRes, Zero);
4695 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
4696 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
4697
4698 MIRBuilder.buildCopy(Res, NewRes);
4699
4700 MI.eraseFromParent();
4701 return Legalized;
4702 }
4703 case G_USUBO: {
4704 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
4705
4706 MIRBuilder.buildSub(Res, LHS, RHS);
4707 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
4708
4709 MI.eraseFromParent();
4710 return Legalized;
4711 }
4712 case G_USUBE: {
4713 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
4714 const LLT CondTy = MRI.getType(BorrowOut);
4715 const LLT Ty = MRI.getType(Res);
4716
4717 // Initial subtract of the two operands.
4718 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
4719
4720 // Initial check for borrow.
4721 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
4722
4723 // Subtract the borrow from the first subtract.
4724 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
4725 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
4726
4727 // Second check for borrow. We can only borrow if the initial difference is
4728 // 0 and the borrow is set, resulting in a new difference of all 1s.
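    // Illustrative example (added), for s8: LHS = 0x00, RHS = 0x00,
    // BorrowIn = 1 gives TmpRes = 0x00 with no initial borrow; subtracting the
    // borrow-in wraps the result to 0xFF, so (TmpRes == 0) & BorrowIn produces
    // the second borrow and BorrowOut becomes 1, matching 0x00 - 0x00 - 1.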
4729 auto Zero = MIRBuilder.buildConstant(Ty, 0);
4730 auto TmpResEqZero =
4731 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
4732 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
4733 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
4734
4735 MI.eraseFromParent();
4736 return Legalized;
4737 }
4738 case G_UITOFP:
4739 return lowerUITOFP(MI);
4740 case G_SITOFP:
4741 return lowerSITOFP(MI);
4742 case G_FPTOUI:
4743 return lowerFPTOUI(MI);
4744 case G_FPTOSI:
4745 return lowerFPTOSI(MI);
4746 case G_FPTOUI_SAT:
4747 case G_FPTOSI_SAT:
4748 return lowerFPTOINT_SAT(MI);
4749 case G_FPTRUNC:
4750 return lowerFPTRUNC(MI);
4751 case G_FPOWI:
4752 return lowerFPOWI(MI);
4753 case G_SMIN:
4754 case G_SMAX:
4755 case G_UMIN:
4756 case G_UMAX:
4757 return lowerMinMax(MI);
4758 case G_SCMP:
4759 case G_UCMP:
4760 return lowerThreewayCompare(MI);
4761 case G_FCOPYSIGN:
4762 return lowerFCopySign(MI);
4763 case G_FMINNUM:
4764 case G_FMAXNUM:
4765 case G_FMINIMUMNUM:
4766 case G_FMAXIMUMNUM:
4767 return lowerFMinNumMaxNum(MI);
4768 case G_FMINIMUM:
4769 case G_FMAXIMUM:
4770 return lowerFMinimumMaximum(MI);
4771 case G_MERGE_VALUES:
4772 return lowerMergeValues(MI);
4773 case G_UNMERGE_VALUES:
4774 return lowerUnmergeValues(MI);
4775 case TargetOpcode::G_SEXT_INREG: {
4776 assert(MI.getOperand(2).isImm() && "Expected immediate");
4777 int64_t SizeInBits = MI.getOperand(2).getImm();
4778
4779 auto [DstReg, SrcReg] = MI.getFirst2Regs();
4780 LLT DstTy = MRI.getType(DstReg);
4781 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
4782
4783 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
4784 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
4785 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
4786 MI.eraseFromParent();
4787 return Legalized;
4788 }
4789 case G_EXTRACT_VECTOR_ELT:
4790 case G_INSERT_VECTOR_ELT:
4791 return lowerExtractInsertVectorElt(MI);
4792 case G_SHUFFLE_VECTOR:
4793 return lowerShuffleVector(MI);
4794 case G_VECTOR_COMPRESS:
4795 return lowerVECTOR_COMPRESS(MI);
4796 case G_DYN_STACKALLOC:
4797 return lowerDynStackAlloc(MI);
4798 case G_STACKSAVE:
4799 return lowerStackSave(MI);
4800 case G_STACKRESTORE:
4801 return lowerStackRestore(MI);
4802 case G_EXTRACT:
4803 return lowerExtract(MI);
4804 case G_INSERT:
4805 return lowerInsert(MI);
4806 case G_BSWAP:
4807 return lowerBswap(MI);
4808 case G_BITREVERSE:
4809 return lowerBitreverse(MI);
4810 case G_READ_REGISTER:
4811 case G_WRITE_REGISTER:
4812 return lowerReadWriteRegister(MI);
4813 case G_UADDSAT:
4814 case G_USUBSAT: {
4815 // Try to make a reasonable guess about which lowering strategy to use. The
4816 // target can override this with custom lowering and calling the
4817 // implementation functions.
4818 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4819 if (LI.isLegalOrCustom({G_UMIN, Ty}))
4820 return lowerAddSubSatToMinMax(MI);
4821 return lowerAddSubSatToAddoSubo(MI);
4822 }
4823 case G_SADDSAT:
4824 case G_SSUBSAT: {
4825 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4826
4827 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
4828 // since it's a shorter expansion. However, we would need to figure out the
4829 // preferred boolean type for the carry out for the query.
4830 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
4831 return lowerAddSubSatToMinMax(MI);
4832 return lowerAddSubSatToAddoSubo(MI);
4833 }
4834 case G_SSHLSAT:
4835 case G_USHLSAT:
4836 return lowerShlSat(MI);
4837 case G_ABS:
4838 return lowerAbsToAddXor(MI);
4839 case G_ABDS:
4840 case G_ABDU: {
4841 bool IsSigned = MI.getOpcode() == G_ABDS;
4842 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
4843 if ((IsSigned && LI.isLegal({G_SMIN, Ty}) && LI.isLegal({G_SMAX, Ty})) ||
4844 (!IsSigned && LI.isLegal({G_UMIN, Ty}) && LI.isLegal({G_UMAX, Ty}))) {
4845 return lowerAbsDiffToMinMax(MI);
4846 }
4847 return lowerAbsDiffToSelect(MI);
4848 }
4849 case G_FABS:
4850 return lowerFAbs(MI);
4851 case G_SELECT:
4852 return lowerSelect(MI);
4853 case G_IS_FPCLASS:
4854 return lowerISFPCLASS(MI);
4855 case G_SDIVREM:
4856 case G_UDIVREM:
4857 return lowerDIVREM(MI);
4858 case G_FSHL:
4859 case G_FSHR:
4860 return lowerFunnelShift(MI);
4861 case G_ROTL:
4862 case G_ROTR:
4863 return lowerRotate(MI);
4864 case G_MEMSET:
4865 case G_MEMCPY:
4866 case G_MEMMOVE:
4867 return lowerMemCpyFamily(MI);
4868 case G_MEMCPY_INLINE:
4869 return lowerMemcpyInline(MI);
4870 case G_ZEXT:
4871 case G_SEXT:
4872 case G_ANYEXT:
4873 return lowerEXT(MI);
4874 case G_TRUNC:
4875 return lowerTRUNC(MI);
4876 GISEL_VECREDUCE_CASES_NONSEQ
4877 return lowerVectorReduction(MI);
4878 case G_VAARG:
4879 return lowerVAArg(MI);
4880 case G_ATOMICRMW_SUB: {
4881 auto [Ret, Mem, Val] = MI.getFirst3Regs();
4882 const LLT ValTy = MRI.getType(Val);
4883 MachineMemOperand *MMO = *MI.memoperands_begin();
4884
4885 auto VNeg = MIRBuilder.buildNeg(ValTy, Val);
4886 MIRBuilder.buildAtomicRMW(G_ATOMICRMW_ADD, Ret, Mem, VNeg, *MMO);
4887 MI.eraseFromParent();
4888 return Legalized;
4889 }
4890 }
4891}
4892
4893Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
4894 Align MinAlign) const {
4895 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
4896 // datalayout for the preferred alignment. Also there should be a target hook
4897 // for this to allow targets to reduce the alignment and ignore the
4898 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
4899 // the type.
4900 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
4901}
4902
4903MachineInstrBuilder
4904LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
4905 MachinePointerInfo &PtrInfo) {
4906 MachineFunction &MF = MIRBuilder.getMF();
4907 const DataLayout &DL = MIRBuilder.getDataLayout();
4908 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
4909
4910 unsigned AddrSpace = DL.getAllocaAddrSpace();
4911 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
4912
4913 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
4914 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4915}
4916
4917MachineInstrBuilder LegalizerHelper::createStackStoreLoad(const DstOp &Res,
4918 const SrcOp &Val) {
4919 LLT SrcTy = Val.getLLTTy(MRI);
4920 Align StackTypeAlign =
4921 std::max(getStackTemporaryAlignment(SrcTy),
4922 getStackTemporaryAlignment(Res.getLLTTy(MRI)));
4923 MachinePointerInfo PtrInfo;
4924 auto StackTemp =
4925 createStackTemporary(SrcTy.getSizeInBytes(), StackTypeAlign, PtrInfo);
4926
4927 MIRBuilder.buildStore(Val, StackTemp, PtrInfo, StackTypeAlign);
4928 return MIRBuilder.buildLoad(Res, StackTemp, PtrInfo, StackTypeAlign);
4929}
4930
4931static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4932 LLT VecTy) {
4933 LLT IdxTy = B.getMRI()->getType(IdxReg);
4934 unsigned NElts = VecTy.getNumElements();
4935
4936 int64_t IdxVal;
4937 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4938 if (IdxVal < VecTy.getNumElements())
4939 return IdxReg;
4940 // If a constant index would be out of bounds, clamp it as well.
4941 }
4942
4943 if (isPowerOf2_32(NElts)) {
4944 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4945 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4946 }
4947
4948 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4949 .getReg(0);
4950}
4951
4952Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4953 Register Index) {
4954 LLT EltTy = VecTy.getElementType();
4955
4956 // Calculate the element offset and add it to the pointer.
4957 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4958 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4959 "Converting bits to bytes lost precision");
4960
4961 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4962
4963 // Convert index to the correct size for the address space.
4964 const DataLayout &DL = MIRBuilder.getDataLayout();
4965 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4966 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4967 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4968 if (IdxTy != MRI.getType(Index))
4969 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4970
4971 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4972 MIRBuilder.buildConstant(IdxTy, EltSize));
4973
4974 LLT PtrTy = MRI.getType(VecPtr);
4975 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4976}
4977
4978#ifndef NDEBUG
4979/// Check that all vector operands have the same number of elements. Other
4980/// operands should be listed in NonVecOpIndices.
4981static bool hasSameNumEltsOnAllVectorOperands(
4982    GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4983 std::initializer_list<unsigned> NonVecOpIndices) {
4984 if (MI.getNumMemOperands() != 0)
4985 return false;
4986
4987 LLT VecTy = MRI.getType(MI.getReg(0));
4988 if (!VecTy.isVector())
4989 return false;
4990 unsigned NumElts = VecTy.getNumElements();
4991
4992 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4993 MachineOperand &Op = MI.getOperand(OpIdx);
4994 if (!Op.isReg()) {
4995 if (!is_contained(NonVecOpIndices, OpIdx))
4996 return false;
4997 continue;
4998 }
4999
5000 LLT Ty = MRI.getType(Op.getReg());
5001 if (!Ty.isVector()) {
5002 if (!is_contained(NonVecOpIndices, OpIdx))
5003 return false;
5004 continue;
5005 }
5006
5007 if (Ty.getNumElements() != NumElts)
5008 return false;
5009 }
5010
5011 return true;
5012}
5013#endif
5014
5015/// Fill \p DstOps with DstOps that, combined, have the same number of elements
5016/// as \p Ty. These DstOps are scalars when \p NumElts = 1, or vectors with
5017/// \p NumElts elements. When Ty.getNumElements() is not a multiple of
5018/// \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
5019static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
5020 unsigned NumElts) {
5021 LLT LeftoverTy;
5022 assert(Ty.isVector() && "Expected vector type");
5023 LLT NarrowTy = Ty.changeElementCount(ElementCount::getFixed(NumElts));
5024 int NumParts, NumLeftover;
5025 std::tie(NumParts, NumLeftover) =
5026 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
5027
5028 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
5029 for (int i = 0; i < NumParts; ++i) {
5030 DstOps.push_back(NarrowTy);
5031 }
5032
5033 if (LeftoverTy.isValid()) {
5034 assert(NumLeftover == 1 && "expected exactly one leftover");
5035 DstOps.push_back(LeftoverTy);
5036 }
5037}
5038
5039/// Operand \p Op is used by \p N sub-instructions. Fill \p Ops with \p N SrcOps
5040/// made from \p Op, depending on the operand type.
5041static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
5042 MachineOperand &Op) {
5043 for (unsigned i = 0; i < N; ++i) {
5044 if (Op.isReg())
5045 Ops.push_back(Op.getReg());
5046 else if (Op.isImm())
5047 Ops.push_back(Op.getImm());
5048 else if (Op.isPredicate())
5049 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
5050 else
5051 llvm_unreachable("Unsupported type");
5052 }
5053}
5054
5055// Handle splitting vector operations which need to have the same number of
5056// elements in each type index, but each type index may have a different element
5057// type.
5058//
5059// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
5060// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5061// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5062//
5063// Also handles some irregular breakdown cases, e.g.
5064// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
5065// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
5066// s64 = G_SHL s64, s32
5067LegalizerHelper::LegalizeResult
5068LegalizerHelper::fewerElementsVectorMultiEltType(
5069 GenericMachineInstr &MI, unsigned NumElts,
5070 std::initializer_list<unsigned> NonVecOpIndices) {
5071 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
5072 "Non-compatible opcode or not specified non-vector operands");
5073 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5074
5075 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5076 unsigned NumDefs = MI.getNumDefs();
5077
5078 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
5079  // Build instructions with DstOps so that an instruction found by CSE can be used directly.
5080  // CSE copies the found instruction into the given vreg when building with a vreg destination.
5081 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
5082 // Output registers will be taken from created instructions.
5083 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
5084 for (unsigned i = 0; i < NumDefs; ++i) {
5085 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
5086 }
5087
5088 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
5089 // Operands listed in NonVecOpIndices will be used as is without splitting;
5090 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
5091 // scalar condition (op 1), immediate in sext_inreg (op 2).
5092 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
5093 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5094 ++UseIdx, ++UseNo) {
5095 if (is_contained(NonVecOpIndices, UseIdx)) {
5096 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
5097 MI.getOperand(UseIdx));
5098 } else {
5099 SmallVector<Register, 8> SplitPieces;
5100 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
5101 MRI);
5102 llvm::append_range(InputOpsPieces[UseNo], SplitPieces);
5103 }
5104 }
5105
5106 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5107
5108 // Take i-th piece of each input operand split and build sub-vector/scalar
5109 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
5110 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5112 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5113 Defs.push_back(OutputOpsPieces[DstNo][i]);
5114
5116 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
5117 Uses.push_back(InputOpsPieces[InputNo][i]);
5118
5119 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
5120 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
5121 OutputRegs[DstNo].push_back(I.getReg(DstNo));
5122 }
5123
5124 // Merge small outputs into MI's output for each def operand.
5125 if (NumLeftovers) {
5126 for (unsigned i = 0; i < NumDefs; ++i)
5127 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
5128 } else {
5129 for (unsigned i = 0; i < NumDefs; ++i)
5130 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
5131 }
5132
5133 MI.eraseFromParent();
5134 return Legalized;
5135}
5136
5137LegalizerHelper::LegalizeResult
5138LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
5139 unsigned NumElts) {
5140 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
5141
5142 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
5143 unsigned NumDefs = MI.getNumDefs();
5144
5145 SmallVector<DstOp, 8> OutputOpsPieces;
5146 SmallVector<Register, 8> OutputRegs;
5147 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
5148
5149  // Instructions that perform the register split will be inserted in the basic
5150  // block where the register is defined (that block is given by the next operand).
5151 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
5152 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
5153 UseIdx += 2, ++UseNo) {
5154 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
5155 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
5156 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
5157 MIRBuilder, MRI);
5158 }
5159
5160 // Build PHIs with fewer elements.
5161 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
5162 MIRBuilder.setInsertPt(*MI.getParent(), MI);
5163 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
5164 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
5165 Phi.addDef(
5166 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
5167 OutputRegs.push_back(Phi.getReg(0));
5168
5169 for (unsigned j = 0; j < NumInputs / 2; ++j) {
5170 Phi.addUse(InputOpsPieces[j][i]);
5171 Phi.add(MI.getOperand(1 + j * 2 + 1));
5172 }
5173 }
5174
5175 // Set the insert point after the existing PHIs
5176 MachineBasicBlock &MBB = *MI.getParent();
5177 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
5178
5179 // Merge small outputs into MI's def.
5180 if (NumLeftovers) {
5181 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
5182 } else {
5183 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
5184 }
5185
5186 MI.eraseFromParent();
5187 return Legalized;
5188}
5189
5190LegalizerHelper::LegalizeResult
5191LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
5192 unsigned TypeIdx,
5193 LLT NarrowTy) {
5194 const int NumDst = MI.getNumOperands() - 1;
5195 const Register SrcReg = MI.getOperand(NumDst).getReg();
5196 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5197 LLT SrcTy = MRI.getType(SrcReg);
5198
5199 if (TypeIdx != 1 || NarrowTy == DstTy)
5200 return UnableToLegalize;
5201
5202  // Requires compatible types. Otherwise SrcReg should have been defined by a
5203  // merge-like instruction that would get artifact-combined. Most likely the
5204  // instruction that defines SrcReg has to perform more/fewer-elements
5205  // legalization compatible with NarrowTy.
5206 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5207 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5208
5209 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5210 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
5211 return UnableToLegalize;
5212
5213  // This is most likely DstTy (smaller than register size) packed in SrcTy
5214  // (larger than register size), and since the unmerge was not combined it will
5215  // be lowered to bit sequence extracts from a register. Unpack SrcTy to NarrowTy
5216  // (register size) pieces first, then unpack each of the NarrowTy pieces to DstTy.
5217
5218 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
5219 //
5220 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
5221 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
5222 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
5223 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
5224 const int NumUnmerge = Unmerge->getNumOperands() - 1;
5225 const int PartsPerUnmerge = NumDst / NumUnmerge;
5226
5227 for (int I = 0; I != NumUnmerge; ++I) {
5228 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
5229
5230 for (int J = 0; J != PartsPerUnmerge; ++J)
5231 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
5232 MIB.addUse(Unmerge.getReg(I));
5233 }
5234
5235 MI.eraseFromParent();
5236 return Legalized;
5237}
5238
5239LegalizerHelper::LegalizeResult
5240LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
5241 LLT NarrowTy) {
5242 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5243  // Requires compatible types. Otherwise the user of DstReg did not perform the
5244  // unmerge that should have been artifact-combined. Most likely the instruction
5245  // that uses DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
5246 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
5247 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5248 if (NarrowTy == SrcTy)
5249 return UnableToLegalize;
5250
5251  // This attempts to lower part of an LCMTy merge/unmerge sequence. The intended
5252  // use is for old MIR tests. Since the change to more/fewer-elements legalization
5253  // it should no longer be possible to generate MIR like this when starting from
5254  // LLVM IR, because the LCMTy approach was replaced with merge/unmerge to vector elements.
5255 if (TypeIdx == 1) {
5256 assert(SrcTy.isVector() && "Expected vector types");
5257 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
5258 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
5259 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
5260 return UnableToLegalize;
5261 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
5262 //
5263 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
5264 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
5265 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
5266 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
5267 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
5268 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
5269
5271 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
5272 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
5273 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
5274 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
5275 Elts.push_back(Unmerge.getReg(j));
5276 }
5277
5278 SmallVector<Register, 8> NarrowTyElts;
5279 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
5280 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
5281 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
5282 ++i, Offset += NumNarrowTyElts) {
5283 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
5284 NarrowTyElts.push_back(
5285 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
5286 }
5287
5288 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5289 MI.eraseFromParent();
5290 return Legalized;
5291 }
5292
5293 assert(TypeIdx == 0 && "Bad type index");
5294 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
5295 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
5296 return UnableToLegalize;
5297
5298  // This is most likely SrcTy (smaller than register size) packed in DstTy
5299  // (larger than register size), and since the merge was not combined it will be
5300  // lowered to bit sequence packing into a register. Merge SrcTy to NarrowTy
5301  // (register size) pieces first, then merge each of the NarrowTy pieces to DstTy.
5302
5303 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
5304 //
5305 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
5306 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
5307 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
5308 SmallVector<Register, 8> NarrowTyElts;
5309 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
5310 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
5311 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
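  // For example, merging eight s32 sources into <8 x s32> with NarrowTy =
  // <4 x s32> gives NumParts = 2, and each narrow piece is built from
  // NumElts = 4 consecutive sources before the final merge into DstReg.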
5312 for (unsigned i = 0; i < NumParts; ++i) {
5314 for (unsigned j = 0; j < NumElts; ++j)
5315 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
5316 NarrowTyElts.push_back(
5317 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
5318 }
5319
5320 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
5321 MI.eraseFromParent();
5322 return Legalized;
5323}
5324
5325LegalizerHelper::LegalizeResult
5326LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
5327 unsigned TypeIdx,
5328 LLT NarrowVecTy) {
5329 auto [DstReg, SrcVec] = MI.getFirst2Regs();
5330 Register InsertVal;
5331 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
5332
5333 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
5334 if (IsInsert)
5335 InsertVal = MI.getOperand(2).getReg();
5336
5337 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
5338 LLT VecTy = MRI.getType(SrcVec);
5339
5340 // If the index is a constant, we can really break this down as you would
5341 // expect, and index into the target size pieces.
5342 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
5343 if (MaybeCst) {
5344 uint64_t IdxVal = MaybeCst->Value.getZExtValue();
5345 // Avoid out of bounds indexing the pieces.
5346 if (IdxVal >= VecTy.getNumElements()) {
5347 MIRBuilder.buildUndef(DstReg);
5348 MI.eraseFromParent();
5349 return Legalized;
5350 }
5351
5352 if (!NarrowVecTy.isVector()) {
5353 SmallVector<Register, 8> SplitPieces;
5354 extractParts(MI.getOperand(1).getReg(), NarrowVecTy,
5355 VecTy.getNumElements(), SplitPieces, MIRBuilder, MRI);
5356 if (IsInsert) {
5357 SplitPieces[IdxVal] = InsertVal;
5358 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), SplitPieces);
5359 } else {
5360 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), SplitPieces[IdxVal]);
5361 }
5362 } else {
5363 SmallVector<Register, 8> VecParts;
5364 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
5365
5366 // Build a sequence of NarrowTy pieces in VecParts for this operand.
5367 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
5368 TargetOpcode::G_ANYEXT);
5369
5370 unsigned NewNumElts = NarrowVecTy.getNumElements();
5371
5372 LLT IdxTy = MRI.getType(Idx);
5373 int64_t PartIdx = IdxVal / NewNumElts;
5374 auto NewIdx =
5375 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
5376
5377 if (IsInsert) {
5378 LLT PartTy = MRI.getType(VecParts[PartIdx]);
5379
5380 // Use the adjusted index to insert into one of the subvectors.
5381 auto InsertPart = MIRBuilder.buildInsertVectorElement(
5382 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
5383 VecParts[PartIdx] = InsertPart.getReg(0);
5384
5385 // Recombine the inserted subvector with the others to reform the result
5386 // vector.
5387 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
5388 } else {
5389 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
5390 }
5391 }
5392
5393 MI.eraseFromParent();
5394 return Legalized;
5395 }
5396
5397 // With a variable index, we can't perform the operation in a smaller type, so
5398 // we're forced to expand this.
5399 //
5400 // TODO: We could emit a chain of compare/select to figure out which piece to
5401 // index.
5402  return UnableToLegalize;
5403}
5404
5405LegalizerHelper::LegalizeResult
5406LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
5407 LLT NarrowTy) {
5408 // FIXME: Don't know how to handle secondary types yet.
5409 if (TypeIdx != 0)
5410 return UnableToLegalize;
5411
5412 if (!NarrowTy.isByteSized()) {
5413 LLVM_DEBUG(dbgs() << "Can't narrow load/store to non-byte-sized type\n");
5414 return UnableToLegalize;
5415 }
5416
5417 // This implementation doesn't work for atomics. Give up instead of doing
5418 // something invalid.
5419 if (LdStMI.isAtomic())
5420 return UnableToLegalize;
5421
5422 bool IsLoad = isa<GLoad>(LdStMI);
5423 Register ValReg = LdStMI.getReg(0);
5424 Register AddrReg = LdStMI.getPointerReg();
5425 LLT ValTy = MRI.getType(ValReg);
5426
5427 // FIXME: Do we need a distinct NarrowMemory legalize action?
5428 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
5429 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
5430 return UnableToLegalize;
5431 }
5432
5433 int NumParts = -1;
5434 int NumLeftover = -1;
5435 LLT LeftoverTy;
5436 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
5437 if (IsLoad) {
5438 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
5439 } else {
5440 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
5441 NarrowLeftoverRegs, MIRBuilder, MRI)) {
5442 NumParts = NarrowRegs.size();
5443 NumLeftover = NarrowLeftoverRegs.size();
5444 }
5445 }
5446
5447 if (NumParts == -1)
5448 return UnableToLegalize;
5449
5450 LLT PtrTy = MRI.getType(AddrReg);
5451 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
5452
5453 unsigned TotalSize = ValTy.getSizeInBits();
5454
5455  // Split the load/store into PartTy-sized pieces starting at Offset. If this
5456  // is a load, return the new registers in ValRegs. For a store, each element
5457  // of ValRegs should be PartTy. Returns the next offset that needs to be
5458 // handled.
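  // For example, narrowing a 96-bit value with NarrowTy = s32 produces three
  // s32 pieces accessed at byte offsets 0, 4 and 8 (for a little-endian layout).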
5459 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
5460 auto MMO = LdStMI.getMMO();
5461 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
5462 unsigned NumParts, unsigned Offset) -> unsigned {
5463 MachineFunction &MF = MIRBuilder.getMF();
5464 unsigned PartSize = PartTy.getSizeInBits();
5465 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
5466 ++Idx) {
5467 unsigned ByteOffset = Offset / 8;
5468 Register NewAddrReg;
5469
5470 MIRBuilder.materializeObjectPtrOffset(NewAddrReg, AddrReg, OffsetTy,
5471 ByteOffset);
5472
5473 MachineMemOperand *NewMMO =
5474 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
5475
5476 if (IsLoad) {
5477 Register Dst = MRI.createGenericVirtualRegister(PartTy);
5478 ValRegs.push_back(Dst);
5479 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
5480 } else {
5481 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
5482 }
5483 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
5484 }
5485
5486 return Offset;
5487 };
5488
5489 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
5490 unsigned HandledOffset =
5491 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
5492
5493 // Handle the rest of the register if this isn't an even type breakdown.
5494 if (LeftoverTy.isValid())
5495 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
5496
5497 if (IsLoad) {
5498 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
5499 LeftoverTy, NarrowLeftoverRegs);
5500 }
5501
5502 LdStMI.eraseFromParent();
5503 return Legalized;
5504}
5505
5506LegalizerHelper::LegalizeResult
5507LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
5508 LLT NarrowTy) {
5509 using namespace TargetOpcode;
5510  GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
5511 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5512
5513 switch (MI.getOpcode()) {
5514 case G_IMPLICIT_DEF:
5515 case G_TRUNC:
5516 case G_AND:
5517 case G_OR:
5518 case G_XOR:
5519 case G_ADD:
5520 case G_SUB:
5521 case G_MUL:
5522 case G_PTR_ADD:
5523 case G_SMULH:
5524 case G_UMULH:
5525 case G_FADD:
5526 case G_FMUL:
5527 case G_FSUB:
5528 case G_FNEG:
5529 case G_FABS:
5530 case G_FCANONICALIZE:
5531 case G_FDIV:
5532 case G_FREM:
5533 case G_FMA:
5534 case G_FMAD:
5535 case G_FPOW:
5536 case G_FEXP:
5537 case G_FEXP2:
5538 case G_FEXP10:
5539 case G_FLOG:
5540 case G_FLOG2:
5541 case G_FLOG10:
5542 case G_FLDEXP:
5543 case G_FNEARBYINT:
5544 case G_FCEIL:
5545 case G_FFLOOR:
5546 case G_FRINT:
5547 case G_INTRINSIC_LRINT:
5548 case G_INTRINSIC_LLRINT:
5549 case G_INTRINSIC_ROUND:
5550 case G_INTRINSIC_ROUNDEVEN:
5551 case G_LROUND:
5552 case G_LLROUND:
5553 case G_INTRINSIC_TRUNC:
5554 case G_FMODF:
5555 case G_FCOS:
5556 case G_FSIN:
5557 case G_FTAN:
5558 case G_FACOS:
5559 case G_FASIN:
5560 case G_FATAN:
5561 case G_FATAN2:
5562 case G_FCOSH:
5563 case G_FSINH:
5564 case G_FTANH:
5565 case G_FSQRT:
5566 case G_BSWAP:
5567 case G_BITREVERSE:
5568 case G_SDIV:
5569 case G_UDIV:
5570 case G_SREM:
5571 case G_UREM:
5572 case G_SDIVREM:
5573 case G_UDIVREM:
5574 case G_SMIN:
5575 case G_SMAX:
5576 case G_UMIN:
5577 case G_UMAX:
5578 case G_ABS:
5579 case G_FMINNUM:
5580 case G_FMAXNUM:
5581 case G_FMINNUM_IEEE:
5582 case G_FMAXNUM_IEEE:
5583 case G_FMINIMUM:
5584 case G_FMAXIMUM:
5585 case G_FMINIMUMNUM:
5586 case G_FMAXIMUMNUM:
5587 case G_FSHL:
5588 case G_FSHR:
5589 case G_ROTL:
5590 case G_ROTR:
5591 case G_FREEZE:
5592 case G_SADDSAT:
5593 case G_SSUBSAT:
5594 case G_UADDSAT:
5595 case G_USUBSAT:
5596 case G_UMULO:
5597 case G_SMULO:
5598 case G_SHL:
5599 case G_LSHR:
5600 case G_ASHR:
5601 case G_SSHLSAT:
5602 case G_USHLSAT:
5603 case G_CTLZ:
5604 case G_CTLZ_ZERO_UNDEF:
5605 case G_CTTZ:
5606 case G_CTTZ_ZERO_UNDEF:
5607 case G_CTPOP:
5608 case G_FCOPYSIGN:
5609 case G_ZEXT:
5610 case G_SEXT:
5611 case G_ANYEXT:
5612 case G_FPEXT:
5613 case G_FPTRUNC:
5614 case G_SITOFP:
5615 case G_UITOFP:
5616 case G_FPTOSI:
5617 case G_FPTOUI:
5618 case G_FPTOSI_SAT:
5619 case G_FPTOUI_SAT:
5620 case G_INTTOPTR:
5621 case G_PTRTOINT:
5622 case G_ADDRSPACE_CAST:
5623 case G_UADDO:
5624 case G_USUBO:
5625 case G_UADDE:
5626 case G_USUBE:
5627 case G_SADDO:
5628 case G_SSUBO:
5629 case G_SADDE:
5630 case G_SSUBE:
5631 case G_STRICT_FADD:
5632 case G_STRICT_FSUB:
5633 case G_STRICT_FMUL:
5634 case G_STRICT_FMA:
5635 case G_STRICT_FLDEXP:
5636 case G_FFREXP:
5637 return fewerElementsVectorMultiEltType(GMI, NumElts);
5638 case G_ICMP:
5639 case G_FCMP:
5640    return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
5641 case G_IS_FPCLASS:
5642 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
5643 case G_SELECT:
5644 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
5645 return fewerElementsVectorMultiEltType(GMI, NumElts);
5646 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
5647 case G_PHI:
5648 return fewerElementsVectorPhi(GMI, NumElts);
5649 case G_UNMERGE_VALUES:
5650 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
5651 case G_BUILD_VECTOR:
5652 assert(TypeIdx == 0 && "not a vector type index");
5653 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5654 case G_CONCAT_VECTORS:
5655 if (TypeIdx != 1) // TODO: This probably does work as expected already.
5656 return UnableToLegalize;
5657 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
5658 case G_EXTRACT_VECTOR_ELT:
5659 case G_INSERT_VECTOR_ELT:
5660 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
5661 case G_LOAD:
5662 case G_STORE:
5663 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
5664 case G_SEXT_INREG:
5665 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
5666  GISEL_VECREDUCE_CASES_NONSEQ
5667 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
5668 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
5669 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
5670 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
5671 case G_SHUFFLE_VECTOR:
5672 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
5673 case G_FPOWI:
5674 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
5675 case G_BITCAST:
5676 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
5677 case G_INTRINSIC_FPTRUNC_ROUND:
5678 return fewerElementsVectorMultiEltType(GMI, NumElts, {2});
5679 default:
5680 return UnableToLegalize;
5681 }
5682}
5683
5684LegalizerHelper::LegalizeResult
5685LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx,
5686 LLT NarrowTy) {
5687 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
5688 "Not a bitcast operation");
5689
5690 if (TypeIdx != 0)
5691 return UnableToLegalize;
5692
5693 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5694
5695 unsigned NewElemCount =
5696 NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
5697 SmallVector<Register> SrcVRegs, BitcastVRegs;
5698 if (NewElemCount == 1) {
5699 LLT SrcNarrowTy = SrcTy.getElementType();
5700
5701 auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
5702 getUnmergeResults(SrcVRegs, *Unmerge);
5703 } else {
5704 LLT SrcNarrowTy =
5705        LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
5706
5707 // Split the Src and Dst Reg into smaller registers
5708 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
5709 return UnableToLegalize;
5710 }
5711
5712 // Build new smaller bitcast instructions
5714  // Leftover types are not supported for now, but will have to be eventually.
5714 for (Register Reg : SrcVRegs)
5715 BitcastVRegs.push_back(MIRBuilder.buildBitcast(NarrowTy, Reg).getReg(0));
5716
5717 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
5718 MI.eraseFromParent();
5719 return Legalized;
5720}
5721
5722LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
5723 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5724 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
5725 if (TypeIdx != 0)
5726 return UnableToLegalize;
5727
5728 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
5729 MI.getFirst3RegLLTs();
5730 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5731 // The shuffle should be canonicalized by now.
5732 if (DstTy != Src1Ty)
5733 return UnableToLegalize;
5734 if (DstTy != Src2Ty)
5735 return UnableToLegalize;
5736
5737 if (!isPowerOf2_32(DstTy.getNumElements()))
5738 return UnableToLegalize;
5739
5740 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
5741  // Further legalization attempts will be needed to split further.
5742 NarrowTy =
5743 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
5744 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
5745
5746 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
5747 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
5748 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
5749 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
5750 SplitSrc2Regs[1]};
5751
5752 Register Hi, Lo;
5753
5754 // If Lo or Hi uses elements from at most two of the four input vectors, then
5755 // express it as a vector shuffle of those two inputs. Otherwise extract the
5756 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
5758 for (unsigned High = 0; High < 2; ++High) {
5759 Register &Output = High ? Hi : Lo;
5760
5761 // Build a shuffle mask for the output, discovering on the fly which
5762 // input vectors to use as shuffle operands (recorded in InputUsed).
5763 // If building a suitable shuffle vector proves too hard, then bail
5764    // out with UseBuildVector set.
5765 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
5766 unsigned FirstMaskIdx = High * NewElts;
5767 bool UseBuildVector = false;
5768 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5769 // The mask element. This indexes into the input.
5770 int Idx = Mask[FirstMaskIdx + MaskOffset];
5771
5772 // The input vector this mask element indexes into.
5773 unsigned Input = (unsigned)Idx / NewElts;
5774
5775 if (Input >= std::size(Inputs)) {
5776 // The mask element does not index into any input vector.
5777 Ops.push_back(-1);
5778 continue;
5779 }
5780
5781 // Turn the index into an offset from the start of the input vector.
5782 Idx -= Input * NewElts;
5783
5784 // Find or create a shuffle vector operand to hold this input.
5785 unsigned OpNo;
5786 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
5787 if (InputUsed[OpNo] == Input) {
5788 // This input vector is already an operand.
5789 break;
5790 } else if (InputUsed[OpNo] == -1U) {
5791 // Create a new operand for this input vector.
5792 InputUsed[OpNo] = Input;
5793 break;
5794 }
5795 }
5796
5797 if (OpNo >= std::size(InputUsed)) {
5798 // More than two input vectors used! Give up on trying to create a
5799 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
5800 UseBuildVector = true;
5801 break;
5802 }
5803
5804 // Add the mask index for the new shuffle vector.
5805 Ops.push_back(Idx + OpNo * NewElts);
5806 }
5807
5808 if (UseBuildVector) {
5809 LLT EltTy = NarrowTy.getElementType();
5811
5812 // Extract the input elements by hand.
5813 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
5814 // The mask element. This indexes into the input.
5815 int Idx = Mask[FirstMaskIdx + MaskOffset];
5816
5817 // The input vector this mask element indexes into.
5818 unsigned Input = (unsigned)Idx / NewElts;
5819
5820 if (Input >= std::size(Inputs)) {
5821 // The mask element is "undef" or indexes off the end of the input.
5822 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
5823 continue;
5824 }
5825
5826 // Turn the index into an offset from the start of the input vector.
5827 Idx -= Input * NewElts;
5828
5829 // Extract the vector element by hand.
5830 SVOps.push_back(MIRBuilder
5831 .buildExtractVectorElement(
5832 EltTy, Inputs[Input],
5833 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
5834 .getReg(0));
5835 }
5836
5837 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
5838 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
5839 } else if (InputUsed[0] == -1U) {
5840 // No input vectors were used! The result is undefined.
5841 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
5842 } else if (NewElts == 1) {
5843 Output = MIRBuilder.buildCopy(NarrowTy, Inputs[InputUsed[0]]).getReg(0);
5844 } else {
5845 Register Op0 = Inputs[InputUsed[0]];
5846 // If only one input was used, use an undefined vector for the other.
5847 Register Op1 = InputUsed[1] == -1U
5848 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
5849 : Inputs[InputUsed[1]];
5850 // At least one input vector was used. Create a new shuffle vector.
5851 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
5852 }
5853
5854 Ops.clear();
5855 }
5856
5857 MIRBuilder.buildMergeLikeInstr(DstReg, {Lo, Hi});
5858 MI.eraseFromParent();
5859 return Legalized;
5860}
5861
5862LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
5863 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
5864 auto &RdxMI = cast<GVecReduce>(MI);
5865
5866 if (TypeIdx != 1)
5867 return UnableToLegalize;
5868
5869 // The semantics of the normal non-sequential reductions allow us to freely
5870 // re-associate the operation.
5871 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
5872
5873 if (NarrowTy.isVector() &&
5874 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
5875 return UnableToLegalize;
5876
5877 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
5878 SmallVector<Register> SplitSrcs;
5879 // If NarrowTy is a scalar then we're being asked to scalarize.
5880 const unsigned NumParts =
5881 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
5882 : SrcTy.getNumElements();
5883
5884 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5885 if (NarrowTy.isScalar()) {
5886 if (DstTy != NarrowTy)
5887 return UnableToLegalize; // FIXME: handle implicit extensions.
5888
5889 if (isPowerOf2_32(NumParts)) {
5890 // Generate a tree of scalar operations to reduce the critical path.
5891 SmallVector<Register> PartialResults;
5892 unsigned NumPartsLeft = NumParts;
5893 while (NumPartsLeft > 1) {
5894 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
5895 PartialResults.emplace_back(
5896              MIRBuilder
5897 .buildInstr(ScalarOpc, {NarrowTy},
5898 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
5899 .getReg(0));
5900 }
5901 SplitSrcs = PartialResults;
5902 PartialResults.clear();
5903 NumPartsLeft = SplitSrcs.size();
5904 }
5905 assert(SplitSrcs.size() == 1);
5906 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
5907 MI.eraseFromParent();
5908 return Legalized;
5909 }
5910 // If we can't generate a tree, then just do sequential operations.
5911 Register Acc = SplitSrcs[0];
5912 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
5913 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
5914 .getReg(0);
5915 MIRBuilder.buildCopy(DstReg, Acc);
5916 MI.eraseFromParent();
5917 return Legalized;
5918 }
5919 SmallVector<Register> PartialReductions;
5920 for (unsigned Part = 0; Part < NumParts; ++Part) {
5921 PartialReductions.push_back(
5922 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
5923 .getReg(0));
5924 }
5925
5926 // If the types involved are powers of 2, we can generate intermediate vector
5927 // ops, before generating a final reduction operation.
5928 if (isPowerOf2_32(SrcTy.getNumElements()) &&
5929 isPowerOf2_32(NarrowTy.getNumElements())) {
5930 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
5931 }
5932
5933 Register Acc = PartialReductions[0];
5934 for (unsigned Part = 1; Part < NumParts; ++Part) {
5935 if (Part == NumParts - 1) {
5936 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
5937 {Acc, PartialReductions[Part]});
5938 } else {
5939 Acc = MIRBuilder
5940 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
5941 .getReg(0);
5942 }
5943 }
5944 MI.eraseFromParent();
5945 return Legalized;
5946}
5947
5948LegalizerHelper::LegalizeResult
5949LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
5950 unsigned int TypeIdx,
5951 LLT NarrowTy) {
5952 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
5953 MI.getFirst3RegLLTs();
5954 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
5955 DstTy != NarrowTy)
5956 return UnableToLegalize;
5957
5958 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
5959 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
5960 "Unexpected vecreduce opcode");
5961 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
5962 ? TargetOpcode::G_FADD
5963 : TargetOpcode::G_FMUL;
5964
5965 SmallVector<Register> SplitSrcs;
5966 unsigned NumParts = SrcTy.getNumElements();
5967 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
5968 Register Acc = ScalarReg;
5969 for (unsigned i = 0; i < NumParts; i++)
5970 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5971 .getReg(0);
5972
5973 MIRBuilder.buildCopy(DstReg, Acc);
5974 MI.eraseFromParent();
5975 return Legalized;
5976}
5977
5978LegalizerHelper::LegalizeResult
5979LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5980 LLT SrcTy, LLT NarrowTy,
5981 unsigned ScalarOpc) {
5982 SmallVector<Register> SplitSrcs;
5983 // Split the sources into NarrowTy size pieces.
5984 extractParts(SrcReg, NarrowTy,
5985 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5986 MIRBuilder, MRI);
5987 // We're going to do a tree reduction using vector operations until we have
5988 // one NarrowTy size value left.
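  // For example, an 8-element source split into 2-element pieces goes
  // 4 pieces -> 2 -> 1 vector-op results, and the surviving piece feeds the
  // original reduction instruction below.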
5989 while (SplitSrcs.size() > 1) {
5990 SmallVector<Register> PartialRdxs;
5991 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5992 Register LHS = SplitSrcs[Idx];
5993 Register RHS = SplitSrcs[Idx + 1];
5994 // Create the intermediate vector op.
5995 Register Res =
5996 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5997 PartialRdxs.push_back(Res);
5998 }
5999 SplitSrcs = std::move(PartialRdxs);
6000 }
6001 // Finally generate the requested NarrowTy based reduction.
6002 Observer.changingInstr(MI);
6003 MI.getOperand(1).setReg(SplitSrcs[0]);
6004 Observer.changedInstr(MI);
6005 return Legalized;
6006}
6007
6008LegalizerHelper::LegalizeResult
6009LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
6010 const LLT HalfTy, const LLT AmtTy) {
6011
6012 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6013 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6014 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6015
6016 if (Amt.isZero()) {
6017 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
6018 MI.eraseFromParent();
6019 return Legalized;
6020 }
6021
6022 LLT NVT = HalfTy;
6023 unsigned NVTBits = HalfTy.getSizeInBits();
6024 unsigned VTBits = 2 * NVTBits;
6025
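  // The input has been split into two NVT-sized halves (InL, InH). Each shift
  // kind distinguishes the amount exceeding the full width, exceeding or
  // equalling the half width, or staying within one half. For example a G_SHL
  // of a 64-bit value by 40 with 32-bit halves produces Lo = 0 and Hi = InL << 8.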
6026 SrcOp Lo(Register(0)), Hi(Register(0));
6027 if (MI.getOpcode() == TargetOpcode::G_SHL) {
6028 if (Amt.ugt(VTBits)) {
6029 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6030 } else if (Amt.ugt(NVTBits)) {
6031 Lo = MIRBuilder.buildConstant(NVT, 0);
6032 Hi = MIRBuilder.buildShl(NVT, InL,
6033 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6034 } else if (Amt == NVTBits) {
6035 Lo = MIRBuilder.buildConstant(NVT, 0);
6036 Hi = InL;
6037 } else {
6038 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
6039 auto OrLHS =
6040 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
6041 auto OrRHS = MIRBuilder.buildLShr(
6042 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6043 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6044 }
6045 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6046 if (Amt.ugt(VTBits)) {
6047 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
6048 } else if (Amt.ugt(NVTBits)) {
6049 Lo = MIRBuilder.buildLShr(NVT, InH,
6050 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6051 Hi = MIRBuilder.buildConstant(NVT, 0);
6052 } else if (Amt == NVTBits) {
6053 Lo = InH;
6054 Hi = MIRBuilder.buildConstant(NVT, 0);
6055 } else {
6056 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6057
6058 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6059 auto OrRHS = MIRBuilder.buildShl(
6060 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6061
6062 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6063 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
6064 }
6065 } else {
6066 if (Amt.ugt(VTBits)) {
6067 Hi = Lo = MIRBuilder.buildAShr(
6068 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6069 } else if (Amt.ugt(NVTBits)) {
6070 Lo = MIRBuilder.buildAShr(NVT, InH,
6071 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
6072 Hi = MIRBuilder.buildAShr(NVT, InH,
6073 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6074 } else if (Amt == NVTBits) {
6075 Lo = InH;
6076 Hi = MIRBuilder.buildAShr(NVT, InH,
6077 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
6078 } else {
6079 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
6080
6081 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
6082 auto OrRHS = MIRBuilder.buildShl(
6083 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
6084
6085 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
6086 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
6087 }
6088 }
6089
6090 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
6091 MI.eraseFromParent();
6092
6093 return Legalized;
6094}
6095
6096LegalizerHelper::LegalizeResult
6097LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
6098 LLT RequestedTy) {
6099 if (TypeIdx == 1) {
6100 Observer.changingInstr(MI);
6101 narrowScalarSrc(MI, RequestedTy, 2);
6102 Observer.changedInstr(MI);
6103 return Legalized;
6104 }
6105
6106 Register DstReg = MI.getOperand(0).getReg();
6107 LLT DstTy = MRI.getType(DstReg);
6108 if (DstTy.isVector())
6109 return UnableToLegalize;
6110
6111 Register Amt = MI.getOperand(2).getReg();
6112 LLT ShiftAmtTy = MRI.getType(Amt);
6113 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
6114 if (DstEltSize % 2 != 0)
6115 return UnableToLegalize;
6116
6117 // Check if we should use multi-way splitting instead of recursive binary
6118 // splitting.
6119 //
6120 // Multi-way splitting directly decomposes wide shifts (e.g., 128-bit ->
6121 // 4×32-bit) in a single legalization step, avoiding the recursive overhead
6122  // and dependency chains created by the usual binary splitting approach
6123 // (128->64->32).
6124 //
6125 // The >= 8 parts threshold ensures we only use this optimization when binary
6126 // splitting would require multiple recursive passes, avoiding overhead for
6127  // simple 2-way splits where the binary approach is sufficient.
6128 if (RequestedTy.isValid() && RequestedTy.isScalar() &&
6129 DstEltSize % RequestedTy.getSizeInBits() == 0) {
6130 const unsigned NumParts = DstEltSize / RequestedTy.getSizeInBits();
6131 // Use multiway if we have 8 or more parts (i.e., would need 3+ recursive
6132 // steps).
6133 if (NumParts >= 8)
6134 return narrowScalarShiftMultiway(MI, RequestedTy);
6135 }
6136
6137 // Fall back to binary splitting:
6138 // Ignore the input type. We can only go to exactly half the size of the
6139 // input. If that isn't small enough, the resulting pieces will be further
6140 // legalized.
6141 const unsigned NewBitSize = DstEltSize / 2;
6142 const LLT HalfTy = LLT::scalar(NewBitSize);
6143 const LLT CondTy = LLT::scalar(1);
6144
6145 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
6146 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
6147 ShiftAmtTy);
6148 }
6149
6150 // TODO: Expand with known bits.
6151
6152 // Handle the fully general expansion by an unknown amount.
6153 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
6154
6155 Register InL = MRI.createGenericVirtualRegister(HalfTy);
6156 Register InH = MRI.createGenericVirtualRegister(HalfTy);
6157 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
6158
6159 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
6160 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
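  // When Amt < NewBitSize (the "short" case) bits cross between the halves by
  // AmtLack = NewBitSize - Amt; when Amt >= NewBitSize (the "long" case) a whole
  // half is skipped and the remaining shift is AmtExcess = Amt - NewBitSize.
  // For example a 128-bit G_SHL by 70 with 64-bit halves takes the long path:
  // Lo = 0 and Hi = InL << 6.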
6161
6162 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6163 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
6164 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
6165
6166 Register ResultRegs[2];
6167 switch (MI.getOpcode()) {
6168 case TargetOpcode::G_SHL: {
6169 // Short: ShAmt < NewBitSize
6170 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
6171
6172 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
6173 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
6174 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6175
6176 // Long: ShAmt >= NewBitSize
6177 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
6178 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
6179
6180 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
6181 auto Hi = MIRBuilder.buildSelect(
6182 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
6183
6184 ResultRegs[0] = Lo.getReg(0);
6185 ResultRegs[1] = Hi.getReg(0);
6186 break;
6187 }
6188 case TargetOpcode::G_LSHR:
6189 case TargetOpcode::G_ASHR: {
6190 // Short: ShAmt < NewBitSize
6191 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
6192
6193 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
6194 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
6195 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
6196
6197 // Long: ShAmt >= NewBitSize
6198    MachineInstrBuilder HiL;
6199 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
6200 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
6201 } else {
6202 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
6203 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
6204 }
6205 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
6206 {InH, AmtExcess}); // Lo from Hi part.
6207
6208 auto Lo = MIRBuilder.buildSelect(
6209 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
6210
6211 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
6212
6213 ResultRegs[0] = Lo.getReg(0);
6214 ResultRegs[1] = Hi.getReg(0);
6215 break;
6216 }
6217 default:
6218 llvm_unreachable("not a shift");
6219 }
6220
6221 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
6222 MI.eraseFromParent();
6223 return Legalized;
6224}
6225
6226Register LegalizerHelper::buildConstantShiftPart(unsigned Opcode,
6227 unsigned PartIdx,
6228 unsigned NumParts,
6229 ArrayRef<Register> SrcParts,
6230 const ShiftParams &Params,
6231 LLT TargetTy, LLT ShiftAmtTy) {
6232 auto WordShiftConst = getIConstantVRegVal(Params.WordShift, MRI);
6233 auto BitShiftConst = getIConstantVRegVal(Params.BitShift, MRI);
6234 assert(WordShiftConst && BitShiftConst && "Expected constants");
6235
6236 const unsigned ShiftWords = WordShiftConst->getZExtValue();
6237 const unsigned ShiftBits = BitShiftConst->getZExtValue();
6238 const bool NeedsInterWordShift = ShiftBits != 0;
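  // For example, a 128-bit G_SHL by 40 split into 32-bit parts has
  // ShiftWords = 1 and ShiftBits = 8: output part I is built from source part
  // I - 1 shifted left by 8, OR'd with the top 8 bits of source part I - 2.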
6239
6240 switch (Opcode) {
6241 case TargetOpcode::G_SHL: {
6242 // Data moves from lower indices to higher indices
6243 // If this part would come from a source beyond our range, it's zero
6244 if (PartIdx < ShiftWords)
6245 return Params.Zero;
6246
6247 unsigned SrcIdx = PartIdx - ShiftWords;
6248 if (!NeedsInterWordShift)
6249 return SrcParts[SrcIdx];
6250
6251 // Combine shifted main part with carry from previous part
6252 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6253 if (SrcIdx > 0) {
6254 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx - 1],
6255 Params.InvBitShift);
6256 return MIRBuilder.buildOr(TargetTy, Hi, Lo).getReg(0);
6257 }
6258 return Hi.getReg(0);
6259 }
6260
6261 case TargetOpcode::G_LSHR: {
6262 unsigned SrcIdx = PartIdx + ShiftWords;
6263 if (SrcIdx >= NumParts)
6264 return Params.Zero;
6265 if (!NeedsInterWordShift)
6266 return SrcParts[SrcIdx];
6267
6268 // Combine shifted main part with carry from next part
6269 auto Lo = MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6270 if (SrcIdx + 1 < NumParts) {
6271 auto Hi = MIRBuilder.buildShl(TargetTy, SrcParts[SrcIdx + 1],
6272 Params.InvBitShift);
6273 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6274 }
6275 return Lo.getReg(0);
6276 }
6277
6278 case TargetOpcode::G_ASHR: {
6279 // Like LSHR but preserves sign bit
6280 unsigned SrcIdx = PartIdx + ShiftWords;
6281 if (SrcIdx >= NumParts)
6282 return Params.SignBit;
6283 if (!NeedsInterWordShift)
6284 return SrcParts[SrcIdx];
6285
6286 // Only the original MSB part uses arithmetic shift to preserve sign. All
6287 // other parts use logical shift since they're just moving data bits.
6288 auto Lo =
6289 (SrcIdx == NumParts - 1)
6290 ? MIRBuilder.buildAShr(TargetTy, SrcParts[SrcIdx], Params.BitShift)
6291 : MIRBuilder.buildLShr(TargetTy, SrcParts[SrcIdx], Params.BitShift);
6292 Register HiSrc =
6293 (SrcIdx + 1 < NumParts) ? SrcParts[SrcIdx + 1] : Params.SignBit;
6294 auto Hi = MIRBuilder.buildShl(TargetTy, HiSrc, Params.InvBitShift);
6295 return MIRBuilder.buildOr(TargetTy, Lo, Hi).getReg(0);
6296 }
6297
6298 default:
6299 llvm_unreachable("not a shift");
6300 }
6301}
6302
6303Register LegalizerHelper::buildVariableShiftPart(unsigned Opcode,
6304 Register MainOperand,
6305 Register ShiftAmt,
6306 LLT TargetTy,
6307 Register CarryOperand) {
6308 // This helper generates a single output part for variable shifts by combining
6309 // the main operand (shifted by BitShift) with carry bits from an adjacent
6310 // part.
6311
6312 // For G_ASHR, individual parts don't have their own sign bit, only the
6313 // complete value does. So we use LSHR for the main operand shift in ASHR
6314 // context.
6315 unsigned MainOpcode =
6316 (Opcode == TargetOpcode::G_ASHR) ? TargetOpcode::G_LSHR : Opcode;
6317
6318 // Perform the primary shift on the main operand
6319 Register MainShifted =
6320 MIRBuilder.buildInstr(MainOpcode, {TargetTy}, {MainOperand, ShiftAmt})
6321 .getReg(0);
6322
6323 // No carry operand available
6324 if (!CarryOperand.isValid())
6325 return MainShifted;
6326
6327 // If BitShift is 0 (word-aligned shift), no inter-word bit movement occurs,
6328 // so carry bits aren't needed.
6329 LLT ShiftAmtTy = MRI.getType(ShiftAmt);
6330 auto ZeroConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6331 LLT BoolTy = LLT::scalar(1);
6332 auto IsZeroBitShift =
6333 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, ShiftAmt, ZeroConst);
6334
6335 // Extract bits from the adjacent part that will "carry over" into this part.
6336 // The carry direction is opposite to the main shift direction, so we can
6337 // align the two shifted values before combining them with OR.
6338
6339 // Determine the carry shift opcode (opposite direction)
6340 unsigned CarryOpcode = (Opcode == TargetOpcode::G_SHL) ? TargetOpcode::G_LSHR
6341 : TargetOpcode::G_SHL;
6342
6343 // Calculate inverse shift amount: BitWidth - ShiftAmt
6344 auto TargetBitsConst =
6345 MIRBuilder.buildConstant(ShiftAmtTy, TargetTy.getScalarSizeInBits());
6346 auto InvShiftAmt = MIRBuilder.buildSub(ShiftAmtTy, TargetBitsConst, ShiftAmt);
6347
6348 // Shift the carry operand
6349 Register CarryBits =
6350      MIRBuilder
6351 .buildInstr(CarryOpcode, {TargetTy}, {CarryOperand, InvShiftAmt})
6352 .getReg(0);
6353
6354 // If BitShift is 0, don't include carry bits (InvShiftAmt would equal
6355 // TargetBits which would be poison for the individual carry shift operation).
6356 auto ZeroReg = MIRBuilder.buildConstant(TargetTy, 0);
6357 Register SafeCarryBits =
6358 MIRBuilder.buildSelect(TargetTy, IsZeroBitShift, ZeroReg, CarryBits)
6359 .getReg(0);
6360
6361 // Combine the main shifted part with the carry bits
6362 return MIRBuilder.buildOr(TargetTy, MainShifted, SafeCarryBits).getReg(0);
6363}
6364
6365LegalizerHelper::LegalizeResult
6366LegalizerHelper::narrowScalarShiftByConstantMultiway(MachineInstr &MI,
6367 const APInt &Amt,
6368 LLT TargetTy,
6369 LLT ShiftAmtTy) {
6370 // Any wide shift can be decomposed into WordShift + BitShift components.
6371 // When shift amount is known constant, directly compute the decomposition
6372 // values and generate constant registers.
6373 Register DstReg = MI.getOperand(0).getReg();
6374 Register SrcReg = MI.getOperand(1).getReg();
6375 LLT DstTy = MRI.getType(DstReg);
6376
6377 const unsigned DstBits = DstTy.getScalarSizeInBits();
6378 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6379 const unsigned NumParts = DstBits / TargetBits;
6380
6381 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6382
6383 // When the shift amount is known at compile time, we just calculate which
6384 // source parts contribute to each output part.
6385
6386 SmallVector<Register, 8> SrcParts;
6387 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6388
6389 if (Amt.isZero()) {
6390 // No shift needed, just copy
6391 MIRBuilder.buildMergeLikeInstr(DstReg, SrcParts);
6392 MI.eraseFromParent();
6393 return Legalized;
6394 }
6395
6396 ShiftParams Params;
6397 const unsigned ShiftWords = Amt.getZExtValue() / TargetBits;
6398 const unsigned ShiftBits = Amt.getZExtValue() % TargetBits;
6399
6400 // Generate constants and values needed by all shift types
6401 Params.WordShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftWords).getReg(0);
6402 Params.BitShift = MIRBuilder.buildConstant(ShiftAmtTy, ShiftBits).getReg(0);
6403 Params.InvBitShift =
6404 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - ShiftBits).getReg(0);
6405 Params.Zero = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6406
6407 // For ASHR, we need the sign-extended value to fill shifted-out positions
6408 if (MI.getOpcode() == TargetOpcode::G_ASHR)
6409 Params.SignBit =
6410        MIRBuilder
6411 .buildAShr(TargetTy, SrcParts[SrcParts.size() - 1],
6412 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1))
6413 .getReg(0);
6414
6415 SmallVector<Register, 8> DstParts(NumParts);
6416 for (unsigned I = 0; I < NumParts; ++I)
6417 DstParts[I] = buildConstantShiftPart(MI.getOpcode(), I, NumParts, SrcParts,
6418 Params, TargetTy, ShiftAmtTy);
6419
6420 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6421 MI.eraseFromParent();
6422 return Legalized;
6423}
6424
6425LegalizerHelper::LegalizeResult
6426LegalizerHelper::narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy) {
6427 Register DstReg = MI.getOperand(0).getReg();
6428 Register SrcReg = MI.getOperand(1).getReg();
6429 Register AmtReg = MI.getOperand(2).getReg();
6430 LLT DstTy = MRI.getType(DstReg);
6431 LLT ShiftAmtTy = MRI.getType(AmtReg);
6432
6433 const unsigned DstBits = DstTy.getScalarSizeInBits();
6434 const unsigned TargetBits = TargetTy.getScalarSizeInBits();
6435 const unsigned NumParts = DstBits / TargetBits;
6436
6437 assert(DstBits % TargetBits == 0 && "Target type must evenly divide source");
6438 assert(isPowerOf2_32(TargetBits) && "Target bit width must be power of 2");
6439
6440 // If the shift amount is known at compile time, we can use direct indexing
6441 // instead of generating select chains in the general case.
6442 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI))
6443 return narrowScalarShiftByConstantMultiway(MI, VRegAndVal->Value, TargetTy,
6444 ShiftAmtTy);
6445
6446 // For runtime-variable shift amounts, we must generate a more complex
6447 // sequence that handles all possible shift values using select chains.
6448
6449 // Split the input into target-sized pieces
6450 SmallVector<Register, 8> SrcParts;
6451 extractParts(SrcReg, TargetTy, NumParts, SrcParts, MIRBuilder, MRI);
6452
6453 // Shifting by zero should be a no-op.
6454 auto ZeroAmtConst = MIRBuilder.buildConstant(ShiftAmtTy, 0);
6455 LLT BoolTy = LLT::scalar(1);
6456 auto IsZeroShift =
6457 MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy, AmtReg, ZeroAmtConst);
6458
6459 // Any wide shift can be decomposed into two components:
6460 // 1. WordShift: number of complete target-sized words to shift
6461 // 2. BitShift: number of bits to shift within each word
6462 //
6463 // Example: 128-bit >> 50 with 32-bit target:
6464 // WordShift = 50 / 32 = 1 (shift right by 1 complete word)
6465 // BitShift = 50 % 32 = 18 (shift each word right by 18 bits)
6466 unsigned TargetBitsLog2 = Log2_32(TargetBits);
6467 auto TargetBitsLog2Const =
6468 MIRBuilder.buildConstant(ShiftAmtTy, TargetBitsLog2);
6469 auto TargetBitsMask = MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6470
6471 Register WordShift =
6472 MIRBuilder.buildLShr(ShiftAmtTy, AmtReg, TargetBitsLog2Const).getReg(0);
6473 Register BitShift =
6474 MIRBuilder.buildAnd(ShiftAmtTy, AmtReg, TargetBitsMask).getReg(0);
6475
6476 // Fill values:
6477 // - SHL/LSHR: fill with zeros
6478 // - ASHR: fill with sign-extended MSB
6479 Register ZeroReg = MIRBuilder.buildConstant(TargetTy, 0).getReg(0);
6480
6481 Register FillValue;
6482 if (MI.getOpcode() == TargetOpcode::G_ASHR) {
6483 auto TargetBitsMinusOneConst =
6484 MIRBuilder.buildConstant(ShiftAmtTy, TargetBits - 1);
6485 FillValue = MIRBuilder
6486 .buildAShr(TargetTy, SrcParts[NumParts - 1],
6487 TargetBitsMinusOneConst)
6488 .getReg(0);
6489 } else {
6490 FillValue = ZeroReg;
6491 }
6492
6493 SmallVector<Register, 8> DstParts(NumParts);
6494
6495 // For each output part, generate a select chain that chooses the correct
6496 // result based on the runtime WordShift value. This handles all possible
6497 // word shift amounts by pre-calculating what each would produce.
6498 for (unsigned I = 0; I < NumParts; ++I) {
6499 // Initialize with appropriate default value for this shift type
6500 Register InBoundsResult = FillValue;
6501
6502 // clang-format off
6503 // Build a branchless select chain by pre-computing results for all possible
6504 // WordShift values (0 to NumParts-1). Each iteration nests a new select:
6505 //
6506 // K=0: select(WordShift==0, result0, FillValue)
6507 // K=1: select(WordShift==1, result1, select(WordShift==0, result0, FillValue))
6508 // K=2: select(WordShift==2, result2, select(WordShift==1, result1, select(...)))
6509 // clang-format on
6510 for (unsigned K = 0; K < NumParts; ++K) {
6511 auto WordShiftKConst = MIRBuilder.buildConstant(ShiftAmtTy, K);
6512 auto IsWordShiftK = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, BoolTy,
6513 WordShift, WordShiftKConst);
6514
6515 // Calculate source indices for this word shift
6516 //
6517 // For 4-part 128-bit value with K=1 word shift:
6518 // SHL: [3][2][1][0] << K => [2][1][0][Z]
6519 // -> (MainIdx = I-K, CarryIdx = I-K-1)
6520 // LSHR: [3][2][1][0] >> K => [Z][3][2][1]
6521 // -> (MainIdx = I+K, CarryIdx = I+K+1)
6522 int MainSrcIdx;
6523 int CarrySrcIdx; // Index for the word that provides the carried-in bits.
6524
6525 switch (MI.getOpcode()) {
6526 case TargetOpcode::G_SHL:
6527 MainSrcIdx = (int)I - (int)K;
6528 CarrySrcIdx = MainSrcIdx - 1;
6529 break;
6530 case TargetOpcode::G_LSHR:
6531 case TargetOpcode::G_ASHR:
6532 MainSrcIdx = (int)I + (int)K;
6533 CarrySrcIdx = MainSrcIdx + 1;
6534 break;
6535 default:
6536 llvm_unreachable("Not a shift");
6537 }
6538
6539 // Check bounds and build the result for this word shift
6540 Register ResultForK;
6541 if (MainSrcIdx >= 0 && MainSrcIdx < (int)NumParts) {
6542 Register MainOp = SrcParts[MainSrcIdx];
6543 Register CarryOp;
6544
6545 // Determine carry operand with bounds checking
6546 if (CarrySrcIdx >= 0 && CarrySrcIdx < (int)NumParts)
6547 CarryOp = SrcParts[CarrySrcIdx];
6548 else if (MI.getOpcode() == TargetOpcode::G_ASHR &&
6549 CarrySrcIdx >= (int)NumParts)
6550 CarryOp = FillValue; // Use sign extension
6551
6552 ResultForK = buildVariableShiftPart(MI.getOpcode(), MainOp, BitShift,
6553 TargetTy, CarryOp);
6554 } else {
6555 // Out of bounds - use fill value for this k
6556 ResultForK = FillValue;
6557 }
6558
6559 // Select this result if WordShift equals k
6560 InBoundsResult =
6561 MIRBuilder
6562 .buildSelect(TargetTy, IsWordShiftK, ResultForK, InBoundsResult)
6563 .getReg(0);
6564 }
6565
6566 // Handle zero-shift special case: if shift is 0, use original input
6567 DstParts[I] =
6568 MIRBuilder
6569 .buildSelect(TargetTy, IsZeroShift, SrcParts[I], InBoundsResult)
6570 .getReg(0);
6571 }
6572
6573 MIRBuilder.buildMergeLikeInstr(DstReg, DstParts);
6574 MI.eraseFromParent();
6575 return Legalized;
6576}
6577
6578LegalizerHelper::LegalizeResult
6579LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
6580 LLT MoreTy) {
6581 assert(TypeIdx == 0 && "Expecting only Idx 0");
6582
6583 Observer.changingInstr(MI);
6584 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
6585 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
6586 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
6587 moreElementsVectorSrc(MI, MoreTy, I);
6588 }
6589
6590 MachineBasicBlock &MBB = *MI.getParent();
6591 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
6592 moreElementsVectorDst(MI, MoreTy, 0);
6593 Observer.changedInstr(MI);
6594 return Legalized;
6595}
6596
6597MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
6598 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
6599 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
6600
6601 switch (Opcode) {
6602 default:
6603 llvm_unreachable(
6604 "getNeutralElementForVecReduce called with invalid opcode!");
6605 case TargetOpcode::G_VECREDUCE_ADD:
6606 case TargetOpcode::G_VECREDUCE_OR:
6607 case TargetOpcode::G_VECREDUCE_XOR:
6608 case TargetOpcode::G_VECREDUCE_UMAX:
6609 return MIRBuilder.buildConstant(Ty, 0);
6610 case TargetOpcode::G_VECREDUCE_MUL:
6611 return MIRBuilder.buildConstant(Ty, 1);
6612 case TargetOpcode::G_VECREDUCE_AND:
6613 case TargetOpcode::G_VECREDUCE_UMIN:
6614 return MIRBuilder.buildConstant(
6615 Ty, APInt::getAllOnes(Ty.getScalarSizeInBits()));
6616 case TargetOpcode::G_VECREDUCE_SMAX:
6617 return MIRBuilder.buildConstant(
6618 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
6619 case TargetOpcode::G_VECREDUCE_SMIN:
6620 return MIRBuilder.buildConstant(
6621 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
6622 case TargetOpcode::G_VECREDUCE_FADD:
6623 return MIRBuilder.buildFConstant(Ty, -0.0);
6624 case TargetOpcode::G_VECREDUCE_FMUL:
6625 return MIRBuilder.buildFConstant(Ty, 1.0);
6626 case TargetOpcode::G_VECREDUCE_FMINIMUM:
6627 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
6628 assert(false && "getNeutralElementForVecReduce unimplemented for "
6629 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
6630 }
6631 llvm_unreachable("switch expected to return!");
6632}
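// Note: each value above is the identity of the corresponding reduction,
// e.g. smax(x, SIGNED_MIN) == x, and(x, ALL_ONES) == x, and fadd(x, -0.0)
// == x (-0.0 rather than +0.0 so that a -0.0 input is preserved), so padding
// extra vector lanes with it leaves the reduced result unchanged.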
6633
6634LegalizerHelper::LegalizeResult
6635LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
6636 LLT MoreTy) {
6637 unsigned Opc = MI.getOpcode();
6638 switch (Opc) {
6639 case TargetOpcode::G_IMPLICIT_DEF:
6640 case TargetOpcode::G_LOAD: {
6641 if (TypeIdx != 0)
6642 return UnableToLegalize;
6643 Observer.changingInstr(MI);
6644 moreElementsVectorDst(MI, MoreTy, 0);
6645 Observer.changedInstr(MI);
6646 return Legalized;
6647 }
6648 case TargetOpcode::G_STORE:
6649 if (TypeIdx != 0)
6650 return UnableToLegalize;
6651 Observer.changingInstr(MI);
6652 moreElementsVectorSrc(MI, MoreTy, 0);
6653 Observer.changedInstr(MI);
6654 return Legalized;
6655 case TargetOpcode::G_AND:
6656 case TargetOpcode::G_OR:
6657 case TargetOpcode::G_XOR:
6658 case TargetOpcode::G_ADD:
6659 case TargetOpcode::G_SUB:
6660 case TargetOpcode::G_MUL:
6661 case TargetOpcode::G_FADD:
6662 case TargetOpcode::G_FSUB:
6663 case TargetOpcode::G_FMUL:
6664 case TargetOpcode::G_FDIV:
6665 case TargetOpcode::G_FCOPYSIGN:
6666 case TargetOpcode::G_UADDSAT:
6667 case TargetOpcode::G_USUBSAT:
6668 case TargetOpcode::G_SADDSAT:
6669 case TargetOpcode::G_SSUBSAT:
6670 case TargetOpcode::G_SMIN:
6671 case TargetOpcode::G_SMAX:
6672 case TargetOpcode::G_UMIN:
6673 case TargetOpcode::G_UMAX:
6674 case TargetOpcode::G_FMINNUM:
6675 case TargetOpcode::G_FMAXNUM:
6676 case TargetOpcode::G_FMINNUM_IEEE:
6677 case TargetOpcode::G_FMAXNUM_IEEE:
6678 case TargetOpcode::G_FMINIMUM:
6679 case TargetOpcode::G_FMAXIMUM:
6680 case TargetOpcode::G_FMINIMUMNUM:
6681 case TargetOpcode::G_FMAXIMUMNUM:
6682 case TargetOpcode::G_STRICT_FADD:
6683 case TargetOpcode::G_STRICT_FSUB:
6684 case TargetOpcode::G_STRICT_FMUL: {
6685 Observer.changingInstr(MI);
6686 moreElementsVectorSrc(MI, MoreTy, 1);
6687 moreElementsVectorSrc(MI, MoreTy, 2);
6688 moreElementsVectorDst(MI, MoreTy, 0);
6689 Observer.changedInstr(MI);
6690 return Legalized;
6691 }
6692 case TargetOpcode::G_SHL:
6693 case TargetOpcode::G_ASHR:
6694 case TargetOpcode::G_LSHR: {
6695 Observer.changingInstr(MI);
6696 moreElementsVectorSrc(MI, MoreTy, 1);
6697 // The shift operand may have a different scalar type from the source and
6698 // destination operands.
6699 LLT ShiftMoreTy = MoreTy.changeElementType(
6700 MRI.getType(MI.getOperand(2).getReg()).getElementType());
6701 moreElementsVectorSrc(MI, ShiftMoreTy, 2);
6702 moreElementsVectorDst(MI, MoreTy, 0);
6703 Observer.changedInstr(MI);
6704 return Legalized;
6705 }
6706 case TargetOpcode::G_FMA:
6707 case TargetOpcode::G_STRICT_FMA:
6708 case TargetOpcode::G_FSHR:
6709 case TargetOpcode::G_FSHL: {
6710 Observer.changingInstr(MI);
6711 moreElementsVectorSrc(MI, MoreTy, 1);
6712 moreElementsVectorSrc(MI, MoreTy, 2);
6713 moreElementsVectorSrc(MI, MoreTy, 3);
6714 moreElementsVectorDst(MI, MoreTy, 0);
6715 Observer.changedInstr(MI);
6716 return Legalized;
6717 }
6718 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
6719 case TargetOpcode::G_EXTRACT:
6720 if (TypeIdx != 1)
6721 return UnableToLegalize;
6722 Observer.changingInstr(MI);
6723 moreElementsVectorSrc(MI, MoreTy, 1);
6724 Observer.changedInstr(MI);
6725 return Legalized;
6726 case TargetOpcode::G_INSERT:
6727 case TargetOpcode::G_INSERT_VECTOR_ELT:
6728 case TargetOpcode::G_FREEZE:
6729 case TargetOpcode::G_FNEG:
6730 case TargetOpcode::G_FABS:
6731 case TargetOpcode::G_FSQRT:
6732 case TargetOpcode::G_FCEIL:
6733 case TargetOpcode::G_FFLOOR:
6734 case TargetOpcode::G_FNEARBYINT:
6735 case TargetOpcode::G_FRINT:
6736 case TargetOpcode::G_INTRINSIC_ROUND:
6737 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
6738 case TargetOpcode::G_INTRINSIC_TRUNC:
6739 case TargetOpcode::G_BITREVERSE:
6740 case TargetOpcode::G_BSWAP:
6741 case TargetOpcode::G_FCANONICALIZE:
6742 case TargetOpcode::G_SEXT_INREG:
6743 case TargetOpcode::G_ABS:
6744 case TargetOpcode::G_CTLZ:
6745 case TargetOpcode::G_CTPOP:
6746 if (TypeIdx != 0)
6747 return UnableToLegalize;
6748 Observer.changingInstr(MI);
6749 moreElementsVectorSrc(MI, MoreTy, 1);
6750 moreElementsVectorDst(MI, MoreTy, 0);
6751 Observer.changedInstr(MI);
6752 return Legalized;
6753 case TargetOpcode::G_SELECT: {
6754 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
6755 if (TypeIdx == 1) {
6756 if (!CondTy.isScalar() ||
6757 DstTy.getElementCount() != MoreTy.getElementCount())
6758 return UnableToLegalize;
6759
6760 // This is turning a scalar select of vectors into a vector
6761 // select. Broadcast the select condition.
6762 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
6763 Observer.changingInstr(MI);
6764 MI.getOperand(1).setReg(ShufSplat.getReg(0));
6765 Observer.changedInstr(MI);
6766 return Legalized;
6767 }
6768
6769 if (CondTy.isVector())
6770 return UnableToLegalize;
6771
6772 Observer.changingInstr(MI);
6773 moreElementsVectorSrc(MI, MoreTy, 2);
6774 moreElementsVectorSrc(MI, MoreTy, 3);
6775 moreElementsVectorDst(MI, MoreTy, 0);
6776 Observer.changedInstr(MI);
6777 return Legalized;
6778 }
6779 case TargetOpcode::G_UNMERGE_VALUES:
6780 return UnableToLegalize;
6781 case TargetOpcode::G_PHI:
6782 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
6783 case TargetOpcode::G_SHUFFLE_VECTOR:
6784 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
6785 case TargetOpcode::G_BUILD_VECTOR: {
6786 SmallVector<SrcOp, 8> Elts;
6787 for (auto Op : MI.uses()) {
6788 Elts.push_back(Op.getReg());
6789 }
6790
6791 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
6792 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()));
6793 }
6794
6795 MIRBuilder.buildDeleteTrailingVectorElements(
6796 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
6797 MI.eraseFromParent();
6798 return Legalized;
6799 }
6800 case TargetOpcode::G_SEXT:
6801 case TargetOpcode::G_ZEXT:
6802 case TargetOpcode::G_ANYEXT:
6803 case TargetOpcode::G_TRUNC:
6804 case TargetOpcode::G_FPTRUNC:
6805 case TargetOpcode::G_FPEXT:
6806 case TargetOpcode::G_FPTOSI:
6807 case TargetOpcode::G_FPTOUI:
6808 case TargetOpcode::G_FPTOSI_SAT:
6809 case TargetOpcode::G_FPTOUI_SAT:
6810 case TargetOpcode::G_SITOFP:
6811 case TargetOpcode::G_UITOFP: {
6812 Observer.changingInstr(MI);
6813 LLT SrcExtTy;
6814 LLT DstExtTy;
6815 if (TypeIdx == 0) {
6816 DstExtTy = MoreTy;
6817 SrcExtTy = MoreTy.changeElementType(
6818 MRI.getType(MI.getOperand(1).getReg()).getElementType());
6819 } else {
6820 DstExtTy = MoreTy.changeElementType(
6821 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6822 SrcExtTy = MoreTy;
6823 }
6824 moreElementsVectorSrc(MI, SrcExtTy, 1);
6825 moreElementsVectorDst(MI, DstExtTy, 0);
6826 Observer.changedInstr(MI);
6827 return Legalized;
6828 }
6829 case TargetOpcode::G_ICMP:
6830 case TargetOpcode::G_FCMP: {
6831 if (TypeIdx != 1)
6832 return UnableToLegalize;
6833
6834 Observer.changingInstr(MI);
6835 moreElementsVectorSrc(MI, MoreTy, 2);
6836 moreElementsVectorSrc(MI, MoreTy, 3);
6837 LLT CondTy = MoreTy.changeVectorElementType(
6838 MRI.getType(MI.getOperand(0).getReg()).getElementType());
6839 moreElementsVectorDst(MI, CondTy, 0);
6840 Observer.changedInstr(MI);
6841 return Legalized;
6842 }
6843 case TargetOpcode::G_BITCAST: {
6844 if (TypeIdx != 0)
6845 return UnableToLegalize;
6846
6847 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
6848 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
6849
6850 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
6851 if (coefficient % DstTy.getNumElements() != 0)
6852 return UnableToLegalize;
6853
6854 coefficient = coefficient / DstTy.getNumElements();
6855
6856 LLT NewTy = SrcTy.changeElementCount(
6857 ElementCount::get(coefficient, MoreTy.isScalable()));
6858 Observer.changingInstr(MI);
6859 moreElementsVectorSrc(MI, NewTy, 1);
6860 moreElementsVectorDst(MI, MoreTy, 0);
6861 Observer.changedInstr(MI);
6862 return Legalized;
6863 }
6864 case TargetOpcode::G_VECREDUCE_FADD:
6865 case TargetOpcode::G_VECREDUCE_FMUL:
6866 case TargetOpcode::G_VECREDUCE_ADD:
6867 case TargetOpcode::G_VECREDUCE_MUL:
6868 case TargetOpcode::G_VECREDUCE_AND:
6869 case TargetOpcode::G_VECREDUCE_OR:
6870 case TargetOpcode::G_VECREDUCE_XOR:
6871 case TargetOpcode::G_VECREDUCE_SMAX:
6872 case TargetOpcode::G_VECREDUCE_SMIN:
6873 case TargetOpcode::G_VECREDUCE_UMAX:
6874 case TargetOpcode::G_VECREDUCE_UMIN: {
6875 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
6876 MachineOperand &MO = MI.getOperand(1);
6877 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
6878 auto NeutralElement = getNeutralElementForVecReduce(
6879 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
6880
6881 LLT IdxTy(TLI.getVectorIdxLLT(MIRBuilder.getDataLayout()));
6882 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
6883 i != e; i++) {
6884 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
6885 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
6886 NeutralElement, Idx);
6887 }
6888
6889 Observer.changingInstr(MI);
6890 MO.setReg(NewVec.getReg(0));
6891 Observer.changedInstr(MI);
6892 return Legalized;
6893 }
6894
6895 default:
6896 return UnableToLegalize;
6897 }
6898}
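// For the G_VECREDUCE_* case above: widening e.g. a <3 x s32> add-reduction
// input to <4 x s32> pads with undef and then overwrites the new lane with
// the neutral element (0 for add), so the reduced value is unchanged.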
6899
6900LegalizerHelper::LegalizeResult
6901LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
6902 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6903 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6904 unsigned MaskNumElts = Mask.size();
6905 unsigned SrcNumElts = SrcTy.getNumElements();
6906 LLT DestEltTy = DstTy.getElementType();
6907
6908 if (MaskNumElts == SrcNumElts)
6909 return Legalized;
6910
6911 if (MaskNumElts < SrcNumElts) {
6912 // Extend mask to match new destination vector size with
6913 // undef values.
6914 SmallVector<int, 16> NewMask(SrcNumElts, -1);
6915 llvm::copy(Mask, NewMask.begin());
6916
6917 moreElementsVectorDst(MI, SrcTy, 0);
6918 MIRBuilder.setInstrAndDebugLoc(MI);
6919 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
6920 MI.getOperand(1).getReg(),
6921 MI.getOperand(2).getReg(), NewMask);
6922 MI.eraseFromParent();
6923
6924 return Legalized;
6925 }
6926
6927 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
6928 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
6929 LLT PaddedTy =
6930 DstTy.changeVectorElementCount(ElementCount::getFixed(PaddedMaskNumElts));
6931
6932 // Create new source vectors by concatenating the initial
6933 // source vectors with undefined vectors of the same size.
6934 auto Undef = MIRBuilder.buildUndef(SrcTy);
6935 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
6936 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
6937 MOps1[0] = MI.getOperand(1).getReg();
6938 MOps2[0] = MI.getOperand(2).getReg();
6939
6940 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
6941 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
6942
6943 // Readjust mask for new input vector length.
6944 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
6945 for (unsigned I = 0; I != MaskNumElts; ++I) {
6946 int Idx = Mask[I];
6947 if (Idx >= static_cast<int>(SrcNumElts))
6948 Idx += PaddedMaskNumElts - SrcNumElts;
6949 MappedOps[I] = Idx;
6950 }
6951
6952 // If we got more elements than required, extract subvector.
6953 if (MaskNumElts != PaddedMaskNumElts) {
6954 auto Shuffle =
6955 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
6956
6957 SmallVector<Register, 16> Elts(MaskNumElts);
6958 for (unsigned I = 0; I < MaskNumElts; ++I) {
6959 Elts[I] =
6960 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
6961 .getReg(0);
6962 }
6963 MIRBuilder.buildBuildVector(DstReg, Elts);
6964 } else {
6965 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
6966 }
6967
6968 MI.eraseFromParent();
6969 return Legalized;
6970}
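// Example of the mask remapping above: shuffling two <2 x s32> sources into
// a <3 x s32> result with mask [0, 2, 3] pads to PaddedMaskNumElts = 4, so
// indices referring to the second source are shifted by 4 - 2 = 2, giving
// [0, 4, 5]; the three required elements are then extracted from the padded
// <4 x s32> shuffle result.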
6971
6972LegalizerHelper::LegalizeResult
6973LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
6974 unsigned int TypeIdx, LLT MoreTy) {
6975 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
6976 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6977 unsigned NumElts = DstTy.getNumElements();
6978 unsigned WidenNumElts = MoreTy.getNumElements();
6979
6980 if (DstTy.isVector() && Src1Ty.isVector() &&
6981 DstTy.getNumElements() != Src1Ty.getNumElements()) {
6982 return equalizeVectorShuffleLengths(MI);
6983 }
6984
6985 if (TypeIdx != 0)
6986 return UnableToLegalize;
6987
6988 // Expect a canonicalized shuffle.
6989 if (DstTy != Src1Ty || DstTy != Src2Ty)
6990 return UnableToLegalize;
6991
6992 moreElementsVectorSrc(MI, MoreTy, 1);
6993 moreElementsVectorSrc(MI, MoreTy, 2);
6994
6995 // Adjust mask based on new input vector length.
6996 SmallVector<int, 16> NewMask(WidenNumElts, -1);
6997 for (unsigned I = 0; I != NumElts; ++I) {
6998 int Idx = Mask[I];
6999 if (Idx < static_cast<int>(NumElts))
7000 NewMask[I] = Idx;
7001 else
7002 NewMask[I] = Idx - NumElts + WidenNumElts;
7003 }
7004 moreElementsVectorDst(MI, MoreTy, 0);
7005 MIRBuilder.setInstrAndDebugLoc(MI);
7006 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
7007 MI.getOperand(1).getReg(),
7008 MI.getOperand(2).getReg(), NewMask);
7009 MI.eraseFromParent();
7010 return Legalized;
7011}
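// Mask adjustment example: widening a <4 x s32> shuffle to <8 x s32>, an
// entry 5 (element 1 of the second source) becomes 5 - 4 + 8 = 9, i.e.
// element 1 of the widened second source; entries below NumElts are kept.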
7012
7013void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
7014 ArrayRef<Register> Src1Regs,
7015 ArrayRef<Register> Src2Regs,
7016 LLT NarrowTy) {
7017 MachineIRBuilder &B = MIRBuilder;
7018 unsigned SrcParts = Src1Regs.size();
7019 unsigned DstParts = DstRegs.size();
7020
7021 unsigned DstIdx = 0; // Low bits of the result.
7022 Register FactorSum =
7023 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
7024 DstRegs[DstIdx] = FactorSum;
7025
7026 Register CarrySumPrevDstIdx;
7027 SmallVector<Register, 4> Factors;
7028
7029 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
7030 // Collect low parts of muls for DstIdx.
7031 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
7032 i <= std::min(DstIdx, SrcParts - 1); ++i) {
7033 MachineInstrBuilder Mul =
7034 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
7035 Factors.push_back(Mul.getReg(0));
7036 }
7037 // Collect high parts of muls from previous DstIdx.
7038 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
7039 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
7040 MachineInstrBuilder Umulh =
7041 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
7042 Factors.push_back(Umulh.getReg(0));
7043 }
7044 // Add CarrySum from additions calculated for previous DstIdx.
7045 if (DstIdx != 1) {
7046 Factors.push_back(CarrySumPrevDstIdx);
7047 }
7048
7049 Register CarrySum;
7050 // Add all factors and accumulate all carries into CarrySum.
7051 if (DstIdx != DstParts - 1) {
7052 MachineInstrBuilder Uaddo =
7053 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
7054 FactorSum = Uaddo.getReg(0);
7055 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
7056 for (unsigned i = 2; i < Factors.size(); ++i) {
7057 MachineInstrBuilder Uaddo =
7058 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
7059 FactorSum = Uaddo.getReg(0);
7060 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
7061 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
7062 }
7063 } else {
7064 // Since value for the next index is not calculated, neither is CarrySum.
7065 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
7066 for (unsigned i = 2; i < Factors.size(); ++i)
7067 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
7068 }
7069
7070 CarrySumPrevDstIdx = CarrySum;
7071 DstRegs[DstIdx] = FactorSum;
7072 Factors.clear();
7073 }
7074}
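// Schoolbook example with two narrow parts, (a1:a0) * (b1:b0):
//   Dst[0] = mul(a0, b0)
//   Dst[1] = mul(a1, b0) + mul(a0, b1) + umulh(a0, b0)
// Carries produced by the adds are only accumulated while a higher
// destination part still remains to consume them.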
7075
7076LegalizerHelper::LegalizeResult
7077LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
7078 LLT NarrowTy) {
7079 if (TypeIdx != 0)
7080 return UnableToLegalize;
7081
7082 Register DstReg = MI.getOperand(0).getReg();
7083 LLT DstType = MRI.getType(DstReg);
7084 // FIXME: add support for vector types
7085 if (DstType.isVector())
7086 return UnableToLegalize;
7087
7088 unsigned Opcode = MI.getOpcode();
7089 unsigned OpO, OpE, OpF;
7090 switch (Opcode) {
7091 case TargetOpcode::G_SADDO:
7092 case TargetOpcode::G_SADDE:
7093 case TargetOpcode::G_UADDO:
7094 case TargetOpcode::G_UADDE:
7095 case TargetOpcode::G_ADD:
7096 OpO = TargetOpcode::G_UADDO;
7097 OpE = TargetOpcode::G_UADDE;
7098 OpF = TargetOpcode::G_UADDE;
7099 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
7100 OpF = TargetOpcode::G_SADDE;
7101 break;
7102 case TargetOpcode::G_SSUBO:
7103 case TargetOpcode::G_SSUBE:
7104 case TargetOpcode::G_USUBO:
7105 case TargetOpcode::G_USUBE:
7106 case TargetOpcode::G_SUB:
7107 OpO = TargetOpcode::G_USUBO;
7108 OpE = TargetOpcode::G_USUBE;
7109 OpF = TargetOpcode::G_USUBE;
7110 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
7111 OpF = TargetOpcode::G_SSUBE;
7112 break;
7113 default:
7114 llvm_unreachable("Unexpected add/sub opcode!");
7115 }
7116
7117 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
7118 unsigned NumDefs = MI.getNumExplicitDefs();
7119 Register Src1 = MI.getOperand(NumDefs).getReg();
7120 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
7121 Register CarryDst, CarryIn;
7122 if (NumDefs == 2)
7123 CarryDst = MI.getOperand(1).getReg();
7124 if (MI.getNumOperands() == NumDefs + 3)
7125 CarryIn = MI.getOperand(NumDefs + 2).getReg();
7126
7127 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7128 LLT LeftoverTy, DummyTy;
7129 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
7130 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
7131 MIRBuilder, MRI);
7132 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
7133 MRI);
7134
7135 int NarrowParts = Src1Regs.size();
7136 Src1Regs.append(Src1Left);
7137 Src2Regs.append(Src2Left);
7138 DstRegs.reserve(Src1Regs.size());
7139
7140 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
7141 Register DstReg =
7142 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
7143 Register CarryOut;
7144 // Forward the final carry-out to the destination register
7145 if (i == e - 1 && CarryDst)
7146 CarryOut = CarryDst;
7147 else
7148 CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
7149
7150 if (!CarryIn) {
7151 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
7152 {Src1Regs[i], Src2Regs[i]});
7153 } else if (i == e - 1) {
7154 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
7155 {Src1Regs[i], Src2Regs[i], CarryIn});
7156 } else {
7157 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
7158 {Src1Regs[i], Src2Regs[i], CarryIn});
7159 }
7160
7161 DstRegs.push_back(DstReg);
7162 CarryIn = CarryOut;
7163 }
7164 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
7165 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
7166 ArrayRef(DstRegs).drop_front(NarrowParts));
7167
7168 MI.eraseFromParent();
7169 return Legalized;
7170}
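// e.g. a 64-bit G_ADD narrowed to s32 becomes
//   {lo, c1} = G_UADDO a_lo, b_lo
//   {hi, c2} = G_UADDE a_hi, b_hi, c1
// and for G_SADDO/G_SSUBO only the final piece switches to the signed
// variant so the carry-out models signed overflow of the full value.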
7171
7172LegalizerHelper::LegalizeResult
7173LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
7174 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
7175
7176 LLT Ty = MRI.getType(DstReg);
7177 if (Ty.isVector())
7178 return UnableToLegalize;
7179
7180 unsigned Size = Ty.getSizeInBits();
7181 unsigned NarrowSize = NarrowTy.getSizeInBits();
7182 if (Size % NarrowSize != 0)
7183 return UnableToLegalize;
7184
7185 unsigned NumParts = Size / NarrowSize;
7186 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
7187 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
7188
7189 SmallVector<Register, 2> Src1Parts, Src2Parts;
7190 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
7191 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
7192 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
7193 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
7194
7195 // Take only high half of registers if this is high mul.
7196 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
7197 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7198 MI.eraseFromParent();
7199 return Legalized;
7200}
7201
7202LegalizerHelper::LegalizeResult
7203LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
7204 LLT NarrowTy) {
7205 if (TypeIdx != 0)
7206 return UnableToLegalize;
7207
7208 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
7209
7210 Register Src = MI.getOperand(1).getReg();
7211 LLT SrcTy = MRI.getType(Src);
7212
7213 // If all finite floats fit into the narrowed integer type, we can just swap
7214 // out the result type. This is practically only useful for conversions from
7215 // half to at least 16-bits, so just handle the one case.
7216 if (SrcTy.getScalarType() != LLT::scalar(16) ||
7217 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
7218 return UnableToLegalize;
7219
7220 Observer.changingInstr(MI);
7221 narrowScalarDst(MI, NarrowTy, 0,
7222 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
7223 Observer.changedInstr(MI);
7224 return Legalized;
7225}
7226
7227LegalizerHelper::LegalizeResult
7228LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
7229 LLT NarrowTy) {
7230 if (TypeIdx != 1)
7231 return UnableToLegalize;
7232
7233 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7234
7235 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
7236 // FIXME: add support for when SizeOp1 isn't an exact multiple of
7237 // NarrowSize.
7238 if (SizeOp1 % NarrowSize != 0)
7239 return UnableToLegalize;
7240 int NumParts = SizeOp1 / NarrowSize;
7241
7242 SmallVector<Register, 2> SrcRegs, DstRegs;
7243 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
7244 MIRBuilder, MRI);
7245
7246 Register OpReg = MI.getOperand(0).getReg();
7247 uint64_t OpStart = MI.getOperand(2).getImm();
7248 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7249 for (int i = 0; i < NumParts; ++i) {
7250 unsigned SrcStart = i * NarrowSize;
7251
7252 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
7253 // No part of the extract uses this subregister, ignore it.
7254 continue;
7255 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7256 // The entire subregister is extracted, forward the value.
7257 DstRegs.push_back(SrcRegs[i]);
7258 continue;
7259 }
7260
7261 // OpSegStart is where this destination segment would start in OpReg if it
7262 // extended infinitely in both directions.
7263 int64_t ExtractOffset;
7264 uint64_t SegSize;
7265 if (OpStart < SrcStart) {
7266 ExtractOffset = 0;
7267 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
7268 } else {
7269 ExtractOffset = OpStart - SrcStart;
7270 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
7271 }
7272
7273 Register SegReg = SrcRegs[i];
7274 if (ExtractOffset != 0 || SegSize != NarrowSize) {
7275 // A genuine extract is needed.
7276 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7277 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
7278 }
7279
7280 DstRegs.push_back(SegReg);
7281 }
7282
7283 Register DstReg = MI.getOperand(0).getReg();
7284 if (MRI.getType(DstReg).isVector())
7285 MIRBuilder.buildBuildVector(DstReg, DstRegs);
7286 else if (DstRegs.size() > 1)
7287 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7288 else
7289 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
7290 MI.eraseFromParent();
7291 return Legalized;
7292}
7293
7294LegalizerHelper::LegalizeResult
7295LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
7296 LLT NarrowTy) {
7297 // FIXME: Don't know how to handle secondary types yet.
7298 if (TypeIdx != 0)
7299 return UnableToLegalize;
7300
7301 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
7302 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
7303 LLT LeftoverTy;
7304 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
7305 LeftoverRegs, MIRBuilder, MRI);
7306
7307 SrcRegs.append(LeftoverRegs);
7308
7309 uint64_t NarrowSize = NarrowTy.getSizeInBits();
7310 Register OpReg = MI.getOperand(2).getReg();
7311 uint64_t OpStart = MI.getOperand(3).getImm();
7312 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
7313 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
7314 unsigned DstStart = I * NarrowSize;
7315
7316 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
7317 // The entire subregister is defined by this insert, forward the new
7318 // value.
7319 DstRegs.push_back(OpReg);
7320 continue;
7321 }
7322
7323 Register SrcReg = SrcRegs[I];
7324 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
7325 // The leftover reg is smaller than NarrowTy, so we need to extend it.
7326 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
7327 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
7328 }
7329
7330 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
7331 // No part of the insert affects this subregister, forward the original.
7332 DstRegs.push_back(SrcReg);
7333 continue;
7334 }
7335
7336 // OpSegStart is where this destination segment would start in OpReg if it
7337 // extended infinitely in both directions.
7338 int64_t ExtractOffset, InsertOffset;
7339 uint64_t SegSize;
7340 if (OpStart < DstStart) {
7341 InsertOffset = 0;
7342 ExtractOffset = DstStart - OpStart;
7343 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
7344 } else {
7345 InsertOffset = OpStart - DstStart;
7346 ExtractOffset = 0;
7347 SegSize =
7348 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
7349 }
7350
7351 Register SegReg = OpReg;
7352 if (ExtractOffset != 0 || SegSize != OpSize) {
7353 // A genuine extract is needed.
7354 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
7355 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
7356 }
7357
7358 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
7359 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
7360 DstRegs.push_back(DstReg);
7361 }
7362
7363 uint64_t WideSize = DstRegs.size() * NarrowSize;
7364 Register DstReg = MI.getOperand(0).getReg();
7365 if (WideSize > RegTy.getSizeInBits()) {
7366 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
7367 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
7368 MIRBuilder.buildTrunc(DstReg, MergeReg);
7369 } else
7370 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
7371
7372 MI.eraseFromParent();
7373 return Legalized;
7374}
7375
7376LegalizerHelper::LegalizeResult
7377LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
7378 LLT NarrowTy) {
7379 Register DstReg = MI.getOperand(0).getReg();
7380 LLT DstTy = MRI.getType(DstReg);
7381
7382 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
7383
7384 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7385 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
7386 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7387 LLT LeftoverTy;
7388 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
7389 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
7390 return UnableToLegalize;
7391
7392 LLT Unused;
7393 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
7394 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7395 llvm_unreachable("inconsistent extractParts result");
7396
7397 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7398 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
7399 {Src0Regs[I], Src1Regs[I]});
7400 DstRegs.push_back(Inst.getReg(0));
7401 }
7402
7403 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7404 auto Inst = MIRBuilder.buildInstr(
7405 MI.getOpcode(),
7406 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
7407 DstLeftoverRegs.push_back(Inst.getReg(0));
7408 }
7409
7410 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7411 LeftoverTy, DstLeftoverRegs);
7412
7413 MI.eraseFromParent();
7414 return Legalized;
7415}
7416
7417LegalizerHelper::LegalizeResult
7418LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
7419 LLT NarrowTy) {
7420 if (TypeIdx != 0)
7421 return UnableToLegalize;
7422
7423 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7424
7425 LLT DstTy = MRI.getType(DstReg);
7426 if (DstTy.isVector())
7427 return UnableToLegalize;
7428
7429 SmallVector<Register, 8> Parts;
7430 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
7431 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
7432 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
7433
7434 MI.eraseFromParent();
7435 return Legalized;
7436}
7437
7438LegalizerHelper::LegalizeResult
7439LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
7440 LLT NarrowTy) {
7441 if (TypeIdx != 0)
7442 return UnableToLegalize;
7443
7444 Register CondReg = MI.getOperand(1).getReg();
7445 LLT CondTy = MRI.getType(CondReg);
7446 if (CondTy.isVector()) // TODO: Handle vselect
7447 return UnableToLegalize;
7448
7449 Register DstReg = MI.getOperand(0).getReg();
7450 LLT DstTy = MRI.getType(DstReg);
7451
7452 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
7453 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
7454 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
7455 LLT LeftoverTy;
7456 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
7457 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
7458 return UnableToLegalize;
7459
7460 LLT Unused;
7461 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
7462 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
7463 llvm_unreachable("inconsistent extractParts result");
7464
7465 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
7466 auto Select = MIRBuilder.buildSelect(NarrowTy,
7467 CondReg, Src1Regs[I], Src2Regs[I]);
7468 DstRegs.push_back(Select.getReg(0));
7469 }
7470
7471 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
7472 auto Select = MIRBuilder.buildSelect(
7473 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
7474 DstLeftoverRegs.push_back(Select.getReg(0));
7475 }
7476
7477 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
7478 LeftoverTy, DstLeftoverRegs);
7479
7480 MI.eraseFromParent();
7481 return Legalized;
7482}
7483
7484LegalizerHelper::LegalizeResult
7485LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
7486 LLT NarrowTy) {
7487 if (TypeIdx != 1)
7488 return UnableToLegalize;
7489
7490 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7491 unsigned NarrowSize = NarrowTy.getSizeInBits();
7492
7493 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7494 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
7495
7496 MachineIRBuilder &B = MIRBuilder;
7497 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7498 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
7499 auto C_0 = B.buildConstant(NarrowTy, 0);
7500 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7501 UnmergeSrc.getReg(1), C_0);
7502 auto LoCTLZ = IsUndef ?
7503 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
7504 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
7505 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7506 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
7507 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
7508 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
7509
7510 MI.eraseFromParent();
7511 return Legalized;
7512 }
7513
7514 return UnableToLegalize;
7515}
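// i.e. for a 64-bit source split into 32-bit halves:
//   ctlz(x) == (hi == 0) ? 32 + ctlz(lo) : ctlz(hi)
// The hi path may use the ZERO_UNDEF form because it is only selected when
// hi is known to be non-zero.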
7516
7517LegalizerHelper::LegalizeResult
7518LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
7519 LLT NarrowTy) {
7520 if (TypeIdx != 1)
7521 return UnableToLegalize;
7522
7523 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7524 unsigned NarrowSize = NarrowTy.getSizeInBits();
7525
7526 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7527 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
7528
7529 MachineIRBuilder &B = MIRBuilder;
7530 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
7531 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
7532 auto C_0 = B.buildConstant(NarrowTy, 0);
7533 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
7534 UnmergeSrc.getReg(0), C_0);
7535 auto HiCTTZ = IsUndef ?
7536 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
7537 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
7538 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
7539 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
7540 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
7541 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
7542
7543 MI.eraseFromParent();
7544 return Legalized;
7545 }
7546
7547 return UnableToLegalize;
7548}
7549
7550LegalizerHelper::LegalizeResult
7551LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
7552 LLT NarrowTy) {
7553 if (TypeIdx != 1)
7554 return UnableToLegalize;
7555
7556 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7557 unsigned NarrowSize = NarrowTy.getSizeInBits();
7558
7559 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
7560 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
7561
7562 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
7563 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
7564 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
7565
7566 MI.eraseFromParent();
7567 return Legalized;
7568 }
7569
7570 return UnableToLegalize;
7571}
7572
7573LegalizerHelper::LegalizeResult
7574LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
7575 LLT NarrowTy) {
7576 if (TypeIdx != 1)
7577 return UnableToLegalize;
7578
7579 MachineIRBuilder &B = MIRBuilder;
7580 Register ExpReg = MI.getOperand(2).getReg();
7581 LLT ExpTy = MRI.getType(ExpReg);
7582
7583 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
7584
7585 // Clamp the exponent to the range of the target type.
7586 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
7587 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
7588 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
7589 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
7590
7591 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
7592 Observer.changingInstr(MI);
7593 MI.getOperand(2).setReg(Trunc.getReg(0));
7594 Observer.changedInstr(MI);
7595 return Legalized;
7596}
7597
7598LegalizerHelper::LegalizeResult
7599LegalizerHelper::lowerBitCount(MachineInstr &MI) {
7600 unsigned Opc = MI.getOpcode();
7601 const auto &TII = MIRBuilder.getTII();
7602 auto isSupported = [this](const LegalityQuery &Q) {
7603 auto QAction = LI.getAction(Q).Action;
7604 return QAction == Legal || QAction == Libcall || QAction == Custom;
7605 };
7606 switch (Opc) {
7607 default:
7608 return UnableToLegalize;
7609 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
7610 // This trivially expands to CTLZ.
7611 Observer.changingInstr(MI);
7612 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
7613 Observer.changedInstr(MI);
7614 return Legalized;
7615 }
7616 case TargetOpcode::G_CTLZ: {
7617 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7618 unsigned Len = SrcTy.getScalarSizeInBits();
7619
7620 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7621 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
7622 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
7623 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
7624 auto ICmp = MIRBuilder.buildICmp(
7625 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
7626 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7627 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
7628 MI.eraseFromParent();
7629 return Legalized;
7630 }
7631 // for now, we do this:
7632 // NewLen = NextPowerOf2(Len);
7633 // x = x | (x >> 1);
7634 // x = x | (x >> 2);
7635 // ...
7636 // x = x | (x >>16);
7637 // x = x | (x >>32); // for 64-bit input
7638 // Up to NewLen/2
7639 // return Len - popcount(x);
7640 //
7641 // Ref: "Hacker's Delight" by Henry Warren
7642 Register Op = SrcReg;
7643 unsigned NewLen = PowerOf2Ceil(Len);
7644 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
7645 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
7646 auto MIBOp = MIRBuilder.buildOr(
7647 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
7648 Op = MIBOp.getReg(0);
7649 }
7650 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
7651 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
7652 MIBPop);
7653 MI.eraseFromParent();
7654 return Legalized;
7655 }
7656 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
7657 // This trivially expands to CTTZ.
7658 Observer.changingInstr(MI);
7659 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
7660 Observer.changedInstr(MI);
7661 return Legalized;
7662 }
7663 case TargetOpcode::G_CTTZ: {
7664 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7665
7666 unsigned Len = SrcTy.getScalarSizeInBits();
7667 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
7668 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
7669 // zero.
7670 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
7671 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
7672 auto ICmp = MIRBuilder.buildICmp(
7673 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
7674 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
7675 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
7676 MI.eraseFromParent();
7677 return Legalized;
7678 }
7679 // for now, we use: { return popcount(~x & (x - 1)); }
7680 // unless the target has ctlz but not ctpop, in which case we use:
7681 // { return 32 - nlz(~x & (x-1)); }
7682 // Ref: "Hacker's Delight" by Henry Warren
7683 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
7684 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
7685 auto MIBTmp = MIRBuilder.buildAnd(
7686 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
7687 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
7688 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
7689 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
7690 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
7691 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
7692 MI.eraseFromParent();
7693 return Legalized;
7694 }
7695 Observer.changingInstr(MI);
7696 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
7697 MI.getOperand(1).setReg(MIBTmp.getReg(0));
7698 Observer.changedInstr(MI);
7699 return Legalized;
7700 }
7701 case TargetOpcode::G_CTPOP: {
7702 Register SrcReg = MI.getOperand(1).getReg();
7703 LLT Ty = MRI.getType(SrcReg);
7704 unsigned Size = Ty.getScalarSizeInBits();
7705 MachineIRBuilder &B = MIRBuilder;
7706
7707 // Bail out on irregular type lengths.
7708 if (Size > 128 || Size % 8 != 0)
7709 return UnableToLegalize;
7710
7711 // Count set bits in blocks of 2 bits. Default approach would be
7712 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
7713 // We use the following formula instead:
7714 // B2Count = val - { (val >> 1) & 0x55555555 }
7715 // since it gives same result in blocks of 2 with one instruction less.
7716 auto C_1 = B.buildConstant(Ty, 1);
7717 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
7718 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
7719 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
7720 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
7721 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
7722
7723 // In order to get the count in blocks of 4, add values from adjacent blocks of 2.
7724 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
7725 auto C_2 = B.buildConstant(Ty, 2);
7726 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
7727 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
7728 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
7729 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
7730 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
7731 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
7732
7733 // For count in blocks of 8 bits we don't have to mask high 4 bits before
7734 // addition since count value sits in range {0,...,8} and 4 bits are enough
7735 // to hold such binary values. After addition high 4 bits still hold count
7736 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
7737 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
7738 auto C_4 = B.buildConstant(Ty, 4);
7739 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
7740 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
7741 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
7742 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
7743 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
7744
7745 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
7746 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
7747 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
7748 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
7749
7750 // Shift count result from 8 high bits to low bits.
7751 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
7752
7753 auto IsMulSupported = [this](const LLT Ty) {
7754 auto Action = LI.getAction({TargetOpcode::G_MUL, {Ty}}).Action;
7755 return Action == Legal || Action == WidenScalar || Action == Custom;
7756 };
7757 if (IsMulSupported(Ty)) {
7758 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
7759 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7760 } else {
7761 auto ResTmp = B8Count;
7762 for (unsigned Shift = 8; Shift < Size; Shift *= 2) {
7763 auto ShiftC = B.buildConstant(Ty, Shift);
7764 auto Shl = B.buildShl(Ty, ResTmp, ShiftC);
7765 ResTmp = B.buildAdd(Ty, ResTmp, Shl);
7766 }
7767 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
7768 }
7769 MI.eraseFromParent();
7770 return Legalized;
7771 }
7772 }
7773}
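// Worked CTPOP example for an 8-bit value x = 0b01101101 (five bits set):
//   B2Count = x - ((x >> 1) & 0x55)                      = 0b01011001
//   B4Count = (B2Count & 0x33) + ((B2Count >> 2) & 0x33) = 0b00100011
//   B8Count = (B4Count + (B4Count >> 4)) & 0x0F          = 0b00000101 = 5
// For wider types the same masks are splatted across every byte and the
// final multiply (or shift-add chain) sums the per-byte counts into the
// top byte before the shift down.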
7774
7775// Check that (every element of) Reg is undef or not an exact multiple of BW.
7776static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
7777 Register Reg, unsigned BW) {
7778 return matchUnaryPredicate(
7779 MRI, Reg,
7780 [=](const Constant *C) {
7781 // Null constant here means an undef.
7782 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
7783 return !CI || CI->getValue().urem(BW) != 0;
7784 },
7785 /*AllowUndefs*/ true);
7786}
7787
7788LegalizerHelper::LegalizeResult
7789LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
7790 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7791 LLT Ty = MRI.getType(Dst);
7792 LLT ShTy = MRI.getType(Z);
7793
7794 unsigned BW = Ty.getScalarSizeInBits();
7795
7796 if (!isPowerOf2_32(BW))
7797 return UnableToLegalize;
7798
7799 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7800 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7801
7802 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7803 // fshl X, Y, Z -> fshr X, Y, -Z
7804 // fshr X, Y, Z -> fshl X, Y, -Z
7805 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
7806 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
7807 } else {
7808 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
7809 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
7810 auto One = MIRBuilder.buildConstant(ShTy, 1);
7811 if (IsFSHL) {
7812 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7813 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
7814 } else {
7815 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
7816 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
7817 }
7818
7819 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
7820 }
7821
7822 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
7823 MI.eraseFromParent();
7824 return Legalized;
7825}
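// e.g. for s8 operands: fshl X, Y, 3 == fshr X, Y, 5, since -3 == 5 (mod 8).
// When Z may be a multiple of 8, the rewritten form pre-shifts one operand
// by 1 so the remaining amount ~Z & 7 never reaches the bit width; for
// Z == 0 it still yields X (fshl) or Y (fshr) as required.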
7826
7827LegalizerHelper::LegalizeResult
7828LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
7829 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
7830 LLT Ty = MRI.getType(Dst);
7831 LLT ShTy = MRI.getType(Z);
7832
7833 const unsigned BW = Ty.getScalarSizeInBits();
7834 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7835
7836 Register ShX, ShY;
7837 Register ShAmt, InvShAmt;
7838
7839 // FIXME: Emit optimized urem by constant instead of letting it expand later.
7840 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
7841 // fshl: X << C | Y >> (BW - C)
7842 // fshr: X << (BW - C) | Y >> C
7843 // where C = Z % BW is not zero
7844 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7845 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7846 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
7847 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
7848 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
7849 } else {
7850 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
7851 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
7852 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
7853 if (isPowerOf2_32(BW)) {
7854 // Z % BW -> Z & (BW - 1)
7855 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
7856 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
7857 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
7858 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
7859 } else {
7860 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
7861 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
7862 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
7863 }
7864
7865 auto One = MIRBuilder.buildConstant(ShTy, 1);
7866 if (IsFSHL) {
7867 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
7868 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
7869 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
7870 } else {
7871 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
7872 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
7873 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
7874 }
7875 }
7876
7877 MIRBuilder.buildOr(Dst, ShX, ShY, MachineInstr::Disjoint);
7878 MI.eraseFromParent();
7879 return Legalized;
7880}
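// e.g. for s8: fshl X, Y, 3 -> (X << 3) | (Y >> 5) and
//              fshr X, Y, 3 -> (X << 5) | (Y >> 3).
// In the branch where Z may be 0 mod BW, the extra ">> 1" / "<< 1" keeps
// the second shift amount at BW - 1 - (Z % BW), which stays below BW.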
7881
7882LegalizerHelper::LegalizeResult
7883LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
7884 // These operations approximately do the following (while avoiding undefined
7885 // shifts by BW):
7886 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
7887 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
7888 Register Dst = MI.getOperand(0).getReg();
7889 LLT Ty = MRI.getType(Dst);
7890 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
7891
7892 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
7893 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
7894
7895 // TODO: Use smarter heuristic that accounts for vector legalization.
7896 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
7897 return lowerFunnelShiftAsShifts(MI);
7898
7899 // This only works for powers of 2, fallback to shifts if it fails.
7900 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
7901 if (Result == UnableToLegalize)
7902 return lowerFunnelShiftAsShifts(MI);
7903 return Result;
7904}
7905
7906LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
7907 auto [Dst, Src] = MI.getFirst2Regs();
7908 LLT DstTy = MRI.getType(Dst);
7909 LLT SrcTy = MRI.getType(Src);
7910
7911 uint32_t DstTySize = DstTy.getSizeInBits();
7912 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
7913 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
7914
7915 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
7916 !isPowerOf2_32(SrcTyScalarSize))
7917 return UnableToLegalize;
7918
7919 // The step between extend is too large, split it by creating an intermediate
7920 // extend instruction
7921 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
7922 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
7923 // If the destination type is illegal, split it into multiple statements
7924 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
7925 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
7926 // Unmerge the vector
7927 LLT EltTy = MidTy.changeElementCount(
7928 MidTy.getElementCount().divideCoefficientBy(2));
7929 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
7930
7931 // ZExt the vectors
7932 LLT ZExtResTy = DstTy.changeElementCount(
7933 DstTy.getElementCount().divideCoefficientBy(2));
7934 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7935 {UnmergeSrc.getReg(0)});
7936 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
7937 {UnmergeSrc.getReg(1)});
7938
7939 // Merge the ending vectors
7940 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
7941
7942 MI.eraseFromParent();
7943 return Legalized;
7944 }
7945 return UnableToLegalize;
7946}
7947
7948LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
7949 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
7950 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
7951 // Similar to how operand splitting is done in SelectionDAG, we can handle
7952 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
7953 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
7954 // %lo16(<4 x s16>) = G_TRUNC %inlo
7955 // %hi16(<4 x s16>) = G_TRUNC %inhi
7956 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
7957 // %res(<8 x s8>) = G_TRUNC %in16
7958
7959 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
7960
7961 Register DstReg = MI.getOperand(0).getReg();
7962 Register SrcReg = MI.getOperand(1).getReg();
7963 LLT DstTy = MRI.getType(DstReg);
7964 LLT SrcTy = MRI.getType(SrcReg);
7965
7966 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
7967 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
7968 isPowerOf2_32(SrcTy.getNumElements()) &&
7969 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
7970 // Split input type.
7971 LLT SplitSrcTy = SrcTy.changeElementCount(
7972 SrcTy.getElementCount().divideCoefficientBy(2));
7973
7974 // First, split the source into two smaller vectors.
7975 SmallVector<Register, 2> SplitSrcs;
7976 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
7977
7978 // Truncate the splits into intermediate narrower elements.
7979 LLT InterTy;
7980 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7981 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
7982 else
7983 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
7984 for (Register &Src : SplitSrcs)
7985 Src = MIRBuilder.buildTrunc(InterTy, Src).getReg(0);
7986
7987 // Combine the new truncates into one vector
7988 auto Merge = MIRBuilder.buildMergeLikeInstr(
7989 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
7990
7991 // Truncate the new vector to the final result type
7992 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
7993 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
7994 else
7995 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
7996
7997 MI.eraseFromParent();
7998
7999 return Legalized;
8000 }
8001 return UnableToLegalize;
8002}
8003
8004LegalizerHelper::LegalizeResult
8005LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
8006 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8007 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8008 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8009 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8010 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8011 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
8012 MI.eraseFromParent();
8013 return Legalized;
8014}
8015
8016LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
8017 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
8018
8019 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
8020 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
8021
8022 MIRBuilder.setInstrAndDebugLoc(MI);
8023
8024 // If a rotate in the other direction is supported, use it.
8025 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
8026 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
8027 isPowerOf2_32(EltSizeInBits))
8028 return lowerRotateWithReverseRotate(MI);
8029
8030 // If a funnel shift is supported, use it.
8031 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8032 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
8033 bool IsFShLegal = false;
8034 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
8035 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
8036 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
8037 Register R3) {
8038 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
8039 MI.eraseFromParent();
8040 return Legalized;
8041 };
8042 // If a funnel shift in the other direction is supported, use it.
8043 if (IsFShLegal) {
8044 return buildFunnelShift(FShOpc, Dst, Src, Amt);
8045 } else if (isPowerOf2_32(EltSizeInBits)) {
8046 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
8047 return buildFunnelShift(RevFsh, Dst, Src, Amt);
8048 }
8049 }
8050
8051 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
8052 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
8053 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
8054 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
8055 Register ShVal;
8056 Register RevShiftVal;
8057 if (isPowerOf2_32(EltSizeInBits)) {
8058 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
8059 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
8060 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
8061 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
8062 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8063 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
8064 RevShiftVal =
8065 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
8066 } else {
8067 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
8068 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
8069 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
8070 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
8071 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
8072 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
8073 auto One = MIRBuilder.buildConstant(AmtTy, 1);
8074 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
8075 RevShiftVal =
8076 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
8077 }
8078 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
8079 MI.eraseFromParent();
8080 return Legalized;
8081}
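// e.g. for s8 with a power-of-two width: rotl x, c becomes
//   (x << (c & 7)) | (x >> (-c & 7))
// Masking the negated amount with w - 1 avoids an undefined shift by 8 when
// c == 0 (both shift amounts collapse to 0 and the OR yields x unchanged).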
8082
8083// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
8084// representation.
8085LegalizerHelper::LegalizeResult
8086LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
8087 auto [Dst, Src] = MI.getFirst2Regs();
8088 const LLT S64 = LLT::scalar(64);
8089 const LLT S32 = LLT::scalar(32);
8090 const LLT S1 = LLT::scalar(1);
8091
8092 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8093
8094 // unsigned cul2f(ulong u) {
8095 // uint lz = clz(u);
8096 // uint e = (u != 0) ? 127U + 63U - lz : 0;
8097 // u = (u << lz) & 0x7fffffffffffffffUL;
8098 // ulong t = u & 0xffffffffffUL;
8099 // uint v = (e << 23) | (uint)(u >> 40);
8100 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
8101 // return as_float(v + r);
8102 // }
8103
8104 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
8105 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
8106
8107 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
8108
8109 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
8110 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
8111
8112 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
8113 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
8114
8115 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
8116 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
8117
8118 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
8119
8120 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
8121 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
8122
8123 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
8124 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
8125 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
8126
8127 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
8128 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
8129 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
8130 auto One = MIRBuilder.buildConstant(S32, 1);
8131
8132 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
8133 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
8134 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
8135 MIRBuilder.buildAdd(Dst, V, R);
8136
8137 MI.eraseFromParent();
8138 return Legalized;
8139}
8140
8141// Expand s32 = G_UITOFP s64 to an IEEE float representation using bit
8142// operations and G_SITOFP
8145 auto [Dst, Src] = MI.getFirst2Regs();
8146 const LLT S64 = LLT::scalar(64);
8147 const LLT S32 = LLT::scalar(32);
8148 const LLT S1 = LLT::scalar(1);
8149
8150 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
8151
8152 // If the input fits in a signed i64 (i.e. Src <= INT64_MAX), simply reuse
8153 // SITOFP. Otherwise, divide the input by 2, round the result by ORing in the
8154 // lowest bit saved before the division, convert to float by SITOFP, and
8155 // multiply the result by 2.
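 // Halving is safe because the dropped bit acts as a sticky bit: f32 keeps only
 // 24 significand bits, so for inputs >= 2^63 the lowest input bit can only
 // matter for breaking a round-to-nearest tie, and ORing it into the halved
 // value preserves exactly that information before the final doubling.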
8156 auto One = MIRBuilder.buildConstant(S64, 1);
8157 auto Zero = MIRBuilder.buildConstant(S64, 0);
8158 // Result if Src < INT_MAX
8159 auto SmallResult = MIRBuilder.buildSITOFP(S32, Src);
8160 // Result if Src >= INT_MAX
8161 auto Halved = MIRBuilder.buildLShr(S64, Src, One);
8162 auto LowerBit = MIRBuilder.buildAnd(S64, Src, One);
8163 auto RoundedHalved = MIRBuilder.buildOr(S64, Halved, LowerBit);
8164 auto HalvedFP = MIRBuilder.buildSITOFP(S32, RoundedHalved);
8165 auto LargeResult = MIRBuilder.buildFAdd(S32, HalvedFP, HalvedFP);
8166 // Check whether the original value exceeds INT64_MAX by testing its sign bit
8167 // (a signed compare with zero) and pick one of the two conversions.
8168 auto IsLarge =
8169 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_SLT, S1, Src, Zero);
8170 MIRBuilder.buildSelect(Dst, IsLarge, LargeResult, SmallResult);
8171
8172 MI.eraseFromParent();
8173 return Legalized;
8174}
8175
8176// Expand s64 = G_UITOFP s64 using bit and float arithmetic operations to an
8177// IEEE double representation.
8180 auto [Dst, Src] = MI.getFirst2Regs();
8181 const LLT S64 = LLT::scalar(64);
8182 const LLT S32 = LLT::scalar(32);
8183
8184 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S64);
8185
8186 // We create the double value from two 32-bit parts whose exponents differ
8187 // by 32. Note that + and - are float operations that adjust the implicit
8188 // leading one; the bases 2^52 and 2^84 are for illustrative purposes.
8189 //
8190 // X = 2^52 * 1.0...LowBits
8191 // Y = 2^84 * 1.0...HighBits
8192 // Scratch = 2^84 * 1.0...HighBits - 2^84 * 1.0 - 2^52 * 1.0
8193 // = - 2^52 * 1.0...HighBits
8194 // Result = - 2^52 * 1.0...HighBits + 2^52 * 1.0...LowBits
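 // Concretely, (0x433 << 52) | LowBits reinterpreted as a double is exactly
 // 2^52 + LowBits, and (0x453 << 52) | HighBits is 2^84 + HighBits * 2^32;
 // TwoP52P84 is 2^84 + 2^52. The FSUB therefore yields HighBits * 2^32 - 2^52
 // exactly, and the final FADD rounds HighBits * 2^32 + LowBits exactly once.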
8195 auto TwoP52 = MIRBuilder.buildConstant(S64, UINT64_C(0x4330000000000000));
8196 auto TwoP84 = MIRBuilder.buildConstant(S64, UINT64_C(0x4530000000000000));
8197 auto TwoP52P84 = llvm::bit_cast<double>(UINT64_C(0x4530000000100000));
8198 auto TwoP52P84FP = MIRBuilder.buildFConstant(S64, TwoP52P84);
8199 auto HalfWidth = MIRBuilder.buildConstant(S64, 32);
8200
8201 auto LowBits = MIRBuilder.buildTrunc(S32, Src);
8202 LowBits = MIRBuilder.buildZExt(S64, LowBits);
8203 auto LowBitsFP = MIRBuilder.buildOr(S64, TwoP52, LowBits);
8204 auto HighBits = MIRBuilder.buildLShr(S64, Src, HalfWidth);
8205 auto HighBitsFP = MIRBuilder.buildOr(S64, TwoP84, HighBits);
8206 auto Scratch = MIRBuilder.buildFSub(S64, HighBitsFP, TwoP52P84FP);
8207 MIRBuilder.buildFAdd(Dst, Scratch, LowBitsFP);
8208
8209 MI.eraseFromParent();
8210 return Legalized;
8211}
8212
8213/// i64->fp16 itofp can be lowered to i64->f64, f64->f32, f32->f16. We cannot
8214/// convert fpround f64->f16 without double-rounding, so we manually perform
8215/// the lowering here, where we know it is valid.
8216static LegalizerHelper::LegalizeResult
8217loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src,
8218 LLT SrcTy, MachineIRBuilder &MIRBuilder) {
8219 auto M1 = MI.getOpcode() == TargetOpcode::G_UITOFP
8220 ? MIRBuilder.buildUITOFP(SrcTy, Src)
8221 : MIRBuilder.buildSITOFP(SrcTy, Src);
8222 LLT S32Ty = SrcTy.changeElementSize(32);
8223 auto M2 = MIRBuilder.buildFPTrunc(S32Ty, M1);
8224 MIRBuilder.buildFPTrunc(Dst, M2);
8225 MI.eraseFromParent();
8226 return LegalizerHelper::Legalized;
8227}
8228
8230 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8231
8232 if (SrcTy == LLT::scalar(1)) {
8233 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
8234 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8235 MIRBuilder.buildSelect(Dst, Src, True, False);
8236 MI.eraseFromParent();
8237 return Legalized;
8238 }
8239
8240 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8241 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8242
8243 if (SrcTy != LLT::scalar(64))
8244 return UnableToLegalize;
8245
8246 if (DstTy == LLT::scalar(32))
8247 // TODO: SelectionDAG has several alternative expansions to port which may
8248 // be more reasonable depending on the available instructions. We also need
8249 // a more advanced mechanism to choose an optimal version depending on
8250 // target features such as sitofp or CTLZ availability.
8252
8253 if (DstTy == LLT::scalar(64))
8255
8256 return UnableToLegalize;
8257}
8258
8260 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8261
8262 const LLT S64 = LLT::scalar(64);
8263 const LLT S32 = LLT::scalar(32);
8264 const LLT S1 = LLT::scalar(1);
8265
8266 if (SrcTy == S1) {
8267 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
8268 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
8269 MIRBuilder.buildSelect(Dst, Src, True, False);
8270 MI.eraseFromParent();
8271 return Legalized;
8272 }
8273
8274 if (DstTy.getScalarSizeInBits() == 16 && SrcTy.getScalarSizeInBits() == 64)
8275 return loweri64tof16ITOFP(MI, Dst, DstTy, Src, SrcTy, MIRBuilder);
8276
8277 if (SrcTy != S64)
8278 return UnableToLegalize;
8279
8280 if (DstTy == S32) {
8281 // signed cl2f(long l) {
8282 // long s = l >> 63;
8283 // float r = cul2f((l + s) ^ s);
8284 // return s ? -r : r;
8285 // }
8286 Register L = Src;
8287 auto SignBit = MIRBuilder.buildConstant(S64, 63);
8288 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
8289
8290 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
8291 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
8292 auto R = MIRBuilder.buildUITOFP(S32, Xor);
8293
8294 auto RNeg = MIRBuilder.buildFNeg(S32, R);
8295 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
8296 MIRBuilder.buildConstant(S64, 0));
8297 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
8298 MI.eraseFromParent();
8299 return Legalized;
8300 }
8301
8302 return UnableToLegalize;
8303}
8304
8306 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8307 const LLT S64 = LLT::scalar(64);
8308 const LLT S32 = LLT::scalar(32);
8309
8310 if (SrcTy != S64 && SrcTy != S32)
8311 return UnableToLegalize;
8312 if (DstTy != S32 && DstTy != S64)
8313 return UnableToLegalize;
8314
8315 // FPTOSI gives the same result as FPTOUI for positive signed integers.
8316 // FPTOUI needs to deal with fp values that convert to unsigned integers
8317 // greater than or equal to 2^31 for float or 2^63 for double, 2^Exp for brevity.
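 // In other words, for a destination of width N:
 //   fptoui(x) ==> x < 2^(N-1) ? fptosi(x)
 //                             : fptosi(x - 2^(N-1)) ^ (1 << (N-1))
 // The XOR simply sets the sign bit, i.e. adds 2^(N-1) back, since
 // fptosi(x - 2^(N-1)) is non-negative and below 2^(N-1) for in-range inputs.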
8318
8319 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
8320 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
8321 : APFloat::IEEEdouble(),
8322 APInt::getZero(SrcTy.getSizeInBits()));
8323 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
8324
8325 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
8326
8327 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
8328 // For an fp Value greater than or equal to the Threshold (2^Exp), we use FPTOSI
8329 // on (Value - 2^Exp) and add 2^Exp back by setting the highest bit of the result.
8330 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
8331 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
8332 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
8333 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
8334
8335 const LLT S1 = LLT::scalar(1);
8336
8337 MachineInstrBuilder FCMP =
8338 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
8339 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
8340
8341 MI.eraseFromParent();
8342 return Legalized;
8343}
8344
8346 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8347 const LLT S64 = LLT::scalar(64);
8348 const LLT S32 = LLT::scalar(32);
8349
8350 // FIXME: Only f32 to i64 conversions are supported.
8351 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
8352 return UnableToLegalize;
8353
8354 // Expand f32 -> i64 conversion
8355 // This algorithm comes from compiler-rt's implementation of fixsfdi:
8356 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
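 // In outline (NaN and infinity give unspecified results, as in fixsfdi):
 //   Exponent = biased_exponent - 127, R = mantissa | (1 << 23)
 //   magnitude = Exponent > 23 ? R << (Exponent - 23) : R >> (23 - Exponent)
 //   result = Exponent < 0 ? 0 : (magnitude ^ Sign) - Sign  // conditional negate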
8357
8358 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
8359
8360 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
8361 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
8362
8363 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
8364 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
8365
8366 auto SignMask = MIRBuilder.buildConstant(SrcTy,
8367 APInt::getSignMask(SrcEltBits));
8368 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
8369 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
8370 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
8371 Sign = MIRBuilder.buildSExt(DstTy, Sign);
8372
8373 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
8374 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
8375 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
8376
8377 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
8378 R = MIRBuilder.buildZExt(DstTy, R);
8379
8380 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
8381 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
8382 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
8383 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
8384
8385 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
8386 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
8387
8388 const LLT S1 = LLT::scalar(1);
8389 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
8390 S1, Exponent, ExponentLoBit);
8391
8392 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
8393
8394 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
8395 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
8396
8397 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
8398
8399 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
8400 S1, Exponent, ZeroSrcTy);
8401
8402 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
8403 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
8404
8405 MI.eraseFromParent();
8406 return Legalized;
8407}
8408
8411 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
8412
8413 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI_SAT;
8414 unsigned SatWidth = DstTy.getScalarSizeInBits();
8415
8416 // Determine minimum and maximum integer values and their corresponding
8417 // floating-point values.
8418 APInt MinInt, MaxInt;
8419 if (IsSigned) {
8420 MinInt = APInt::getSignedMinValue(SatWidth);
8421 MaxInt = APInt::getSignedMaxValue(SatWidth);
8422 } else {
8423 MinInt = APInt::getMinValue(SatWidth);
8424 MaxInt = APInt::getMaxValue(SatWidth);
8425 }
8426
8427 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8428 APFloat MinFloat(Semantics);
8429 APFloat MaxFloat(Semantics);
8430
8431 APFloat::opStatus MinStatus =
8432 MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
8433 APFloat::opStatus MaxStatus =
8434 MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
8435 bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
8436 !(MaxStatus & APFloat::opStatus::opInexact);
8437
8438 // If the integer bounds are exactly representable as floats, emit a
8439 // min+max+fptoi sequence. Otherwise we have to use a sequence of comparisons
8440 // and selects.
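 // For example, for f32 -> i32 G_FPTOSI_SAT: MinInt = -2^31 is exactly
 // representable as a float, but MaxInt = 2^31 - 1 is not, so that combination
 // takes the compare-and-select path below.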
8441 if (AreExactFloatBounds) {
8442 // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
8443 auto MaxC = MIRBuilder.buildFConstant(SrcTy, MinFloat);
8444 auto MaxP = MIRBuilder.buildFCmp(CmpInst::FCMP_OGT,
8445 SrcTy.changeElementSize(1), Src, MaxC);
8446 auto Max = MIRBuilder.buildSelect(SrcTy, MaxP, Src, MaxC);
8447 // Clamp by MaxFloat from above. NaN cannot occur.
8448 auto MinC = MIRBuilder.buildFConstant(SrcTy, MaxFloat);
8449 auto MinP =
8450 MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, SrcTy.changeElementSize(1), Max,
8451 MinC, MachineInstr::FmNoNans);
8452 auto Min =
8453 MIRBuilder.buildSelect(SrcTy, MinP, Max, MinC, MachineInstr::FmNoNans);
8454 // Convert clamped value to integer. In the unsigned case we're done,
8455 // because we mapped NaN to MinFloat, which will cast to zero.
8456 if (!IsSigned) {
8457 MIRBuilder.buildFPTOUI(Dst, Min);
8458 MI.eraseFromParent();
8459 return Legalized;
8460 }
8461
8462 // Otherwise, select 0 if Src is NaN.
8463 auto FpToInt = MIRBuilder.buildFPTOSI(DstTy, Min);
8464 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8465 DstTy.changeElementSize(1), Src, Src);
8466 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0),
8467 FpToInt);
8468 MI.eraseFromParent();
8469 return Legalized;
8470 }
8471
8472 // Result of direct conversion. The assumption here is that the operation is
8473 // non-trapping and it's fine to apply it to an out-of-range value if we
8474 // select it away later.
8475 auto FpToInt = IsSigned ? MIRBuilder.buildFPTOSI(DstTy, Src)
8476 : MIRBuilder.buildFPTOUI(DstTy, Src);
8477
8478 // If Src ULT MinFloat, select MinInt. In particular, this also selects
8479 // MinInt if Src is NaN.
8480 auto ULT =
8481 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, SrcTy.changeElementSize(1), Src,
8482 MIRBuilder.buildFConstant(SrcTy, MinFloat));
8483 auto Max = MIRBuilder.buildSelect(
8484 DstTy, ULT, MIRBuilder.buildConstant(DstTy, MinInt), FpToInt);
8485 // If Src OGT MaxFloat, select MaxInt.
8486 auto OGT =
8487 MIRBuilder.buildFCmp(CmpInst::FCMP_OGT, SrcTy.changeElementSize(1), Src,
8488 MIRBuilder.buildFConstant(SrcTy, MaxFloat));
8489
8490 // In the unsigned case we are done, because we mapped NaN to MinInt, which
8491 // is already zero.
8492 if (!IsSigned) {
8493 MIRBuilder.buildSelect(Dst, OGT, MIRBuilder.buildConstant(DstTy, MaxInt),
8494 Max);
8495 MI.eraseFromParent();
8496 return Legalized;
8497 }
8498
8499 // Otherwise, select 0 if Src is NaN.
8500 auto Min = MIRBuilder.buildSelect(
8501 DstTy, OGT, MIRBuilder.buildConstant(DstTy, MaxInt), Max);
8502 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_UNO,
8503 DstTy.changeElementSize(1), Src, Src);
8504 MIRBuilder.buildSelect(Dst, IsZero, MIRBuilder.buildConstant(DstTy, 0), Min);
8505 MI.eraseFromParent();
8506 return Legalized;
8507}
8508
8509// f64 -> f16 conversion using round-to-nearest-even rounding mode.
8512 const LLT S1 = LLT::scalar(1);
8513 const LLT S32 = LLT::scalar(32);
8514
8515 auto [Dst, Src] = MI.getFirst2Regs();
8516 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
8517 MRI.getType(Src).getScalarType() == LLT::scalar(64));
8518
8519 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
8520 return UnableToLegalize;
8521
8522 if (MI.getFlag(MachineInstr::FmAfn)) {
8523 unsigned Flags = MI.getFlags();
8524 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
8525 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
8526 MI.eraseFromParent();
8527 return Legalized;
8528 }
8529
8530 const unsigned ExpMask = 0x7ff;
8531 const unsigned ExpBiasf64 = 1023;
8532 const unsigned ExpBiasf16 = 15;
8533
8534 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
8535 Register U = Unmerge.getReg(0);
8536 Register UH = Unmerge.getReg(1);
8537
8538 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
8539 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
8540
8541 // Subtract the fp64 exponent bias (1023) to get the real exponent and
8542 // add the f16 bias (15) to get the biased exponent for the f16 format.
8543 E = MIRBuilder.buildAdd(
8544 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
8545
8546 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
8547 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
8548
8549 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
8550 MIRBuilder.buildConstant(S32, 0x1ff));
8551 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
8552
8553 auto Zero = MIRBuilder.buildConstant(S32, 0);
8554 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
8555 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
8556 M = MIRBuilder.buildOr(S32, M, Lo40Set);
8557
8558 // (M != 0 ? 0x0200 : 0) | 0x7c00;
8559 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
8560 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
8561 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
8562
8563 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
8564 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
8565
8566 // N = M | (E << 12);
8567 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
8568 auto N = MIRBuilder.buildOr(S32, M, EShl12);
8569
8570 // B = clamp(1-E, 0, 13);
8571 auto One = MIRBuilder.buildConstant(S32, 1);
8572 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
8573 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
8574 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
8575
8576 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
8577 MIRBuilder.buildConstant(S32, 0x1000));
8578
8579 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
8580 auto D0 = MIRBuilder.buildShl(S32, D, B);
8581
8582 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
8583 D0, SigSetHigh);
8584 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
8585 D = MIRBuilder.buildOr(S32, D, D1);
8586
8587 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
8588 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
8589
8590 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
8591 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
8592
8593 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
8594 MIRBuilder.buildConstant(S32, 3));
8595 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
8596
8597 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
8598 MIRBuilder.buildConstant(S32, 5));
8599 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
8600
8601 V1 = MIRBuilder.buildOr(S32, V0, V1);
8602 V = MIRBuilder.buildAdd(S32, V, V1);
8603
8604 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
8605 E, MIRBuilder.buildConstant(S32, 30));
8606 V = MIRBuilder.buildSelect(S32, CmpEGt30,
8607 MIRBuilder.buildConstant(S32, 0x7c00), V);
8608
8609 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
8610 E, MIRBuilder.buildConstant(S32, 1039));
8611 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
8612
8613 // Extract the sign bit.
8614 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
8615 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
8616
8617 // Insert the sign bit
8618 V = MIRBuilder.buildOr(S32, Sign, V);
8619
8620 MIRBuilder.buildTrunc(Dst, V);
8621 MI.eraseFromParent();
8622 return Legalized;
8623}
8624
8627 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
8628 const LLT S64 = LLT::scalar(64);
8629 const LLT S16 = LLT::scalar(16);
8630
8631 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
8633
8634 return UnableToLegalize;
8635}
8636
8638 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8639 LLT Ty = MRI.getType(Dst);
8640
8641 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
8642 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
8643 MI.eraseFromParent();
8644 return Legalized;
8645}
8646
8647static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
8648 switch (Opc) {
8649 case TargetOpcode::G_SMIN:
8650 return CmpInst::ICMP_SLT;
8651 case TargetOpcode::G_SMAX:
8652 return CmpInst::ICMP_SGT;
8653 case TargetOpcode::G_UMIN:
8654 return CmpInst::ICMP_ULT;
8655 case TargetOpcode::G_UMAX:
8656 return CmpInst::ICMP_UGT;
8657 default:
8658 llvm_unreachable("not in integer min/max");
8659 }
8660}
8661
8663 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8664
8665 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
8666 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
8667
8668 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
8669 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
8670
8671 MI.eraseFromParent();
8672 return Legalized;
8673}
8674
8677 GSUCmp *Cmp = cast<GSUCmp>(&MI);
8678
8679 Register Dst = Cmp->getReg(0);
8680 LLT DstTy = MRI.getType(Dst);
8681 LLT SrcTy = MRI.getType(Cmp->getReg(1));
8682 LLT CmpTy = DstTy.changeElementSize(1);
8683
8684 CmpInst::Predicate LTPredicate = Cmp->isSigned()
8685 ? CmpInst::ICMP_SLT
8686 : CmpInst::ICMP_ULT;
8687 CmpInst::Predicate GTPredicate = Cmp->isSigned()
8688 ? CmpInst::ICMP_SGT
8689 : CmpInst::ICMP_UGT;
8690
8691 auto Zero = MIRBuilder.buildConstant(DstTy, 0);
8692 auto IsGT = MIRBuilder.buildICmp(GTPredicate, CmpTy, Cmp->getLHSReg(),
8693 Cmp->getRHSReg());
8694 auto IsLT = MIRBuilder.buildICmp(LTPredicate, CmpTy, Cmp->getLHSReg(),
8695 Cmp->getRHSReg());
8696
8697 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
8698 auto BC = TLI.getBooleanContents(DstTy.isVector(), /*isFP=*/false);
8699 if (TLI.preferSelectsOverBooleanArithmetic(
8700 getApproximateEVTForLLT(SrcTy, Ctx)) ||
8702 auto One = MIRBuilder.buildConstant(DstTy, 1);
8703 auto SelectZeroOrOne = MIRBuilder.buildSelect(DstTy, IsGT, One, Zero);
8704
8705 auto MinusOne = MIRBuilder.buildConstant(DstTy, -1);
8706 MIRBuilder.buildSelect(Dst, IsLT, MinusOne, SelectZeroOrOne);
8707 } else {
8709 std::swap(IsGT, IsLT);
8710 // Extend boolean results to DstTy, which is at least i2, before subtracting
8711 // them.
8712 unsigned BoolExtOp =
8713 MIRBuilder.getBoolExtOp(DstTy.isVector(), /*isFP=*/false);
8714 IsGT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsGT});
8715 IsLT = MIRBuilder.buildInstr(BoolExtOp, {DstTy}, {IsLT});
8716 MIRBuilder.buildSub(Dst, IsGT, IsLT);
8717 }
8718
8719 MI.eraseFromParent();
8720 return Legalized;
8721}
8722
8725 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
8726 const int Src0Size = Src0Ty.getScalarSizeInBits();
8727 const int Src1Size = Src1Ty.getScalarSizeInBits();
8728
8729 auto SignBitMask = MIRBuilder.buildConstant(
8730 Src0Ty, APInt::getSignMask(Src0Size));
8731
8732 auto NotSignBitMask = MIRBuilder.buildConstant(
8733 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
8734
8735 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
8736 Register And1;
8737 if (Src0Ty == Src1Ty) {
8738 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
8739 } else if (Src0Size > Src1Size) {
8740 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
8741 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
8742 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
8743 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
8744 } else {
8745 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
8746 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
8747 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
8748 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
8749 }
8750
8751 // Be careful about setting nsz/nnan/ninf on every instruction, since the
8752 // constants are a nan and -0.0, but the final result should preserve
8753 // everything.
8754 unsigned Flags = MI.getFlags();
8755
8756 // We masked the sign bit and the not-sign bit, so these are disjoint.
8757 Flags |= MachineInstr::Disjoint;
8758
8759 MIRBuilder.buildOr(Dst, And0, And1, Flags);
8760
8761 MI.eraseFromParent();
8762 return Legalized;
8763}
8764
8767 // FIXME: fminnum/fmaxnum and fminimumnum/fmaximumnum should not have
8768 // identical handling. fminimumnum/fmaximumnum also need a path that does not
8769 // depend on fminnum/fmaxnum.
8770
8771 unsigned NewOp;
8772 switch (MI.getOpcode()) {
8773 case TargetOpcode::G_FMINNUM:
8774 NewOp = TargetOpcode::G_FMINNUM_IEEE;
8775 break;
8776 case TargetOpcode::G_FMINIMUMNUM:
8777 NewOp = TargetOpcode::G_FMINNUM;
8778 break;
8779 case TargetOpcode::G_FMAXNUM:
8780 NewOp = TargetOpcode::G_FMAXNUM_IEEE;
8781 break;
8782 case TargetOpcode::G_FMAXIMUMNUM:
8783 NewOp = TargetOpcode::G_FMAXNUM;
8784 break;
8785 default:
8786 llvm_unreachable("unexpected min/max opcode");
8787 }
8788
8789 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8790 LLT Ty = MRI.getType(Dst);
8791
8792 if (!MI.getFlag(MachineInstr::FmNoNans)) {
8793 // Insert G_FCANONICALIZEs if we might need to quiet a signaling NaN to get
8794 // correct sNaN behavior.
8795
8796 // Note this must be done here, and not as an optimization combine in the
8797 // absence of a dedicated quiet-sNaN instruction, as we're using an
8798 // omni-purpose G_FCANONICALIZE.
8799 if (!isKnownNeverSNaN(Src0, MRI))
8800 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
8801
8802 if (!isKnownNeverSNaN(Src1, MRI))
8803 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
8804 }
8805
8806 // If there are no nans, it's safe to simply replace this with the non-IEEE
8807 // version.
8808 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
8809 MI.eraseFromParent();
8810 return Legalized;
8811}
8812
8815 unsigned Opc = MI.getOpcode();
8816 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
8817 LLT Ty = MRI.getType(Dst);
8818 LLT CmpTy = Ty.changeElementSize(1);
8819
8820 bool IsMax = (Opc == TargetOpcode::G_FMAXIMUM);
8821 unsigned OpcIeee =
8822 IsMax ? TargetOpcode::G_FMAXNUM_IEEE : TargetOpcode::G_FMINNUM_IEEE;
8823 unsigned OpcNonIeee =
8824 IsMax ? TargetOpcode::G_FMAXNUM : TargetOpcode::G_FMINNUM;
8825 bool MinMaxMustRespectOrderedZero = false;
8826 Register Res;
8827
8828 // IEEE variants don't need canonicalization
8829 if (LI.isLegalOrCustom({OpcIeee, Ty})) {
8830 Res = MIRBuilder.buildInstr(OpcIeee, {Ty}, {Src0, Src1}).getReg(0);
8831 MinMaxMustRespectOrderedZero = true;
8832 } else if (LI.isLegalOrCustom({OpcNonIeee, Ty})) {
8833 Res = MIRBuilder.buildInstr(OpcNonIeee, {Ty}, {Src0, Src1}).getReg(0);
8834 } else {
8835 auto Compare = MIRBuilder.buildFCmp(
8836 IsMax ? CmpInst::FCMP_OGT : CmpInst::FCMP_OLT, CmpTy, Src0, Src1);
8837 Res = MIRBuilder.buildSelect(Ty, Compare, Src0, Src1).getReg(0);
8838 }
8839
8840 // Propagate any NaN of both operands
8841 if (!MI.getFlag(MachineInstr::FmNoNans) &&
8842 (!isKnownNeverNaN(Src0, MRI) || !isKnownNeverNaN(Src1, MRI))) {
8843 auto IsOrdered = MIRBuilder.buildFCmp(CmpInst::FCMP_ORD, CmpTy, Src0, Src1);
8844
8845 LLT ElementTy = Ty.isScalar() ? Ty : Ty.getElementType();
8846 APFloat NaNValue = APFloat::getNaN(getFltSemanticForLLT(ElementTy));
8847 Register NaN = MIRBuilder.buildFConstant(ElementTy, NaNValue).getReg(0);
8848 if (Ty.isVector())
8849 NaN = MIRBuilder.buildSplatBuildVector(Ty, NaN).getReg(0);
8850
8851 Res = MIRBuilder.buildSelect(Ty, IsOrdered, Res, NaN).getReg(0);
8852 }
8853
8854 // fminimum/fmaximum require -0.0 to compare less than +0.0
8855 if (!MinMaxMustRespectOrderedZero && !MI.getFlag(MachineInstr::FmNsz)) {
8856 GISelValueTracking VT(MIRBuilder.getMF());
8857 KnownFPClass Src0Info = VT.computeKnownFPClass(Src0, fcZero);
8858 KnownFPClass Src1Info = VT.computeKnownFPClass(Src1, fcZero);
8859
8860 if (!Src0Info.isKnownNeverZero() && !Src1Info.isKnownNeverZero()) {
8861 const unsigned Flags = MI.getFlags();
8862 Register Zero = MIRBuilder.buildFConstant(Ty, 0.0).getReg(0);
8863 auto IsZero = MIRBuilder.buildFCmp(CmpInst::FCMP_OEQ, CmpTy, Res, Zero);
8864
8865 unsigned TestClass = IsMax ? fcPosZero : fcNegZero;
8866
8867 auto LHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src0, TestClass);
8868 auto LHSSelect =
8869 MIRBuilder.buildSelect(Ty, LHSTestZero, Src0, Res, Flags);
8870
8871 auto RHSTestZero = MIRBuilder.buildIsFPClass(CmpTy, Src1, TestClass);
8872 auto RHSSelect =
8873 MIRBuilder.buildSelect(Ty, RHSTestZero, Src1, LHSSelect, Flags);
8874
8875 Res = MIRBuilder.buildSelect(Ty, IsZero, RHSSelect, Res, Flags).getReg(0);
8876 }
8877 }
8878
8879 MIRBuilder.buildCopy(Dst, Res);
8880 MI.eraseFromParent();
8881 return Legalized;
8882}
8883
8885 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
8886 Register DstReg = MI.getOperand(0).getReg();
8887 LLT Ty = MRI.getType(DstReg);
8888 unsigned Flags = MI.getFlags();
8889
8890 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
8891 Flags);
8892 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
8893 MI.eraseFromParent();
8894 return Legalized;
8895}
8896
8899 auto [DstReg, X] = MI.getFirst2Regs();
8900 const unsigned Flags = MI.getFlags();
8901 const LLT Ty = MRI.getType(DstReg);
8902 const LLT CondTy = Ty.changeElementSize(1);
8903
8904 // round(x) =>
8905 // t = trunc(x);
8906 // d = fabs(x - t);
8907 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
8908 // return t + o;
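 // E.g. round(2.5): t = 2.0, d = 0.5, o = +1.0 -> 3.0; round(-2.5): t = -2.0,
 // d = 0.5, o = -1.0 -> -3.0. Halfway cases round away from zero, as llvm.round
 // requires.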
8909
8910 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
8911
8912 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
8913 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
8914
8915 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
8916 auto Cmp =
8917 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
8918
8919 // Could emit G_UITOFP instead
8920 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
8921 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8922 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
8923 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
8924
8925 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
8926
8927 MI.eraseFromParent();
8928 return Legalized;
8929}
8930
8932 auto [DstReg, SrcReg] = MI.getFirst2Regs();
8933 unsigned Flags = MI.getFlags();
8934 LLT Ty = MRI.getType(DstReg);
8935 const LLT CondTy = Ty.changeElementSize(1);
8936
8937 // result = trunc(src);
8938 // if (src < 0.0 && src != result)
8939 // result += -1.0.
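 // E.g. floor(-2.3): trunc gives -2.0, and since -2.3 < 0.0 and -2.3 != -2.0
 // the correction is -1.0, giving -3.0. For non-negative or already integral
 // inputs the conjunction is false and the correction converts to 0.0.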
8940
8941 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
8942 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
8943
8944 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
8945 SrcReg, Zero, Flags);
8946 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
8947 SrcReg, Trunc, Flags);
8948 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
8949 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
8950
8951 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
8952 MI.eraseFromParent();
8953 return Legalized;
8954}
8955
8958 const unsigned NumOps = MI.getNumOperands();
8959 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
8960 unsigned PartSize = Src0Ty.getSizeInBits();
8961
8962 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
8963 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
8964
8965 for (unsigned I = 2; I != NumOps; ++I) {
8966 const unsigned Offset = (I - 1) * PartSize;
8967
8968 Register SrcReg = MI.getOperand(I).getReg();
8969 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
8970
8971 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
8972 MRI.createGenericVirtualRegister(WideTy);
8973
8974 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
8975 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
8976 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
8977 ResultReg = NextResult;
8978 }
8979
8980 if (DstTy.isPointer()) {
8981 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
8982 DstTy.getAddressSpace())) {
8983 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
8984 return UnableToLegalize;
8985 }
8986
8987 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
8988 }
8989
8990 MI.eraseFromParent();
8991 return Legalized;
8992}
8993
8996 const unsigned NumDst = MI.getNumOperands() - 1;
8997 Register SrcReg = MI.getOperand(NumDst).getReg();
8998 Register Dst0Reg = MI.getOperand(0).getReg();
8999 LLT DstTy = MRI.getType(Dst0Reg);
9000 if (DstTy.isPointer())
9001 return UnableToLegalize; // TODO
9002
9003 SrcReg = coerceToScalar(SrcReg);
9004 if (!SrcReg)
9005 return UnableToLegalize;
9006
9007 // Expand scalarizing unmerge as bitcast to integer and shift.
9008 LLT IntTy = MRI.getType(SrcReg);
9009
9010 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
9011
9012 const unsigned DstSize = DstTy.getSizeInBits();
9013 unsigned Offset = DstSize;
9014 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
9015 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
9016 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
9017 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
9018 }
9019
9020 MI.eraseFromParent();
9021 return Legalized;
9022}
9023
9024/// Lower a vector extract or insert by writing the vector to a stack temporary
9025/// and reloading the element or vector.
9026///
9027/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
9028/// =>
9029/// %stack_temp = G_FRAME_INDEX
9030/// G_STORE %vec, %stack_temp
9031/// %idx = clamp(%idx, %vec.getNumElements())
9032/// %element_ptr = G_PTR_ADD %stack_temp, %idx
9033/// %dst = G_LOAD %element_ptr
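///
/// Clamping the index keeps the computed address inside the stack temporary,
/// so an out-of-bounds index (whose result is undefined anyway) cannot fault;
/// the value loaded for such an index is simply not meaningful.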
9036 Register DstReg = MI.getOperand(0).getReg();
9037 Register SrcVec = MI.getOperand(1).getReg();
9038 Register InsertVal;
9039 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
9040 InsertVal = MI.getOperand(2).getReg();
9041
9042 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
9043
9044 LLT VecTy = MRI.getType(SrcVec);
9045 LLT EltTy = VecTy.getElementType();
9046 unsigned NumElts = VecTy.getNumElements();
9047
9048 int64_t IdxVal;
9049 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
9050 SmallVector<Register, 8> SrcRegs;
9051 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
9052
9053 if (InsertVal) {
9054 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
9055 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
9056 } else {
9057 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
9058 }
9059
9060 MI.eraseFromParent();
9061 return Legalized;
9062 }
9063
9064 if (!EltTy.isByteSized()) { // Not implemented.
9065 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
9066 return UnableToLegalize;
9067 }
9068
9069 unsigned EltBytes = EltTy.getSizeInBytes();
9070 Align VecAlign = getStackTemporaryAlignment(VecTy);
9071 Align EltAlign;
9072
9073 MachinePointerInfo PtrInfo;
9074 auto StackTemp = createStackTemporary(
9075 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
9076 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
9077
9078 // Get the pointer to the element, and be sure not to hit undefined behavior
9079 // if the index is out of bounds.
9080 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
9081
9082 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
9083 int64_t Offset = IdxVal * EltBytes;
9084 PtrInfo = PtrInfo.getWithOffset(Offset);
9085 EltAlign = commonAlignment(VecAlign, Offset);
9086 } else {
9087 // We lose information with a variable offset.
9088 EltAlign = getStackTemporaryAlignment(EltTy);
9089 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
9090 }
9091
9092 if (InsertVal) {
9093 // Write the inserted element
9094 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
9095
9096 // Reload the whole vector.
9097 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
9098 } else {
9099 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
9100 }
9101
9102 MI.eraseFromParent();
9103 return Legalized;
9104}
9105
9108 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
9109 MI.getFirst3RegLLTs();
9110 LLT IdxTy = LLT::scalar(32);
9111
9112 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
9113 Register Undef;
9114 SmallVector<Register, 32> BuildVec;
9115 LLT EltTy = DstTy.getScalarType();
9116
9117 DenseMap<unsigned, Register> CachedExtract;
9118
9119 for (int Idx : Mask) {
9120 if (Idx < 0) {
9121 if (!Undef.isValid())
9122 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
9123 BuildVec.push_back(Undef);
9124 continue;
9125 }
9126
9127 assert(!Src0Ty.isScalar() && "Unexpected scalar G_SHUFFLE_VECTOR");
9128
9129 int NumElts = Src0Ty.getNumElements();
9130 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
9131 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
9132 auto [It, Inserted] = CachedExtract.try_emplace(Idx);
9133 if (Inserted) {
9134 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
9135 It->second =
9136 MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK).getReg(0);
9137 }
9138 BuildVec.push_back(It->second);
9139 }
9140
9141 assert(DstTy.isVector() && "Unexpected scalar G_SHUFFLE_VECTOR");
9142 MIRBuilder.buildBuildVector(DstReg, BuildVec);
9143 MI.eraseFromParent();
9144 return Legalized;
9145}
9146
9149 auto [Dst, DstTy, Vec, VecTy, Mask, MaskTy, Passthru, PassthruTy] =
9150 MI.getFirst4RegLLTs();
9151
9152 if (VecTy.isScalableVector())
9153 report_fatal_error("Cannot expand masked_compress for scalable vectors.");
9154
9155 Align VecAlign = getStackTemporaryAlignment(VecTy);
9156 MachinePointerInfo PtrInfo;
9157 Register StackPtr =
9158 createStackTemporary(TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign,
9159 PtrInfo)
9160 .getReg(0);
9161 MachinePointerInfo ValPtrInfo =
9162 MachinePointerInfo::getUnknownStack(*MI.getMF());
9163
9164 LLT IdxTy = LLT::scalar(32);
9165 LLT ValTy = VecTy.getElementType();
9166 Align ValAlign = getStackTemporaryAlignment(ValTy);
9167
9168 auto OutPos = MIRBuilder.buildConstant(IdxTy, 0);
9169
9170 bool HasPassthru =
9171 MRI.getVRegDef(Passthru)->getOpcode() != TargetOpcode::G_IMPLICIT_DEF;
9172
9173 if (HasPassthru)
9174 MIRBuilder.buildStore(Passthru, StackPtr, PtrInfo, VecAlign);
9175
9176 Register LastWriteVal;
9177 std::optional<APInt> PassthruSplatVal =
9178 isConstantOrConstantSplatVector(*MRI.getVRegDef(Passthru), MRI);
9179
9180 if (PassthruSplatVal.has_value()) {
9181 LastWriteVal =
9182 MIRBuilder.buildConstant(ValTy, PassthruSplatVal.value()).getReg(0);
9183 } else if (HasPassthru) {
9184 auto Popcount = MIRBuilder.buildZExt(MaskTy.changeElementSize(32), Mask);
9185 Popcount = MIRBuilder.buildInstr(TargetOpcode::G_VECREDUCE_ADD,
9186 {LLT::scalar(32)}, {Popcount});
9187
9188 Register LastElmtPtr =
9189 getVectorElementPointer(StackPtr, VecTy, Popcount.getReg(0));
9190 LastWriteVal =
9191 MIRBuilder.buildLoad(ValTy, LastElmtPtr, ValPtrInfo, ValAlign)
9192 .getReg(0);
9193 }
9194
9195 unsigned NumElmts = VecTy.getNumElements();
9196 for (unsigned I = 0; I < NumElmts; ++I) {
9197 auto Idx = MIRBuilder.buildConstant(IdxTy, I);
9198 auto Val = MIRBuilder.buildExtractVectorElement(ValTy, Vec, Idx);
9199 Register ElmtPtr =
9200 getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9201 MIRBuilder.buildStore(Val, ElmtPtr, ValPtrInfo, ValAlign);
9202
9203 LLT MaskITy = MaskTy.getElementType();
9204 auto MaskI = MIRBuilder.buildExtractVectorElement(MaskITy, Mask, Idx);
9205 if (MaskITy.getSizeInBits() > 1)
9206 MaskI = MIRBuilder.buildTrunc(LLT::scalar(1), MaskI);
9207
9208 MaskI = MIRBuilder.buildZExt(IdxTy, MaskI);
9209 OutPos = MIRBuilder.buildAdd(IdxTy, OutPos, MaskI);
9210
9211 if (HasPassthru && I == NumElmts - 1) {
9212 auto EndOfVector =
9213 MIRBuilder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
9214 auto AllLanesSelected = MIRBuilder.buildICmp(
9215 CmpInst::ICMP_UGT, LLT::scalar(1), OutPos, EndOfVector);
9216 OutPos = MIRBuilder.buildInstr(TargetOpcode::G_UMIN, {IdxTy},
9217 {OutPos, EndOfVector});
9218 ElmtPtr = getVectorElementPointer(StackPtr, VecTy, OutPos.getReg(0));
9219
9220 LastWriteVal =
9221 MIRBuilder.buildSelect(ValTy, AllLanesSelected, Val, LastWriteVal)
9222 .getReg(0);
9223 MIRBuilder.buildStore(LastWriteVal, ElmtPtr, ValPtrInfo, ValAlign);
9224 }
9225 }
9226
9227 // TODO: Use StackPtr's FrameIndex alignment.
9228 MIRBuilder.buildLoad(Dst, StackPtr, PtrInfo, VecAlign);
9229
9230 MI.eraseFromParent();
9231 return Legalized;
9232}
9233
9234Register LegalizerHelper::getDynStackAllocTargetPtr(Register SPReg,
9235 Register AllocSize,
9236 Align Alignment,
9237 LLT PtrTy) {
9238 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
9239
9240 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
9241 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
9242
9243 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
9244 // have to generate an extra instruction to negate the alloc and then use
9245 // G_PTR_ADD to add the negative offset.
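 // The subtraction computes the new SP; e.g. with Alignment = 16 the mask
 // below is ...fffffff0, so the AND rounds that SP down to a 16-byte boundary,
 // the correct direction for the downward-growing stacks this lowering
 // supports.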
9246 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
9247 if (Alignment > Align(1)) {
9248 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
9249 AlignMask.negate();
9250 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
9251 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
9252 }
9253
9254 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
9255}
9256
9259 const auto &MF = *MI.getMF();
9260 const auto &TFI = *MF.getSubtarget().getFrameLowering();
9261 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
9262 return UnableToLegalize;
9263
9264 Register Dst = MI.getOperand(0).getReg();
9265 Register AllocSize = MI.getOperand(1).getReg();
9266 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
9267
9268 LLT PtrTy = MRI.getType(Dst);
9269 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
9270 Register SPTmp =
9271 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
9272
9273 MIRBuilder.buildCopy(SPReg, SPTmp);
9274 MIRBuilder.buildCopy(Dst, SPTmp);
9275
9276 MI.eraseFromParent();
9277 return Legalized;
9278}
9279
9282 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9283 if (!StackPtr)
9284 return UnableToLegalize;
9285
9286 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
9287 MI.eraseFromParent();
9288 return Legalized;
9289}
9290
9293 Register StackPtr = TLI.getStackPointerRegisterToSaveRestore();
9294 if (!StackPtr)
9295 return UnableToLegalize;
9296
9297 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
9298 MI.eraseFromParent();
9299 return Legalized;
9300}
9301
9304 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9305 unsigned Offset = MI.getOperand(2).getImm();
9306
9307 // Extract sub-vector or one element
9308 if (SrcTy.isVector()) {
9309 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
9310 unsigned DstSize = DstTy.getSizeInBits();
9311
9312 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
9313 (Offset + DstSize <= SrcTy.getSizeInBits())) {
9314 // Unmerge and allow access to each Src element for the artifact combiner.
9315 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
9316
9317 // Take element(s) we need to extract and copy it (merge them).
9318 SmallVector<Register, 8> SubVectorElts;
9319 for (unsigned Idx = Offset / SrcEltSize;
9320 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
9321 SubVectorElts.push_back(Unmerge.getReg(Idx));
9322 }
9323 if (SubVectorElts.size() == 1)
9324 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
9325 else
9326 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
9327
9328 MI.eraseFromParent();
9329 return Legalized;
9330 }
9331 }
9332
9333 if (DstTy.isScalar() &&
9334 (SrcTy.isScalar() ||
9335 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
9336 LLT SrcIntTy = SrcTy;
9337 if (!SrcTy.isScalar()) {
9338 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
9339 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
9340 }
9341
9342 if (Offset == 0)
9343 MIRBuilder.buildTrunc(DstReg, SrcReg);
9344 else {
9345 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
9346 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
9347 MIRBuilder.buildTrunc(DstReg, Shr);
9348 }
9349
9350 MI.eraseFromParent();
9351 return Legalized;
9352 }
9353
9354 return UnableToLegalize;
9355}
9356
9358 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
9359 uint64_t Offset = MI.getOperand(3).getImm();
9360
9361 LLT DstTy = MRI.getType(Src);
9362 LLT InsertTy = MRI.getType(InsertSrc);
9363
9364 // Insert sub-vector or one element
9365 if (DstTy.isVector() && !InsertTy.isPointer()) {
9366 LLT EltTy = DstTy.getElementType();
9367 unsigned EltSize = EltTy.getSizeInBits();
9368 unsigned InsertSize = InsertTy.getSizeInBits();
9369
9370 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
9371 (Offset + InsertSize <= DstTy.getSizeInBits())) {
9372 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
9373 SmallVector<Register, 8> DstElts;
9374 unsigned Idx = 0;
9375 // Copy the elements of Src that come before the insert offset.
9376 for (; Idx < Offset / EltSize; ++Idx) {
9377 DstElts.push_back(UnmergeSrc.getReg(Idx));
9378 }
9379
9380 // Replace elements in Src with elements from InsertSrc
9381 if (InsertTy.getSizeInBits() > EltSize) {
9382 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
9383 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
9384 ++Idx, ++i) {
9385 DstElts.push_back(UnmergeInsertSrc.getReg(i));
9386 }
9387 } else {
9388 DstElts.push_back(InsertSrc);
9389 ++Idx;
9390 }
9391
9392 // Remaining elements from Src after insert
9393 for (; Idx < DstTy.getNumElements(); ++Idx) {
9394 DstElts.push_back(UnmergeSrc.getReg(Idx));
9395 }
9396
9397 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
9398 MI.eraseFromParent();
9399 return Legalized;
9400 }
9401 }
9402
9403 if (InsertTy.isVector() ||
9404 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
9405 return UnableToLegalize;
9406
9407 const DataLayout &DL = MIRBuilder.getDataLayout();
9408 if ((DstTy.isPointer() &&
9409 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
9410 (InsertTy.isPointer() &&
9411 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
9412 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
9413 return UnableToLegalize;
9414 }
9415
9416 LLT IntDstTy = DstTy;
9417
9418 if (!DstTy.isScalar()) {
9419 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
9420 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
9421 }
9422
9423 if (!InsertTy.isScalar()) {
9424 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
9425 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
9426 }
9427
9428 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
9429 if (Offset != 0) {
9430 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
9431 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
9432 }
9433
9434 APInt MaskVal = APInt::getBitsSetWithWrap(
9435 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
9436
9437 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
9438 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
9439 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
9440
9441 MIRBuilder.buildCast(Dst, Or);
9442 MI.eraseFromParent();
9443 return Legalized;
9444}
9445
9448 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
9449 MI.getFirst4RegLLTs();
9450 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
9451
9452 LLT Ty = Dst0Ty;
9453 LLT BoolTy = Dst1Ty;
9454
9455 Register NewDst0 = MRI.cloneVirtualRegister(Dst0);
9456
9457 if (IsAdd)
9458 MIRBuilder.buildAdd(NewDst0, LHS, RHS);
9459 else
9460 MIRBuilder.buildSub(NewDst0, LHS, RHS);
9461
9462 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
9463
9464 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9465
9466 // For an addition, the result should be less than one of the operands (LHS)
9467 // if and only if the other operand (RHS) is negative, otherwise there will
9468 // be overflow.
9469 // For a subtraction, the result should be less than one of the operands
9470 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
9471 // otherwise there will be overflow.
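 // E.g. for s8: 100 + 100 wraps to -56; the result (-56) is less than LHS (100)
 // while RHS (100) is not negative, so the two compares disagree and the XOR
 // reports overflow.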
9472 auto ResultLowerThanLHS =
9473 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, NewDst0, LHS);
9474 auto ConditionRHS = MIRBuilder.buildICmp(
9475 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
9476
9477 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
9478
9479 MIRBuilder.buildCopy(Dst0, NewDst0);
9480 MI.eraseFromParent();
9481
9482 return Legalized;
9483}
9484
9486 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9487 const LLT Ty = MRI.getType(Res);
9488
9489 // sum = LHS + RHS + zext(CarryIn)
9490 auto Tmp = MIRBuilder.buildAdd(Ty, LHS, RHS);
9491 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9492 auto Sum = MIRBuilder.buildAdd(Ty, Tmp, CarryZ);
9493 MIRBuilder.buildCopy(Res, Sum);
9494
9495 // OvOut = icmp slt ((sum ^ lhs) & (sum ^ rhs)), 0
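 // Signed overflow occurs iff both addends have the same sign and the sum's
 // sign differs from it; (sum ^ lhs) & (sum ^ rhs) has its sign bit set in
 // exactly that case, so a signed compare against zero extracts it.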
9496 auto AX = MIRBuilder.buildXor(Ty, Sum, LHS);
9497 auto BX = MIRBuilder.buildXor(Ty, Sum, RHS);
9498 auto T = MIRBuilder.buildAnd(Ty, AX, BX);
9499
9500 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9501 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9502
9503 MI.eraseFromParent();
9504 return Legalized;
9505}
9506
9508 auto [Res, OvOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
9509 const LLT Ty = MRI.getType(Res);
9510
9511 // Diff = LHS - (RHS + zext(CarryIn))
9512 auto CarryZ = MIRBuilder.buildZExt(Ty, CarryIn);
9513 auto RHSPlusCI = MIRBuilder.buildAdd(Ty, RHS, CarryZ);
9514 auto Diff = MIRBuilder.buildSub(Ty, LHS, RHSPlusCI);
9515 MIRBuilder.buildCopy(Res, Diff);
9516
9517 // ov = msb((LHS ^ RHS) & (LHS ^ Diff))
9518 auto X1 = MIRBuilder.buildXor(Ty, LHS, RHS);
9519 auto X2 = MIRBuilder.buildXor(Ty, LHS, Diff);
9520 auto T = MIRBuilder.buildAnd(Ty, X1, X2);
9521 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9522 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, OvOut, T, Zero);
9523
9524 MI.eraseFromParent();
9525 return Legalized;
9526}
9527
9530 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9531 LLT Ty = MRI.getType(Res);
9532 bool IsSigned;
9533 bool IsAdd;
9534 unsigned BaseOp;
9535 switch (MI.getOpcode()) {
9536 default:
9537 llvm_unreachable("unexpected addsat/subsat opcode");
9538 case TargetOpcode::G_UADDSAT:
9539 IsSigned = false;
9540 IsAdd = true;
9541 BaseOp = TargetOpcode::G_ADD;
9542 break;
9543 case TargetOpcode::G_SADDSAT:
9544 IsSigned = true;
9545 IsAdd = true;
9546 BaseOp = TargetOpcode::G_ADD;
9547 break;
9548 case TargetOpcode::G_USUBSAT:
9549 IsSigned = false;
9550 IsAdd = false;
9551 BaseOp = TargetOpcode::G_SUB;
9552 break;
9553 case TargetOpcode::G_SSUBSAT:
9554 IsSigned = true;
9555 IsAdd = false;
9556 BaseOp = TargetOpcode::G_SUB;
9557 break;
9558 }
9559
9560 if (IsSigned) {
9561 // sadd.sat(a, b) ->
9562 // hi = 0x7fffffff - smax(a, 0)
9563 // lo = 0x80000000 - smin(a, 0)
9564 // a + smin(smax(lo, b), hi)
9565 // ssub.sat(a, b) ->
9566 // lo = smax(a, -1) - 0x7fffffff
9567 // hi = smin(a, -1) - 0x80000000
9568 // a - smin(smax(lo, b), hi)
9569 // TODO: AMDGPU can use a "median of 3" instruction here:
9570 // a +/- med3(lo, b, hi)
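 // E.g. s8 sadd.sat(100, 100): hi = 127 - smax(100, 0) = 27 and
 // lo = -128 - smin(100, 0) = -128, so b is clamped to
 // smin(smax(-128, 100), 27) = 27 and the result is 100 + 27 = 127.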
9571 uint64_t NumBits = Ty.getScalarSizeInBits();
9572 auto MaxVal =
9573 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
9574 auto MinVal =
9575 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9576 MachineInstrBuilder Hi, Lo;
9577 if (IsAdd) {
9578 auto Zero = MIRBuilder.buildConstant(Ty, 0);
9579 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
9580 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
9581 } else {
9582 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
9583 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
9584 MaxVal);
9585 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
9586 MinVal);
9587 }
9588 auto RHSClamped =
9589 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
9590 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
9591 } else {
9592 // uadd.sat(a, b) -> a + umin(~a, b)
9593 // usub.sat(a, b) -> a - umin(a, b)
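 // ~a equals UINT_MAX - a, so umin(~a, b) is the largest amount that can be
 // added to a without wrapping; likewise umin(a, b) is the largest amount that
 // can be subtracted from a without going below zero.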
9594 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
9595 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
9596 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
9597 }
9598
9599 MI.eraseFromParent();
9600 return Legalized;
9601}
9602
9605 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9606 LLT Ty = MRI.getType(Res);
9607 LLT BoolTy = Ty.changeElementSize(1);
9608 bool IsSigned;
9609 bool IsAdd;
9610 unsigned OverflowOp;
9611 switch (MI.getOpcode()) {
9612 default:
9613 llvm_unreachable("unexpected addsat/subsat opcode");
9614 case TargetOpcode::G_UADDSAT:
9615 IsSigned = false;
9616 IsAdd = true;
9617 OverflowOp = TargetOpcode::G_UADDO;
9618 break;
9619 case TargetOpcode::G_SADDSAT:
9620 IsSigned = true;
9621 IsAdd = true;
9622 OverflowOp = TargetOpcode::G_SADDO;
9623 break;
9624 case TargetOpcode::G_USUBSAT:
9625 IsSigned = false;
9626 IsAdd = false;
9627 OverflowOp = TargetOpcode::G_USUBO;
9628 break;
9629 case TargetOpcode::G_SSUBSAT:
9630 IsSigned = true;
9631 IsAdd = false;
9632 OverflowOp = TargetOpcode::G_SSUBO;
9633 break;
9634 }
9635
9636 auto OverflowRes =
9637 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
9638 Register Tmp = OverflowRes.getReg(0);
9639 Register Ov = OverflowRes.getReg(1);
9640 MachineInstrBuilder Clamp;
9641 if (IsSigned) {
9642 // sadd.sat(a, b) ->
9643 // {tmp, ov} = saddo(a, b)
9644 // ov ? (tmp >>s 31) + 0x80000000 : tmp
9645 // ssub.sat(a, b) ->
9646 // {tmp, ov} = ssubo(a, b)
9647 // ov ? (tmp >>s 31) + 0x80000000 : tmp
9648 uint64_t NumBits = Ty.getScalarSizeInBits();
9649 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
9650 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
9651 auto MinVal =
9652 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
9653 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
9654 } else {
9655 // uadd.sat(a, b) ->
9656 // {tmp, ov} = uaddo(a, b)
9657 // ov ? 0xffffffff : tmp
9658 // usub.sat(a, b) ->
9659 // {tmp, ov} = usubo(a, b)
9660 // ov ? 0 : tmp
9661 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
9662 }
9663 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
9664
9665 MI.eraseFromParent();
9666 return Legalized;
9667}
9668
9671 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
9672 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
9673 "Expected shlsat opcode!");
9674 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
9675 auto [Res, LHS, RHS] = MI.getFirst3Regs();
9676 LLT Ty = MRI.getType(Res);
9677 LLT BoolTy = Ty.changeElementSize(1);
9678
9679 unsigned BW = Ty.getScalarSizeInBits();
9680 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
9681 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
9682 : MIRBuilder.buildLShr(Ty, Result, RHS);
9683
9684 MachineInstrBuilder SatVal;
9685 if (IsSigned) {
9686 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
9687 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
9688 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
9689 MIRBuilder.buildConstant(Ty, 0));
9690 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
9691 } else {
9692 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
9693 }
9694 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
9695 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
9696
9697 MI.eraseFromParent();
9698 return Legalized;
9699}
9700
9702 auto [Dst, Src] = MI.getFirst2Regs();
9703 const LLT Ty = MRI.getType(Src);
9704 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
9705 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
9706
9707 // Swap most and least significant byte, set remaining bytes in Res to zero.
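 // E.g. for s32 with Src = 0xAABBCCDD: the first OR yields 0xDD0000AA, and the
 // single loop iteration below (i = 1) adds byte 1 shifted up (0x00CC0000) and
 // byte 2 shifted down (0x0000BB00), giving 0xDDCCBBAA.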
9708 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
9709 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
9710 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9711 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
9712
9713 // Set i-th high/low byte in Res to i-th low/high byte from Src.
9714 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
9715 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
9716 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
9717 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
9718 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
9719 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
9720 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
9721 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
9722 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
9723 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
9724 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
9725 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
9726 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
9727 }
9728 Res.getInstr()->getOperand(0).setReg(Dst);
9729
9730 MI.eraseFromParent();
9731 return Legalized;
9732}
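// Worked example of the byte-swap expansion above (illustrative): for an s32
// source 0x11223344 the initial or produces 0x44000011, and the single loop
// iteration (i = 1) ors in 0x00330000 and 0x00002200, yielding 0x44332211.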
9733
9734//{ (Src & Mask) >> N } | { (Src << N) & Mask }
9735 static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
9736                                  MachineInstrBuilder Src, const APInt &Mask) {
9737 const LLT Ty = Dst.getLLTTy(*B.getMRI());
9738 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
9739 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
9740 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
9741 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
9742 return B.buildOr(Dst, LHS, RHS);
9743}
9744
9747 auto [Dst, Src] = MI.getFirst2Regs();
9748 const LLT SrcTy = MRI.getType(Src);
9749 unsigned Size = SrcTy.getScalarSizeInBits();
9750 unsigned VSize = SrcTy.getSizeInBits();
9751
9752 if (Size >= 8) {
9753 if (SrcTy.isVector() && (VSize % 8 == 0) &&
9754 (LI.isLegal({TargetOpcode::G_BITREVERSE,
9755 {LLT::fixed_vector(VSize / 8, 8),
9756 LLT::fixed_vector(VSize / 8, 8)}}))) {
9757 // If bitreverse is legal for i8 vector of the same size, then cast
9758 // to i8 vector type.
9759 // e.g. v4s32 -> v16s8
9760 LLT VTy = LLT::fixed_vector(VSize / 8, 8);
9761 auto BSWAP = MIRBuilder.buildBSwap(SrcTy, Src);
9762 auto Cast = MIRBuilder.buildBitcast(VTy, BSWAP);
9763 auto RBIT = MIRBuilder.buildBitReverse(VTy, Cast);
9764 MIRBuilder.buildBitcast(Dst, RBIT);
9765 } else {
9766 MachineInstrBuilder BSWAP =
9767 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {SrcTy}, {Src});
9768
9769 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
9770 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
9771 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
9772 MachineInstrBuilder Swap4 = SwapN(4, SrcTy, MIRBuilder, BSWAP,
9773 APInt::getSplat(Size, APInt(8, 0xF0)));
9774
9775 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
9776 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
9777 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
9778 MachineInstrBuilder Swap2 = SwapN(2, SrcTy, MIRBuilder, Swap4,
9779 APInt::getSplat(Size, APInt(8, 0xCC)));
9780
9781 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5
9782 // 6|7
9783 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
9784 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
9785 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
9786 }
9787 } else {
9788 // Expand bitreverse for types smaller than 8 bits.
9789 MachineInstrBuilder Tmp;
9790 for (unsigned I = 0, J = Size - 1; I < Size; ++I, --J) {
9791 MachineInstrBuilder Tmp2;
9792 if (I < J) {
9793 auto ShAmt = MIRBuilder.buildConstant(SrcTy, J - I);
9794 Tmp2 = MIRBuilder.buildShl(SrcTy, Src, ShAmt);
9795 } else {
9796 auto ShAmt = MIRBuilder.buildConstant(SrcTy, I - J);
9797 Tmp2 = MIRBuilder.buildLShr(SrcTy, Src, ShAmt);
9798 }
9799
9800 auto Mask = MIRBuilder.buildConstant(SrcTy, 1ULL << J);
9801 Tmp2 = MIRBuilder.buildAnd(SrcTy, Tmp2, Mask);
9802 if (I == 0)
9803 Tmp = Tmp2;
9804 else
9805 Tmp = MIRBuilder.buildOr(SrcTy, Tmp, Tmp2);
9806 }
9807 MIRBuilder.buildCopy(Dst, Tmp);
9808 }
9809
9810 MI.eraseFromParent();
9811 return Legalized;
9812}
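// Worked example of the wide-type path above (illustrative): reversing the
// s32 value 0x00000001 goes to 0x01000000 after G_BSWAP, 0x10000000 after the
// nibble swap, 0x40000000 after the 2-bit swap, and 0x80000000 after the
// final 1-bit swap, i.e. bit 0 has moved to bit 31.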
9813
9816 MachineFunction &MF = MIRBuilder.getMF();
9817
9818 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
9819 int NameOpIdx = IsRead ? 1 : 0;
9820 int ValRegIndex = IsRead ? 0 : 1;
9821
9822 Register ValReg = MI.getOperand(ValRegIndex).getReg();
9823 const LLT Ty = MRI.getType(ValReg);
9824 const MDString *RegStr = cast<MDString>(
9825 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
9826
9827 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
9828 if (!PhysReg) {
9829 const Function &Fn = MF.getFunction();
9830 Fn.getContext().diagnose(DiagnosticInfoGenericWithLoc(
9831 "invalid register \"" + Twine(RegStr->getString().data()) + "\" for " +
9832 (IsRead ? "llvm.read_register" : "llvm.write_register"),
9833 Fn, MI.getDebugLoc()));
9834 if (IsRead)
9835 MIRBuilder.buildUndef(ValReg);
9836
9837 MI.eraseFromParent();
9838 return Legalized;
9839 }
9840
9841 if (IsRead)
9842 MIRBuilder.buildCopy(ValReg, PhysReg);
9843 else
9844 MIRBuilder.buildCopy(PhysReg, ValReg);
9845
9846 MI.eraseFromParent();
9847 return Legalized;
9848}
9849
9850 LegalizerHelper::LegalizeResult
9851 LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
9852 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
9853 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
9854 Register Result = MI.getOperand(0).getReg();
9855 LLT OrigTy = MRI.getType(Result);
9856 auto SizeInBits = OrigTy.getScalarSizeInBits();
9857 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
9858
9859 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
9860 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
9861 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
9862 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
9863
9864 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
9865 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
9866 MIRBuilder.buildTrunc(Result, Shifted);
9867
9868 MI.eraseFromParent();
9869 return Legalized;
9870}
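// Worked example of the mulh lowering above (illustrative): for s32 G_UMULH
// with operands 0x80000000 and 4, the zero-extended s64 product is
// 0x200000000; the logical shift right by 32 followed by the truncate
// returns 2, the high half of the full product.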
9871
9874 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
9875 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
9876
9877 if (Mask == fcNone) {
9878 MIRBuilder.buildConstant(DstReg, 0);
9879 MI.eraseFromParent();
9880 return Legalized;
9881 }
9882 if (Mask == fcAllFlags) {
9883 MIRBuilder.buildConstant(DstReg, 1);
9884 MI.eraseFromParent();
9885 return Legalized;
9886 }
9887
9888 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
9889 // version
9890
9891 unsigned BitSize = SrcTy.getScalarSizeInBits();
9892 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
9893
9894 LLT IntTy = SrcTy.changeElementType(LLT::scalar(BitSize));
9895 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
9896
9897 // Various masks.
9898 APInt SignBit = APInt::getSignMask(BitSize);
9899 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
9900 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
9901 APInt ExpMask = Inf;
9902 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
9903 APInt QNaNBitMask =
9904 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
9905 APInt InversionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
9906
9907 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
9908 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
9909 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
9910 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
9911 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
9912
9913 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
9914 auto Sign =
9915 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_NE, DstTy, AsInt, Abs);
9916
9917 auto Res = MIRBuilder.buildConstant(DstTy, 0);
9918 // Clang doesn't support capture of structured bindings:
9919 LLT DstTyCopy = DstTy;
9920 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
9921 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
9922 };
9923
9924 // Tests that involve more than one class should be processed first.
9925 if ((Mask & fcFinite) == fcFinite) {
9926 // finite(V) ==> abs(V) u< exp_mask
9927 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9928 ExpMaskC));
9929 Mask &= ~fcFinite;
9930 } else if ((Mask & fcFinite) == fcPosFinite) {
9931 // finite(V) && V > 0 ==> V u< exp_mask
9932 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
9933 ExpMaskC));
9934 Mask &= ~fcPosFinite;
9935 } else if ((Mask & fcFinite) == fcNegFinite) {
9936 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
9937 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
9938 ExpMaskC);
9939 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
9940 appendToRes(And);
9941 Mask &= ~fcNegFinite;
9942 }
9943
9944 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
9945 // fcZero | fcSubnormal => test all exponent bits are 0
9946 // TODO: Handle sign bit specific cases
9947 // TODO: Handle inverted case
9948 if (PartialCheck == (fcZero | fcSubnormal)) {
9949 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
9950 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9951 ExpBits, ZeroC));
9952 Mask &= ~PartialCheck;
9953 }
9954 }
9955
9956 // Check for individual classes.
9957 if (FPClassTest PartialCheck = Mask & fcZero) {
9958 if (PartialCheck == fcPosZero)
9959 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9960 AsInt, ZeroC));
9961 else if (PartialCheck == fcZero)
9962 appendToRes(
9963 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, ZeroC));
9964 else // fcNegZero
9965 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9966 AsInt, SignBitC));
9967 }
9968
9969 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
9970 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
9971 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
9972 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
9973 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
9974 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
9975 auto SubnormalRes =
9976 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, VMinusOne,
9977 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
9978 if (PartialCheck == fcNegSubnormal)
9979 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
9980 appendToRes(SubnormalRes);
9981 }
9982
9983 if (FPClassTest PartialCheck = Mask & fcInf) {
9984 if (PartialCheck == fcPosInf)
9985 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9986 AsInt, InfC));
9987 else if (PartialCheck == fcInf)
9988 appendToRes(
9989 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy, Abs, InfC));
9990 else { // fcNegInf
9991 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
9992 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
9993 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, DstTy,
9994 AsInt, NegInfC));
9995 }
9996 }
9997
9998 if (FPClassTest PartialCheck = Mask & fcNan) {
9999 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
10000 if (PartialCheck == fcNan) {
10001 // isnan(V) ==> abs(V) u> int(inf)
10002 appendToRes(
10003 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC));
10004 } else if (PartialCheck == fcQNan) {
10005 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
10006 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
10007 InfWithQnanBitC));
10008 } else { // fcSNan
10009 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
10010 // abs(V) u< (unsigned(Inf) | quiet_bit)
10011 auto IsNan =
10012 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGT, DstTy, Abs, InfC);
10013 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
10014 Abs, InfWithQnanBitC);
10015 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
10016 }
10017 }
10018
10019 if (FPClassTest PartialCheck = Mask & fcNormal) {
10020 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
10021 // (max_exp-1))
10022 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
10023 auto ExpMinusOne = MIRBuilder.buildSub(
10024 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
10025 APInt MaxExpMinusOne = ExpMask - ExpLSB;
10026 auto NormalRes =
10027 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, ExpMinusOne,
10028 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
10029 if (PartialCheck == fcNegNormal)
10030 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
10031 else if (PartialCheck == fcPosNormal) {
10032 auto PosSign = MIRBuilder.buildXor(
10033 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InversionMask));
10034 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
10035 }
10036 appendToRes(NormalRes);
10037 }
10038
10039 MIRBuilder.buildCopy(DstReg, Res);
10040 MI.eraseFromParent();
10041 return Legalized;
10042}
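// Worked example of one class test above (illustrative): for an f32 operand
// and Mask == fcInf, Abs is the bitcast value with the sign bit cleared, so
// both +inf (0x7F800000) and -inf (0xFF800000) compare equal to InfC, while
// any NaN has extra mantissa bits set and fails the ICMP_EQ.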
10043
10045 // Implement G_SELECT in terms of XOR, AND, OR.
10046 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
10047 MI.getFirst4RegLLTs();
10048
10049 bool IsEltPtr = DstTy.isPointerOrPointerVector();
10050 if (IsEltPtr) {
10051 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
10052 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
10053 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
10054 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
10055 DstTy = NewTy;
10056 }
10057
10058 if (MaskTy.isScalar()) {
10059 // Turn the scalar condition into a vector condition mask if needed.
10060
10061 Register MaskElt = MaskReg;
10062
10063 // The condition was potentially zero extended before, but we want a sign
10064 // extended boolean.
10065 if (MaskTy != LLT::scalar(1))
10066 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
10067
10068 // Continue the sign extension (or truncate) to match the data type.
10069 MaskElt =
10070 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
10071
10072 if (DstTy.isVector()) {
10073 // Generate a vector splat idiom.
10074 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
10075 MaskReg = ShufSplat.getReg(0);
10076 } else {
10077 MaskReg = MaskElt;
10078 }
10079 MaskTy = DstTy;
10080 } else if (!DstTy.isVector()) {
10081 // Cannot handle the case that mask is a vector and dst is a scalar.
10082 return UnableToLegalize;
10083 }
10084
10085 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
10086 return UnableToLegalize;
10087 }
10088
10089 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
10090 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
10091 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
10092 if (IsEltPtr) {
10093 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
10094 MIRBuilder.buildIntToPtr(DstReg, Or);
10095 } else {
10096 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
10097 }
10098 MI.eraseFromParent();
10099 return Legalized;
10100}
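// Worked example of the select lowering above (illustrative): for a v4s32
// G_SELECT with an s1 condition, the condition is sign-extended and splatted
// so a true mask is all ones in every lane, and (Op1 & Mask) | (Op2 & ~Mask)
// then reproduces Op1 lane-wise, matching the semantics of G_SELECT.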
10101
10103 // Split DIVREM into individual instructions.
10104 unsigned Opcode = MI.getOpcode();
10105
10106 MIRBuilder.buildInstr(
10107 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
10108 : TargetOpcode::G_UDIV,
10109 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10110 MIRBuilder.buildInstr(
10111 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
10112 : TargetOpcode::G_UREM,
10113 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
10114 MI.eraseFromParent();
10115 return Legalized;
10116}
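// Worked example of the split above (illustrative): G_SDIVREM %div, %rem,
// %a, %b becomes %div = G_SDIV %a, %b followed by %rem = G_SREM %a, %b,
// using operand 0 for the quotient and operand 1 for the remainder.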
10117
10118 LegalizerHelper::LegalizeResult
10119 LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
10120 // Expand %res = G_ABS %a into:
10121 // %v1 = G_ASHR %a, scalar_size-1
10122 // %v2 = G_ADD %a, %v1
10123 // %res = G_XOR %v2, %v1
10124 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10125 Register OpReg = MI.getOperand(1).getReg();
10126 auto ShiftAmt =
10127 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
10128 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
10129 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
10130 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
10131 MI.eraseFromParent();
10132 return Legalized;
10133}
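// Worked example of the expansion above (illustrative, s32 values): for
// %a = -5 the shift produces %v1 = -1 (all ones), %v2 = -5 + -1 = -6, and
// -6 ^ -1 = 5; for a non-negative input %v1 is 0, so the add and xor leave
// the value unchanged.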
10134
10137 // Expand %res = G_ABS %a into:
10138 // %v1 = G_CONSTANT 0
10139 // %v2 = G_SUB %v1, %a
10140 // %res = G_SMAX %a, %v2
10141 Register SrcReg = MI.getOperand(1).getReg();
10142 LLT Ty = MRI.getType(SrcReg);
10143 auto Zero = MIRBuilder.buildConstant(Ty, 0);
10144 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
10145 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
10146 MI.eraseFromParent();
10147 return Legalized;
10148}
10149
10152 Register SrcReg = MI.getOperand(1).getReg();
10153 Register DestReg = MI.getOperand(0).getReg();
10154 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
10155 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
10156 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
10157 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
10158 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
10159 MI.eraseFromParent();
10160 return Legalized;
10161}
10162
10165 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10166 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10167 "Expected G_ABDS or G_ABDU instruction");
10168
10169 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10170 LLT Ty = MRI.getType(LHS);
10171
10172 // abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10173 // abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
10174 Register LHSSub = MIRBuilder.buildSub(Ty, LHS, RHS).getReg(0);
10175 Register RHSSub = MIRBuilder.buildSub(Ty, RHS, LHS).getReg(0);
10176 CmpInst::Predicate Pred = (MI.getOpcode() == TargetOpcode::G_ABDS)
10177                               ? CmpInst::ICMP_SGT
10178                               : CmpInst::ICMP_UGT;
10179 auto ICmp = MIRBuilder.buildICmp(Pred, LLT::scalar(1), LHS, RHS);
10180 MIRBuilder.buildSelect(DstReg, ICmp, LHSSub, RHSSub);
10181
10182 MI.eraseFromParent();
10183 return Legalized;
10184}
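// Worked example of the select-based lowering above (illustrative, s8
// values): for G_ABDU with lhs = 10 and rhs = 250, ugt(10, 250) is false, so
// the select returns sub(rhs, lhs) = 240, the unsigned absolute difference.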
10185
10188 assert((MI.getOpcode() == TargetOpcode::G_ABDS ||
10189 MI.getOpcode() == TargetOpcode::G_ABDU) &&
10190 "Expected G_ABDS or G_ABDU instruction");
10191
10192 auto [DstReg, LHS, RHS] = MI.getFirst3Regs();
10193 LLT Ty = MRI.getType(LHS);
10194
10195 // abds(lhs, rhs) -> sub(smax(lhs, rhs), smin(lhs, rhs))
10196 // abdu(lhs, rhs) -> sub(umax(lhs, rhs), umin(lhs, rhs))
10197 Register MaxReg, MinReg;
10198 if (MI.getOpcode() == TargetOpcode::G_ABDS) {
10199 MaxReg = MIRBuilder.buildSMax(Ty, LHS, RHS).getReg(0);
10200 MinReg = MIRBuilder.buildSMin(Ty, LHS, RHS).getReg(0);
10201 } else {
10202 MaxReg = MIRBuilder.buildUMax(Ty, LHS, RHS).getReg(0);
10203 MinReg = MIRBuilder.buildUMin(Ty, LHS, RHS).getReg(0);
10204 }
10205 MIRBuilder.buildSub(DstReg, MaxReg, MinReg);
10206
10207 MI.eraseFromParent();
10208 return Legalized;
10209}
10210
10212 Register SrcReg = MI.getOperand(1).getReg();
10213 Register DstReg = MI.getOperand(0).getReg();
10214
10215 LLT Ty = MRI.getType(DstReg);
10216
10217 // Reset sign bit
10218 MIRBuilder.buildAnd(
10219 DstReg, SrcReg,
10220 MIRBuilder.buildConstant(
10221 Ty, APInt::getSignedMaxValue(Ty.getScalarSizeInBits())));
10222
10223 MI.eraseFromParent();
10224 return Legalized;
10225}
10226
10229 Register SrcReg = MI.getOperand(1).getReg();
10230 LLT SrcTy = MRI.getType(SrcReg);
10231 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
10232
10233 // The source could be a scalar if the IR type was <1 x sN>.
10234 if (SrcTy.isScalar()) {
10235 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
10236 return UnableToLegalize; // FIXME: handle extension.
10237 // This can be just a plain copy.
10238 Observer.changingInstr(MI);
10239 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
10240 Observer.changedInstr(MI);
10241 return Legalized;
10242 }
10243 return UnableToLegalize;
10244}
10245
10247 MachineFunction &MF = *MI.getMF();
10248 const DataLayout &DL = MIRBuilder.getDataLayout();
10249 LLVMContext &Ctx = MF.getFunction().getContext();
10250 Register ListPtr = MI.getOperand(1).getReg();
10251 LLT PtrTy = MRI.getType(ListPtr);
10252
10253 // ListPtr is a pointer to the head of the list. Get the address
10254 // of the head of the list.
10255 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
10256 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
10257 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
10258 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
10259
10260 const Align A(MI.getOperand(2).getImm());
10261 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
10262 if (A > TLI.getMinStackArgumentAlignment()) {
10263 Register AlignAmt =
10264 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
10265 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
10266 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
10267 VAList = AndDst.getReg(0);
10268 }
10269
10270 // Increment the pointer, VAList, to the next vaarg
10271 // The list should be bumped by the size of the element in the current head
10272 // of the list.
10273 Register Dst = MI.getOperand(0).getReg();
10274 LLT LLTTy = MRI.getType(Dst);
10275 Type *Ty = getTypeForLLT(LLTTy, Ctx);
10276 auto IncAmt =
10277 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
10278 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
10279
10280 // Store the incremented VAList to the legalized pointer
10281 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
10282 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
10283 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
10284 // Load the actual argument out of the pointer VAList
10285 Align EltAlignment = DL.getABITypeAlign(Ty);
10286 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
10287 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
10288 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
10289
10290 MI.eraseFromParent();
10291 return Legalized;
10292}
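// Worked example of the va_arg lowering above (illustrative): for an s64
// element with an 8-byte alignment operand on a target whose minimum stack
// argument alignment is smaller, the loaded list pointer is rounded up to an
// 8-byte boundary, the argument is loaded from that address, and the pointer
// written back is advanced by the 8-byte alloc size of the element type.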
10293
10294 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
10295 // On Darwin, -Os means optimize for size without hurting performance, so
10296 // only really optimize for size when -Oz (MinSize) is used.
10297 if (MF.getTarget().getTargetTriple().isOSDarwin())
10298 return MF.getFunction().hasMinSize();
10299 return MF.getFunction().hasOptSize();
10300}
10301
10302// Returns a list of types to use for memory op lowering in MemOps. A partial
10303// port of findOptimalMemOpLowering in TargetLowering.
10304static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
10305 unsigned Limit, const MemOp &Op,
10306 unsigned DstAS, unsigned SrcAS,
10307 const AttributeList &FuncAttributes,
10308 const TargetLowering &TLI) {
10309 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
10310 return false;
10311
10312 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
10313
10314 if (Ty == LLT()) {
10315 // Use the largest scalar type whose alignment constraints are satisfied.
10316 // We only need to check DstAlign here as SrcAlign is always greater or
10317 // equal to DstAlign (or zero).
10318 Ty = LLT::scalar(64);
10319 if (Op.isFixedDstAlign())
10320 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
10321 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
10322 Ty = LLT::scalar(Ty.getSizeInBytes());
10323 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
10324 // FIXME: check for the largest legal type we can load/store to.
10325 }
10326
10327 unsigned NumMemOps = 0;
10328 uint64_t Size = Op.size();
10329 while (Size) {
10330 unsigned TySize = Ty.getSizeInBytes();
10331 while (TySize > Size) {
10332 // For now, only use non-vector load / stores for the left-over pieces.
10333 LLT NewTy = Ty;
10334 // FIXME: check for mem op safety and legality of the types. Not all of
10335 // SDAGisms map cleanly to GISel concepts.
10336 if (NewTy.isVector())
10337 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
10338 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
10339 unsigned NewTySize = NewTy.getSizeInBytes();
10340 assert(NewTySize > 0 && "Could not find appropriate type");
10341
10342 // If the new LLT cannot cover all of the remaining bits, then consider
10343 // issuing a (or a pair of) unaligned and overlapping load / store.
10344 unsigned Fast;
10345 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
10346 MVT VT = getMVTForLLT(Ty);
10347 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
10348 TLI.allowsMisalignedMemoryAccesses(
10349 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
10350 MachineMemOperand::MONone, &Fast) &&
10351 Fast)
10352 TySize = Size;
10353 else {
10354 Ty = NewTy;
10355 TySize = NewTySize;
10356 }
10357 }
10358
10359 if (++NumMemOps > Limit)
10360 return false;
10361
10362 MemOps.push_back(Ty);
10363 Size -= TySize;
10364 }
10365
10366 return true;
10367}
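// Worked example of the breakdown above (illustrative, assuming the preferred
// type is s64 and overlapping accesses are not allowed): covering 13 bytes
// records one s64 (8 bytes), shrinks to s32 for the next 4 bytes, and
// finishes with an s8, producing MemOps = {s64, s32, s8}.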
10368
10369// Get a vectorized representation of the memset value operand, GISel edition.
10371 MachineRegisterInfo &MRI = *MIB.getMRI();
10372 unsigned NumBits = Ty.getScalarSizeInBits();
10373 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10374 if (!Ty.isVector() && ValVRegAndVal) {
10375 APInt Scalar = ValVRegAndVal->Value.trunc(8);
10376 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
10377 return MIB.buildConstant(Ty, SplatVal).getReg(0);
10378 }
10379
10380 // Extend the byte value to the larger type, and then multiply by a magic
10381 // value 0x010101... in order to replicate it across every byte.
10382 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
10383 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
10384 return MIB.buildConstant(Ty, 0).getReg(0);
10385 }
10386
10387 LLT ExtType = Ty.getScalarType();
10388 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
10389 if (NumBits > 8) {
10390 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
10391 auto MagicMI = MIB.buildConstant(ExtType, Magic);
10392 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
10393 }
10394
10395 // For vector types create a G_BUILD_VECTOR.
10396 if (Ty.isVector())
10397 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
10398
10399 return Val;
10400}
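// Worked example of the splat above (illustrative): for a non-constant s8
// value and an s32 store type, the byte is zero-extended and multiplied by
// the magic constant 0x01010101, so an input byte of 0xAB becomes
// 0xABABABAB, i.e. the byte replicated across the wider type.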
10401
10402 LegalizerHelper::LegalizeResult
10403LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
10404 uint64_t KnownLen, Align Alignment,
10405 bool IsVolatile) {
10406 auto &MF = *MI.getParent()->getParent();
10407 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10408 auto &DL = MF.getDataLayout();
10409 LLVMContext &C = MF.getFunction().getContext();
10410
10411 assert(KnownLen != 0 && "Have a zero length memset length!");
10412
10413 bool DstAlignCanChange = false;
10414 MachineFrameInfo &MFI = MF.getFrameInfo();
10415 bool OptSize = shouldLowerMemFuncForSize(MF);
10416
10417 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10418 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10419 DstAlignCanChange = true;
10420
10421 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
10422 std::vector<LLT> MemOps;
10423
10424 const auto &DstMMO = **MI.memoperands_begin();
10425 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10426
10427 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
10428 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
10429
10430 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
10431 MemOp::Set(KnownLen, DstAlignCanChange,
10432 Alignment,
10433 /*IsZeroMemset=*/IsZeroVal,
10434 /*IsVolatile=*/IsVolatile),
10435 DstPtrInfo.getAddrSpace(), ~0u,
10436 MF.getFunction().getAttributes(), TLI))
10437 return UnableToLegalize;
10438
10439 if (DstAlignCanChange) {
10440 // Get an estimate of the type from the LLT.
10441 Type *IRTy = getTypeForLLT(MemOps[0], C);
10442 Align NewAlign = DL.getABITypeAlign(IRTy);
10443 if (NewAlign > Alignment) {
10444 Alignment = NewAlign;
10445 unsigned FI = FIDef->getOperand(1).getIndex();
10446 // Give the stack frame object a larger alignment if needed.
10447 if (MFI.getObjectAlign(FI) < Alignment)
10448 MFI.setObjectAlignment(FI, Alignment);
10449 }
10450 }
10451
10452 MachineIRBuilder MIB(MI);
10453 // Find the largest store and generate the bit pattern for it.
10454 LLT LargestTy = MemOps[0];
10455 for (unsigned i = 1; i < MemOps.size(); i++)
10456 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
10457 LargestTy = MemOps[i];
10458
10459 // The memset stored value is always defined as an s8, so in order to make it
10460 // work with larger store types we need to repeat the bit pattern across the
10461 // wider type.
10462 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
10463
10464 if (!MemSetValue)
10465 return UnableToLegalize;
10466
10467 // Generate the stores. For each store type in the list, we generate the
10468 // matching store of that type to the destination address.
10469 LLT PtrTy = MRI.getType(Dst);
10470 unsigned DstOff = 0;
10471 unsigned Size = KnownLen;
10472 for (unsigned I = 0; I < MemOps.size(); I++) {
10473 LLT Ty = MemOps[I];
10474 unsigned TySize = Ty.getSizeInBytes();
10475 if (TySize > Size) {
10476 // Issuing an unaligned load / store pair that overlaps with the previous
10477 // pair. Adjust the offset accordingly.
10478 assert(I == MemOps.size() - 1 && I != 0);
10479 DstOff -= TySize - Size;
10480 }
10481
10482 // If this store is smaller than the largest store see whether we can get
10483 // the smaller value for free with a truncate.
10484 Register Value = MemSetValue;
10485 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
10486 MVT VT = getMVTForLLT(Ty);
10487 MVT LargestVT = getMVTForLLT(LargestTy);
10488 if (!LargestTy.isVector() && !Ty.isVector() &&
10489 TLI.isTruncateFree(LargestVT, VT))
10490 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
10491 else
10492 Value = getMemsetValue(Val, Ty, MIB);
10493 if (!Value)
10494 return UnableToLegalize;
10495 }
10496
10497 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
10498
10499 Register Ptr = Dst;
10500 if (DstOff != 0) {
10501 auto Offset =
10502 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
10503 Ptr = MIB.buildObjectPtrOffset(PtrTy, Dst, Offset).getReg(0);
10504 }
10505
10506 MIB.buildStore(Value, Ptr, *StoreMMO);
10507 DstOff += Ty.getSizeInBytes();
10508 Size -= TySize;
10509 }
10510
10511 MI.eraseFromParent();
10512 return Legalized;
10513}
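// Worked example of the store emission above (illustrative, assuming the
// target's type list comes back as {s32, s32} for a 7-byte memset): the
// first s32 store covers bytes 0..3; the second would run past the end, so
// DstOff is pulled back to 3 and an overlapping store covers bytes 3..6.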
10514
10515 LegalizerHelper::LegalizeResult
10516LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
10517 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10518
10519 auto [Dst, Src, Len] = MI.getFirst3Regs();
10520
10521 const auto *MMOIt = MI.memoperands_begin();
10522 const MachineMemOperand *MemOp = *MMOIt;
10523 bool IsVolatile = MemOp->isVolatile();
10524
10525 // See if this is a constant length copy
10526 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10527 // FIXME: support dynamically sized G_MEMCPY_INLINE
10528 assert(LenVRegAndVal &&
10529 "inline memcpy with dynamic size is not yet supported");
10530 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10531 if (KnownLen == 0) {
10532 MI.eraseFromParent();
10533 return Legalized;
10534 }
10535
10536 const auto &DstMMO = **MI.memoperands_begin();
10537 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10538 Align DstAlign = DstMMO.getBaseAlign();
10539 Align SrcAlign = SrcMMO.getBaseAlign();
10540
10541 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
10542 IsVolatile);
10543}
10544
10545 LegalizerHelper::LegalizeResult
10546LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
10547 uint64_t KnownLen, Align DstAlign,
10548 Align SrcAlign, bool IsVolatile) {
10549 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
10550 return lowerMemcpy(MI, Dst, Src, KnownLen,
10551 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
10552 IsVolatile);
10553}
10554
10555 LegalizerHelper::LegalizeResult
10556LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
10557 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
10558 Align SrcAlign, bool IsVolatile) {
10559 auto &MF = *MI.getParent()->getParent();
10560 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10561 auto &DL = MF.getDataLayout();
10562 LLVMContext &C = MF.getFunction().getContext();
10563
10564 assert(KnownLen != 0 && "Have a zero length memcpy length!");
10565
10566 bool DstAlignCanChange = false;
10567 MachineFrameInfo &MFI = MF.getFrameInfo();
10568 Align Alignment = std::min(DstAlign, SrcAlign);
10569
10570 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10571 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10572 DstAlignCanChange = true;
10573
10574 // FIXME: infer better src pointer alignment like SelectionDAG does here.
10575 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
10576 // if the memcpy is in a tail call position.
10577
10578 std::vector<LLT> MemOps;
10579
10580 const auto &DstMMO = **MI.memoperands_begin();
10581 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10582 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10583 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10584
10585 if (!findGISelOptimalMemOpLowering(
10586 MemOps, Limit,
10587 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10588 IsVolatile),
10589 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10590 MF.getFunction().getAttributes(), TLI))
10591 return UnableToLegalize;
10592
10593 if (DstAlignCanChange) {
10594 // Get an estimate of the type from the LLT.
10595 Type *IRTy = getTypeForLLT(MemOps[0], C);
10596 Align NewAlign = DL.getABITypeAlign(IRTy);
10597
10598 // Don't promote to an alignment that would require dynamic stack
10599 // realignment.
10600 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10601 if (!TRI->hasStackRealignment(MF))
10602 if (MaybeAlign StackAlign = DL.getStackAlignment())
10603 NewAlign = std::min(NewAlign, *StackAlign);
10604
10605 if (NewAlign > Alignment) {
10606 Alignment = NewAlign;
10607 unsigned FI = FIDef->getOperand(1).getIndex();
10608 // Give the stack frame object a larger alignment if needed.
10609 if (MFI.getObjectAlign(FI) < Alignment)
10610 MFI.setObjectAlignment(FI, Alignment);
10611 }
10612 }
10613
10614 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
10615
10616 MachineIRBuilder MIB(MI);
10617 // Now we need to emit a pair of load and stores for each of the types we've
10618 // collected. I.e. for each type, generate a load from the source pointer of
10619 // that type width, and then generate a corresponding store to the dest buffer
10620 // of that value loaded. This can result in a sequence of loads and stores
10621 // of mixed types, depending on what the target specifies as good types to use.
10622 unsigned CurrOffset = 0;
10623 unsigned Size = KnownLen;
10624 for (auto CopyTy : MemOps) {
10625 // Issuing an unaligned load / store pair that overlaps with the previous
10626 // pair. Adjust the offset accordingly.
10627 if (CopyTy.getSizeInBytes() > Size)
10628 CurrOffset -= CopyTy.getSizeInBytes() - Size;
10629
10630 // Construct MMOs for the accesses.
10631 auto *LoadMMO =
10632 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10633 auto *StoreMMO =
10634 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10635
10636 // Create the load.
10637 Register LoadPtr = Src;
10638 Register Offset;
10639 if (CurrOffset != 0) {
10640 LLT SrcTy = MRI.getType(Src);
10641 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
10642 .getReg(0);
10643 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10644 }
10645 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
10646
10647 // Create the store.
10648 Register StorePtr = Dst;
10649 if (CurrOffset != 0) {
10650 LLT DstTy = MRI.getType(Dst);
10651 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10652 }
10653 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
10654 CurrOffset += CopyTy.getSizeInBytes();
10655 Size -= CopyTy.getSizeInBytes();
10656 }
10657
10658 MI.eraseFromParent();
10659 return Legalized;
10660}
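// Worked example of the load/store emission above (illustrative, assuming
// MemOps = {s64} for an 8-byte copy): a single s64 load from Src and a
// matching s64 store to Dst are emitted; larger copies repeat the pair per
// MemOps entry, advancing CurrOffset by the width of each type.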
10661
10662 LegalizerHelper::LegalizeResult
10663LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
10664 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
10665 bool IsVolatile) {
10666 auto &MF = *MI.getParent()->getParent();
10667 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10668 auto &DL = MF.getDataLayout();
10669 LLVMContext &C = MF.getFunction().getContext();
10670
10671 assert(KnownLen != 0 && "Have a zero length memmove length!");
10672
10673 bool DstAlignCanChange = false;
10674 MachineFrameInfo &MFI = MF.getFrameInfo();
10675 bool OptSize = shouldLowerMemFuncForSize(MF);
10676 Align Alignment = std::min(DstAlign, SrcAlign);
10677
10678 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
10679 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
10680 DstAlignCanChange = true;
10681
10682 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
10683 std::vector<LLT> MemOps;
10684
10685 const auto &DstMMO = **MI.memoperands_begin();
10686 const auto &SrcMMO = **std::next(MI.memoperands_begin());
10687 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
10688 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
10689
10690 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
10691 // to a bug in its findOptimalMemOpLowering implementation. For now do the
10692 // same thing here.
10693 if (!findGISelOptimalMemOpLowering(
10694 MemOps, Limit,
10695 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
10696 /*IsVolatile*/ true),
10697 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
10698 MF.getFunction().getAttributes(), TLI))
10699 return UnableToLegalize;
10700
10701 if (DstAlignCanChange) {
10702 // Get an estimate of the type from the LLT.
10703 Type *IRTy = getTypeForLLT(MemOps[0], C);
10704 Align NewAlign = DL.getABITypeAlign(IRTy);
10705
10706 // Don't promote to an alignment that would require dynamic stack
10707 // realignment.
10708 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
10709 if (!TRI->hasStackRealignment(MF))
10710 if (MaybeAlign StackAlign = DL.getStackAlignment())
10711 NewAlign = std::min(NewAlign, *StackAlign);
10712
10713 if (NewAlign > Alignment) {
10714 Alignment = NewAlign;
10715 unsigned FI = FIDef->getOperand(1).getIndex();
10716 // Give the stack frame object a larger alignment if needed.
10717 if (MFI.getObjectAlign(FI) < Alignment)
10718 MFI.setObjectAlignment(FI, Alignment);
10719 }
10720 }
10721
10722 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
10723
10724 MachineIRBuilder MIB(MI);
10725 // Memmove requires that we perform the loads first before issuing the stores.
10726 // Apart from that, this loop is pretty much doing the same thing as the
10727 // memcpy codegen function.
10728 unsigned CurrOffset = 0;
10729 SmallVector<Register, 16> LoadVals;
10730 for (auto CopyTy : MemOps) {
10731 // Construct MMO for the load.
10732 auto *LoadMMO =
10733 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
10734
10735 // Create the load.
10736 Register LoadPtr = Src;
10737 if (CurrOffset != 0) {
10738 LLT SrcTy = MRI.getType(Src);
10739 auto Offset =
10740 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
10741 LoadPtr = MIB.buildObjectPtrOffset(SrcTy, Src, Offset).getReg(0);
10742 }
10743 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
10744 CurrOffset += CopyTy.getSizeInBytes();
10745 }
10746
10747 CurrOffset = 0;
10748 for (unsigned I = 0; I < MemOps.size(); ++I) {
10749 LLT CopyTy = MemOps[I];
10750 // Now store the values loaded.
10751 auto *StoreMMO =
10752 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
10753
10754 Register StorePtr = Dst;
10755 if (CurrOffset != 0) {
10756 LLT DstTy = MRI.getType(Dst);
10757 auto Offset =
10758 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
10759 StorePtr = MIB.buildObjectPtrOffset(DstTy, Dst, Offset).getReg(0);
10760 }
10761 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
10762 CurrOffset += CopyTy.getSizeInBytes();
10763 }
10764 MI.eraseFromParent();
10765 return Legalized;
10766}
10767
10770 const unsigned Opc = MI.getOpcode();
10771 // This combine is fairly complex so it's not written with a separate
10772 // matcher function.
10773 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
10774 Opc == TargetOpcode::G_MEMSET) &&
10775 "Expected memcpy like instruction");
10776
10777 auto MMOIt = MI.memoperands_begin();
10778 const MachineMemOperand *MemOp = *MMOIt;
10779
10780 Align DstAlign = MemOp->getBaseAlign();
10781 Align SrcAlign;
10782 auto [Dst, Src, Len] = MI.getFirst3Regs();
10783
10784 if (Opc != TargetOpcode::G_MEMSET) {
10785 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
10786 MemOp = *(++MMOIt);
10787 SrcAlign = MemOp->getBaseAlign();
10788 }
10789
10790 // See if this is a constant length copy
10791 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
10792 if (!LenVRegAndVal)
10793 return UnableToLegalize;
10794 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
10795
10796 if (KnownLen == 0) {
10797 MI.eraseFromParent();
10798 return Legalized;
10799 }
10800
10801 if (MaxLen && KnownLen > MaxLen)
10802 return UnableToLegalize;
10803
10804 bool IsVolatile = MemOp->isVolatile();
10805 if (Opc == TargetOpcode::G_MEMCPY) {
10806 auto &MF = *MI.getParent()->getParent();
10807 const auto &TLI = *MF.getSubtarget().getTargetLowering();
10808 bool OptSize = shouldLowerMemFuncForSize(MF);
10809 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
10810 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
10811 IsVolatile);
10812 }
10813 if (Opc == TargetOpcode::G_MEMMOVE)
10814 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
10815 if (Opc == TargetOpcode::G_MEMSET)
10816 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
10817 return UnableToLegalize;
10818}
unsigned const MachineRegisterInfo * MRI
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S16
constexpr LLT S1
constexpr LLT S32
constexpr LLT S64
AMDGPU Register Bank Select
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition Utils.h:75
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define RTLIBCASE_CMP(LibcallPrefix, ICmpPred)
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver, const TargetLowering &TLI, bool IsSigned=false)
static std::pair< RTLIB::Libcall, CmpInst::Predicate > getFCMPLibcallDesc(const CmpInst::Predicate Pred, unsigned Size)
Returns the corresponding libcall for the given Pred and the ICMP predicate that should be generated ...
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static LegalizerHelper::LegalizeResult loweri64tof16ITOFP(MachineInstr &MI, Register Dst, LLT DstTy, Register Src, LLT SrcTy, MachineIRBuilder &MIRBuilder)
i64->fp16 itofp can be lowered to i64->f64,f64->f32,f32->f16.
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
Implement a low-level type suitable for MachineInstr level instruction selection.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
Register Reg
Register const TargetRegisterInfo * TRI
#define R2(n)
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
uint64_t High
R600 Clause Merge
static constexpr MCPhysReg SPReg
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
The Input class is used to parse a yaml document into in-memory structs and vectors.
static const fltSemantics & IEEEsingle()
Definition APFloat.h:296
static constexpr roundingMode rmTowardZero
Definition APFloat.h:348
static const fltSemantics & IEEEdouble()
Definition APFloat.h:297
static constexpr roundingMode rmNearestTiesToEven
Definition APFloat.h:344
opStatus
IEEE-754R 7: Default exception handling.
Definition APFloat.h:360
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition APFloat.h:1329
APInt bitcastToAPInt() const
Definition APFloat.h:1335
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition APFloat.h:1120
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition APFloat.h:1080
static APFloat getNaN(const fltSemantics &Sem, bool Negative=false, uint64_t payload=0)
Factory for NaN values.
Definition APFloat.h:1091
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition APInt.h:230
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1541
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition APInt.h:1513
LLVM_ABI APInt trunc(unsigned width) const
Truncate to new width.
Definition APInt.cpp:936
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition APInt.h:207
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
LLVM_ABI APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition APInt.cpp:1666
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition APInt.h:210
static APInt getMinValue(unsigned numBits)
Gets minimum unsigned value of APInt for a specific bit width.
Definition APInt.h:217
void negate()
Negate this APInt in place.
Definition APInt.h:1469
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition APInt.h:220
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:874
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition APInt.h:852
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition APInt.h:271
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
iterator end() const
Definition ArrayRef.h:131
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
iterator begin() const
Definition ArrayRef.h:130
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ FCMP_OEQ
0 0 0 1 True if ordered and equal
Definition InstrTypes.h:679
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition InstrTypes.h:682
@ FCMP_ULE
1 1 0 1 True if unordered, less than, or equal
Definition InstrTypes.h:691
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
Definition InstrTypes.h:680
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition InstrTypes.h:681
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition InstrTypes.h:690
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition InstrTypes.h:684
@ FCMP_UEQ
1 0 0 1 True if unordered or equal
Definition InstrTypes.h:687
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ FCMP_UGT
1 0 1 0 True if unordered or greater than
Definition InstrTypes.h:688
@ FCMP_OLE
0 1 0 1 True if ordered and less than or equal
Definition InstrTypes.h:683
@ FCMP_ORD
0 1 1 1 True if ordered (no nans)
Definition InstrTypes.h:685
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ FCMP_UNE
1 1 1 0 True if unordered or not equal
Definition InstrTypes.h:692
@ FCMP_UGE
1 0 1 1 True if unordered, greater than, or equal
Definition InstrTypes.h:689
@ FCMP_UNO
1 0 0 0 True if unordered: isnan(X) | isnan(Y)
Definition InstrTypes.h:686
bool isSigned() const
Definition InstrTypes.h:930
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
const APFloat & getValueAPF() const
Definition Constants.h:325
This is the shared class of boolean and integer constants.
Definition Constants.h:87
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
bool isBigEndian() const
Definition DataLayout.h:215
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:256
LLT getLLTTy(const MachineRegisterInfo &MRI) const
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition TypeSize.h:309
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition TypeSize.h:315
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
AttributeList getAttributes() const
Return the attribute list for this Function.
Definition Function.h:352
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:214
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents a insert subvector.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
Represents a threeway compare.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr bool isValid() const
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
constexpr bool isByteSized() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
constexpr bool isPointer() const
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
constexpr ElementCount getElementCount() const
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
constexpr unsigned getAddressSpace() const
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
constexpr LLT changeVectorElementType(LLT NewEltTy) const
Returns a vector with the same number of elements but the new element type.
constexpr LLT getScalarType() const
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
constexpr LLT changeVectorElementCount(ElementCount EC) const
Return a vector with the same element type and the new element count.
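A minimal sketch (not taken from this file) of how the LLT constructors and change* helpers above compose; the concrete sizes are arbitrary examples:
LLT S32 = LLT::scalar(32);                 // 32-bit scalar
LLT V4S32 = LLT::fixed_vector(4, 32);      // <4 x s32>
LLT V4S16 = V4S32.changeElementSize(16);   // <4 x s16>, same element count
LLT EltTy = V4S32.getElementType();        // s32
LLT V8S32 = V4S32.changeElementCount(ElementCount::getFixed(8)); // <8 x s32>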
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
LLVM_ABI LegalizeResult lowerShlSat(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerThreewayCompare(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LLVM_ABI LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LLVM_ABI LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBitCount(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LLVM_ABI LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF64BitFloatOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSSUBE(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
LLVM_ABI void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LLVM_ABI LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerLoad(GAnyLoad &MI)
LLVM_ABI LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
LLVM_ABI void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LLVM_ABI LegalizeResult lowerFConstant(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerBitreverse(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LLVM_ABI LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LLVM_ABI LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPTOINT_SAT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerEXT(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerStore(GStore &MI)
LLVM_ABI LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastExtractSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_EXTRACT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult narrowScalarShiftMultiway(MachineInstr &MI, LLT TargetTy)
Multi-way shift legalization: directly split wide shifts into target-sized parts in a single step,...
LLVM_ABI LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
LLVM_ABI MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
LLVM_ABI Register buildConstantShiftPart(unsigned Opcode, unsigned PartIdx, unsigned NumParts, ArrayRef< Register > SrcParts, const ShiftParams &Params, LLT TargetTy, LLT ShiftAmtTy)
Generates a single output part for constant shifts using direct indexing.
LLVM_ABI void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LLVM_ABI LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LLVM_ABI LegalizeResult lowerFPTOUI(MachineInstr &MI)
const TargetLowering & getTargetLowering() const
LLVM_ABI LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LLVM_ABI LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult bitcastInsertSubvector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
This attempts to bitcast G_INSERT_SUBVECTOR to CastTy.
LLVM_ABI LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LLVM_ABI LegalizeResult scalarizeVectorBooleanStore(GStore &MI)
Given a store of a boolean vector, scalarize it.
LLVM_ABI LegalizeResult lowerBitcast(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerInsert(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerExtract(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LLVM_ABI LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFPOWI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFAbs(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerVectorReduction(MachineInstr &MI)
const LegalizerInfo & getLegalizerInfo() const
Expose LegalizerInfo so the clients can re-use.
LLVM_ABI LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LLVM_ABI LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarShiftByConstantMultiway(MachineInstr &MI, const APInt &Amt, LLT TargetTy, LLT ShiftAmtTy)
Optimized path for constant shift amounts using static indexing.
LLVM_ABI MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val)
Create a store of Val to a stack temporary and return a load as the same type as Res.
LLVM_ABI LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
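A simplified sketch of how a caller typically reacts to these result values; Helper, MI and LocObserver are assumed to exist in the surrounding code:
LegalizerHelper::LegalizeResult Res = Helper.legalizeInstrStep(MI, LocObserver);
if (Res == LegalizerHelper::UnableToLegalize) {
  // Legalization failed; the exact error path (e.g. reporting a GISel
  // failure and aborting the pass) is target- and driver-specific.
  return false;
}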
LLVM_ABI LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LLVM_ABI LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFCopySign(MachineInstr &MI)
LLVM_ABI LegalizeResult bitcastConcatVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LLVM_ABI LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSADDE(MachineInstr &MI)
LLVM_ABI LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LLVM_ABI LegalizeResult lowerFunnelShift(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
LLVM_ABI void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LLVM_ABI LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerFMad(MachineInstr &MI)
LLVM_ABI LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LLVM_ABI LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult lowerFFloor(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerAbsDiffToMinMax(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LLVM_ABI LegalizeResult lowerFPTOSI(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerUITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerShuffleVector(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerMergeValues(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerVECTOR_COMPRESS(MachineInstr &MI)
LLVM_ABI void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LLVM_ABI LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LLVM_ABI LegalizeResult lowerRotate(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerU64ToF32WithSITOFP(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
LLVM_ABI Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LLVM_ABI LegalizeResult bitcastShuffleVector(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
LLVM_ABI LegalizeResult lowerDIVREM(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerSelect(MachineInstr &MI)
LLVM_ABI LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LLVM_ABI Register buildVariableShiftPart(unsigned Opcode, Register MainOperand, Register ShiftAmt, LLT TargetTy, Register CarryOperand=Register())
Generates a shift part with carry for variable shifts.
LLVM_ABI void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
LLVM_ABI void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LLVM_ABI LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emitting a runtime library call instead.
LLVM_ABI LegalizeResult lowerStackRestore(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult lowerStackSave(MachineInstr &MI)
LLVM_ABI LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LLVM_ABI LegalizeResult lowerTRUNC(MachineInstr &MI)
LLVM_ABI LegalizeResult lowerBswap(MachineInstr &MI)
LLVM_ABI Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LLVM_ABI LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LLVM_ABI Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LLVM_ABI LegalizeResult lowerConstant(MachineInstr &MI)
LLVM_ABI void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
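As a rough illustration (a sketch, not copied from any particular case in this file), the widenScalarSrc/widenScalarDst hooks are normally used from inside a LegalizerHelper member, bracketed by changingInstr/changedInstr; the extension opcode here is an illustrative choice:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, /*OpIdx=*/1, TargetOpcode::G_ANYEXT);
widenScalarSrc(MI, WideTy, /*OpIdx=*/2, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy); // narrows the def back with G_TRUNC
Observer.changedInstr(MI);
return Legalized;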
LLVM_ABI LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
LLVM_ABI LegalizeResult lowerFMinimumMaximum(MachineInstr &MI)
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition MCInstrInfo.h:90
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition MCInstrInfo.h:97
A single uniqued string.
Definition Metadata.h:721
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:618
Machine Value Type.
static LLVM_ABI MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
LLVM_ABI iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ZEXT Op.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation: Zero = G_CONSTANT 0; Res = G_SUB Zero, Op0.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
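A hedged sketch of pairing getMachineMemOperand with buildLoad; AddrReg is an assumed pointer-typed virtual register and the 32-bit size/alignment are arbitrary:
MachineFunction &MF = MIRBuilder.getMF();
MachineMemOperand *MMO = MF.getMachineMemOperand(
    MachinePointerInfo::getUnknownStack(MF), MachineMemOperand::MOLoad,
    LLT::scalar(32), Align(4));
auto Load = MIRBuilder.buildLoad(LLT::scalar(32), AddrReg, *MMO);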
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
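An illustrative sketch (assuming registers A and B of type s32) showing how these build* helpers chain; MachineInstrBuilder results can be fed directly as source operands:
LLT S32 = LLT::scalar(32);
auto One = MIRBuilder.buildConstant(S32, 1); // int64_t convenience overload
auto And = MIRBuilder.buildAnd(S32, A, B);
auto Or = MIRBuilder.buildOr(S32, And, One); // (A & B) | 1
Register Res = Or.getReg(0);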
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
void clearRanges()
Unset the tracked range metadata.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLT getLLTTy(const MachineRegisterInfo &MRI) const
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition StringRef.h:140
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:413
TargetInstrInfo - Interface to description of machine instruction set.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool shouldSignExtendTypeInLibCall(Type *Ty, bool IsSigned) const
Returns true if arguments should be sign-extended in lib calls.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
const Triple & getTargetTriple() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, DriverKit, XROS, or bridgeOS).
Definition Triple.h:627
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
static LLVM_ABI Type * getFP128Ty(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:280
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
static LLVM_ABI Type * getDoubleTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getX86_FP80Ty(LLVMContext &C)
Definition Type.cpp:288
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:284
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:282
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition TypeSize.h:252
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
@ Bitcast
Perform the operation on a different, but equivalently sized type.
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
@ Custom
The target wants to do something special with this combination of operand and type.
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
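An illustrative use of the matcher above, assuming MI is a shift whose amount operand is index 2:
APInt ShAmt;
if (mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(ShAmt))) {
  // The shift amount is a G_CONSTANT; ShAmt now holds its value.
}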
LLVM_ABI Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
LLVM_ABI Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Invariant opcodes: All instruction sets have these as their low opcodes.
This is an optimization pass for GlobalISel generic memory operations.
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
Definition STLExtras.h:829
LLVM_ABI Type * getTypeForLLT(LLT Ty, LLVMContext &C)
Get the type back from LLT.
Definition Utils.cpp:2039
LLVM_ABI MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition Utils.cpp:652
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1667
LLVM_ABI std::optional< APInt > getIConstantVRegVal(Register VReg, const MachineRegisterInfo &MRI)
If VReg is defined by a G_CONSTANT, return the corresponding value.
Definition Utils.cpp:295
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
constexpr int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition MathExtras.h:223
LLVM_ABI MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2148
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
LLVM_ABI std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition Utils.cpp:1569
LLVM_ABI bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition Utils.cpp:1626
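A small sketch of matchUnaryPredicate, checking that an assumed register Reg (a scalar constant or a build-vector/splat of constants) contains only non-zero values:
bool AllNonZero = matchUnaryPredicate(MRI, Reg, [](const Constant *C) {
  const auto *CI = dyn_cast<ConstantInt>(C);
  return CI && !CI->isZero();
});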
LLVM_ABI LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
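Quick numeric illustrations of the power-of-two helpers above:
bool P1 = isPowerOf2_64(64);      // true
bool P2 = isPowerOf2_64(0);       // false: zero is not a power of two > 0
uint64_t Ceil = PowerOf2Ceil(33); // 64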
LLVM_ABI LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition Utils.cpp:1193
unsigned M1(unsigned Val)
Definition VE.h:377
constexpr T MinAlign(U A, V B)
A and B are either alignments or offsets.
Definition MathExtras.h:357
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
LLVM_ABI LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI EVT getApproximateEVTForLLT(LLT Ty, LLVMContext &Ctx)
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:507
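A sketch of extractParts splitting an assumed 128-bit register WideReg into four s32 pieces:
SmallVector<Register, 4> Parts;
extractParts(WideReg, LLT::scalar(32), /*NumParts=*/4, Parts, MIRBuilder, MRI);
// Parts[0]..Parts[3] now cover WideReg from least- to most-significant bits.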
To bit_cast(const From &from) noexcept
Definition bit.h:90
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1847
constexpr int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition MathExtras.h:232
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI bool isKnownNeverNaN(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if the floating-point scalar value is not a NaN or if the floating-point vector value has...
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:434
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition Utils.h:349
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1909
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition Alignment.h:100
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition Alignment.h:197
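Small numeric examples for the alignment helpers above:
uint64_t Padded = alignTo(10, Align(8));      // 16
unsigned Shift = Log2(Align(16));             // 4
Align Common = commonAlignment(Align(16), 8); // Align(8): limited by offset 8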
LLVM_ABI LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition Utils.cpp:1281
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition bit.h:330
LLVM_ABI void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition Utils.cpp:610
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
SmallVector< ISD::ArgFlagsTy, 4 > Flags
CallingConv::ID CallConv
Calling convention to be used for the call.
bool isKnownNeverZero() const
Return true if it's known this can never be zero.
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
LLVM_ABI unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static LLVM_ABI MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static LLVM_ABI MachinePointerInfo getUnknownStack(MachineFunction &MF)
Stack memory without other information.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)