1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
35#include "llvm/Support/Debug.h"
39#include <numeric>
40#include <optional>
41
42#define DEBUG_TYPE "legalizer"
43
44using namespace llvm;
45using namespace LegalizeActions;
46using namespace MIPatternMatch;
47
48/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
49///
50/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
51/// with any leftover piece as type \p LeftoverTy
52///
53/// Returns -1 in the first element of the pair if the breakdown is not
54/// satisfiable.
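/// For example (illustrative values, not from the source): breaking
/// OrigTy = s88 into NarrowTy = s32 pieces yields {2, 1} with LeftoverTy set
/// to s24, i.e. two s32 parts plus one s24 leftover (2 * 32 + 24 = 88).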
55static std::pair<int, int>
56getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
57 assert(!LeftoverTy.isValid() && "this is an out argument");
58
59 unsigned Size = OrigTy.getSizeInBits();
60 unsigned NarrowSize = NarrowTy.getSizeInBits();
61 unsigned NumParts = Size / NarrowSize;
62 unsigned LeftoverSize = Size - NumParts * NarrowSize;
63 assert(Size > NarrowSize);
64
65 if (LeftoverSize == 0)
66 return {NumParts, 0};
67
68 if (NarrowTy.isVector()) {
69 unsigned EltSize = OrigTy.getScalarSizeInBits();
70 if (LeftoverSize % EltSize != 0)
71 return {-1, -1};
72 LeftoverTy = LLT::scalarOrVector(
73 ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
74 } else {
75 LeftoverTy = LLT::scalar(LeftoverSize);
76 }
77
78 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
79 return std::make_pair(NumParts, NumLeftover);
80}
81
83 static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
84 if (!Ty.isScalar())
85 return nullptr;
86
87 switch (Ty.getSizeInBits()) {
88 case 16:
89 return Type::getHalfTy(Ctx);
90 case 32:
91 return Type::getFloatTy(Ctx);
92 case 64:
93 return Type::getDoubleTy(Ctx);
94 case 80:
95 return Type::getX86_FP80Ty(Ctx);
96 case 128:
97 return Type::getFP128Ty(Ctx);
98 default:
99 return nullptr;
100 }
101}
102
103 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
104 GISelChangeObserver &Observer,
105 MachineIRBuilder &Builder)
106 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
107 LI(*MF.getSubtarget().getLegalizerInfo()),
108 TLI(*MF.getSubtarget().getTargetLowering()), KB(nullptr) {}
109
110 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
111 GISelChangeObserver &Observer,
112 MachineIRBuilder &B, GISelKnownBits *KB)
113 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
114 TLI(*MF.getSubtarget().getTargetLowering()), KB(KB) {}
115
116 LegalizerHelper::LegalizeResult
117 LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
118 LostDebugLocObserver &LocObserver) {
119 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
120
121 MIRBuilder.setInstrAndDebugLoc(MI);
122
123 if (isa<GIntrinsic>(MI))
124 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
125 auto Step = LI.getAction(MI, MRI);
126 switch (Step.Action) {
127 case Legal:
128 LLVM_DEBUG(dbgs() << ".. Already legal\n");
129 return AlreadyLegal;
130 case Libcall:
131 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
132 return libcall(MI, LocObserver);
133 case NarrowScalar:
134 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
135 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
136 case WidenScalar:
137 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
138 return widenScalar(MI, Step.TypeIdx, Step.NewType);
139 case Bitcast:
140 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
141 return bitcast(MI, Step.TypeIdx, Step.NewType);
142 case Lower:
143 LLVM_DEBUG(dbgs() << ".. Lower\n");
144 return lower(MI, Step.TypeIdx, Step.NewType);
145 case FewerElements:
146 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
147 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
148 case MoreElements:
149 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
150 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
151 case Custom:
152 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
153 return LI.legalizeCustom(*this, MI, LocObserver) ? Legalized
154 : UnableToLegalize;
155 default:
156 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
157 return UnableToLegalize;
158 }
159}
160
161void LegalizerHelper::insertParts(Register DstReg,
162 LLT ResultTy, LLT PartTy,
163 ArrayRef<Register> PartRegs,
164 LLT LeftoverTy,
165 ArrayRef<Register> LeftoverRegs) {
166 if (!LeftoverTy.isValid()) {
167 assert(LeftoverRegs.empty());
168
169 if (!ResultTy.isVector()) {
170 MIRBuilder.buildMergeLikeInstr(DstReg, PartRegs);
171 return;
172 }
173
174 if (PartTy.isVector())
175 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
176 else
177 MIRBuilder.buildBuildVector(DstReg, PartRegs);
178 return;
179 }
180
181 // Merge sub-vectors with different number of elements and insert into DstReg.
182 if (ResultTy.isVector()) {
183 assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
184 SmallVector<Register, 8> AllRegs;
185 for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
186 AllRegs.push_back(Reg);
187 return mergeMixedSubvectors(DstReg, AllRegs);
188 }
189
190 SmallVector<Register> GCDRegs;
191 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
192 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
193 extractGCDType(GCDRegs, GCDTy, PartReg);
194 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
195 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
196}
197
198void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
199 Register Reg) {
200 LLT Ty = MRI.getType(Reg);
201 SmallVector<Register, 0> RegElts;
202 extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts,
203 MIRBuilder, MRI);
204 Elts.append(RegElts);
205}
206
207/// Merge \p PartRegs with different types into \p DstReg.
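/// Illustrative sketch (made-up types): for DstReg of type <5 x s16> with
/// PartRegs = { <4 x s16>, s16 }, the vector part is unmerged into four s16
/// elements, the trailing s16 leftover is appended as the fifth element, and
/// a single merge-like instruction rebuilds the <5 x s16> destination.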
208void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
209 ArrayRef<Register> PartRegs) {
210 SmallVector<Register, 0> AllElts;
211 for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
212 appendVectorElts(AllElts, PartRegs[i]);
213
214 Register Leftover = PartRegs[PartRegs.size() - 1];
215 if (MRI.getType(Leftover).isScalar())
216 AllElts.push_back(Leftover);
217 else
218 appendVectorElts(AllElts, Leftover);
219
220 MIRBuilder.buildMergeLikeInstr(DstReg, AllElts);
221}
222
223/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
224 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
225 const MachineInstr &MI) {
226 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
227
228 const int StartIdx = Regs.size();
229 const int NumResults = MI.getNumOperands() - 1;
230 Regs.resize(Regs.size() + NumResults);
231 for (int I = 0; I != NumResults; ++I)
232 Regs[StartIdx + I] = MI.getOperand(I).getReg();
233}
234
235void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
236 LLT GCDTy, Register SrcReg) {
237 LLT SrcTy = MRI.getType(SrcReg);
238 if (SrcTy == GCDTy) {
239 // If the source already evenly divides the result type, we don't need to do
240 // anything.
241 Parts.push_back(SrcReg);
242 } else {
243 // Need to split into common type sized pieces.
244 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
245 getUnmergeResults(Parts, *Unmerge);
246 }
247}
248
249LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
250 LLT NarrowTy, Register SrcReg) {
251 LLT SrcTy = MRI.getType(SrcReg);
252 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
253 extractGCDType(Parts, GCDTy, SrcReg);
254 return GCDTy;
255}
256
257LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
258 SmallVectorImpl<Register> &VRegs,
259 unsigned PadStrategy) {
260 LLT LCMTy = getLCMType(DstTy, NarrowTy);
261
262 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
263 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
264 int NumOrigSrc = VRegs.size();
265
266 Register PadReg;
267
268 // Get a value we can use to pad the source value if the sources won't evenly
269 // cover the result type.
270 if (NumOrigSrc < NumParts * NumSubParts) {
271 if (PadStrategy == TargetOpcode::G_ZEXT)
272 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
273 else if (PadStrategy == TargetOpcode::G_ANYEXT)
274 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
275 else {
276 assert(PadStrategy == TargetOpcode::G_SEXT);
277
278 // Shift the sign bit of the low register through the high register.
279 auto ShiftAmt =
280 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
281 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
282 }
283 }
284
285 // Registers for the final merge to be produced.
286 SmallVector<Register, 4> Remerge(NumParts);
287
288 // Registers needed for intermediate merges, which will be merged into a
289 // source for Remerge.
290 SmallVector<Register, 4> SubMerge(NumSubParts);
291
292 // Once we've fully read off the end of the original source bits, we can reuse
293 // the same high bits for remaining padding elements.
294 Register AllPadReg;
295
296 // Build merges to the LCM type to cover the original result type.
297 for (int I = 0; I != NumParts; ++I) {
298 bool AllMergePartsArePadding = true;
299
300 // Build the requested merges to the requested type.
301 for (int J = 0; J != NumSubParts; ++J) {
302 int Idx = I * NumSubParts + J;
303 if (Idx >= NumOrigSrc) {
304 SubMerge[J] = PadReg;
305 continue;
306 }
307
308 SubMerge[J] = VRegs[Idx];
309
310 // There are meaningful bits here we can't reuse later.
311 AllMergePartsArePadding = false;
312 }
313
314 // If we've filled up a complete piece with padding bits, we can directly
315 // emit the natural sized constant if applicable, rather than a merge of
316 // smaller constants.
317 if (AllMergePartsArePadding && !AllPadReg) {
318 if (PadStrategy == TargetOpcode::G_ANYEXT)
319 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
320 else if (PadStrategy == TargetOpcode::G_ZEXT)
321 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
322
323 // If this is a sign extension, we can't materialize a trivial constant
324 // with the right type and have to produce a merge.
325 }
326
327 if (AllPadReg) {
328 // Avoid creating additional instructions if we're just adding additional
329 // copies of padding bits.
330 Remerge[I] = AllPadReg;
331 continue;
332 }
333
334 if (NumSubParts == 1)
335 Remerge[I] = SubMerge[0];
336 else
337 Remerge[I] = MIRBuilder.buildMergeLikeInstr(NarrowTy, SubMerge).getReg(0);
338
339 // In the sign extend padding case, re-use the first all-signbit merge.
340 if (AllMergePartsArePadding && !AllPadReg)
341 AllPadReg = Remerge[I];
342 }
343
344 VRegs = std::move(Remerge);
345 return LCMTy;
346}
347
348void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
349 ArrayRef<Register> RemergeRegs) {
350 LLT DstTy = MRI.getType(DstReg);
351
352 // Create the merge to the widened source, and extract the relevant bits into
353 // the result.
354
355 if (DstTy == LCMTy) {
356 MIRBuilder.buildMergeLikeInstr(DstReg, RemergeRegs);
357 return;
358 }
359
360 auto Remerge = MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs);
361 if (DstTy.isScalar() && LCMTy.isScalar()) {
362 MIRBuilder.buildTrunc(DstReg, Remerge);
363 return;
364 }
365
366 if (LCMTy.isVector()) {
367 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
368 SmallVector<Register, 8> UnmergeDefs(NumDefs);
369 UnmergeDefs[0] = DstReg;
370 for (unsigned I = 1; I != NumDefs; ++I)
371 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
372
373 MIRBuilder.buildUnmerge(UnmergeDefs,
374 MIRBuilder.buildMergeLikeInstr(LCMTy, RemergeRegs));
375 return;
376 }
377
378 llvm_unreachable("unhandled case");
379}
380
381static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
382#define RTLIBCASE_INT(LibcallPrefix) \
383 do { \
384 switch (Size) { \
385 case 32: \
386 return RTLIB::LibcallPrefix##32; \
387 case 64: \
388 return RTLIB::LibcallPrefix##64; \
389 case 128: \
390 return RTLIB::LibcallPrefix##128; \
391 default: \
392 llvm_unreachable("unexpected size"); \
393 } \
394 } while (0)
395
396#define RTLIBCASE(LibcallPrefix) \
397 do { \
398 switch (Size) { \
399 case 32: \
400 return RTLIB::LibcallPrefix##32; \
401 case 64: \
402 return RTLIB::LibcallPrefix##64; \
403 case 80: \
404 return RTLIB::LibcallPrefix##80; \
405 case 128: \
406 return RTLIB::LibcallPrefix##128; \
407 default: \
408 llvm_unreachable("unexpected size"); \
409 } \
410 } while (0)
411
412 switch (Opcode) {
413 case TargetOpcode::G_MUL:
414 RTLIBCASE_INT(MUL_I);
415 case TargetOpcode::G_SDIV:
416 RTLIBCASE_INT(SDIV_I);
417 case TargetOpcode::G_UDIV:
418 RTLIBCASE_INT(UDIV_I);
419 case TargetOpcode::G_SREM:
420 RTLIBCASE_INT(SREM_I);
421 case TargetOpcode::G_UREM:
422 RTLIBCASE_INT(UREM_I);
423 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
424 RTLIBCASE_INT(CTLZ_I);
425 case TargetOpcode::G_FADD:
426 RTLIBCASE(ADD_F);
427 case TargetOpcode::G_FSUB:
428 RTLIBCASE(SUB_F);
429 case TargetOpcode::G_FMUL:
430 RTLIBCASE(MUL_F);
431 case TargetOpcode::G_FDIV:
432 RTLIBCASE(DIV_F);
433 case TargetOpcode::G_FEXP:
434 RTLIBCASE(EXP_F);
435 case TargetOpcode::G_FEXP2:
436 RTLIBCASE(EXP2_F);
437 case TargetOpcode::G_FEXP10:
438 RTLIBCASE(EXP10_F);
439 case TargetOpcode::G_FREM:
440 RTLIBCASE(REM_F);
441 case TargetOpcode::G_FPOW:
442 RTLIBCASE(POW_F);
443 case TargetOpcode::G_FPOWI:
444 RTLIBCASE(POWI_F);
445 case TargetOpcode::G_FMA:
446 RTLIBCASE(FMA_F);
447 case TargetOpcode::G_FSIN:
448 RTLIBCASE(SIN_F);
449 case TargetOpcode::G_FCOS:
450 RTLIBCASE(COS_F);
451 case TargetOpcode::G_FLOG10:
452 RTLIBCASE(LOG10_F);
453 case TargetOpcode::G_FLOG:
454 RTLIBCASE(LOG_F);
455 case TargetOpcode::G_FLOG2:
456 RTLIBCASE(LOG2_F);
457 case TargetOpcode::G_FLDEXP:
458 RTLIBCASE(LDEXP_F);
459 case TargetOpcode::G_FCEIL:
460 RTLIBCASE(CEIL_F);
461 case TargetOpcode::G_FFLOOR:
462 RTLIBCASE(FLOOR_F);
463 case TargetOpcode::G_FMINNUM:
464 RTLIBCASE(FMIN_F);
465 case TargetOpcode::G_FMAXNUM:
466 RTLIBCASE(FMAX_F);
467 case TargetOpcode::G_FSQRT:
468 RTLIBCASE(SQRT_F);
469 case TargetOpcode::G_FRINT:
470 RTLIBCASE(RINT_F);
471 case TargetOpcode::G_FNEARBYINT:
472 RTLIBCASE(NEARBYINT_F);
473 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
474 RTLIBCASE(ROUNDEVEN_F);
475 }
476 llvm_unreachable("Unknown libcall function");
477}
478
479/// True if an instruction is in tail position in its caller. Intended for
480/// legalizing libcalls as tail calls when possible.
481 static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result,
482 MachineInstr &MI,
483 const TargetInstrInfo &TII,
484 MachineRegisterInfo &MRI) {
485 MachineBasicBlock &MBB = *MI.getParent();
486 const Function &F = MBB.getParent()->getFunction();
487
488 // Conservatively require the attributes of the call to match those of
489 // the return. Ignore NoAlias and NonNull because they don't affect the
490 // call sequence.
491 AttributeList CallerAttrs = F.getAttributes();
492 if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
493 .removeAttribute(Attribute::NoAlias)
494 .removeAttribute(Attribute::NonNull)
495 .hasAttributes())
496 return false;
497
498 // It's not safe to eliminate the sign / zero extension of the return value.
499 if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
500 CallerAttrs.hasRetAttr(Attribute::SExt))
501 return false;
502
503 // Only tail call if the following instruction is a standard return or if we
504 // have a `thisreturn` callee, and a sequence like:
505 //
506 // G_MEMCPY %0, %1, %2
507 // $x0 = COPY %0
508 // RET_ReallyLR implicit $x0
509 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
510 if (Next != MBB.instr_end() && Next->isCopy()) {
511 if (MI.getOpcode() == TargetOpcode::G_BZERO)
512 return false;
513
514 // For MEMCPY/MEMMOVE/MEMSET these will be the first use (the dst), as the
515 // memcpy/etc routines return the same parameter. For others it will be the
516 // returned value.
517 Register VReg = MI.getOperand(0).getReg();
518 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
519 return false;
520
521 Register PReg = Next->getOperand(0).getReg();
522 if (!PReg.isPhysical())
523 return false;
524
525 auto Ret = next_nodbg(Next, MBB.instr_end());
526 if (Ret == MBB.instr_end() || !Ret->isReturn())
527 return false;
528
529 if (Ret->getNumImplicitOperands() != 1)
530 return false;
531
532 if (!Ret->getOperand(0).isReg() || PReg != Ret->getOperand(0).getReg())
533 return false;
534
535 // Skip over the COPY that we just validated.
536 Next = Ret;
537 }
538
539 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
540 return false;
541
542 return true;
543}
544
545 LegalizerHelper::LegalizeResult
546 llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
547 const CallLowering::ArgInfo &Result,
548 ArrayRef<CallLowering::ArgInfo> Args,
549 const CallingConv::ID CC, LostDebugLocObserver &LocObserver,
550 MachineInstr *MI) {
551 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
552
554 Info.CallConv = CC;
556 Info.OrigRet = Result;
557 if (MI)
558 Info.IsTailCall =
559 (Result.Ty->isVoidTy() ||
560 Result.Ty == MIRBuilder.getMF().getFunction().getReturnType()) &&
561 isLibCallInTailPosition(Result, *MI, MIRBuilder.getTII(),
562 *MIRBuilder.getMRI());
563
564 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
565 if (!CLI.lowerCall(MIRBuilder, Info))
566 return LegalizerHelper::UnableToLegalize;
567
568 if (MI && Info.LoweredTailCall) {
569 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
570
571 // Check debug locations before removing the return.
572 LocObserver.checkpoint(true);
573
574 // We must have a return following the call (or debug insts) to get past
575 // isLibCallInTailPosition.
576 do {
577 MachineInstr *Next = MI->getNextNode();
578 assert(Next &&
579 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
580 "Expected instr following MI to be return or debug inst?");
581 // We lowered a tail call, so the call is now the return from the block.
582 // Delete the old return.
583 Next->eraseFromParent();
584 } while (MI->getNextNode());
585
586 // We expect to lose the debug location from the return.
587 LocObserver.checkpoint(false);
588 }
589 return LegalizerHelper::Legalized;
590 }
591
592 LegalizerHelper::LegalizeResult
593 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
594 const CallLowering::ArgInfo &Result,
595 ArrayRef<CallLowering::ArgInfo> Args,
596 LostDebugLocObserver &LocObserver, MachineInstr *MI) {
597 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
598 const char *Name = TLI.getLibcallName(Libcall);
599 if (!Name)
600 return LegalizerHelper::UnableToLegalize;
601 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
602 return createLibcall(MIRBuilder, Name, Result, Args, CC, LocObserver, MI);
603}
604
605// Useful for libcalls where all operands have the same type.
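// For example (illustrative, target-independent sketch): a 64-bit G_FREM maps
// via getRTLibDesc to RTLIB::REM_F64, so it is replaced by a call to fmod with
// both s64 operands passed as double and the double result written back to the
// original destination register.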
606 static LegalizerHelper::LegalizeResult
607 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
608 Type *OpType, LostDebugLocObserver &LocObserver) {
609 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
610
611 // FIXME: What does the original arg index mean here?
612 SmallVector<CallLowering::ArgInfo, 3> Args;
613 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
614 Args.push_back({MO.getReg(), OpType, 0});
615 return createLibcall(MIRBuilder, Libcall,
616 {MI.getOperand(0).getReg(), OpType, 0}, Args,
617 LocObserver, &MI);
618}
619
620 LegalizerHelper::LegalizeResult
621 llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
622 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
623 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
624
625 SmallVector<CallLowering::ArgInfo, 3> Args;
626 // Add all the args, except for the last which is an imm denoting 'tail'.
627 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
628 Register Reg = MI.getOperand(i).getReg();
629
630 // Need to derive an IR type for call lowering.
631 LLT OpLLT = MRI.getType(Reg);
632 Type *OpTy = nullptr;
633 if (OpLLT.isPointer())
634 OpTy = PointerType::get(Ctx, OpLLT.getAddressSpace());
635 else
636 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
637 Args.push_back({Reg, OpTy, 0});
638 }
639
640 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
641 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
642 RTLIB::Libcall RTLibcall;
643 unsigned Opc = MI.getOpcode();
644 switch (Opc) {
645 case TargetOpcode::G_BZERO:
646 RTLibcall = RTLIB::BZERO;
647 break;
648 case TargetOpcode::G_MEMCPY:
649 RTLibcall = RTLIB::MEMCPY;
650 Args[0].Flags[0].setReturned();
651 break;
652 case TargetOpcode::G_MEMMOVE:
653 RTLibcall = RTLIB::MEMMOVE;
654 Args[0].Flags[0].setReturned();
655 break;
656 case TargetOpcode::G_MEMSET:
657 RTLibcall = RTLIB::MEMSET;
658 Args[0].Flags[0].setReturned();
659 break;
660 default:
661 llvm_unreachable("unsupported opcode");
662 }
663 const char *Name = TLI.getLibcallName(RTLibcall);
664
665 // Unsupported libcall on the target.
666 if (!Name) {
667 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
668 << MIRBuilder.getTII().getName(Opc) << "\n");
669 return LegalizerHelper::UnableToLegalize;
670 }
671
672 CallLowering::CallLoweringInfo Info;
673 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
674 Info.Callee = MachineOperand::CreateES(Name);
675 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
676 Info.IsTailCall =
677 MI.getOperand(MI.getNumOperands() - 1).getImm() &&
678 isLibCallInTailPosition(Info.OrigRet, MI, MIRBuilder.getTII(), MRI);
679
680 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
681 if (!CLI.lowerCall(MIRBuilder, Info))
682 return LegalizerHelper::UnableToLegalize;
683
684 if (Info.LoweredTailCall) {
685 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
686
687 // Check debug locations before removing the return.
688 LocObserver.checkpoint(true);
689
690 // We must have a return following the call (or debug insts) to get past
691 // isLibCallInTailPosition.
692 do {
693 MachineInstr *Next = MI.getNextNode();
694 assert(Next &&
695 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
696 "Expected instr following MI to be return or debug inst?");
697 // We lowered a tail call, so the call is now the return from the block.
698 // Delete the old return.
699 Next->eraseFromParent();
700 } while (MI.getNextNode());
701
702 // We expect to lose the debug location from the return.
703 LocObserver.checkpoint(false);
704 }
705
706 return LegalizerHelper::Legalized;
707 }
708
709 static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI) {
710 unsigned Opc = MI.getOpcode();
711 auto &AtomicMI = cast<GMemOperation>(MI);
712 auto &MMO = AtomicMI.getMMO();
713 auto Ordering = MMO.getMergedOrdering();
714 LLT MemType = MMO.getMemoryType();
715 uint64_t MemSize = MemType.getSizeInBytes();
716 if (MemType.isVector())
717 return RTLIB::UNKNOWN_LIBCALL;
718
719#define LCALLS(A, B) \
720 { A##B##_RELAX, A##B##_ACQ, A##B##_REL, A##B##_ACQ_REL }
721#define LCALL5(A) \
722 LCALLS(A, 1), LCALLS(A, 2), LCALLS(A, 4), LCALLS(A, 8), LCALLS(A, 16)
723 switch (Opc) {
724 case TargetOpcode::G_ATOMIC_CMPXCHG:
725 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
726 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_CAS)};
727 return getOutlineAtomicHelper(LC, Ordering, MemSize);
728 }
729 case TargetOpcode::G_ATOMICRMW_XCHG: {
730 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_SWP)};
731 return getOutlineAtomicHelper(LC, Ordering, MemSize);
732 }
733 case TargetOpcode::G_ATOMICRMW_ADD:
734 case TargetOpcode::G_ATOMICRMW_SUB: {
735 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDADD)};
736 return getOutlineAtomicHelper(LC, Ordering, MemSize);
737 }
738 case TargetOpcode::G_ATOMICRMW_AND: {
739 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDCLR)};
740 return getOutlineAtomicHelper(LC, Ordering, MemSize);
741 }
742 case TargetOpcode::G_ATOMICRMW_OR: {
743 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDSET)};
744 return getOutlineAtomicHelper(LC, Ordering, MemSize);
745 }
746 case TargetOpcode::G_ATOMICRMW_XOR: {
747 const RTLIB::Libcall LC[5][4] = {LCALL5(RTLIB::OUTLINE_ATOMIC_LDEOR)};
748 return getOutlineAtomicHelper(LC, Ordering, MemSize);
749 }
750 default:
751 return RTLIB::UNKNOWN_LIBCALL;
752 }
753#undef LCALLS
754#undef LCALL5
755}
756
757 static LegalizerHelper::LegalizeResult
758 createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI) {
759 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
760
761 Type *RetTy;
762 SmallVector<Register> RetRegs;
763 SmallVector<CallLowering::ArgInfo, 3> Args;
764 unsigned Opc = MI.getOpcode();
765 switch (Opc) {
766 case TargetOpcode::G_ATOMIC_CMPXCHG:
767 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
768 Register Success;
769 LLT SuccessLLT;
770 auto [Ret, RetLLT, Mem, MemLLT, Cmp, CmpLLT, New, NewLLT] =
771 MI.getFirst4RegLLTs();
772 RetRegs.push_back(Ret);
773 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
774 if (Opc == TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS) {
775 std::tie(Ret, RetLLT, Success, SuccessLLT, Mem, MemLLT, Cmp, CmpLLT, New,
776 NewLLT) = MI.getFirst5RegLLTs();
777 RetRegs.push_back(Success);
778 RetTy = StructType::get(
779 Ctx, {RetTy, IntegerType::get(Ctx, SuccessLLT.getSizeInBits())});
780 }
781 Args.push_back({Cmp, IntegerType::get(Ctx, CmpLLT.getSizeInBits()), 0});
782 Args.push_back({New, IntegerType::get(Ctx, NewLLT.getSizeInBits()), 0});
783 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
784 break;
785 }
786 case TargetOpcode::G_ATOMICRMW_XCHG:
787 case TargetOpcode::G_ATOMICRMW_ADD:
788 case TargetOpcode::G_ATOMICRMW_SUB:
789 case TargetOpcode::G_ATOMICRMW_AND:
790 case TargetOpcode::G_ATOMICRMW_OR:
791 case TargetOpcode::G_ATOMICRMW_XOR: {
792 auto [Ret, RetLLT, Mem, MemLLT, Val, ValLLT] = MI.getFirst3RegLLTs();
793 RetRegs.push_back(Ret);
794 RetTy = IntegerType::get(Ctx, RetLLT.getSizeInBits());
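// Note: the outline atomic helpers selected above only provide bit-clear
// (LDCLR) and add (LDADD) primitives, so G_ATOMICRMW_AND is lowered as a
// clear of the complemented value and G_ATOMICRMW_SUB as an add of the
// negated value before the call is emitted.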
795 if (Opc == TargetOpcode::G_ATOMICRMW_AND)
796 Val =
797 MIRBuilder.buildXor(ValLLT, MIRBuilder.buildConstant(ValLLT, -1), Val)
798 .getReg(0);
799 else if (Opc == TargetOpcode::G_ATOMICRMW_SUB)
800 Val =
801 MIRBuilder.buildSub(ValLLT, MIRBuilder.buildConstant(ValLLT, 0), Val)
802 .getReg(0);
803 Args.push_back({Val, IntegerType::get(Ctx, ValLLT.getSizeInBits()), 0});
804 Args.push_back({Mem, PointerType::get(Ctx, MemLLT.getAddressSpace()), 0});
805 break;
806 }
807 default:
808 llvm_unreachable("unsupported opcode");
809 }
810
811 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
812 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
813 RTLIB::Libcall RTLibcall = getOutlineAtomicLibcall(MI);
814 const char *Name = TLI.getLibcallName(RTLibcall);
815
816 // Unsupported libcall on the target.
817 if (!Name) {
818 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
819 << MIRBuilder.getTII().getName(Opc) << "\n");
820 return LegalizerHelper::UnableToLegalize;
821 }
822
823 CallLowering::CallLoweringInfo Info;
824 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
825 Info.Callee = MachineOperand::CreateES(Name);
826 Info.OrigRet = CallLowering::ArgInfo(RetRegs, RetTy, 0);
827
828 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
829 if (!CLI.lowerCall(MIRBuilder, Info))
830 return LegalizerHelper::UnableToLegalize;
831
832 return LegalizerHelper::Legalized;
833 }
834
835static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
836 Type *FromType) {
837 auto ToMVT = MVT::getVT(ToType);
838 auto FromMVT = MVT::getVT(FromType);
839
840 switch (Opcode) {
841 case TargetOpcode::G_FPEXT:
842 return RTLIB::getFPEXT(FromMVT, ToMVT);
843 case TargetOpcode::G_FPTRUNC:
844 return RTLIB::getFPROUND(FromMVT, ToMVT);
845 case TargetOpcode::G_FPTOSI:
846 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
847 case TargetOpcode::G_FPTOUI:
848 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
849 case TargetOpcode::G_SITOFP:
850 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
851 case TargetOpcode::G_UITOFP:
852 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
853 }
854 llvm_unreachable("Unsupported libcall function");
855}
856
857 static LegalizerHelper::LegalizeResult
858 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
859 Type *FromType, LostDebugLocObserver &LocObserver) {
860 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
861 return createLibcall(
862 MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType, 0},
863 {{MI.getOperand(1).getReg(), FromType, 0}}, LocObserver, &MI);
864}
865
866static RTLIB::Libcall
867 getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI) {
868 RTLIB::Libcall RTLibcall;
869 switch (MI.getOpcode()) {
870 case TargetOpcode::G_GET_FPENV:
871 RTLibcall = RTLIB::FEGETENV;
872 break;
873 case TargetOpcode::G_SET_FPENV:
874 case TargetOpcode::G_RESET_FPENV:
875 RTLibcall = RTLIB::FESETENV;
876 break;
877 case TargetOpcode::G_GET_FPMODE:
878 RTLibcall = RTLIB::FEGETMODE;
879 break;
880 case TargetOpcode::G_SET_FPMODE:
881 case TargetOpcode::G_RESET_FPMODE:
882 RTLibcall = RTLIB::FESETMODE;
883 break;
884 default:
885 llvm_unreachable("Unexpected opcode");
886 }
887 return RTLibcall;
888}
889
890// Some library functions that read FP state (fegetmode, fegetenv) write the
891// state into a region in memory. IR intrinsics that do the same operations
892 // (get_fpmode, get_fpenv) return the state as an integer value. To implement
893 // these intrinsics via the library functions, we need to use a temporary variable,
894// for example:
895//
896// %0:_(s32) = G_GET_FPMODE
897//
898// is transformed to:
899//
900// %1:_(p0) = G_FRAME_INDEX %stack.0
901// BL &fegetmode
902// %0:_(s32) = G_LOAD % 1
903//
904 LegalizerHelper::LegalizeResult
905 LegalizerHelper::createGetStateLibcall(MachineIRBuilder &MIRBuilder,
906 MachineInstr &MI,
907 LostDebugLocObserver &LocObserver) {
908 const DataLayout &DL = MIRBuilder.getDataLayout();
909 auto &MF = MIRBuilder.getMF();
910 auto &MRI = *MIRBuilder.getMRI();
911 auto &Ctx = MF.getFunction().getContext();
912
913 // Create temporary, where library function will put the read state.
914 Register Dst = MI.getOperand(0).getReg();
915 LLT StateTy = MRI.getType(Dst);
916 TypeSize StateSize = StateTy.getSizeInBytes();
917 Align TempAlign = getStackTemporaryAlignment(StateTy);
918 MachinePointerInfo TempPtrInfo;
919 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
920
921 // Create a call to library function, with the temporary as an argument.
922 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
923 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
924 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
925 auto Res =
926 createLibcall(MIRBuilder, RTLibcall,
927 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
928 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
929 LocObserver, nullptr);
930 if (Res != LegalizerHelper::Legalized)
931 return Res;
932
933 // Create a load from the temporary.
934 MachineMemOperand *MMO = MF.getMachineMemOperand(
935 TempPtrInfo, MachineMemOperand::MOLoad, StateTy, TempAlign);
936 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, Dst, Temp, *MMO);
937
938 return LegalizerHelper::Legalized;
939 }
940
941 // Similar to `createGetStateLibcall`, the function calls a library function
942 // using transient space on the stack. In this case the library function reads
943 // the content of the memory region.
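// For example (sketch mirroring the fegetmode example above; register numbers
// are made up):
//
//   G_SET_FPMODE %0:_(s32)
//
// is expected to become roughly:
//
//   %1:_(p0) = G_FRAME_INDEX %stack.0
//   G_STORE %0:_(s32), %1:_(p0)
//   BL &fesetmode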
944 LegalizerHelper::LegalizeResult
945 LegalizerHelper::createSetStateLibcall(MachineIRBuilder &MIRBuilder,
946 MachineInstr &MI,
947 LostDebugLocObserver &LocObserver) {
948 const DataLayout &DL = MIRBuilder.getDataLayout();
949 auto &MF = MIRBuilder.getMF();
950 auto &MRI = *MIRBuilder.getMRI();
951 auto &Ctx = MF.getFunction().getContext();
952
953 // Create temporary, where library function will get the new state.
954 Register Src = MI.getOperand(0).getReg();
955 LLT StateTy = MRI.getType(Src);
956 TypeSize StateSize = StateTy.getSizeInBytes();
957 Align TempAlign = getStackTemporaryAlignment(StateTy);
958 MachinePointerInfo TempPtrInfo;
959 auto Temp = createStackTemporary(StateSize, TempAlign, TempPtrInfo);
960
961 // Put the new state into the temporary.
962 MachineMemOperand *MMO = MF.getMachineMemOperand(
963 TempPtrInfo, MachineMemOperand::MOStore, StateTy, TempAlign);
964 MIRBuilder.buildStore(Src, Temp, *MMO);
965
966 // Create a call to library function, with the temporary as an argument.
967 unsigned TempAddrSpace = DL.getAllocaAddrSpace();
968 Type *StatePtrTy = PointerType::get(Ctx, TempAddrSpace);
969 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
970 return createLibcall(MIRBuilder, RTLibcall,
971 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
972 CallLowering::ArgInfo({Temp.getReg(0), StatePtrTy, 0}),
973 LocObserver, nullptr);
974}
975
976 // The function is used to legalize operations that set the default
977 // environment state. In the C library a call like `fesetmode(FE_DFL_MODE)` is
978 // used for that. On most targets supported in glibc, FE_DFL_MODE is defined as
979 // `((const femode_t *) -1)`. That assumption is used here. If for some target
980 // it is not true, the target must provide custom lowering.
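// For example (sketch; the constant width follows the target pointer size):
//
//   G_RESET_FPMODE
//
// is expected to become roughly:
//
//   %0:_(s64) = G_CONSTANT i64 -1
//   %1:_(p0) = G_INTTOPTR %0:_(s64)
//   BL &fesetmode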
981 LegalizerHelper::LegalizeResult
982 LegalizerHelper::createResetStateLibcall(MachineIRBuilder &MIRBuilder,
983 MachineInstr &MI,
984 LostDebugLocObserver &LocObserver) {
985 const DataLayout &DL = MIRBuilder.getDataLayout();
986 auto &MF = MIRBuilder.getMF();
987 auto &Ctx = MF.getFunction().getContext();
988
989 // Create an argument for the library function.
990 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
991 Type *StatePtrTy = PointerType::get(Ctx, AddrSpace);
992 unsigned PtrSize = DL.getPointerSizeInBits(AddrSpace);
993 LLT MemTy = LLT::pointer(AddrSpace, PtrSize);
994 auto DefValue = MIRBuilder.buildConstant(LLT::scalar(PtrSize), -1LL);
995 DstOp Dest(MRI.createGenericVirtualRegister(MemTy));
996 MIRBuilder.buildIntToPtr(Dest, DefValue);
997
998 RTLIB::Libcall RTLibcall = getStateLibraryFunctionFor(MI, TLI);
999 return createLibcall(MIRBuilder, RTLibcall,
1000 CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0),
1001 CallLowering::ArgInfo({Dest.getReg(), StatePtrTy, 0}),
1002 LocObserver, &MI);
1003}
1004
1005 LegalizerHelper::LegalizeResult
1006 LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
1007 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
1008
1009 switch (MI.getOpcode()) {
1010 default:
1011 return UnableToLegalize;
1012 case TargetOpcode::G_MUL:
1013 case TargetOpcode::G_SDIV:
1014 case TargetOpcode::G_UDIV:
1015 case TargetOpcode::G_SREM:
1016 case TargetOpcode::G_UREM:
1017 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
1018 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1019 unsigned Size = LLTy.getSizeInBits();
1020 Type *HLTy = IntegerType::get(Ctx, Size);
1021 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1022 if (Status != Legalized)
1023 return Status;
1024 break;
1025 }
1026 case TargetOpcode::G_FADD:
1027 case TargetOpcode::G_FSUB:
1028 case TargetOpcode::G_FMUL:
1029 case TargetOpcode::G_FDIV:
1030 case TargetOpcode::G_FMA:
1031 case TargetOpcode::G_FPOW:
1032 case TargetOpcode::G_FREM:
1033 case TargetOpcode::G_FCOS:
1034 case TargetOpcode::G_FSIN:
1035 case TargetOpcode::G_FLOG10:
1036 case TargetOpcode::G_FLOG:
1037 case TargetOpcode::G_FLOG2:
1038 case TargetOpcode::G_FLDEXP:
1039 case TargetOpcode::G_FEXP:
1040 case TargetOpcode::G_FEXP2:
1041 case TargetOpcode::G_FEXP10:
1042 case TargetOpcode::G_FCEIL:
1043 case TargetOpcode::G_FFLOOR:
1044 case TargetOpcode::G_FMINNUM:
1045 case TargetOpcode::G_FMAXNUM:
1046 case TargetOpcode::G_FSQRT:
1047 case TargetOpcode::G_FRINT:
1048 case TargetOpcode::G_FNEARBYINT:
1049 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
1050 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1051 unsigned Size = LLTy.getSizeInBits();
1052 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1053 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1054 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1055 return UnableToLegalize;
1056 }
1057 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy, LocObserver);
1058 if (Status != Legalized)
1059 return Status;
1060 break;
1061 }
1062 case TargetOpcode::G_FPOWI: {
1063 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
1064 unsigned Size = LLTy.getSizeInBits();
1065 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
1066 Type *ITy = IntegerType::get(
1067 Ctx, MRI.getType(MI.getOperand(2).getReg()).getSizeInBits());
1068 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
1069 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
1070 return UnableToLegalize;
1071 }
1072 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
1073 std::initializer_list<CallLowering::ArgInfo> Args = {
1074 {MI.getOperand(1).getReg(), HLTy, 0},
1075 {MI.getOperand(2).getReg(), ITy, 1}};
1076 LegalizeResult Status =
1077 createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), HLTy, 0},
1078 Args, LocObserver, &MI);
1079 if (Status != Legalized)
1080 return Status;
1081 break;
1082 }
1083 case TargetOpcode::G_FPEXT:
1084 case TargetOpcode::G_FPTRUNC: {
1085 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
1086 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
1087 if (!FromTy || !ToTy)
1088 return UnableToLegalize;
1089 LegalizeResult Status =
1090 conversionLibcall(MI, MIRBuilder, ToTy, FromTy, LocObserver);
1091 if (Status != Legalized)
1092 return Status;
1093 break;
1094 }
1095 case TargetOpcode::G_FPTOSI:
1096 case TargetOpcode::G_FPTOUI: {
1097 // FIXME: Support other types
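// Illustrative example (assumed soft-float target): a G_FPTOSI from an s64
// (double) source to an s32 result is expected to become a call to the
// compiler-rt/libgcc conversion routine __fixdfsi.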
1098 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1099 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1100 if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
1101 return UnableToLegalize;
1102 LegalizeResult Status = conversionLibcall(
1103 MI, MIRBuilder,
1104 ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
1105 FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
1106 LocObserver);
1107 if (Status != Legalized)
1108 return Status;
1109 break;
1110 }
1111 case TargetOpcode::G_SITOFP:
1112 case TargetOpcode::G_UITOFP: {
1113 // FIXME: Support other types
1114 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1115 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1116 if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
1117 return UnableToLegalize;
1118 LegalizeResult Status = conversionLibcall(
1119 MI, MIRBuilder,
1120 ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
1121 FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
1122 LocObserver);
1123 if (Status != Legalized)
1124 return Status;
1125 break;
1126 }
1127 case TargetOpcode::G_ATOMICRMW_XCHG:
1128 case TargetOpcode::G_ATOMICRMW_ADD:
1129 case TargetOpcode::G_ATOMICRMW_SUB:
1130 case TargetOpcode::G_ATOMICRMW_AND:
1131 case TargetOpcode::G_ATOMICRMW_OR:
1132 case TargetOpcode::G_ATOMICRMW_XOR:
1133 case TargetOpcode::G_ATOMIC_CMPXCHG:
1134 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1135 auto Status = createAtomicLibcall(MIRBuilder, MI);
1136 if (Status != Legalized)
1137 return Status;
1138 break;
1139 }
1140 case TargetOpcode::G_BZERO:
1141 case TargetOpcode::G_MEMCPY:
1142 case TargetOpcode::G_MEMMOVE:
1143 case TargetOpcode::G_MEMSET: {
1144 LegalizeResult Result =
1145 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
1146 if (Result != Legalized)
1147 return Result;
1148 MI.eraseFromParent();
1149 return Result;
1150 }
1151 case TargetOpcode::G_GET_FPENV:
1152 case TargetOpcode::G_GET_FPMODE: {
1153 LegalizeResult Result = createGetStateLibcall(MIRBuilder, MI, LocObserver);
1154 if (Result != Legalized)
1155 return Result;
1156 break;
1157 }
1158 case TargetOpcode::G_SET_FPENV:
1159 case TargetOpcode::G_SET_FPMODE: {
1160 LegalizeResult Result = createSetStateLibcall(MIRBuilder, MI, LocObserver);
1161 if (Result != Legalized)
1162 return Result;
1163 break;
1164 }
1165 case TargetOpcode::G_RESET_FPENV:
1166 case TargetOpcode::G_RESET_FPMODE: {
1167 LegalizeResult Result =
1168 createResetStateLibcall(MIRBuilder, MI, LocObserver);
1169 if (Result != Legalized)
1170 return Result;
1171 break;
1172 }
1173 }
1174
1175 MI.eraseFromParent();
1176 return Legalized;
1177}
1178
1179 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
1180 unsigned TypeIdx,
1181 LLT NarrowTy) {
1182 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
1183 uint64_t NarrowSize = NarrowTy.getSizeInBits();
1184
1185 switch (MI.getOpcode()) {
1186 default:
1187 return UnableToLegalize;
1188 case TargetOpcode::G_IMPLICIT_DEF: {
1189 Register DstReg = MI.getOperand(0).getReg();
1190 LLT DstTy = MRI.getType(DstReg);
1191
1192 // If SizeOp0 is not an exact multiple of NarrowSize, emit
1193 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
1194 // FIXME: Although this would also be legal for the general case, it causes
1195 // a lot of regressions in the emitted code (superfluous COPYs, artifact
1196 // combines not being hit). This seems to be a problem related to the
1197 // artifact combiner.
1198 if (SizeOp0 % NarrowSize != 0) {
1199 LLT ImplicitTy = NarrowTy;
1200 if (DstTy.isVector())
1201 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
1202
1203 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
1204 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
1205
1206 MI.eraseFromParent();
1207 return Legalized;
1208 }
1209
1210 int NumParts = SizeOp0 / NarrowSize;
1211
1212 SmallVector<Register, 2> DstRegs;
1213 for (int i = 0; i < NumParts; ++i)
1214 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
1215
1216 if (DstTy.isVector())
1217 MIRBuilder.buildBuildVector(DstReg, DstRegs);
1218 else
1219 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1220 MI.eraseFromParent();
1221 return Legalized;
1222 }
1223 case TargetOpcode::G_CONSTANT: {
1224 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1225 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
1226 unsigned TotalSize = Ty.getSizeInBits();
1227 unsigned NarrowSize = NarrowTy.getSizeInBits();
1228 int NumParts = TotalSize / NarrowSize;
1229
1230 SmallVector<Register, 4> PartRegs;
1231 for (int I = 0; I != NumParts; ++I) {
1232 unsigned Offset = I * NarrowSize;
1233 auto K = MIRBuilder.buildConstant(NarrowTy,
1234 Val.lshr(Offset).trunc(NarrowSize));
1235 PartRegs.push_back(K.getReg(0));
1236 }
1237
1238 LLT LeftoverTy;
1239 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
1240 SmallVector<Register, 1> LeftoverRegs;
1241 if (LeftoverBits != 0) {
1242 LeftoverTy = LLT::scalar(LeftoverBits);
1243 auto K = MIRBuilder.buildConstant(
1244 LeftoverTy,
1245 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
1246 LeftoverRegs.push_back(K.getReg(0));
1247 }
1248
1249 insertParts(MI.getOperand(0).getReg(),
1250 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
1251
1252 MI.eraseFromParent();
1253 return Legalized;
1254 }
1255 case TargetOpcode::G_SEXT:
1256 case TargetOpcode::G_ZEXT:
1257 case TargetOpcode::G_ANYEXT:
1258 return narrowScalarExt(MI, TypeIdx, NarrowTy);
1259 case TargetOpcode::G_TRUNC: {
1260 if (TypeIdx != 1)
1261 return UnableToLegalize;
1262
1263 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
1264 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
1265 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
1266 return UnableToLegalize;
1267 }
1268
1269 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
1270 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
1271 MI.eraseFromParent();
1272 return Legalized;
1273 }
1274
1275 case TargetOpcode::G_FREEZE: {
1276 if (TypeIdx != 0)
1277 return UnableToLegalize;
1278
1279 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1280 // Should widen scalar first
1281 if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1282 return UnableToLegalize;
1283
1284 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1285 SmallVector<Register, 8> Parts;
1286 for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1287 Parts.push_back(
1288 MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
1289 }
1290
1291 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0).getReg(), Parts);
1292 MI.eraseFromParent();
1293 return Legalized;
1294 }
1295 case TargetOpcode::G_ADD:
1296 case TargetOpcode::G_SUB:
1297 case TargetOpcode::G_SADDO:
1298 case TargetOpcode::G_SSUBO:
1299 case TargetOpcode::G_SADDE:
1300 case TargetOpcode::G_SSUBE:
1301 case TargetOpcode::G_UADDO:
1302 case TargetOpcode::G_USUBO:
1303 case TargetOpcode::G_UADDE:
1304 case TargetOpcode::G_USUBE:
1305 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1306 case TargetOpcode::G_MUL:
1307 case TargetOpcode::G_UMULH:
1308 return narrowScalarMul(MI, NarrowTy);
1309 case TargetOpcode::G_EXTRACT:
1310 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1311 case TargetOpcode::G_INSERT:
1312 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1313 case TargetOpcode::G_LOAD: {
1314 auto &LoadMI = cast<GLoad>(MI);
1315 Register DstReg = LoadMI.getDstReg();
1316 LLT DstTy = MRI.getType(DstReg);
1317 if (DstTy.isVector())
1318 return UnableToLegalize;
1319
1320 if (8 * LoadMI.getMemSize().getValue() != DstTy.getSizeInBits()) {
1321 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1322 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1323 MIRBuilder.buildAnyExt(DstReg, TmpReg);
1324 LoadMI.eraseFromParent();
1325 return Legalized;
1326 }
1327
1328 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1329 }
1330 case TargetOpcode::G_ZEXTLOAD:
1331 case TargetOpcode::G_SEXTLOAD: {
1332 auto &LoadMI = cast<GExtLoad>(MI);
1333 Register DstReg = LoadMI.getDstReg();
1334 Register PtrReg = LoadMI.getPointerReg();
1335
1336 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1337 auto &MMO = LoadMI.getMMO();
1338 unsigned MemSize = MMO.getSizeInBits().getValue();
1339
1340 if (MemSize == NarrowSize) {
1341 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1342 } else if (MemSize < NarrowSize) {
1343 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1344 } else if (MemSize > NarrowSize) {
1345 // FIXME: Need to split the load.
1346 return UnableToLegalize;
1347 }
1348
1349 if (isa<GZExtLoad>(LoadMI))
1350 MIRBuilder.buildZExt(DstReg, TmpReg);
1351 else
1352 MIRBuilder.buildSExt(DstReg, TmpReg);
1353
1354 LoadMI.eraseFromParent();
1355 return Legalized;
1356 }
1357 case TargetOpcode::G_STORE: {
1358 auto &StoreMI = cast<GStore>(MI);
1359
1360 Register SrcReg = StoreMI.getValueReg();
1361 LLT SrcTy = MRI.getType(SrcReg);
1362 if (SrcTy.isVector())
1363 return UnableToLegalize;
1364
1365 int NumParts = SizeOp0 / NarrowSize;
1366 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1367 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1368 if (SrcTy.isVector() && LeftoverBits != 0)
1369 return UnableToLegalize;
1370
1371 if (8 * StoreMI.getMemSize().getValue() != SrcTy.getSizeInBits()) {
1372 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1373 MIRBuilder.buildTrunc(TmpReg, SrcReg);
1374 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1375 StoreMI.eraseFromParent();
1376 return Legalized;
1377 }
1378
1379 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1380 }
1381 case TargetOpcode::G_SELECT:
1382 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1383 case TargetOpcode::G_AND:
1384 case TargetOpcode::G_OR:
1385 case TargetOpcode::G_XOR: {
1386 // Legalize bitwise operation:
1387 // A = BinOp<Ty> B, C
1388 // into:
1389 // B1, ..., BN = G_UNMERGE_VALUES B
1390 // C1, ..., CN = G_UNMERGE_VALUES C
1391 // A1 = BinOp<Ty/N> B1, C1
1392 // ...
1393 // AN = BinOp<Ty/N> BN, CN
1394 // A = G_MERGE_VALUES A1, ..., AN
1395 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1396 }
1397 case TargetOpcode::G_SHL:
1398 case TargetOpcode::G_LSHR:
1399 case TargetOpcode::G_ASHR:
1400 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1401 case TargetOpcode::G_CTLZ:
1402 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1403 case TargetOpcode::G_CTTZ:
1404 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1405 case TargetOpcode::G_CTPOP:
1406 if (TypeIdx == 1)
1407 switch (MI.getOpcode()) {
1408 case TargetOpcode::G_CTLZ:
1409 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1410 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1411 case TargetOpcode::G_CTTZ:
1412 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1413 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1414 case TargetOpcode::G_CTPOP:
1415 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1416 default:
1417 return UnableToLegalize;
1418 }
1419
1420 Observer.changingInstr(MI);
1421 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1422 Observer.changedInstr(MI);
1423 return Legalized;
1424 case TargetOpcode::G_INTTOPTR:
1425 if (TypeIdx != 1)
1426 return UnableToLegalize;
1427
1428 Observer.changingInstr(MI);
1429 narrowScalarSrc(MI, NarrowTy, 1);
1430 Observer.changedInstr(MI);
1431 return Legalized;
1432 case TargetOpcode::G_PTRTOINT:
1433 if (TypeIdx != 0)
1434 return UnableToLegalize;
1435
1436 Observer.changingInstr(MI);
1437 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1438 Observer.changedInstr(MI);
1439 return Legalized;
1440 case TargetOpcode::G_PHI: {
1441 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1442 // NarrowSize.
1443 if (SizeOp0 % NarrowSize != 0)
1444 return UnableToLegalize;
1445
1446 unsigned NumParts = SizeOp0 / NarrowSize;
1447 SmallVector<Register, 2> DstRegs(NumParts);
1448 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1449 Observer.changingInstr(MI);
1450 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1451 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1452 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
1453 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1454 SrcRegs[i / 2], MIRBuilder, MRI);
1455 }
1456 MachineBasicBlock &MBB = *MI.getParent();
1457 MIRBuilder.setInsertPt(MBB, MI);
1458 for (unsigned i = 0; i < NumParts; ++i) {
1459 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1460 MachineInstrBuilder MIB =
1461 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1462 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1463 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1464 }
1465 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1466 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1467 Observer.changedInstr(MI);
1468 MI.eraseFromParent();
1469 return Legalized;
1470 }
1471 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1472 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1473 if (TypeIdx != 2)
1474 return UnableToLegalize;
1475
1476 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1477 Observer.changingInstr(MI);
1478 narrowScalarSrc(MI, NarrowTy, OpIdx);
1479 Observer.changedInstr(MI);
1480 return Legalized;
1481 }
1482 case TargetOpcode::G_ICMP: {
1483 Register LHS = MI.getOperand(2).getReg();
1484 LLT SrcTy = MRI.getType(LHS);
1485 uint64_t SrcSize = SrcTy.getSizeInBits();
1486 CmpInst::Predicate Pred =
1487 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1488
1489 // TODO: Handle the non-equality case for weird sizes.
1490 if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1491 return UnableToLegalize;
1492
1493 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1494 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1495 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1496 LHSLeftoverRegs, MIRBuilder, MRI))
1497 return UnableToLegalize;
1498
1499 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1500 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1501 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1502 RHSPartRegs, RHSLeftoverRegs, MIRBuilder, MRI))
1503 return UnableToLegalize;
1504
1505 // We now have the LHS and RHS of the compare split into narrow-type
1506 // registers, plus potentially some leftover type.
1507 Register Dst = MI.getOperand(0).getReg();
1508 LLT ResTy = MRI.getType(Dst);
1509 if (ICmpInst::isEquality(Pred)) {
1510 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1511 // them together. For each equal part, the result should be all 0s. For
1512 // each non-equal part, we'll get at least one 1.
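// Illustrative sketch (made-up register names): an s128 equality compare
// narrowed to s64 ends up as
//   %zero:_(s64) = G_CONSTANT i64 0
//   %xlo:_(s64)  = G_XOR %lhs_lo, %rhs_lo
//   %xhi:_(s64)  = G_XOR %lhs_hi, %rhs_hi
//   %or:_(s64)   = G_OR %xlo, %xhi
//   %dst:_(s1)   = G_ICMP eq, %or, %zero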
1513 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1514 SmallVector<Register, 4> Xors;
1515 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1516 auto LHS = std::get<0>(LHSAndRHS);
1517 auto RHS = std::get<1>(LHSAndRHS);
1518 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1519 Xors.push_back(Xor);
1520 }
1521
1522 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1523 // to the desired narrow type so that we can OR them together later.
1524 SmallVector<Register, 4> WidenedXors;
1525 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1526 auto LHS = std::get<0>(LHSAndRHS);
1527 auto RHS = std::get<1>(LHSAndRHS);
1528 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1529 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1530 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1531 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1532 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1533 }
1534
1535 // Now, for each part we broke up, we know if they are equal/not equal
1536 // based off the G_XOR. We can OR these all together and compare against
1537 // 0 to get the result.
1538 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1539 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1540 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1541 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1542 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1543 } else {
1544 // TODO: Handle non-power-of-two types.
1545 assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1546 assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1547 Register LHSL = LHSPartRegs[0];
1548 Register LHSH = LHSPartRegs[1];
1549 Register RHSL = RHSPartRegs[0];
1550 Register RHSH = RHSPartRegs[1];
1551 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1552 MachineInstrBuilder CmpHEQ =
1553 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1554 MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1555 ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1556 MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
1557 }
1558 MI.eraseFromParent();
1559 return Legalized;
1560 }
1561 case TargetOpcode::G_FCMP:
1562 if (TypeIdx != 0)
1563 return UnableToLegalize;
1564
1566 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1568 return Legalized;
1569
1570 case TargetOpcode::G_SEXT_INREG: {
1571 if (TypeIdx != 0)
1572 return UnableToLegalize;
1573
1574 int64_t SizeInBits = MI.getOperand(2).getImm();
1575
1576 // So long as the new type has more bits than the bits we're extending we
1577 // don't need to break it apart.
1578 if (NarrowTy.getScalarSizeInBits() > SizeInBits) {
1579 Observer.changingInstr(MI);
1580 // We don't lose any non-extension bits by truncating the src and
1581 // sign-extending the dst.
1582 MachineOperand &MO1 = MI.getOperand(1);
1583 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1584 MO1.setReg(TruncMIB.getReg(0));
1585
1586 MachineOperand &MO2 = MI.getOperand(0);
1587 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1588 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1589 MIRBuilder.buildSExt(MO2, DstExt);
1590 MO2.setReg(DstExt);
1591 Observer.changedInstr(MI);
1592 return Legalized;
1593 }
1594
1595 // Break it apart. Components below the extension point are unmodified. The
1596 // component containing the extension point becomes a narrower SEXT_INREG.
1597 // Components above it are ashr'd from the component containing the
1598 // extension point.
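// Illustrative sketch (made-up register names): G_SEXT_INREG %x:_(s64), 8
// narrowed with NarrowTy = s32 becomes
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x
//   %lo_ext:_(s32) = G_SEXT_INREG %lo, 8
//   %hi_ext:_(s32) = G_ASHR %lo_ext, 31
//   %dst:_(s64) = G_MERGE_VALUES %lo_ext, %hi_ext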
1599 if (SizeOp0 % NarrowSize != 0)
1600 return UnableToLegalize;
1601 int NumParts = SizeOp0 / NarrowSize;
1602
1603 // List the registers where the destination will be scattered.
1604 SmallVector<Register, 2> DstRegs;
1605 // List the registers where the source will be split.
1606 SmallVector<Register, 2> SrcRegs;
1607
1608 // Create all the temporary registers.
1609 for (int i = 0; i < NumParts; ++i) {
1610 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1611
1612 SrcRegs.push_back(SrcReg);
1613 }
1614
1615 // Explode the big arguments into smaller chunks.
1616 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1617
1618 Register AshrCstReg =
1619 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1620 .getReg(0);
1621 Register FullExtensionReg;
1622 Register PartialExtensionReg;
1623
1624 // Do the operation on each small part.
1625 for (int i = 0; i < NumParts; ++i) {
1626 if ((i + 1) * NarrowTy.getScalarSizeInBits() <= SizeInBits) {
1627 DstRegs.push_back(SrcRegs[i]);
1628 PartialExtensionReg = DstRegs.back();
1629 } else if (i * NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1630 assert(PartialExtensionReg &&
1631 "Expected to visit partial extension before full");
1632 if (FullExtensionReg) {
1633 DstRegs.push_back(FullExtensionReg);
1634 continue;
1635 }
1636 DstRegs.push_back(
1637 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1638 .getReg(0));
1639 FullExtensionReg = DstRegs.back();
1640 } else {
1641 DstRegs.push_back(
1642 MIRBuilder
1643 .buildInstr(
1644 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1645 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1646 .getReg(0));
1647 PartialExtensionReg = DstRegs.back();
1648 }
1649 }
1650
1651 // Gather the destination registers into the final destination.
1652 Register DstReg = MI.getOperand(0).getReg();
1653 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
1654 MI.eraseFromParent();
1655 return Legalized;
1656 }
1657 case TargetOpcode::G_BSWAP:
1658 case TargetOpcode::G_BITREVERSE: {
1659 if (SizeOp0 % NarrowSize != 0)
1660 return UnableToLegalize;
1661
1662 Observer.changingInstr(MI);
1663 SmallVector<Register, 2> SrcRegs, DstRegs;
1664 unsigned NumParts = SizeOp0 / NarrowSize;
1665 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
1666 MIRBuilder, MRI);
1667
1668 for (unsigned i = 0; i < NumParts; ++i) {
1669 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1670 {SrcRegs[NumParts - 1 - i]});
1671 DstRegs.push_back(DstPart.getReg(0));
1672 }
1673
1674 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), DstRegs);
1675
1676 Observer.changedInstr(MI);
1677 MI.eraseFromParent();
1678 return Legalized;
1679 }
1680 case TargetOpcode::G_PTR_ADD:
1681 case TargetOpcode::G_PTRMASK: {
1682 if (TypeIdx != 1)
1683 return UnableToLegalize;
1684 Observer.changingInstr(MI);
1685 narrowScalarSrc(MI, NarrowTy, 2);
1686 Observer.changedInstr(MI);
1687 return Legalized;
1688 }
1689 case TargetOpcode::G_FPTOUI:
1690 case TargetOpcode::G_FPTOSI:
1691 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1692 case TargetOpcode::G_FPEXT:
1693 if (TypeIdx != 0)
1694 return UnableToLegalize;
1695 Observer.changingInstr(MI);
1696 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1697 Observer.changedInstr(MI);
1698 return Legalized;
1699 case TargetOpcode::G_FLDEXP:
1700 case TargetOpcode::G_STRICT_FLDEXP:
1701 return narrowScalarFLDEXP(MI, TypeIdx, NarrowTy);
1702 case TargetOpcode::G_VSCALE: {
1703 Register Dst = MI.getOperand(0).getReg();
1704 LLT Ty = MRI.getType(Dst);
1705
1706 // Assume VSCALE(1) fits into a legal integer
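// Illustrative sketch (made-up register names): narrowing
//   %d:_(s64) = G_VSCALE 5
// with NarrowTy = s32 produces
//   %v:_(s32) = G_VSCALE 1
//   %z:_(s64) = G_ZEXT %v
//   %c:_(s64) = G_CONSTANT i64 5
//   %d:_(s64) = G_MUL %z, %c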
1707 const APInt One(NarrowTy.getSizeInBits(), 1);
1708 auto VScaleBase = MIRBuilder.buildVScale(NarrowTy, One);
1709 auto ZExt = MIRBuilder.buildZExt(Ty, VScaleBase);
1710 auto C = MIRBuilder.buildConstant(Ty, *MI.getOperand(1).getCImm());
1711 MIRBuilder.buildMul(Dst, ZExt, C);
1712
1713 MI.eraseFromParent();
1714 return Legalized;
1715 }
1716 }
1717}
1718
1719 Register LegalizerHelper::coerceToScalar(Register Val) {
1720 LLT Ty = MRI.getType(Val);
1721 if (Ty.isScalar())
1722 return Val;
1723
1724 const DataLayout &DL = MIRBuilder.getDataLayout();
1725 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1726 if (Ty.isPointer()) {
1727 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1728 return Register();
1729 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1730 }
1731
1732 Register NewVal = Val;
1733
1734 assert(Ty.isVector());
1735 if (Ty.isPointerVector())
1736 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1737 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1738}
1739
1740 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1741 unsigned OpIdx, unsigned ExtOpcode) {
1742 MachineOperand &MO = MI.getOperand(OpIdx);
1743 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1744 MO.setReg(ExtB.getReg(0));
1745}
1746
1747void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1748 unsigned OpIdx) {
1749 MachineOperand &MO = MI.getOperand(OpIdx);
1750 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1751 MO.setReg(ExtB.getReg(0));
1752}
1753
1754void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1755 unsigned OpIdx, unsigned TruncOpcode) {
1756 MachineOperand &MO = MI.getOperand(OpIdx);
1757 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1758 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1759 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1760 MO.setReg(DstExt);
1761}
1762
1763void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1764 unsigned OpIdx, unsigned ExtOpcode) {
1765 MachineOperand &MO = MI.getOperand(OpIdx);
1766 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1767 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1768 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1769 MO.setReg(DstTrunc);
1770}
1771
1772void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1773 unsigned OpIdx) {
1774 MachineOperand &MO = MI.getOperand(OpIdx);
1775 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1776 Register Dst = MO.getReg();
1777 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1778 MO.setReg(DstExt);
1779 MIRBuilder.buildDeleteTrailingVectorElements(Dst, DstExt);
1780}
1781
1782void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1783 unsigned OpIdx) {
1784 MachineOperand &MO = MI.getOperand(OpIdx);
1785 SmallVector<Register, 8> Regs;
1786 MO.setReg(MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO).getReg(0));
1787}
1788
1789void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1790 MachineOperand &Op = MI.getOperand(OpIdx);
1791 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1792}
1793
1794void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1795 MachineOperand &MO = MI.getOperand(OpIdx);
1796 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1798 MIRBuilder.buildBitcast(MO, CastDst);
1799 MO.setReg(CastDst);
1800}
1801
1802LegalizerHelper::LegalizeResult
1803LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1804 LLT WideTy) {
1805 if (TypeIdx != 1)
1806 return UnableToLegalize;
1807
1808 auto [DstReg, DstTy, Src1Reg, Src1Ty] = MI.getFirst2RegLLTs();
1809 if (DstTy.isVector())
1810 return UnableToLegalize;
1811
1812 LLT SrcTy = MRI.getType(Src1Reg);
1813 const int DstSize = DstTy.getSizeInBits();
1814 const int SrcSize = SrcTy.getSizeInBits();
1815 const int WideSize = WideTy.getSizeInBits();
1816 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1817
1818 unsigned NumOps = MI.getNumOperands();
1819 unsigned NumSrc = MI.getNumOperands() - 1;
1820 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1821
1822 if (WideSize >= DstSize) {
1823 // Directly pack the bits in the target type.
1824 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1Reg).getReg(0);
1825
1826 for (unsigned I = 2; I != NumOps; ++I) {
1827 const unsigned Offset = (I - 1) * PartSize;
1828
1829 Register SrcReg = MI.getOperand(I).getReg();
1830 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1831
1832 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1833
1834 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1835 MRI.createGenericVirtualRegister(WideTy);
1836
1837 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1838 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1839 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1840 ResultReg = NextResult;
1841 }
1842
1843 if (WideSize > DstSize)
1844 MIRBuilder.buildTrunc(DstReg, ResultReg);
1845 else if (DstTy.isPointer())
1846 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1847
1848 MI.eraseFromParent();
1849 return Legalized;
1850 }
1851
1852 // Unmerge the original values to the GCD type, and recombine to the next
1853 // multiple greater than the original type.
1854 //
1855 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1856 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1857 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1858 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1859 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1860 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1861 // %12:_(s12) = G_MERGE_VALUES %10, %11
1862 //
1863 // Padding with undef if necessary:
1864 //
1865 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1866 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1867 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1868 // %7:_(s2) = G_IMPLICIT_DEF
1869 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1870 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1871 // %10:_(s12) = G_MERGE_VALUES %8, %9
1872
1873 const int GCD = std::gcd(SrcSize, WideSize);
1874 LLT GCDTy = LLT::scalar(GCD);
1875
1877 SmallVector<Register, 8> NewMergeRegs;
1878 SmallVector<Register, 8> Unmerges;
1879 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1880
1881 // Decompose the original operands if they don't evenly divide.
1882 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
1883 Register SrcReg = MO.getReg();
1884 if (GCD == SrcSize) {
1885 Unmerges.push_back(SrcReg);
1886 } else {
1887 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1888 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1889 Unmerges.push_back(Unmerge.getReg(J));
1890 }
1891 }
1892
1893 // Pad with undef to the next size that is a multiple of the requested size.
1894 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1895 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1896 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1897 Unmerges.push_back(UndefReg);
1898 }
1899
1900 const int PartsPerGCD = WideSize / GCD;
1901
1902 // Build merges of each piece.
1903 ArrayRef<Register> Slicer(Unmerges);
1904 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1905 auto Merge =
1906 MIRBuilder.buildMergeLikeInstr(WideTy, Slicer.take_front(PartsPerGCD));
1907 NewMergeRegs.push_back(Merge.getReg(0));
1908 }
1909
1910 // A truncate may be necessary if the requested type doesn't evenly divide the
1911 // original result type.
1912 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1913 MIRBuilder.buildMergeLikeInstr(DstReg, NewMergeRegs);
1914 } else {
1915 auto FinalMerge = MIRBuilder.buildMergeLikeInstr(WideDstTy, NewMergeRegs);
1916 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1917 }
1918
1919 MI.eraseFromParent();
1920 return Legalized;
1921}
1922
1923LegalizerHelper::LegalizeResult
1924LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1925 LLT WideTy) {
1926 if (TypeIdx != 0)
1927 return UnableToLegalize;
1928
1929 int NumDst = MI.getNumOperands() - 1;
1930 Register SrcReg = MI.getOperand(NumDst).getReg();
1931 LLT SrcTy = MRI.getType(SrcReg);
1932 if (SrcTy.isVector())
1933 return UnableToLegalize;
1934
1935 Register Dst0Reg = MI.getOperand(0).getReg();
1936 LLT DstTy = MRI.getType(Dst0Reg);
1937 if (!DstTy.isScalar())
1938 return UnableToLegalize;
1939
1940 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1941 if (SrcTy.isPointer()) {
1942 const DataLayout &DL = MIRBuilder.getDataLayout();
1943 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1944 LLVM_DEBUG(
1945 dbgs() << "Not casting non-integral address space integer\n");
1946 return UnableToLegalize;
1947 }
1948
1949 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1950 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1951 }
1952
1953 // Widen SrcTy to WideTy. This does not affect the result, but since the
1954 // user requested this size, it is probably better handled than SrcTy and
1955 // should reduce the total number of legalization artifacts.
1956 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1957 SrcTy = WideTy;
1958 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1959 }
1960
1961 // There's no unmerge type to target. Directly extract the bits from the
1962 // source type.
1963 unsigned DstSize = DstTy.getSizeInBits();
1964
1965 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1966 for (int I = 1; I != NumDst; ++I) {
1967 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1968 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1969 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1970 }
1971
1972 MI.eraseFromParent();
1973 return Legalized;
1974 }
1975
1976 // Extend the source to a wider type.
1977 LLT LCMTy = getLCMType(SrcTy, WideTy);
1978
1979 Register WideSrc = SrcReg;
1980 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
1981 // TODO: If this is an integral address space, cast to integer and anyext.
1982 if (SrcTy.isPointer()) {
1983 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
1984 return UnableToLegalize;
1985 }
1986
1987 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
1988 }
1989
1990 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
1991
1992 // Create a sequence of unmerges and merges to the original results. Since we
1993 // may have widened the source, we will need to pad the results with dead defs
1994 // to cover the source register.
1995 // e.g. widen s48 to s64:
1996 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
1997 //
1998 // =>
1999 // %4:_(s192) = G_ANYEXT %0:_(s96)
2000 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
2001 // ; unpack to GCD type, with extra dead defs
2002 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
2003 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
2004 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
2005 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
2006 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
2007 const LLT GCDTy = getGCDType(WideTy, DstTy);
2008 const int NumUnmerge = Unmerge->getNumOperands() - 1;
2009 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
2010
2011 // Directly unmerge to the destination without going through a GCD type
2012 // if possible
2013 if (PartsPerRemerge == 1) {
2014 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
2015
2016 for (int I = 0; I != NumUnmerge; ++I) {
2017 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
2018
2019 for (int J = 0; J != PartsPerUnmerge; ++J) {
2020 int Idx = I * PartsPerUnmerge + J;
2021 if (Idx < NumDst)
2022 MIB.addDef(MI.getOperand(Idx).getReg());
2023 else {
2024 // Create dead def for excess components.
2025 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
2026 }
2027 }
2028
2029 MIB.addUse(Unmerge.getReg(I));
2030 }
2031 } else {
2032 SmallVector<Register, 16> Parts;
2033 for (int J = 0; J != NumUnmerge; ++J)
2034 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
2035
2036 SmallVector<Register, 8> RemergeParts;
2037 for (int I = 0; I != NumDst; ++I) {
2038 for (int J = 0; J < PartsPerRemerge; ++J) {
2039 const int Idx = I * PartsPerRemerge + J;
2040 RemergeParts.emplace_back(Parts[Idx]);
2041 }
2042
2043 MIRBuilder.buildMergeLikeInstr(MI.getOperand(I).getReg(), RemergeParts);
2044 RemergeParts.clear();
2045 }
2046 }
2047
2048 MI.eraseFromParent();
2049 return Legalized;
2050}
2051
2052LegalizerHelper::LegalizeResult
2053LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2054 LLT WideTy) {
2055 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
2056 unsigned Offset = MI.getOperand(2).getImm();
2057
2058 if (TypeIdx == 0) {
2059 if (SrcTy.isVector() || DstTy.isVector())
2060 return UnableToLegalize;
2061
2062 SrcOp Src(SrcReg);
2063 if (SrcTy.isPointer()) {
2064 // Extracts from pointers can be handled only if they are really just
2065 // simple integers.
2066 const DataLayout &DL = MIRBuilder.getDataLayout();
2067 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
2068 return UnableToLegalize;
2069
2070 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
2071 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
2072 SrcTy = SrcAsIntTy;
2073 }
2074
2075 if (DstTy.isPointer())
2076 return UnableToLegalize;
2077
2078 if (Offset == 0) {
2079 // Avoid a shift in the degenerate case.
2080 MIRBuilder.buildTrunc(DstReg,
2081 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
2082 MI.eraseFromParent();
2083 return Legalized;
2084 }
2085
2086 // Do a shift in the source type.
2087 LLT ShiftTy = SrcTy;
2088 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
2089 Src = MIRBuilder.buildAnyExt(WideTy, Src);
2090 ShiftTy = WideTy;
2091 }
2092
2093 auto LShr = MIRBuilder.buildLShr(
2094 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
2095 MIRBuilder.buildTrunc(DstReg, LShr);
2096 MI.eraseFromParent();
2097 return Legalized;
2098 }
2099
2100 if (SrcTy.isScalar()) {
2102 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2104 return Legalized;
2105 }
2106
2107 if (!SrcTy.isVector())
2108 return UnableToLegalize;
2109
2110 if (DstTy != SrcTy.getElementType())
2111 return UnableToLegalize;
2112
2113 if (Offset % SrcTy.getScalarSizeInBits() != 0)
2114 return UnableToLegalize;
2115
2117 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2118
2119 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
2120 Offset);
2121 widenScalarDst(MI, WideTy.getScalarType(), 0);
2123 return Legalized;
2124}
2125
2126LegalizerHelper::LegalizeResult
2127LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2128 LLT WideTy) {
2129 if (TypeIdx != 0 || WideTy.isVector())
2130 return UnableToLegalize;
2132 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2133 widenScalarDst(MI, WideTy);
2135 return Legalized;
2136}
2137
2138LegalizerHelper::LegalizeResult
2139LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
2140 LLT WideTy) {
2141 unsigned Opcode;
2142 unsigned ExtOpcode;
2143 std::optional<Register> CarryIn;
2144 switch (MI.getOpcode()) {
2145 default:
2146 llvm_unreachable("Unexpected opcode!");
2147 case TargetOpcode::G_SADDO:
2148 Opcode = TargetOpcode::G_ADD;
2149 ExtOpcode = TargetOpcode::G_SEXT;
2150 break;
2151 case TargetOpcode::G_SSUBO:
2152 Opcode = TargetOpcode::G_SUB;
2153 ExtOpcode = TargetOpcode::G_SEXT;
2154 break;
2155 case TargetOpcode::G_UADDO:
2156 Opcode = TargetOpcode::G_ADD;
2157 ExtOpcode = TargetOpcode::G_ZEXT;
2158 break;
2159 case TargetOpcode::G_USUBO:
2160 Opcode = TargetOpcode::G_SUB;
2161 ExtOpcode = TargetOpcode::G_ZEXT;
2162 break;
2163 case TargetOpcode::G_SADDE:
2164 Opcode = TargetOpcode::G_UADDE;
2165 ExtOpcode = TargetOpcode::G_SEXT;
2166 CarryIn = MI.getOperand(4).getReg();
2167 break;
2168 case TargetOpcode::G_SSUBE:
2169 Opcode = TargetOpcode::G_USUBE;
2170 ExtOpcode = TargetOpcode::G_SEXT;
2171 CarryIn = MI.getOperand(4).getReg();
2172 break;
2173 case TargetOpcode::G_UADDE:
2174 Opcode = TargetOpcode::G_UADDE;
2175 ExtOpcode = TargetOpcode::G_ZEXT;
2176 CarryIn = MI.getOperand(4).getReg();
2177 break;
2178 case TargetOpcode::G_USUBE:
2179 Opcode = TargetOpcode::G_USUBE;
2180 ExtOpcode = TargetOpcode::G_ZEXT;
2181 CarryIn = MI.getOperand(4).getReg();
2182 break;
2183 }
2184
2185 if (TypeIdx == 1) {
2186 unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
2187
2189 if (CarryIn)
2190 widenScalarSrc(MI, WideTy, 4, BoolExtOp);
2191 widenScalarDst(MI, WideTy, 1);
2192
2194 return Legalized;
2195 }
2196
2197 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
2198 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
2199 // Do the arithmetic in the larger type.
2200 Register NewOp;
2201 if (CarryIn) {
2202 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
2203 NewOp = MIRBuilder
2204 .buildInstr(Opcode, {WideTy, CarryOutTy},
2205 {LHSExt, RHSExt, *CarryIn})
2206 .getReg(0);
2207 } else {
2208 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
2209 }
2210 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
2211 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
2212 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
2213 // There is no overflow if the ExtOp is the same as NewOp.
2214 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
2215 // Now trunc the NewOp to the original result.
2216 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
2217 MI.eraseFromParent();
2218 return Legalized;
2219}
2220
2221LegalizerHelper::LegalizeResult
2222LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
2223 LLT WideTy) {
2224 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
2225 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
2226 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
2227 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
2228 MI.getOpcode() == TargetOpcode::G_USHLSAT;
2229 // We can convert this to:
2230 // 1. Any extend iN to iM
2231 // 2. SHL by M-N
2232 // 3. [US][ADD|SUB|SHL]SAT
2233 // 4. L/ASHR by M-N
2234 //
2235 // It may be more efficient to lower this to a min and a max operation in
2236 // the higher precision arithmetic if the promoted operation isn't legal,
2237 // but this decision is up to the target's lowering request.
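// Editor's illustrative sketch (not from the upstream source): an s8
// G_SADDSAT widened to s32 (so SHLAmount = 24) becomes roughly:
// %a32:_(s32) = G_ANYEXT %a:_(s8)
// %b32:_(s32) = G_ANYEXT %b:_(s8)
// %k:_(s32) = G_CONSTANT i32 24
// %ashl:_(s32) = G_SHL %a32, %k
// %bshl:_(s32) = G_SHL %b32, %k
// %sat:_(s32) = G_SADDSAT %ashl, %bshl
// %shr:_(s32) = G_ASHR %sat, %k
// %res:_(s8) = G_TRUNC %shr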
2238 Register DstReg = MI.getOperand(0).getReg();
2239
2240 unsigned NewBits = WideTy.getScalarSizeInBits();
2241 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
2242
2243 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
2244 // must not left shift the RHS to preserve the shift amount.
2245 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
2246 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
2247 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
2248 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
2249 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
2250 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
2251
2252 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
2253 {ShiftL, ShiftR}, MI.getFlags());
2254
2255 // Use a shift that will preserve the number of sign bits when the trunc is
2256 // folded away.
2257 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
2258 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
2259
2260 MIRBuilder.buildTrunc(DstReg, Result);
2261 MI.eraseFromParent();
2262 return Legalized;
2263}
2264
2265LegalizerHelper::LegalizeResult
2266LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
2267 LLT WideTy) {
2268 if (TypeIdx == 1) {
2270 widenScalarDst(MI, WideTy, 1);
2272 return Legalized;
2273 }
2274
2275 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2276 auto [Result, OriginalOverflow, LHS, RHS] = MI.getFirst4Regs();
2277 LLT SrcTy = MRI.getType(LHS);
2278 LLT OverflowTy = MRI.getType(OriginalOverflow);
2279 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2280
2281 // To determine if the result overflowed in the larger type, we extend the
2282 // input to the larger type, do the multiply (checking if it overflows),
2283 // then also check the high bits of the result to see if overflow happened
2284 // there.
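// Editor's illustrative sketch (not from the upstream source): an s16 G_UMULO
// widened to s32 cannot overflow in the wide multiply (32 >= 2 * 16), so it
// reduces to a plain G_MUL plus a check that the high half is zero:
// %l:_(s32) = G_ZEXT %lhs:_(s16)
// %r:_(s32) = G_ZEXT %rhs:_(s16)
// %m:_(s32) = G_MUL %l, %r
// %res:_(s16) = G_TRUNC %m
// %mask:_(s32) = G_CONSTANT i32 65535
// %lo:_(s32) = G_AND %m, %mask
// %ovf:_(s1) = G_ICMP intpred(ne), %m, %lo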
2285 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2286 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2287 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2288
2289 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2290 // so we don't need to check the overflow result of larger type Mulo.
2291 bool WideMulCanOverflow = WideTy.getScalarSizeInBits() < 2 * SrcBitWidth;
2292
2293 unsigned MulOpc =
2294 WideMulCanOverflow ? MI.getOpcode() : (unsigned)TargetOpcode::G_MUL;
2295
2297 if (WideMulCanOverflow)
2298 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy, OverflowTy},
2299 {LeftOperand, RightOperand});
2300 else
2301 Mulo = MIRBuilder.buildInstr(MulOpc, {WideTy}, {LeftOperand, RightOperand});
2302
2303 auto Mul = Mulo->getOperand(0);
2304 MIRBuilder.buildTrunc(Result, Mul);
2305
2306 MachineInstrBuilder ExtResult;
2307 // Overflow occurred if it occurred in the larger type, or if the high part
2308 // of the result does not zero/sign-extend the low part. Check this second
2309 // possibility first.
2310 if (IsSigned) {
2311 // For signed, overflow occurred when the high part does not sign-extend
2312 // the low part.
2313 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2314 } else {
2315 // Unsigned overflow occurred when the high part does not zero-extend the
2316 // low part.
2317 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2318 }
2319
2320 if (WideMulCanOverflow) {
2321 auto Overflow =
2322 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2323 // Finally check if the multiplication in the larger type itself overflowed.
2324 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2325 } else {
2326 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2327 }
2328 MI.eraseFromParent();
2329 return Legalized;
2330}
2331
2332LegalizerHelper::LegalizeResult
2333LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2334 switch (MI.getOpcode()) {
2335 default:
2336 return UnableToLegalize;
2337 case TargetOpcode::G_ATOMICRMW_XCHG:
2338 case TargetOpcode::G_ATOMICRMW_ADD:
2339 case TargetOpcode::G_ATOMICRMW_SUB:
2340 case TargetOpcode::G_ATOMICRMW_AND:
2341 case TargetOpcode::G_ATOMICRMW_OR:
2342 case TargetOpcode::G_ATOMICRMW_XOR:
2343 case TargetOpcode::G_ATOMICRMW_MIN:
2344 case TargetOpcode::G_ATOMICRMW_MAX:
2345 case TargetOpcode::G_ATOMICRMW_UMIN:
2346 case TargetOpcode::G_ATOMICRMW_UMAX:
2347 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2349 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2350 widenScalarDst(MI, WideTy, 0);
2352 return Legalized;
2353 case TargetOpcode::G_ATOMIC_CMPXCHG:
2354 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2356 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2357 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2358 widenScalarDst(MI, WideTy, 0);
2360 return Legalized;
2361 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2362 if (TypeIdx == 0) {
2364 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2365 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2366 widenScalarDst(MI, WideTy, 0);
2368 return Legalized;
2369 }
2370 assert(TypeIdx == 1 &&
2371 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2373 widenScalarDst(MI, WideTy, 1);
2375 return Legalized;
2376 case TargetOpcode::G_EXTRACT:
2377 return widenScalarExtract(MI, TypeIdx, WideTy);
2378 case TargetOpcode::G_INSERT:
2379 return widenScalarInsert(MI, TypeIdx, WideTy);
2380 case TargetOpcode::G_MERGE_VALUES:
2381 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2382 case TargetOpcode::G_UNMERGE_VALUES:
2383 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2384 case TargetOpcode::G_SADDO:
2385 case TargetOpcode::G_SSUBO:
2386 case TargetOpcode::G_UADDO:
2387 case TargetOpcode::G_USUBO:
2388 case TargetOpcode::G_SADDE:
2389 case TargetOpcode::G_SSUBE:
2390 case TargetOpcode::G_UADDE:
2391 case TargetOpcode::G_USUBE:
2392 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2393 case TargetOpcode::G_UMULO:
2394 case TargetOpcode::G_SMULO:
2395 return widenScalarMulo(MI, TypeIdx, WideTy);
2396 case TargetOpcode::G_SADDSAT:
2397 case TargetOpcode::G_SSUBSAT:
2398 case TargetOpcode::G_SSHLSAT:
2399 case TargetOpcode::G_UADDSAT:
2400 case TargetOpcode::G_USUBSAT:
2401 case TargetOpcode::G_USHLSAT:
2402 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2403 case TargetOpcode::G_CTTZ:
2404 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2405 case TargetOpcode::G_CTLZ:
2406 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2407 case TargetOpcode::G_CTPOP: {
2408 if (TypeIdx == 0) {
2410 widenScalarDst(MI, WideTy, 0);
2412 return Legalized;
2413 }
2414
2415 Register SrcReg = MI.getOperand(1).getReg();
2416
2417 // First extend the input.
2418 unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2419 MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2420 ? TargetOpcode::G_ANYEXT
2421 : TargetOpcode::G_ZEXT;
2422 auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2423 LLT CurTy = MRI.getType(SrcReg);
2424 unsigned NewOpc = MI.getOpcode();
2425 if (NewOpc == TargetOpcode::G_CTTZ) {
2426 // The count is the same in the larger type except if the original
2427 // value was zero. This can be handled by setting the bit just off
2428 // the top of the original type.
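// Editor's worked example (not from the upstream source): when widening an
// s8 G_CTTZ to s32, TopBit is 1 << 8. OR-ing it into the extended source
// guarantees a set bit at position 8, so an all-zero s8 input yields a count
// of 8 (the correct s8 result) and G_CTTZ_ZERO_UNDEF can be used safely.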
2429 auto TopBit =
2430 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2431 MIBSrc = MIRBuilder.buildOr(
2432 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2433 // Now we know the operand is non-zero, use the more relaxed opcode.
2434 NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2435 }
2436
2437 // Perform the operation at the larger size.
2438 auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2439 // This is already the correct result for CTPOP and CTTZs
2440 if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
2441 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2442 // The correct result is NewOp - (Difference in widety and current ty).
2443 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2444 MIBNewOp = MIRBuilder.buildSub(
2445 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2446 }
2447
2448 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2449 MI.eraseFromParent();
2450 return Legalized;
2451 }
2452 case TargetOpcode::G_BSWAP: {
2454 Register DstReg = MI.getOperand(0).getReg();
2455
2456 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2457 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2458 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2459 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2460
2461 MI.getOperand(0).setReg(DstExt);
2462
2463 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2464
2465 LLT Ty = MRI.getType(DstReg);
2466 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2467 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2468 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2469
2470 MIRBuilder.buildTrunc(DstReg, ShrReg);
2472 return Legalized;
2473 }
2474 case TargetOpcode::G_BITREVERSE: {
2476
2477 Register DstReg = MI.getOperand(0).getReg();
2478 LLT Ty = MRI.getType(DstReg);
2479 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2480
2481 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2482 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2483 MI.getOperand(0).setReg(DstExt);
2484 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2485
2486 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2487 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2488 MIRBuilder.buildTrunc(DstReg, Shift);
2490 return Legalized;
2491 }
2492 case TargetOpcode::G_FREEZE:
2494 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2495 widenScalarDst(MI, WideTy);
2497 return Legalized;
2498
2499 case TargetOpcode::G_ABS:
2501 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2502 widenScalarDst(MI, WideTy);
2504 return Legalized;
2505
2506 case TargetOpcode::G_ADD:
2507 case TargetOpcode::G_AND:
2508 case TargetOpcode::G_MUL:
2509 case TargetOpcode::G_OR:
2510 case TargetOpcode::G_XOR:
2511 case TargetOpcode::G_SUB:
2512 case TargetOpcode::G_SHUFFLE_VECTOR:
2513 // Perform operation at larger width (any extension is fine here, high bits
2514 // don't affect the result) and then truncate the result back to the
2515 // original type.
2517 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2518 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2519 widenScalarDst(MI, WideTy);
2521 return Legalized;
2522
2523 case TargetOpcode::G_SBFX:
2524 case TargetOpcode::G_UBFX:
2526
2527 if (TypeIdx == 0) {
2528 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2529 widenScalarDst(MI, WideTy);
2530 } else {
2531 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2532 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2533 }
2534
2536 return Legalized;
2537
2538 case TargetOpcode::G_SHL:
2540
2541 if (TypeIdx == 0) {
2542 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2543 widenScalarDst(MI, WideTy);
2544 } else {
2545 assert(TypeIdx == 1);
2546 // The "number of bits to shift" operand must preserve its value as an
2547 // unsigned integer:
2548 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2549 }
2550
2552 return Legalized;
2553
2554 case TargetOpcode::G_ROTR:
2555 case TargetOpcode::G_ROTL:
2556 if (TypeIdx != 1)
2557 return UnableToLegalize;
2558
2560 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2562 return Legalized;
2563
2564 case TargetOpcode::G_SDIV:
2565 case TargetOpcode::G_SREM:
2566 case TargetOpcode::G_SMIN:
2567 case TargetOpcode::G_SMAX:
2569 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2570 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2571 widenScalarDst(MI, WideTy);
2573 return Legalized;
2574
2575 case TargetOpcode::G_SDIVREM:
2577 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2578 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2579 widenScalarDst(MI, WideTy);
2580 widenScalarDst(MI, WideTy, 1);
2582 return Legalized;
2583
2584 case TargetOpcode::G_ASHR:
2585 case TargetOpcode::G_LSHR:
2587
2588 if (TypeIdx == 0) {
2589 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2590 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2591
2592 widenScalarSrc(MI, WideTy, 1, CvtOp);
2593 widenScalarDst(MI, WideTy);
2594 } else {
2595 assert(TypeIdx == 1);
2596 // The "number of bits to shift" operand must preserve its value as an
2597 // unsigned integer:
2598 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2599 }
2600
2602 return Legalized;
2603 case TargetOpcode::G_UDIV:
2604 case TargetOpcode::G_UREM:
2605 case TargetOpcode::G_UMIN:
2606 case TargetOpcode::G_UMAX:
2608 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2609 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2610 widenScalarDst(MI, WideTy);
2612 return Legalized;
2613
2614 case TargetOpcode::G_UDIVREM:
2616 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2617 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2618 widenScalarDst(MI, WideTy);
2619 widenScalarDst(MI, WideTy, 1);
2621 return Legalized;
2622
2623 case TargetOpcode::G_SELECT:
2625 if (TypeIdx == 0) {
2626 // Perform operation at larger width (any extension is fine here, high
2627 // bits don't affect the result) and then truncate the result back to the
2628 // original type.
2629 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2630 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2631 widenScalarDst(MI, WideTy);
2632 } else {
2633 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2634 // Explicit extension is required here since high bits affect the result.
2635 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2636 }
2638 return Legalized;
2639
2640 case TargetOpcode::G_FPTOSI:
2641 case TargetOpcode::G_FPTOUI:
2642 case TargetOpcode::G_IS_FPCLASS:
2644
2645 if (TypeIdx == 0)
2646 widenScalarDst(MI, WideTy);
2647 else
2648 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2649
2651 return Legalized;
2652 case TargetOpcode::G_SITOFP:
2654
2655 if (TypeIdx == 0)
2656 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2657 else
2658 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2659
2661 return Legalized;
2662 case TargetOpcode::G_UITOFP:
2664
2665 if (TypeIdx == 0)
2666 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2667 else
2668 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2669
2671 return Legalized;
2672 case TargetOpcode::G_LOAD:
2673 case TargetOpcode::G_SEXTLOAD:
2674 case TargetOpcode::G_ZEXTLOAD:
2676 widenScalarDst(MI, WideTy);
2678 return Legalized;
2679
2680 case TargetOpcode::G_STORE: {
2681 if (TypeIdx != 0)
2682 return UnableToLegalize;
2683
2684 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2685 if (!Ty.isScalar())
2686 return UnableToLegalize;
2687
2689
2690 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2691 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2692 widenScalarSrc(MI, WideTy, 0, ExtType);
2693
2695 return Legalized;
2696 }
2697 case TargetOpcode::G_CONSTANT: {
2698 MachineOperand &SrcMO = MI.getOperand(1);
2699 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2700 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2701 MRI.getType(MI.getOperand(0).getReg()));
2702 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2703 ExtOpc == TargetOpcode::G_ANYEXT) &&
2704 "Illegal Extend");
2705 const APInt &SrcVal = SrcMO.getCImm()->getValue();
2706 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2707 ? SrcVal.sext(WideTy.getSizeInBits())
2708 : SrcVal.zext(WideTy.getSizeInBits());
2710 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2711
2712 widenScalarDst(MI, WideTy);
2714 return Legalized;
2715 }
2716 case TargetOpcode::G_FCONSTANT: {
2717 // To avoid changing the bits of the constant due to extension to a larger
2718 // type and then using G_FPTRUNC, we simply convert to a G_CONSTANT.
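// Editor's illustrative sketch (not from the upstream source):
// %x:_(s16) = G_FCONSTANT half 1.0
// is first rewritten to
// %x:_(s16) = G_CONSTANT i16 15360 ; 0x3C00, the bit pattern of half 1.0
// and that constant's def is then widened with a G_TRUNC back to s16.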
2719 MachineOperand &SrcMO = MI.getOperand(1);
2720 APInt Val = SrcMO.getFPImm()->getValueAPF().bitcastToAPInt();
2721 MIRBuilder.setInstrAndDebugLoc(MI);
2722 auto IntCst = MIRBuilder.buildConstant(MI.getOperand(0).getReg(), Val);
2723 widenScalarDst(*IntCst, WideTy, 0, TargetOpcode::G_TRUNC);
2724 MI.eraseFromParent();
2725 return Legalized;
2726 }
2727 case TargetOpcode::G_IMPLICIT_DEF: {
2729 widenScalarDst(MI, WideTy);
2731 return Legalized;
2732 }
2733 case TargetOpcode::G_BRCOND:
2735 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2737 return Legalized;
2738
2739 case TargetOpcode::G_FCMP:
2741 if (TypeIdx == 0)
2742 widenScalarDst(MI, WideTy);
2743 else {
2744 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2745 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2746 }
2748 return Legalized;
2749
2750 case TargetOpcode::G_ICMP:
2752 if (TypeIdx == 0)
2753 widenScalarDst(MI, WideTy);
2754 else {
2755 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2756 MI.getOperand(1).getPredicate()))
2757 ? TargetOpcode::G_SEXT
2758 : TargetOpcode::G_ZEXT;
2759 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2760 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2761 }
2763 return Legalized;
2764
2765 case TargetOpcode::G_PTR_ADD:
2766 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2768 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2770 return Legalized;
2771
2772 case TargetOpcode::G_PHI: {
2773 assert(TypeIdx == 0 && "Expecting only Idx 0");
2774
2776 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2777 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2779 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2780 }
2781
2782 MachineBasicBlock &MBB = *MI.getParent();
2784 widenScalarDst(MI, WideTy);
2786 return Legalized;
2787 }
2788 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2789 if (TypeIdx == 0) {
2790 Register VecReg = MI.getOperand(1).getReg();
2791 LLT VecTy = MRI.getType(VecReg);
2793
2794 widenScalarSrc(
2795 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2796 TargetOpcode::G_ANYEXT);
2797
2798 widenScalarDst(MI, WideTy, 0);
2800 return Legalized;
2801 }
2802
2803 if (TypeIdx != 2)
2804 return UnableToLegalize;
2806 // TODO: Probably should be zext
2807 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2809 return Legalized;
2810 }
2811 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2812 if (TypeIdx == 0) {
2814 const LLT WideEltTy = WideTy.getElementType();
2815
2816 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2817 widenScalarSrc(MI, WideEltTy, 2, TargetOpcode::G_ANYEXT);
2818 widenScalarDst(MI, WideTy, 0);
2820 return Legalized;
2821 }
2822
2823 if (TypeIdx == 1) {
2825
2826 Register VecReg = MI.getOperand(1).getReg();
2827 LLT VecTy = MRI.getType(VecReg);
2828 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2829
2830 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2831 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2832 widenScalarDst(MI, WideVecTy, 0);
2834 return Legalized;
2835 }
2836
2837 if (TypeIdx == 2) {
2839 // TODO: Probably should be zext
2840 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2842 return Legalized;
2843 }
2844
2845 return UnableToLegalize;
2846 }
2847 case TargetOpcode::G_FADD:
2848 case TargetOpcode::G_FMUL:
2849 case TargetOpcode::G_FSUB:
2850 case TargetOpcode::G_FMA:
2851 case TargetOpcode::G_FMAD:
2852 case TargetOpcode::G_FNEG:
2853 case TargetOpcode::G_FABS:
2854 case TargetOpcode::G_FCANONICALIZE:
2855 case TargetOpcode::G_FMINNUM:
2856 case TargetOpcode::G_FMAXNUM:
2857 case TargetOpcode::G_FMINNUM_IEEE:
2858 case TargetOpcode::G_FMAXNUM_IEEE:
2859 case TargetOpcode::G_FMINIMUM:
2860 case TargetOpcode::G_FMAXIMUM:
2861 case TargetOpcode::G_FDIV:
2862 case TargetOpcode::G_FREM:
2863 case TargetOpcode::G_FCEIL:
2864 case TargetOpcode::G_FFLOOR:
2865 case TargetOpcode::G_FCOS:
2866 case TargetOpcode::G_FSIN:
2867 case TargetOpcode::G_FLOG10:
2868 case TargetOpcode::G_FLOG:
2869 case TargetOpcode::G_FLOG2:
2870 case TargetOpcode::G_FRINT:
2871 case TargetOpcode::G_FNEARBYINT:
2872 case TargetOpcode::G_FSQRT:
2873 case TargetOpcode::G_FEXP:
2874 case TargetOpcode::G_FEXP2:
2875 case TargetOpcode::G_FEXP10:
2876 case TargetOpcode::G_FPOW:
2877 case TargetOpcode::G_INTRINSIC_TRUNC:
2878 case TargetOpcode::G_INTRINSIC_ROUND:
2879 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2880 assert(TypeIdx == 0);
2882
2883 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2884 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2885
2886 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2888 return Legalized;
2889 case TargetOpcode::G_FPOWI:
2890 case TargetOpcode::G_FLDEXP:
2891 case TargetOpcode::G_STRICT_FLDEXP: {
2892 if (TypeIdx == 0) {
2893 if (MI.getOpcode() == TargetOpcode::G_STRICT_FLDEXP)
2894 return UnableToLegalize;
2895
2897 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2898 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2900 return Legalized;
2901 }
2902
2903 if (TypeIdx == 1) {
2904 // For some reason SelectionDAG tries to promote to a libcall without
2905 // actually changing the integer type for promotion.
2907 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2909 return Legalized;
2910 }
2911
2912 return UnableToLegalize;
2913 }
2914 case TargetOpcode::G_FFREXP: {
2916
2917 if (TypeIdx == 0) {
2918 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2919 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2920 } else {
2921 widenScalarDst(MI, WideTy, 1);
2922 }
2923
2925 return Legalized;
2926 }
2927 case TargetOpcode::G_INTTOPTR:
2928 if (TypeIdx != 1)
2929 return UnableToLegalize;
2930
2932 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2934 return Legalized;
2935 case TargetOpcode::G_PTRTOINT:
2936 if (TypeIdx != 0)
2937 return UnableToLegalize;
2938
2940 widenScalarDst(MI, WideTy, 0);
2942 return Legalized;
2943 case TargetOpcode::G_BUILD_VECTOR: {
2945
2946 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2947 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2948 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2949
2950 // Avoid changing the result vector type if the source element type was
2951 // requested.
2952 if (TypeIdx == 1) {
2953 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2954 } else {
2955 widenScalarDst(MI, WideTy, 0);
2956 }
2957
2959 return Legalized;
2960 }
2961 case TargetOpcode::G_SEXT_INREG:
2962 if (TypeIdx != 0)
2963 return UnableToLegalize;
2964
2966 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2967 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2969 return Legalized;
2970 case TargetOpcode::G_PTRMASK: {
2971 if (TypeIdx != 1)
2972 return UnableToLegalize;
2974 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2976 return Legalized;
2977 }
2978 case TargetOpcode::G_VECREDUCE_FADD:
2979 case TargetOpcode::G_VECREDUCE_FMUL:
2980 case TargetOpcode::G_VECREDUCE_FMIN:
2981 case TargetOpcode::G_VECREDUCE_FMAX:
2982 case TargetOpcode::G_VECREDUCE_FMINIMUM:
2983 case TargetOpcode::G_VECREDUCE_FMAXIMUM: {
2984 if (TypeIdx != 0)
2985 return UnableToLegalize;
2987 Register VecReg = MI.getOperand(1).getReg();
2988 LLT VecTy = MRI.getType(VecReg);
2989 LLT WideVecTy = VecTy.isVector()
2990 ? LLT::vector(VecTy.getElementCount(), WideTy)
2991 : WideTy;
2992 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_FPEXT);
2993 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2995 return Legalized;
2996 }
2997 case TargetOpcode::G_VSCALE: {
2998 MachineOperand &SrcMO = MI.getOperand(1);
2999 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
3000 const APInt &SrcVal = SrcMO.getCImm()->getValue();
3001 // The CImm is always a signed value
3002 const APInt Val = SrcVal.sext(WideTy.getSizeInBits());
3004 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
3005 widenScalarDst(MI, WideTy);
3007 return Legalized;
3008 }
3009 }
3010}
3011
3012static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
3013 MachineIRBuilder &B, Register Src, LLT Ty) {
3014 auto Unmerge = B.buildUnmerge(Ty, Src);
3015 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
3016 Pieces.push_back(Unmerge.getReg(I));
3017}
3018
3019static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal,
3020 MachineIRBuilder &MIRBuilder) {
3021 MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
3022 MachineFunction &MF = MIRBuilder.getMF();
3023 const DataLayout &DL = MIRBuilder.getDataLayout();
3024 unsigned AddrSpace = DL.getDefaultGlobalsAddressSpace();
3025 LLT AddrPtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3026 LLT DstLLT = MRI.getType(DstReg);
3027
3028 Align Alignment(DL.getABITypeAlign(ConstVal->getType()));
3029
3030 auto Addr = MIRBuilder.buildConstantPool(
3031 AddrPtrTy,
3032 MF.getConstantPool()->getConstantPoolIndex(ConstVal, Alignment));
3033
3034 MachineMemOperand *MMO =
3035 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3036 MachineMemOperand::MOLoad, DstLLT, Alignment);
3037
3038 MIRBuilder.buildLoadInstr(TargetOpcode::G_LOAD, DstReg, Addr, *MMO);
3039}
3040
3041LegalizerHelper::LegalizeResult
3042LegalizerHelper::lowerConstant(MachineInstr &MI) {
3043 const MachineOperand &ConstOperand = MI.getOperand(1);
3044 const Constant *ConstantVal = ConstOperand.getCImm();
3045
3046 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3047 MI.eraseFromParent();
3048
3049 return Legalized;
3050}
3051
3052LegalizerHelper::LegalizeResult
3053LegalizerHelper::lowerFConstant(MachineInstr &MI) {
3054 const MachineOperand &ConstOperand = MI.getOperand(1);
3055 const Constant *ConstantVal = ConstOperand.getFPImm();
3056
3057 emitLoadFromConstantPool(MI.getOperand(0).getReg(), ConstantVal, MIRBuilder);
3058 MI.eraseFromParent();
3059
3060 return Legalized;
3061}
3062
3063LegalizerHelper::LegalizeResult
3064LegalizerHelper::lowerBitcast(MachineInstr &MI) {
3065 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
3066 if (SrcTy.isVector()) {
3067 LLT SrcEltTy = SrcTy.getElementType();
3068 SmallVector<Register, 8> SrcRegs;
3069
3070 if (DstTy.isVector()) {
3071 int NumDstElt = DstTy.getNumElements();
3072 int NumSrcElt = SrcTy.getNumElements();
3073
3074 LLT DstEltTy = DstTy.getElementType();
3075 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
3076 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
3077
3078 // If there's an element size mismatch, insert intermediate casts to match
3079 // the result element type.
3080 if (NumSrcElt < NumDstElt) { // Source element type is larger.
3081 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
3082 //
3083 // =>
3084 //
3085 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
3086 // %4:_(<2 x s8>) = G_BITCAST %2
3087 // %5:_(<2 x s8>) = G_BITCAST %3
3088 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
3089 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
3090 SrcPartTy = SrcEltTy;
3091 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
3092 //
3093 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
3094 //
3095 // =>
3096 //
3097 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
3098 // %4:_(s16) = G_BITCAST %2
3099 // %5:_(s16) = G_BITCAST %3
3100 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
3101 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
3102 DstCastTy = DstEltTy;
3103 }
3104
3105 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
3106 for (Register &SrcReg : SrcRegs)
3107 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
3108 } else
3109 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
3110
3111 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3112 MI.eraseFromParent();
3113 return Legalized;
3114 }
3115
3116 if (DstTy.isVector()) {
3117 SmallVector<Register, 8> SrcRegs;
3118 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
3119 MIRBuilder.buildMergeLikeInstr(Dst, SrcRegs);
3120 MI.eraseFromParent();
3121 return Legalized;
3122 }
3123
3124 return UnableToLegalize;
3125}
3126
3127/// Figure out the bit offset into a register when coercing a vector index for
3128/// the wide element type. This is only for the case when promoting a vector
3129/// to one with larger elements.
3130///
3131///
3132/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3133/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
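/// Worked example (editor's addition, not from the upstream source): indexing
/// a <8 x s16> vector as <4 x s32> (NewEltSize = 32, OldEltSize = 16, so
/// Log2EltRatio = 1) with %idx = 5 gives %offset_idx = 5 & 1 = 1 and
/// %offset_bits = 1 << 4 = 16, i.e. the requested s16 element is the high
/// half of wide element 5 >> 1 = 2.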
3134static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
3135 Register Idx,
3136 unsigned NewEltSize,
3137 unsigned OldEltSize) {
3138 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3139 LLT IdxTy = B.getMRI()->getType(Idx);
3140
3141 // Now figure out the amount we need to shift to get the target bits.
3142 auto OffsetMask = B.buildConstant(
3143 IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
3144 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
3145 return B.buildShl(IdxTy, OffsetIdx,
3146 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
3147}
3148
3149/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
3150/// is casting to a vector with a smaller element size, perform multiple element
3151/// extracts and merge the results. If this is coercing to a vector with larger
3152/// elements, index the bitcasted vector and extract the target element with bit
3153/// operations. This is intended to force the indexing in the native register
3154/// size for architectures that can dynamically index the register file.
3155LegalizerHelper::LegalizeResult
3156LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
3157 LLT CastTy) {
3158 if (TypeIdx != 1)
3159 return UnableToLegalize;
3160
3161 auto [Dst, DstTy, SrcVec, SrcVecTy, Idx, IdxTy] = MI.getFirst3RegLLTs();
3162
3163 LLT SrcEltTy = SrcVecTy.getElementType();
3164 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3165 unsigned OldNumElts = SrcVecTy.getNumElements();
3166
3167 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3168 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3169
3170 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3171 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
3172 if (NewNumElts > OldNumElts) {
3173 // Decreasing the vector element size
3174 //
3175 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
3176 // =>
3177 // v4i32:castx = bitcast x:v2i64
3178 //
3179 // i64 = bitcast
3180 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
3181 // (i32 (extract_vector_elt castx, (2 * y + 1)))
3182 //
3183 if (NewNumElts % OldNumElts != 0)
3184 return UnableToLegalize;
3185
3186 // Type of the intermediate result vector.
3187 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
3188 LLT MidTy =
3189 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
3190
3191 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
3192
3193 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
3194 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
3195
3196 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
3197 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
3198 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
3199 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
3200 NewOps[I] = Elt.getReg(0);
3201 }
3202
3203 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
3204 MIRBuilder.buildBitcast(Dst, NewVec);
3205 MI.eraseFromParent();
3206 return Legalized;
3207 }
3208
3209 if (NewNumElts < OldNumElts) {
3210 if (NewEltSize % OldEltSize != 0)
3211 return UnableToLegalize;
3212
3213 // This only depends on powers of 2 because we use bit tricks to figure out
3214 // the bit offset we need to shift to get the target element. A general
3215 // expansion could emit division/multiply.
3216 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3217 return UnableToLegalize;
3218
3219 // Increasing the vector element size.
3220 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
3221 //
3222 // =>
3223 //
3224 // %cast = G_BITCAST %vec
3225 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
3226 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
3227 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
3228 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
3229 // %elt_bits = G_LSHR %wide_elt, %offset_bits
3230 // %elt = G_TRUNC %elt_bits
3231
3232 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3233 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3234
3235 // Divide to get the index in the wider element type.
3236 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3237
3238 Register WideElt = CastVec;
3239 if (CastTy.isVector()) {
3240 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3241 ScaledIdx).getReg(0);
3242 }
3243
3244 // Compute the bit offset into the register of the target element.
3245 Register OffsetBits = getBitcastWiderVectorElementOffset(
3246 MIRBuilder, Idx, NewEltSize, OldEltSize);
3247
3248 // Shift the wide element to get the target element.
3249 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
3250 MIRBuilder.buildTrunc(Dst, ExtractedBits);
3251 MI.eraseFromParent();
3252 return Legalized;
3253 }
3254
3255 return UnableToLegalize;
3256}
3257
3258/// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
3259/// TargetReg, while preserving other bits in \p TargetReg.
3260///
3261/// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
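/// Worked example (editor's addition, not from the upstream source): inserting
/// an s8 value at OffsetBits = 16 into an s32 TargetReg computes
/// (ZExt(InsertReg) << 16) | (TargetReg & ~(0xff << 16)),
/// so only bits [23:16] of the target are replaced.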
3262static Register buildBitFieldInsert(MachineIRBuilder &B,
3263 Register TargetReg, Register InsertReg,
3264 Register OffsetBits) {
3265 LLT TargetTy = B.getMRI()->getType(TargetReg);
3266 LLT InsertTy = B.getMRI()->getType(InsertReg);
3267 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
3268 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
3269
3270 // Produce a bitmask of the value to insert
3271 auto EltMask = B.buildConstant(
3272 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
3273 InsertTy.getSizeInBits()));
3274 // Shift it into position
3275 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
3276 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
3277
3278 // Clear out the bits in the wide element
3279 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
3280
3281 // The value to insert has all zeros already, so stick it into the masked
3282 // wide element.
3283 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
3284}
3285
3286/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
3287/// is increasing the element size, perform the indexing in the target element
3288/// type, and use bit operations to insert at the element position. This is
3289/// intended for architectures that can dynamically index the register file and
3290/// want to force indexing in the native register size.
3291LegalizerHelper::LegalizeResult
3292LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
3293 LLT CastTy) {
3294 if (TypeIdx != 0)
3295 return UnableToLegalize;
3296
3297 auto [Dst, DstTy, SrcVec, SrcVecTy, Val, ValTy, Idx, IdxTy] =
3298 MI.getFirst4RegLLTs();
3299 LLT VecTy = DstTy;
3300
3301 LLT VecEltTy = VecTy.getElementType();
3302 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
3303 const unsigned NewEltSize = NewEltTy.getSizeInBits();
3304 const unsigned OldEltSize = VecEltTy.getSizeInBits();
3305
3306 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
3307 unsigned OldNumElts = VecTy.getNumElements();
3308
3309 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
3310 if (NewNumElts < OldNumElts) {
3311 if (NewEltSize % OldEltSize != 0)
3312 return UnableToLegalize;
3313
3314 // This only depends on powers of 2 because we use bit tricks to figure out
3315 // the bit offset we need to shift to get the target element. A general
3316 // expansion could emit division/multiply.
3317 if (!isPowerOf2_32(NewEltSize / OldEltSize))
3318 return UnableToLegalize;
3319
3320 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
3321 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
3322
3323 // Divide to get the index in the wider element type.
3324 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
3325
3326 Register ExtractedElt = CastVec;
3327 if (CastTy.isVector()) {
3328 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
3329 ScaledIdx).getReg(0);
3330 }
3331
3332 // Compute the bit offset into the register of the target element.
3333 Register OffsetBits = getBitcastWiderVectorElementOffset(
3334 MIRBuilder, Idx, NewEltSize, OldEltSize);
3335
3336 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
3337 Val, OffsetBits);
3338 if (CastTy.isVector()) {
3339 InsertedElt = MIRBuilder.buildInsertVectorElement(
3340 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
3341 }
3342
3343 MIRBuilder.buildBitcast(Dst, InsertedElt);
3344 MI.eraseFromParent();
3345 return Legalized;
3346 }
3347
3348 return UnableToLegalize;
3349}
3350
3351LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
3352 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
3353 Register DstReg = LoadMI.getDstReg();
3354 Register PtrReg = LoadMI.getPointerReg();
3355 LLT DstTy = MRI.getType(DstReg);
3356 MachineMemOperand &MMO = LoadMI.getMMO();
3357 LLT MemTy = MMO.getMemoryType();
3358 MachineFunction &MF = MIRBuilder.getMF();
3359
3360 unsigned MemSizeInBits = MemTy.getSizeInBits();
3361 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
3362
3363 if (MemSizeInBits != MemStoreSizeInBits) {
3364 if (MemTy.isVector())
3365 return UnableToLegalize;
3366
3367 // Promote to a byte-sized load if not loading an integral number of
3368 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
3369 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
3370 MachineMemOperand *NewMMO =
3371 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
3372
3373 Register LoadReg = DstReg;
3374 LLT LoadTy = DstTy;
3375
3376 // If this wasn't already an extending load, we need to widen the result
3377 // register to avoid creating a load with a narrower result than the source.
3378 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3379 LoadTy = WideMemTy;
3380 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3381 }
3382
3383 if (isa<GSExtLoad>(LoadMI)) {
3384 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3385 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
3386 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3387 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3388 // The extra bits are guaranteed to be zero, since we stored them that
3389 // way. A zext load from Wide thus automatically gives zext from MemVT.
3390 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3391 } else {
3392 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3393 }
3394
3395 if (DstTy != LoadTy)
3396 MIRBuilder.buildTrunc(DstReg, LoadReg);
3397
3398 LoadMI.eraseFromParent();
3399 return Legalized;
3400 }
3401
3402 // Big endian lowering not implemented.
3403 if (MIRBuilder.getDataLayout().isBigEndian())
3404 return UnableToLegalize;
3405
3406 // This load needs splitting into power of 2 sized loads.
3407 //
3408 // Our strategy here is to generate anyextending loads for the smaller
3409 // types up to next power-2 result type, and then combine the two larger
3410 // result values together, before truncating back down to the non-pow-2
3411 // type.
3412 // E.g. v1 = i24 load =>
3413 // v2 = i32 zextload (2 byte)
3414 // v3 = i32 load (1 byte)
3415 // v4 = i32 shl v3, 16
3416 // v5 = i32 or v4, v2
3417 // v1 = i24 trunc v5
3418 // By doing this we generate the correct truncate which should get
3419 // combined away as an artifact with a matching extend.
3420
3421 uint64_t LargeSplitSize, SmallSplitSize;
3422
3423 if (!isPowerOf2_32(MemSizeInBits)) {
3424 // This load needs splitting into power of 2 sized loads.
3425 LargeSplitSize = llvm::bit_floor(MemSizeInBits);
3426 SmallSplitSize = MemSizeInBits - LargeSplitSize;
3427 } else {
3428 // This is already a power of 2, but we still need to split this in half.
3429 //
3430 // Assume we're being asked to decompose an unaligned load.
3431 // TODO: If this requires multiple splits, handle them all at once.
3432 auto &Ctx = MF.getFunction().getContext();
3433 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3434 return UnableToLegalize;
3435
3436 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3437 }
3438
3439 if (MemTy.isVector()) {
3440 // TODO: Handle vector extloads
3441 if (MemTy != DstTy)
3442 return UnableToLegalize;
3443
3444 // TODO: We can do better than scalarizing the vector and at least split it
3445 // in half.
3446 return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3447 }
3448
3449 MachineMemOperand *LargeMMO =
3450 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3451 MachineMemOperand *SmallMMO =
3452 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3453
3454 LLT PtrTy = MRI.getType(PtrReg);
3455 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
3456 LLT AnyExtTy = LLT::scalar(AnyExtSize);
3457 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3458 PtrReg, *LargeMMO);
3459
3460 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3461 LargeSplitSize / 8);
3462 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3463 auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3464 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3465 SmallPtr, *SmallMMO);
3466
3467 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
3468 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3469
3470 if (AnyExtTy == DstTy)
3471 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3472 else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3473 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3474 MIRBuilder.buildTrunc(DstReg, {Or});
3475 } else {
3476 assert(DstTy.isPointer() && "expected pointer");
3477 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3478
3479 // FIXME: We currently consider this to be illegal for non-integral address
3480 // spaces, but we still need a way to reinterpret the bits.
3481 MIRBuilder.buildIntToPtr(DstReg, Or);
3482 }
3483
3484 LoadMI.eraseFromParent();
3485 return Legalized;
3486}
3487
3488 LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
3489 // Lower a non-power of 2 store into multiple pow-2 stores.
3490 // E.g. split an i24 store into an i16 store + i8 store.
3491 // We do this by first extending the stored value to the next largest power
3492 // of 2 type, and then using truncating stores to store the components.
3493 // By doing this, as with G_LOAD, we generate an extend that can be
3494 // artifact-combined away instead of leaving behind extracts.
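 // A rough sketch of the i24 case (virtual register names are illustrative):
 //   %ext:_(s32) = G_ANYEXT %val:_(s24)
 //   %hi:_(s32)  = G_LSHR %ext, 16
 //   G_STORE %ext, %ptr          ; truncating 2-byte (s16) store
 //   %ptr1 = G_PTR_ADD %ptr, 2
 //   G_STORE %hi, %ptr1          ; truncating 1-byte (s8) store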
3495 Register SrcReg = StoreMI.getValueReg();
3496 Register PtrReg = StoreMI.getPointerReg();
3497 LLT SrcTy = MRI.getType(SrcReg);
3498 MachineFunction &MF = MIRBuilder.getMF();
3499 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3500 LLT MemTy = MMO.getMemoryType();
3501
3502 unsigned StoreWidth = MemTy.getSizeInBits();
3503 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3504
3505 if (StoreWidth != StoreSizeInBits) {
3506 if (SrcTy.isVector())
3507 return UnableToLegalize;
3508
3509 // Promote to a byte-sized store with upper bits zero if not
3510 // storing an integral number of bytes. For example, promote
3511 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3512 LLT WideTy = LLT::scalar(StoreSizeInBits);
3513
3514 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3515 // Avoid creating a store with a narrower source than result.
3516 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3517 SrcTy = WideTy;
3518 }
3519
3520 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3521
3522 MachineMemOperand *NewMMO =
3523 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3524 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3525 StoreMI.eraseFromParent();
3526 return Legalized;
3527 }
3528
3529 if (MemTy.isVector()) {
3530 // TODO: Handle vector trunc stores
3531 if (MemTy != SrcTy)
3532 return UnableToLegalize;
3533
3534 // TODO: We can do better than scalarizing the vector; we could at least
3535 // split it in half.
3536 return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3537 }
3538
3539 unsigned MemSizeInBits = MemTy.getSizeInBits();
3540 uint64_t LargeSplitSize, SmallSplitSize;
3541
3542 if (!isPowerOf2_32(MemSizeInBits)) {
3543 LargeSplitSize = llvm::bit_floor<uint64_t>(MemTy.getSizeInBits());
3544 SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3545 } else {
3546 auto &Ctx = MF.getFunction().getContext();
3547 if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3548 return UnableToLegalize; // Don't know what we're being asked to do.
3549
3550 SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3551 }
3552
3553 // Extend to the next pow-2. If this store was itself the result of lowering,
3554 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3555 // that's wider than the stored size.
3556 unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3557 const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3558
3559 if (SrcTy.isPointer()) {
3560 const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3561 SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3562 }
3563
3564 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3565
3566 // Obtain the smaller value by shifting away the larger value.
3567 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3568 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
3569
3570 // Generate the PtrAdd and truncating stores.
3571 LLT PtrTy = MRI.getType(PtrReg);
3572 auto OffsetCst = MIRBuilder.buildConstant(
3573 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3574 auto SmallPtr =
3575 MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
3576
3577 MachineMemOperand *LargeMMO =
3578 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3579 MachineMemOperand *SmallMMO =
3580 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3581 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3582 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3583 StoreMI.eraseFromParent();
3584 return Legalized;
3585}
3586
3587 LegalizerHelper::LegalizeResult
3588 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3589 switch (MI.getOpcode()) {
3590 case TargetOpcode::G_LOAD: {
3591 if (TypeIdx != 0)
3592 return UnableToLegalize;
3593 MachineMemOperand &MMO = **MI.memoperands_begin();
3594
3595 // Not sure how to interpret a bitcast of an extending load.
3596 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3597 return UnableToLegalize;
3598
3599 Observer.changingInstr(MI);
3600 bitcastDst(MI, CastTy, 0);
3601 MMO.setType(CastTy);
3602 Observer.changedInstr(MI);
3603 return Legalized;
3604 }
3605 case TargetOpcode::G_STORE: {
3606 if (TypeIdx != 0)
3607 return UnableToLegalize;
3608
3609 MachineMemOperand &MMO = **MI.memoperands_begin();
3610
3611 // Not sure how to interpret a bitcast of a truncating store.
3612 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3613 return UnableToLegalize;
3614
3615 Observer.changingInstr(MI);
3616 bitcastSrc(MI, CastTy, 0);
3617 MMO.setType(CastTy);
3618 Observer.changedInstr(MI);
3619 return Legalized;
3620 }
3621 case TargetOpcode::G_SELECT: {
3622 if (TypeIdx != 0)
3623 return UnableToLegalize;
3624
3625 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3626 LLVM_DEBUG(
3627 dbgs() << "bitcast action not implemented for vector select\n");
3628 return UnableToLegalize;
3629 }
3630
3631 Observer.changingInstr(MI);
3632 bitcastSrc(MI, CastTy, 2);
3633 bitcastSrc(MI, CastTy, 3);
3634 bitcastDst(MI, CastTy, 0);
3635 Observer.changedInstr(MI);
3636 return Legalized;
3637 }
3638 case TargetOpcode::G_AND:
3639 case TargetOpcode::G_OR:
3640 case TargetOpcode::G_XOR: {
3641 Observer.changingInstr(MI);
3642 bitcastSrc(MI, CastTy, 1);
3643 bitcastSrc(MI, CastTy, 2);
3644 bitcastDst(MI, CastTy, 0);
3645 Observer.changedInstr(MI);
3646 return Legalized;
3647 }
3648 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3649 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3650 case TargetOpcode::G_INSERT_VECTOR_ELT:
3651 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3652 default:
3653 return UnableToLegalize;
3654 }
3655}
3656
3657// Legalize an instruction by changing the opcode in place.
3658 void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3659 Observer.changingInstr(MI);
3660 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3661 Observer.changedInstr(MI);
3662}
3663
3664 LegalizerHelper::LegalizeResult
3665 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3666 using namespace TargetOpcode;
3667
3668 switch(MI.getOpcode()) {
3669 default:
3670 return UnableToLegalize;
3671 case TargetOpcode::G_FCONSTANT:
3672 return lowerFConstant(MI);
3673 case TargetOpcode::G_BITCAST:
3674 return lowerBitcast(MI);
3675 case TargetOpcode::G_SREM:
3676 case TargetOpcode::G_UREM: {
3677 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3678 auto Quot =
3679 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3680 {MI.getOperand(1), MI.getOperand(2)});
3681
3682 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3683 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3684 MI.eraseFromParent();
3685 return Legalized;
3686 }
3687 case TargetOpcode::G_SADDO:
3688 case TargetOpcode::G_SSUBO:
3689 return lowerSADDO_SSUBO(MI);
3690 case TargetOpcode::G_UMULH:
3691 case TargetOpcode::G_SMULH:
3692 return lowerSMULH_UMULH(MI);
3693 case TargetOpcode::G_SMULO:
3694 case TargetOpcode::G_UMULO: {
3695 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3696 // result.
3697 auto [Res, Overflow, LHS, RHS] = MI.getFirst4Regs();
3698 LLT Ty = MRI.getType(Res);
3699
3700 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3701 ? TargetOpcode::G_SMULH
3702 : TargetOpcode::G_UMULH;
3703
3704 Observer.changingInstr(MI);
3705 const auto &TII = MIRBuilder.getTII();
3706 MI.setDesc(TII.get(TargetOpcode::G_MUL));
3707 MI.removeOperand(1);
3708 Observer.changedInstr(MI);
3709
3710 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3711 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3712
3713 // Move insert point forward so we can use the Res register if needed.
3714 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3715
3716 // For *signed* multiply, overflow is detected by checking:
3717 // (hi != (lo >> bitwidth-1))
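 // For example (illustrative, s8): 64 * 2 = 128 gives lo = 0x80 and hi = 0x00;
 // lo >> 7 (arithmetic) is 0xFF, which differs from hi, so overflow is set. For
 // the unsigned case it suffices to compare hi against zero.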
3718 if (Opcode == TargetOpcode::G_SMULH) {
3719 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3720 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3721 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3722 } else {
3723 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3724 }
3725 return Legalized;
3726 }
3727 case TargetOpcode::G_FNEG: {
3728 auto [Res, SubByReg] = MI.getFirst2Regs();
3729 LLT Ty = MRI.getType(Res);
3730
3731 // TODO: Handle vector types once we are able to
3732 // represent them.
3733 if (Ty.isVector())
3734 return UnableToLegalize;
3735 auto SignMask =
3736 MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3737 MIRBuilder.buildXor(Res, SubByReg, SignMask);
3738 MI.eraseFromParent();
3739 return Legalized;
3740 }
3741 case TargetOpcode::G_FSUB:
3742 case TargetOpcode::G_STRICT_FSUB: {
3743 auto [Res, LHS, RHS] = MI.getFirst3Regs();
3744 LLT Ty = MRI.getType(Res);
3745
3746 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3747 auto Neg = MIRBuilder.buildFNeg(Ty, RHS);
3748
3749 if (MI.getOpcode() == TargetOpcode::G_STRICT_FSUB)
3750 MIRBuilder.buildStrictFAdd(Res, LHS, Neg, MI.getFlags());
3751 else
3752 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3753
3754 MI.eraseFromParent();
3755 return Legalized;
3756 }
3757 case TargetOpcode::G_FMAD:
3758 return lowerFMad(MI);
3759 case TargetOpcode::G_FFLOOR:
3760 return lowerFFloor(MI);
3761 case TargetOpcode::G_INTRINSIC_ROUND:
3762 return lowerIntrinsicRound(MI);
3763 case TargetOpcode::G_FRINT: {
3764 // Since round even is the assumed rounding mode for unconstrained FP
3765 // operations, rint and roundeven are the same operation.
3766 changeOpcode(MI, TargetOpcode::G_INTRINSIC_ROUNDEVEN);
3767 return Legalized;
3768 }
3769 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3770 auto [OldValRes, SuccessRes, Addr, CmpVal, NewVal] = MI.getFirst5Regs();
3771 MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3772 **MI.memoperands_begin());
3773 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3774 MI.eraseFromParent();
3775 return Legalized;
3776 }
3777 case TargetOpcode::G_LOAD:
3778 case TargetOpcode::G_SEXTLOAD:
3779 case TargetOpcode::G_ZEXTLOAD:
3780 return lowerLoad(cast<GAnyLoad>(MI));
3781 case TargetOpcode::G_STORE:
3782 return lowerStore(cast<GStore>(MI));
3783 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3784 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3785 case TargetOpcode::G_CTLZ:
3786 case TargetOpcode::G_CTTZ:
3787 case TargetOpcode::G_CTPOP:
3788 return lowerBitCount(MI);
3789 case G_UADDO: {
3790 auto [Res, CarryOut, LHS, RHS] = MI.getFirst4Regs();
3791
3792 MIRBuilder.buildAdd(Res, LHS, RHS);
3793 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
3794
3795 MI.eraseFromParent();
3796 return Legalized;
3797 }
3798 case G_UADDE: {
3799 auto [Res, CarryOut, LHS, RHS, CarryIn] = MI.getFirst5Regs();
3800 const LLT CondTy = MRI.getType(CarryOut);
3801 const LLT Ty = MRI.getType(Res);
3802
3803 // Initial add of the two operands.
3804 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3805
3806 // Initial check for carry.
3807 auto Carry = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, TmpRes, LHS);
3808
3809 // Add the sum and the carry.
3810 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3811 MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3812
3813 // Second check for carry. We can only carry if the initial sum is all 1s
3814 // and the carry is set, resulting in a new sum of 0.
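 // For example (illustrative, s8): LHS = 0x80, RHS = 0x7F, CarryIn = 1 gives
 // TmpRes = 0xFF with no carry from the first add; adding the carry wraps Res
 // to 0x00, and (Res == 0) && CarryIn reports the carry out.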
3815 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3816 auto ResEqZero = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, Res, Zero);
3817 auto Carry2 = MIRBuilder.buildAnd(CondTy, ResEqZero, CarryIn);
3818 MIRBuilder.buildOr(CarryOut, Carry, Carry2);
3819
3820 MI.eraseFromParent();
3821 return Legalized;
3822 }
3823 case G_USUBO: {
3824 auto [Res, BorrowOut, LHS, RHS] = MI.getFirst4Regs();
3825
3826 MIRBuilder.buildSub(Res, LHS, RHS);
3827 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3828
3829 MI.eraseFromParent();
3830 return Legalized;
3831 }
3832 case G_USUBE: {
3833 auto [Res, BorrowOut, LHS, RHS, BorrowIn] = MI.getFirst5Regs();
3834 const LLT CondTy = MRI.getType(BorrowOut);
3835 const LLT Ty = MRI.getType(Res);
3836
3837 // Initial subtract of the two operands.
3838 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3839
3840 // Initial check for borrow.
3841 auto Borrow = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, CondTy, TmpRes, LHS);
3842
3843 // Subtract the borrow from the first subtract.
3844 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3845 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3846
3847 // Second check for borrow. We can only borrow if the initial difference is
3848 // 0 and the borrow is set, resulting in a new difference of all 1s.
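 // For example (illustrative, s8): LHS = 0x40, RHS = 0x40, BorrowIn = 1 gives
 // TmpRes = 0x00 with no borrow from the first subtract; subtracting the borrow
 // wraps Res to 0xFF, and (TmpRes == 0) && BorrowIn reports the borrow out.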
3849 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3850 auto TmpResEqZero =
3851 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, TmpRes, Zero);
3852 auto Borrow2 = MIRBuilder.buildAnd(CondTy, TmpResEqZero, BorrowIn);
3853 MIRBuilder.buildOr(BorrowOut, Borrow, Borrow2);
3854
3855 MI.eraseFromParent();
3856 return Legalized;
3857 }
3858 case G_UITOFP:
3859 return lowerUITOFP(MI);
3860 case G_SITOFP:
3861 return lowerSITOFP(MI);
3862 case G_FPTOUI:
3863 return lowerFPTOUI(MI);
3864 case G_FPTOSI:
3865 return lowerFPTOSI(MI);
3866 case G_FPTRUNC:
3867 return lowerFPTRUNC(MI);
3868 case G_FPOWI:
3869 return lowerFPOWI(MI);
3870 case G_SMIN:
3871 case G_SMAX:
3872 case G_UMIN:
3873 case G_UMAX:
3874 return lowerMinMax(MI);
3875 case G_FCOPYSIGN:
3876 return lowerFCopySign(MI);
3877 case G_FMINNUM:
3878 case G_FMAXNUM:
3879 return lowerFMinNumMaxNum(MI);
3880 case G_MERGE_VALUES:
3881 return lowerMergeValues(MI);
3882 case G_UNMERGE_VALUES:
3883 return lowerUnmergeValues(MI);
3884 case TargetOpcode::G_SEXT_INREG: {
3885 assert(MI.getOperand(2).isImm() && "Expected immediate");
3886 int64_t SizeInBits = MI.getOperand(2).getImm();
3887
3888 auto [DstReg, SrcReg] = MI.getFirst2Regs();
3889 LLT DstTy = MRI.getType(DstReg);
3890 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3891
3892 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3893 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3894 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3895 MI.eraseFromParent();
3896 return Legalized;
3897 }
3898 case G_EXTRACT_VECTOR_ELT:
3899 case G_INSERT_VECTOR_ELT:
3900 return lowerExtractInsertVectorElt(MI);
3901 case G_SHUFFLE_VECTOR:
3902 return lowerShuffleVector(MI);
3903 case G_DYN_STACKALLOC:
3904 return lowerDynStackAlloc(MI);
3905 case G_STACKSAVE:
3906 return lowerStackSave(MI);
3907 case G_STACKRESTORE:
3908 return lowerStackRestore(MI);
3909 case G_EXTRACT:
3910 return lowerExtract(MI);
3911 case G_INSERT:
3912 return lowerInsert(MI);
3913 case G_BSWAP:
3914 return lowerBswap(MI);
3915 case G_BITREVERSE:
3916 return lowerBitreverse(MI);
3917 case G_READ_REGISTER:
3918 case G_WRITE_REGISTER:
3919 return lowerReadWriteRegister(MI);
3920 case G_UADDSAT:
3921 case G_USUBSAT: {
3922 // Try to make a reasonable guess about which lowering strategy to use. The
3923 // target can override this with custom lowering and calling the
3924 // implementation functions.
3925 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3926 if (LI.isLegalOrCustom({G_UMIN, Ty}))
3927 return lowerAddSubSatToMinMax(MI);
3928 return lowerAddSubSatToAddoSubo(MI);
3929 }
3930 case G_SADDSAT:
3931 case G_SSUBSAT: {
3932 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3933
3934 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3935 // since it's a shorter expansion. However, we would need to figure out the
3936 // preferred boolean type for the carry out for the query.
3937 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3938 return lowerAddSubSatToMinMax(MI);
3939 return lowerAddSubSatToAddoSubo(MI);
3940 }
3941 case G_SSHLSAT:
3942 case G_USHLSAT:
3943 return lowerShlSat(MI);
3944 case G_ABS:
3945 return lowerAbsToAddXor(MI);
3946 case G_SELECT:
3947 return lowerSelect(MI);
3948 case G_IS_FPCLASS:
3949 return lowerISFPCLASS(MI);
3950 case G_SDIVREM:
3951 case G_UDIVREM:
3952 return lowerDIVREM(MI);
3953 case G_FSHL:
3954 case G_FSHR:
3955 return lowerFunnelShift(MI);
3956 case G_ROTL:
3957 case G_ROTR:
3958 return lowerRotate(MI);
3959 case G_MEMSET:
3960 case G_MEMCPY:
3961 case G_MEMMOVE:
3962 return lowerMemCpyFamily(MI);
3963 case G_MEMCPY_INLINE:
3964 return lowerMemcpyInline(MI);
3965 case G_ZEXT:
3966 case G_SEXT:
3967 case G_ANYEXT:
3968 return lowerEXT(MI);
3969 case G_TRUNC:
3970 return lowerTRUNC(MI);
3971 GISEL_VECREDUCE_CASES_NONSEQ
3972 return lowerVectorReduction(MI);
3973 case G_VAARG:
3974 return lowerVAArg(MI);
3975 }
3976}
3977
3978 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
3979 Align MinAlign) const {
3980 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3981 // datalayout for the preferred alignment. Also there should be a target hook
3982 // for this to allow targets to reduce the alignment and ignore the
3983 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3984 // the type.
3985 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
3986}
3987
3988 MachineInstrBuilder
3989 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
3990 MachinePointerInfo &PtrInfo) {
3991 MachineFunction &MF = MIRBuilder.getMF();
3992 const DataLayout &DL = MIRBuilder.getDataLayout();
3993 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3994
3995 unsigned AddrSpace = DL.getAllocaAddrSpace();
3996 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3997
3998 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3999 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
4000}
4001
4002 static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg,
4003 LLT VecTy) {
4004 LLT IdxTy = B.getMRI()->getType(IdxReg);
4005 unsigned NElts = VecTy.getNumElements();
4006
4007 int64_t IdxVal;
4008 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal))) {
4009 if (IdxVal < VecTy.getNumElements())
4010 return IdxReg;
4011 // If a constant index would be out of bounds, clamp it as well.
4012 }
4013
4014 if (isPowerOf2_32(NElts)) {
4015 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
4016 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
4017 }
4018
4019 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
4020 .getReg(0);
4021}
4022
4023 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
4024 Register Index) {
4025 LLT EltTy = VecTy.getElementType();
4026
4027 // Calculate the element offset and add it to the pointer.
4028 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
4029 assert(EltSize * 8 == EltTy.getSizeInBits() &&
4030 "Converting bits to bytes lost precision");
4031
4032 Index = clampVectorIndex(MIRBuilder, Index, VecTy);
4033
4034 // Convert index to the correct size for the address space.
4035 const DataLayout &DL = MIRBuilder.getDataLayout();
4036 unsigned AS = MRI.getType(VecPtr).getAddressSpace();
4037 unsigned IndexSizeInBits = DL.getIndexSize(AS) * 8;
4038 LLT IdxTy = MRI.getType(Index).changeElementSize(IndexSizeInBits);
4039 if (IdxTy != MRI.getType(Index))
4040 Index = MIRBuilder.buildSExtOrTrunc(IdxTy, Index).getReg(0);
4041
4042 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
4043 MIRBuilder.buildConstant(IdxTy, EltSize));
4044
4045 LLT PtrTy = MRI.getType(VecPtr);
4046 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
4047}
4048
4049#ifndef NDEBUG
4050/// Check that all vector operands have same number of elements. Other operands
4051 /// should be listed in NonVecOp.
4052 static bool hasSameNumEltsOnAllVectorOperands(
4053 GenericMachineInstr &MI, MachineRegisterInfo &MRI,
4054 std::initializer_list<unsigned> NonVecOpIndices) {
4055 if (MI.getNumMemOperands() != 0)
4056 return false;
4057
4058 LLT VecTy = MRI.getType(MI.getReg(0));
4059 if (!VecTy.isVector())
4060 return false;
4061 unsigned NumElts = VecTy.getNumElements();
4062
4063 for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
4064 MachineOperand &Op = MI.getOperand(OpIdx);
4065 if (!Op.isReg()) {
4066 if (!is_contained(NonVecOpIndices, OpIdx))
4067 return false;
4068 continue;
4069 }
4070
4071 LLT Ty = MRI.getType(Op.getReg());
4072 if (!Ty.isVector()) {
4073 if (!is_contained(NonVecOpIndices, OpIdx))
4074 return false;
4075 continue;
4076 }
4077
4078 if (Ty.getNumElements() != NumElts)
4079 return false;
4080 }
4081
4082 return true;
4083}
4084#endif
4085
4086 /// Fill \p DstOps with DstOps that, combined, cover the same number of
4087 /// elements as \p Ty. These DstOps are either scalars (when \p NumElts = 1) or
4088 /// vectors with \p NumElts elements. When Ty.getNumElements() is not a multiple
4089 /// of \p NumElts, the last DstOp (leftover) has fewer than \p NumElts elements.
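/// For example (illustrative): Ty = <7 x s32> with \p NumElts = 4 produces
/// DstOps = { <4 x s32>, <3 x s32> }, and Ty = <4 x s32> with \p NumElts = 1
/// produces four s32 DstOps.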
4090static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
4091 unsigned NumElts) {
4092 LLT LeftoverTy;
4093 assert(Ty.isVector() && "Expected vector type");
4094 LLT EltTy = Ty.getElementType();
4095 LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
4096 int NumParts, NumLeftover;
4097 std::tie(NumParts, NumLeftover) =
4098 getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
4099
4100 assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
4101 for (int i = 0; i < NumParts; ++i) {
4102 DstOps.push_back(NarrowTy);
4103 }
4104
4105 if (LeftoverTy.isValid()) {
4106 assert(NumLeftover == 1 && "expected exactly one leftover");
4107 DstOps.push_back(LeftoverTy);
4108 }
4109}
4110
4111/// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
4112/// made from \p Op depending on operand type.
4113static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
4114 MachineOperand &Op) {
4115 for (unsigned i = 0; i < N; ++i) {
4116 if (Op.isReg())
4117 Ops.push_back(Op.getReg());
4118 else if (Op.isImm())
4119 Ops.push_back(Op.getImm());
4120 else if (Op.isPredicate())
4121 Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
4122 else
4123 llvm_unreachable("Unsupported type");
4124 }
4125}
4126
4127// Handle splitting vector operations which need to have the same number of
4128// elements in each type index, but each type index may have a different element
4129// type.
4130//
4131// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
4132// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4133// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4134//
4135// Also handles some irregular breakdown cases, e.g.
4136// e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
4137// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
4138 // s64 = G_SHL s64, s32
4139 LegalizerHelper::LegalizeResult
4140 LegalizerHelper::fewerElementsVectorMultiEltType(
4141 GenericMachineInstr &MI, unsigned NumElts,
4142 std::initializer_list<unsigned> NonVecOpIndices) {
4143 assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
4144 "Non-compatible opcode or not specified non-vector operands");
4145 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4146
4147 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4148 unsigned NumDefs = MI.getNumDefs();
4149
4150 // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
4151 // Build instructions with DstOps to use instruction found by CSE directly.
4152 // CSE copies found instruction into given vreg when building with vreg dest.
4153 SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
4154 // Output registers will be taken from created instructions.
4155 SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
4156 for (unsigned i = 0; i < NumDefs; ++i) {
4157 makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
4158 }
4159
4160 // Split vector input operands into sub-vectors with NumElts elts + Leftover.
4161 // Operands listed in NonVecOpIndices will be used as is without splitting;
4162 // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
4163 // scalar condition (op 1), immediate in sext_inreg (op 2).
4164 SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
4165 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4166 ++UseIdx, ++UseNo) {
4167 if (is_contained(NonVecOpIndices, UseIdx)) {
4168 broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
4169 MI.getOperand(UseIdx));
4170 } else {
4171 SmallVector<Register, 8> SplitPieces;
4172 extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces, MIRBuilder,
4173 MRI);
4174 for (auto Reg : SplitPieces)
4175 InputOpsPieces[UseNo].push_back(Reg);
4176 }
4177 }
4178
4179 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4180
4181 // Take i-th piece of each input operand split and build sub-vector/scalar
4182 // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
4183 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4184 SmallVector<DstOp, 2> Defs;
4185 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4186 Defs.push_back(OutputOpsPieces[DstNo][i]);
4187
4188 SmallVector<SrcOp, 3> Uses;
4189 for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
4190 Uses.push_back(InputOpsPieces[InputNo][i]);
4191
4192 auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
4193 for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
4194 OutputRegs[DstNo].push_back(I.getReg(DstNo));
4195 }
4196
4197 // Merge small outputs into MI's output for each def operand.
4198 if (NumLeftovers) {
4199 for (unsigned i = 0; i < NumDefs; ++i)
4200 mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
4201 } else {
4202 for (unsigned i = 0; i < NumDefs; ++i)
4203 MIRBuilder.buildMergeLikeInstr(MI.getReg(i), OutputRegs[i]);
4204 }
4205
4206 MI.eraseFromParent();
4207 return Legalized;
4208}
4209
4210 LegalizerHelper::LegalizeResult
4211 LegalizerHelper::fewerElementsVectorPhi(GenericMachineInstr &MI,
4212 unsigned NumElts) {
4213 unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
4214
4215 unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
4216 unsigned NumDefs = MI.getNumDefs();
4217
4218 SmallVector<DstOp, 8> OutputOpsPieces;
4219 SmallVector<Register, 8> OutputRegs;
4220 makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
4221
4222 // Instructions that perform the register split will be inserted in the basic
4223 // block where the register is defined (that basic block is in the next operand).
4224 SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
4225 for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
4226 UseIdx += 2, ++UseNo) {
4227 MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
4228 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminatorForward());
4229 extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo],
4230 MIRBuilder, MRI);
4231 }
4232
4233 // Build PHIs with fewer elements.
4234 unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
4235 MIRBuilder.setInsertPt(*MI.getParent(), MI);
4236 for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
4237 auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
4238 Phi.addDef(
4239 MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
4240 OutputRegs.push_back(Phi.getReg(0));
4241
4242 for (unsigned j = 0; j < NumInputs / 2; ++j) {
4243 Phi.addUse(InputOpsPieces[j][i]);
4244 Phi.add(MI.getOperand(1 + j * 2 + 1));
4245 }
4246 }
4247
4248 // Merge small outputs into MI's def.
4249 if (NumLeftovers) {
4250 mergeMixedSubvectors(MI.getReg(0), OutputRegs);
4251 } else {
4252 MIRBuilder.buildMergeLikeInstr(MI.getReg(0), OutputRegs);
4253 }
4254
4255 MI.eraseFromParent();
4256 return Legalized;
4257}
4258
4259 LegalizerHelper::LegalizeResult
4260 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
4261 unsigned TypeIdx,
4262 LLT NarrowTy) {
4263 const int NumDst = MI.getNumOperands() - 1;
4264 const Register SrcReg = MI.getOperand(NumDst).getReg();
4265 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4266 LLT SrcTy = MRI.getType(SrcReg);
4267
4268 if (TypeIdx != 1 || NarrowTy == DstTy)
4269 return UnableToLegalize;
4270
4271 // Requires compatible types. Otherwise SrcReg should have been defined by a
4272 // merge-like instruction that would get artifact-combined. Most likely the
4273 // instruction that defines SrcReg has to perform more/fewer-elements
4274 // legalization compatible with NarrowTy.
4275 assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4276 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4277
4278 if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4279 (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
4280 return UnableToLegalize;
4281
4282 // This is most likely DstTy (smaller than register size) packed in SrcTy
4283 // (larger than register size) and since unmerge was not combined it will be
4284 // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
4285 // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
4286
4287 // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
4288 //
4289 // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
4290 // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
4291 // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
4292 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
4293 const int NumUnmerge = Unmerge->getNumOperands() - 1;
4294 const int PartsPerUnmerge = NumDst / NumUnmerge;
4295
4296 for (int I = 0; I != NumUnmerge; ++I) {
4297 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
4298
4299 for (int J = 0; J != PartsPerUnmerge; ++J)
4300 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
4301 MIB.addUse(Unmerge.getReg(I));
4302 }
4303
4304 MI.eraseFromParent();
4305 return Legalized;
4306}
4307
4308 LegalizerHelper::LegalizeResult
4309 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
4310 LLT NarrowTy) {
4311 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4312 // Requires compatible types. Otherwise the user of DstReg did not perform an
4313 // unmerge that should have been artifact-combined. Most likely the instruction
4314 // that uses DstReg has to do more/fewer-elements legalization compatible with NarrowTy.
4315 assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
4316 assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4317 if (NarrowTy == SrcTy)
4318 return UnableToLegalize;
4319
4320 // This attempts to lower part of an LCMTy merge/unmerge sequence. Its intended
4321 // use is for old MIR tests. Since the change to more/fewer-elements legalization,
4322 // it should no longer be possible to generate MIR like this starting from llvm-ir,
4323 // because the LCMTy approach was replaced with merge/unmerge to vector elements.
4324 if (TypeIdx == 1) {
4325 assert(SrcTy.isVector() && "Expected vector types");
4326 assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
4327 if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
4328 (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
4329 return UnableToLegalize;
4330 // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
4331 //
4332 // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
4333 // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
4334 // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
4335 // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
4336 // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
4337 // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
4338
4339 SmallVector<Register, 8> Elts;
4340 LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
4341 for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
4342 auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
4343 for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
4344 Elts.push_back(Unmerge.getReg(j));
4345 }
4346
4347 SmallVector<Register, 8> NarrowTyElts;
4348 unsigned NumNarrowTyElts = NarrowTy.getNumElements();
4349 unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
4350 for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
4351 ++i, Offset += NumNarrowTyElts) {
4352 ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
4353 NarrowTyElts.push_back(
4354 MIRBuilder.buildMergeLikeInstr(NarrowTy, Pieces).getReg(0));
4355 }
4356
4357 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4358 MI.eraseFromParent();
4359 return Legalized;
4360 }
4361
4362 assert(TypeIdx == 0 && "Bad type index");
4363 if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
4364 (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
4365 return UnableToLegalize;
4366
4367 // This is most likely SrcTy (smaller than register size) packed in DstTy
4368 // (larger than register size) and since merge was not combined it will be
4369 // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
4370 // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
4371
4372 // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
4373 //
4374 // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
4375 // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
4376 // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
4377 SmallVector<Register, 8> NarrowTyElts;
4378 unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
4379 unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
4380 unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
4381 for (unsigned i = 0; i < NumParts; ++i) {
4382 SmallVector<Register, 8> Sources;
4383 for (unsigned j = 0; j < NumElts; ++j)
4384 Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
4385 NarrowTyElts.push_back(
4386 MIRBuilder.buildMergeLikeInstr(NarrowTy, Sources).getReg(0));
4387 }
4388
4389 MIRBuilder.buildMergeLikeInstr(DstReg, NarrowTyElts);
4390 MI.eraseFromParent();
4391 return Legalized;
4392}
4393
4394 LegalizerHelper::LegalizeResult
4395 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
4396 unsigned TypeIdx,
4397 LLT NarrowVecTy) {
4398 auto [DstReg, SrcVec] = MI.getFirst2Regs();
4399 Register InsertVal;
4400 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4401
4402 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
4403 if (IsInsert)
4404 InsertVal = MI.getOperand(2).getReg();
4405
4406 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
4407
4408 // TODO: Handle total scalarization case.
4409 if (!NarrowVecTy.isVector())
4410 return UnableToLegalize;
4411
4412 LLT VecTy = MRI.getType(SrcVec);
4413
4414 // If the index is a constant, we can really break this down as you would
4415 // expect, and index into the target size pieces.
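 // For example (illustrative): extracting element 5 from <8 x s32> with
 // NarrowVecTy = <4 x s32> operates on piece 1 (5 / 4) with adjusted index
 // 1 (5 - 4).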
4416 int64_t IdxVal;
4417 auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
4418 if (MaybeCst) {
4419 IdxVal = MaybeCst->Value.getSExtValue();
4420 // Avoid out of bounds indexing the pieces.
4421 if (IdxVal >= VecTy.getNumElements()) {
4422 MIRBuilder.buildUndef(DstReg);
4423 MI.eraseFromParent();
4424 return Legalized;
4425 }
4426
4427 SmallVector<Register, 8> VecParts;
4428 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4429
4430 // Build a sequence of NarrowTy pieces in VecParts for this operand.
4431 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4432 TargetOpcode::G_ANYEXT);
4433
4434 unsigned NewNumElts = NarrowVecTy.getNumElements();
4435
4436 LLT IdxTy = MRI.getType(Idx);
4437 int64_t PartIdx = IdxVal / NewNumElts;
4438 auto NewIdx =
4439 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4440
4441 if (IsInsert) {
4442 LLT PartTy = MRI.getType(VecParts[PartIdx]);
4443
4444 // Use the adjusted index to insert into one of the subvectors.
4445 auto InsertPart = MIRBuilder.buildInsertVectorElement(
4446 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4447 VecParts[PartIdx] = InsertPart.getReg(0);
4448
4449 // Recombine the inserted subvector with the others to reform the result
4450 // vector.
4451 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4452 } else {
4453 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
4454 }
4455
4456 MI.eraseFromParent();
4457 return Legalized;
4458 }
4459
4460 // With a variable index, we can't perform the operation in a smaller type, so
4461 // we're forced to expand this.
4462 //
4463 // TODO: We could emit a chain of compare/select to figure out which piece to
4464 // index.
4465 return lowerExtractInsertVectorElt(MI);
4466}
4467
4468 LegalizerHelper::LegalizeResult
4469 LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
4470 LLT NarrowTy) {
4471 // FIXME: Don't know how to handle secondary types yet.
4472 if (TypeIdx != 0)
4473 return UnableToLegalize;
4474
4475 // This implementation doesn't work for atomics. Give up instead of doing
4476 // something invalid.
4477 if (LdStMI.isAtomic())
4478 return UnableToLegalize;
4479
4480 bool IsLoad = isa<GLoad>(LdStMI);
4481 Register ValReg = LdStMI.getReg(0);
4482 Register AddrReg = LdStMI.getPointerReg();
4483 LLT ValTy = MRI.getType(ValReg);
4484
4485 // FIXME: Do we need a distinct NarrowMemory legalize action?
4486 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize().getValue()) {
4487 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
4488 return UnableToLegalize;
4489 }
4490
4491 int NumParts = -1;
4492 int NumLeftover = -1;
4493 LLT LeftoverTy;
4494 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
4495 if (IsLoad) {
4496 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
4497 } else {
4498 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4499 NarrowLeftoverRegs, MIRBuilder, MRI)) {
4500 NumParts = NarrowRegs.size();
4501 NumLeftover = NarrowLeftoverRegs.size();
4502 }
4503 }
4504
4505 if (NumParts == -1)
4506 return UnableToLegalize;
4507
4508 LLT PtrTy = MRI.getType(AddrReg);
4509 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
4510
4511 unsigned TotalSize = ValTy.getSizeInBits();
4512
4513 // Split the load/store into PartTy sized pieces starting at Offset. If this
4514 // is a load, return the new registers in ValRegs. For a store, each element
4515 // of ValRegs should be PartTy. Returns the next offset that needs to be
4516 // handled.
4517 bool isBigEndian = MIRBuilder.getDataLayout().isBigEndian();
4518 auto MMO = LdStMI.getMMO();
4519 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
4520 unsigned NumParts, unsigned Offset) -> unsigned {
4521 MachineFunction &MF = MIRBuilder.getMF();
4522 unsigned PartSize = PartTy.getSizeInBits();
4523 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
4524 ++Idx) {
4525 unsigned ByteOffset = Offset / 8;
4526 Register NewAddrReg;
4527
4528 MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
4529
4530 MachineMemOperand *NewMMO =
4531 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
4532
4533 if (IsLoad) {
4534 Register Dst = MRI.createGenericVirtualRegister(PartTy);
4535 ValRegs.push_back(Dst);
4536 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
4537 } else {
4538 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
4539 }
4540 Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
4541 }
4542
4543 return Offset;
4544 };
4545
4546 unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
4547 unsigned HandledOffset =
4548 splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
4549
4550 // Handle the rest of the register if this isn't an even type breakdown.
4551 if (LeftoverTy.isValid())
4552 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
4553
4554 if (IsLoad) {
4555 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4556 LeftoverTy, NarrowLeftoverRegs);
4557 }
4558
4559 LdStMI.eraseFromParent();
4560 return Legalized;
4561}
4562
4563 LegalizerHelper::LegalizeResult
4564 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4565 LLT NarrowTy) {
4566 using namespace TargetOpcode;
4567 GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
4568 unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
4569
4570 switch (MI.getOpcode()) {
4571 case G_IMPLICIT_DEF:
4572 case G_TRUNC:
4573 case G_AND:
4574 case G_OR:
4575 case G_XOR:
4576 case G_ADD:
4577 case G_SUB:
4578 case G_MUL:
4579 case G_PTR_ADD:
4580 case G_SMULH:
4581 case G_UMULH:
4582 case G_FADD:
4583 case G_FMUL:
4584 case G_FSUB:
4585 case G_FNEG:
4586 case G_FABS:
4587 case G_FCANONICALIZE:
4588 case G_FDIV:
4589 case G_FREM:
4590 case G_FMA:
4591 case G_FMAD:
4592 case G_FPOW:
4593 case G_FEXP:
4594 case G_FEXP2:
4595 case G_FEXP10:
4596 case G_FLOG:
4597 case G_FLOG2:
4598 case G_FLOG10:
4599 case G_FLDEXP:
4600 case G_FNEARBYINT:
4601 case G_FCEIL:
4602 case G_FFLOOR:
4603 case G_FRINT:
4604 case G_INTRINSIC_ROUND:
4605 case G_INTRINSIC_ROUNDEVEN:
4606 case G_INTRINSIC_TRUNC:
4607 case G_FCOS:
4608 case G_FSIN:
4609 case G_FSQRT:
4610 case G_BSWAP:
4611 case G_BITREVERSE:
4612 case G_SDIV:
4613 case G_UDIV:
4614 case G_SREM:
4615 case G_UREM:
4616 case G_SDIVREM:
4617 case G_UDIVREM:
4618 case G_SMIN:
4619 case G_SMAX:
4620 case G_UMIN:
4621 case G_UMAX:
4622 case G_ABS:
4623 case G_FMINNUM:
4624 case G_FMAXNUM:
4625 case G_FMINNUM_IEEE:
4626 case G_FMAXNUM_IEEE:
4627 case G_FMINIMUM:
4628 case G_FMAXIMUM:
4629 case G_FSHL:
4630 case G_FSHR:
4631 case G_ROTL:
4632 case G_ROTR:
4633 case G_FREEZE:
4634 case G_SADDSAT:
4635 case G_SSUBSAT:
4636 case G_UADDSAT:
4637 case G_USUBSAT:
4638 case G_UMULO:
4639 case G_SMULO:
4640 case G_SHL:
4641 case G_LSHR:
4642 case G_ASHR:
4643 case G_SSHLSAT:
4644 case G_USHLSAT:
4645 case G_CTLZ:
4646 case G_CTLZ_ZERO_UNDEF:
4647 case G_CTTZ:
4648 case G_CTTZ_ZERO_UNDEF:
4649 case G_CTPOP:
4650 case G_FCOPYSIGN:
4651 case G_ZEXT:
4652 case G_SEXT:
4653 case G_ANYEXT:
4654 case G_FPEXT:
4655 case G_FPTRUNC:
4656 case G_SITOFP:
4657 case G_UITOFP:
4658 case G_FPTOSI:
4659 case G_FPTOUI:
4660 case G_INTTOPTR:
4661 case G_PTRTOINT:
4662 case G_ADDRSPACE_CAST:
4663 case G_UADDO:
4664 case G_USUBO:
4665 case G_UADDE:
4666 case G_USUBE:
4667 case G_SADDO:
4668 case G_SSUBO:
4669 case G_SADDE:
4670 case G_SSUBE:
4671 case G_STRICT_FADD:
4672 case G_STRICT_FSUB:
4673 case G_STRICT_FMUL:
4674 case G_STRICT_FMA:
4675 case G_STRICT_FLDEXP:
4676 case G_FFREXP:
4677 return fewerElementsVectorMultiEltType(GMI, NumElts);
4678 case G_ICMP:
4679 case G_FCMP:
4680 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cmp predicate*/});
4681 case G_IS_FPCLASS:
4682 return fewerElementsVectorMultiEltType(GMI, NumElts, {2, 3 /*mask,fpsem*/});
4683 case G_SELECT:
4684 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4685 return fewerElementsVectorMultiEltType(GMI, NumElts);
4686 return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
4687 case G_PHI:
4688 return fewerElementsVectorPhi(GMI, NumElts);
4689 case G_UNMERGE_VALUES:
4690 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4691 case G_BUILD_VECTOR:
4692 assert(TypeIdx == 0 && "not a vector type index");
4693 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4694 case G_CONCAT_VECTORS:
4695 if (TypeIdx != 1) // TODO: This probably does work as expected already.
4696 return UnableToLegalize;
4697 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4698 case G_EXTRACT_VECTOR_ELT:
4699 case G_INSERT_VECTOR_ELT:
4700 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4701 case G_LOAD:
4702 case G_STORE:
4703 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
4704 case G_SEXT_INREG:
4705 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
4706 GISEL_VECREDUCE_CASES_NONSEQ
4707 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4708 case TargetOpcode::G_VECREDUCE_SEQ_FADD:
4709 case TargetOpcode::G_VECREDUCE_SEQ_FMUL:
4710 return fewerElementsVectorSeqReductions(MI, TypeIdx, NarrowTy);
4711 case G_SHUFFLE_VECTOR:
4712 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4713 case G_FPOWI:
4714 return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*pow*/});
4715 case G_BITCAST:
4716 return fewerElementsBitcast(MI, TypeIdx, NarrowTy);
4717 default:
4718 return UnableToLegalize;
4719 }
4720}
4721
4722 LegalizerHelper::LegalizeResult
4723 LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
4724 LLT NarrowTy) {
4725 assert(MI.getOpcode() == TargetOpcode::G_BITCAST &&
4726 "Not a bitcast operation");
4727
4728 if (TypeIdx != 0)
4729 return UnableToLegalize;
4730
4731 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
4732
4733 unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
4734 LLT SrcNarrowTy =
4735 LLT::fixed_vector(NarrowTy.getSizeInBits() / SrcScalSize, SrcScalSize);
4736
4737 // Split the Src and Dst Reg into smaller registers
4738 SmallVector<Register> SrcVRegs, BitcastVRegs;
4739 if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
4740 return UnableToLegalize;
4741
4742 // Build new smaller bitcast instructions
4743 // Not supporting Leftover types for now but will have to
4744 for (unsigned i = 0; i < SrcVRegs.size(); i++)
4745 BitcastVRegs.push_back(
4746 MIRBuilder.buildBitcast(NarrowTy, SrcVRegs[i]).getReg(0));
4747
4748 MIRBuilder.buildMergeLikeInstr(DstReg, BitcastVRegs);
4749 MI.eraseFromParent();
4750 return Legalized;
4751}
4752
4753 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4754 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4755 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4756 if (TypeIdx != 0)
4757 return UnableToLegalize;
4758
4759 auto [DstReg, DstTy, Src1Reg, Src1Ty, Src2Reg, Src2Ty] =
4760 MI.getFirst3RegLLTs();
4761 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4762 // The shuffle should be canonicalized by now.
4763 if (DstTy != Src1Ty)
4764 return UnableToLegalize;
4765 if (DstTy != Src2Ty)
4766 return UnableToLegalize;
4767
4768 if (!isPowerOf2_32(DstTy.getNumElements()))
4769 return UnableToLegalize;
4770
4771 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4772 // Further legalization attempts will be needed to split it further.
4773 NarrowTy =
4774 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
4775 unsigned NewElts = NarrowTy.getNumElements();
4776
4777 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4778 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs, MIRBuilder, MRI);
4779 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs, MIRBuilder, MRI);
4780 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4781 SplitSrc2Regs[1]};
4782
4783 Register Hi, Lo;
4784
4785 // If Lo or Hi uses elements from at most two of the four input vectors, then
4786 // express it as a vector shuffle of those two inputs. Otherwise extract the
4787 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
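 // For example (illustrative): with <4 x s32> sources and mask <0,5,1,4>,
 // NewElts = 2 and Inputs = {Src1Lo, Src1Hi, Src2Lo, Src2Hi}. The Lo half
 // <0,5> only touches Src1Lo and Src2Lo, so it becomes a shuffle of those two
 // with mask <0,3>; the Hi half <1,4> becomes a shuffle of the same pair with
 // mask <1,2>.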
4788 SmallVector<int, 16> Ops;
4789 for (unsigned High = 0; High < 2; ++High) {
4790 Register &Output = High ? Hi : Lo;
4791
4792 // Build a shuffle mask for the output, discovering on the fly which
4793 // input vectors to use as shuffle operands (recorded in InputUsed).
4794 // If building a suitable shuffle vector proves too hard, then bail
4795 // out with useBuildVector set.
4796 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4797 unsigned FirstMaskIdx = High * NewElts;
4798 bool UseBuildVector = false;
4799 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4800 // The mask element. This indexes into the input.
4801 int Idx = Mask[FirstMaskIdx + MaskOffset];
4802
4803 // The input vector this mask element indexes into.
4804 unsigned Input = (unsigned)Idx / NewElts;
4805
4806 if (Input >= std::size(Inputs)) {
4807 // The mask element does not index into any input vector.
4808 Ops.push_back(-1);
4809 continue;
4810 }
4811
4812 // Turn the index into an offset from the start of the input vector.
4813 Idx -= Input * NewElts;
4814
4815 // Find or create a shuffle vector operand to hold this input.
4816 unsigned OpNo;
4817 for (OpNo = 0; OpNo < std::size(InputUsed); ++OpNo) {
4818 if (InputUsed[OpNo] == Input) {
4819 // This input vector is already an operand.
4820 break;
4821 } else if (InputUsed[OpNo] == -1U) {
4822 // Create a new operand for this input vector.
4823 InputUsed[OpNo] = Input;
4824 break;
4825 }
4826 }
4827
4828 if (OpNo >= std::size(InputUsed)) {
4829 // More than two input vectors used! Give up on trying to create a
4830 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
4831 UseBuildVector = true;
4832 break;
4833 }
4834
4835 // Add the mask index for the new shuffle vector.
4836 Ops.push_back(Idx + OpNo * NewElts);
4837 }
4838
4839 if (UseBuildVector) {
4840 LLT EltTy = NarrowTy.getElementType();
4841 SmallVector<Register, 16> SVOps;
4842
4843 // Extract the input elements by hand.
4844 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4845 // The mask element. This indexes into the input.
4846 int Idx = Mask[FirstMaskIdx + MaskOffset];
4847
4848 // The input vector this mask element indexes into.
4849 unsigned Input = (unsigned)Idx / NewElts;
4850
4851 if (Input >= std::size(Inputs)) {
4852 // The mask element is "undef" or indexes off the end of the input.
4853 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4854 continue;
4855 }
4856
4857 // Turn the index into an offset from the start of the input vector.
4858 Idx -= Input * NewElts;
4859
4860 // Extract the vector element by hand.
4861 SVOps.push_back(MIRBuilder
4862 .buildExtractVectorElement(
4863 EltTy, Inputs[Input],
4864 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
4865 .getReg(0));
4866 }
4867
4868 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4869 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4870 } else if (InputUsed[0] == -1U) {
4871 // No input vectors were used! The result is undefined.
4872 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4873 } else {
4874 Register Op0 = Inputs[InputUsed[0]];
4875 // If only one input was used, use an undefined vector for the other.
4876 Register Op1 = InputUsed[1] == -1U
4877 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4878 : Inputs[InputUsed[1]];
4879 // At least one input vector was used. Create a new shuffle vector.
4880 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4881 }
4882
4883 Ops.clear();
4884 }
4885
4886 MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4887 MI.eraseFromParent();
4888 return Legalized;
4889}
4890
4891 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4892 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4893 auto &RdxMI = cast<GVecReduce>(MI);
4894
4895 if (TypeIdx != 1)
4896 return UnableToLegalize;
4897
4898 // The semantics of the normal non-sequential reductions allow us to freely
4899 // re-associate the operation.
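 // For example (illustrative): a G_VECREDUCE_ADD of <12 x s32> with
 // NarrowTy = <4 x s32> becomes three <4 x s32> partial reductions whose
 // scalar results are then combined with G_ADDs.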
4900 auto [DstReg, DstTy, SrcReg, SrcTy] = RdxMI.getFirst2RegLLTs();
4901
4902 if (NarrowTy.isVector() &&
4903 (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
4904 return UnableToLegalize;
4905
4906 unsigned ScalarOpc = RdxMI.getScalarOpcForReduction();
4907 SmallVector<Register> SplitSrcs;
4908 // If NarrowTy is a scalar then we're being asked to scalarize.
4909 const unsigned NumParts =
4910 NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
4911 : SrcTy.getNumElements();
4912
4913 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
4914 if (NarrowTy.isScalar()) {
4915 if (DstTy != NarrowTy)
4916 return UnableToLegalize; // FIXME: handle implicit extensions.
4917
4918 if (isPowerOf2_32(NumParts)) {
4919 // Generate a tree of scalar operations to reduce the critical path.
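 // For example (illustrative): with four pieces a, b, c, d this computes
 // (a + b) and (c + d) in one round and then combines them, giving depth 2
 // instead of the depth-3 chain ((a + b) + c) + d.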
4920 SmallVector<Register> PartialResults;
4921 unsigned NumPartsLeft = NumParts;
4922 while (NumPartsLeft > 1) {
4923 for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
4924 PartialResults.emplace_back(
4925 MIRBuilder
4926 .buildInstr(ScalarOpc, {NarrowTy},
4927 {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
4928 .getReg(0));
4929 }
4930 SplitSrcs = PartialResults;
4931 PartialResults.clear();
4932 NumPartsLeft = SplitSrcs.size();
4933 }
4934 assert(SplitSrcs.size() == 1);
4935 MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
4936 MI.eraseFromParent();
4937 return Legalized;
4938 }
4939 // If we can't generate a tree, then just do sequential operations.
4940 Register Acc = SplitSrcs[0];
4941 for (unsigned Idx = 1; Idx < NumParts; ++Idx)
4942 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
4943 .getReg(0);
4944 MIRBuilder.buildCopy(DstReg, Acc);
4945 MI.eraseFromParent();
4946 return Legalized;
4947 }
4948 SmallVector<Register> PartialReductions;
4949 for (unsigned Part = 0; Part < NumParts; ++Part) {
4950 PartialReductions.push_back(
4951 MIRBuilder.buildInstr(RdxMI.getOpcode(), {DstTy}, {SplitSrcs[Part]})
4952 .getReg(0));
4953 }
4954
4955 // If the types involved are powers of 2, we can generate intermediate vector
4956 // ops, before generating a final reduction operation.
4957 if (isPowerOf2_32(SrcTy.getNumElements()) &&
4958 isPowerOf2_32(NarrowTy.getNumElements())) {
4959 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
4960 }
4961
4962 Register Acc = PartialReductions[0];
4963 for (unsigned Part = 1; Part < NumParts; ++Part) {
4964 if (Part == NumParts - 1) {
4965 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
4966 {Acc, PartialReductions[Part]});
4967 } else {
4968 Acc = MIRBuilder
4969 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
4970 .getReg(0);
4971 }
4972 }
4973 MI.eraseFromParent();
4974 return Legalized;
4975}
4976
4977 LegalizerHelper::LegalizeResult
4978 LegalizerHelper::fewerElementsVectorSeqReductions(MachineInstr &MI,
4979 unsigned int TypeIdx,
4980 LLT NarrowTy) {
4981 auto [DstReg, DstTy, ScalarReg, ScalarTy, SrcReg, SrcTy] =
4982 MI.getFirst3RegLLTs();
4983 if (!NarrowTy.isScalar() || TypeIdx != 2 || DstTy != ScalarTy ||
4984 DstTy != NarrowTy)
4985 return UnableToLegalize;
4986
4987 assert((MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD ||
4988 MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FMUL) &&
4989 "Unexpected vecreduce opcode");
4990 unsigned ScalarOpc = MI.getOpcode() == TargetOpcode::G_VECREDUCE_SEQ_FADD
4991 ? TargetOpcode::G_FADD
4992 : TargetOpcode::G_FMUL;
4993
4994 SmallVector<Register> SplitSrcs;
4995 unsigned NumParts = SrcTy.getNumElements();
4996 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs, MIRBuilder, MRI);
4997 Register Acc = ScalarReg;
4998 for (unsigned i = 0; i < NumParts; i++)
4999 Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[i]})
5000 .getReg(0);
5001
5002 MIRBuilder.buildCopy(DstReg, Acc);
5003 MI.eraseFromParent();
5004 return Legalized;
5005}
5006
5007 LegalizerHelper::LegalizeResult
5008 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
5009 LLT SrcTy, LLT NarrowTy,
5010 unsigned ScalarOpc) {
5011 SmallVector<Register> SplitSrcs;
5012 // Split the sources into NarrowTy size pieces.
5013 extractParts(SrcReg, NarrowTy,
5014 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs,
5015 MIRBuilder, MRI);
5016 // We're going to do a tree reduction using vector operations until we have
5017 // one NarrowTy size value left.
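 // For example (illustrative): a <16 x s32> source with NarrowTy = <4 x s32>
 // is split into 4 pieces, combined pairwise into 2 and then 1, and the
 // original reduction opcode is finally applied to that single <4 x s32>.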
5018 while (SplitSrcs.size() > 1) {
5019 SmallVector<Register> PartialRdxs;
5020 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
5021 Register LHS = SplitSrcs[Idx];
5022 Register RHS = SplitSrcs[Idx + 1];
5023 // Create the intermediate vector op.
5024 Register Res =
5025 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
5026 PartialRdxs.push_back(Res);
5027 }
5028 SplitSrcs = std::move(PartialRdxs);
5029 }
5030 // Finally generate the requested NarrowTy based reduction.
5031 Observer.changingInstr(MI);
5032 MI.getOperand(1).setReg(SplitSrcs[0]);
5033 Observer.changedInstr(MI);
5034 return Legalized;
5035}
5036
5037 LegalizerHelper::LegalizeResult
5038 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
5039 const LLT HalfTy, const LLT AmtTy) {
5040
5041 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5042 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5043 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5044
5045 if (Amt.isZero()) {
5046 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {InL, InH});
5047 MI.eraseFromParent();
5048 return Legalized;
5049 }
5050
5051 LLT NVT = HalfTy;
5052 unsigned NVTBits = HalfTy.getSizeInBits();
5053 unsigned VTBits = 2 * NVTBits;
5054
5055 SrcOp Lo(Register(0)), Hi(Register(0));
5056 if (MI.getOpcode() == TargetOpcode::G_SHL) {
5057 if (Amt.ugt(VTBits)) {
5058 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5059 } else if (Amt.ugt(NVTBits)) {
5060 Lo = MIRBuilder.buildConstant(NVT, 0);
5061 Hi = MIRBuilder.buildShl(NVT, InL,
5062 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5063 } else if (Amt == NVTBits) {
5064 Lo = MIRBuilder.buildConstant(NVT, 0);
5065 Hi = InL;
5066 } else {
5067 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
5068 auto OrLHS =
5069 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
5070 auto OrRHS = MIRBuilder.buildLShr(
5071 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5072 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5073 }
5074 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5075 if (Amt.ugt(VTBits)) {
5076 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
5077 } else if (Amt.ugt(NVTBits)) {
5078 Lo = MIRBuilder.buildLShr(NVT, InH,
5079 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5080 Hi = MIRBuilder.buildConstant(NVT, 0);
5081 } else if (Amt == NVTBits) {
5082 Lo = InH;
5083 Hi = MIRBuilder.buildConstant(NVT, 0);
5084 } else {
5085 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5086
5087 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5088 auto OrRHS = MIRBuilder.buildShl(
5089 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5090
5091 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5092 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
5093 }
5094 } else {
5095 if (Amt.ugt(VTBits)) {
5096 Hi = Lo = MIRBuilder.buildAShr(
5097 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5098 } else if (Amt.ugt(NVTBits)) {
5099 Lo = MIRBuilder.buildAShr(NVT, InH,
5100 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
5101 Hi = MIRBuilder.buildAShr(NVT, InH,
5102 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5103 } else if (Amt == NVTBits) {
5104 Lo = InH;
5105 Hi = MIRBuilder.buildAShr(NVT, InH,
5106 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
5107 } else {
5108 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
5109
5110 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
5111 auto OrRHS = MIRBuilder.buildShl(
5112 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
5113
5114 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
5115 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
5116 }
5117 }
5118
5119 MIRBuilder.buildMergeLikeInstr(MI.getOperand(0), {Lo, Hi});
5120 MI.eraseFromParent();
5121
5122 return Legalized;
5123}
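// Illustration (editorial note, not in the upstream source): splitting a
// 64-bit G_SHL into 32-bit halves (NVTBits == 32), a constant amount of 40
// takes the Amt.ugt(NVTBits) branch above and yields
//   Lo = 0, Hi = InL << (40 - 32)
// while an amount of 12 takes the general branch and yields
//   Lo = InL << 12, Hi = (InH << 12) | (InL >> 20).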
5124
5125// TODO: Optimize if constant shift amount.
5126LegalizerHelper::LegalizeResult
5127LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
5128 LLT RequestedTy) {
5129 if (TypeIdx == 1) {
5130 Observer.changingInstr(MI);
5131 narrowScalarSrc(MI, RequestedTy, 2);
5132 Observer.changedInstr(MI);
5133 return Legalized;
5134 }
5135
5136 Register DstReg = MI.getOperand(0).getReg();
5137 LLT DstTy = MRI.getType(DstReg);
5138 if (DstTy.isVector())
5139 return UnableToLegalize;
5140
5141 Register Amt = MI.getOperand(2).getReg();
5142 LLT ShiftAmtTy = MRI.getType(Amt);
5143 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
5144 if (DstEltSize % 2 != 0)
5145 return UnableToLegalize;
5146
5147 // Ignore the input type. We can only go to exactly half the size of the
5148 // input. If that isn't small enough, the resulting pieces will be further
5149 // legalized.
5150 const unsigned NewBitSize = DstEltSize / 2;
5151 const LLT HalfTy = LLT::scalar(NewBitSize);
5152 const LLT CondTy = LLT::scalar(1);
5153
5154 if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
5155 return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
5156 ShiftAmtTy);
5157 }
5158
5159 // TODO: Expand with known bits.
5160
5161 // Handle the fully general expansion by an unknown amount.
5162 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
5163
5164 Register InL = MRI.createGenericVirtualRegister(HalfTy);
5165 Register InH = MRI.createGenericVirtualRegister(HalfTy);
5166 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
5167
5168 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
5169 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
5170
5171 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
5172 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
5173 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
5174
5175 Register ResultRegs[2];
5176 switch (MI.getOpcode()) {
5177 case TargetOpcode::G_SHL: {
5178 // Short: ShAmt < NewBitSize
5179 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
5180
5181 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
5182 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
5183 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5184
5185 // Long: ShAmt >= NewBitSize
5186 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
5187 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
5188
5189 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
5190 auto Hi = MIRBuilder.buildSelect(
5191 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
5192
5193 ResultRegs[0] = Lo.getReg(0);
5194 ResultRegs[1] = Hi.getReg(0);
5195 break;
5196 }
5197 case TargetOpcode::G_LSHR:
5198 case TargetOpcode::G_ASHR: {
5199 // Short: ShAmt < NewBitSize
5200 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
5201
5202 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
5203 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
5204 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
5205
5206 // Long: ShAmt >= NewBitSize
5207 MachineInstrBuilder HiL;
5208 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
5209 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
5210 } else {
5211 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
5212 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
5213 }
5214 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
5215 {InH, AmtExcess}); // Lo from Hi part.
5216
5217 auto Lo = MIRBuilder.buildSelect(
5218 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
5219
5220 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
5221
5222 ResultRegs[0] = Lo.getReg(0);
5223 ResultRegs[1] = Hi.getReg(0);
5224 break;
5225 }
5226 default:
5227 llvm_unreachable("not a shift");
5228 }
5229
5230 MIRBuilder.buildMergeLikeInstr(DstReg, ResultRegs);
5231 MI.eraseFromParent();
5232 return Legalized;
5233}
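// Illustration (editorial note, not in the upstream source): for a 64-bit
// G_LSHR split into 32-bit halves with an unknown amount, the selects above
// amount to
//   Hi = Amt < 32 ? InH >> Amt : 0
//   Lo = Amt == 0 ? InL
//                 : Amt < 32 ? (InL >> Amt) | (InH << (32 - Amt))
//                            : InH >> (Amt - 32)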
5234
5235LegalizerHelper::LegalizeResult
5236LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
5237 LLT MoreTy) {
5238 assert(TypeIdx == 0 && "Expecting only Idx 0");
5239
5240 Observer.changingInstr(MI);
5241 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
5242 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
5244 moreElementsVectorSrc(MI, MoreTy, I);
5245 }
5246
5247 MachineBasicBlock &MBB = *MI.getParent();
5249 moreElementsVectorDst(MI, MoreTy, 0);
5250 Observer.changedInstr(MI);
5251 return Legalized;
5252}
5253
5254MachineInstrBuilder LegalizerHelper::getNeutralElementForVecReduce(
5255 unsigned Opcode, MachineIRBuilder &MIRBuilder, LLT Ty) {
5256 assert(Ty.isScalar() && "Expected scalar type to make neutral element for");
5257
5258 switch (Opcode) {
5259 default:
5261 "getNeutralElementForVecReduce called with invalid opcode!");
5262 case TargetOpcode::G_VECREDUCE_ADD:
5263 case TargetOpcode::G_VECREDUCE_OR:
5264 case TargetOpcode::G_VECREDUCE_XOR:
5265 case TargetOpcode::G_VECREDUCE_UMAX:
5266 return MIRBuilder.buildConstant(Ty, 0);
5267 case TargetOpcode::G_VECREDUCE_MUL:
5268 return MIRBuilder.buildConstant(Ty, 1);
5269 case TargetOpcode::G_VECREDUCE_AND:
5270 case TargetOpcode::G_VECREDUCE_UMIN:
5271 return MIRBuilder.buildConstant(Ty,
5272 APInt::getAllOnes(Ty.getSizeInBits()));
5273 case TargetOpcode::G_VECREDUCE_SMAX:
5274 return MIRBuilder.buildConstant(
5275 Ty, APInt::getSignedMinValue(Ty.getSizeInBits()));
5276 case TargetOpcode::G_VECREDUCE_SMIN:
5277 return MIRBuilder.buildConstant(
5278 Ty, APInt::getSignedMaxValue(Ty.getSizeInBits()));
5279 case TargetOpcode::G_VECREDUCE_FADD:
5280 return MIRBuilder.buildFConstant(Ty, -0.0);
5281 case TargetOpcode::G_VECREDUCE_FMUL:
5282 return MIRBuilder.buildFConstant(Ty, 1.0);
5283 case TargetOpcode::G_VECREDUCE_FMINIMUM:
5284 case TargetOpcode::G_VECREDUCE_FMAXIMUM:
5285 assert(false && "getNeutralElementForVecReduce unimplemented for "
5286 "G_VECREDUCE_FMINIMUM and G_VECREDUCE_FMAXIMUM!");
5287 }
5288 llvm_unreachable("switch expected to return!");
5289}
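// Illustration (editorial note, not in the upstream source): each value above
// is the identity of its reduction, so padding a vector with it cannot change
// the result: x + 0 == x, x * 1 == x, x & ~0 == x, smax(x, INT_MIN) == x, and
// x + (-0.0) == x (using -0.0 rather than +0.0 preserves a -0.0 result, since
// -0.0 + +0.0 == +0.0).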
5290
5291LegalizerHelper::LegalizeResult
5292LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
5293 LLT MoreTy) {
5294 unsigned Opc = MI.getOpcode();
5295 switch (Opc) {
5296 case TargetOpcode::G_IMPLICIT_DEF:
5297 case TargetOpcode::G_LOAD: {
5298 if (TypeIdx != 0)
5299 return UnableToLegalize;
5300 Observer.changingInstr(MI);
5301 moreElementsVectorDst(MI, MoreTy, 0);
5302 Observer.changedInstr(MI);
5303 return Legalized;
5304 }
5305 case TargetOpcode::G_STORE:
5306 if (TypeIdx != 0)
5307 return UnableToLegalize;
5308 Observer.changingInstr(MI);
5309 moreElementsVectorSrc(MI, MoreTy, 0);
5310 Observer.changedInstr(MI);
5311 return Legalized;
5312 case TargetOpcode::G_AND:
5313 case TargetOpcode::G_OR:
5314 case TargetOpcode::G_XOR:
5315 case TargetOpcode::G_ADD:
5316 case TargetOpcode::G_SUB:
5317 case TargetOpcode::G_MUL:
5318 case TargetOpcode::G_FADD:
5319 case TargetOpcode::G_FSUB:
5320 case TargetOpcode::G_FMUL:
5321 case TargetOpcode::G_FDIV:
5322 case TargetOpcode::G_FCOPYSIGN:
5323 case TargetOpcode::G_UADDSAT:
5324 case TargetOpcode::G_USUBSAT:
5325 case TargetOpcode::G_SADDSAT:
5326 case TargetOpcode::G_SSUBSAT:
5327 case TargetOpcode::G_SMIN:
5328 case TargetOpcode::G_SMAX:
5329 case TargetOpcode::G_UMIN:
5330 case TargetOpcode::G_UMAX:
5331 case TargetOpcode::G_FMINNUM:
5332 case TargetOpcode::G_FMAXNUM:
5333 case TargetOpcode::G_FMINNUM_IEEE:
5334 case TargetOpcode::G_FMAXNUM_IEEE:
5335 case TargetOpcode::G_FMINIMUM:
5336 case TargetOpcode::G_FMAXIMUM:
5337 case TargetOpcode::G_STRICT_FADD:
5338 case TargetOpcode::G_STRICT_FSUB:
5339 case TargetOpcode::G_STRICT_FMUL:
5340 case TargetOpcode::G_SHL:
5341 case TargetOpcode::G_ASHR:
5342 case TargetOpcode::G_LSHR: {
5343 Observer.changingInstr(MI);
5344 moreElementsVectorSrc(MI, MoreTy, 1);
5345 moreElementsVectorSrc(MI, MoreTy, 2);
5346 moreElementsVectorDst(MI, MoreTy, 0);
5347 Observer.changedInstr(MI);
5348 return Legalized;
5349 }
5350 case TargetOpcode::G_FMA:
5351 case TargetOpcode::G_STRICT_FMA:
5352 case TargetOpcode::G_FSHR:
5353 case TargetOpcode::G_FSHL: {
5354 Observer.changingInstr(MI);
5355 moreElementsVectorSrc(MI, MoreTy, 1);
5356 moreElementsVectorSrc(MI, MoreTy, 2);
5357 moreElementsVectorSrc(MI, MoreTy, 3);
5358 moreElementsVectorDst(MI, MoreTy, 0);
5359 Observer.changedInstr(MI);
5360 return Legalized;
5361 }
5362 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
5363 case TargetOpcode::G_EXTRACT:
5364 if (TypeIdx != 1)
5365 return UnableToLegalize;
5366 Observer.changingInstr(MI);
5367 moreElementsVectorSrc(MI, MoreTy, 1);
5368 Observer.changedInstr(MI);
5369 return Legalized;
5370 case TargetOpcode::G_INSERT:
5371 case TargetOpcode::G_INSERT_VECTOR_ELT:
5372 case TargetOpcode::G_FREEZE:
5373 case TargetOpcode::G_FNEG:
5374 case TargetOpcode::G_FABS:
5375 case TargetOpcode::G_FSQRT:
5376 case TargetOpcode::G_FCEIL:
5377 case TargetOpcode::G_FFLOOR:
5378 case TargetOpcode::G_FNEARBYINT:
5379 case TargetOpcode::G_FRINT:
5380 case TargetOpcode::G_INTRINSIC_ROUND:
5381 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
5382 case TargetOpcode::G_INTRINSIC_TRUNC:
5383 case TargetOpcode::G_BSWAP:
5384 case TargetOpcode::G_FCANONICALIZE:
5385 case TargetOpcode::G_SEXT_INREG:
5386 case TargetOpcode::G_ABS:
5387 if (TypeIdx != 0)
5388 return UnableToLegalize;
5389 Observer.changingInstr(MI);
5390 moreElementsVectorSrc(MI, MoreTy, 1);
5391 moreElementsVectorDst(MI, MoreTy, 0);
5392 Observer.changedInstr(MI);
5393 return Legalized;
5394 case TargetOpcode::G_SELECT: {
5395 auto [DstReg, DstTy, CondReg, CondTy] = MI.getFirst2RegLLTs();
5396 if (TypeIdx == 1) {
5397 if (!CondTy.isScalar() ||
5398 DstTy.getElementCount() != MoreTy.getElementCount())
5399 return UnableToLegalize;
5400
5401 // This is turning a scalar select of vectors into a vector
5402 // select. Broadcast the select condition.
5403 auto ShufSplat = MIRBuilder.buildShuffleSplat(MoreTy, CondReg);
5404 Observer.changingInstr(MI);
5405 MI.getOperand(1).setReg(ShufSplat.getReg(0));
5406 Observer.changedInstr(MI);
5407 return Legalized;
5408 }
5409
5410 if (CondTy.isVector())
5411 return UnableToLegalize;
5412
5413 Observer.changingInstr(MI);
5414 moreElementsVectorSrc(MI, MoreTy, 2);
5415 moreElementsVectorSrc(MI, MoreTy, 3);
5416 moreElementsVectorDst(MI, MoreTy, 0);
5417 Observer.changedInstr(MI);
5418 return Legalized;
5419 }
5420 case TargetOpcode::G_UNMERGE_VALUES:
5421 return UnableToLegalize;
5422 case TargetOpcode::G_PHI:
5423 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
5424 case TargetOpcode::G_SHUFFLE_VECTOR:
5425 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
5426 case TargetOpcode::G_BUILD_VECTOR: {
5427 SmallVector<Register, 8> Elts;
5428 for (auto Op : MI.uses()) {
5429 Elts.push_back(Op.getReg());
5430 }
5431
5432 for (unsigned i = Elts.size(); i < MoreTy.getNumElements(); ++i) {
5433 Elts.push_back(MIRBuilder.buildUndef(MoreTy.getScalarType()).getReg(0));
5434 }
5435
5436 MIRBuilder.buildDeleteTrailingVectorElements(
5437 MI.getOperand(0).getReg(), MIRBuilder.buildInstr(Opc, {MoreTy}, Elts));
5438 MI.eraseFromParent();
5439 return Legalized;
5440 }
5441 case TargetOpcode::G_SEXT:
5442 case TargetOpcode::G_ZEXT:
5443 case TargetOpcode::G_ANYEXT:
5444 case TargetOpcode::G_TRUNC:
5445 case TargetOpcode::G_FPTRUNC:
5446 case TargetOpcode::G_FPEXT:
5447 case TargetOpcode::G_FPTOSI:
5448 case TargetOpcode::G_FPTOUI:
5449 case TargetOpcode::G_SITOFP:
5450 case TargetOpcode::G_UITOFP: {
5451 Observer.changingInstr(MI);
5452 LLT SrcExtTy;
5453 LLT DstExtTy;
5454 if (TypeIdx == 0) {
5455 DstExtTy = MoreTy;
5456 SrcExtTy = LLT::fixed_vector(
5457 MoreTy.getNumElements(),
5458 MRI.getType(MI.getOperand(1).getReg()).getElementType());
5459 } else {
5460 DstExtTy = LLT::fixed_vector(
5461 MoreTy.getNumElements(),
5462 MRI.getType(MI.getOperand(0).getReg()).getElementType());
5463 SrcExtTy = MoreTy;
5464 }
5465 moreElementsVectorSrc(MI, SrcExtTy, 1);
5466 moreElementsVectorDst(MI, DstExtTy, 0);
5467 Observer.changedInstr(MI);
5468 return Legalized;
5469 }
5470 case TargetOpcode::G_ICMP:
5471 case TargetOpcode::G_FCMP: {
5472 if (TypeIdx != 1)
5473 return UnableToLegalize;
5474
5475 Observer.changingInstr(MI);
5476 moreElementsVectorSrc(MI, MoreTy, 2);
5477 moreElementsVectorSrc(MI, MoreTy, 3);
5478 LLT CondTy = LLT::fixed_vector(
5479 MoreTy.getNumElements(),
5480 MRI.getType(MI.getOperand(0).getReg()).getElementType());
5481 moreElementsVectorDst(MI, CondTy, 0);
5482 Observer.changedInstr(MI);
5483 return Legalized;
5484 }
5485 case TargetOpcode::G_BITCAST: {
5486 if (TypeIdx != 0)
5487 return UnableToLegalize;
5488
5489 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
5490 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
5491
5492 unsigned coefficient = SrcTy.getNumElements() * MoreTy.getNumElements();
5493 if (coefficient % DstTy.getNumElements() != 0)
5494 return UnableToLegalize;
5495
5496 coefficient = coefficient / DstTy.getNumElements();
5497
5498 LLT NewTy = SrcTy.changeElementCount(
5499 ElementCount::get(coefficient, MoreTy.isScalable()));
5500 Observer.changingInstr(MI);
5501 moreElementsVectorSrc(MI, NewTy, 1);
5502 moreElementsVectorDst(MI, MoreTy, 0);
5503 Observer.changedInstr(MI);
5504 return Legalized;
5505 }
5506 case TargetOpcode::G_VECREDUCE_FADD:
5507 case TargetOpcode::G_VECREDUCE_FMUL:
5508 case TargetOpcode::G_VECREDUCE_ADD:
5509 case TargetOpcode::G_VECREDUCE_MUL:
5510 case TargetOpcode::G_VECREDUCE_AND:
5511 case TargetOpcode::G_VECREDUCE_OR:
5512 case TargetOpcode::G_VECREDUCE_XOR:
5513 case TargetOpcode::G_VECREDUCE_SMAX:
5514 case TargetOpcode::G_VECREDUCE_SMIN:
5515 case TargetOpcode::G_VECREDUCE_UMAX:
5516 case TargetOpcode::G_VECREDUCE_UMIN: {
5517 LLT OrigTy = MRI.getType(MI.getOperand(1).getReg());
5518 MachineOperand &MO = MI.getOperand(1);
5519 auto NewVec = MIRBuilder.buildPadVectorWithUndefElements(MoreTy, MO);
5520 auto NeutralElement = getNeutralElementForVecReduce(
5521 MI.getOpcode(), MIRBuilder, MoreTy.getElementType());
5522
5523 LLT IdxTy(TLI.getVectorIdxTy(MIRBuilder.getDataLayout()));
5524 for (size_t i = OrigTy.getNumElements(), e = MoreTy.getNumElements();
5525 i != e; i++) {
5526 auto Idx = MIRBuilder.buildConstant(IdxTy, i);
5527 NewVec = MIRBuilder.buildInsertVectorElement(MoreTy, NewVec,
5528 NeutralElement, Idx);
5529 }
5530
5531 Observer.changingInstr(MI);
5532 MO.setReg(NewVec.getReg(0));
5533 Observer.changedInstr(MI);
5534 return Legalized;
5535 }
5536
5537 default:
5538 return UnableToLegalize;
5539 }
5540}
5541
5542LegalizerHelper::LegalizeResult
5543LegalizerHelper::equalizeVectorShuffleLengths(MachineInstr &MI) {
5544 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
5545 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5546 unsigned MaskNumElts = Mask.size();
5547 unsigned SrcNumElts = SrcTy.getNumElements();
5548 LLT DestEltTy = DstTy.getElementType();
5549
5550 if (MaskNumElts == SrcNumElts)
5551 return Legalized;
5552
5553 if (MaskNumElts < SrcNumElts) {
5554 // Extend mask to match new destination vector size with
5555 // undef values.
5556 SmallVector<int, 16> NewMask(Mask);
5557 for (unsigned I = MaskNumElts; I < SrcNumElts; ++I)
5558 NewMask.push_back(-1);
5559
5560 moreElementsVectorDst(MI, SrcTy, 0);
5562 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5563 MI.getOperand(1).getReg(),
5564 MI.getOperand(2).getReg(), NewMask);
5565 MI.eraseFromParent();
5566
5567 return Legalized;
5568 }
5569
5570 unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
5571 unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
5572 LLT PaddedTy = LLT::fixed_vector(PaddedMaskNumElts, DestEltTy);
5573
5574 // Create new source vectors by concatenating the initial
5575 // source vectors with undefined vectors of the same size.
5576 auto Undef = MIRBuilder.buildUndef(SrcTy);
5577 SmallVector<Register, 8> MOps1(NumConcat, Undef.getReg(0));
5578 SmallVector<Register, 8> MOps2(NumConcat, Undef.getReg(0));
5579 MOps1[0] = MI.getOperand(1).getReg();
5580 MOps2[0] = MI.getOperand(2).getReg();
5581
5582 auto Src1 = MIRBuilder.buildConcatVectors(PaddedTy, MOps1);
5583 auto Src2 = MIRBuilder.buildConcatVectors(PaddedTy, MOps2);
5584
5585 // Readjust mask for new input vector length.
5586 SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
5587 for (unsigned I = 0; I != MaskNumElts; ++I) {
5588 int Idx = Mask[I];
5589 if (Idx >= static_cast<int>(SrcNumElts))
5590 Idx += PaddedMaskNumElts - SrcNumElts;
5591 MappedOps[I] = Idx;
5592 }
5593
5594 // If we got more elements than required, extract subvector.
5595 if (MaskNumElts != PaddedMaskNumElts) {
5596 auto Shuffle =
5597 MIRBuilder.buildShuffleVector(PaddedTy, Src1, Src2, MappedOps);
5598
5599 SmallVector<Register, 16> Elts(MaskNumElts);
5600 for (unsigned I = 0; I < MaskNumElts; ++I) {
5601 Elts[I] =
5602 MIRBuilder.buildExtractVectorElementConstant(DestEltTy, Shuffle, I)
5603 .getReg(0);
5604 }
5605 MIRBuilder.buildBuildVector(DstReg, Elts);
5606 } else {
5607 MIRBuilder.buildShuffleVector(DstReg, Src1, Src2, MappedOps);
5608 }
5609
5610 MI.eraseFromParent();
5611 return Legalized;
5612}
5613
5614LegalizerHelper::LegalizeResult
5615LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
5616 unsigned int TypeIdx, LLT MoreTy) {
5617 auto [DstTy, Src1Ty, Src2Ty] = MI.getFirst3LLTs();
5618 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5619 unsigned NumElts = DstTy.getNumElements();
5620 unsigned WidenNumElts = MoreTy.getNumElements();
5621
5622 if (DstTy.isVector() && Src1Ty.isVector() &&
5623 DstTy.getNumElements() != Src1Ty.getNumElements()) {
5624 return equalizeVectorShuffleLengths(MI);
5625 }
5626
5627 if (TypeIdx != 0)
5628 return UnableToLegalize;
5629
5630 // Expect a canonicalized shuffle.
5631 if (DstTy != Src1Ty || DstTy != Src2Ty)
5632 return UnableToLegalize;
5633
5634 moreElementsVectorSrc(MI, MoreTy, 1);
5635 moreElementsVectorSrc(MI, MoreTy, 2);
5636
5637 // Adjust mask based on new input vector length.
5638 SmallVector<int, 16> NewMask;
5639 for (unsigned I = 0; I != NumElts; ++I) {
5640 int Idx = Mask[I];
5641 if (Idx < static_cast<int>(NumElts))
5642 NewMask.push_back(Idx);
5643 else
5644 NewMask.push_back(Idx - NumElts + WidenNumElts);
5645 }
5646 for (unsigned I = NumElts; I != WidenNumElts; ++I)
5647 NewMask.push_back(-1);
5648 moreElementsVectorDst(MI, MoreTy, 0);
5650 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5651 MI.getOperand(1).getReg(),
5652 MI.getOperand(2).getReg(), NewMask);
5653 MI.eraseFromParent();
5654 return Legalized;
5655}
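// Illustration (editorial note, not in the upstream source): widening a
// canonical <4 x s32> shuffle to <8 x s32> remaps mask entries that referred
// to the second input from [4, 8) to [8, 16) and pads with -1 (undef), e.g.
//   <0, 5, 2, 7>  ->  <0, 9, 2, 11, -1, -1, -1, -1>.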
5656
5657void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
5658 ArrayRef<Register> Src1Regs,
5659 ArrayRef<Register> Src2Regs,
5660 LLT NarrowTy) {
5661 MachineIRBuilder &B = MIRBuilder;
5662 unsigned SrcParts = Src1Regs.size();
5663 unsigned DstParts = DstRegs.size();
5664
5665 unsigned DstIdx = 0; // Low bits of the result.
5666 Register FactorSum =
5667 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
5668 DstRegs[DstIdx] = FactorSum;
5669
5670 unsigned CarrySumPrevDstIdx;
5671 SmallVector<Register, 4> Factors;
5672
5673 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
5674 // Collect low parts of muls for DstIdx.
5675 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
5676 i <= std::min(DstIdx, SrcParts - 1); ++i) {
5677 MachineInstrBuilder Mul =
5678 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
5679 Factors.push_back(Mul.getReg(0));
5680 }
5681 // Collect high parts of muls from previous DstIdx.
5682 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
5683 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
5684 MachineInstrBuilder Umulh =
5685 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
5686 Factors.push_back(Umulh.getReg(0));
5687 }
5688 // Add CarrySum from additions calculated for previous DstIdx.
5689 if (DstIdx != 1) {
5690 Factors.push_back(CarrySumPrevDstIdx);
5691 }
5692
5693 Register CarrySum;
5694 // Add all factors and accumulate all carries into CarrySum.
5695 if (DstIdx != DstParts - 1) {
5696 MachineInstrBuilder Uaddo =
5697 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
5698 FactorSum = Uaddo.getReg(0);
5699 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
5700 for (unsigned i = 2; i < Factors.size(); ++i) {
5701 MachineInstrBuilder Uaddo =
5702 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
5703 FactorSum = Uaddo.getReg(0);
5704 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
5705 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
5706 }
5707 } else {
5708 // Since value for the next index is not calculated, neither is CarrySum.
5709 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
5710 for (unsigned i = 2; i < Factors.size(); ++i)
5711 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
5712 }
5713
5714 CarrySumPrevDstIdx = CarrySum;
5715 DstRegs[DstIdx] = FactorSum;
5716 Factors.clear();
5717 }
5718}
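// Illustration (editorial note, not in the upstream source): for two-part
// operands Src1 = {A0, A1} and Src2 = {B0, B1} (low part first) the loop
// performs schoolbook multiplication:
//   Dst[0] = mul(A0, B0)
//   Dst[1] = mul(A1, B0) + mul(A0, B1) + umulh(A0, B0)
// High halves and carries of lower columns feed the next column; the top
// column skips carry tracking because nothing consumes it.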
5719
5720LegalizerHelper::LegalizeResult
5721LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
5722 LLT NarrowTy) {
5723 if (TypeIdx != 0)
5724 return UnableToLegalize;
5725
5726 Register DstReg = MI.getOperand(0).getReg();
5727 LLT DstType = MRI.getType(DstReg);
5728 // FIXME: add support for vector types
5729 if (DstType.isVector())
5730 return UnableToLegalize;
5731
5732 unsigned Opcode = MI.getOpcode();
5733 unsigned OpO, OpE, OpF;
5734 switch (Opcode) {
5735 case TargetOpcode::G_SADDO:
5736 case TargetOpcode::G_SADDE:
5737 case TargetOpcode::G_UADDO:
5738 case TargetOpcode::G_UADDE:
5739 case TargetOpcode::G_ADD:
5740 OpO = TargetOpcode::G_UADDO;
5741 OpE = TargetOpcode::G_UADDE;
5742 OpF = TargetOpcode::G_UADDE;
5743 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
5744 OpF = TargetOpcode::G_SADDE;
5745 break;
5746 case TargetOpcode::G_SSUBO:
5747 case TargetOpcode::G_SSUBE:
5748 case TargetOpcode::G_USUBO:
5749 case TargetOpcode::G_USUBE:
5750 case TargetOpcode::G_SUB:
5751 OpO = TargetOpcode::G_USUBO;
5752 OpE = TargetOpcode::G_USUBE;
5753 OpF = TargetOpcode::G_USUBE;
5754 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
5755 OpF = TargetOpcode::G_SSUBE;
5756 break;
5757 default:
5758 llvm_unreachable("Unexpected add/sub opcode!");
5759 }
5760
5761 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
5762 unsigned NumDefs = MI.getNumExplicitDefs();
5763 Register Src1 = MI.getOperand(NumDefs).getReg();
5764 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
5765 Register CarryDst, CarryIn;
5766 if (NumDefs == 2)
5767 CarryDst = MI.getOperand(1).getReg();
5768 if (MI.getNumOperands() == NumDefs + 3)
5769 CarryIn = MI.getOperand(NumDefs + 2).getReg();
5770
5771 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5772 LLT LeftoverTy, DummyTy;
5773 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
5774 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left,
5775 MIRBuilder, MRI);
5776 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left, MIRBuilder,
5777 MRI);
5778
5779 int NarrowParts = Src1Regs.size();
5780 for (int I = 0, E = Src1Left.size(); I != E; ++I) {
5781 Src1Regs.push_back(Src1Left[I]);
5782 Src2Regs.push_back(Src2Left[I]);
5783 }
5784 DstRegs.reserve(Src1Regs.size());
5785
5786 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
5787 Register DstReg =
5788 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
5789 Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
5790 // Forward the final carry-out to the destination register
5791 if (i == e - 1 && CarryDst)
5792 CarryOut = CarryDst;
5793
5794 if (!CarryIn) {
5795 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
5796 {Src1Regs[i], Src2Regs[i]});
5797 } else if (i == e - 1) {
5798 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
5799 {Src1Regs[i], Src2Regs[i], CarryIn});
5800 } else {
5801 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
5802 {Src1Regs[i], Src2Regs[i], CarryIn});
5803 }
5804
5805 DstRegs.push_back(DstReg);
5806 CarryIn = CarryOut;
5807 }
5808 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
5809 ArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
5810 ArrayRef(DstRegs).drop_front(NarrowParts));
5811
5812 MI.eraseFromParent();
5813 return Legalized;
5814}
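// Illustration (editorial note, not in the upstream source): narrowing a
// 64-bit G_ADD to 32-bit parts produces the usual carry chain
//   %lo, %c1 = G_UADDO %a_lo, %b_lo
//   %hi, %c2 = G_UADDE %a_hi, %b_hi, %c1
// and for G_SADDO only the topmost piece switches to G_SADDE so that the
// signed-overflow flag of the full-width value is produced there.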
5815
5816LegalizerHelper::LegalizeResult
5817LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
5818 auto [DstReg, Src1, Src2] = MI.getFirst3Regs();
5819
5820 LLT Ty = MRI.getType(DstReg);
5821 if (Ty.isVector())
5822 return UnableToLegalize;
5823
5824 unsigned Size = Ty.getSizeInBits();
5825 unsigned NarrowSize = NarrowTy.getSizeInBits();
5826 if (Size % NarrowSize != 0)
5827 return UnableToLegalize;
5828
5829 unsigned NumParts = Size / NarrowSize;
5830 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
5831 unsigned DstTmpParts = NumParts * (IsMulHigh ? 2 : 1);
5832
5833 SmallVector<Register, 2> Src1Parts, Src2Parts;
5834 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
5835 extractParts(Src1, NarrowTy, NumParts, Src1Parts, MIRBuilder, MRI);
5836 extractParts(Src2, NarrowTy, NumParts, Src2Parts, MIRBuilder, MRI);
5837 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
5838
5839 // Take only high half of registers if this is high mul.
5840 ArrayRef<Register> DstRegs(&DstTmpRegs[DstTmpParts - NumParts], NumParts);
5841 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
5842 MI.eraseFromParent();
5843 return Legalized;
5844}
5845
5846LegalizerHelper::LegalizeResult
5847LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
5848 LLT NarrowTy) {
5849 if (TypeIdx != 0)
5850 return UnableToLegalize;
5851
5852 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
5853
5854 Register Src = MI.getOperand(1).getReg();
5855 LLT SrcTy = MRI.getType(Src);
5856
5857 // If all finite floats fit into the narrowed integer type, we can just swap
5858 // out the result type. This is practically only useful for conversions from
5859 // half to at least 16-bits, so just handle the one case.
5860 if (SrcTy.getScalarType() != LLT::scalar(16) ||
5861 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
5862 return UnableToLegalize;
5863
5864 Observer.changingInstr(MI);
5865 narrowScalarDst(MI, NarrowTy, 0,
5866 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
5867 Observer.changedInstr(MI);
5868 return Legalized;
5869}
5870
5871LegalizerHelper::LegalizeResult
5872LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
5873 LLT NarrowTy) {
5874 if (TypeIdx != 1)
5875 return UnableToLegalize;
5876
5877 uint64_t NarrowSize = NarrowTy.getSizeInBits();
5878
5879 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5880 // FIXME: add support for when SizeOp1 isn't an exact multiple of
5881 // NarrowSize.
5882 if (SizeOp1 % NarrowSize != 0)
5883 return UnableToLegalize;
5884 int NumParts = SizeOp1 / NarrowSize;
5885
5886 SmallVector<Register, 2> SrcRegs, DstRegs;
5888 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs,
5889 MIRBuilder, MRI);
5890
5891 Register OpReg = MI.getOperand(0).getReg();
5892 uint64_t OpStart = MI.getOperand(2).getImm();
5893 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
5894 for (int i = 0; i < NumParts; ++i) {
5895 unsigned SrcStart = i * NarrowSize;
5896
5897 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
5898 // No part of the extract uses this subregister, ignore it.
5899 continue;
5900 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
5901 // The entire subregister is extracted, forward the value.
5902 DstRegs.push_back(SrcRegs[i]);
5903 continue;
5904 }
5905
5906 // OpSegStart is where this destination segment would start in OpReg if it
5907 // extended infinitely in both directions.
5908 int64_t ExtractOffset;
5909 uint64_t SegSize;
5910 if (OpStart < SrcStart) {
5911 ExtractOffset = 0;
5912 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
5913 } else {
5914 ExtractOffset = OpStart - SrcStart;
5915 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
5916 }
5917
5918 Register SegReg = SrcRegs[i];
5919 if (ExtractOffset != 0 || SegSize != NarrowSize) {
5920 // A genuine extract is needed.
5921 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
5922 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
5923 }
5924
5925 DstRegs.push_back(SegReg);
5926 }
5927
5928 Register DstReg = MI.getOperand(0).getReg();
5929 if (MRI.getType(DstReg).isVector())
5930 MIRBuilder.buildBuildVector(DstReg, DstRegs);
5931 else if (DstRegs.size() > 1)
5932 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
5933 else
5934 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
5935 MI.eraseFromParent();
5936 return Legalized;
5937}
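// Illustration (editorial note, not in the upstream source): extracting an
// s16 at bit offset 40 from an s64 narrowed to s32 parts only touches the
// high part, so the loop above emits a single 16-bit G_EXTRACT at offset 8
// from SrcRegs[1] and copies it to the destination.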
5938
5939LegalizerHelper::LegalizeResult
5940LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
5941 LLT NarrowTy) {
5942 // FIXME: Don't know how to handle secondary types yet.
5943 if (TypeIdx != 0)
5944 return UnableToLegalize;
5945
5946 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
5948 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5949 LLT LeftoverTy;
5950 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
5951 LeftoverRegs, MIRBuilder, MRI);
5952
5953 for (Register Reg : LeftoverRegs)
5954 SrcRegs.push_back(Reg);
5955
5956 uint64_t NarrowSize = NarrowTy.getSizeInBits();
5957 Register OpReg = MI.getOperand(2).getReg();
5958 uint64_t OpStart = MI.getOperand(3).getImm();
5959 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
5960 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
5961 unsigned DstStart = I * NarrowSize;
5962
5963 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
5964 // The entire subregister is defined by this insert, forward the new
5965 // value.
5966 DstRegs.push_back(OpReg);
5967 continue;
5968 }
5969
5970 Register SrcReg = SrcRegs[I];
5971 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
5972 // The leftover reg is smaller than NarrowTy, so we need to extend it.
5973 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
5974 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
5975 }
5976
5977 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
5978 // No part of the insert affects this subregister, forward the original.
5979 DstRegs.push_back(SrcReg);
5980 continue;
5981 }
5982
5983 // OpSegStart is where this destination segment would start in OpReg if it
5984 // extended infinitely in both directions.
5985 int64_t ExtractOffset, InsertOffset;
5986 uint64_t SegSize;
5987 if (OpStart < DstStart) {
5988 InsertOffset = 0;
5989 ExtractOffset = DstStart - OpStart;
5990 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
5991 } else {
5992 InsertOffset = OpStart - DstStart;
5993 ExtractOffset = 0;
5994 SegSize =
5995 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
5996 }
5997
5998 Register SegReg = OpReg;
5999 if (ExtractOffset != 0 || SegSize != OpSize) {
6000 // A genuine extract is needed.
6001 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
6002 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
6003 }
6004
6005 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
6006 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
6007 DstRegs.push_back(DstReg);
6008 }
6009
6010 uint64_t WideSize = DstRegs.size() * NarrowSize;
6011 Register DstReg = MI.getOperand(0).getReg();
6012 if (WideSize > RegTy.getSizeInBits()) {
6013 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
6014 MIRBuilder.buildMergeLikeInstr(MergeReg, DstRegs);
6015 MIRBuilder.buildTrunc(DstReg, MergeReg);
6016 } else
6017 MIRBuilder.buildMergeLikeInstr(DstReg, DstRegs);
6018
6019 MI.eraseFromParent();
6020 return Legalized;
6021}
6022
6023LegalizerHelper::LegalizeResult
6024LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
6025 LLT NarrowTy) {
6026 Register DstReg = MI.getOperand(0).getReg();
6027 LLT DstTy = MRI.getType(DstReg);
6028
6029 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
6030
6031 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6032 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
6033 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6034 LLT LeftoverTy;
6035 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
6036 Src0Regs, Src0LeftoverRegs, MIRBuilder, MRI))
6037 return UnableToLegalize;
6038
6039 LLT Unused;
6040 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
6041 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6042 llvm_unreachable("inconsistent extractParts result");
6043
6044 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6045 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
6046 {Src0Regs[I], Src1Regs[I]});
6047 DstRegs.push_back(Inst.getReg(0));
6048 }
6049
6050 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6051 auto Inst = MIRBuilder.buildInstr(
6052 MI.getOpcode(),
6053 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
6054 DstLeftoverRegs.push_back(Inst.getReg(0));
6055 }
6056
6057 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6058 LeftoverTy, DstLeftoverRegs);
6059
6060 MI.eraseFromParent();
6061 return Legalized;
6062}
6063
6064LegalizerHelper::LegalizeResult
6065LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
6066 LLT NarrowTy) {
6067 if (TypeIdx != 0)
6068 return UnableToLegalize;
6069
6070 auto [DstReg, SrcReg] = MI.getFirst2Regs();
6071
6072 LLT DstTy = MRI.getType(DstReg);
6073 if (DstTy.isVector())
6074 return UnableToLegalize;
6075
6076 SmallVector<Register, 8> Parts;
6077 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
6078 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
6079 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
6080
6081 MI.eraseFromParent();
6082 return Legalized;
6083}
6084
6085LegalizerHelper::LegalizeResult
6086LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
6087 LLT NarrowTy) {
6088 if (TypeIdx != 0)
6089 return UnableToLegalize;
6090
6091 Register CondReg = MI.getOperand(1).getReg();
6092 LLT CondTy = MRI.getType(CondReg);
6093 if (CondTy.isVector()) // TODO: Handle vselect
6094 return UnableToLegalize;
6095
6096 Register DstReg = MI.getOperand(0).getReg();
6097 LLT DstTy = MRI.getType(DstReg);
6098
6099 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
6100 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
6101 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
6102 LLT LeftoverTy;
6103 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
6104 Src1Regs, Src1LeftoverRegs, MIRBuilder, MRI))
6105 return UnableToLegalize;
6106
6107 LLT Unused;
6108 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
6109 Src2Regs, Src2LeftoverRegs, MIRBuilder, MRI))
6110 llvm_unreachable("inconsistent extractParts result");
6111
6112 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
6113 auto Select = MIRBuilder.buildSelect(NarrowTy,
6114 CondReg, Src1Regs[I], Src2Regs[I]);
6115 DstRegs.push_back(Select.getReg(0));
6116 }
6117
6118 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
6119 auto Select = MIRBuilder.buildSelect(
6120 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
6121 DstLeftoverRegs.push_back(Select.getReg(0));
6122 }
6123
6124 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
6125 LeftoverTy, DstLeftoverRegs);
6126
6127 MI.eraseFromParent();
6128 return Legalized;
6129}
6130
6131LegalizerHelper::LegalizeResult
6132LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
6133 LLT NarrowTy) {
6134 if (TypeIdx != 1)
6135 return UnableToLegalize;
6136
6137 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6138 unsigned NarrowSize = NarrowTy.getSizeInBits();
6139
6140 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6141 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
6142
6143 MachineIRBuilder &B = MIRBuilder;
6144 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6145 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
6146 auto C_0 = B.buildConstant(NarrowTy, 0);
6147 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6148 UnmergeSrc.getReg(1), C_0);
6149 auto LoCTLZ = IsUndef ?
6150 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
6151 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
6152 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6153 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
6154 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
6155 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
6156
6157 MI.eraseFromParent();
6158 return Legalized;
6159 }
6160
6161 return UnableToLegalize;
6162}
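// Illustration (editorial note, not in the upstream source): for a 64-bit
// G_CTLZ narrowed to 32-bit halves this computes
//   ctlz(Hi:Lo) = Hi == 0 ? 32 + ctlz(Lo) : ctlz(Hi)
// e.g. 0x0000000000000001 gives Hi == 0 and ctlz(Lo) == 31, so the result
// is 63.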
6163
6164LegalizerHelper::LegalizeResult
6165LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
6166 LLT NarrowTy) {
6167 if (TypeIdx != 1)
6168 return UnableToLegalize;
6169
6170 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6171 unsigned NarrowSize = NarrowTy.getSizeInBits();
6172
6173 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6174 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
6175
6176 MachineIRBuilder &B = MIRBuilder;
6177 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
6178 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
6179 auto C_0 = B.buildConstant(NarrowTy, 0);
6180 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
6181 UnmergeSrc.getReg(0), C_0);
6182 auto HiCTTZ = IsUndef ?
6183 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
6184 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
6185 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
6186 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
6187 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
6188 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
6189
6190 MI.eraseFromParent();
6191 return Legalized;
6192 }
6193
6194 return UnableToLegalize;
6195}
6196
6197LegalizerHelper::LegalizeResult
6198LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
6199 LLT NarrowTy) {
6200 if (TypeIdx != 1)
6201 return UnableToLegalize;
6202
6203 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6204 unsigned NarrowSize = NarrowTy.getSizeInBits();
6205
6206 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
6207 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
6208
6209 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
6210 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
6211 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
6212
6213 MI.eraseFromParent();
6214 return Legalized;
6215 }
6216
6217 return UnableToLegalize;
6218}
6219
6220LegalizerHelper::LegalizeResult
6221LegalizerHelper::narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx,
6222 LLT NarrowTy) {
6223 if (TypeIdx != 1)
6224 return UnableToLegalize;
6225
6226 MachineIRBuilder &B = MIRBuilder;
6227 Register ExpReg = MI.getOperand(2).getReg();
6228 LLT ExpTy = MRI.getType(ExpReg);
6229
6230 unsigned ClampSize = NarrowTy.getScalarSizeInBits();
6231
6232 // Clamp the exponent to the range of the target type.
6233 auto MinExp = B.buildConstant(ExpTy, minIntN(ClampSize));
6234 auto ClampMin = B.buildSMax(ExpTy, ExpReg, MinExp);
6235 auto MaxExp = B.buildConstant(ExpTy, maxIntN(ClampSize));
6236 auto Clamp = B.buildSMin(ExpTy, ClampMin, MaxExp);
6237
6238 auto Trunc = B.buildTrunc(NarrowTy, Clamp);
6239 Observer.changingInstr(MI);
6240 MI.getOperand(2).setReg(Trunc.getReg(0));
6241 Observer.changedInstr(MI);
6242 return Legalized;
6243}
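// Illustration (editorial note, not in the upstream source): narrowing the
// exponent operand of G_FLDEXP from s32 to s16 clamps it to [-32768, 32767]
// before truncating; exponents of that magnitude already underflow or
// overflow every supported floating-point type, so the clamp does not change
// the result.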
6244
6245LegalizerHelper::LegalizeResult
6246LegalizerHelper::lowerBitCount(MachineInstr &MI) {
6247 unsigned Opc = MI.getOpcode();
6248 const auto &TII = MIRBuilder.getTII();
6249 auto isSupported = [this](const LegalityQuery &Q) {
6250 auto QAction = LI.getAction(Q).Action;
6251 return QAction == Legal || QAction == Libcall || QAction == Custom;
6252 };
6253 switch (Opc) {
6254 default:
6255 return UnableToLegalize;
6256 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
6257 // This trivially expands to CTLZ.
6258 Observer.changingInstr(MI);
6259 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
6260 Observer.changedInstr(MI);
6261 return Legalized;
6262 }
6263 case TargetOpcode::G_CTLZ: {
6264 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6265 unsigned Len = SrcTy.getSizeInBits();
6266
6267 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6268 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
6269 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
6270 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
6271 auto ICmp = MIRBuilder.buildICmp(
6272 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
6273 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6274 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
6275 MI.eraseFromParent();
6276 return Legalized;
6277 }
6278 // for now, we do this:
6279 // NewLen = NextPowerOf2(Len);
6280 // x = x | (x >> 1);
6281 // x = x | (x >> 2);
6282 // ...
6283 // x = x | (x >>16);
6284 // x = x | (x >>32); // for 64-bit input
6285 // Up to NewLen/2
6286 // return Len - popcount(x);
6287 //
6288 // Ref: "Hacker's Delight" by Henry Warren
6289 Register Op = SrcReg;
6290 unsigned NewLen = PowerOf2Ceil(Len);
6291 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
6292 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
6293 auto MIBOp = MIRBuilder.buildOr(
6294 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
6295 Op = MIBOp.getReg(0);
6296 }
6297 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
6298 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
6299 MIBPop);
6300 MI.eraseFromParent();
6301 return Legalized;
6302 }
6303 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
6304 // This trivially expands to CTTZ.
6305 Observer.changingInstr(MI);
6306 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
6307 Observer.changedInstr(MI);
6308 return Legalized;
6309 }
6310 case TargetOpcode::G_CTTZ: {
6311 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
6312
6313 unsigned Len = SrcTy.getSizeInBits();
6314 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
6315 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
6316 // zero.
6317 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
6318 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
6319 auto ICmp = MIRBuilder.buildICmp(
6320 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
6321 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
6322 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
6323 MI.eraseFromParent();
6324 return Legalized;
6325 }
6326 // for now, we use: { return popcount(~x & (x - 1)); }
6327 // unless the target has ctlz but not ctpop, in which case we use:
6328 // { return 32 - nlz(~x & (x-1)); }
6329 // Ref: "Hacker's Delight" by Henry Warren
6330 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
6331 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
6332 auto MIBTmp = MIRBuilder.buildAnd(
6333 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
6334 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
6335 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
6336 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
6337 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
6338 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
6339 MI.eraseFromParent();
6340 return Legalized;
6341 }
6342 Observer.changingInstr(MI);
6343 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
6344 MI.getOperand(1).setReg(MIBTmp.getReg(0));
6345 Observer.changedInstr(MI);
6346 return Legalized;
6347 }
6348 case TargetOpcode::G_CTPOP: {
6349 Register SrcReg = MI.getOperand(1).getReg();
6350 LLT Ty = MRI.getType(SrcReg);
6351 unsigned Size = Ty.getSizeInBits();
6352 MachineIRBuilder &B = MIRBuilder;
6353
6354 // Count set bits in blocks of 2 bits. Default approach would be
6355 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
6356 // We use following formula instead:
6357 // B2Count = val - { (val >> 1) & 0x55555555 }
6358 // since it gives same result in blocks of 2 with one instruction less.
6359 auto C_1 = B.buildConstant(Ty, 1);
6360 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
6361 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
6362 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
6363 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
6364 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
6365
6366 // In order to get count in blocks of 4 add values from adjacent block of 2.
6367 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
6368 auto C_2 = B.buildConstant(Ty, 2);
6369 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
6370 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
6371 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
6372 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
6373 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
6374 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
6375
6376 // For count in blocks of 8 bits we don't have to mask high 4 bits before
6377 // addition since count value sits in range {0,...,8} and 4 bits are enough
6378 // to hold such binary values. After addition high 4 bits still hold count
6379 // of set bits in high 4 bit block, set them to zero and get 8 bit result.
6380 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
6381 auto C_4 = B.buildConstant(Ty, 4);
6382 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
6383 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
6384 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
6385 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
6386 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
6387
6388 assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
6389 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
6390 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
6391 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
6392 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
6393
6394 // Shift count result from 8 high bits to low bits.
6395 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
6396 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
6397
6398 MI.eraseFromParent();
6399 return Legalized;
6400 }
6401 }
6402}
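// Illustration (editorial note, not in the upstream source): a worked 8-bit
// G_CTPOP example for val = 0xDA (0b11011010, population count 5):
//   B2Count = 0xDA - ((0xDA >> 1) & 0x55) = 0xDA - 0x45 = 0x95
//   B4Count = (0x95 & 0x33) + ((0x95 >> 2) & 0x33) = 0x11 + 0x21 = 0x32
//   B8Count = (0x32 + (0x32 >> 4)) & 0x0F = 0x05
// With Size == 8 the final multiply (by 0x01) and shift (by 0) are no-ops,
// so the result is 5.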
6403
6404// Check that (every element of) Reg is undef or not an exact multiple of BW.
6405static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
6406 Register Reg, unsigned BW) {
6407 return matchUnaryPredicate(
6408 MRI, Reg,
6409 [=](const Constant *C) {
6410 // Null constant here means an undef.
6411 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
6412 return !CI || CI->getValue().urem(BW) != 0;
6413 },
6414 /*AllowUndefs*/ true);
6415}
6416
6417LegalizerHelper::LegalizeResult
6418LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
6419 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6420 LLT Ty = MRI.getType(Dst);
6421 LLT ShTy = MRI.getType(Z);
6422
6423 unsigned BW = Ty.getScalarSizeInBits();
6424
6425 if (!isPowerOf2_32(BW))
6426 return UnableToLegalize;
6427
6428 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6429 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6430
6431 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6432 // fshl X, Y, Z -> fshr X, Y, -Z
6433 // fshr X, Y, Z -> fshl X, Y, -Z
6434 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
6435 Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
6436 } else {
6437 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
6438 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
6439 auto One = MIRBuilder.buildConstant(ShTy, 1);
6440 if (IsFSHL) {
6441 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6442 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
6443 } else {
6444 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
6445 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
6446 }
6447
6448 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
6449 }
6450
6451 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
6452 MI.eraseFromParent();
6453 return Legalized;
6454}
6455
6456LegalizerHelper::LegalizeResult
6457LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
6458 auto [Dst, X, Y, Z] = MI.getFirst4Regs();
6459 LLT Ty = MRI.getType(Dst);
6460 LLT ShTy = MRI.getType(Z);
6461
6462 const unsigned BW = Ty.getScalarSizeInBits();
6463 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6464
6465 Register ShX, ShY;
6466 Register ShAmt, InvShAmt;
6467
6468 // FIXME: Emit optimized urem by constant instead of letting it expand later.
6469 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
6470 // fshl: X << C | Y >> (BW - C)
6471 // fshr: X << (BW - C) | Y >> C
6472 // where C = Z % BW is not zero
6473 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6474 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6475 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
6476 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
6477 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
6478 } else {
6479 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
6480 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
6481 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
6482 if (isPowerOf2_32(BW)) {
6483 // Z % BW -> Z & (BW - 1)
6484 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
6485 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
6486 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
6487 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
6488 } else {
6489 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
6490 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
6491 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
6492 }
6493
6494 auto One = MIRBuilder.buildConstant(ShTy, 1);
6495 if (IsFSHL) {
6496 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
6497 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
6498 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
6499 } else {
6500 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
6501 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
6502 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
6503 }
6504 }
6505
6506 MIRBuilder.buildOr(Dst, ShX, ShY);
6507 MI.eraseFromParent();
6508 return Legalized;
6509}
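// Illustration (editorial note, not in the upstream source): for s8 operands
// G_FSHL x, y, 3 follows the "amount known non-zero mod BW" path above and
// becomes
//   (x << 3) | (y >> 5)
// i.e. the top 3 bits of y fill the low bits vacated by x, matching the
// concatenate-then-shift definition of a funnel shift.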
6510
6511LegalizerHelper::LegalizeResult
6512LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
6513 // These operations approximately do the following (while avoiding undefined
6514 // shifts by BW):
6515 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
6516 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
6517 Register Dst = MI.getOperand(0).getReg();
6518 LLT Ty = MRI.getType(Dst);
6519 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
6520
6521 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
6522 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
6523
6524 // TODO: Use smarter heuristic that accounts for vector legalization.
6525 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
6526 return lowerFunnelShiftAsShifts(MI);
6527
6528 // This only works for powers of 2, fallback to shifts if it fails.
6529 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
6530 if (Result == UnableToLegalize)
6531 return lowerFunnelShiftAsShifts(MI);
6532 return Result;
6533}
6534
6535LegalizerHelper::LegalizeResult LegalizerHelper::lowerEXT(MachineInstr &MI) {
6536 auto [Dst, Src] = MI.getFirst2Regs();
6537 LLT DstTy = MRI.getType(Dst);
6538 LLT SrcTy = MRI.getType(Src);
6539
6540 uint32_t DstTySize = DstTy.getSizeInBits();
6541 uint32_t DstTyScalarSize = DstTy.getScalarSizeInBits();
6542 uint32_t SrcTyScalarSize = SrcTy.getScalarSizeInBits();
6543
6544 if (!isPowerOf2_32(DstTySize) || !isPowerOf2_32(DstTyScalarSize) ||
6545 !isPowerOf2_32(SrcTyScalarSize))
6546 return UnableToLegalize;
6547
6548 // The step between the source and destination element types is too large;
6549 // split it by creating an intermediate extend instruction.
6550 if (SrcTyScalarSize * 2 < DstTyScalarSize) {
6551 LLT MidTy = SrcTy.changeElementSize(SrcTyScalarSize * 2);
6552 // If the destination type is illegal, split it into multiple statements
6553 // zext x -> zext(merge(zext(unmerge), zext(unmerge)))
6554 auto NewExt = MIRBuilder.buildInstr(MI.getOpcode(), {MidTy}, {Src});
6555 // Unmerge the vector
6556 LLT EltTy = MidTy.changeElementCount(
6557 MidTy.getElementCount().divideCoefficientBy(2));
6558 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, NewExt);
6559
6560 // ZExt the vectors
6561 LLT ZExtResTy = DstTy.changeElementCount(
6562 DstTy.getElementCount().divideCoefficientBy(2));
6563 auto ZExtRes1 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6564 {UnmergeSrc.getReg(0)});
6565 auto ZExtRes2 = MIRBuilder.buildInstr(MI.getOpcode(), {ZExtResTy},
6566 {UnmergeSrc.getReg(1)});
6567
6568 // Merge the ending vectors
6569 MIRBuilder.buildMergeLikeInstr(Dst, {ZExtRes1, ZExtRes2});
6570
6571 MI.eraseFromParent();
6572 return Legalized;
6573 }
6574 return UnableToLegalize;
6575}
6576
6577LegalizerHelper::LegalizeResult LegalizerHelper::lowerTRUNC(MachineInstr &MI) {
6578 // MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
6580 // Similar to how operand splitting is done in SelectionDAG, we can handle
6581 // %res(v8s8) = G_TRUNC %in(v8s32) by generating:
6582 // %inlo(<4x s32>), %inhi(<4 x s32>) = G_UNMERGE %in(<8 x s32>)
6583 // %lo16(<4 x s16>) = G_TRUNC %inlo
6584 // %hi16(<4 x s16>) = G_TRUNC %inhi
6585 // %in16(<8 x s16>) = G_CONCAT_VECTORS %lo16, %hi16
6586 // %res(<8 x s8>) = G_TRUNC %in16
6587
6588 assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
6589
6590 Register DstReg = MI.getOperand(0).getReg();
6591 Register SrcReg = MI.getOperand(1).getReg();
6592 LLT DstTy = MRI.getType(DstReg);
6593 LLT SrcTy = MRI.getType(SrcReg);
6594
6595 if (DstTy.isVector() && isPowerOf2_32(DstTy.getNumElements()) &&
6596 isPowerOf2_32(DstTy.getScalarSizeInBits()) &&
6597 isPowerOf2_32(SrcTy.getNumElements()) &&
6598 isPowerOf2_32(SrcTy.getScalarSizeInBits())) {
6599 // Split input type.
6600 LLT SplitSrcTy = SrcTy.changeElementCount(
6601 SrcTy.getElementCount().divideCoefficientBy(2));
6602
6603 // First, split the source into two smaller vectors.
6604 SmallVector<Register, 2> SplitSrcs;
6605 extractParts(SrcReg, SplitSrcTy, 2, SplitSrcs, MIRBuilder, MRI);
6606
6607 // Truncate the splits into intermediate narrower elements.
6608 LLT InterTy;
6609 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6610 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits() * 2);
6611 else
6612 InterTy = SplitSrcTy.changeElementSize(DstTy.getScalarSizeInBits());
6613 for (unsigned I = 0; I < SplitSrcs.size(); ++I) {
6614 SplitSrcs[I] = MIRBuilder.buildTrunc(InterTy, SplitSrcs[I]).getReg(0);
6615 }
6616
6617 // Combine the new truncates into one vector
6618 auto Merge = MIRBuilder.buildMergeLikeInstr(
6619 DstTy.changeElementSize(InterTy.getScalarSizeInBits()), SplitSrcs);
6620
6621 // Truncate the new vector to the final result type
6622 if (DstTy.getScalarSizeInBits() * 2 < SrcTy.getScalarSizeInBits())
6623 MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), Merge.getReg(0));
6624 else
6625 MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Merge.getReg(0));
6626
6627 MI.eraseFromParent();
6628
6629 return Legalized;
6630 }
6631 return UnableToLegalize;
6632}
6633
6634LegalizerHelper::LegalizeResult
6635LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
6636 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6637 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6638 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6639 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6640 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6641 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
6642 MI.eraseFromParent();
6643 return Legalized;
6644}
6645
6646LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
6647 auto [Dst, DstTy, Src, SrcTy, Amt, AmtTy] = MI.getFirst3RegLLTs();
6648
6649 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
6650 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
6651
6653
6654 // If a rotate in the other direction is supported, use it.
6655 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
6656 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
6657 isPowerOf2_32(EltSizeInBits))
6658 return lowerRotateWithReverseRotate(MI);
6659
6660 // If a funnel shift is supported, use it.
6661 unsigned FShOpc = IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6662 unsigned RevFsh = !IsLeft ? TargetOpcode::G_FSHL : TargetOpcode::G_FSHR;
6663 bool IsFShLegal = false;
6664 if ((IsFShLegal = LI.isLegalOrCustom({FShOpc, {DstTy, AmtTy}})) ||
6665 LI.isLegalOrCustom({RevFsh, {DstTy, AmtTy}})) {
6666 auto buildFunnelShift = [&](unsigned Opc, Register R1, Register R2,
6667 Register R3) {
6668 MIRBuilder.buildInstr(Opc, {R1}, {R2, R2, R3});
6669 MI.eraseFromParent();
6670 return Legalized;
6671 };
6672 // If a funnel shift in the other direction is supported, use it.
6673 if (IsFShLegal) {
6674 return buildFunnelShift(FShOpc, Dst, Src, Amt);
6675 } else if (isPowerOf2_32(EltSizeInBits)) {
6676 Amt = MIRBuilder.buildNeg(DstTy, Amt).getReg(0);
6677 return buildFunnelShift(RevFsh, Dst, Src, Amt);
6678 }
6679 }
6680
6681 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
6682 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
6683 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
6684 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
6685 Register ShVal;
6686 Register RevShiftVal;
6687 if (isPowerOf2_32(EltSizeInBits)) {
6688 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
6689 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
6690 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
6691 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
6692 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6693 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
6694 RevShiftVal =
6695 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
6696 } else {
6697 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
6698 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
6699 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
6700 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
6701 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
6702 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
6703 auto One = MIRBuilder.buildConstant(AmtTy, 1);
6704 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
6705 RevShiftVal =
6706 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
6707 }
6708 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
6709 MI.eraseFromParent();
6710 return Legalized;
6711}
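// Illustration (editorial note, not in the upstream source): for an s8
// rotate-left by a variable amount c, the power-of-two path above produces
//   (x << (c & 7)) | (x >> (-c & 7))
// the standard branch-free rotate; both shift amounts stay in [0, 7], so no
// shift ever reaches the full bit width.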
6712
6713// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
6714// representation.
6715LegalizerHelper::LegalizeResult
6716LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
6717 auto [Dst, Src] = MI.getFirst2Regs();
6718 const LLT S64 = LLT::scalar(64);
6719 const LLT S32 = LLT::scalar(32);
6720 const LLT S1 = LLT::scalar(1);
6721
6722 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
6723
6724 // unsigned cul2f(ulong u) {
6725 // uint lz = clz(u);
6726 // uint e = (u != 0) ? 127U + 63U - lz : 0;
6727 // u = (u << lz) & 0x7fffffffffffffffUL;
6728 // ulong t = u & 0xffffffffffUL;
6729 // uint v = (e << 23) | (uint)(u >> 40);
6730 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
6731 // return as_float(v + r);
6732 // }
6733
6734 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
6735 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
6736
6737 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
6738
6739 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
6740 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
6741
6742 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
6743 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
6744
6745 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
6746 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
6747
6748 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
6749
6750 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
6751 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
6752
6753 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
6754 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
6755 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
6756
6757 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
6758 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
6759 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
6760 auto One = MIRBuilder.buildConstant(S32, 1);
6761
6762 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
6763 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
6764 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
6765 MIRBuilder.buildAdd(Dst, V, R);
6766
6767 MI.eraseFromParent();
6768 return Legalized;
6769}
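// Illustration (editorial note, not in the upstream source): in the cul2f
// scheme above the exponent is biased by 127 + 63 because the leading one is
// normalised up to bit 63; bits 62..40 of the shifted value become the 23-bit
// mantissa, and the remaining low 40 bits only feed r, which rounds the
// packed result to nearest, ties to even.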
6770
6771LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
6772 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6773
6774 if (SrcTy == LLT::scalar(1)) {
6775 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
6776 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6777 MIRBuilder.buildSelect(Dst, Src, True, False);
6778 MI.eraseFromParent();
6779 return Legalized;
6780 }
6781
6782 if (SrcTy != LLT::scalar(64))
6783 return UnableToLegalize;
6784
6785 if (DstTy == LLT::scalar(32)) {
6786 // TODO: SelectionDAG has several alternative expansions to port which may
6787 // be more reasonable depending on the available instructions. If a target
6788 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
6789 // intermediate type, this is probably worse.
6790 return lowerU64ToF32BitOps(MI);
6791 }
6792
6793 return UnableToLegalize;
6794}
6795
6797 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6798
6799 const LLT S64 = LLT::scalar(64);
6800 const LLT S32 = LLT::scalar(32);
6801 const LLT S1 = LLT::scalar(1);
6802
6803 if (SrcTy == S1) {
6804 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
6805 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6806 MIRBuilder.buildSelect(Dst, Src, True, False);
6807 MI.eraseFromParent();
6808 return Legalized;
6809 }
6810
6811 if (SrcTy != S64)
6812 return UnableToLegalize;
6813
6814 if (DstTy == S32) {
6815 // signed cl2f(long l) {
6816 // long s = l >> 63;
6817 // float r = cul2f((l + s) ^ s);
6818 // return s ? -r : r;
6819 // }
6820 Register L = Src;
6821 auto SignBit = MIRBuilder.buildConstant(S64, 63);
6822 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
6823
6824 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
6825 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
6826 auto R = MIRBuilder.buildUITOFP(S32, Xor);
6827
6828 auto RNeg = MIRBuilder.buildFNeg(S32, R);
6829 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
6830 MIRBuilder.buildConstant(S64, 0));
6831 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
6832 MI.eraseFromParent();
6833 return Legalized;
6834 }
6835
6836 return UnableToLegalize;
6837}
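// Worked example of the cl2f() wrapper for l = -5: s = l >> 63 = -1,
// (l + s) ^ s = (-6) ^ -1 = 5, r = uitofp(5) = 5.0f, and since s != 0 the
// select produces -r = -5.0f.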
6838
6840 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6841 const LLT S64 = LLT::scalar(64);
6842 const LLT S32 = LLT::scalar(32);
6843
6844 if (SrcTy != S64 && SrcTy != S32)
6845 return UnableToLegalize;
6846 if (DstTy != S32 && DstTy != S64)
6847 return UnableToLegalize;
6848
6849 // FPTOSI gives the same result as FPTOUI for positive signed integers.
6850 // FPTOUI needs to deal with fp values that convert to unsigned integers
6851 // greater than or equal to 2^31 for float or 2^63 for double. For brevity, 2^Exp.
6852
6853 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
6854 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
6855 : APFloat::IEEEdouble(),
6856 APInt::getZero(SrcTy.getSizeInBits()));
6857 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
6858
6859 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
6860
6861 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
6862 // For an fp Value greater than or equal to Threshold (2^Exp), we use FPTOSI on
6863 // (Value - 2^Exp) and set the highest bit in the result to add 2^Exp back.
6864 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
6865 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
6866 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
6867 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
6868
6869 const LLT S1 = LLT::scalar(1);
6870
6871 MachineInstrBuilder FCMP =
6872 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
6873 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
6874
6875 MI.eraseFromParent();
6876 return Legalized;
6877}
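// Worked example for f32 -> u32 (Exp = 31, Threshold = 2^31):
//  - Src = 7.0f: the FCMP ULT against 2^31 is true, so plain fptosi gives 7.
//  - Src = 3.0e9f: the compare is false, so the result is
//    fptosi(3.0e9 - 2^31) ^ 0x80000000 = 852516352 ^ 0x80000000 = 3000000000.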
6878
6880 auto [Dst, DstTy, Src, SrcTy] = MI.getFirst2RegLLTs();
6881 const LLT S64 = LLT::scalar(64);
6882 const LLT S32 = LLT::scalar(32);
6883
6884 // FIXME: Only f32 to i64 conversions are supported.
6885 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
6886 return UnableToLegalize;
6887
6888 // Expand f32 -> i64 conversion
6889 // This algorithm comes from compiler-rt's implementation of fixsfdi:
6890 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
6891
6892 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
6893
6894 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
6895 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
6896
6897 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
6898 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
6899
6900 auto SignMask = MIRBuilder.buildConstant(SrcTy,
6901 APInt::getSignMask(SrcEltBits));
6902 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
6903 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
6904 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
6905 Sign = MIRBuilder.buildSExt(DstTy, Sign);
6906
6907 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
6908 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
6909 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
6910
6911 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
6912 R = MIRBuilder.buildZExt(DstTy, R);
6913
6914 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
6915 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
6916 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
6917 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
6918
6919 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
6920 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
6921
6922 const LLT S1 = LLT::scalar(1);
6923 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
6924 S1, Exponent, ExponentLoBit);
6925
6926 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
6927
6928 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
6929 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
6930
6931 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
6932
6933 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
6934 S1, Exponent, ZeroSrcTy);
6935
6936 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
6937 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
6938
6939 MI.eraseFromParent();
6940 return Legalized;
6941}
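// Rough worked example of this fixsfdi-style expansion for Src = 1.0f:
// ExponentBits = 127, Exponent = 0, R = mantissa | 0x00800000 = 0x800000.
// Exponent is not greater than ExponentLoBit (23), so R is shifted right by
// 23 giving 1; Sign = 0, so (1 ^ 0) - 0 = 1. Any Exponent < 0 selects 0.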
6942
6943// f64 -> f16 conversion using round-to-nearest-even rounding mode.
6946 const LLT S1 = LLT::scalar(1);
6947 const LLT S32 = LLT::scalar(32);
6948
6949 auto [Dst, Src] = MI.getFirst2Regs();
6950 assert(MRI.getType(Dst).getScalarType() == LLT::scalar(16) &&
6951 MRI.getType(Src).getScalarType() == LLT::scalar(64));
6952
6953 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
6954 return UnableToLegalize;
6955
6957 unsigned Flags = MI.getFlags();
6958 auto Src32 = MIRBuilder.buildFPTrunc(S32, Src, Flags);
6959 MIRBuilder.buildFPTrunc(Dst, Src32, Flags);
6960 MI.eraseFromParent();
6961 return Legalized;
6962 }
6963
6964 const unsigned ExpMask = 0x7ff;
6965 const unsigned ExpBiasf64 = 1023;
6966 const unsigned ExpBiasf16 = 15;
6967
6968 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
6969 Register U = Unmerge.getReg(0);
6970 Register UH = Unmerge.getReg(1);
6971
6972 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
6973 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
6974
6975 // Subtract the fp64 exponent bias (1023) to get the real exponent and
6976 // add the f16 bias (15) to get the biased exponent for the f16 format.
6977 E = MIRBuilder.buildAdd(
6978 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
6979
6982
6983 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
6984 MIRBuilder.buildConstant(S32, 0x1ff));
6985 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
6986
6987 auto Zero = MIRBuilder.buildConstant(S32, 0);
6988 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
6989 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
6990 M = MIRBuilder.buildOr(S32, M, Lo40Set);
6991
6992 // (M != 0 ? 0x0200 : 0) | 0x7c00;
6993 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
6994 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
6995 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
6996
6997 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
6998 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
6999
7000 // N = M | (E << 12);
7001 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
7002 auto N = MIRBuilder.buildOr(S32, M, EShl12);
7003
7004 // B = clamp(1-E, 0, 13);
7005 auto One = MIRBuilder.buildConstant(S32, 1);
7006 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
7007 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
7008 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
7009
7010 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
7011 MIRBuilder.buildConstant(S32, 0x1000));
7012
7013 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
7014 auto D0 = MIRBuilder.buildShl(S32, D, B);
7015
7016 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
7017 D0, SigSetHigh);
7018 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
7019 D = MIRBuilder.buildOr(S32, D, D1);
7020
7021 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
7022 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
7023
7024 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
7026
7027 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
7028 MIRBuilder.buildConstant(S32, 3));
7029 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
7030
7031 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
7032 MIRBuilder.buildConstant(S32, 5));
7033 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
7034
7035 V1 = MIRBuilder.buildOr(S32, V0, V1);
7036 V = MIRBuilder.buildAdd(S32, V, V1);
7037
7038 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
7039 E, MIRBuilder.buildConstant(S32, 30));
7040 V = MIRBuilder.buildSelect(S32, CmpEGt30,
7041 MIRBuilder.buildConstant(S32, 0x7c00), V);
7042
7043 auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
7044 E, MIRBuilder.buildConstant(S32, 1039));
7045 V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
7046
7047 // Extract the sign bit.
7048 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
7049 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
7050
7051 // Insert the sign bit
7052 V = MIRBuilder.buildOr(S32, Sign, V);
7053
7054 MIRBuilder.buildTrunc(Dst, V);
7055 MI.eraseFromParent();
7056 return Legalized;
7057}
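// For example, 1.0 has fp64 exponent field 1023; the rebias above yields
// E = 1023 - 1023 + 15 = 15, which is the biased fp16 exponent for 1.0.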
7058
7061 auto [DstTy, SrcTy] = MI.getFirst2LLTs();
7062 const LLT S64 = LLT::scalar(64);
7063 const LLT S16 = LLT::scalar(16);
7064
7065 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
7066 return lowerFPTRUNC_F64_TO_F16(MI);
7067
7068 return UnableToLegalize;
7069}
7070
7071 // TODO: If RHS is a constant, SelectionDAGBuilder expands this into a
7072// multiplication tree.
7074 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7075 LLT Ty = MRI.getType(Dst);
7076
7077 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
7078 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
7079 MI.eraseFromParent();
7080 return Legalized;
7081}
7082
7084 switch (Opc) {
7085 case TargetOpcode::G_SMIN:
7086 return CmpInst::ICMP_SLT;
7087 case TargetOpcode::G_SMAX:
7088 return CmpInst::ICMP_SGT;
7089 case TargetOpcode::G_UMIN:
7090 return CmpInst::ICMP_ULT;
7091 case TargetOpcode::G_UMAX:
7092 return CmpInst::ICMP_UGT;
7093 default:
7094 llvm_unreachable("not in integer min/max");
7095 }
7096}
7097
7099 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7100
7101 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
7102 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
7103
7104 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
7105 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
7106
7107 MI.eraseFromParent();
7108 return Legalized;
7109}
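// For example, G_SMAX is lowered to roughly the following generic MIR
// (vector types use a compare mask with s1 elements of the same count):
//   %c:_(s1) = G_ICMP intpred(sgt), %src0, %src1
//   %dst = G_SELECT %c, %src0, %src1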
7110
7113 auto [Dst, DstTy, Src0, Src0Ty, Src1, Src1Ty] = MI.getFirst3RegLLTs();
7114 const int Src0Size = Src0Ty.getScalarSizeInBits();
7115 const int Src1Size = Src1Ty.getScalarSizeInBits();
7116
7117 auto SignBitMask = MIRBuilder.buildConstant(
7118 Src0Ty, APInt::getSignMask(Src0Size));
7119
7120 auto NotSignBitMask = MIRBuilder.buildConstant(
7121 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
7122
7123 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
7124 Register And1;
7125 if (Src0Ty == Src1Ty) {
7126 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
7127 } else if (Src0Size > Src1Size) {
7128 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
7129 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
7130 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
7131 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
7132 } else {
7133 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
7134 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
7135 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
7136 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
7137 }
7138
7139 // Be careful about setting nsz/nnan/ninf on every instruction, since the
7140 // constants are a nan and -0.0, but the final result should preserve
7141 // everything.
7142 unsigned Flags = MI.getFlags();
7143 MIRBuilder.buildOr(Dst, And0, And1, Flags);
7144
7145 MI.eraseFromParent();
7146 return Legalized;
7147}
7148
7151 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
7152 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
7153
7154 auto [Dst, Src0, Src1] = MI.getFirst3Regs();
7155 LLT Ty = MRI.getType(Dst);
7156
7157 if (!MI.getFlag(MachineInstr::FmNoNans)) {
7158 // Insert canonicalizes if it's possible we need to quiet to get correct
7159 // sNaN behavior.
7160
7161 // Note this must be done here, and not as an optimization combine in the
7162 // absence of a dedicated quiet-sNaN instruction, as we're using an
7163 // omni-purpose G_FCANONICALIZE.
7164 if (!isKnownNeverSNaN(Src0, MRI))
7165 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
7166
7167 if (!isKnownNeverSNaN(Src1, MRI))
7168 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
7169 }
7170
7171 // If there are no nans, it's safe to simply replace this with the non-IEEE
7172 // version.
7173 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
7174 MI.eraseFromParent();
7175 return Legalized;
7176}
7177
7179 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
7180 Register DstReg = MI.getOperand(0).getReg();
7181 LLT Ty = MRI.getType(DstReg);
7182 unsigned Flags = MI.getFlags();
7183
7184 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
7185 Flags);
7186 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
7187 MI.eraseFromParent();
7188 return Legalized;
7189}
7190
7193 auto [DstReg, X] = MI.getFirst2Regs();
7194 const unsigned Flags = MI.getFlags();
7195 const LLT Ty = MRI.getType(DstReg);
7196 const LLT CondTy = Ty.changeElementSize(1);
7197
7198 // round(x) =>
7199 // t = trunc(x);
7200 // d = fabs(x - t);
7201 // o = copysign(d >= 0.5 ? 1.0 : 0.0, x);
7202 // return t + o;
7203
7204 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
7205
7206 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
7207 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
7208
7209 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
7210 auto Cmp =
7211 MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half, Flags);
7212
7213 // Could emit G_UITOFP instead
7214 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
7215 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7216 auto BoolFP = MIRBuilder.buildSelect(Ty, Cmp, One, Zero);
7217 auto SignedOffset = MIRBuilder.buildFCopysign(Ty, BoolFP, X);
7218
7219 MIRBuilder.buildFAdd(DstReg, T, SignedOffset, Flags);
7220
7221 MI.eraseFromParent();
7222 return Legalized;
7223}
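// Worked example for x = -2.5: t = trunc(-2.5) = -2.0, d = |x - t| = 0.5,
// d >= 0.5 so the offset is copysign(1.0, -2.5) = -1.0, and the result is
// -2.0 + -1.0 = -3.0, i.e. halfway cases round away from zero.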
7224
7226 auto [DstReg, SrcReg] = MI.getFirst2Regs();
7227 unsigned Flags = MI.getFlags();
7228 LLT Ty = MRI.getType(DstReg);
7229 const LLT CondTy = Ty.changeElementSize(1);
7230
7231 // result = trunc(src);
7232 // if (src < 0.0 && src != result)
7233 // result += -1.0.
7234
7235 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
7236 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
7237
7238 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
7239 SrcReg, Zero, Flags);
7240 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
7241 SrcReg, Trunc, Flags);
7242 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
7243 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
7244
7245 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
7246 MI.eraseFromParent();
7247 return Legalized;
7248}
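// Worked example for x = -1.25: trunc = -1.0; x < 0 and x != trunc, so the
// s1 condition is true and G_SITOFP of that 1-bit value yields -1.0 (its
// sign-extended value), giving -1.0 + -1.0 = -2.0 == floor(-1.25). For
// x = 1.25 the condition is false and 0.0 is added, leaving 1.0.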
7249
7252 const unsigned NumOps = MI.getNumOperands();
7253 auto [DstReg, DstTy, Src0Reg, Src0Ty] = MI.getFirst2RegLLTs();
7254 unsigned PartSize = Src0Ty.getSizeInBits();
7255
7256 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
7257 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
7258
7259 for (unsigned I = 2; I != NumOps; ++I) {
7260 const unsigned Offset = (I - 1) * PartSize;
7261
7262 Register SrcReg = MI.getOperand(I).getReg();
7263 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
7264
7265 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
7266 MRI.createGenericVirtualRegister(WideTy);
7267
7268 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
7269 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
7270 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
7271 ResultReg = NextResult;
7272 }
7273
7274 if (DstTy.isPointer()) {
7276 DstTy.getAddressSpace())) {
7277 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
7278 return UnableToLegalize;
7279 }
7280
7281 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
7282 }
7283
7284 MI.eraseFromParent();
7285 return Legalized;
7286}
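// For example, merging two s32 pieces into an s64 becomes
//   res = zext(lo) | (zext(hi) << 32)
// with any further pieces OR'd in at their bit offsets the same way.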
7287
7290 const unsigned NumDst = MI.getNumOperands() - 1;
7291 Register SrcReg = MI.getOperand(NumDst).getReg();
7292 Register Dst0Reg = MI.getOperand(0).getReg();
7293 LLT DstTy = MRI.getType(Dst0Reg);
7294 if (DstTy.isPointer())
7295 return UnableToLegalize; // TODO
7296
7297 SrcReg = coerceToScalar(SrcReg);
7298 if (!SrcReg)
7299 return UnableToLegalize;
7300
7301 // Expand scalarizing unmerge as bitcast to integer and shift.
7302 LLT IntTy = MRI.getType(SrcReg);
7303
7304 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
7305
7306 const unsigned DstSize = DstTy.getSizeInBits();
7307 unsigned Offset = DstSize;
7308 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
7309 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
7310 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
7311 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
7312 }
7313
7314 MI.eraseFromParent();
7315 return Legalized;
7316}
7317
7318/// Lower a vector extract or insert by writing the vector to a stack temporary
7319/// and reloading the element or vector.
7320///
7321/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
7322/// =>
7323/// %stack_temp = G_FRAME_INDEX
7324/// G_STORE %vec, %stack_temp
7325/// %idx = clamp(%idx, %vec.getNumElements())
7326/// %element_ptr = G_PTR_ADD %stack_temp, %idx
7327/// %dst = G_LOAD %element_ptr
7330 Register DstReg = MI.getOperand(0).getReg();
7331 Register SrcVec = MI.getOperand(1).getReg();
7332 Register InsertVal;
7333 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
7334 InsertVal = MI.getOperand(2).getReg();
7335
7336 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
7337
7338 LLT VecTy = MRI.getType(SrcVec);
7339 LLT EltTy = VecTy.getElementType();
7340 unsigned NumElts = VecTy.getNumElements();
7341
7342 int64_t IdxVal;
7343 if (mi_match(Idx, MRI, m_ICst(IdxVal)) && IdxVal <= NumElts) {
7344 SmallVector<Register, 8> SrcRegs;
7345 extractParts(SrcVec, EltTy, NumElts, SrcRegs, MIRBuilder, MRI);
7346
7347 if (InsertVal) {
7348 SrcRegs[IdxVal] = MI.getOperand(2).getReg();
7349 MIRBuilder.buildMergeLikeInstr(DstReg, SrcRegs);
7350 } else {
7351 MIRBuilder.buildCopy(DstReg, SrcRegs[IdxVal]);
7352 }
7353
7354 MI.eraseFromParent();
7355 return Legalized;
7356 }
7357
7358 if (!EltTy.isByteSized()) { // Not implemented.
7359 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
7360 return UnableToLegalize;
7361 }
7362
7363 unsigned EltBytes = EltTy.getSizeInBytes();
7364 Align VecAlign = getStackTemporaryAlignment(VecTy);
7365 Align EltAlign;
7366
7367 MachinePointerInfo PtrInfo;
7368 auto StackTemp = createStackTemporary(
7369 TypeSize::getFixed(VecTy.getSizeInBytes()), VecAlign, PtrInfo);
7370 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
7371
7372 // Get the pointer to the element, and be sure not to hit undefined behavior
7373 // if the index is out of bounds.
7374 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
7375
7376 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
7377 int64_t Offset = IdxVal * EltBytes;
7378 PtrInfo = PtrInfo.getWithOffset(Offset);
7379 EltAlign = commonAlignment(VecAlign, Offset);
7380 } else {
7381 // We lose information with a variable offset.
7382 EltAlign = getStackTemporaryAlignment(EltTy);
7383 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
7384 }
7385
7386 if (InsertVal) {
7387 // Write the inserted element
7388 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
7389
7390 // Reload the whole vector.
7391 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
7392 } else {
7393 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
7394 }
7395
7396 MI.eraseFromParent();
7397 return Legalized;
7398}
7399
7402 auto [DstReg, DstTy, Src0Reg, Src0Ty, Src1Reg, Src1Ty] =
7403 MI.getFirst3RegLLTs();
7404 LLT IdxTy = LLT::scalar(32);
7405
7406 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
7407 Register Undef;
7408 SmallVector<Register, 32> BuildVec;
7409 LLT EltTy = DstTy.getScalarType();
7410
7411 for (int Idx : Mask) {
7412 if (Idx < 0) {
7413 if (!Undef.isValid())
7414 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
7415 BuildVec.push_back(Undef);
7416 continue;
7417 }
7418
7419 if (Src0Ty.isScalar()) {
7420 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
7421 } else {
7422 int NumElts = Src0Ty.getNumElements();
7423 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
7424 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
7425 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
7426 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
7427 BuildVec.push_back(Extract.getReg(0));
7428 }
7429 }
7430
7431 if (DstTy.isScalar())
7432 MIRBuilder.buildCopy(DstReg, BuildVec[0]);
7433 else
7434 MIRBuilder.buildBuildVector(DstReg, BuildVec);
7435 MI.eraseFromParent();
7436 return Legalized;
7437}
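// For example, shuffling two <2 x s32> sources with mask <1, -1, 2> builds a
// vector from: element 1 extracted from the first source, an undef element,
// and element 0 extracted from the second source (index 2 - 2).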
7438
7440 Register AllocSize,
7441 Align Alignment,
7442 LLT PtrTy) {
7443 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
7444
7445 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
7446 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
7447
7448 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
7449 // have to generate an extra instruction to negate the alloc and then use
7450 // G_PTR_ADD to add the negative offset.
7451 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
7452 if (Alignment > Align(1)) {
7453 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
7454 AlignMask.negate();
7455 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
7456 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
7457 }
7458
7459 return MIRBuilder.buildCast(PtrTy, Alloc).getReg(0);
7460}
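// For example, with Alignment = 16 the mask constant is -16 (0x...fff0), so
// the returned pointer is (SP - AllocSize) & -16: the decremented stack
// pointer rounded down to the requested alignment, computed in the integer
// type and cast back to the pointer type.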
7461
7464 const auto &MF = *MI.getMF();
7465 const auto &TFI = *MF.getSubtarget().getFrameLowering();
7466 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
7467 return UnableToLegalize;
7468
7469 Register Dst = MI.getOperand(0).getReg();
7470 Register AllocSize = MI.getOperand(1).getReg();
7471 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
7472
7473 LLT PtrTy = MRI.getType(Dst);
7474 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
7475 Register SPTmp =
7476 getDynStackAllocTargetPtr(SPReg, AllocSize, Alignment, PtrTy);
7477
7478 MIRBuilder.buildCopy(SPReg, SPTmp);
7479 MIRBuilder.buildCopy(Dst, SPTmp);
7480
7481 MI.eraseFromParent();
7482 return Legalized;
7483}
7484
7488 if (!StackPtr)
7489 return UnableToLegalize;
7490
7491 MIRBuilder.buildCopy(MI.getOperand(0), StackPtr);
7492 MI.eraseFromParent();
7493 return Legalized;
7494}
7495
7499 if (!StackPtr)
7500 return UnableToLegalize;
7501
7502 MIRBuilder.buildCopy(StackPtr, MI.getOperand(0));
7503 MI.eraseFromParent();
7504 return Legalized;
7505}
7506
7509 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7510 unsigned Offset = MI.getOperand(2).getImm();
7511
7512 // Extract sub-vector or one element
7513 if (SrcTy.isVector()) {
7514 unsigned SrcEltSize = SrcTy.getElementType().getSizeInBits();
7515 unsigned DstSize = DstTy.getSizeInBits();
7516
7517 if ((Offset % SrcEltSize == 0) && (DstSize % SrcEltSize == 0) &&
7518 (Offset + DstSize <= SrcTy.getSizeInBits())) {
7519 // Unmerge and allow access to each Src element for the artifact combiner.
7520 auto Unmerge = MIRBuilder.buildUnmerge(SrcTy.getElementType(), SrcReg);
7521
7522 // Take the element(s) we need to extract and copy or merge them.
7523 SmallVector<Register, 8> SubVectorElts;
7524 for (unsigned Idx = Offset / SrcEltSize;
7525 Idx < (Offset + DstSize) / SrcEltSize; ++Idx) {
7526 SubVectorElts.push_back(Unmerge.getReg(Idx));
7527 }
7528 if (SubVectorElts.size() == 1)
7529 MIRBuilder.buildCopy(DstReg, SubVectorElts[0]);
7530 else
7531 MIRBuilder.buildMergeLikeInstr(DstReg, SubVectorElts);
7532
7533 MI.eraseFromParent();
7534 return Legalized;
7535 }
7536 }
7537
7538 if (DstTy.isScalar() &&
7539 (SrcTy.isScalar() ||
7540 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
7541 LLT SrcIntTy = SrcTy;
7542 if (!SrcTy.isScalar()) {
7543 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
7544 SrcReg = MIRBuilder.buildBitcast(SrcIntTy, SrcReg).getReg(0);
7545 }
7546
7547 if (Offset == 0)
7548 MIRBuilder.buildTrunc(DstReg, SrcReg);
7549 else {
7550 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
7551 auto Shr = MIRBuilder.buildLShr(SrcIntTy, SrcReg, ShiftAmt);
7552 MIRBuilder.buildTrunc(DstReg, Shr);
7553 }
7554
7555 MI.eraseFromParent();
7556 return Legalized;
7557 }
7558
7559 return UnableToLegalize;
7560}
7561
7563 auto [Dst, Src, InsertSrc] = MI.getFirst3Regs();
7564 uint64_t Offset = MI.getOperand(3).getImm();
7565
7566 LLT DstTy = MRI.getType(Src);
7567 LLT InsertTy = MRI.getType(InsertSrc);
7568
7569 // Insert sub-vector or one element
7570 if (DstTy.isVector() && !InsertTy.isPointer()) {
7571 LLT EltTy = DstTy.getElementType();
7572 unsigned EltSize = EltTy.getSizeInBits();
7573 unsigned InsertSize = InsertTy.getSizeInBits();
7574
7575 if ((Offset % EltSize == 0) && (InsertSize % EltSize == 0) &&
7576 (Offset + InsertSize <= DstTy.getSizeInBits())) {
7577 auto UnmergeSrc = MIRBuilder.buildUnmerge(EltTy, Src);
7578 SmallVector<Register, 8> DstElts;
7579 unsigned Idx = 0;
7580 // Elements from Src before the insert start offset.
7581 for (; Idx < Offset / EltSize; ++Idx) {
7582 DstElts.push_back(UnmergeSrc.getReg(Idx));
7583 }
7584
7585 // Replace elements in Src with elements from InsertSrc
7586 if (InsertTy.getSizeInBits() > EltSize) {
7587 auto UnmergeInsertSrc = MIRBuilder.buildUnmerge(EltTy, InsertSrc);
7588 for (unsigned i = 0; Idx < (Offset + InsertSize) / EltSize;
7589 ++Idx, ++i) {
7590 DstElts.push_back(UnmergeInsertSrc.getReg(i));
7591 }
7592 } else {
7593 DstElts.push_back(InsertSrc);
7594 ++Idx;
7595 }
7596
7597 // Remaining elements from Src after insert
7598 for (; Idx < DstTy.getNumElements(); ++Idx) {
7599 DstElts.push_back(UnmergeSrc.getReg(Idx));
7600 }
7601
7602 MIRBuilder.buildMergeLikeInstr(Dst, DstElts);
7603 MI.eraseFromParent();
7604 return Legalized;
7605 }
7606 }
7607
7608 if (InsertTy.isVector() ||
7609 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
7610 return UnableToLegalize;
7611
7613 if ((DstTy.isPointer() &&
7614 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
7615 (InsertTy.isPointer() &&
7616 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
7617 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
7618 return UnableToLegalize;
7619 }
7620
7621 LLT IntDstTy = DstTy;
7622
7623 if (!DstTy.isScalar()) {
7624 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
7625 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
7626 }
7627
7628 if (!InsertTy.isScalar()) {
7629 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
7630 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
7631 }
7632
7633 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
7634 if (Offset != 0) {
7635 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
7636 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
7637 }
7638
7640 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
7641
7642 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
7643 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
7644 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
7645
7646 MIRBuilder.buildCast(Dst, Or);
7647 MI.eraseFromParent();
7648 return Legalized;
7649}
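// Worked example for inserting an s16 into an s32 at bit offset 8: the mask
// keeps only the bits outside the inserted field (0xff0000ff here), so the
// result is (Src & 0xff0000ff) | (zext(InsertSrc) << 8).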
7650
7653 auto [Dst0, Dst0Ty, Dst1, Dst1Ty, LHS, LHSTy, RHS, RHSTy] =
7654 MI.getFirst4RegLLTs();
7655 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
7656
7657 LLT Ty = Dst0Ty;
7658 LLT BoolTy = Dst1Ty;
7659
7660 if (IsAdd)
7661 MIRBuilder.buildAdd(Dst0, LHS, RHS);
7662 else
7663 MIRBuilder.buildSub(Dst0, LHS, RHS);
7664
7665 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
7666
7667 auto Zero = MIRBuilder.buildConstant(Ty, 0);
7668
7669 // For an addition, the result should be less than one of the operands (LHS)
7670 // if and only if the other operand (RHS) is negative, otherwise there will
7671 // be overflow.
7672 // For a subtraction, the result should be less than one of the operands
7673 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
7674 // otherwise there will be overflow.
7675 auto ResultLowerThanLHS =
7676 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
7677 auto ConditionRHS = MIRBuilder.buildICmp(
7678 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
7679
7680 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
7681 MI.eraseFromParent();
7682 return Legalized;
7683}
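// Worked example of the overflow check for s8 G_SADDO with LHS = 100 and
// RHS = 50: the wrapped sum is -106, so "result < LHS" is true while
// "RHS < 0" is false; the XOR of the two is true, i.e. overflow.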
7684
7687 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7688 LLT Ty = MRI.getType(Res);
7689 bool IsSigned;
7690 bool IsAdd;
7691 unsigned BaseOp;
7692 switch (MI.getOpcode()) {
7693 default:
7694 llvm_unreachable("unexpected addsat/subsat opcode");
7695 case TargetOpcode::G_UADDSAT:
7696 IsSigned = false;
7697 IsAdd = true;
7698 BaseOp = TargetOpcode::G_ADD;
7699 break;
7700 case TargetOpcode::G_SADDSAT:
7701 IsSigned = true;
7702 IsAdd = true;
7703 BaseOp = TargetOpcode::G_ADD;
7704 break;
7705 case TargetOpcode::G_USUBSAT:
7706 IsSigned = false;
7707 IsAdd = false;
7708 BaseOp = TargetOpcode::G_SUB;
7709 break;
7710 case TargetOpcode::G_SSUBSAT:
7711 IsSigned = true;
7712 IsAdd = false;
7713 BaseOp = TargetOpcode::G_SUB;
7714 break;
7715 }
7716
7717 if (IsSigned) {
7718 // sadd.sat(a, b) ->
7719 // hi = 0x7fffffff - smax(a, 0)
7720 // lo = 0x80000000 - smin(a, 0)
7721 // a + smin(smax(lo, b), hi)
7722 // ssub.sat(a, b) ->
7723 // lo = smax(a, -1) - 0x7fffffff
7724 // hi = smin(a, -1) - 0x80000000
7725 // a - smin(smax(lo, b), hi)
7726 // TODO: AMDGPU can use a "median of 3" instruction here:
7727 // a +/- med3(lo, b, hi)
7728 uint64_t NumBits = Ty.getScalarSizeInBits();
7729 auto MaxVal =
7730 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
7731 auto MinVal =
7732 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7733 MachineInstrBuilder Hi, Lo;
7734 if (IsAdd) {
7735 auto Zero = MIRBuilder.buildConstant(Ty, 0);
7736 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
7737 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
7738 } else {
7739 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
7740 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
7741 MaxVal);
7742 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
7743 MinVal);
7744 }
7745 auto RHSClamped =
7746 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
7747 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
7748 } else {
7749 // uadd.sat(a, b) -> a + umin(~a, b)
7750 // usub.sat(a, b) -> a - umin(a, b)
7751 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
7752 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
7753 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
7754 }
7755
7756 MI.eraseFromParent();
7757 return Legalized;
7758}
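// Worked example of the unsigned path for s8 G_UADDSAT with a = 200, b = 100:
// ~a = 55 is exactly the remaining headroom, umin(~a, b) = 55, and
// a + 55 = 255, the saturated result.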
7759
7762 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7763 LLT Ty = MRI.getType(Res);
7764 LLT BoolTy = Ty.changeElementSize(1);
7765 bool IsSigned;
7766 bool IsAdd;
7767 unsigned OverflowOp;
7768 switch (MI.getOpcode()) {
7769 default:
7770 llvm_unreachable("unexpected addsat/subsat opcode");
7771 case TargetOpcode::G_UADDSAT:
7772 IsSigned = false;
7773 IsAdd = true;
7774 OverflowOp = TargetOpcode::G_UADDO;
7775 break;
7776 case TargetOpcode::G_SADDSAT:
7777 IsSigned = true;
7778 IsAdd = true;
7779 OverflowOp = TargetOpcode::G_SADDO;
7780 break;
7781 case TargetOpcode::G_USUBSAT:
7782 IsSigned = false;
7783 IsAdd = false;
7784 OverflowOp = TargetOpcode::G_USUBO;
7785 break;
7786 case TargetOpcode::G_SSUBSAT:
7787 IsSigned = true;
7788 IsAdd = false;
7789 OverflowOp = TargetOpcode::G_SSUBO;
7790 break;
7791 }
7792
7793 auto OverflowRes =
7794 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
7795 Register Tmp = OverflowRes.getReg(0);
7796 Register Ov = OverflowRes.getReg(1);
7797 MachineInstrBuilder Clamp;
7798 if (IsSigned) {
7799 // sadd.sat(a, b) ->
7800 // {tmp, ov} = saddo(a, b)
7801 // ov ? (tmp >>s 31) + 0x80000000 : r
7802 // ssub.sat(a, b) ->
7803 // {tmp, ov} = ssubo(a, b)
7804 // ov ? (tmp >>s 31) + 0x80000000 : r
7805 uint64_t NumBits = Ty.getScalarSizeInBits();
7806 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
7807 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
7808 auto MinVal =
7809 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7810 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
7811 } else {
7812 // uadd.sat(a, b) ->
7813 // {tmp, ov} = uaddo(a, b)
7814 // ov ? 0xffffffff : tmp
7815 // usub.sat(a, b) ->
7816 // {tmp, ov} = usubo(a, b)
7817 // ov ? 0 : tmp
7818 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
7819 }
7820 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
7821
7822 MI.eraseFromParent();
7823 return Legalized;
7824}
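// A note on the signed clamp above (s32 case): on overflow the wrapped
// result has the opposite sign of the true sum, so (tmp >>s 31) is 0 or -1
// and adding 0x80000000 yields INT_MIN when tmp is non-negative (negative
// overflow) and INT_MAX when tmp is negative (positive overflow), which is
// the required saturation bound in each case.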
7825
7828 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
7829 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
7830 "Expected shlsat opcode!");
7831 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
7832 auto [Res, LHS, RHS] = MI.getFirst3Regs();
7833 LLT Ty = MRI.getType(Res);
7834 LLT BoolTy = Ty.changeElementSize(1);
7835
7836 unsigned BW = Ty.getScalarSizeInBits();
7837 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
7838 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
7839 : MIRBuilder.buildLShr(Ty, Result, RHS);
7840
7841 MachineInstrBuilder SatVal;
7842 if (IsSigned) {
7843 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
7844 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
7845 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
7846 MIRBuilder.buildConstant(Ty, 0));
7847 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
7848 } else {
7849 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
7850 }
7851 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
7852 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
7853
7854 MI.eraseFromParent();
7855 return Legalized;
7856}
7857
7859 auto [Dst, Src] = MI.getFirst2Regs();
7860 const LLT Ty = MRI.getType(Src);
7861 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
7862 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
7863
7864 // Swap most and least significant byte, set remaining bytes in Res to zero.
7865 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
7866 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
7867 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7868 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
7869
7870 // Set i-th high/low byte in Res to i-th low/high byte from Src.
7871 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
7872 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
7873 APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
7874 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
7875 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
7876 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
7877 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
7878 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
7879 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
7880 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
7881 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7882 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
7883 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
7884 }
7885 Res.getInstr()->getOperand(0).setReg(Dst);
7886
7887 MI.eraseFromParent();
7888 return Legalized;
7889}
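// Worked example for an s32 bswap of 0x11223344: BaseShiftAmt = 24, so the
// initial OR combines 0x44000000 (LSB shifted left) and 0x00000011 (MSB
// shifted right); the single loop iteration (i = 1) then moves byte 1 (0x33)
// up by 8 and byte 2 (0x22) down by 8, giving 0x44332211.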
7890
7891//{ (Src & Mask) >> N } | { (Src << N) & Mask }
7893 MachineInstrBuilder Src, const APInt &Mask) {
7894 const LLT Ty = Dst.getLLTTy(*B.getMRI());
7895 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
7896 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
7897 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
7898 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
7899 return B.buildOr(Dst, LHS, RHS);
7900}
7901
7904 auto [Dst, Src] = MI.getFirst2Regs();
7905 const LLT Ty = MRI.getType(Src);
7906 unsigned Size = Ty.getSizeInBits();
7907
7908 MachineInstrBuilder BSWAP =
7909 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
7910
7911 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
7912 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
7913 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
7914 MachineInstrBuilder Swap4 =
7915 SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
7916
7917 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
7918 // [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
7919 // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
7920 MachineInstrBuilder Swap2 =
7921 SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
7922
7923 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
7924 // [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
7925 // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
7926 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
7927
7928 MI.eraseFromParent();
7929 return Legalized;
7930}
7931
7935
7936 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
7937 int NameOpIdx = IsRead ? 1 : 0;
7938 int ValRegIndex = IsRead ? 0 : 1;
7939
7940 Register ValReg = MI.getOperand(ValRegIndex).getReg();
7941 const LLT Ty = MRI.getType(ValReg);
7942 const MDString *RegStr = cast<MDString>(
7943 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
7944
7945 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
7946 if (!PhysReg.isValid())
7947 return UnableToLegalize;
7948
7949 if (IsRead)
7950 MIRBuilder.buildCopy(ValReg, PhysReg);
7951 else
7952 MIRBuilder.buildCopy(PhysReg, ValReg);
7953
7954 MI.eraseFromParent();
7955 return Legalized;
7956}
7957
7960 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
7961 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
7962 Register Result = MI.getOperand(0).getReg();
7963 LLT OrigTy = MRI.getType(Result);
7964 auto SizeInBits = OrigTy.getScalarSizeInBits();
7965 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
7966
7967 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
7968 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
7969 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
7970 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
7971
7972 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
7973 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
7974 MIRBuilder.buildTrunc(Result, Shifted);
7975
7976 MI.eraseFromParent();
7977 return Legalized;
7978}
7979
7982 auto [DstReg, DstTy, SrcReg, SrcTy] = MI.getFirst2RegLLTs();
7983 FPClassTest Mask = static_cast<FPClassTest>(MI.getOperand(2).getImm());
7984
7985 if (Mask == fcNone) {
7986 MIRBuilder.buildConstant(DstReg, 0);
7987 MI.eraseFromParent();
7988 return Legalized;
7989 }
7990 if (Mask == fcAllFlags) {
7991 MIRBuilder.buildConstant(DstReg, 1);
7992 MI.eraseFromParent();
7993 return Legalized;
7994 }
7995
7996 // TODO: Try inverting the test with getInvertedFPClassTest like the DAG
7997 // version
7998
7999 unsigned BitSize = SrcTy.getScalarSizeInBits();
8000 const fltSemantics &Semantics = getFltSemanticForLLT(SrcTy.getScalarType());
8001
8002 LLT IntTy = LLT::scalar(BitSize);
8003 if (SrcTy.isVector())
8004 IntTy = LLT::vector(SrcTy.getElementCount(), IntTy);
8005 auto AsInt = MIRBuilder.buildCopy(IntTy, SrcReg);
8006
8007 // Various masks.
8008 APInt SignBit = APInt::getSignMask(BitSize);
8009 APInt ValueMask = APInt::getSignedMaxValue(BitSize); // All bits but sign.
8010 APInt Inf = APFloat::getInf(Semantics).bitcastToAPInt(); // Exp and int bit.
8011 APInt ExpMask = Inf;
8012 APInt AllOneMantissa = APFloat::getLargest(Semantics).bitcastToAPInt() & ~Inf;
8013 APInt QNaNBitMask =
8014 APInt::getOneBitSet(BitSize, AllOneMantissa.getActiveBits() - 1);
8015 APInt InvertionMask = APInt::getAllOnes(DstTy.getScalarSizeInBits());
8016
8017 auto SignBitC = MIRBuilder.buildConstant(IntTy, SignBit);
8018 auto ValueMaskC = MIRBuilder.buildConstant(IntTy, ValueMask);
8019 auto InfC = MIRBuilder.buildConstant(IntTy, Inf);
8020 auto ExpMaskC = MIRBuilder.buildConstant(IntTy, ExpMask);
8021 auto ZeroC = MIRBuilder.buildConstant(IntTy, 0);
8022
8023 auto Abs = MIRBuilder.buildAnd(IntTy, AsInt, ValueMaskC);
8024 auto Sign =
8026
8027 auto Res = MIRBuilder.buildConstant(DstTy, 0);
8028 // Clang doesn't support capture of structured bindings:
8029 LLT DstTyCopy = DstTy;
8030 const auto appendToRes = [&](MachineInstrBuilder ToAppend) {
8031 Res = MIRBuilder.buildOr(DstTyCopy, Res, ToAppend);
8032 };
8033
8034 // Tests that involve more than one class should be processed first.
8035 if ((Mask & fcFinite) == fcFinite) {
8036 // finite(V) ==> abs(V) u< exp_mask
8037 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
8038 ExpMaskC));
8039 Mask &= ~fcFinite;
8040 } else if ((Mask & fcFinite) == fcPosFinite) {
8041 // finite(V) && V > 0 ==> V u< exp_mask
8042 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, AsInt,
8043 ExpMaskC));
8044 Mask &= ~fcPosFinite;
8045 } else if ((Mask & fcFinite) == fcNegFinite) {
8046 // finite(V) && V < 0 ==> abs(V) u< exp_mask && signbit == 1
8047 auto Cmp = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy, Abs,
8048 ExpMaskC);
8049 auto And = MIRBuilder.buildAnd(DstTy, Cmp, Sign);
8050 appendToRes(And);
8051 Mask &= ~fcNegFinite;
8052 }
8053
8054 if (FPClassTest PartialCheck = Mask & (fcZero | fcSubnormal)) {
8055 // fcZero | fcSubnormal => test all exponent bits are 0
8056 // TODO: Handle sign bit specific cases
8057 // TODO: Handle inverted case
8058 if (PartialCheck == (fcZero | fcSubnormal)) {
8059 auto ExpBits = MIRBuilder.buildAnd(IntTy, AsInt, ExpMaskC);
8061 ExpBits, ZeroC));
8062 Mask &= ~PartialCheck;
8063 }
8064 }
8065
8066 // Check for individual classes.
8067 if (FPClassTest PartialCheck = Mask & fcZero) {
8068 if (PartialCheck == fcPosZero)
8070 AsInt, ZeroC));
8071 else if (PartialCheck == fcZero)
8072 appendToRes(
8074 else // fcNegZero
8076 AsInt, SignBitC));
8077 }
8078
8079 if (FPClassTest PartialCheck = Mask & fcSubnormal) {
8080 // issubnormal(V) ==> unsigned(abs(V) - 1) u< (all mantissa bits set)
8081 // issubnormal(V) && V>0 ==> unsigned(V - 1) u< (all mantissa bits set)
8082 auto V = (PartialCheck == fcPosSubnormal) ? AsInt : Abs;
8083 auto OneC = MIRBuilder.buildConstant(IntTy, 1);
8084 auto VMinusOne = MIRBuilder.buildSub(IntTy, V, OneC);
8085 auto SubnormalRes =
8087 MIRBuilder.buildConstant(IntTy, AllOneMantissa));
8088 if (PartialCheck == fcNegSubnormal)
8089 SubnormalRes = MIRBuilder.buildAnd(DstTy, SubnormalRes, Sign);
8090 appendToRes(SubnormalRes);
8091 }
8092
8093 if (FPClassTest PartialCheck = Mask & fcInf) {
8094 if (PartialCheck == fcPosInf)
8096 AsInt, InfC));
8097 else if (PartialCheck == fcInf)
8098 appendToRes(
8100 else { // fcNegInf
8101 APInt NegInf = APFloat::getInf(Semantics, true).bitcastToAPInt();
8102 auto NegInfC = MIRBuilder.buildConstant(IntTy, NegInf);
8104 AsInt, NegInfC));
8105 }
8106 }
8107
8108 if (FPClassTest PartialCheck = Mask & fcNan) {
8109 auto InfWithQnanBitC = MIRBuilder.buildConstant(IntTy, Inf | QNaNBitMask);
8110 if (PartialCheck == fcNan) {
8111 // isnan(V) ==> abs(V) u> int(inf)
8112 appendToRes(
8114 } else if (PartialCheck == fcQNan) {
8115 // isquiet(V) ==> abs(V) u>= (unsigned(Inf) | quiet_bit)
8116 appendToRes(MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_UGE, DstTy, Abs,
8117 InfWithQnanBitC));
8118 } else { // fcSNan
8119 // issignaling(V) ==> abs(V) u> unsigned(Inf) &&
8120 // abs(V) u< (unsigned(Inf) | quiet_bit)
8121 auto IsNan =
8123 auto IsNotQnan = MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_ULT, DstTy,
8124 Abs, InfWithQnanBitC);
8125 appendToRes(MIRBuilder.buildAnd(DstTy, IsNan, IsNotQnan));
8126 }
8127 }
8128
8129 if (FPClassTest PartialCheck = Mask & fcNormal) {
8130 // isnormal(V) ==> (0 u< exp u< max_exp) ==> (unsigned(exp-1) u<
8131 // (max_exp-1))
8132 APInt ExpLSB = ExpMask & ~(ExpMask.shl(1));
8133 auto ExpMinusOne = MIRBuilder.buildSub(
8134 IntTy, Abs, MIRBuilder.buildConstant(IntTy, ExpLSB));
8135 APInt MaxExpMinusOne = ExpMask - ExpLSB;
8136 auto NormalRes =
8138 MIRBuilder.buildConstant(IntTy, MaxExpMinusOne));
8139 if (PartialCheck == fcNegNormal)
8140 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, Sign);
8141 else if (PartialCheck == fcPosNormal) {
8142 auto PosSign = MIRBuilder.buildXor(
8143 DstTy, Sign, MIRBuilder.buildConstant(DstTy, InvertionMask));
8144 NormalRes = MIRBuilder.buildAnd(DstTy, NormalRes, PosSign);
8145 }
8146 appendToRes(NormalRes);
8147 }
8148
8149 MIRBuilder.buildCopy(DstReg, Res);
8150 MI.eraseFromParent();
8151 return Legalized;
8152}
8153
8155 // Implement G_SELECT in terms of XOR, AND, OR.
8156 auto [DstReg, DstTy, MaskReg, MaskTy, Op1Reg, Op1Ty, Op2Reg, Op2Ty] =
8157 MI.getFirst4RegLLTs();
8158
8159 bool IsEltPtr = DstTy.isPointerOrPointerVector();
8160 if (IsEltPtr) {
8161 LLT ScalarPtrTy = LLT::scalar(DstTy.getScalarSizeInBits());
8162 LLT NewTy = DstTy.changeElementType(ScalarPtrTy);
8163 Op1Reg = MIRBuilder.buildPtrToInt(NewTy, Op1Reg).getReg(0);
8164 Op2Reg = MIRBuilder.buildPtrToInt(NewTy, Op2Reg).getReg(0);
8165 DstTy = NewTy;
8166 }
8167
8168 if (MaskTy.isScalar()) {
8169 // Turn the scalar condition into a vector condition mask if needed.
8170
8171 Register MaskElt = MaskReg;
8172
8173 // The condition was potentially zero extended before, but we want a sign
8174 // extended boolean.
8175 if (MaskTy != LLT::scalar(1))
8176 MaskElt = MIRBuilder.buildSExtInReg(MaskTy, MaskElt, 1).getReg(0);
8177
8178 // Continue the sign extension (or truncate) to match the data type.
8179 MaskElt =
8180 MIRBuilder.buildSExtOrTrunc(DstTy.getScalarType(), MaskElt).getReg(0);
8181
8182 if (DstTy.isVector()) {
8183 // Generate a vector splat idiom.
8184 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
8185 MaskReg = ShufSplat.getReg(0);
8186 } else {
8187 MaskReg = MaskElt;
8188 }
8189 MaskTy = DstTy;
8190 } else if (!DstTy.isVector()) {
8191 // Cannot handle the case that mask is a vector and dst is a scalar.
8192 return UnableToLegalize;
8193 }
8194
8195 if (MaskTy.getSizeInBits() != DstTy.getSizeInBits()) {
8196 return UnableToLegalize;
8197 }
8198
8199 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
8200 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
8201 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
8202 if (IsEltPtr) {
8203 auto Or = MIRBuilder.buildOr(DstTy, NewOp1, NewOp2);
8204 MIRBuilder.buildIntToPtr(DstReg, Or);
8205 } else {
8206 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
8207 }
8208 MI.eraseFromParent();
8209 return Legalized;
8210}
8211
8213 // Split DIVREM into individual instructions.
8214 unsigned Opcode = MI.getOpcode();
8215
8216 MIRBuilder.buildInstr(
8217 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
8218 : TargetOpcode::G_UDIV,
8219 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8220 MIRBuilder.buildInstr(
8221 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
8222 : TargetOpcode::G_UREM,
8223 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
8224 MI.eraseFromParent();
8225 return Legalized;
8226}
8227
8230 // Expand %res = G_ABS %a into:
8231 // %v1 = G_ASHR %a, scalar_size-1
8232 // %v2 = G_ADD %a, %v1
8233 // %res = G_XOR %v2, %v1
8234 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
8235 Register OpReg = MI.getOperand(1).getReg();
8236 auto ShiftAmt =
8237 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
8238 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
8239 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
8240 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
8241 MI.eraseFromParent();
8242 return Legalized;
8243}
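// Worked example for s8 with a = -5 (0xfb): v1 = a >> 7 = -1 (0xff),
// v2 = a + v1 = -6 (0xfa), and v2 ^ v1 = 0x05 = 5. For non-negative a,
// v1 = 0 and the value passes through unchanged.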
8244
8247 // Expand %res = G_ABS %a into:
8248 // %v1 = G_CONSTANT 0
8249 // %v2 = G_SUB %v1, %a
8250 // %res = G_SMAX %a, %v2
8251 Register SrcReg = MI.getOperand(1).getReg();
8252 LLT Ty = MRI.getType(SrcReg);
8253 auto Zero = MIRBuilder.buildConstant(Ty, 0);
8254 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg);
8255 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
8256 MI.eraseFromParent();
8257 return Legalized;
8258}
8259
8262 Register SrcReg = MI.getOperand(1).getReg();
8263 Register DestReg = MI.getOperand(0).getReg();
8264 LLT Ty = MRI.getType(SrcReg), IType = LLT::scalar(1);
8265 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
8266 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
8267 auto ICmp = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, IType, SrcReg, Zero);
8268 MIRBuilder.buildSelect(DestReg, ICmp, SrcReg, Sub);
8269 MI.eraseFromParent();
8270 return Legalized;
8271}
8272
8275 Register SrcReg = MI.getOperand(1).getReg();
8276 LLT SrcTy = MRI.getType(SrcReg);
8277 LLT DstTy = MRI.getType(SrcReg);
8278
8279 // The source could be a scalar if the IR type was <1 x sN>.
8280 if (SrcTy.isScalar()) {
8281 if (DstTy.getSizeInBits() > SrcTy.getSizeInBits())
8282 return UnableToLegalize; // FIXME: handle extension.
8283 // This can be just a plain copy.
8285 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::COPY));
8287 return Legalized;
8288 }
8289 return UnableToLegalize;
8290}
8291
8292static Type *getTypeForLLT(LLT Ty, LLVMContext &C);
8293
8295 MachineFunction &MF = *MI.getMF();
8297 LLVMContext &Ctx = MF.getFunction().getContext();
8298 Register ListPtr = MI.getOperand(1).getReg();
8299 LLT PtrTy = MRI.getType(ListPtr);
8300
8301 // ListPtr is a pointer to the head of the list. Get the address
8302 // of the head of the list.
8303 Align PtrAlignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
8304 MachineMemOperand *PtrLoadMMO = MF.getMachineMemOperand(
8305 MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, PtrAlignment);
8306 auto VAList = MIRBuilder.buildLoad(PtrTy, ListPtr, *PtrLoadMMO).getReg(0);
8307
8308 const Align A(MI.getOperand(2).getImm());
8309 LLT PtrTyAsScalarTy = LLT::scalar(PtrTy.getSizeInBits());
8310 if (A > TLI.getMinStackArgumentAlignment()) {
8311 Register AlignAmt =
8312 MIRBuilder.buildConstant(PtrTyAsScalarTy, A.value() - 1).getReg(0);
8313 auto AddDst = MIRBuilder.buildPtrAdd(PtrTy, VAList, AlignAmt);
8314 auto AndDst = MIRBuilder.buildMaskLowPtrBits(PtrTy, AddDst, Log2(A));
8315 VAList = AndDst.getReg(0);
8316 }
8317
8318 // Increment the pointer, VAList, to the next vaarg.
8319 // The list should be bumped by the size of the element in the current head
8320 // of the list.
8321 Register Dst = MI.getOperand(0).getReg();
8322 LLT LLTTy = MRI.getType(Dst);
8323 Type *Ty = getTypeForLLT(LLTTy, Ctx);
8324 auto IncAmt =
8325 MIRBuilder.buildConstant(PtrTyAsScalarTy, DL.getTypeAllocSize(Ty));
8326 auto Succ = MIRBuilder.buildPtrAdd(PtrTy, VAList, IncAmt);
8327
8328 // Store the incremented VAList to the legalized pointer
8329 MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
8330 MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, PtrAlignment);
8331 MIRBuilder.buildStore(Succ, ListPtr, *StoreMMO);
8332 // Load the actual argument out of the pointer VAList
8333 Align EltAlignment = DL.getABITypeAlign(Ty);
8334 MachineMemOperand *EltLoadMMO = MF.getMachineMemOperand(
8335 MachinePointerInfo(), MachineMemOperand::MOLoad, LLTTy, EltAlignment);
8336 MIRBuilder.buildLoad(Dst, VAList, *EltLoadMMO);
8337
8338 MI.eraseFromParent();
8339 return Legalized;
8340}
8341
8343 // On Darwin, -Os means optimize for size without hurting performance, so
8344 // only really optimize for size when -Oz (MinSize) is used.
8346 return MF.getFunction().hasMinSize();
8347 return MF.getFunction().hasOptSize();
8348}
8349
8350// Returns a list of types to use for memory op lowering in MemOps. A partial
8351// port of findOptimalMemOpLowering in TargetLowering.
8352static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
8353 unsigned Limit, const MemOp &Op,
8354 unsigned DstAS, unsigned SrcAS,
8355 const AttributeList &FuncAttributes,
8356 const TargetLowering &TLI) {
8357 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
8358 return false;
8359
8360 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
8361
8362 if (Ty == LLT()) {
8363 // Use the largest scalar type whose alignment constraints are satisfied.
8364 // We only need to check DstAlign here as SrcAlign is always greater than
8365 // or equal to DstAlign (or zero).
8366 Ty = LLT::scalar(64);
8367 if (Op.isFixedDstAlign())
8368 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
8369 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
8370 Ty = LLT::scalar(Ty.getSizeInBytes());
8371 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
8372 // FIXME: check for the largest legal type we can load/store to.
8373 }
8374
8375 unsigned NumMemOps = 0;
8376 uint64_t Size = Op.size();
8377 while (Size) {
8378 unsigned TySize = Ty.getSizeInBytes();
8379 while (TySize > Size) {
8380 // For now, only use non-vector load / store's for the left-over pieces.
8381 LLT NewTy = Ty;
8382 // FIXME: check for mem op safety and legality of the types. Not all of
8383 // SDAGisms map cleanly to GISel concepts.
8384 if (NewTy.isVector())
8385 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
8386 NewTy = LLT::scalar(llvm::bit_floor(NewTy.getSizeInBits() - 1));
8387 unsigned NewTySize = NewTy.getSizeInBytes();
8388 assert(NewTySize > 0 && "Could not find appropriate type");
8389
8390 // If the new LLT cannot cover all of the remaining bits, then consider
8391 // issuing a (or a pair of) unaligned and overlapping load / store.
8392 unsigned Fast;
8393 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
8394 MVT VT = getMVTForLLT(Ty);
8395 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
8397 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
8399 Fast)
8400 TySize = Size;
8401 else {
8402 Ty = NewTy;
8403 TySize = NewTySize;
8404 }
8405 }
8406
8407 if (++NumMemOps > Limit)
8408 return false;
8409
8410 MemOps.push_back(Ty);
8411 Size -= TySize;
8412 }
8413
8414 return true;
8415}
8416
8418 if (Ty.isVector())
8419 return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
8420 Ty.getNumElements());
8421 return IntegerType::get(C, Ty.getSizeInBits());
8422}
8423
8424// Get a vectorized representation of the memset value operand, GISel edition.
8426 MachineRegisterInfo &MRI = *MIB.getMRI();
8427 unsigned NumBits = Ty.getScalarSizeInBits();
8428 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8429 if (!Ty.isVector() && ValVRegAndVal) {
8430 APInt Scalar = ValVRegAndVal->Value.trunc(8);
8431 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
8432 return MIB.buildConstant(Ty, SplatVal).getReg(0);
8433 }
8434
8435 // Extend the byte value to the larger type, and then multiply by a magic
8436 // value 0x010101... in order to replicate it across every byte.
8437 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
8438 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
8439 return MIB.buildConstant(Ty, 0).getReg(0);
8440 }
8441
8442 LLT ExtType = Ty.getScalarType();
8443 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
8444 if (NumBits > 8) {
8445 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
8446 auto MagicMI = MIB.buildConstant(ExtType, Magic);
8447 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
8448 }
8449
8450 // For vector types create a G_BUILD_VECTOR.
8451 if (Ty.isVector())
8452 Val = MIB.buildSplatBuildVector(Ty, Val).getReg(0);
8453
8454 return Val;
8455}
8456
8457LegalizerHelper::LegalizeResult
8458LegalizerHelper::lowerMemset(MachineInstr &MI, Register Dst, Register Val,
8459 uint64_t KnownLen, Align Alignment,
8460 bool IsVolatile) {
8461 auto &MF = *MI.getParent()->getParent();
8462 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8463 auto &DL = MF.getDataLayout();
8464 LLVMContext &C = MF.getFunction().getContext();
8465
8466 assert(KnownLen != 0 && "Have a zero length memset length!");
8467
8468 bool DstAlignCanChange = false;
8469 MachineFrameInfo &MFI = MF.getFrameInfo();
8470 bool OptSize = shouldLowerMemFuncForSize(MF);
8471
8472 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8473 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8474 DstAlignCanChange = true;
8475
8476 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
8477 std::vector<LLT> MemOps;
8478
8479 const auto &DstMMO = **MI.memoperands_begin();
8480 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8481
8482 auto ValVRegAndVal = getIConstantVRegValWithLookThrough(Val, MRI);
8483 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
8484
8485 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
8486 MemOp::Set(KnownLen, DstAlignCanChange,
8487 Alignment,
8488 /*IsZeroMemset=*/IsZeroVal,
8489 /*IsVolatile=*/IsVolatile),
8490 DstPtrInfo.getAddrSpace(), ~0u,
8491 MF.getFunction().getAttributes(), TLI))
8492 return UnableToLegalize;
8493
8494 if (DstAlignCanChange) {
8495 // Get an estimate of the type from the LLT.
8496 Type *IRTy = getTypeForLLT(MemOps[0], C);
8497 Align NewAlign = DL.getABITypeAlign(IRTy);
8498 if (NewAlign > Alignment) {
8499 Alignment = NewAlign;
8500 unsigned FI = FIDef->getOperand(1).getIndex();
8501 // Give the stack frame object a larger alignment if needed.
8502 if (MFI.getObjectAlign(FI) < Alignment)
8503 MFI.setObjectAlignment(FI, Alignment);
8504 }
8505 }
8506
8507 MachineIRBuilder MIB(MI);
8508 // Find the largest store and generate the bit pattern for it.
8509 LLT LargestTy = MemOps[0];
8510 for (unsigned i = 1; i < MemOps.size(); i++)
8511 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
8512 LargestTy = MemOps[i];
8513
8514 // The memset stored value is always defined as an s8, so in order to make it
8515 // work with larger store types we need to repeat the bit pattern across the
8516 // wider type.
8517 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
8518
8519 if (!MemSetValue)
8520 return UnableToLegalize;
8521
8522 // Generate the stores. For each store type in the list, we generate the
8523 // matching store of that type to the destination address.
8524 LLT PtrTy = MRI.getType(Dst);
8525 unsigned DstOff = 0;
8526 unsigned Size = KnownLen;
8527 for (unsigned I = 0; I < MemOps.size(); I++) {
8528 LLT Ty = MemOps[I];
8529 unsigned TySize = Ty.getSizeInBytes();
8530 if (TySize > Size) {
8531 // Issuing an unaligned load / store pair that overlaps with the previous
8532 // pair. Adjust the offset accordingly.
8533 assert(I == MemOps.size() - 1 && I != 0);
8534 DstOff -= TySize - Size;
8535 }
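// Illustrative example (not from the source): for KnownLen = 7 with
// MemOps = {s32, s32}, the second 4-byte store would run one byte past the
// end, so DstOff is pulled back from 4 to 3 and the final store covers
// bytes 3..6, overlapping byte 3 of the first store.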
8536
8537 // If this store is smaller than the largest store see whether we can get
8538 // the smaller value for free with a truncate.
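// E.g. if the splat was materialized as the s64 0xABABABABABABABAB, a smaller
// s32 store can reuse it through a G_TRUNC (giving 0xABABABAB) when the target
// reports that truncate as free, instead of rebuilding the pattern from Val.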
8539 Register Value = MemSetValue;
8540 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
8541 MVT VT = getMVTForLLT(Ty);
8542 MVT LargestVT = getMVTForLLT(LargestTy);
8543 if (!LargestTy.isVector() && !Ty.isVector() &&
8544 TLI.isTruncateFree(LargestVT, VT))
8545 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
8546 else
8547 Value = getMemsetValue(Val, Ty, MIB);
8548 if (!Value)
8549 return UnableToLegalize;
8550 }
8551
8552 auto *StoreMMO = MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
8553
8554 Register Ptr = Dst;
8555 if (DstOff != 0) {
8556 auto Offset =
8557 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
8558 Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
8559 }
8560
8561 MIB.buildStore(Value, Ptr, *StoreMMO);
8562 DstOff += Ty.getSizeInBytes();
8563 Size -= TySize;
8564 }
8565
8566 MI.eraseFromParent();
8567 return Legalized;
8568}
8569
8570LegalizerHelper::LegalizeResult
8571LegalizerHelper::lowerMemcpyInline(MachineInstr &MI) {
8572 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8573
8574 auto [Dst, Src, Len] = MI.getFirst3Regs();
8575
8576 const auto *MMOIt = MI.memoperands_begin();
8577 const MachineMemOperand *MemOp = *MMOIt;
8578 bool IsVolatile = MemOp->isVolatile();
8579
8580 // See if this is a constant length copy
8581 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8582 // FIXME: support dynamically sized G_MEMCPY_INLINE
8583 assert(LenVRegAndVal &&
8584 "inline memcpy with dynamic size is not yet supported");
8585 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8586 if (KnownLen == 0) {
8587 MI.eraseFromParent();
8588 return Legalized;
8589 }
8590
8591 const auto &DstMMO = **MI.memoperands_begin();
8592 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8593 Align DstAlign = DstMMO.getBaseAlign();
8594 Align SrcAlign = SrcMMO.getBaseAlign();
8595
8596 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8597 IsVolatile);
8598}
8599
8600LegalizerHelper::LegalizeResult
8601LegalizerHelper::lowerMemcpyInline(MachineInstr &MI, Register Dst, Register Src,
8602 uint64_t KnownLen, Align DstAlign,
8603 Align SrcAlign, bool IsVolatile) {
8604 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
8605 return lowerMemcpy(MI, Dst, Src, KnownLen,
8606 std::numeric_limits<uint64_t>::max(), DstAlign, SrcAlign,
8607 IsVolatile);
8608}
8609
8610LegalizerHelper::LegalizeResult
8611LegalizerHelper::lowerMemcpy(MachineInstr &MI, Register Dst, Register Src,
8612 uint64_t KnownLen, uint64_t Limit, Align DstAlign,
8613 Align SrcAlign, bool IsVolatile) {
8614 auto &MF = *MI.getParent()->getParent();
8615 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8616 auto &DL = MF.getDataLayout();
8617 LLVMContext &C = MF.getFunction().getContext();
8618
8619 assert(KnownLen != 0 && "Have a zero length memcpy length!");
8620
8621 bool DstAlignCanChange = false;
8622 MachineFrameInfo &MFI = MF.getFrameInfo();
8623 Align Alignment = std::min(DstAlign, SrcAlign);
8624
8625 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8626 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8627 DstAlignCanChange = true;
8628
8629 // FIXME: infer better src pointer alignment like SelectionDAG does here.
8630 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
8631 // if the memcpy is in a tail call position.
8632
8633 std::vector<LLT> MemOps;
8634
8635 const auto &DstMMO = **MI.memoperands_begin();
8636 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8637 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8638 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8639
8640 if (!findGISelOptimalMemOpLowering(
8641 MemOps, Limit,
8642 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8643 IsVolatile),
8644 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8645 MF.getFunction().getAttributes(), TLI))
8646 return UnableToLegalize;
8647
8648 if (DstAlignCanChange) {
8649 // Get an estimate of the type from the LLT.
8650 Type *IRTy = getTypeForLLT(MemOps[0], C);
8651 Align NewAlign = DL.getABITypeAlign(IRTy);
8652
8653 // Don't promote to an alignment that would require dynamic stack
8654 // realignment.
8655 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8656 if (!TRI->hasStackRealignment(MF))
8657 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
8658 NewAlign = NewAlign.previous();
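// E.g. if the ABI alignment of the first MemOp type is 16 but the natural
// stack alignment is only 8 and the target cannot dynamically realign the
// stack, NewAlign is stepped back to 8 before it is applied below.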
8659
8660 if (NewAlign > Alignment) {
8661 Alignment = NewAlign;
8662 unsigned FI = FIDef->getOperand(1).getIndex();
8663 // Give the stack frame object a larger alignment if needed.
8664 if (MFI.getObjectAlign(FI) < Alignment)
8665 MFI.setObjectAlignment(FI, Alignment);
8666 }
8667 }
8668
8669 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
8670
8671 MachineIRBuilder MIB(MI);
8672 // Now we need to emit a load/store pair for each of the types we've
8673 // collected. I.e. for each type, generate a load from the source pointer of
8674 // that type width, then generate a corresponding store of the loaded value
8675 // to the dest buffer. This can result in a sequence of loads and stores of
8676 // mixed types, depending on what the target specifies as good types to use.
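// As a sketch (illustrative only), with MemOps = {s64, s32} the loop below
// would emit roughly:
//   %v0:_(s64) = G_LOAD %src        ; bytes 0..7
//   G_STORE %v0(s64), %dst
//   %off:_(s64) = G_CONSTANT i64 8
//   %p:_(p0) = G_PTR_ADD %src, %off
//   %v1:_(s32) = G_LOAD %p          ; bytes 8..11
//   %q:_(p0) = G_PTR_ADD %dst, %off
//   G_STORE %v1(s32), %q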
8677 unsigned CurrOffset = 0;
8678 unsigned Size = KnownLen;
8679 for (auto CopyTy : MemOps) {
8680 // Issuing an unaligned load / store pair that overlaps with the previous
8681 // pair. Adjust the offset accordingly.
8682 if (CopyTy.getSizeInBytes() > Size)
8683 CurrOffset -= CopyTy.getSizeInBytes() - Size;
8684
8685 // Construct MMOs for the accesses.
8686 auto *LoadMMO =
8687 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8688 auto *StoreMMO =
8689 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8690
8691 // Create the load.
8692 Register LoadPtr = Src;
8693 Register Offset;
8694 if (CurrOffset != 0) {
8695 LLT SrcTy = MRI.getType(Src);
8696 Offset = MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset)
8697 .getReg(0);
8698 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
8699 }
8700 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
8701
8702 // Create the store.
8703 Register StorePtr = Dst;
8704 if (CurrOffset != 0) {
8705 LLT DstTy = MRI.getType(Dst);
8706 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
8707 }
8708 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
8709 CurrOffset += CopyTy.getSizeInBytes();
8710 Size -= CopyTy.getSizeInBytes();
8711 }
8712
8713 MI.eraseFromParent();
8714 return Legalized;
8715}
8716
8717LegalizerHelper::LegalizeResult
8718LegalizerHelper::lowerMemmove(MachineInstr &MI, Register Dst, Register Src,
8719 uint64_t KnownLen, Align DstAlign, Align SrcAlign,
8720 bool IsVolatile) {
8721 auto &MF = *MI.getParent()->getParent();
8722 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8723 auto &DL = MF.getDataLayout();
8724 LLVMContext &C = MF.getFunction().getContext();
8725
8726 assert(KnownLen != 0 && "Have a zero length memmove length!");
8727
8728 bool DstAlignCanChange = false;
8729 MachineFrameInfo &MFI = MF.getFrameInfo();
8730 bool OptSize = shouldLowerMemFuncForSize(MF);
8731 Align Alignment = std::min(DstAlign, SrcAlign);
8732
8733 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
8734 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
8735 DstAlignCanChange = true;
8736
8737 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
8738 std::vector<LLT> MemOps;
8739
8740 const auto &DstMMO = **MI.memoperands_begin();
8741 const auto &SrcMMO = **std::next(MI.memoperands_begin());
8742 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
8743 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
8744
8745 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
8746 // to a bug in its findOptimalMemOpLowering implementation. For now do the
8747 // same thing here.
8748 if (!findGISelOptimalMemOpLowering(
8749 MemOps, Limit,
8750 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
8751 /*IsVolatile*/ true),
8752 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
8753 MF.getFunction().getAttributes(), TLI))
8754 return UnableToLegalize;
8755
8756 if (DstAlignCanChange) {
8757 // Get an estimate of the type from the LLT.
8758 Type *IRTy = getTypeForLLT(MemOps[0], C);
8759 Align NewAlign = DL.getABITypeAlign(IRTy);
8760
8761 // Don't promote to an alignment that would require dynamic stack
8762 // realignment.
8763 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
8764 if (!TRI->hasStackRealignment(MF))
8765 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
8766 NewAlign = NewAlign.previous();
8767
8768 if (NewAlign > Alignment) {
8769 Alignment = NewAlign;
8770 unsigned FI = FIDef->getOperand(1).getIndex();
8771 // Give the stack frame object a larger alignment if needed.
8772 if (MFI.getObjectAlign(FI) < Alignment)
8773 MFI.setObjectAlignment(FI, Alignment);
8774 }
8775 }
8776
8777 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
8778
8779 MachineIRBuilder MIB(MI);
8780 // Memmove requires that we perform the loads first before issuing the stores.
8781 // Apart from that, this loop is pretty much doing the same thing as the
8782 // memcpy codegen function.
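// This matters because the source and destination ranges may overlap: if the
// stores were interleaved with the loads (as in the memcpy lowering above), an
// earlier store could clobber source bytes that a later load still needs.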
8783 unsigned CurrOffset = 0;
8784 SmallVector<Register, 16> LoadVals;
8785 for (auto CopyTy : MemOps) {
8786 // Construct MMO for the load.
8787 auto *LoadMMO =
8788 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
8789
8790 // Create the load.
8791 Register LoadPtr = Src;
8792 if (CurrOffset != 0) {
8793 LLT SrcTy = MRI.getType(Src);
8794 auto Offset =
8795 MIB.buildConstant(LLT::scalar(SrcTy.getSizeInBits()), CurrOffset);
8796 LoadPtr = MIB.buildPtrAdd(SrcTy, Src, Offset).getReg(0);
8797 }
8798 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
8799 CurrOffset += CopyTy.getSizeInBytes();
8800 }
8801
8802 CurrOffset = 0;
8803 for (unsigned I = 0; I < MemOps.size(); ++I) {
8804 LLT CopyTy = MemOps[I];
8805 // Now store the values loaded.
8806 auto *StoreMMO =
8807 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
8808
8809 Register StorePtr = Dst;
8810 if (CurrOffset != 0) {
8811 LLT DstTy = MRI.getType(Dst);
8812 auto Offset =
8813 MIB.buildConstant(LLT::scalar(DstTy.getSizeInBits()), CurrOffset);
8814 StorePtr = MIB.buildPtrAdd(DstTy, Dst, Offset).getReg(0);
8815 }
8816 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
8817 CurrOffset += CopyTy.getSizeInBytes();
8818 }
8819 MI.eraseFromParent();
8820 return Legalized;
8821}
8822
8823LegalizerHelper::LegalizeResult
8824LegalizerHelper::lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
8825 const unsigned Opc = MI.getOpcode();
8826 // This combine is fairly complex so it's not written with a separate
8827 // matcher function.
8828 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
8829 Opc == TargetOpcode::G_MEMSET) &&
8830 "Expected memcpy like instruction");
8831
8832 auto MMOIt = MI.memoperands_begin();
8833 const MachineMemOperand *MemOp = *MMOIt;
8834
8835 Align DstAlign = MemOp->getBaseAlign();
8836 Align SrcAlign;
8837 auto [Dst, Src, Len] = MI.getFirst3Regs();
8838
8839 if (Opc != TargetOpcode::G_MEMSET) {
8840 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
8841 MemOp = *(++MMOIt);
8842 SrcAlign = MemOp->getBaseAlign();
8843 }
8844
8845 // See if this is a constant length copy
8846 auto LenVRegAndVal = getIConstantVRegValWithLookThrough(Len, MRI);
8847 if (!LenVRegAndVal)
8848 return UnableToLegalize;
8849 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
8850
8851 if (KnownLen == 0) {
8852 MI.eraseFromParent();
8853 return Legalized;
8854 }
8855
8856 bool IsVolatile = MemOp->isVolatile();
8857 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
8858 return lowerMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
8859 IsVolatile);
8860
8861 // Don't try to optimize volatile.
8862 if (IsVolatile)
8863 return UnableToLegalize;
8864
8865 if (MaxLen && KnownLen > MaxLen)
8866 return UnableToLegalize;
8867
8868 if (Opc == TargetOpcode::G_MEMCPY) {
8869 auto &MF = *MI.getParent()->getParent();
8870 const auto &TLI = *MF.getSubtarget().getTargetLowering();
8871 bool OptSize = shouldLowerMemFuncForSize(MF);
8872 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
8873 return lowerMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
8874 IsVolatile);
8875 }
8876 if (Opc == TargetOpcode::G_MEMMOVE)
8877 return lowerMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
8878 if (Opc == TargetOpcode::G_MEMSET)
8879 return lowerMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
8880 return UnableToLegalize;
8881}
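// A minimal sketch (hypothetical, not part of this file) of how a target's
// LegalizerInfo might route these opcodes into lowerMemCpyFamily() when it has
// marked them as Custom; the 32-byte cap is an assumed, illustrative threshold:
//
//   bool MyTargetLegalizerInfo::legalizeCustom(
//       LegalizerHelper &Helper, MachineInstr &MI,
//       LostDebugLocObserver &LocObserver) const {
//     switch (MI.getOpcode()) {
//     case TargetOpcode::G_MEMCPY:
//     case TargetOpcode::G_MEMMOVE:
//     case TargetOpcode::G_MEMSET:
//       // Only inline small, constant-length operations; otherwise leave the
//       // instruction for a libcall-based legalization.
//       return Helper.lowerMemCpyFamily(MI, /*MaxLen=*/32) ==
//              LegalizerHelper::Legalized;
//     default:
//       return false;
//     }
//   }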
unsigned const MachineRegisterInfo * MRI
#define Success
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static const LLT S1
static const LLT S64
static const LLT S32
static const LLT S16
amdgpu AMDGPU Register Bank Select
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
This file describes how to lower LLVM calls to machine code calls.
#define GISEL_VECREDUCE_CASES_NONSEQ
Definition: Utils.h:73
static std::optional< bool > isBigEndian(const SmallDenseMap< int64_t, int64_t, 8 > &MemOffset2Idx, int64_t LowestIdx)
Given a map from byte offsets in memory to indices in a load/store, determine if that map corresponds...
static Type * getTypeForLLT(LLT Ty, LLVMContext &C)
return RetTy
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
std::string Name
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RTLIBCASE_INT(LibcallPrefix)
static bool findGISelOptimalMemOpLowering(std::vector< LLT > &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, unsigned SrcAS, const AttributeList &FuncAttributes, const TargetLowering &TLI)
static RTLIB::Libcall getOutlineAtomicLibcall(MachineInstr &MI)
static Register buildBitFieldInsert(MachineIRBuilder &B, Register TargetReg, Register InsertReg, Register OffsetBits)
Emit code to insert InsertReg into TargetRet at OffsetBits in TargetReg, while preserving other bits ...
static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB)
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
static void broadcastSrcOp(SmallVectorImpl< SrcOp > &Ops, unsigned N, MachineOperand &Op)
Operand Op is used on N sub-instructions.
static bool isLibCallInTailPosition(const CallLowering::ArgInfo &Result, MachineInstr &MI, const TargetInstrInfo &TII, MachineRegisterInfo &MRI)
True if an instruction is in tail position in its caller.
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType, LostDebugLocObserver &LocObserver)
static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B, Register Idx, unsigned NewEltSize, unsigned OldEltSize)
Figure out the bit offset into a register when coercing a vector index for the wide element type.
static void makeDstOps(SmallVectorImpl< DstOp > &DstOps, LLT Ty, unsigned NumElts)
Fill DstOps with DstOps that have same number of elements combined as the Ty.
static bool shouldLowerMemFuncForSize(const MachineFunction &MF)
#define LCALL5(A)
static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B, MachineInstrBuilder Src, const APInt &Mask)
static void emitLoadFromConstantPool(Register DstReg, const Constant *ConstVal, MachineIRBuilder &MIRBuilder)
static void getUnmergePieces(SmallVectorImpl< Register > &Pieces, MachineIRBuilder &B, Register Src, LLT Ty)
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
static LegalizerHelper::LegalizeResult createAtomicLibcall(MachineIRBuilder &MIRBuilder, MachineInstr &MI)
static RTLIB::Libcall getStateLibraryFunctionFor(MachineInstr &MI, const TargetLowering &TLI)
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static bool hasSameNumEltsOnAllVectorOperands(GenericMachineInstr &MI, MachineRegisterInfo &MRI, std::initializer_list< unsigned > NonVecOpIndices)
Check that all vector operands have same number of elements.
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType, LostDebugLocObserver &LocObserver)
static Register clampVectorIndex(MachineIRBuilder &B, Register IdxReg, LLT VecTy)
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
static void getUnmergeResults(SmallVectorImpl< Register > &Regs, const MachineInstr &MI)
Append the result registers of G_UNMERGE_VALUES MI to Regs.
static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI, Register Reg, unsigned BW)
#define RTLIBCASE(LibcallPrefix)
static Type * getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty)
Interface for Targets to specify which operations they can successfully select and how the others sho...
Tracks DebugLocs between checkpoints and verifies that they are transferred.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
This file declares the MachineIRBuilder class.
unsigned const TargetRegisterInfo * TRI
#define R2(n)
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
uint64_t High
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
R600 Clause Merge
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file describes how to lower LLVM code to machine code.
Value * RHS
Value * LHS
opStatus convertFromAPInt(const APInt &Input, bool IsSigned, roundingMode RM)
Definition: APFloat.h:1193
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
static APFloat getLargest(const fltSemantics &Sem, bool Negative=false)
Returns the largest finite number in the given semantics.
Definition: APFloat.h:1006
static APFloat getInf(const fltSemantics &Sem, bool Negative=false)
Factory for Positive and Negative Infinity.
Definition: APFloat.h:966
Class for arbitrary precision integers.
Definition: APInt.h:76
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:212
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:207
unsigned getActiveBits() const
Compute the number of active bits in the value.
Definition: APInt.h:1463
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:906
static APInt getMaxValue(unsigned numBits)
Gets maximum unsigned value of APInt for specific bit width.
Definition: APInt.h:184
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1672
static APInt getSignedMaxValue(unsigned numBits)
Gets maximum signed value of APInt for a specific bit width.
Definition: APInt.h:187
void negate()
Negate this APInt in place.
Definition: APInt.h:1421
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:620
static APInt getSignedMinValue(unsigned numBits)
Gets minimum signed value of APInt for a specific bit width.
Definition: APInt.h:197
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition: APInt.h:851
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:284
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition: APInt.h:178
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:217
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:829
static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, unsigned hiBit)
Wrap version of getBitsSet.
Definition: APInt.h:248
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
bool hasAttributes() const
Return true if the builder has IR-level attributes.
Definition: Attributes.h:1070
AttrBuilder & removeAttribute(Attribute::AttrKind Val)
Remove an attribute from the builder.
AttributeSet getRetAttrs() const
The attributes for the ret value are returned.
bool hasRetAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the return value.
Definition: Attributes.h:798
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:960
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:989
@ FCMP_OLT
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:966
@ FCMP_OGE
0 0 1 1 True if ordered and greater than or equal
Definition: InstrTypes.h:965
@ ICMP_UGE
unsigned greater or equal
Definition: InstrTypes.h:984
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:983
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:987
@ FCMP_ULT
1 1 0 0 True if unordered or less than
Definition: InstrTypes.h:974
@ FCMP_ONE
0 1 1 0 True if ordered and operands are unequal
Definition: InstrTypes.h:968
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:985
@ ICMP_EQ
equal
Definition: InstrTypes.h:981
@ ICMP_NE
not equal
Definition: InstrTypes.h:982
bool isSigned() const
Definition: InstrTypes.h:1232
const APFloat & getValueAPF() const
Definition: Constants.h:311
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
This is an important base class in LLVM.
Definition: Constant.h:41
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:393
bool isBigEndian() const
Definition: DataLayout.h:239
static constexpr ElementCount getFixed(ScalarTy MinVal)
Definition: TypeSize.h:296
static constexpr ElementCount get(ScalarTy MinVal, bool Scalable)
Definition: TypeSize.h:302
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:680
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:677
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:350
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:205
Represents any generic load, including sign/zero extending variants.
Register getDstReg() const
Get the definition register of the loaded value.
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
Represents any type of generic load or store.
Register getPointerReg() const
Get the source register of the pointer value.
MachineMemOperand & getMMO() const
Get the MachineMemOperand on this instruction.
LocationSize getMemSize() const
Returns the size in bytes of the memory access.
bool isAtomic() const
Returns true if the attached MachineMemOperand has the atomic flag set.
Represents a G_STORE.
Register getValueReg() const
Get the stored value register.
A base class for all GenericMachineInstrs.
Register getReg(unsigned Idx) const
Access the Idx'th operand as a register and return it.
bool isTailCall(const MachineInstr &MI) const override
bool isEquality() const
Return true if this predicate is either EQ or NE.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:278
constexpr unsigned getScalarSizeInBits() const
Definition: LowLevelType.h:267
constexpr bool isScalar() const
Definition: LowLevelType.h:146
constexpr LLT changeElementType(LLT NewEltTy) const
If this type is a vector, return a vector with the same number of elements but the new element type.
Definition: LowLevelType.h:214
static constexpr LLT vector(ElementCount EC, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Definition: LowLevelType.h:64
constexpr bool isPointerVector() const
Definition: LowLevelType.h:152
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr bool isValid() const
Definition: LowLevelType.h:145
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
static constexpr LLT pointer(unsigned AddressSpace, unsigned SizeInBits)
Get a low-level pointer in the given address space.
Definition: LowLevelType.h:57
constexpr bool isScalable() const
Returns true if the LLT is a scalable vector.
Definition: LowLevelType.h:170
constexpr bool isByteSized() const
Definition: LowLevelType.h:263
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
constexpr bool isPointer() const
Definition: LowLevelType.h:149
constexpr LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
Definition: LowLevelType.h:290
constexpr ElementCount getElementCount() const
Definition: LowLevelType.h:184
constexpr LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Definition: LowLevelType.h:221
constexpr unsigned getAddressSpace() const
Definition: LowLevelType.h:280
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
Definition: LowLevelType.h:100
constexpr LLT changeElementCount(ElementCount EC) const
Return a vector or scalar with the same element type and the new element count.
Definition: LowLevelType.h:230
constexpr LLT getScalarType() const
Definition: LowLevelType.h:208
constexpr TypeSize getSizeInBytes() const
Returns the total size of the type in bytes, i.e.
Definition: LowLevelType.h:203
static constexpr LLT scalarOrVector(ElementCount EC, LLT ScalarTy)
Definition: LowLevelType.h:124
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
LegalizeResult lowerShlSat(MachineInstr &MI)
LegalizeResult narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFPTRUNC_F64_TO_F16(MachineInstr &MI)
LegalizeResult equalizeVectorShuffleLengths(MachineInstr &MI)
Equalize source and destination vector sizes of G_SHUFFLE_VECTOR.
LegalizeResult bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_INSERT_VECTOR_ELT.
LegalizeResult lowerSITOFP(MachineInstr &MI)
LegalizeResult lowerDynStackAlloc(MachineInstr &MI)
LegalizeResult lowerBitCount(MachineInstr &MI)
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
LegalizeResult lowerIntrinsicRound(MachineInstr &MI)
void widenScalarSrc(MachineInstr &MI, LLT WideTy, unsigned OpIdx, unsigned ExtOpcode)
Legalize a single operand OpIdx of the machine instruction MI as a Use by extending the operand's typ...
LegalizeResult moreElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerSMULH_UMULH(MachineInstr &MI)
LegalizeResult lowerLoad(GAnyLoad &MI)
LegalizeResult fewerElementsVectorShuffle(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerAbsToAddXor(MachineInstr &MI)
void moreElementsVectorDst(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Def by performing it with addition...
LegalizeResult lowerFConstant(MachineInstr &MI)
LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerBitreverse(MachineInstr &MI)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerExtractInsertVectorElt(MachineInstr &MI)
Lower a vector extract or insert by writing the vector to a stack temporary and reloading the element...
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
LegalizeResult lowerFunnelShiftWithInverse(MachineInstr &MI)
LegalizeResult lowerAbsToMaxNeg(MachineInstr &MI)
LegalizeResult lowerEXT(MachineInstr &MI)
LegalizeResult lowerStore(GStore &MI)
LegalizeResult lowerAbsToCNeg(MachineInstr &MI)
LegalizeResult lowerSADDO_SSUBO(MachineInstr &MI)
MachineInstrBuilder createStackTemporary(TypeSize Bytes, Align Alignment, MachinePointerInfo &PtrInfo)
Create a stack temporary based on the size in bytes and the alignment.
void narrowScalarSrc(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by truncating the operand's ty...
LegalizeResult fewerElementsVectorPhi(GenericMachineInstr &MI, unsigned NumElts)
LegalizeResult lowerFPTOUI(MachineInstr &MI)
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
LegalizeResult narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerUnmergeValues(MachineInstr &MI)
LegalizeResult bitcast(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by replacing the value type.
LegalizeResult lowerBitcast(MachineInstr &MI)
LegalizeResult lowerMinMax(MachineInstr &MI)
LegalizeResult lowerFunnelShiftAsShifts(MachineInstr &MI)
LegalizeResult lowerInsert(MachineInstr &MI)
LegalizeResult lowerReadWriteRegister(MachineInstr &MI)
LegalizeResult lowerExtract(MachineInstr &MI)
LegalizeResult fewerElementsBitcast(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
LegalizeResult lowerISFPCLASS(MachineInstr &MI)
LegalizeResult lowerAddSubSatToMinMax(MachineInstr &MI)
LegalizeResult lowerFPOWI(MachineInstr &MI)
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVectorReduction(MachineInstr &MI)
LegalizeResult reduceLoadStoreWidth(GLoadStore &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult fewerElementsVectorMultiEltType(GenericMachineInstr &MI, unsigned NumElts, std::initializer_list< unsigned > NonVecOpIndices={})
Handles most opcodes.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerVAArg(MachineInstr &MI)
@ Legalized
Instruction has been legalized and the MachineFunction changed.
@ AlreadyLegal
Instruction was already legal and no change was made to the MachineFunction.
@ UnableToLegalize
Some kind of error has occurred and we could not legalize this instruction.
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
LegalizeResult lowerFCopySign(MachineInstr &MI)
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult lowerRotateWithReverseRotate(MachineInstr &MI)
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target.
LegalizeResult lowerFunnelShift(MachineInstr &MI)
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
GISelChangeObserver & Observer
To keep track of changes made by the LegalizerHelper.
void bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a def by inserting a G_BITCAST from ...
LegalizeResult lowerFPTRUNC(MachineInstr &MI)
LegalizeResult lowerFMad(MachineInstr &MI)
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerAddSubSatToAddoSubo(MachineInstr &MI)
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult lowerFFloor(MachineInstr &MI)
LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult fewerElementsVectorSeqReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Register getDynStackAllocTargetPtr(Register SPReg, Register AllocSize, Align Alignment, LLT PtrTy)
LegalizeResult lowerFPTOSI(MachineInstr &MI)
LegalizeResult lowerUITOFP(MachineInstr &MI)
LegalizeResult lowerShuffleVector(MachineInstr &MI)
LegalizeResult fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerMergeValues(MachineInstr &MI)
LegalizeResult fewerElementsVectorUnmergeValues(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
void moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a Use by producing a vector with und...
LegalizeResult bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT CastTy)
Perform Bitcast legalize action on G_EXTRACT_VECTOR_ELT.
LegalizeResult lowerRotate(MachineInstr &MI)
LegalizeResult lowerMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0)
Register coerceToScalar(Register Val)
Cast the given value to an LLT::scalar with an equivalent size.
LegalizeResult lowerDIVREM(MachineInstr &MI)
LegalizeResult lowerSelect(MachineInstr &MI)
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
LegalizeResult narrowScalarFLDEXP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
void bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx)
Legalize a single operand OpIdx of the machine instruction MI as a use by inserting a G_BITCAST to Ca...
void narrowScalarDst(MachineInstr &MI, LLT NarrowTy, unsigned OpIdx, unsigned ExtOpcode)
LegalizeResult libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Legalize an instruction by emiting a runtime library call instead.
LegalizeResult lowerStackRestore(MachineInstr &MI)
LegalizeResult fewerElementsVectorReductions(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult lowerStackSave(MachineInstr &MI)
LegalizeResult fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions.
LegalizeResult lowerTRUNC(MachineInstr &MI)
LegalizeResult lowerBswap(MachineInstr &MI)
Register getVectorElementPointer(Register VecPtr, LLT VecTy, Register Index)
Get a pointer to vector element Index located in memory for a vector of type VecTy starting at a base...
LegalizeResult narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Align getStackTemporaryAlignment(LLT Type, Align MinAlign=Align()) const
Return the alignment to use for a stack temporary object with the given type.
LegalizeResult lowerConstant(MachineInstr &MI)
void widenScalarDst(MachineInstr &MI, LLT WideTy, unsigned OpIdx=0, unsigned TruncOpcode=TargetOpcode::G_TRUNC)
Legalize a single operand OpIdx of the machine instruction MI as a Def by extending the operand's typ...
LegalizeResult legalizeInstrStep(MachineInstr &MI, LostDebugLocObserver &LocObserver)
Replace MI by a sequence of legal instructions that can implement the same operation.
virtual unsigned getExtOpcodeForWideningConstant(LLT SmallTy) const
Return the opcode (SEXT/ZEXT/ANYEXT) that should be performed while widening a constant of type Small...
bool isLegalOrCustom(const LegalityQuery &Query) const
virtual bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, LostDebugLocObserver &LocObserver) const
Called for instructions with the Custom LegalizationAction.
virtual bool legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
TypeSize getValue() const
void checkpoint(bool CheckDebugLocs=true)
Call this to indicate that it's a good point to assess whether locations have been lost.
const MCInstrDesc & get(unsigned Opcode) const
Return the machine instruction descriptor that corresponds to the specified instruction opcode.
Definition: MCInstrInfo.h:63
StringRef getName(unsigned Opcode) const
Returns the name for the instructions with the given opcode.
Definition: MCInstrInfo.h:70
A single uniqued string.
Definition: Metadata.h:720
StringRef getString() const
Definition: Metadata.cpp:610
Machine Value Type.
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:585
iterator getFirstTerminatorForward()
Finds the first terminator in a block by scanning forward.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
instr_iterator instr_end()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
Helper class to build MachineInstr.
MachineInstrBuilder buildFSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FSUB Op0, Op1.
MachineInstrBuilder buildFPTOSI(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FPTOSI Src0.
MachineInstrBuilder buildFMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFreeze(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_FREEZE Src.
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
std::optional< MachineInstrBuilder > materializePtrAdd(Register &Res, Register Op0, const LLT ValueTy, uint64_t Value)
Materialize and insert Res = G_PTR_ADD Op0, (G_CONSTANT Value)
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
MachineInstrBuilder buildNot(const DstOp &Dst, const SrcOp &Src0)
Build and insert a bitwise not, NegOne = G_CONSTANT -1 Res = G_OR Op0, NegOne.
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildConstantPool(const DstOp &Res, unsigned Idx)
Build and insert Res = G_CONSTANT_POOL Idx.
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ... = G_UNMERGE_VALUES Op.
MachineInstrBuilder buildFAbs(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FABS Op0.
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
MachineInstrBuilder buildZExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and inserts Res = G_AND Op, LowBitsSet(ImmOp) Since there is no G_ZEXT_INREG like G_SEXT_INREG,...
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert Res0, ... = G_EXTRACT Src, Idx0.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
MachineInstrBuilder buildCast(const DstOp &Dst, const SrcOp &Src)
Build and insert an appropriate cast between two registers of equal size.
const TargetInstrInfo & getTII()
MachineInstrBuilder buildURem(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_UREM Op0, Op1.
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildFPow(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FPOW Src0, Src1.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
MachineInstrBuilder buildIntrinsicTrunc(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_INTRINSIC_TRUNC Src0.
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
MachineInstrBuilder buildSExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildShuffleSplat(const DstOp &Res, const SrcOp &Src)
Build and insert a vector splat of a scalar Src using a G_INSERT_VECTOR_ELT and G_SHUFFLE_VECTOR idio...
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_SUB Op0, Op1.
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
MachineInstrBuilder buildVScale(const DstOp &Res, unsigned MinElts)
Build and insert Res = G_VSCALE MinElts.
MachineInstrBuilder buildSplatBuildVector(const DstOp &Res, const SrcOp &Src)
Build and insert Res = G_BUILD_VECTOR with Src replicated to fill the number of elements.
MachineInstrBuilder buildIntToPtr(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_INTTOPTR instruction.
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
MachineInstrBuilder buildNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert integer negation Zero = G_CONSTANT 0 Res = G_SUB Zero, Op0.
MachineInstrBuilder buildCTLZ(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ Op0, Src0.
MachineInstrBuilder buildSMax(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMAX Op0, Op1.
MachineInstrBuilder buildAssertZExt(const DstOp &Res, const SrcOp &Op, unsigned Size)
Build and insert Res = G_ASSERT_ZEXT Op, Size.
MachineInstrBuilder buildStrictFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_STRICT_FADD Op0, Op1.
MachineInstrBuilder buildMergeLikeInstr(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ... or Res = G_BUILD_VECTOR Op0, ... or Res = G_CONCAT_VEC...
MachineInstrBuilder buildExtractVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildExtractVectorElementConstant(const DstOp &Res, const SrcOp &Val, const int Idx)
Build and insert Res = G_EXTRACT_VECTOR_ELT Val, Idx.
MachineInstrBuilder buildCTTZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTTZ_ZERO_UNDEF Op0, Src0.
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
MachineInstrBuilder buildAtomicCmpXchg(Register OldValRes, Register Addr, Register CmpVal, Register NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
MachineInstrBuilder buildSITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_SITOFP Src0.
MachineInstrBuilder buildPadVectorWithUndefElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a, b, .....
MachineInstrBuilder buildFrameIndex(const DstOp &Res, int Idx)
Build and insert Res = G_FRAME_INDEX Idx.
MachineInstrBuilder buildCTPOP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTPOP Op0, Src0.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
MachineFunction & getMF()
Getter for the function we currently build.
MachineInstrBuilder buildSMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_SMIN Op0, Op1.
MachineInstrBuilder buildInsert(const DstOp &Res, const SrcOp &Src, const SrcOp &Op, unsigned Index)
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_FCOPYSIGN Op0, Op1.
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInsertVectorElement(const DstOp &Res, const SrcOp &Val, const SrcOp &Elt, const SrcOp &Idx)
Build and insert Res = G_INSERT_VECTOR_ELT Val, Elt, Idx.
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
MachineInstrBuilder buildBitcast(const DstOp &Dst, const SrcOp &Src)
Build and insert Dst = G_BITCAST Src.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
MachineInstrBuilder buildDeleteTrailingVectorElements(const DstOp &Res, const SrcOp &Op0)
Build and insert a, b, ..., x, y, z = G_UNMERGE_VALUES Op0 Res = G_BUILD_VECTOR a,...
MachineRegisterInfo * getMRI()
Getter for MRI.
MachineInstrBuilder buildFPTrunc(const DstOp &Res, const SrcOp &Op, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FPTRUNC Op.
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_OR Op0, Op1.
MachineInstrBuilder buildShuffleVector(const DstOp &Res, const SrcOp &Src1, const SrcOp &Src2, ArrayRef< int > Mask)
Build and insert Res = G_SHUFFLE_VECTOR Src1, Src2, Mask.
MachineInstrBuilder buildCopy(const DstOp &Res, const SrcOp &Op)
Build and insert Res = COPY Op.
const DataLayout & getDataLayout() const
MachineInstrBuilder buildLoadInstr(unsigned Opcode, const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = <opcode> Addr, MMO.
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineInstrBuilder buildMaskLowPtrBits(const DstOp &Res, const SrcOp &Op0, uint32_t NumBits)
Build and insert Res = G_PTRMASK Op0, G_CONSTANT (1 << NumBits) - 1.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
MachineInstrBuilder buildUMin(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_UMIN Op0, Op1.
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert a Res = G_FCMP PredOp0, Op1.
MachineInstrBuilder buildFAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_FADD Op0, Op1.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, std::optional< unsigned > Flags=std::nullopt)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildSExtInReg(const DstOp &Res, const SrcOp &Op, int64_t ImmOp)
Build and insert Res = G_SEXT_INREG Op, ImmOp.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:546
bool isReturn(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:908
bool isCopy() const
bool isDebugInstr() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:549
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:777
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
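The MachineInstr accessors above are what a rewrite typically reads before the original instruction is deleted. A hedged sketch, assuming a simple one-def/one-use instruction; replaceWithCopy is an illustrative name, not a LegalizerHelper entry point:
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;
// Illustrative only: read the def/use operands of MI, emit a plain COPY in
// its place, then unlink and delete the original instruction.
static void replaceWithCopy(MachineIRBuilder &B, MachineInstr &MI) {
  assert(MI.getNumOperands() >= 2 && "expected one def and one use");
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  B.setInstrAndDebugLoc(MI);   // insert the replacement where MI currently sits
  B.buildCopy(Dst, Src);
  MI.eraseFromParent();
}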
A description of a memory reference used in the backend.
void setType(LLT NewTy)
Reset the tracked memory type.
LLT getMemoryType() const
Return the memory type of the memory reference.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
LocationSize getSizeInBits() const
Return the size in bits of the memory reference.
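These MachineMemOperand entries matter when a load or store is narrowed: each new access needs its own memory operand describing the smaller, offset reference. A hedged sketch, assuming the original access has a fixed size that is an even number of bytes; splitHalfMMO is an illustrative name:
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;
// Illustrative only: derive two half-width memory operands from MMO.
static void splitHalfMMO(MachineFunction &MF, MachineMemOperand &MMO,
                         MachineMemOperand *&Lo, MachineMemOperand *&Hi) {
  uint64_t TotalBytes = MMO.getSize().getValue(); // fixed size in bytes
  uint64_t HalfBytes = TotalBytes / 2;
  Lo = MF.getMachineMemOperand(&MMO, /*Offset=*/0, HalfBytes);
  Hi = MF.getMachineMemOperand(&MMO, /*Offset=*/HalfBytes, HalfBytes);
}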
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateES(const char *SymName, unsigned TargetFlags=0)
const ConstantInt * getCImm() const
void setReg(Register Reg)
Change the register this operand corresponds to.
void setCImm(const ConstantInt *CI)
Register getReg() const
getReg - Returns the register number.
const ConstantFP * getFPImm() const
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:818
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
constexpr const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
static StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition: Type.cpp:373
TargetInstrInfo - Interface to description of machine instruction set.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
unsigned getMaxStoresPerMemcpy(bool OptSize) const
Get maximum # of store operations permitted for llvm.memcpy.
Register getStackPointerRegisterToSaveRestore() const
If a physical register, this specifies the register that llvm.stacksave/llvm.stackrestore should save...
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
virtual LLT getOptimalMemOpLLT(const MemOp &Op, const AttributeList &) const
LLT returning variant.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const
Return true if the target supports a memory access of this type for the given address space and align...
unsigned getMaxStoresPerMemmove(bool OptSize) const
Get maximum # of store operations permitted for llvm.memmove.
Align getMinStackArgumentAlignment() const
Return the minimum stack alignment of an argument.
unsigned getMaxStoresPerMemset(bool OptSize) const
Get maximum # of store operations permitted for llvm.memset.
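These TargetLowering hooks drive the decision between expanding a memory intrinsic inline and leaving it as a libcall. A simplified, hedged sketch of that decision for a memset with a known constant length; the function name and the rough store count below are illustrative, not the exact LegalizerHelper logic:
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Attributes.h"
using namespace llvm;
// Illustrative only: can a memset of KnownLen bytes be expanded inline?
static bool memsetFitsInlineLimit(const TargetLowering &TLI, uint64_t KnownLen,
                                  Align DstAlign, bool OptSize) {
  unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
  MemOp Op = MemOp::Set(KnownLen, /*DstAlignCanChange=*/false, DstAlign,
                        /*IsZeroMemset=*/true, /*IsVolatile=*/false);
  LLT StoreTy = TLI.getOptimalMemOpLLT(Op, AttributeList());
  if (!StoreTy.isValid())
    return false; // no target preference; real code falls back to a type list
  uint64_t StoreBytes = StoreTy.getSizeInBytes(); // width of each store
  return (KnownLen + StoreBytes - 1) / StoreBytes <= Limit;
}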
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual Register getRegisterByName(const char *RegName, LLT Ty, const MachineFunction &MF) const
Return the register ID of the name passed in.
const Triple & getTargetTriple() const
TargetOptions Options
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const CallLowering * getCallLowering() const
virtual const TargetLowering * getTargetLowering() const
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:542
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getHalfTy(LLVMContext &C)
static Type * getDoubleTy(LLVMContext &C)
static Type * getX86_FP80Ty(LLVMContext &C)
static Type * getVoidTy(LLVMContext &C)
static Type * getFP128Ty(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
static Type * getFloatTy(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
constexpr LeafTy divideCoefficientBy(ScalarTy RHS) const
We do not provide the '/' operator here because division for polynomial types does not work in the sa...
Definition: TypeSize.h:239
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ FewerElements
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
Definition: LegalizerInfo.h:65
@ Libcall
The operation should be implemented as a call to some kind of runtime support library.
Definition: LegalizerInfo.h:83
@ WidenScalar
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegalizerInfo.h:74
@ NarrowScalar
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type.
Definition: LegalizerInfo.h:52
@ Custom
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:87
@ MoreElements
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
Definition: LegalizerInfo.h:71
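These LegalizeAction values are normally requested declaratively when a target constructs its LegalizerInfo. A hedged fragment as it might appear inside a target's legalizer-info constructor; the opcodes and type choices are purely illustrative and do not describe any real target's rules:
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
getActionDefinitionsBuilder(TargetOpcode::G_ADD)
    .legalFor({S32, S64})        // Legal for these types
    .widenScalarToNextPow2(0)    // WidenScalar: round odd widths up to a power of two
    .clampScalar(0, S32, S64);   // NarrowScalar above S64, WidenScalar below S32
getActionDefinitionsBuilder(TargetOpcode::G_FREM)
    .libcallFor({S32, S64});     // Libcall: lowered to fmodf / fmod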
ConstantMatch< APInt > m_ICst(APInt &Cst)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none.
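The RTLIB getters above map a pair of value types to a concrete runtime routine. A hedged sketch of selecting the routine for an FP-to-signed-int conversion from LLTs; pickFPToSIntLibcall is an illustrative helper name:
#include "llvm/CodeGen/LowLevelTypeUtils.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/ValueTypes.h"
using namespace llvm;
// Illustrative only: choose the runtime routine for an FP -> signed-int
// conversion given the source and destination LLTs.
static RTLIB::Libcall pickFPToSIntLibcall(LLT SrcTy, LLT DstTy) {
  EVT SrcVT = EVT(getMVTForLLT(SrcTy)); // e.g. f64
  EVT DstVT = EVT(getMVTForLLT(DstTy)); // e.g. i128
  RTLIB::Libcall LC = RTLIB::getFPTOSINT(SrcVT, DstVT);
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "no runtime routine for this pair");
  return LC;
}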
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
IterT next_nodbg(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It, then continue incrementing it while it points to a debug instruction.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition: STLExtras.h:862
int64_t maxIntN(int64_t N)
Gets the maximum value for a N-bit signed integer.
Definition: MathExtras.h:219
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by a single def instruction that is Opcode.
Definition: Utils.cpp:625
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1689
const llvm::fltSemantics & getFltSemanticForLLT(LLT Ty)
Get the appropriate floating point arithmetic semantic based on the bit size of the given scalar LLT.
MVT getMVTForLLT(LLT Ty)
Get a rough equivalent of an MVT for a given LLT.
bool matchUnaryPredicate(const MachineRegisterInfo &MRI, Register Reg, std::function< bool(const Constant *ConstVal)> Match, bool AllowUndefs=false)
Attempt to match a unary predicate against a scalar/splat constant or every element of a constant G_B...
Definition: Utils.cpp:1472
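A hedged sketch of how matchUnaryPredicate is typically used: checking a property of every element of a constant operand. The helper name divisorIsNonZeroConstant is illustrative:
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/IR/Constants.h"
using namespace llvm;
// Illustrative only: check that a divisor operand is a constant (scalar,
// splat, or G_BUILD_VECTOR of constants) with no zero elements.
static bool divisorIsNonZeroConstant(const MachineRegisterInfo &MRI,
                                     Register RHS) {
  return matchUnaryPredicate(MRI, RHS, [](const Constant *C) {
    const auto *CI = dyn_cast_or_null<ConstantInt>(C);
    return CI && !CI->isZero();
  });
}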
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI, LostDebugLocObserver &LocObserver)
Create a libcall to memcpy et al.
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:361
LLVM_READNONE LLT getLCMType(LLT OrigTy, LLT TargetTy)
Return the least common multiple type of OrigTy and TargetTy, by changing the number of vector elemen...
Definition: Utils.cpp:1076
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, const char *Name, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args, CallingConv::ID CC, LostDebugLocObserver &LocObserver, MachineInstr *MI=nullptr)
Helper function that creates a libcall to the given Name using the given calling convention CC.
void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition: Utils.cpp:479
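A hedged sketch of extractParts in use, splitting a wide value into NarrowTy-sized pieces before each piece is processed independently; this simplified version only handles widths that are an exact multiple of NarrowTy:
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
using namespace llvm;
// Illustrative only: split Src into NarrowTy-sized pieces in Parts.
static void splitIntoParts(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                           Register Src, LLT NarrowTy,
                           SmallVectorImpl<Register> &Parts) {
  unsigned SrcSize = MRI.getType(Src).getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  assert(SrcSize % NarrowSize == 0 && "sketch handles exact multiples only");
  extractParts(Src, NarrowTy, SrcSize / NarrowSize, Parts, B, MRI);
}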
@ Mul
Product of integers.
@ Xor
Bitwise or logical XOR of integers.
@ Add
Sum of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:338
int64_t minIntN(int64_t N)
Gets the minimum value for a N-bit signed integer.
Definition: MathExtras.h:212
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:413
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:330
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
Align assumeAligned(uint64_t Value)
Treats the value 0 as a 1, so Align is always at least 1.
Definition: Alignment.h:111
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
LLVM_READNONE LLT getGCDType(LLT OrigTy, LLT TargetTy)
Return a type where the total size is the greatest common divisor of OrigTy and TargetTy.
Definition: Utils.cpp:1164
T bit_floor(T Value)
Returns the largest integral power of two no greater than Value if Value is nonzero.
Definition: bit.h:327
void extractVectorParts(Register Reg, unsigned NumElts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Version which handles irregular sub-vector splits.
Definition: Utils.cpp:583
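A few of the alignment helpers above with concrete values, as a hedged illustration (the variable names are arbitrary; the functions come from llvm/Support/Alignment.h and llvm/Support/MathExtras.h):
uint64_t Padded = alignTo(/*Size=*/10, Align(8));           // 16
uint64_t Shared = MinAlign(/*A=*/16, /*B=*/24);             // 8
Align Common = commonAlignment(Align(16), /*Offset=*/8);    // Align(8)
unsigned Shift = Log2(Align(16));                           // 4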
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:249
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:230
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:250
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Align previous() const
Definition: Alignment.h:88
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
LegalizeAction Action
The action to take or the final answer.
Matching combinators.
This class contains a discriminated union of information about pointers in memory operands,...
unsigned getAddrSpace() const
Return the LLVM IR address space number that this pointer points into.
static MachinePointerInfo getConstantPool(MachineFunction &MF)
Return a MachinePointerInfo record that refers to the constant pool.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, bool IsZeroMemset, bool IsVolatile)
static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, Align SrcAlign, bool IsVolatile, bool MemcpyStrSrc=false)