1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
26 #include "llvm/Support/Debug.h"
29 
30 #define DEBUG_TYPE "legalizer"
31 
32 using namespace llvm;
33 using namespace LegalizeActions;
34 using namespace MIPatternMatch;
35 
36 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
37 ///
38 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
39 /// with any leftover piece as type \p LeftoverTy
40 ///
41 /// Returns -1 in the first element of the pair if the breakdown is not
42 /// satisfiable.
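///
/// For example (an illustrative case, not taken from a caller in this file):
/// breaking an s64 value into s24 pieces returns {2, 1}, with \p LeftoverTy
/// set to s16 (two s24 parts plus one s16 leftover).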
43 static std::pair<int, int>
44 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
45  assert(!LeftoverTy.isValid() && "this is an out argument");
46 
47  unsigned Size = OrigTy.getSizeInBits();
48  unsigned NarrowSize = NarrowTy.getSizeInBits();
49  unsigned NumParts = Size / NarrowSize;
50  unsigned LeftoverSize = Size - NumParts * NarrowSize;
51  assert(Size > NarrowSize);
52 
53  if (LeftoverSize == 0)
54  return {NumParts, 0};
55 
56  if (NarrowTy.isVector()) {
57  unsigned EltSize = OrigTy.getScalarSizeInBits();
58  if (LeftoverSize % EltSize != 0)
59  return {-1, -1};
60  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
61  } else {
62  LeftoverTy = LLT::scalar(LeftoverSize);
63  }
64 
65  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
66  return std::make_pair(NumParts, NumLeftover);
67 }
68 
69 static Type *getFloatTypeForLLT(LLVMContext &Ctx,
70                                 LLT Ty) {
71  if (!Ty.isScalar())
72  return nullptr;
73 
74  switch (Ty.getSizeInBits()) {
75  case 16:
76  return Type::getHalfTy(Ctx);
77  case 32:
78  return Type::getFloatTy(Ctx);
79  case 64:
80  return Type::getDoubleTy(Ctx);
81  case 80:
82  return Type::getX86_FP80Ty(Ctx);
83  case 128:
84  return Type::getFP128Ty(Ctx);
85  default:
86  return nullptr;
87  }
88 }
89 
90 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
91                                  GISelChangeObserver &Observer,
92                                  MachineIRBuilder &Builder)
93  : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
94  LI(*MF.getSubtarget().getLegalizerInfo()),
95  TLI(*MF.getSubtarget().getTargetLowering()) { }
96 
97 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
98                                  GISelChangeObserver &Observer,
99                                  MachineIRBuilder &B)
100  : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
101  TLI(*MF.getSubtarget().getTargetLowering()) { }
102 
103 LegalizerHelper::LegalizeResult
104 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
105  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
106 
107  MIRBuilder.setInstrAndDebugLoc(MI);
108 
109  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
110  MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
111  return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
112  auto Step = LI.getAction(MI, MRI);
113  switch (Step.Action) {
114  case Legal:
115  LLVM_DEBUG(dbgs() << ".. Already legal\n");
116  return AlreadyLegal;
117  case Libcall:
118  LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
119  return libcall(MI);
120  case NarrowScalar:
121  LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
122  return narrowScalar(MI, Step.TypeIdx, Step.NewType);
123  case WidenScalar:
124  LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
125  return widenScalar(MI, Step.TypeIdx, Step.NewType);
126  case Bitcast:
127  LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
128  return bitcast(MI, Step.TypeIdx, Step.NewType);
129  case Lower:
130  LLVM_DEBUG(dbgs() << ".. Lower\n");
131  return lower(MI, Step.TypeIdx, Step.NewType);
132  case FewerElements:
133  LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
134  return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
135  case MoreElements:
136  LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
137  return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
138  case Custom:
139  LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
140  return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
141  default:
142  LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
143  return UnableToLegalize;
144  }
145 }
146 
147 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
148  SmallVectorImpl<Register> &VRegs) {
149  for (int i = 0; i < NumParts; ++i)
150  VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
151  MIRBuilder.buildUnmerge(VRegs, Reg);
152 }
153 
154 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
155  LLT MainTy, LLT &LeftoverTy,
156                                    SmallVectorImpl<Register> &VRegs,
157                                    SmallVectorImpl<Register> &LeftoverRegs) {
158  assert(!LeftoverTy.isValid() && "this is an out argument");
159 
160  unsigned RegSize = RegTy.getSizeInBits();
161  unsigned MainSize = MainTy.getSizeInBits();
162  unsigned NumParts = RegSize / MainSize;
163  unsigned LeftoverSize = RegSize - NumParts * MainSize;
164 
165  // Use an unmerge when possible.
166  if (LeftoverSize == 0) {
167  for (unsigned I = 0; I < NumParts; ++I)
168  VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
169  MIRBuilder.buildUnmerge(VRegs, Reg);
170  return true;
171  }
172 
173  if (MainTy.isVector()) {
174  unsigned EltSize = MainTy.getScalarSizeInBits();
175  if (LeftoverSize % EltSize != 0)
176  return false;
177  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
178  } else {
179  LeftoverTy = LLT::scalar(LeftoverSize);
180  }
181 
182  // For irregular sizes, extract the individual parts.
183  for (unsigned I = 0; I != NumParts; ++I) {
184  Register NewReg = MRI.createGenericVirtualRegister(MainTy);
185  VRegs.push_back(NewReg);
186  MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
187  }
188 
189  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
190  Offset += LeftoverSize) {
191  Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
192  LeftoverRegs.push_back(NewReg);
193  MIRBuilder.buildExtract(NewReg, Reg, Offset);
194  }
195 
196  return true;
197 }
198 
199 void LegalizerHelper::insertParts(Register DstReg,
200  LLT ResultTy, LLT PartTy,
201  ArrayRef<Register> PartRegs,
202  LLT LeftoverTy,
203  ArrayRef<Register> LeftoverRegs) {
204  if (!LeftoverTy.isValid()) {
205  assert(LeftoverRegs.empty());
206 
207  if (!ResultTy.isVector()) {
208  MIRBuilder.buildMerge(DstReg, PartRegs);
209  return;
210  }
211 
212  if (PartTy.isVector())
213  MIRBuilder.buildConcatVectors(DstReg, PartRegs);
214  else
215  MIRBuilder.buildBuildVector(DstReg, PartRegs);
216  return;
217  }
218 
219  unsigned PartSize = PartTy.getSizeInBits();
220  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
221 
222  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
223  MIRBuilder.buildUndef(CurResultReg);
224 
225  unsigned Offset = 0;
226  for (Register PartReg : PartRegs) {
227  Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
228  MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
229  CurResultReg = NewResultReg;
230  Offset += PartSize;
231  }
232 
233  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
234  // Use the original output register for the final insert to avoid a copy.
235  Register NewResultReg = (I + 1 == E) ?
236  DstReg : MRI.createGenericVirtualRegister(ResultTy);
237 
238  MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
239  CurResultReg = NewResultReg;
240  Offset += LeftoverPartSize;
241  }
242 }
243 
244 /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
245 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
246                               const MachineInstr &MI) {
247  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
248 
249  const int StartIdx = Regs.size();
250  const int NumResults = MI.getNumOperands() - 1;
251  Regs.resize(Regs.size() + NumResults);
252  for (int I = 0; I != NumResults; ++I)
253  Regs[StartIdx + I] = MI.getOperand(I).getReg();
254 }
255 
256 void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
257  LLT GCDTy, Register SrcReg) {
258  LLT SrcTy = MRI.getType(SrcReg);
259  if (SrcTy == GCDTy) {
260  // If the source already evenly divides the result type, we don't need to do
261  // anything.
262  Parts.push_back(SrcReg);
263  } else {
264  // Need to split into common type sized pieces.
265  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
266  getUnmergeResults(Parts, *Unmerge);
267  }
268 }
269 
270 LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
271  LLT NarrowTy, Register SrcReg) {
272  LLT SrcTy = MRI.getType(SrcReg);
273  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
274  extractGCDType(Parts, GCDTy, SrcReg);
275  return GCDTy;
276 }
277 
278 LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
279                                          SmallVectorImpl<Register> &VRegs,
280                                          unsigned PadStrategy) {
281  LLT LCMTy = getLCMType(DstTy, NarrowTy);
282 
283  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
284  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
285  int NumOrigSrc = VRegs.size();
286 
287  Register PadReg;
288 
289  // Get a value we can use to pad the source value if the sources won't evenly
290  // cover the result type.
291  if (NumOrigSrc < NumParts * NumSubParts) {
292  if (PadStrategy == TargetOpcode::G_ZEXT)
293  PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
294  else if (PadStrategy == TargetOpcode::G_ANYEXT)
295  PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
296  else {
297  assert(PadStrategy == TargetOpcode::G_SEXT);
298 
299  // Shift the sign bit of the low register through the high register.
300  auto ShiftAmt =
301           MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
302  PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
303  }
304  }
305 
306  // Registers for the final merge to be produced.
307  SmallVector<Register, 4> Remerge(NumParts);
308 
309  // Registers needed for intermediate merges, which will be merged into a
310  // source for Remerge.
311  SmallVector<Register, 4> SubMerge(NumSubParts);
312 
313  // Once we've fully read off the end of the original source bits, we can reuse
314  // the same high bits for remaining padding elements.
315  Register AllPadReg;
316 
317  // Build merges to the LCM type to cover the original result type.
318  for (int I = 0; I != NumParts; ++I) {
319  bool AllMergePartsArePadding = true;
320 
321  // Build the requested merges to the requested type.
322  for (int J = 0; J != NumSubParts; ++J) {
323  int Idx = I * NumSubParts + J;
324  if (Idx >= NumOrigSrc) {
325  SubMerge[J] = PadReg;
326  continue;
327  }
328 
329  SubMerge[J] = VRegs[Idx];
330 
331  // There are meaningful bits here we can't reuse later.
332  AllMergePartsArePadding = false;
333  }
334 
335  // If we've filled up a complete piece with padding bits, we can directly
336  // emit the natural sized constant if applicable, rather than a merge of
337  // smaller constants.
338  if (AllMergePartsArePadding && !AllPadReg) {
339  if (PadStrategy == TargetOpcode::G_ANYEXT)
340  AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
341  else if (PadStrategy == TargetOpcode::G_ZEXT)
342  AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
343 
344  // If this is a sign extension, we can't materialize a trivial constant
345  // with the right type and have to produce a merge.
346  }
347 
348  if (AllPadReg) {
349  // Avoid creating additional instructions if we're just adding additional
350  // copies of padding bits.
351  Remerge[I] = AllPadReg;
352  continue;
353  }
354 
355  if (NumSubParts == 1)
356  Remerge[I] = SubMerge[0];
357  else
358  Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
359 
360  // In the sign extend padding case, re-use the first all-signbit merge.
361  if (AllMergePartsArePadding && !AllPadReg)
362  AllPadReg = Remerge[I];
363  }
364 
365  VRegs = std::move(Remerge);
366  return LCMTy;
367 }
368 
369 void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
370  ArrayRef<Register> RemergeRegs) {
371  LLT DstTy = MRI.getType(DstReg);
372 
373  // Create the merge to the widened source, and extract the relevant bits into
374  // the result.
375 
376  if (DstTy == LCMTy) {
377  MIRBuilder.buildMerge(DstReg, RemergeRegs);
378  return;
379  }
380 
381  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
382  if (DstTy.isScalar() && LCMTy.isScalar()) {
383  MIRBuilder.buildTrunc(DstReg, Remerge);
384  return;
385  }
386 
387  if (LCMTy.isVector()) {
388  unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
389  SmallVector<Register, 8> UnmergeDefs(NumDefs);
390  UnmergeDefs[0] = DstReg;
391  for (unsigned I = 1; I != NumDefs; ++I)
392  UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
393 
394  MIRBuilder.buildUnmerge(UnmergeDefs,
395  MIRBuilder.buildMerge(LCMTy, RemergeRegs));
396  return;
397  }
398 
399  llvm_unreachable("unhandled case");
400 }
401 
402 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
403 #define RTLIBCASE_INT(LibcallPrefix) \
404  do { \
405  switch (Size) { \
406  case 32: \
407  return RTLIB::LibcallPrefix##32; \
408  case 64: \
409  return RTLIB::LibcallPrefix##64; \
410  case 128: \
411  return RTLIB::LibcallPrefix##128; \
412  default: \
413  llvm_unreachable("unexpected size"); \
414  } \
415  } while (0)
416 
417 #define RTLIBCASE(LibcallPrefix) \
418  do { \
419  switch (Size) { \
420  case 32: \
421  return RTLIB::LibcallPrefix##32; \
422  case 64: \
423  return RTLIB::LibcallPrefix##64; \
424  case 80: \
425  return RTLIB::LibcallPrefix##80; \
426  case 128: \
427  return RTLIB::LibcallPrefix##128; \
428  default: \
429  llvm_unreachable("unexpected size"); \
430  } \
431  } while (0)
432 
433  switch (Opcode) {
434  case TargetOpcode::G_SDIV:
435  RTLIBCASE_INT(SDIV_I);
436  case TargetOpcode::G_UDIV:
437  RTLIBCASE_INT(UDIV_I);
438  case TargetOpcode::G_SREM:
439  RTLIBCASE_INT(SREM_I);
440  case TargetOpcode::G_UREM:
441  RTLIBCASE_INT(UREM_I);
442  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
443  RTLIBCASE_INT(CTLZ_I);
444  case TargetOpcode::G_FADD:
445  RTLIBCASE(ADD_F);
446  case TargetOpcode::G_FSUB:
447  RTLIBCASE(SUB_F);
448  case TargetOpcode::G_FMUL:
449  RTLIBCASE(MUL_F);
450  case TargetOpcode::G_FDIV:
451  RTLIBCASE(DIV_F);
452  case TargetOpcode::G_FEXP:
453  RTLIBCASE(EXP_F);
454  case TargetOpcode::G_FEXP2:
455  RTLIBCASE(EXP2_F);
456  case TargetOpcode::G_FREM:
457  RTLIBCASE(REM_F);
458  case TargetOpcode::G_FPOW:
459  RTLIBCASE(POW_F);
460  case TargetOpcode::G_FMA:
461  RTLIBCASE(FMA_F);
462  case TargetOpcode::G_FSIN:
463  RTLIBCASE(SIN_F);
464  case TargetOpcode::G_FCOS:
465  RTLIBCASE(COS_F);
466  case TargetOpcode::G_FLOG10:
467  RTLIBCASE(LOG10_F);
468  case TargetOpcode::G_FLOG:
469  RTLIBCASE(LOG_F);
470  case TargetOpcode::G_FLOG2:
471  RTLIBCASE(LOG2_F);
472  case TargetOpcode::G_FCEIL:
473  RTLIBCASE(CEIL_F);
474  case TargetOpcode::G_FFLOOR:
475  RTLIBCASE(FLOOR_F);
476  case TargetOpcode::G_FMINNUM:
477  RTLIBCASE(FMIN_F);
478  case TargetOpcode::G_FMAXNUM:
479  RTLIBCASE(FMAX_F);
480  case TargetOpcode::G_FSQRT:
481  RTLIBCASE(SQRT_F);
482  case TargetOpcode::G_FRINT:
483  RTLIBCASE(RINT_F);
484  case TargetOpcode::G_FNEARBYINT:
485  RTLIBCASE(NEARBYINT_F);
486  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
487  RTLIBCASE(ROUNDEVEN_F);
488  }
489  llvm_unreachable("Unknown libcall function");
490 }
491 
492 /// True if an instruction is in tail position in its caller. Intended for
493 /// legalizing libcalls as tail calls when possible.
494 static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
495                                     MachineInstr &MI) {
496  MachineBasicBlock &MBB = *MI.getParent();
497  const Function &F = MBB.getParent()->getFunction();
498 
499  // Conservatively require the attributes of the call to match those of
500  // the return. Ignore NoAlias and NonNull because they don't affect the
501  // call sequence.
502  AttributeList CallerAttrs = F.getAttributes();
503  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
504  .removeAttribute(Attribute::NoAlias)
505  .removeAttribute(Attribute::NonNull)
506  .hasAttributes())
507  return false;
508 
509  // It's not safe to eliminate the sign / zero extension of the return value.
510  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
511  CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
512  return false;
513 
514  // Only tail call if the following instruction is a standard return.
515  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
516  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
517  return false;
518 
519  return true;
520 }
521 
522 LegalizerHelper::LegalizeResult
523 llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
524                     const CallLowering::ArgInfo &Result,
525                     ArrayRef<CallLowering::ArgInfo> Args,
526                     const CallingConv::ID CC) {
527  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
528 
529  CallLowering::CallLoweringInfo Info;
530  Info.CallConv = CC;
531  Info.Callee = MachineOperand::CreateES(Name);
532  Info.OrigRet = Result;
533  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
534  if (!CLI.lowerCall(MIRBuilder, Info))
535  return LegalizerHelper::UnableToLegalize;
536 
537  return LegalizerHelper::Legalized;
538 }
539 
540 LegalizerHelper::LegalizeResult
541 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
542                     const CallLowering::ArgInfo &Result,
543                     ArrayRef<CallLowering::ArgInfo> Args) {
544  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
545  const char *Name = TLI.getLibcallName(Libcall);
546  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
547  return createLibcall(MIRBuilder, Name, Result, Args, CC);
548 }
549 
550 // Useful for libcalls where all operands have the same type.
551 static LegalizerHelper::LegalizeResult
552 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
553               Type *OpType) {
554  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
555 
557  for (unsigned i = 1; i < MI.getNumOperands(); i++)
558  Args.push_back({MI.getOperand(i).getReg(), OpType});
559  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
560  Args);
561 }
562 
563 LegalizerHelper::LegalizeResult
564 llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
565                        MachineInstr &MI) {
566  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
567 
569  // Add all the args, except for the last which is an imm denoting 'tail'.
570  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
571  Register Reg = MI.getOperand(i).getReg();
572 
573  // Need to derive an IR type for call lowering.
574  LLT OpLLT = MRI.getType(Reg);
575  Type *OpTy = nullptr;
576  if (OpLLT.isPointer())
577  OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
578  else
579  OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
580  Args.push_back({Reg, OpTy});
581  }
582 
583  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
584  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
585  RTLIB::Libcall RTLibcall;
586  unsigned Opc = MI.getOpcode();
587  switch (Opc) {
588  case TargetOpcode::G_BZERO:
589  RTLibcall = RTLIB::BZERO;
590  break;
591  case TargetOpcode::G_MEMCPY:
592  RTLibcall = RTLIB::MEMCPY;
593  break;
594  case TargetOpcode::G_MEMMOVE:
595  RTLibcall = RTLIB::MEMMOVE;
596  break;
597  case TargetOpcode::G_MEMSET:
598  RTLibcall = RTLIB::MEMSET;
599  break;
600  default:
601  return LegalizerHelper::UnableToLegalize;
602  }
603  const char *Name = TLI.getLibcallName(RTLibcall);
604 
605  // Unsupported libcall on the target.
606  if (!Name) {
607  LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
608  << MIRBuilder.getTII().getName(Opc) << "\n");
609  return LegalizerHelper::UnableToLegalize;
610  }
611 
612  CallLowering::CallLoweringInfo Info;
613  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
614  Info.Callee = MachineOperand::CreateES(Name);
615  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
616  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
617  isLibCallInTailPosition(MIRBuilder.getTII(), MI);
618 
619  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
620  if (!CLI.lowerCall(MIRBuilder, Info))
621  return LegalizerHelper::UnableToLegalize;
622 
623  if (Info.LoweredTailCall) {
624  assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
625  // We must have a return following the call (or debug insts) to get past
626  // isLibCallInTailPosition.
627  do {
628  MachineInstr *Next = MI.getNextNode();
629  assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
630  "Expected instr following MI to be return or debug inst?");
631  // We lowered a tail call, so the call is now the return from the block.
632  // Delete the old return.
633  Next->eraseFromParent();
634  } while (MI.getNextNode());
635  }
636 
637  return LegalizerHelper::Legalized;
638 }
639 
640 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
641  Type *FromType) {
642  auto ToMVT = MVT::getVT(ToType);
643  auto FromMVT = MVT::getVT(FromType);
644 
645  switch (Opcode) {
646  case TargetOpcode::G_FPEXT:
647  return RTLIB::getFPEXT(FromMVT, ToMVT);
648  case TargetOpcode::G_FPTRUNC:
649  return RTLIB::getFPROUND(FromMVT, ToMVT);
650  case TargetOpcode::G_FPTOSI:
651  return RTLIB::getFPTOSINT(FromMVT, ToMVT);
652  case TargetOpcode::G_FPTOUI:
653  return RTLIB::getFPTOUINT(FromMVT, ToMVT);
654  case TargetOpcode::G_SITOFP:
655  return RTLIB::getSINTTOFP(FromMVT, ToMVT);
656  case TargetOpcode::G_UITOFP:
657  return RTLIB::getUINTTOFP(FromMVT, ToMVT);
658  }
659  llvm_unreachable("Unsupported libcall function");
660 }
661 
662 static LegalizerHelper::LegalizeResult
663 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
664                   Type *FromType) {
665  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
666  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
667  {{MI.getOperand(1).getReg(), FromType}});
668 }
669 
670 LegalizerHelper::LegalizeResult
671 LegalizerHelper::libcall(MachineInstr &MI) {
672  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
673  unsigned Size = LLTy.getSizeInBits();
674  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
675 
676  switch (MI.getOpcode()) {
677  default:
678  return UnableToLegalize;
679  case TargetOpcode::G_SDIV:
680  case TargetOpcode::G_UDIV:
681  case TargetOpcode::G_SREM:
682  case TargetOpcode::G_UREM:
683  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
684  Type *HLTy = IntegerType::get(Ctx, Size);
685  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
686  if (Status != Legalized)
687  return Status;
688  break;
689  }
690  case TargetOpcode::G_FADD:
691  case TargetOpcode::G_FSUB:
692  case TargetOpcode::G_FMUL:
693  case TargetOpcode::G_FDIV:
694  case TargetOpcode::G_FMA:
695  case TargetOpcode::G_FPOW:
696  case TargetOpcode::G_FREM:
697  case TargetOpcode::G_FCOS:
698  case TargetOpcode::G_FSIN:
699  case TargetOpcode::G_FLOG10:
700  case TargetOpcode::G_FLOG:
701  case TargetOpcode::G_FLOG2:
702  case TargetOpcode::G_FEXP:
703  case TargetOpcode::G_FEXP2:
704  case TargetOpcode::G_FCEIL:
705  case TargetOpcode::G_FFLOOR:
706  case TargetOpcode::G_FMINNUM:
707  case TargetOpcode::G_FMAXNUM:
708  case TargetOpcode::G_FSQRT:
709  case TargetOpcode::G_FRINT:
710  case TargetOpcode::G_FNEARBYINT:
711  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
712  Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
713  if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
714  LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
715  return UnableToLegalize;
716  }
717  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
718  if (Status != Legalized)
719  return Status;
720  break;
721  }
722  case TargetOpcode::G_FPEXT:
723  case TargetOpcode::G_FPTRUNC: {
724  Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
725  Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
726  if (!FromTy || !ToTy)
727  return UnableToLegalize;
728  LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
729  if (Status != Legalized)
730  return Status;
731  break;
732  }
733  case TargetOpcode::G_FPTOSI:
734  case TargetOpcode::G_FPTOUI: {
735  // FIXME: Support other types
736  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
737  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
738  if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
739  return UnableToLegalize;
740  LegalizeResult Status = conversionLibcall(
741  MI, MIRBuilder,
742  ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
743  FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
744  if (Status != Legalized)
745  return Status;
746  break;
747  }
748  case TargetOpcode::G_SITOFP:
749  case TargetOpcode::G_UITOFP: {
750  // FIXME: Support other types
751  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
752  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
753  if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
754  return UnableToLegalize;
755  LegalizeResult Status = conversionLibcall(
756  MI, MIRBuilder,
757  ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
758  FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
759  if (Status != Legalized)
760  return Status;
761  break;
762  }
763  case TargetOpcode::G_BZERO:
764  case TargetOpcode::G_MEMCPY:
765  case TargetOpcode::G_MEMMOVE:
766  case TargetOpcode::G_MEMSET: {
767  LegalizeResult Result =
768  createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
769  if (Result != Legalized)
770  return Result;
771  MI.eraseFromParent();
772  return Result;
773  }
774  }
775 
776  MI.eraseFromParent();
777  return Legalized;
778 }
779 
780 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
781                                                               unsigned TypeIdx,
782  LLT NarrowTy) {
783  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
784  uint64_t NarrowSize = NarrowTy.getSizeInBits();
785 
786  switch (MI.getOpcode()) {
787  default:
788  return UnableToLegalize;
789  case TargetOpcode::G_IMPLICIT_DEF: {
790  Register DstReg = MI.getOperand(0).getReg();
791  LLT DstTy = MRI.getType(DstReg);
792 
793  // If SizeOp0 is not an exact multiple of NarrowSize, emit
794  // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
795  // FIXME: Although this would also be legal for the general case, it causes
796  // a lot of regressions in the emitted code (superfluous COPYs, artifact
797  // combines not being hit). This seems to be a problem related to the
798  // artifact combiner.
799  if (SizeOp0 % NarrowSize != 0) {
800  LLT ImplicitTy = NarrowTy;
801  if (DstTy.isVector())
802  ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);
803 
804  Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
805  MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
806 
807  MI.eraseFromParent();
808  return Legalized;
809  }
810 
811  int NumParts = SizeOp0 / NarrowSize;
812 
813  SmallVector<Register, 2> DstRegs;
814  for (int i = 0; i < NumParts; ++i)
815  DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
816 
817  if (DstTy.isVector())
818  MIRBuilder.buildBuildVector(DstReg, DstRegs);
819  else
820  MIRBuilder.buildMerge(DstReg, DstRegs);
821  MI.eraseFromParent();
822  return Legalized;
823  }
824  case TargetOpcode::G_CONSTANT: {
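 // The constant is split into NarrowTy-sized pieces (plus one leftover piece
 // when the width is not an exact multiple), each piece materialized from a
 // shifted, truncated slice of the original APInt.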
825  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
826  const APInt &Val = MI.getOperand(1).getCImm()->getValue();
827  unsigned TotalSize = Ty.getSizeInBits();
828  unsigned NarrowSize = NarrowTy.getSizeInBits();
829  int NumParts = TotalSize / NarrowSize;
830 
831  SmallVector<Register, 4> PartRegs;
832  for (int I = 0; I != NumParts; ++I) {
833  unsigned Offset = I * NarrowSize;
834  auto K = MIRBuilder.buildConstant(NarrowTy,
835  Val.lshr(Offset).trunc(NarrowSize));
836  PartRegs.push_back(K.getReg(0));
837  }
838 
839  LLT LeftoverTy;
840  unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
841  SmallVector<Register, 1> LeftoverRegs;
842  if (LeftoverBits != 0) {
843  LeftoverTy = LLT::scalar(LeftoverBits);
844  auto K = MIRBuilder.buildConstant(
845  LeftoverTy,
846  Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
847  LeftoverRegs.push_back(K.getReg(0));
848  }
849 
850  insertParts(MI.getOperand(0).getReg(),
851  Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
852 
853  MI.eraseFromParent();
854  return Legalized;
855  }
856  case TargetOpcode::G_SEXT:
857  case TargetOpcode::G_ZEXT:
858  case TargetOpcode::G_ANYEXT:
859  return narrowScalarExt(MI, TypeIdx, NarrowTy);
860  case TargetOpcode::G_TRUNC: {
861  if (TypeIdx != 1)
862  return UnableToLegalize;
863 
864  uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
865  if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
866  LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
867  return UnableToLegalize;
868  }
869 
870  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
871  MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
872  MI.eraseFromParent();
873  return Legalized;
874  }
875 
876  case TargetOpcode::G_FREEZE:
877  return reduceOperationWidth(MI, TypeIdx, NarrowTy);
878  case TargetOpcode::G_ADD:
879  case TargetOpcode::G_SUB:
880  case TargetOpcode::G_SADDO:
881  case TargetOpcode::G_SSUBO:
882  case TargetOpcode::G_SADDE:
883  case TargetOpcode::G_SSUBE:
884  case TargetOpcode::G_UADDO:
885  case TargetOpcode::G_USUBO:
886  case TargetOpcode::G_UADDE:
887  case TargetOpcode::G_USUBE:
888  return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
889  case TargetOpcode::G_MUL:
890  case TargetOpcode::G_UMULH:
891  return narrowScalarMul(MI, NarrowTy);
892  case TargetOpcode::G_EXTRACT:
893  return narrowScalarExtract(MI, TypeIdx, NarrowTy);
894  case TargetOpcode::G_INSERT:
895  return narrowScalarInsert(MI, TypeIdx, NarrowTy);
896  case TargetOpcode::G_LOAD: {
897  auto &MMO = **MI.memoperands_begin();
898  Register DstReg = MI.getOperand(0).getReg();
899  LLT DstTy = MRI.getType(DstReg);
900  if (DstTy.isVector())
901  return UnableToLegalize;
902 
903  if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
904  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
905  MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
906  MIRBuilder.buildAnyExt(DstReg, TmpReg);
907  MI.eraseFromParent();
908  return Legalized;
909  }
910 
911  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
912  }
913  case TargetOpcode::G_ZEXTLOAD:
914  case TargetOpcode::G_SEXTLOAD: {
915  bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
916  Register DstReg = MI.getOperand(0).getReg();
917  Register PtrReg = MI.getOperand(1).getReg();
918 
919  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
920  auto &MMO = **MI.memoperands_begin();
921  unsigned MemSize = MMO.getSizeInBits();
922 
923  if (MemSize == NarrowSize) {
924  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
925  } else if (MemSize < NarrowSize) {
926  MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
927  } else if (MemSize > NarrowSize) {
928  // FIXME: Need to split the load.
929  return UnableToLegalize;
930  }
931 
932  if (ZExt)
933  MIRBuilder.buildZExt(DstReg, TmpReg);
934  else
935  MIRBuilder.buildSExt(DstReg, TmpReg);
936 
937  MI.eraseFromParent();
938  return Legalized;
939  }
940  case TargetOpcode::G_STORE: {
941  const auto &MMO = **MI.memoperands_begin();
942 
943  Register SrcReg = MI.getOperand(0).getReg();
944  LLT SrcTy = MRI.getType(SrcReg);
945  if (SrcTy.isVector())
946  return UnableToLegalize;
947 
948  int NumParts = SizeOp0 / NarrowSize;
949  unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
950  unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
951  if (SrcTy.isVector() && LeftoverBits != 0)
952  return UnableToLegalize;
953 
954  if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
955  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
956  auto &MMO = **MI.memoperands_begin();
957  MIRBuilder.buildTrunc(TmpReg, SrcReg);
958  MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
959  MI.eraseFromParent();
960  return Legalized;
961  }
962 
963  return reduceLoadStoreWidth(MI, 0, NarrowTy);
964  }
965  case TargetOpcode::G_SELECT:
966  return narrowScalarSelect(MI, TypeIdx, NarrowTy);
967  case TargetOpcode::G_AND:
968  case TargetOpcode::G_OR:
969  case TargetOpcode::G_XOR: {
970  // Legalize bitwise operation:
971  // A = BinOp<Ty> B, C
972  // into:
973  // B1, ..., BN = G_UNMERGE_VALUES B
974  // C1, ..., CN = G_UNMERGE_VALUES C
975  // A1 = BinOp<Ty/N> B1, C1
976  // ...
977  // AN = BinOp<Ty/N> BN, CN
978  // A = G_MERGE_VALUES A1, ..., AN
979  return narrowScalarBasic(MI, TypeIdx, NarrowTy);
980  }
981  case TargetOpcode::G_SHL:
982  case TargetOpcode::G_LSHR:
983  case TargetOpcode::G_ASHR:
984  return narrowScalarShift(MI, TypeIdx, NarrowTy);
985  case TargetOpcode::G_CTLZ:
986  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
987  case TargetOpcode::G_CTTZ:
988  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
989  case TargetOpcode::G_CTPOP:
990  if (TypeIdx == 1)
991  switch (MI.getOpcode()) {
992  case TargetOpcode::G_CTLZ:
993  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
994  return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
995  case TargetOpcode::G_CTTZ:
996  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
997  return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
998  case TargetOpcode::G_CTPOP:
999  return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1000  default:
1001  return UnableToLegalize;
1002  }
1003 
1004  Observer.changingInstr(MI);
1005  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1006  Observer.changedInstr(MI);
1007  return Legalized;
1008  case TargetOpcode::G_INTTOPTR:
1009  if (TypeIdx != 1)
1010  return UnableToLegalize;
1011 
1012  Observer.changingInstr(MI);
1013  narrowScalarSrc(MI, NarrowTy, 1);
1014  Observer.changedInstr(MI);
1015  return Legalized;
1016  case TargetOpcode::G_PTRTOINT:
1017  if (TypeIdx != 0)
1018  return UnableToLegalize;
1019 
1020  Observer.changingInstr(MI);
1021  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1022  Observer.changedInstr(MI);
1023  return Legalized;
1024  case TargetOpcode::G_PHI: {
1025  // FIXME: add support for when SizeOp0 isn't an exact multiple of
1026  // NarrowSize.
1027  if (SizeOp0 % NarrowSize != 0)
1028  return UnableToLegalize;
1029 
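 // A sketch of what follows: unmerge each incoming value into NarrowTy parts
 // in its predecessor block, build one narrow G_PHI per part, then merge the
 // parts back together at the first non-PHI point of this block.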
1030  unsigned NumParts = SizeOp0 / NarrowSize;
1031  SmallVector<Register, 2> DstRegs(NumParts);
1032  SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1033  Observer.changingInstr(MI);
1034  for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1035  MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1036  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1037  extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1038  SrcRegs[i / 2]);
1039  }
1040  MachineBasicBlock &MBB = *MI.getParent();
1041  MIRBuilder.setInsertPt(MBB, MI);
1042  for (unsigned i = 0; i < NumParts; ++i) {
1043  DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1044  MachineInstrBuilder MIB =
1045  MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1046  for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1047  MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1048  }
1049  MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1050  MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1051  Observer.changedInstr(MI);
1052  MI.eraseFromParent();
1053  return Legalized;
1054  }
1055  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1056  case TargetOpcode::G_INSERT_VECTOR_ELT: {
1057  if (TypeIdx != 2)
1058  return UnableToLegalize;
1059 
1060  int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1061  Observer.changingInstr(MI);
1062  narrowScalarSrc(MI, NarrowTy, OpIdx);
1063  Observer.changedInstr(MI);
1064  return Legalized;
1065  }
1066  case TargetOpcode::G_ICMP: {
1067  uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1068  if (NarrowSize * 2 != SrcSize)
1069  return UnableToLegalize;
1070 
1071  Observer.changingInstr(MI);
1072  Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
1073  Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
1074  MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
1075 
1076  Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
1077  Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
1078  MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
1079 
1080  CmpInst::Predicate Pred =
1081  static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1082  LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
1083 
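 // For EQ/NE, it is enough to test (LHSL ^ RHSL) | (LHSH ^ RHSH) against
 // zero. For the other predicates, compare the high halves, and fall back to
 // the unsigned low-half comparison only when the high halves are equal.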
1084  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
1085  MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
1086  MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
1087  MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
1088  MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1089  MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
1090  } else {
1091  MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1092  MachineInstrBuilder CmpHEQ =
1093  MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1094  MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1095  ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1096  MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
1097  }
1098  Observer.changedInstr(MI);
1099  MI.eraseFromParent();
1100  return Legalized;
1101  }
1102  case TargetOpcode::G_SEXT_INREG: {
1103  if (TypeIdx != 0)
1104  return UnableToLegalize;
1105 
1106  int64_t SizeInBits = MI.getOperand(2).getImm();
1107 
1108  // So long as the new type has more bits than the bits we're extending we
1109  // don't need to break it apart.
1110  if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1111  Observer.changingInstr(MI);
1112  // We don't lose any non-extension bits by truncating the src and
1113  // sign-extending the dst.
1114  MachineOperand &MO1 = MI.getOperand(1);
1115  auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1116  MO1.setReg(TruncMIB.getReg(0));
1117 
1118  MachineOperand &MO2 = MI.getOperand(0);
1119  Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1120  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1121  MIRBuilder.buildSExt(MO2, DstExt);
1122  MO2.setReg(DstExt);
1123  Observer.changedInstr(MI);
1124  return Legalized;
1125  }
1126 
1127  // Break it apart. Components below the extension point are unmodified. The
1128  // component containing the extension point becomes a narrower SEXT_INREG.
1129  // Components above it are ashr'd from the component containing the
1130  // extension point.
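    // For example (register names are illustrative), narrowing
    //   %d:_(s64) = G_SEXT_INREG %s:_(s64), 8
    // with NarrowTy = s32 produces roughly:
    //   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %s
    //   %lo_ext:_(s32) = G_SEXT_INREG %lo, 8
    //   %hi_ext:_(s32) = G_ASHR %lo_ext, 31
    //   %d:_(s64) = G_MERGE_VALUES %lo_ext, %hi_ext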
1131  if (SizeOp0 % NarrowSize != 0)
1132  return UnableToLegalize;
1133  int NumParts = SizeOp0 / NarrowSize;
1134 
1135  // List the registers where the destination will be scattered.
1136  SmallVector<Register, 2> DstRegs;
1137  // List the registers where the source will be split.
1138  SmallVector<Register, 2> SrcRegs;
1139 
1140  // Create all the temporary registers.
1141  for (int i = 0; i < NumParts; ++i) {
1142  Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1143 
1144  SrcRegs.push_back(SrcReg);
1145  }
1146 
1147  // Explode the big arguments into smaller chunks.
1148  MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1149 
1150  Register AshrCstReg =
1151  MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1152  .getReg(0);
1153  Register FullExtensionReg = 0;
1154  Register PartialExtensionReg = 0;
1155 
1156  // Do the operation on each small part.
1157  for (int i = 0; i < NumParts; ++i) {
1158  if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1159  DstRegs.push_back(SrcRegs[i]);
1160  else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1161  assert(PartialExtensionReg &&
1162  "Expected to visit partial extension before full");
1163  if (FullExtensionReg) {
1164  DstRegs.push_back(FullExtensionReg);
1165  continue;
1166  }
1167  DstRegs.push_back(
1168  MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1169  .getReg(0));
1170  FullExtensionReg = DstRegs.back();
1171  } else {
1172  DstRegs.push_back(
1173  MIRBuilder
1174  .buildInstr(
1175  TargetOpcode::G_SEXT_INREG, {NarrowTy},
1176  {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1177  .getReg(0));
1178  PartialExtensionReg = DstRegs.back();
1179  }
1180  }
1181 
1182  // Gather the destination registers into the final destination.
1183  Register DstReg = MI.getOperand(0).getReg();
1184  MIRBuilder.buildMerge(DstReg, DstRegs);
1185  MI.eraseFromParent();
1186  return Legalized;
1187  }
1188  case TargetOpcode::G_BSWAP:
1189  case TargetOpcode::G_BITREVERSE: {
1190  if (SizeOp0 % NarrowSize != 0)
1191  return UnableToLegalize;
1192 
1193  Observer.changingInstr(MI);
1194  SmallVector<Register, 2> SrcRegs, DstRegs;
1195  unsigned NumParts = SizeOp0 / NarrowSize;
1196  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1197 
1198  for (unsigned i = 0; i < NumParts; ++i) {
1199  auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1200  {SrcRegs[NumParts - 1 - i]});
1201  DstRegs.push_back(DstPart.getReg(0));
1202  }
1203 
1204  MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1205 
1206  Observer.changedInstr(MI);
1207  MI.eraseFromParent();
1208  return Legalized;
1209  }
1210  case TargetOpcode::G_PTR_ADD:
1211  case TargetOpcode::G_PTRMASK: {
1212  if (TypeIdx != 1)
1213  return UnableToLegalize;
1214  Observer.changingInstr(MI);
1215  narrowScalarSrc(MI, NarrowTy, 2);
1216  Observer.changedInstr(MI);
1217  return Legalized;
1218  }
1219  case TargetOpcode::G_FPTOUI: {
1220  if (TypeIdx != 0)
1221  return UnableToLegalize;
1222  Observer.changingInstr(MI);
1223  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1224  Observer.changedInstr(MI);
1225  return Legalized;
1226  }
1227  case TargetOpcode::G_FPTOSI: {
1228  if (TypeIdx != 0)
1229  return UnableToLegalize;
1230  Observer.changingInstr(MI);
1231  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_SEXT);
1232  Observer.changedInstr(MI);
1233  return Legalized;
1234  }
1235  case TargetOpcode::G_FPEXT:
1236  if (TypeIdx != 0)
1237  return UnableToLegalize;
1238  Observer.changingInstr(MI);
1239  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1240  Observer.changedInstr(MI);
1241  return Legalized;
1242  }
1243 }
1244 
1245 Register LegalizerHelper::coerceToScalar(Register Val) {
1246  LLT Ty = MRI.getType(Val);
1247  if (Ty.isScalar())
1248  return Val;
1249 
1250  const DataLayout &DL = MIRBuilder.getDataLayout();
1251  LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1252  if (Ty.isPointer()) {
1253  if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1254  return Register();
1255  return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1256  }
1257 
1258  Register NewVal = Val;
1259 
1260  assert(Ty.isVector());
1261  LLT EltTy = Ty.getElementType();
1262  if (EltTy.isPointer())
1263  NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1264  return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1265 }
1266 
1267 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1268                                      unsigned OpIdx, unsigned ExtOpcode) {
1269  MachineOperand &MO = MI.getOperand(OpIdx);
1270  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1271  MO.setReg(ExtB.getReg(0));
1272 }
1273 
1274 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1275                                       unsigned OpIdx) {
1276  MachineOperand &MO = MI.getOperand(OpIdx);
1277  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1278  MO.setReg(ExtB.getReg(0));
1279 }
1280 
1281 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1282                                      unsigned OpIdx, unsigned TruncOpcode) {
1283  MachineOperand &MO = MI.getOperand(OpIdx);
1284  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1285  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1286  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1287  MO.setReg(DstExt);
1288 }
1289 
1290 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1291                                       unsigned OpIdx, unsigned ExtOpcode) {
1292  MachineOperand &MO = MI.getOperand(OpIdx);
1293  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1294  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1295  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1296  MO.setReg(DstTrunc);
1297 }
1298 
1299 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1300                                             unsigned OpIdx) {
1301  MachineOperand &MO = MI.getOperand(OpIdx);
1302  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1303  MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
1304 }
1305 
1306 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1307                                             unsigned OpIdx) {
1308  MachineOperand &MO = MI.getOperand(OpIdx);
1309 
1310  LLT OldTy = MRI.getType(MO.getReg());
1311  unsigned OldElts = OldTy.getNumElements();
1312  unsigned NewElts = MoreTy.getNumElements();
1313 
1314  unsigned NumParts = NewElts / OldElts;
1315 
1316  // Use concat_vectors if the result is a multiple of the number of elements.
1317  if (NumParts * OldElts == NewElts) {
1318  SmallVector<Register, 8> Parts;
1319  Parts.push_back(MO.getReg());
1320 
1321  Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1322  for (unsigned I = 1; I != NumParts; ++I)
1323  Parts.push_back(ImpDef);
1324 
1325  auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1326  MO.setReg(Concat.getReg(0));
1327  return;
1328  }
1329 
1330  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1331  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1332  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1333  MO.setReg(MoreReg);
1334 }
1335 
1336 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1337  MachineOperand &Op = MI.getOperand(OpIdx);
1338  Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1339 }
1340 
1341 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1342  MachineOperand &MO = MI.getOperand(OpIdx);
1343  Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1344  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1345  MIRBuilder.buildBitcast(MO, CastDst);
1346  MO.setReg(CastDst);
1347 }
1348 
1349 LegalizerHelper::LegalizeResult
1350 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1351  LLT WideTy) {
1352  if (TypeIdx != 1)
1353  return UnableToLegalize;
1354 
1355  Register DstReg = MI.getOperand(0).getReg();
1356  LLT DstTy = MRI.getType(DstReg);
1357  if (DstTy.isVector())
1358  return UnableToLegalize;
1359 
1360  Register Src1 = MI.getOperand(1).getReg();
1361  LLT SrcTy = MRI.getType(Src1);
1362  const int DstSize = DstTy.getSizeInBits();
1363  const int SrcSize = SrcTy.getSizeInBits();
1364  const int WideSize = WideTy.getSizeInBits();
1365  const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1366 
1367  unsigned NumOps = MI.getNumOperands();
1368  unsigned NumSrc = MI.getNumOperands() - 1;
1369  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1370 
1371  if (WideSize >= DstSize) {
1372  // Directly pack the bits in the target type.
1373  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1374 
1375  for (unsigned I = 2; I != NumOps; ++I) {
1376  const unsigned Offset = (I - 1) * PartSize;
1377 
1378  Register SrcReg = MI.getOperand(I).getReg();
1379  assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1380 
1381  auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1382 
1383  Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1384  MRI.createGenericVirtualRegister(WideTy);
1385 
1386  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1387  auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1388  MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1389  ResultReg = NextResult;
1390  }
1391 
1392  if (WideSize > DstSize)
1393  MIRBuilder.buildTrunc(DstReg, ResultReg);
1394  else if (DstTy.isPointer())
1395  MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1396 
1397  MI.eraseFromParent();
1398  return Legalized;
1399  }
1400 
1401  // Unmerge the original values to the GCD type, and recombine to the next
1402  // multiple greater than the original type.
1403  //
1404  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1405  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1406  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1407  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1408  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1409  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1410  // %12:_(s12) = G_MERGE_VALUES %10, %11
1411  //
1412  // Padding with undef if necessary:
1413  //
1414  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1415  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1416  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1417  // %7:_(s2) = G_IMPLICIT_DEF
1418  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1419  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1420  // %10:_(s12) = G_MERGE_VALUES %8, %9
1421 
1422  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1423  LLT GCDTy = LLT::scalar(GCD);
1424 
1425  SmallVector<Register, 8> Parts;
1426  SmallVector<Register, 8> NewMergeRegs;
1427  SmallVector<Register, 8> Unmerges;
1428  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1429 
1430  // Decompose the original operands if they don't evenly divide.
1431  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1432  Register SrcReg = MI.getOperand(I).getReg();
1433  if (GCD == SrcSize) {
1434  Unmerges.push_back(SrcReg);
1435  } else {
1436  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1437  for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1438  Unmerges.push_back(Unmerge.getReg(J));
1439  }
1440  }
1441 
1442  // Pad with undef to the next size that is a multiple of the requested size.
1443  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1444  Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1445  for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1446  Unmerges.push_back(UndefReg);
1447  }
1448 
1449  const int PartsPerGCD = WideSize / GCD;
1450 
1451  // Build merges of each piece.
1452  ArrayRef<Register> Slicer(Unmerges);
1453  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1454  auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1455  NewMergeRegs.push_back(Merge.getReg(0));
1456  }
1457 
1458  // A truncate may be necessary if the requested type doesn't evenly divide the
1459  // original result type.
1460  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1461  MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1462  } else {
1463  auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1464  MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1465  }
1466 
1467  MI.eraseFromParent();
1468  return Legalized;
1469 }
1470 
1471 Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
1472  Register WideReg = MRI.createGenericVirtualRegister(WideTy);
1473  LLT OrigTy = MRI.getType(OrigReg);
1474  LLT LCMTy = getLCMType(WideTy, OrigTy);
1475 
1476  const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
1477  const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
1478 
1479  Register UnmergeSrc = WideReg;
1480 
1481  // Create a merge to the LCM type, padding with undef
1482  // %0:_(<3 x s32>) = G_FOO => <4 x s32>
1483  // =>
1484  // %1:_(<4 x s32>) = G_FOO
1485  // %2:_(<4 x s32>) = G_IMPLICIT_DEF
1486  // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
1487  // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
1488  if (NumMergeParts > 1) {
1489  Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
1490  SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
1491  MergeParts[0] = WideReg;
1492  UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
1493  }
1494 
1495  // Unmerge to the original register and pad with dead defs.
1496  SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
1497  UnmergeResults[0] = OrigReg;
1498  for (int I = 1; I != NumUnmergeParts; ++I)
1499  UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
1500 
1501  MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
1502  return WideReg;
1503 }
1504 
1505 LegalizerHelper::LegalizeResult
1506 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1507  LLT WideTy) {
1508  if (TypeIdx != 0)
1509  return UnableToLegalize;
1510 
1511  int NumDst = MI.getNumOperands() - 1;
1512  Register SrcReg = MI.getOperand(NumDst).getReg();
1513  LLT SrcTy = MRI.getType(SrcReg);
1514  if (SrcTy.isVector())
1515  return UnableToLegalize;
1516 
1517  Register Dst0Reg = MI.getOperand(0).getReg();
1518  LLT DstTy = MRI.getType(Dst0Reg);
1519  if (!DstTy.isScalar())
1520  return UnableToLegalize;
1521 
1522  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1523  if (SrcTy.isPointer()) {
1524  const DataLayout &DL = MIRBuilder.getDataLayout();
1525  if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1526  LLVM_DEBUG(
1527  dbgs() << "Not casting non-integral address space integer\n");
1528  return UnableToLegalize;
1529  }
1530 
1531  SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1532  SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1533  }
1534 
1535  // Widen SrcTy to WideTy. This does not affect the result, but since the
1536  // user requested this size, it is probably better handled than SrcTy and
1537  // should reduce the total number of legalization artifacts
1538  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1539  SrcTy = WideTy;
1540  SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1541  }
1542 
1543  // There's no unmerge type to target. Directly extract the bits from the
1544  // source type.
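    // For example (an illustrative case), unmerging s64 into four s16 results
    // with WideTy = s64: part 0 is a plain truncate of the source, and parts
    // 1..3 truncate the source shifted right by 16, 32 and 48 bits.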
1545  unsigned DstSize = DstTy.getSizeInBits();
1546 
1547  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1548  for (int I = 1; I != NumDst; ++I) {
1549  auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1550  auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1551  MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1552  }
1553 
1554  MI.eraseFromParent();
1555  return Legalized;
1556  }
1557 
1558  // Extend the source to a wider type.
1559  LLT LCMTy = getLCMType(SrcTy, WideTy);
1560 
1561  Register WideSrc = SrcReg;
1562  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
1563  // TODO: If this is an integral address space, cast to integer and anyext.
1564  if (SrcTy.isPointer()) {
1565  LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
1566  return UnableToLegalize;
1567  }
1568 
1569  WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
1570  }
1571 
1572  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
1573 
1574  // Create a sequence of unmerges and merges to the original results. Since we
1575  // may have widened the source, we will need to pad the results with dead defs
1576  // to cover the source register.
1577  // e.g. widen s48 to s64:
1578  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
1579  //
1580  // =>
1581  // %4:_(s192) = G_ANYEXT %0:_(s96)
1582  // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
1583  // ; unpack to GCD type, with extra dead defs
1584  // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
1585  // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
1586  // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
1587  // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
1588  // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
1589  const LLT GCDTy = getGCDType(WideTy, DstTy);
1590  const int NumUnmerge = Unmerge->getNumOperands() - 1;
1591  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
1592 
1593  // Directly unmerge to the destination without going through a GCD type
1594  // if possible
1595  if (PartsPerRemerge == 1) {
1596  const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
1597 
1598  for (int I = 0; I != NumUnmerge; ++I) {
1599  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
1600 
1601  for (int J = 0; J != PartsPerUnmerge; ++J) {
1602  int Idx = I * PartsPerUnmerge + J;
1603  if (Idx < NumDst)
1604  MIB.addDef(MI.getOperand(Idx).getReg());
1605  else {
1606  // Create dead def for excess components.
1607  MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
1608  }
1609  }
1610 
1611  MIB.addUse(Unmerge.getReg(I));
1612  }
1613  } else {
1614  SmallVector<Register, 16> Parts;
1615  for (int J = 0; J != NumUnmerge; ++J)
1616  extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
1617 
1618  SmallVector<Register, 8> RemergeParts;
1619  for (int I = 0; I != NumDst; ++I) {
1620  for (int J = 0; J < PartsPerRemerge; ++J) {
1621  const int Idx = I * PartsPerRemerge + J;
1622  RemergeParts.emplace_back(Parts[Idx]);
1623  }
1624 
1625  MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
1626  RemergeParts.clear();
1627  }
1628  }
1629 
1630  MI.eraseFromParent();
1631  return Legalized;
1632 }
1633 
1634 LegalizerHelper::LegalizeResult
1635 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1636  LLT WideTy) {
1637  Register DstReg = MI.getOperand(0).getReg();
1638  Register SrcReg = MI.getOperand(1).getReg();
1639  LLT SrcTy = MRI.getType(SrcReg);
1640 
1641  LLT DstTy = MRI.getType(DstReg);
1642  unsigned Offset = MI.getOperand(2).getImm();
1643 
1644  if (TypeIdx == 0) {
1645  if (SrcTy.isVector() || DstTy.isVector())
1646  return UnableToLegalize;
1647 
1648  SrcOp Src(SrcReg);
1649  if (SrcTy.isPointer()) {
1650  // Extracts from pointers can be handled only if they are really just
1651  // simple integers.
1652  const DataLayout &DL = MIRBuilder.getDataLayout();
1653  if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1654  return UnableToLegalize;
1655 
1656  LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1657  Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1658  SrcTy = SrcAsIntTy;
1659  }
1660 
1661  if (DstTy.isPointer())
1662  return UnableToLegalize;
1663 
1664  if (Offset == 0) {
1665  // Avoid a shift in the degenerate case.
1666  MIRBuilder.buildTrunc(DstReg,
1667  MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1668  MI.eraseFromParent();
1669  return Legalized;
1670  }
1671 
1672  // Do a shift in the source type.
1673  LLT ShiftTy = SrcTy;
1674  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1675  Src = MIRBuilder.buildAnyExt(WideTy, Src);
1676  ShiftTy = WideTy;
1677  }
1678 
1679  auto LShr = MIRBuilder.buildLShr(
1680  ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1681  MIRBuilder.buildTrunc(DstReg, LShr);
1682  MI.eraseFromParent();
1683  return Legalized;
1684  }
1685 
1686  if (SrcTy.isScalar()) {
1687  Observer.changingInstr(MI);
1688  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1689  Observer.changedInstr(MI);
1690  return Legalized;
1691  }
1692 
1693  if (!SrcTy.isVector())
1694  return UnableToLegalize;
1695 
1696  if (DstTy != SrcTy.getElementType())
1697  return UnableToLegalize;
1698 
1699  if (Offset % SrcTy.getScalarSizeInBits() != 0)
1700  return UnableToLegalize;
1701 
1702  Observer.changingInstr(MI);
1703  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1704 
1705  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1706  Offset);
1707  widenScalarDst(MI, WideTy.getScalarType(), 0);
1708  Observer.changedInstr(MI);
1709  return Legalized;
1710 }
1711 
1712 LegalizerHelper::LegalizeResult
1713 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1714  LLT WideTy) {
1715  if (TypeIdx != 0 || WideTy.isVector())
1716  return UnableToLegalize;
1717  Observer.changingInstr(MI);
1718  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1719  widenScalarDst(MI, WideTy);
1720  Observer.changedInstr(MI);
1721  return Legalized;
1722 }
1723 
1724 LegalizerHelper::LegalizeResult
1725 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
1726  LLT WideTy) {
1727  if (TypeIdx == 1)
1728  return UnableToLegalize; // TODO
1729 
1730  unsigned Opcode;
1731  unsigned ExtOpcode;
1732  Optional<Register> CarryIn = None;
1733  switch (MI.getOpcode()) {
1734  default:
1735  llvm_unreachable("Unexpected opcode!");
1736  case TargetOpcode::G_SADDO:
1737  Opcode = TargetOpcode::G_ADD;
1738  ExtOpcode = TargetOpcode::G_SEXT;
1739  break;
1740  case TargetOpcode::G_SSUBO:
1741  Opcode = TargetOpcode::G_SUB;
1742  ExtOpcode = TargetOpcode::G_SEXT;
1743  break;
1744  case TargetOpcode::G_UADDO:
1745  Opcode = TargetOpcode::G_ADD;
1746  ExtOpcode = TargetOpcode::G_ZEXT;
1747  break;
1748  case TargetOpcode::G_USUBO:
1749  Opcode = TargetOpcode::G_SUB;
1750  ExtOpcode = TargetOpcode::G_ZEXT;
1751  break;
1752  case TargetOpcode::G_SADDE:
1753  Opcode = TargetOpcode::G_UADDE;
1754  ExtOpcode = TargetOpcode::G_SEXT;
1755  CarryIn = MI.getOperand(4).getReg();
1756  break;
1757  case TargetOpcode::G_SSUBE:
1758  Opcode = TargetOpcode::G_USUBE;
1759  ExtOpcode = TargetOpcode::G_SEXT;
1760  CarryIn = MI.getOperand(4).getReg();
1761  break;
1762  case TargetOpcode::G_UADDE:
1763  Opcode = TargetOpcode::G_UADDE;
1764  ExtOpcode = TargetOpcode::G_ZEXT;
1765  CarryIn = MI.getOperand(4).getReg();
1766  break;
1767  case TargetOpcode::G_USUBE:
1768  Opcode = TargetOpcode::G_USUBE;
1769  ExtOpcode = TargetOpcode::G_ZEXT;
1770  CarryIn = MI.getOperand(4).getReg();
1771  break;
1772  }
1773 
1774  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
1775  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
1776  // Do the arithmetic in the larger type.
1777  Register NewOp;
1778  if (CarryIn) {
1779  LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
1780  NewOp = MIRBuilder
1781  .buildInstr(Opcode, {WideTy, CarryOutTy},
1782  {LHSExt, RHSExt, *CarryIn})
1783  .getReg(0);
1784  } else {
1785  NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
1786  }
1787  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1788  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
1789  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
1790  // There is no overflow if the ExtOp is the same as NewOp.
1791  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
1792  // Now trunc the NewOp to the original result.
1793  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1794  MI.eraseFromParent();
1795  return Legalized;
1796 }
1797 
1798 LegalizerHelper::LegalizeResult
1799 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
1800  LLT WideTy) {
1801  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
1802  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
1803  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
1804  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
1805  MI.getOpcode() == TargetOpcode::G_USHLSAT;
1806  // We can convert this to:
1807  // 1. Any extend iN to iM
1808  // 2. SHL by M-N
1809  // 3. [US][ADD|SUB|SHL]SAT
1810  // 4. L/ASHR by M-N
1811  //
1812  // It may be more efficient to lower this to a min and a max operation in
1813  // the higher precision arithmetic if the promoted operation isn't legal,
1814  // but this decision is up to the target's lowering request.
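  // For example (illustrative only), widening a G_UADDSAT on s8 to s32 with
  // this sequence: any-extend both operands to s32, shift each left by 24,
  // perform G_UADDSAT at s32, logical-shift the result right by 24, and
  // truncate back to s8.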
1815  Register DstReg = MI.getOperand(0).getReg();
1816 
1817  unsigned NewBits = WideTy.getScalarSizeInBits();
1818  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
1819 
1820  // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
1821  // must not left shift the RHS to preserve the shift amount.
1822  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
1823  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
1824  : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
1825  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
1826  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
1827  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
1828 
1829  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
1830  {ShiftL, ShiftR}, MI.getFlags());
1831 
1832  // Use a shift that will preserve the number of sign bits when the trunc is
1833  // folded away.
1834  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
1835  : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
1836 
1837  MIRBuilder.buildTrunc(DstReg, Result);
1838  MI.eraseFromParent();
1839  return Legalized;
1840 }
1841 
1842 LegalizerHelper::LegalizeResult
1843 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
1844  LLT WideTy) {
1845  if (TypeIdx == 1)
1846  return UnableToLegalize;
1847 
1848  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
1849  Register Result = MI.getOperand(0).getReg();
1850  Register OriginalOverflow = MI.getOperand(1).getReg();
1851  Register LHS = MI.getOperand(2).getReg();
1852  Register RHS = MI.getOperand(3).getReg();
1853  LLT SrcTy = MRI.getType(LHS);
1854  LLT OverflowTy = MRI.getType(OriginalOverflow);
1855  unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
1856 
1857  // To determine if the result overflowed in the larger type, we extend the
1858  // input to the larger type, do the multiply (checking if it overflows),
1859  // then also check the high bits of the result to see if overflow happened
1860  // there.
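  // For example (illustrative only), widening a G_UMULO on s16 to s32:
  // zero-extend both operands to s32, redo the G_UMULO at s32 (whose own
  // overflow bit can be ignored since 32 >= 2 * 16), and report overflow iff
  // the s32 product differs from the zero-extension of its low 16 bits.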
1861  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1862  auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
1863  auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
1864 
1865  auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
1866  {LeftOperand, RightOperand});
1867  auto Mul = Mulo->getOperand(0);
1868  MIRBuilder.buildTrunc(Result, Mul);
1869 
1870  MachineInstrBuilder ExtResult;
1871  // Overflow occurred if it occurred in the larger type, or if the high part
1872  // of the result does not zero/sign-extend the low part. Check this second
1873  // possibility first.
1874  if (IsSigned) {
1875  // For signed, overflow occurred when the high part does not sign-extend
1876  // the low part.
1877  ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
1878  } else {
1879  // Unsigned overflow occurred when the high part does not zero-extend the
1880  // low part.
1881  ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
1882  }
1883 
1884  // Multiplication cannot overflow if the WideTy is >= 2 * original width,
1885  // so we don't need to check the overflow result of larger type Mulo.
1886  if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
1887  auto Overflow =
1888  MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
1889  // Finally check if the multiplication in the larger type itself overflowed.
1890  MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
1891  } else {
1892  MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
1893  }
1894  MI.eraseFromParent();
1895  return Legalized;
1896 }
1897 
1898 LegalizerHelper::LegalizeResult
1899 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1900  switch (MI.getOpcode()) {
1901  default:
1902  return UnableToLegalize;
1903  case TargetOpcode::G_EXTRACT:
1904  return widenScalarExtract(MI, TypeIdx, WideTy);
1905  case TargetOpcode::G_INSERT:
1906  return widenScalarInsert(MI, TypeIdx, WideTy);
1907  case TargetOpcode::G_MERGE_VALUES:
1908  return widenScalarMergeValues(MI, TypeIdx, WideTy);
1909  case TargetOpcode::G_UNMERGE_VALUES:
1910  return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
1911  case TargetOpcode::G_SADDO:
1912  case TargetOpcode::G_SSUBO:
1913  case TargetOpcode::G_UADDO:
1914  case TargetOpcode::G_USUBO:
1915  case TargetOpcode::G_SADDE:
1916  case TargetOpcode::G_SSUBE:
1917  case TargetOpcode::G_UADDE:
1918  case TargetOpcode::G_USUBE:
1919  return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
1920  case TargetOpcode::G_UMULO:
1921  case TargetOpcode::G_SMULO:
1922  return widenScalarMulo(MI, TypeIdx, WideTy);
1923  case TargetOpcode::G_SADDSAT:
1924  case TargetOpcode::G_SSUBSAT:
1925  case TargetOpcode::G_SSHLSAT:
1926  case TargetOpcode::G_UADDSAT:
1927  case TargetOpcode::G_USUBSAT:
1928  case TargetOpcode::G_USHLSAT:
1929  return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
1930  case TargetOpcode::G_CTTZ:
1931  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1932  case TargetOpcode::G_CTLZ:
1933  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1934  case TargetOpcode::G_CTPOP: {
1935  if (TypeIdx == 0) {
1937  widenScalarDst(MI, WideTy, 0);
1939  return Legalized;
1940  }
1941 
1942  Register SrcReg = MI.getOperand(1).getReg();
1943 
1944  // First ZEXT the input.
1945  auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1946  LLT CurTy = MRI.getType(SrcReg);
1947  if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1948  // The count is the same in the larger type except if the original
1949  // value was zero. This can be handled by setting the bit just off
1950  // the top of the original type.
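  // E.g. for a G_CTTZ on s8 widened to s32, OR the zero-extended source with
  // 0x100 so that a zero input still produces the expected count of 8.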
1951  auto TopBit =
1952  APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1953  MIBSrc = MIRBuilder.buildOr(
1954  WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1955  }
1956 
1957  // Perform the operation at the larger size.
1958  auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1959  // This is already the correct result for CTPOP and CTTZs
1960  if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1961  MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1962  // The correct result is NewOp - (Difference in widety and current ty).
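  // E.g. a G_CTLZ on s8 computed at s32 counts 24 extra leading zero bits,
  // so subtract SizeDiff = 24 from the wide result.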
1963  unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1964  MIBNewOp = MIRBuilder.buildSub(
1965  WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
1966  }
1967 
1968  MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1969  MI.eraseFromParent();
1970  return Legalized;
1971  }
1972  case TargetOpcode::G_BSWAP: {
1974  Register DstReg = MI.getOperand(0).getReg();
1975 
1976  Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
1977  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1978  Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1979  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1980 
1981  MI.getOperand(0).setReg(DstExt);
1982 
1984 
1985  LLT Ty = MRI.getType(DstReg);
1986  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1987  MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1988  MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
1989 
1990  MIRBuilder.buildTrunc(DstReg, ShrReg);
1992  return Legalized;
1993  }
1994  case TargetOpcode::G_BITREVERSE: {
1996 
1997  Register DstReg = MI.getOperand(0).getReg();
1998  LLT Ty = MRI.getType(DstReg);
1999  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2000 
2001  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2002  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2003  MI.getOperand(0).setReg(DstExt);
2005 
2006  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2007  auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2008  MIRBuilder.buildTrunc(DstReg, Shift);
2010  return Legalized;
2011  }
2012  case TargetOpcode::G_FREEZE:
2014  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2015  widenScalarDst(MI, WideTy);
2017  return Legalized;
2018 
2019  case TargetOpcode::G_ADD:
2020  case TargetOpcode::G_AND:
2021  case TargetOpcode::G_MUL:
2022  case TargetOpcode::G_OR:
2023  case TargetOpcode::G_XOR:
2024  case TargetOpcode::G_SUB:
2025  // Perform operation at larger width (any extension is fine here, high bits
2026  // don't affect the result) and then truncate the result back to the
2027  // original type.
2029  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2030  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2031  widenScalarDst(MI, WideTy);
2033  return Legalized;
2034 
2035  case TargetOpcode::G_SHL:
2037 
2038  if (TypeIdx == 0) {
2039  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2040  widenScalarDst(MI, WideTy);
2041  } else {
2042  assert(TypeIdx == 1);
2043  // The "number of bits to shift" operand must preserve its value as an
2044  // unsigned integer:
2045  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2046  }
2047 
2049  return Legalized;
2050 
2051  case TargetOpcode::G_SDIV:
2052  case TargetOpcode::G_SREM:
2053  case TargetOpcode::G_SMIN:
2054  case TargetOpcode::G_SMAX:
2056  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2057  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2058  widenScalarDst(MI, WideTy);
2060  return Legalized;
2061 
2062  case TargetOpcode::G_ASHR:
2063  case TargetOpcode::G_LSHR:
2065 
2066  if (TypeIdx == 0) {
2067  unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2068  TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2069 
2070  widenScalarSrc(MI, WideTy, 1, CvtOp);
2071  widenScalarDst(MI, WideTy);
2072  } else {
2073  assert(TypeIdx == 1);
2074  // The "number of bits to shift" operand must preserve its value as an
2075  // unsigned integer:
2076  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2077  }
2078 
2080  return Legalized;
2081  case TargetOpcode::G_UDIV:
2082  case TargetOpcode::G_UREM:
2083  case TargetOpcode::G_UMIN:
2084  case TargetOpcode::G_UMAX:
2086  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2087  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2088  widenScalarDst(MI, WideTy);
2090  return Legalized;
2091 
2092  case TargetOpcode::G_SELECT:
2094  if (TypeIdx == 0) {
2095  // Perform operation at larger width (any extension is fine here, high
2096  // bits don't affect the result) and then truncate the result back to the
2097  // original type.
2098  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2099  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2100  widenScalarDst(MI, WideTy);
2101  } else {
2102  bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2103  // Explicit extension is required here since high bits affect the result.
2104  widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2105  }
2107  return Legalized;
2108 
2109  case TargetOpcode::G_FPTOSI:
2110  case TargetOpcode::G_FPTOUI:
2112 
2113  if (TypeIdx == 0)
2114  widenScalarDst(MI, WideTy);
2115  else
2116  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2117 
2119  return Legalized;
2120  case TargetOpcode::G_SITOFP:
2122 
2123  if (TypeIdx == 0)
2124  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2125  else
2126  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2127 
2129  return Legalized;
2130  case TargetOpcode::G_UITOFP:
2132 
2133  if (TypeIdx == 0)
2134  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2135  else
2136  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2137 
2139  return Legalized;
2140  case TargetOpcode::G_LOAD:
2141  case TargetOpcode::G_SEXTLOAD:
2142  case TargetOpcode::G_ZEXTLOAD:
2144  widenScalarDst(MI, WideTy);
2146  return Legalized;
2147 
2148  case TargetOpcode::G_STORE: {
2149  if (TypeIdx != 0)
2150  return UnableToLegalize;
2151 
2152  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2153  if (!Ty.isScalar())
2154  return UnableToLegalize;
2155 
2157 
2158  unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2159  TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2160  widenScalarSrc(MI, WideTy, 0, ExtType);
2161 
2163  return Legalized;
2164  }
2165  case TargetOpcode::G_CONSTANT: {
2166  MachineOperand &SrcMO = MI.getOperand(1);
2167  LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2168  unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2169  MRI.getType(MI.getOperand(0).getReg()));
2170  assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2171  ExtOpc == TargetOpcode::G_ANYEXT) &&
2172  "Illegal Extend");
2173  const APInt &SrcVal = SrcMO.getCImm()->getValue();
2174  const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2175  ? SrcVal.sext(WideTy.getSizeInBits())
2176  : SrcVal.zext(WideTy.getSizeInBits());
2178  SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2179 
2180  widenScalarDst(MI, WideTy);
2182  return Legalized;
2183  }
2184  case TargetOpcode::G_FCONSTANT: {
2185  MachineOperand &SrcMO = MI.getOperand(1);
2186  LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2187  APFloat Val = SrcMO.getFPImm()->getValueAPF();
2188  bool LosesInfo;
2189  switch (WideTy.getSizeInBits()) {
2190  case 32:
2191  Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
2192  &LosesInfo);
2193  break;
2194  case 64:
2195  Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
2196  &LosesInfo);
2197  break;
2197  break;
2198  default:
2199  return UnableToLegalize;
2200  }
2201 
2202  assert(!LosesInfo && "extend should always be lossless");
2203 
2205  SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
2206 
2207  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2209  return Legalized;
2210  }
2211  case TargetOpcode::G_IMPLICIT_DEF: {
2213  widenScalarDst(MI, WideTy);
2215  return Legalized;
2216  }
2217  case TargetOpcode::G_BRCOND:
2219  widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2221  return Legalized;
2222 
2223  case TargetOpcode::G_FCMP:
2225  if (TypeIdx == 0)
2226  widenScalarDst(MI, WideTy);
2227  else {
2228  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2229  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2230  }
2232  return Legalized;
2233 
2234  case TargetOpcode::G_ICMP:
2236  if (TypeIdx == 0)
2237  widenScalarDst(MI, WideTy);
2238  else {
2239  unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2240  MI.getOperand(1).getPredicate()))
2241  ? TargetOpcode::G_SEXT
2242  : TargetOpcode::G_ZEXT;
2243  widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2244  widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2245  }
2247  return Legalized;
2248 
2249  case TargetOpcode::G_PTR_ADD:
2250  assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2252  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2254  return Legalized;
2255 
2256  case TargetOpcode::G_PHI: {
2257  assert(TypeIdx == 0 && "Expecting only Idx 0");
2258 
2260  for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2261  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2262  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2263  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2264  }
2265 
2266  MachineBasicBlock &MBB = *MI.getParent();
2268  widenScalarDst(MI, WideTy);
2270  return Legalized;
2271  }
2272  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2273  if (TypeIdx == 0) {
2274  Register VecReg = MI.getOperand(1).getReg();
2275  LLT VecTy = MRI.getType(VecReg);
2277 
2278  widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
2279  WideTy.getSizeInBits()),
2280  1, TargetOpcode::G_SEXT);
2281 
2282  widenScalarDst(MI, WideTy, 0);
2284  return Legalized;
2285  }
2286 
2287  if (TypeIdx != 2)
2288  return UnableToLegalize;
2290  // TODO: Probably should be zext
2291  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2293  return Legalized;
2294  }
2295  case TargetOpcode::G_INSERT_VECTOR_ELT: {
2296  if (TypeIdx == 1) {
2298 
2299  Register VecReg = MI.getOperand(1).getReg();
2300  LLT VecTy = MRI.getType(VecReg);
2301  LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
2302 
2303  widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2304  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2305  widenScalarDst(MI, WideVecTy, 0);
2307  return Legalized;
2308  }
2309 
2310  if (TypeIdx == 2) {
2312  // TODO: Probably should be zext
2313  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2315  return Legalized;
2316  }
2317 
2318  return UnableToLegalize;
2319  }
2320  case TargetOpcode::G_FADD:
2321  case TargetOpcode::G_FMUL:
2322  case TargetOpcode::G_FSUB:
2323  case TargetOpcode::G_FMA:
2324  case TargetOpcode::G_FMAD:
2325  case TargetOpcode::G_FNEG:
2326  case TargetOpcode::G_FABS:
2327  case TargetOpcode::G_FCANONICALIZE:
2328  case TargetOpcode::G_FMINNUM:
2329  case TargetOpcode::G_FMAXNUM:
2330  case TargetOpcode::G_FMINNUM_IEEE:
2331  case TargetOpcode::G_FMAXNUM_IEEE:
2332  case TargetOpcode::G_FMINIMUM:
2333  case TargetOpcode::G_FMAXIMUM:
2334  case TargetOpcode::G_FDIV:
2335  case TargetOpcode::G_FREM:
2336  case TargetOpcode::G_FCEIL:
2337  case TargetOpcode::G_FFLOOR:
2338  case TargetOpcode::G_FCOS:
2339  case TargetOpcode::G_FSIN:
2340  case TargetOpcode::G_FLOG10:
2341  case TargetOpcode::G_FLOG:
2342  case TargetOpcode::G_FLOG2:
2343  case TargetOpcode::G_FRINT:
2344  case TargetOpcode::G_FNEARBYINT:
2345  case TargetOpcode::G_FSQRT:
2346  case TargetOpcode::G_FEXP:
2347  case TargetOpcode::G_FEXP2:
2348  case TargetOpcode::G_FPOW:
2349  case TargetOpcode::G_INTRINSIC_TRUNC:
2350  case TargetOpcode::G_INTRINSIC_ROUND:
2351  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2352  assert(TypeIdx == 0);
2354 
2355  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2356  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2357 
2358  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2360  return Legalized;
2361  case TargetOpcode::G_FPOWI: {
2362  if (TypeIdx != 0)
2363  return UnableToLegalize;
2365  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2366  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2368  return Legalized;
2369  }
2370  case TargetOpcode::G_INTTOPTR:
2371  if (TypeIdx != 1)
2372  return UnableToLegalize;
2373 
2375  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2377  return Legalized;
2378  case TargetOpcode::G_PTRTOINT:
2379  if (TypeIdx != 0)
2380  return UnableToLegalize;
2381 
2383  widenScalarDst(MI, WideTy, 0);
2385  return Legalized;
2386  case TargetOpcode::G_BUILD_VECTOR: {
2388 
2389  const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2390  for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2391  widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2392 
2393  // Avoid changing the result vector type if the source element type was
2394  // requested.
2395  if (TypeIdx == 1) {
2396  MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2397  } else {
2398  widenScalarDst(MI, WideTy, 0);
2399  }
2400 
2402  return Legalized;
2403  }
2404  case TargetOpcode::G_SEXT_INREG:
2405  if (TypeIdx != 0)
2406  return UnableToLegalize;
2407 
2409  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2410  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2412  return Legalized;
2413  case TargetOpcode::G_PTRMASK: {
2414  if (TypeIdx != 1)
2415  return UnableToLegalize;
2417  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2419  return Legalized;
2420  }
2421  }
2422 }
2423 
2425  MachineIRBuilder &B, Register Src, LLT Ty) {
2426  auto Unmerge = B.buildUnmerge(Ty, Src);
2427  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2428  Pieces.push_back(Unmerge.getReg(I));
2429 }
2430 
2431 LegalizerHelper::LegalizeResult
2432 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
2433  Register Dst = MI.getOperand(0).getReg();
2434  Register Src = MI.getOperand(1).getReg();
2435  LLT DstTy = MRI.getType(Dst);
2436  LLT SrcTy = MRI.getType(Src);
2437 
2438  if (SrcTy.isVector()) {
2439  LLT SrcEltTy = SrcTy.getElementType();
2440  SmallVector<Register, 8> SrcRegs;
2441 
2442  if (DstTy.isVector()) {
2443  int NumDstElt = DstTy.getNumElements();
2444  int NumSrcElt = SrcTy.getNumElements();
2445 
2446  LLT DstEltTy = DstTy.getElementType();
2447  LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
2448  LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
2449 
2450  // If there's an element size mismatch, insert intermediate casts to match
2451  // the result element type.
2452  if (NumSrcElt < NumDstElt) { // Source element type is larger.
2453  // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
2454  //
2455  // =>
2456  //
2457  // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
2458  // %4:_(<2 x s8>) = G_BITCAST %2
2459  // %5:_(<2 x s8>) = G_BITCAST %3
2460  // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
2461  DstCastTy = LLT::vector(NumDstElt / NumSrcElt, DstEltTy);
2462  SrcPartTy = SrcEltTy;
2463  } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
2464  //
2465  // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
2466  //
2467  // =>
2468  //
2469  // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
2470  // %4:_(s16) = G_BITCAST %2
2471  // %5:_(s16) = G_BITCAST %3
2472  // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
2473  SrcPartTy = LLT::vector(NumSrcElt / NumDstElt, SrcEltTy);
2474  DstCastTy = DstEltTy;
2475  }
2476 
2477  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
2478  for (Register &SrcReg : SrcRegs)
2479  SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
2480  } else
2481  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
2482 
2483  MIRBuilder.buildMerge(Dst, SrcRegs);
2484  MI.eraseFromParent();
2485  return Legalized;
2486  }
2487 
2488  if (DstTy.isVector()) {
2489  SmallVector<Register, 8> SrcRegs;
2490  getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2491  MIRBuilder.buildMerge(Dst, SrcRegs);
2492  MI.eraseFromParent();
2493  return Legalized;
2494  }
2495 
2496  return UnableToLegalize;
2497 }
2498 
2499 /// Figure out the bit offset into a register when coercing a vector index for
2500 /// the wide element type. This is only for the case when promoting vector to
2501 /// one with larger elements.
2502 ///
2503 ///
2504 /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2505 /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
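///
/// For example (illustrative only), indexing a <4 x s16> viewed as <2 x s32>:
/// the element ratio is 2, so %offset_idx = %idx & 1 and
/// %offset_bits = %offset_idx << 4, i.e. the bit position of the s16
/// sub-element inside its containing s32 element.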
2506 static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
2507  Register Idx,
2508  unsigned NewEltSize,
2509  unsigned OldEltSize) {
2510  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2511  LLT IdxTy = B.getMRI()->getType(Idx);
2512 
2513  // Now figure out the amount we need to shift to get the target bits.
2514  auto OffsetMask = B.buildConstant(
2515  IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
2516  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
2517  return B.buildShl(IdxTy, OffsetIdx,
2518  B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
2519 }
2520 
2521 /// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
2522 /// is casting to a vector with a smaller element size, perform multiple element
2523 /// extracts and merge the results. If this is coercing to a vector with larger
2524 /// elements, index the bitcasted vector and extract the target element with bit
2525 /// operations. This is intended to force the indexing in the native register
2526 /// size for architectures that can dynamically index the register file.
2527 LegalizerHelper::LegalizeResult
2528 LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
2529  LLT CastTy) {
2530  if (TypeIdx != 1)
2531  return UnableToLegalize;
2532 
2533  Register Dst = MI.getOperand(0).getReg();
2534  Register SrcVec = MI.getOperand(1).getReg();
2535  Register Idx = MI.getOperand(2).getReg();
2536  LLT SrcVecTy = MRI.getType(SrcVec);
2537  LLT IdxTy = MRI.getType(Idx);
2538 
2539  LLT SrcEltTy = SrcVecTy.getElementType();
2540  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2541  unsigned OldNumElts = SrcVecTy.getNumElements();
2542 
2543  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2544  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2545 
2546  const unsigned NewEltSize = NewEltTy.getSizeInBits();
2547  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
2548  if (NewNumElts > OldNumElts) {
2549  // Decreasing the vector element size
2550  //
2551  // e.g. i64 = extract_vector_elt x:v2i64, y:i32
2552  // =>
2553  // v4i32:castx = bitcast x:v2i64
2554  //
2555  // i64 = bitcast
2556  // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
2557  // (i32 (extract_vector_elt castx, (2 * y + 1)))
2558  //
2559  if (NewNumElts % OldNumElts != 0)
2560  return UnableToLegalize;
2561 
2562  // Type of the intermediate result vector.
2563  const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
2564  LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy);
2565 
2566  auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
2567 
2568  SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
2569  auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
2570 
2571  for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
2572  auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
2573  auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
2574  auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
2575  NewOps[I] = Elt.getReg(0);
2576  }
2577 
2578  auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
2579  MIRBuilder.buildBitcast(Dst, NewVec);
2580  MI.eraseFromParent();
2581  return Legalized;
2582  }
2583 
2584  if (NewNumElts < OldNumElts) {
2585  if (NewEltSize % OldEltSize != 0)
2586  return UnableToLegalize;
2587 
2588  // This only depends on powers of 2 because we use bit tricks to figure out
2589  // the bit offset we need to shift to get the target element. A general
2590  // expansion could emit division/multiply.
2591  if (!isPowerOf2_32(NewEltSize / OldEltSize))
2592  return UnableToLegalize;
2593 
2594  // Increasing the vector element size.
2595  // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
2596  //
2597  // =>
2598  //
2599  // %cast = G_BITCAST %vec
2600  // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
2601  // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
2602  // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2603  // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2604  // %elt_bits = G_LSHR %wide_elt, %offset_bits
2605  // %elt = G_TRUNC %elt_bits
2606 
2607  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2608  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2609 
2610  // Divide to get the index in the wider element type.
2611  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2612 
2613  Register WideElt = CastVec;
2614  if (CastTy.isVector()) {
2615  WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2616  ScaledIdx).getReg(0);
2617  }
2618 
2619  // Compute the bit offset into the register of the target element.
2620  Register OffsetBits = getBitcastWiderVectorElementOffset(
2621  MIRBuilder, Idx, NewEltSize, OldEltSize);
2622 
2623  // Shift the wide element to get the target element.
2624  auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
2625  MIRBuilder.buildTrunc(Dst, ExtractedBits);
2626  MI.eraseFromParent();
2627  return Legalized;
2628  }
2629 
2630  return UnableToLegalize;
2631 }
2632 
2633 /// Emit code to insert \p InsertReg into \p TargetReg at bit offset \p
2634 /// OffsetBits, while preserving the other bits in \p TargetReg.
2635 ///
2636 /// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
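///
/// For example (illustrative only), inserting an s8 value at bit offset 16 of
/// an s32 target: zero-extend the value, shift it left by 16, mask the target
/// with ~(0xff << 16), and OR the two together.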
2637 static Register buildBitFieldInsert(MachineIRBuilder &B,
2638  Register TargetReg, Register InsertReg,
2639  Register OffsetBits) {
2640  LLT TargetTy = B.getMRI()->getType(TargetReg);
2641  LLT InsertTy = B.getMRI()->getType(InsertReg);
2642  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
2643  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
2644 
2645  // Produce a bitmask of the value to insert
2646  auto EltMask = B.buildConstant(
2647  TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
2648  InsertTy.getSizeInBits()));
2649  // Shift it into position
2650  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
2651  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
2652 
2653  // Clear out the bits in the wide element
2654  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
2655 
2656  // The value to insert has all zeros already, so stick it into the masked
2657  // wide element.
2658  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
2659 }
2660 
2661 /// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
2662 /// is increasing the element size, perform the indexing in the target element
2663 /// type, and use bit operations to insert at the element position. This is
2664 /// intended for architectures that can dynamically index the register file and
2665 /// want to force indexing in the native register size.
2666 LegalizerHelper::LegalizeResult
2667 LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
2668  LLT CastTy) {
2669  if (TypeIdx != 0)
2670  return UnableToLegalize;
2671 
2672  Register Dst = MI.getOperand(0).getReg();
2673  Register SrcVec = MI.getOperand(1).getReg();
2674  Register Val = MI.getOperand(2).getReg();
2675  Register Idx = MI.getOperand(3).getReg();
2676 
2677  LLT VecTy = MRI.getType(Dst);
2678  LLT IdxTy = MRI.getType(Idx);
2679 
2680  LLT VecEltTy = VecTy.getElementType();
2681  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2682  const unsigned NewEltSize = NewEltTy.getSizeInBits();
2683  const unsigned OldEltSize = VecEltTy.getSizeInBits();
2684 
2685  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2686  unsigned OldNumElts = VecTy.getNumElements();
2687 
2688  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2689  if (NewNumElts < OldNumElts) {
2690  if (NewEltSize % OldEltSize != 0)
2691  return UnableToLegalize;
2692 
2693  // This only depends on powers of 2 because we use bit tricks to figure out
2694  // the bit offset we need to shift to get the target element. A general
2695  // expansion could emit division/multiply.
2696  if (!isPowerOf2_32(NewEltSize / OldEltSize))
2697  return UnableToLegalize;
2698 
2699  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2700  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2701 
2702  // Divide to get the index in the wider element type.
2703  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2704 
2705  Register ExtractedElt = CastVec;
2706  if (CastTy.isVector()) {
2707  ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2708  ScaledIdx).getReg(0);
2709  }
2710 
2711  // Compute the bit offset into the register of the target element.
2712  Register OffsetBits = getBitcastWiderVectorElementOffset(
2713  MIRBuilder, Idx, NewEltSize, OldEltSize);
2714 
2715  Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
2716  Val, OffsetBits);
2717  if (CastTy.isVector()) {
2718  InsertedElt = MIRBuilder.buildInsertVectorElement(
2719  CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
2720  }
2721 
2722  MIRBuilder.buildBitcast(Dst, InsertedElt);
2723  MI.eraseFromParent();
2724  return Legalized;
2725  }
2726 
2727  return UnableToLegalize;
2728 }
2729 
2730 LegalizerHelper::LegalizeResult
2731 LegalizerHelper::lowerLoad(MachineInstr &MI) {
2732  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2733  Register DstReg = MI.getOperand(0).getReg();
2734  Register PtrReg = MI.getOperand(1).getReg();
2735  LLT DstTy = MRI.getType(DstReg);
2736  auto &MMO = **MI.memoperands_begin();
2737 
2738  if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
2739  if (MI.getOpcode() == TargetOpcode::G_LOAD) {
2740  // This load needs splitting into power of 2 sized loads.
2741  if (DstTy.isVector())
2742  return UnableToLegalize;
2743  if (isPowerOf2_32(DstTy.getSizeInBits()))
2744  return UnableToLegalize; // Don't know what we're being asked to do.
2745 
2746  // Our strategy here is to generate anyextending loads for the smaller
2747  // types up to next power-2 result type, and then combine the two larger
2748  // result values together, before truncating back down to the non-pow-2
2749  // type.
2750  // E.g. v1 = i24 load =>
2751  // v2 = i32 zextload (2 byte)
2752  // v3 = i32 load (1 byte)
2753  // v4 = i32 shl v3, 16
2754  // v5 = i32 or v4, v2
2755  // v1 = i24 trunc v5
2756  // By doing this we generate the correct truncate which should get
2757  // combined away as an artifact with a matching extend.
2758  uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
2759  uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
2760 
2761  MachineFunction &MF = MIRBuilder.getMF();
2762  MachineMemOperand *LargeMMO =
2763  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2764  MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
2765  &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2766 
2767  LLT PtrTy = MRI.getType(PtrReg);
2768  unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
2769  LLT AnyExtTy = LLT::scalar(AnyExtSize);
2770  Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2771  Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2772  auto LargeLoad = MIRBuilder.buildLoadInstr(
2773  TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
2774 
2775  auto OffsetCst = MIRBuilder.buildConstant(
2776  LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
2777  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2778  auto SmallPtr =
2779  MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2780  auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
2781  *SmallMMO);
2782 
2783  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
2784  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
2785  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
2786  MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
2787  MI.eraseFromParent();
2788  return Legalized;
2789  }
2790 
2791  MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
2792  MI.eraseFromParent();
2793  return Legalized;
2794  }
2795 
2796  if (DstTy.isScalar()) {
2797  Register TmpReg =
2798  MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
2799  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
2800  switch (MI.getOpcode()) {
2801  default:
2802  llvm_unreachable("Unexpected opcode");
2803  case TargetOpcode::G_LOAD:
2804  MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg);
2805  break;
2806  case TargetOpcode::G_SEXTLOAD:
2807  MIRBuilder.buildSExt(DstReg, TmpReg);
2808  break;
2809  case TargetOpcode::G_ZEXTLOAD:
2810  MIRBuilder.buildZExt(DstReg, TmpReg);
2811  break;
2812  }
2813 
2814  MI.eraseFromParent();
2815  return Legalized;
2816  }
2817 
2818  return UnableToLegalize;
2819 }
2820 
2821 LegalizerHelper::LegalizeResult
2822 LegalizerHelper::lowerStore(MachineInstr &MI) {
2823  // Lower a non-power of 2 store into multiple pow-2 stores.
2824  // E.g. split an i24 store into an i16 store + i8 store.
2825  // We do this by first extending the stored value to the next largest power
2826  // of 2 type, and then using truncating stores to store the components.
2827  // As with G_LOAD, this generates an extend that can be combined away as an
2828  // artifact, instead of leaving behind extracts.
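  // For example (illustrative only), an s24 store becomes: any-extend the
  // value to s32, store the low 16 bits with a truncating 2-byte store at the
  // base pointer, shift the extended value right by 16, and store the
  // remaining 8 bits at base + 2.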
2829  Register SrcReg = MI.getOperand(0).getReg();
2830  Register PtrReg = MI.getOperand(1).getReg();
2831  LLT SrcTy = MRI.getType(SrcReg);
2832  MachineMemOperand &MMO = **MI.memoperands_begin();
2833  if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
2834  return UnableToLegalize;
2835  if (SrcTy.isVector())
2836  return UnableToLegalize;
2837  if (isPowerOf2_32(SrcTy.getSizeInBits()))
2838  return UnableToLegalize; // Don't know what we're being asked to do.
2839 
2840  // Extend to the next pow-2.
2841  const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
2842  auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
2843 
2844  // Obtain the smaller value by shifting away the larger value.
2845  uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
2846  uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
2847  auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
2848  auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
2849 
2850  // Generate the PtrAdd and truncating stores.
2851  LLT PtrTy = MRI.getType(PtrReg);
2852  auto OffsetCst = MIRBuilder.buildConstant(
2853  LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
2854  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2855  auto SmallPtr =
2856  MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
2857 
2858  MachineFunction &MF = MIRBuilder.getMF();
2859  MachineMemOperand *LargeMMO =
2860  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2861  MachineMemOperand *SmallMMO =
2862  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2863  MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
2864  MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
2865  MI.eraseFromParent();
2866  return Legalized;
2867 }
2868 
2869 LegalizerHelper::LegalizeResult
2870 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
2871  switch (MI.getOpcode()) {
2872  case TargetOpcode::G_LOAD: {
2873  if (TypeIdx != 0)
2874  return UnableToLegalize;
2875 
2877  bitcastDst(MI, CastTy, 0);
2879  return Legalized;
2880  }
2881  case TargetOpcode::G_STORE: {
2882  if (TypeIdx != 0)
2883  return UnableToLegalize;
2884 
2886  bitcastSrc(MI, CastTy, 0);
2888  return Legalized;
2889  }
2890  case TargetOpcode::G_SELECT: {
2891  if (TypeIdx != 0)
2892  return UnableToLegalize;
2893 
2894  if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
2895  LLVM_DEBUG(
2896  dbgs() << "bitcast action not implemented for vector select\n");
2897  return UnableToLegalize;
2898  }
2899 
2901  bitcastSrc(MI, CastTy, 2);
2902  bitcastSrc(MI, CastTy, 3);
2903  bitcastDst(MI, CastTy, 0);
2905  return Legalized;
2906  }
2907  case TargetOpcode::G_AND:
2908  case TargetOpcode::G_OR:
2909  case TargetOpcode::G_XOR: {
2911  bitcastSrc(MI, CastTy, 1);
2912  bitcastSrc(MI, CastTy, 2);
2913  bitcastDst(MI, CastTy, 0);
2915  return Legalized;
2916  }
2917  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2918  return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
2919  case TargetOpcode::G_INSERT_VECTOR_ELT:
2920  return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
2921  default:
2922  return UnableToLegalize;
2923  }
2924 }
2925 
2926 // Legalize an instruction by changing the opcode in place.
2927 void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
2929  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
2931 }
2932 
2933 LegalizerHelper::LegalizeResult
2934 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
2935  using namespace TargetOpcode;
2936 
2937  switch(MI.getOpcode()) {
2938  default:
2939  return UnableToLegalize;
2940  case TargetOpcode::G_BITCAST:
2941  return lowerBitcast(MI);
2942  case TargetOpcode::G_SREM:
2943  case TargetOpcode::G_UREM: {
2944  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2945  auto Quot =
2946  MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
2947  {MI.getOperand(1), MI.getOperand(2)});
2948 
2949  auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
2950  MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
2951  MI.eraseFromParent();
2952  return Legalized;
2953  }
2954  case TargetOpcode::G_SADDO:
2955  case TargetOpcode::G_SSUBO:
2956  return lowerSADDO_SSUBO(MI);
2957  case TargetOpcode::G_UMULH:
2958  case TargetOpcode::G_SMULH:
2959  return lowerSMULH_UMULH(MI);
2960  case TargetOpcode::G_SMULO:
2961  case TargetOpcode::G_UMULO: {
2962  // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
2963  // result.
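  // For example (illustrative only), G_UMULO becomes:
  //   %res = G_MUL %lhs, %rhs
  //   %hi = G_UMULH %lhs, %rhs
  //   %ov = G_ICMP ne, %hi, 0
  // while G_SMULO instead compares %hi against %res ashr'd by bitwidth - 1.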
2964  Register Res = MI.getOperand(0).getReg();
2965  Register Overflow = MI.getOperand(1).getReg();
2966  Register LHS = MI.getOperand(2).getReg();
2967  Register RHS = MI.getOperand(3).getReg();
2968  LLT Ty = MRI.getType(Res);
2969 
2970  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
2971  ? TargetOpcode::G_SMULH
2972  : TargetOpcode::G_UMULH;
2973 
2975  const auto &TII = MIRBuilder.getTII();
2976  MI.setDesc(TII.get(TargetOpcode::G_MUL));
2977  MI.RemoveOperand(1);
2979 
2980  auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
2981  auto Zero = MIRBuilder.buildConstant(Ty, 0);
2982 
2983  // Move insert point forward so we can use the Res register if needed.
2985 
2986  // For *signed* multiply, overflow is detected by checking:
2987  // (hi != (lo >> bitwidth-1))
2988  if (Opcode == TargetOpcode::G_SMULH) {
2989  auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
2990  auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
2991  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
2992  } else {
2993  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
2994  }
2995  return Legalized;
2996  }
2997  case TargetOpcode::G_FNEG: {
2998  Register Res = MI.getOperand(0).getReg();
2999  LLT Ty = MRI.getType(Res);
3000 
3001  // TODO: Handle vector types once we are able to
3002  // represent them.
3003  if (Ty.isVector())
3004  return UnableToLegalize;
3005  auto SignMask =
3006  MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3007  Register SubByReg = MI.getOperand(1).getReg();
3008  MIRBuilder.buildXor(Res, SubByReg, SignMask);
3009  MI.eraseFromParent();
3010  return Legalized;
3011  }
3012  case TargetOpcode::G_FSUB: {
3013  Register Res = MI.getOperand(0).getReg();
3014  LLT Ty = MRI.getType(Res);
3015 
3016  // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3017  // First, check if G_FNEG is marked as Lower. If so, we may
3018  // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
3019  if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
3020  return UnableToLegalize;
3021  Register LHS = MI.getOperand(1).getReg();
3022  Register RHS = MI.getOperand(2).getReg();
3023  Register Neg = MRI.createGenericVirtualRegister(Ty);
3024  MIRBuilder.buildFNeg(Neg, RHS);
3025  MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3026  MI.eraseFromParent();
3027  return Legalized;
3028  }
3029  case TargetOpcode::G_FMAD:
3030  return lowerFMad(MI);
3031  case TargetOpcode::G_FFLOOR:
3032  return lowerFFloor(MI);
3033  case TargetOpcode::G_INTRINSIC_ROUND:
3034  return lowerIntrinsicRound(MI);
3035  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
3036  // Since round even is the assumed rounding mode for unconstrained FP
3037  // operations, rint and roundeven are the same operation.
3038  changeOpcode(MI, TargetOpcode::G_FRINT);
3039  return Legalized;
3040  }
3041  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3042  Register OldValRes = MI.getOperand(0).getReg();
3043  Register SuccessRes = MI.getOperand(1).getReg();
3044  Register Addr = MI.getOperand(2).getReg();
3045  Register CmpVal = MI.getOperand(3).getReg();
3046  Register NewVal = MI.getOperand(4).getReg();
3047  MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3048  **MI.memoperands_begin());
3049  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3050  MI.eraseFromParent();
3051  return Legalized;
3052  }
3053  case TargetOpcode::G_LOAD:
3054  case TargetOpcode::G_SEXTLOAD:
3055  case TargetOpcode::G_ZEXTLOAD:
3056  return lowerLoad(MI);
3057  case TargetOpcode::G_STORE:
3058  return lowerStore(MI);
3059  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3060  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3061  case TargetOpcode::G_CTLZ:
3062  case TargetOpcode::G_CTTZ:
3063  case TargetOpcode::G_CTPOP:
3064  return lowerBitCount(MI);
3065  case G_UADDO: {
3066  Register Res = MI.getOperand(0).getReg();
3067  Register CarryOut = MI.getOperand(1).getReg();
3068  Register LHS = MI.getOperand(2).getReg();
3069  Register RHS = MI.getOperand(3).getReg();
3070 
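  // The addition produced a carry iff the result wrapped, i.e. Res u< RHS.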
3071  MIRBuilder.buildAdd(Res, LHS, RHS);
3072  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
3073 
3074  MI.eraseFromParent();
3075  return Legalized;
3076  }
3077  case G_UADDE: {
3078  Register Res = MI.getOperand(0).getReg();
3079  Register CarryOut = MI.getOperand(1).getReg();
3080  Register LHS = MI.getOperand(2).getReg();
3081  Register RHS = MI.getOperand(3).getReg();
3082  Register CarryIn = MI.getOperand(4).getReg();
3083  LLT Ty = MRI.getType(Res);
3084 
3085  auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3086  auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3087  MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3088  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
3089 
3090  MI.eraseFromParent();
3091  return Legalized;
3092  }
3093  case G_USUBO: {
3094  Register Res = MI.getOperand(0).getReg();
3095  Register BorrowOut = MI.getOperand(1).getReg();
3096  Register LHS = MI.getOperand(2).getReg();
3097  Register RHS = MI.getOperand(3).getReg();
3098 
3099  MIRBuilder.buildSub(Res, LHS, RHS);
3100  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3101 
3102  MI.eraseFromParent();
3103  return Legalized;
3104  }
3105  case G_USUBE: {
3106  Register Res = MI.getOperand(0).getReg();
3107  Register BorrowOut = MI.getOperand(1).getReg();
3108  Register LHS = MI.getOperand(2).getReg();
3109  Register RHS = MI.getOperand(3).getReg();
3110  Register BorrowIn = MI.getOperand(4).getReg();
3111  const LLT CondTy = MRI.getType(BorrowOut);
3112  const LLT Ty = MRI.getType(Res);
3113 
3114  auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3115  auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3116  MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3117 
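  // The borrow out is BorrowIn when LHS == RHS (subtracting equal values only
  // borrows if the incoming borrow is then subtracted), and LHS u< RHS
  // otherwise.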
3118  auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
3119  auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
3120  MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
3121 
3122  MI.eraseFromParent();
3123  return Legalized;
3124  }
3125  case G_UITOFP:
3126  return lowerUITOFP(MI);
3127  case G_SITOFP:
3128  return lowerSITOFP(MI);
3129  case G_FPTOUI:
3130  return lowerFPTOUI(MI);
3131  case G_FPTOSI:
3132  return lowerFPTOSI(MI);
3133  case G_FPTRUNC:
3134  return lowerFPTRUNC(MI);
3135  case G_FPOWI:
3136  return lowerFPOWI(MI);
3137  case G_SMIN:
3138  case G_SMAX:
3139  case G_UMIN:
3140  case G_UMAX:
3141  return lowerMinMax(MI);
3142  case G_FCOPYSIGN:
3143  return lowerFCopySign(MI);
3144  case G_FMINNUM:
3145  case G_FMAXNUM:
3146  return lowerFMinNumMaxNum(MI);
3147  case G_MERGE_VALUES:
3148  return lowerMergeValues(MI);
3149  case G_UNMERGE_VALUES:
3150  return lowerUnmergeValues(MI);
3151  case TargetOpcode::G_SEXT_INREG: {
3152  assert(MI.getOperand(2).isImm() && "Expected immediate");
3153  int64_t SizeInBits = MI.getOperand(2).getImm();
3154 
3155  Register DstReg = MI.getOperand(0).getReg();
3156  Register SrcReg = MI.getOperand(1).getReg();
3157  LLT DstTy = MRI.getType(DstReg);
3158  Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3159 
3160  auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3161  MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3162  MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3163  MI.eraseFromParent();
3164  return Legalized;
3165  }
3166  case G_EXTRACT_VECTOR_ELT:
3167  case G_INSERT_VECTOR_ELT:
3169  case G_SHUFFLE_VECTOR:
3170  return lowerShuffleVector(MI);
3171  case G_DYN_STACKALLOC:
3172  return lowerDynStackAlloc(MI);
3173  case G_EXTRACT:
3174  return lowerExtract(MI);
3175  case G_INSERT:
3176  return lowerInsert(MI);
3177  case G_BSWAP:
3178  return lowerBswap(MI);
3179  case G_BITREVERSE:
3180  return lowerBitreverse(MI);
3181  case G_READ_REGISTER:
3182  case G_WRITE_REGISTER:
3183  return lowerReadWriteRegister(MI);
3184  case G_UADDSAT:
3185  case G_USUBSAT: {
3186  // Try to make a reasonable guess about which lowering strategy to use. The
3187  // target can override this with custom lowering and calling the
3188  // implementation functions.
3189  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3190  if (LI.isLegalOrCustom({G_UMIN, Ty}))
3191  return lowerAddSubSatToMinMax(MI);
3192  return lowerAddSubSatToAddoSubo(MI);
3193  }
3194  case G_SADDSAT:
3195  case G_SSUBSAT: {
3196  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3197 
3198  // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3199  // since it's a shorter expansion. However, we would need to figure out the
3200  // preferred boolean type for the carry out for the query.
3201  if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3202  return lowerAddSubSatToMinMax(MI);
3203  return lowerAddSubSatToAddoSubo(MI);
3204  }
3205  case G_SSHLSAT:
3206  case G_USHLSAT:
3207  return lowerShlSat(MI);
3208  case G_ABS: {
3209  // Expand %res = G_ABS %a into:
3210  // %v1 = G_ASHR %a, scalar_size-1
3211  // %v2 = G_ADD %a, %v1
3212  // %res = G_XOR %v2, %v1
3213  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3214  Register OpReg = MI.getOperand(1).getReg();
3215  auto ShiftAmt =
3216  MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
3217  auto Shift =
3218  MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
3219  auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
3220  MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
3221  MI.eraseFromParent();
3222  return Legalized;
3223  }
3224  case G_SELECT:
3225  return lowerSelect(MI);
3226  case G_SDIVREM:
3227  case G_UDIVREM:
3228  return lowerDIVREM(MI);
3229  case G_FSHL:
3230  case G_FSHR:
3231  return lowerFunnelShift(MI);
3232  case G_ROTL:
3233  case G_ROTR:
3234  return lowerRotate(MI);
3235  }
3236 }
3237 
3239  Align MinAlign) const {
3240  // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3241  // datalayout for the preferred alignment. Also there should be a target hook
3242  // for this to allow targets to reduce the alignment and ignore the
3243  // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3244  // the type.
3246 }
3247 
3250  MachinePointerInfo &PtrInfo) {
3253  int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3254 
3255  unsigned AddrSpace = DL.getAllocaAddrSpace();
3256  LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3257 
3258  PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3259  return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3260 }
3261 
3263  LLT VecTy) {
3264  int64_t IdxVal;
3265  if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3266  return IdxReg;
3267 
3268  LLT IdxTy = B.getMRI()->getType(IdxReg);
3269  unsigned NElts = VecTy.getNumElements();
3270  if (isPowerOf2_32(NElts)) {
3271  APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3272  return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3273  }
3274 
3275  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3276  .getReg(0);
3277 }
3278 
3280  Register Index) {
3281  LLT EltTy = VecTy.getElementType();
3282 
3283  // Calculate the element offset and add it to the pointer.
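  // E.g. for a <4 x s32> vector in memory, element %idx lives at
  // %vecptr + %idx * 4.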
3284  unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3285  assert(EltSize * 8 == EltTy.getSizeInBits() &&
3286  "Converting bits to bytes lost precision");
3287 
3289 
3290  LLT IdxTy = MRI.getType(Index);
3291  auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3292  MIRBuilder.buildConstant(IdxTy, EltSize));
3293 
3294  LLT PtrTy = MRI.getType(VecPtr);
3295  return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
3296 }
3297 
3299  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
3300  Register DstReg = MI.getOperand(0).getReg();
3301  LLT DstTy = MRI.getType(DstReg);
3302  LLT LCMTy = getLCMType(DstTy, NarrowTy);
3303 
3304  unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
3305 
3306  auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
3307  SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
3308 
3309  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3310  MI.eraseFromParent();
3311  return Legalized;
3312 }
3313 
3314 // Handle splitting vector operations which need to have the same number of
3315 // elements in each type index, but each type index may have a different element
3316 // type.
3317 //
3318 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
3319 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3320 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3321 //
3322 // Also handles some irregular breakdown cases, e.g.
3323 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
3324 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3325 // s64 = G_SHL s64, s32
3328  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
3329  if (TypeIdx != 0)
3330  return UnableToLegalize;
3331 
3332  const LLT NarrowTy0 = NarrowTyArg;
3333  const unsigned NewNumElts =
3334  NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
3335 
3336  const Register DstReg = MI.getOperand(0).getReg();
3337  LLT DstTy = MRI.getType(DstReg);
3338  LLT LeftoverTy0;
3339 
3340  // All of the operands need to have the same number of elements, so if we can
3341  // determine a type breakdown for the result type, we can for all of the
3342  // source types.
3343  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
3344  if (NumParts < 0)
3345  return UnableToLegalize;
3346 
3347  SmallVector<MachineInstrBuilder, 4> NewInsts;
3348 
3349  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
3350  SmallVector<Register, 4> PartRegs, LeftoverRegs;
3351 
3352  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
3353  Register SrcReg = MI.getOperand(I).getReg();
3354  LLT SrcTyI = MRI.getType(SrcReg);
3355  LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
3356  LLT LeftoverTyI;
3357 
3358  // Split this operand into the requested typed registers, and any leftover
3359  // required to reproduce the original type.
3360  if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
3361  LeftoverRegs))
3362  return UnableToLegalize;
3363 
3364  if (I == 1) {
3365  // For the first operand, create an instruction for each part and setup
3366  // the result.
3367  for (Register PartReg : PartRegs) {
3368  Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3369  NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
3370  .addDef(PartDstReg)
3371  .addUse(PartReg));
3372  DstRegs.push_back(PartDstReg);
3373  }
3374 
3375  for (Register LeftoverReg : LeftoverRegs) {
3376  Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
3377  NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
3378  .addDef(PartDstReg)
3379  .addUse(LeftoverReg));
3380  LeftoverDstRegs.push_back(PartDstReg);
3381  }
3382  } else {
3383  assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
3384 
3385  // Add the newly created operand splits to the existing instructions. The
3386  // odd-sized pieces are ordered after the requested NarrowTyArg sized
3387  // pieces.
3388  unsigned InstCount = 0;
3389  for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
3390  NewInsts[InstCount++].addUse(PartRegs[J]);
3391  for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
3392  NewInsts[InstCount++].addUse(LeftoverRegs[J]);
3393  }
3394 
3395  PartRegs.clear();
3396  LeftoverRegs.clear();
3397  }
3398 
3399  // Insert the newly built operations and rebuild the result register.
3400  for (auto &MIB : NewInsts)
3401  MIRBuilder.insertInstr(MIB);
3402 
3403  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
3404 
3405  MI.eraseFromParent();
3406  return Legalized;
3407 }
3408 
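// Split a vector cast/conversion (e.g. G_SEXT, G_SITOFP, G_FPTRUNC) into
// NarrowTy-sized pieces: each piece is converted individually and the results
// are recombined with G_CONCAT_VECTORS, or G_BUILD_VECTOR when the pieces are
// scalars.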
3409 LegalizerHelper::LegalizeResult
3410 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
3411  LLT NarrowTy) {
3412  if (TypeIdx != 0)
3413  return UnableToLegalize;
3414 
3415  Register DstReg = MI.getOperand(0).getReg();
3416  Register SrcReg = MI.getOperand(1).getReg();
3417  LLT DstTy = MRI.getType(DstReg);
3418  LLT SrcTy = MRI.getType(SrcReg);
3419 
3420  LLT NarrowTy0 = NarrowTy;
3421  LLT NarrowTy1;
3422  unsigned NumParts;
3423 
3424  if (NarrowTy.isVector()) {
3425  // Uneven breakdown not handled.
3426  NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
3427  if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
3428  return UnableToLegalize;
3429 
3430  NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
3431  } else {
3432  NumParts = DstTy.getNumElements();
3433  NarrowTy1 = SrcTy.getElementType();
3434  }
3435 
3436  SmallVector<Register, 4> SrcRegs, DstRegs;
3437  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
3438 
3439  for (unsigned I = 0; I < NumParts; ++I) {
3440  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3441  MachineInstr *NewInst =
3442  MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
3443 
3444  NewInst->setFlags(MI.getFlags());
3445  DstRegs.push_back(DstReg);
3446  }
3447 
3448  if (NarrowTy.isVector())
3449  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3450  else
3451  MIRBuilder.buildBuildVector(DstReg, DstRegs);
3452 
3453  MI.eraseFromParent();
3454  return Legalized;
3455 }
3456 
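// Split a vector G_ICMP/G_FCMP into NumParts narrower compares on matching
// slices of the two source operands; the boolean result pieces are recombined
// to form the original result vector.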
3457 LegalizerHelper::LegalizeResult
3458 LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
3459  LLT NarrowTy) {
3460  Register DstReg = MI.getOperand(0).getReg();
3461  Register Src0Reg = MI.getOperand(2).getReg();
3462  LLT DstTy = MRI.getType(DstReg);
3463  LLT SrcTy = MRI.getType(Src0Reg);
3464 
3465  unsigned NumParts;
3466  LLT NarrowTy0, NarrowTy1;
3467 
3468  if (TypeIdx == 0) {
3469  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3470  unsigned OldElts = DstTy.getNumElements();
3471 
3472  NarrowTy0 = NarrowTy;
3473  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
3474  NarrowTy1 = NarrowTy.isVector() ?
3475  LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
3476  SrcTy.getElementType();
3477 
3478  } else {
3479  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3480  unsigned OldElts = SrcTy.getNumElements();
3481 
3482  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
3483  NarrowTy.getNumElements();
3484  NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
3485  DstTy.getScalarSizeInBits());
3486  NarrowTy1 = NarrowTy;
3487  }
3488 
3489  // FIXME: Don't know how to handle the situation where the small vectors
3490  // aren't all the same size yet.
3491  if (NarrowTy1.isVector() &&
3492  NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
3493  return UnableToLegalize;
3494 
3495  CmpInst::Predicate Pred
3496  = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3497 
3498  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
3499  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
3500  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
3501 
3502  for (unsigned I = 0; I < NumParts; ++I) {
3503  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3504  DstRegs.push_back(DstReg);
3505 
3506  if (MI.getOpcode() == TargetOpcode::G_ICMP)
3507  MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
3508  else {
3509  MachineInstr *NewCmp
3510  = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
3511  NewCmp->setFlags(MI.getFlags());
3512  }
3513  }
3514 
3515  if (NarrowTy1.isVector())
3516  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3517  else
3518  MIRBuilder.buildBuildVector(DstReg, DstRegs);
3519 
3520  MI.eraseFromParent();
3521  return Legalized;
3522 }
3523 
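// Split a G_SELECT into NarrowTy-sized selects. A scalar condition is reused
// for every piece; a vector condition is split alongside the two value
// operands.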
3524 LegalizerHelper::LegalizeResult
3525 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
3526  LLT NarrowTy) {
3527  Register DstReg = MI.getOperand(0).getReg();
3528  Register CondReg = MI.getOperand(1).getReg();
3529 
3530  unsigned NumParts = 0;
3531  LLT NarrowTy0, NarrowTy1;
3532 
3533  LLT DstTy = MRI.getType(DstReg);
3534  LLT CondTy = MRI.getType(CondReg);
3535  unsigned Size = DstTy.getSizeInBits();
3536 
3537  assert(TypeIdx == 0 || CondTy.isVector());
3538 
3539  if (TypeIdx == 0) {
3540  NarrowTy0 = NarrowTy;
3541  NarrowTy1 = CondTy;
3542 
3543  unsigned NarrowSize = NarrowTy0.getSizeInBits();
3544  // FIXME: Don't know how to handle the situation where the small vectors
3545  // aren't all the same size yet.
3546  if (Size % NarrowSize != 0)
3547  return UnableToLegalize;
3548 
3549  NumParts = Size / NarrowSize;
3550 
3551  // Need to break down the condition type
3552  if (CondTy.isVector()) {
3553  if (CondTy.getNumElements() == NumParts)
3554  NarrowTy1 = CondTy.getElementType();
3555  else
3556  NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
3557  CondTy.getScalarSizeInBits());
3558  }
3559  } else {
3560  NumParts = CondTy.getNumElements();
3561  if (NarrowTy.isVector()) {
3562  // TODO: Handle uneven breakdown.
3563  if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
3564  return UnableToLegalize;
3565 
3566  return UnableToLegalize;
3567  } else {
3568  NarrowTy0 = DstTy.getElementType();
3569  NarrowTy1 = NarrowTy;
3570  }
3571  }
3572 
3573  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
3574  if (CondTy.isVector())
3575  extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
3576 
3577  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
3578  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
3579 
3580  for (unsigned i = 0; i < NumParts; ++i) {
3581  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3582  MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
3583  Src1Regs[i], Src2Regs[i]);
3584  DstRegs.push_back(DstReg);
3585  }
3586 
3587  if (NarrowTy0.isVector())
3588  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3589  else
3590  MIRBuilder.buildBuildVector(DstReg, DstRegs);
3591 
3592  MI.eraseFromParent();
3593  return Legalized;
3594 }
3595 
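// Split a G_PHI: narrow phis (plus any leftover-typed phi) are created in the
// result block, and each incoming value is split into matching pieces just
// before the terminator of its predecessor block.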
3596 LegalizerHelper::LegalizeResult
3597 LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
3598  LLT NarrowTy) {
3599  const Register DstReg = MI.getOperand(0).getReg();
3600  LLT PhiTy = MRI.getType(DstReg);
3601  LLT LeftoverTy;
3602 
3603  // All of the operands need to have the same number of elements, so if we can
3604  // determine a type breakdown for the result type, we can for all of the
3605  // source types.
3606  int NumParts, NumLeftover;
3607  std::tie(NumParts, NumLeftover)
3608  = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
3609  if (NumParts < 0)
3610  return UnableToLegalize;
3611 
3612  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
3613  SmallVector<MachineInstrBuilder, 4> NewInsts;
3614 
3615  const int TotalNumParts = NumParts + NumLeftover;
3616 
3617  // Insert the new phis in the result block first.
3618  for (int I = 0; I != TotalNumParts; ++I) {
3619  LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
3620  Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
3621  NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
3622  .addDef(PartDstReg));
3623  if (I < NumParts)
3624  DstRegs.push_back(PartDstReg);
3625  else
3626  LeftoverDstRegs.push_back(PartDstReg);
3627  }
3628 
3629  MachineBasicBlock *MBB = MI.getParent();
3630  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
3631  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
3632 
3633  SmallVector<Register, 4> PartRegs, LeftoverRegs;
3634 
3635  // Insert code to extract the incoming values in each predecessor block.
3636  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3637  PartRegs.clear();
3638  LeftoverRegs.clear();
3639 
3640  Register SrcReg = MI.getOperand(I).getReg();
3641  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3642  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3643 
3644  LLT Unused;
3645  if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
3646  LeftoverRegs))
3647  return UnableToLegalize;
3648 
3649  // Add the newly created operand splits to the existing instructions. The
3650  // odd-sized pieces are ordered after the requested NarrowTy sized
3651  // pieces.
3652  for (int J = 0; J != TotalNumParts; ++J) {
3653  MachineInstrBuilder MIB = NewInsts[J];
3654  MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
3655  MIB.addMBB(&OpMBB);
3656  }
3657  }
3658 
3659  MI.eraseFromParent();
3660  return Legalized;
3661 }
3662 
3663 LegalizerHelper::LegalizeResult
3664 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
3665  unsigned TypeIdx,
3666  LLT NarrowTy) {
3667  if (TypeIdx != 1)
3668  return UnableToLegalize;
3669 
3670  const int NumDst = MI.getNumOperands() - 1;
3671  const Register SrcReg = MI.getOperand(NumDst).getReg();
3672  LLT SrcTy = MRI.getType(SrcReg);
3673 
3674  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3675 
3676  // TODO: Create sequence of extracts.
3677  if (DstTy == NarrowTy)
3678  return UnableToLegalize;
3679 
3680  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3681  if (DstTy == GCDTy) {
3682  // This would just be a copy of the same unmerge.
3683  // TODO: Create extracts, pad with undef and create intermediate merges.
3684  return UnableToLegalize;
3685  }
3686 
3687  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
3688  const int NumUnmerge = Unmerge->getNumOperands() - 1;
3689  const int PartsPerUnmerge = NumDst / NumUnmerge;
3690 
3691  for (int I = 0; I != NumUnmerge; ++I) {
3692  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3693 
3694  for (int J = 0; J != PartsPerUnmerge; ++J)
3695  MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3696  MIB.addUse(Unmerge.getReg(I));
3697  }
3698 
3699  MI.eraseFromParent();
3700  return Legalized;
3701 }
3702 
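// Split G_UMULO/G_SMULO: the operands are unmerged into GCD-typed pieces, the
// multiply-with-overflow is performed per piece, and the result and overflow
// pieces are re-merged into the original destination registers.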
3703 LegalizerHelper::LegalizeResult
3704 LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
3705  LLT NarrowTy) {
3706  Register Result = MI.getOperand(0).getReg();
3707  Register Overflow = MI.getOperand(1).getReg();
3708  Register LHS = MI.getOperand(2).getReg();
3709  Register RHS = MI.getOperand(3).getReg();
3710 
3711  LLT SrcTy = MRI.getType(LHS);
3712  if (!SrcTy.isVector())
3713  return UnableToLegalize;
3714 
3715  LLT ElementType = SrcTy.getElementType();
3716  LLT OverflowElementTy = MRI.getType(Overflow).getElementType();
3717  const int NumResult = SrcTy.getNumElements();
3718  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3719 
3720  // Unmerge the operands to smaller parts of GCD type.
3721  auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS);
3722  auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS);
3723 
3724  const int NumOps = UnmergeLHS->getNumOperands() - 1;
3725  const int PartsPerUnmerge = NumResult / NumOps;
3726  LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy);
3727  LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType);
3728 
3729  // Perform the operation over unmerged parts.
3730  SmallVector<Register, 8> ResultParts;
3731  SmallVector<Register, 8> OverflowParts;
3732  for (int I = 0; I != NumOps; ++I) {
3733  Register Operand1 = UnmergeLHS->getOperand(I).getReg();
3734  Register Operand2 = UnmergeRHS->getOperand(I).getReg();
3735  auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy},
3736  {Operand1, Operand2});
3737  ResultParts.push_back(PartMul->getOperand(0).getReg());
3738  OverflowParts.push_back(PartMul->getOperand(1).getReg());
3739  }
3740 
3741  LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts);
3742  LLT OverflowLCMTy =
3743  LLT::scalarOrVector(ResultLCMTy.getNumElements(), OverflowElementTy);
3744 
3745  // Recombine the pieces to the original result and overflow registers.
3746  buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts);
3747  buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts);
3748  MI.eraseFromParent();
3749  return Legalized;
3750 }
3751 
3752 // Handle FewerElementsVector for a G_BUILD_VECTOR or G_CONCAT_VECTORS that
3753 // produces a vector.
3754 //
3755 // Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
3756 // undef as necessary.
3757 //
3758 // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
3759 // -> <2 x s16>
3760 //
3761 // %4:_(s16) = G_IMPLICIT_DEF
3762 // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
3763 // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
3764 // %7:_(<2 x s16>) = G_IMPLICIT_DEF
3765 // %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
3766 // %3:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %8
3767 LegalizerHelper::LegalizeResult
3768 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
3769  LLT NarrowTy) {
3770  Register DstReg = MI.getOperand(0).getReg();
3771  LLT DstTy = MRI.getType(DstReg);
3772  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
3773  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
3774 
3775  // Break into a common type
3776  SmallVector<Register, 16> Parts;
3777  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3778  extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
3779 
3780  // Build the requested new merge, padding with undef.
3781  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
3782  TargetOpcode::G_ANYEXT);
3783 
3784  // Pack into the original result register.
3785  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3786 
3787  MI.eraseFromParent();
3788  return Legalized;
3789 }
3790 
3791 LegalizerHelper::LegalizeResult
3792 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
3793  unsigned TypeIdx,
3794  LLT NarrowVecTy) {
3795  Register DstReg = MI.getOperand(0).getReg();
3796  Register SrcVec = MI.getOperand(1).getReg();
3797  Register InsertVal;
3798  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
3799 
3800  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
3801  if (IsInsert)
3802  InsertVal = MI.getOperand(2).getReg();
3803 
3804  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
3805 
3806  // TODO: Handle total scalarization case.
3807  if (!NarrowVecTy.isVector())
3808  return UnableToLegalize;
3809 
3810  LLT VecTy = MRI.getType(SrcVec);
3811 
3812  // If the index is a constant, we can really break this down as you would
3813  // expect, and index into the target size pieces.
3814  int64_t IdxVal;
3815  if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
3816  // Avoid out of bounds indexing the pieces.
3817  if (IdxVal >= VecTy.getNumElements()) {
3818  MIRBuilder.buildUndef(DstReg);
3819  MI.eraseFromParent();
3820  return Legalized;
3821  }
3822 
3823  SmallVector<Register, 8> VecParts;
3824  LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
3825 
3826  // Build a sequence of NarrowTy pieces in VecParts for this operand.
3827  LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
3828  TargetOpcode::G_ANYEXT);
3829 
3830  unsigned NewNumElts = NarrowVecTy.getNumElements();
3831 
3832  LLT IdxTy = MRI.getType(Idx);
3833  int64_t PartIdx = IdxVal / NewNumElts;
3834  auto NewIdx =
3835  MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
3836 
3837  if (IsInsert) {
3838  LLT PartTy = MRI.getType(VecParts[PartIdx]);
3839 
3840  // Use the adjusted index to insert into one of the subvectors.
3841  auto InsertPart = MIRBuilder.buildInsertVectorElement(
3842  PartTy, VecParts[PartIdx], InsertVal, NewIdx);
3843  VecParts[PartIdx] = InsertPart.getReg(0);
3844 
3845  // Recombine the inserted subvector with the others to reform the result
3846  // vector.
3847  buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
3848  } else {
3849  MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
3850  }
3851 
3852  MI.eraseFromParent();
3853  return Legalized;
3854  }
3855 
3856  // With a variable index, we can't perform the operation in a smaller type, so
3857  // we're forced to expand this.
3858  //
3859  // TODO: We could emit a chain of compare/select to figure out which piece to
3860  // index.
3861  return lowerExtractInsertVectorElt(MI);
3862 }
3863 
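// Split a simple (non-atomic, non-extending) G_LOAD/G_STORE into NarrowTy-sized
// memory accesses at increasing byte offsets, with a leftover-typed access for
// any remainder that does not divide evenly.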
3864 LegalizerHelper::LegalizeResult
3865 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
3866  LLT NarrowTy) {
3867  // FIXME: Don't know how to handle secondary types yet.
3868  if (TypeIdx != 0)
3869  return UnableToLegalize;
3870 
3871  MachineMemOperand *MMO = *MI.memoperands_begin();
3872 
3873  // This implementation doesn't work for atomics. Give up instead of doing
3874  // something invalid.
3875  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
3876  MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
3877  return UnableToLegalize;
3878 
3879  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
3880  Register ValReg = MI.getOperand(0).getReg();
3881  Register AddrReg = MI.getOperand(1).getReg();
3882  LLT ValTy = MRI.getType(ValReg);
3883 
3884  // FIXME: Do we need a distinct NarrowMemory legalize action?
3885  if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
3886  LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
3887  return UnableToLegalize;
3888  }
3889 
3890  int NumParts = -1;
3891  int NumLeftover = -1;
3892  LLT LeftoverTy;
3893  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
3894  if (IsLoad) {
3895  std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
3896  } else {
3897  if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
3898  NarrowLeftoverRegs)) {
3899  NumParts = NarrowRegs.size();
3900  NumLeftover = NarrowLeftoverRegs.size();
3901  }
3902  }
3903 
3904  if (NumParts == -1)
3905  return UnableToLegalize;
3906 
3907  LLT PtrTy = MRI.getType(AddrReg);
3908  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
3909 
3910  unsigned TotalSize = ValTy.getSizeInBits();
3911 
3912  // Split the load/store into PartTy sized pieces starting at Offset. If this
3913  // is a load, return the new registers in ValRegs. For a store, each element
3914  // of ValRegs should be PartTy. Returns the next offset that needs to be
3915  // handled.
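// For example, narrowing an s96 load with NarrowTy = s32 produces three s32
// loads at byte offsets 0, 4 and 8 from the original address.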
3916  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
3917  unsigned Offset) -> unsigned {
3918  MachineFunction &MF = MIRBuilder.getMF();
3919  unsigned PartSize = PartTy.getSizeInBits();
3920  for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
3921  Offset += PartSize, ++Idx) {
3922  unsigned ByteSize = PartSize / 8;
3923  unsigned ByteOffset = Offset / 8;
3924  Register NewAddrReg;
3925 
3926  MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
3927 
3928  MachineMemOperand *NewMMO =
3929  MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
3930 
3931  if (IsLoad) {
3932  Register Dst = MRI.createGenericVirtualRegister(PartTy);
3933  ValRegs.push_back(Dst);
3934  MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
3935  } else {
3936  MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
3937  }
3938  }
3939 
3940  return Offset;
3941  };
3942 
3943  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
3944 
3945  // Handle the rest of the register if this isn't an even type breakdown.
3946  if (LeftoverTy.isValid())
3947  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
3948 
3949  if (IsLoad) {
3950  insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
3951  LeftoverTy, NarrowLeftoverRegs);
3952  }
3953 
3954  MI.eraseFromParent();
3955  return Legalized;
3956 }
3957 
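// Legalize an elementwise operation by operating on narrower pieces: either
// narrow the scalar type (bitcasting vector sources to scalars first) or
// reduce the number of vector elements, padding with undef so the pieces can
// be re-merged into the original result type.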
3958 LegalizerHelper::LegalizeResult
3959 LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned TypeIdx,
3960  LLT NarrowTy) {
3961  assert(TypeIdx == 0 && "only one type index expected");
3962 
3963  const unsigned Opc = MI.getOpcode();
3964  const int NumOps = MI.getNumOperands() - 1;
3965  const Register DstReg = MI.getOperand(0).getReg();
3966  const unsigned Flags = MI.getFlags();
3967  const unsigned NarrowSize = NarrowTy.getSizeInBits();
3968  const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
3969 
3970  assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources");
3971 
3972  // First of all, check whether we are narrowing (changing the element type)
3973  // or reducing the number of vector elements.
3974  const LLT DstTy = MRI.getType(DstReg);
3975  const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
3976 
3977  SmallVector<Register, 8> ExtractedRegs[3];
3979 
3980  unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3981 
3982  // Break down all the sources into NarrowTy pieces we can operate on. This may
3983  // involve creating merges to a wider type, padded with undef.
3984  for (int I = 0; I != NumOps; ++I) {
3985  Register SrcReg = MI.getOperand(I + 1).getReg();
3986  LLT SrcTy = MRI.getType(SrcReg);
3987 
3988  // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
3989  // For fewerElements, this is a smaller vector with the same element type.
3990  LLT OpNarrowTy;
3991  if (IsNarrow) {
3992  OpNarrowTy = NarrowScalarTy;
3993 
3994  // In case of narrowing, we need to cast vectors to scalars for this to
3995  // work properly
3996  // FIXME: Can we do without the bitcast here if we're narrowing?
3997  if (SrcTy.isVector()) {
3998  SrcTy = LLT::scalar(SrcTy.getSizeInBits());
3999  SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
4000  }
4001  } else {
4002  OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType());
4003  }
4004 
4005  LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
4006 
4007  // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
4008  buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
4009  TargetOpcode::G_ANYEXT);
4010  }
4011 
4012  SmallVector<Register, 8> ResultRegs;
4013 
4014  // Input operands for each sub-instruction.
4015  SmallVector<SrcOp, 4> InputRegs(NumOps, Register());
4016 
4017  int NumParts = ExtractedRegs[0].size();
4018  const unsigned DstSize = DstTy.getSizeInBits();
4019  const LLT DstScalarTy = LLT::scalar(DstSize);
4020 
4021  // Narrowing needs to use scalar types
4022  LLT DstLCMTy, NarrowDstTy;
4023  if (IsNarrow) {
4024  DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
4025  NarrowDstTy = NarrowScalarTy;
4026  } else {
4027  DstLCMTy = getLCMType(DstTy, NarrowTy);
4028  NarrowDstTy = NarrowTy;
4029  }
4030 
4031  // We widened the source registers to satisfy merge/unmerge size
4032  // constraints. We'll have some extra fully undef parts.
4033  const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
4034 
4035  for (int I = 0; I != NumRealParts; ++I) {
4036  // Emit this instruction on each of the split pieces.
4037  for (int J = 0; J != NumOps; ++J)
4038  InputRegs[J] = ExtractedRegs[J][I];
4039 
4040  auto Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
4041  ResultRegs.push_back(Inst.getReg(0));
4042  }
4043 
4044  // Fill out the widened result with undef instead of creating instructions
4045  // with undef inputs.
4046  int NumUndefParts = NumParts - NumRealParts;
4047  if (NumUndefParts != 0)
4048  ResultRegs.append(NumUndefParts,
4049  MIRBuilder.buildUndef(NarrowDstTy).getReg(0));
4050 
4051  // Extract the possibly padded result. Use a scratch register if we need to do
4052  // a final bitcast, otherwise use the original result register.
4053  Register MergeDstReg;
4054  if (IsNarrow && DstTy.isVector())
4055  MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
4056  else
4057  MergeDstReg = DstReg;
4058 
4059  buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs);
4060 
4061  // Recast to vector if we narrowed a vector
4062  if (IsNarrow && DstTy.isVector())
4063  MIRBuilder.buildBitcast(DstReg, MergeDstReg);
4064 
4065  MI.eraseFromParent();
4066  return Legalized;
4067 }
4068 
4069 LegalizerHelper::LegalizeResult
4070 LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
4071  LLT NarrowTy) {
4072  Register DstReg = MI.getOperand(0).getReg();
4073  Register SrcReg = MI.getOperand(1).getReg();
4074  int64_t Imm = MI.getOperand(2).getImm();
4075 
4076  LLT DstTy = MRI.getType(DstReg);
4077 
4079  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
4080  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
4081 
4082  for (Register &R : Parts)
4083  R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
4084 
4085  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
4086 
4087  MI.eraseFromParent();
4088  return Legalized;
4089 }
4090 
4091 LegalizerHelper::LegalizeResult
4092 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4093  LLT NarrowTy) {
4094  using namespace TargetOpcode;
4095 
4096  switch (MI.getOpcode()) {
4097  case G_IMPLICIT_DEF:
4098  return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
4099  case G_TRUNC:
4100  case G_AND:
4101  case G_OR:
4102  case G_XOR:
4103  case G_ADD:
4104  case G_SUB:
4105  case G_MUL:
4106  case G_PTR_ADD:
4107  case G_SMULH:
4108  case G_UMULH:
4109  case G_FADD:
4110  case G_FMUL:
4111  case G_FSUB:
4112  case G_FNEG:
4113  case G_FABS:
4114  case G_FCANONICALIZE:
4115  case G_FDIV:
4116  case G_FREM:
4117  case G_FMA:
4118  case G_FMAD:
4119  case G_FPOW:
4120  case G_FEXP:
4121  case G_FEXP2:
4122  case G_FLOG:
4123  case G_FLOG2:
4124  case G_FLOG10:
4125  case G_FNEARBYINT:
4126  case G_FCEIL:
4127  case G_FFLOOR:
4128  case G_FRINT:
4129  case G_INTRINSIC_ROUND:
4130  case G_INTRINSIC_ROUNDEVEN:
4131  case G_INTRINSIC_TRUNC:
4132  case G_FCOS:
4133  case G_FSIN:
4134  case G_FSQRT:
4135  case G_BSWAP:
4136  case G_BITREVERSE:
4137  case G_SDIV:
4138  case G_UDIV:
4139  case G_SREM:
4140  case G_UREM:
4141  case G_SMIN:
4142  case G_SMAX:
4143  case G_UMIN:
4144  case G_UMAX:
4145  case G_FMINNUM:
4146  case G_FMAXNUM:
4147  case G_FMINNUM_IEEE:
4148  case G_FMAXNUM_IEEE:
4149  case G_FMINIMUM:
4150  case G_FMAXIMUM:
4151  case G_FSHL:
4152  case G_FSHR:
4153  case G_FREEZE:
4154  case G_SADDSAT:
4155  case G_SSUBSAT:
4156  case G_UADDSAT:
4157  case G_USUBSAT:
4158  return reduceOperationWidth(MI, TypeIdx, NarrowTy);
4159  case G_UMULO:
4160  case G_SMULO:
4161  return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy);
4162  case G_SHL:
4163  case G_LSHR:
4164  case G_ASHR:
4165  case G_SSHLSAT:
4166  case G_USHLSAT:
4167  case G_CTLZ:
4168  case G_CTLZ_ZERO_UNDEF:
4169  case G_CTTZ:
4170  case G_CTTZ_ZERO_UNDEF:
4171  case G_CTPOP:
4172  case G_FCOPYSIGN:
4173  return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
4174  case G_ZEXT:
4175  case G_SEXT:
4176  case G_ANYEXT:
4177  case G_FPEXT:
4178  case G_FPTRUNC:
4179  case G_SITOFP:
4180  case G_UITOFP:
4181  case G_FPTOSI:
4182  case G_FPTOUI:
4183  case G_INTTOPTR:
4184  case G_PTRTOINT:
4185  case G_ADDRSPACE_CAST:
4186  return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
4187  case G_ICMP:
4188  case G_FCMP:
4189  return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
4190  case G_SELECT:
4191  return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
4192  case G_PHI:
4193  return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
4194  case G_UNMERGE_VALUES:
4195  return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4196  case G_BUILD_VECTOR:
4197  assert(TypeIdx == 0 && "not a vector type index");
4198  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4199  case G_CONCAT_VECTORS:
4200  if (TypeIdx != 1) // TODO: This probably does work as expected already.
4201  return UnableToLegalize;
4202  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4203  case G_EXTRACT_VECTOR_ELT:
4204  case G_INSERT_VECTOR_ELT:
4205  return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4206  case G_LOAD:
4207  case G_STORE:
4208  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
4209  case G_SEXT_INREG:
4210  return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
4211  GISEL_VECREDUCE_CASES_NONSEQ
4212  return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4213  default:
4214  return UnableToLegalize;
4215  }
4216 }
4217 
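// Split a non-sequential vector reduction: the source is cut into NarrowTy
// pieces, each piece is reduced separately, and the partial results are
// combined with the corresponding scalar opcode (or with a pairwise tree of
// vector ops when the element counts are powers of two).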
4218 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4219  MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4220  unsigned Opc = MI.getOpcode();
4221  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
4222  Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
4223  "Sequential reductions not expected");
4224 
4225  if (TypeIdx != 1)
4226  return UnableToLegalize;
4227 
4228  // The semantics of the normal non-sequential reductions allow us to freely
4229  // re-associate the operation.
4230  Register SrcReg = MI.getOperand(1).getReg();
4231  LLT SrcTy = MRI.getType(SrcReg);
4232  Register DstReg = MI.getOperand(0).getReg();
4233  LLT DstTy = MRI.getType(DstReg);
4234 
4235  if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)
4236  return UnableToLegalize;
4237 
4238  SmallVector<Register> SplitSrcs;
4239  const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements();
4240  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
4241  SmallVector<Register> PartialReductions;
4242  for (unsigned Part = 0; Part < NumParts; ++Part) {
4243  PartialReductions.push_back(
4244  MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
4245  }
4246 
4247  unsigned ScalarOpc;
4248  switch (Opc) {
4249  case TargetOpcode::G_VECREDUCE_FADD:
4250  ScalarOpc = TargetOpcode::G_FADD;
4251  break;
4252  case TargetOpcode::G_VECREDUCE_FMUL:
4253  ScalarOpc = TargetOpcode::G_FMUL;
4254  break;
4255  case TargetOpcode::G_VECREDUCE_FMAX:
4256  ScalarOpc = TargetOpcode::G_FMAXNUM;
4257  break;
4258  case TargetOpcode::G_VECREDUCE_FMIN:
4259  ScalarOpc = TargetOpcode::G_FMINNUM;
4260  break;
4261  case TargetOpcode::G_VECREDUCE_ADD:
4262  ScalarOpc = TargetOpcode::G_ADD;
4263  break;
4264  case TargetOpcode::G_VECREDUCE_MUL:
4265  ScalarOpc = TargetOpcode::G_MUL;
4266  break;
4267  case TargetOpcode::G_VECREDUCE_AND:
4268  ScalarOpc = TargetOpcode::G_AND;
4269  break;
4270  case TargetOpcode::G_VECREDUCE_OR:
4271  ScalarOpc = TargetOpcode::G_OR;
4272  break;
4273  case TargetOpcode::G_VECREDUCE_XOR:
4274  ScalarOpc = TargetOpcode::G_XOR;
4275  break;
4276  case TargetOpcode::G_VECREDUCE_SMAX:
4277  ScalarOpc = TargetOpcode::G_SMAX;
4278  break;
4279  case TargetOpcode::G_VECREDUCE_SMIN:
4280  ScalarOpc = TargetOpcode::G_SMIN;
4281  break;
4282  case TargetOpcode::G_VECREDUCE_UMAX:
4283  ScalarOpc = TargetOpcode::G_UMAX;
4284  break;
4285  case TargetOpcode::G_VECREDUCE_UMIN:
4286  ScalarOpc = TargetOpcode::G_UMIN;
4287  break;
4288  default:
4289  LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n");
4290  return UnableToLegalize;
4291  }
4292 
4293  // If the types involved are powers of 2, we can generate intermediate vector
4294  // ops, before generating a final reduction operation.
4295  if (isPowerOf2_32(SrcTy.getNumElements()) &&
4296  isPowerOf2_32(NarrowTy.getNumElements())) {
4297  return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
4298  }
4299 
4300  Register Acc = PartialReductions[0];
4301  for (unsigned Part = 1; Part < NumParts; ++Part) {
4302  if (Part == NumParts - 1) {
4303  MIRBuilder.buildInstr(ScalarOpc, {DstReg},
4304  {Acc, PartialReductions[Part]});
4305  } else {
4306  Acc = MIRBuilder
4307  .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
4308  .getReg(0);
4309  }
4310  }
4311  MI.eraseFromParent();
4312  return Legalized;
4313 }
4314 
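// Pairwise-combine NarrowTy-sized pieces of the source with vector ops until a
// single NarrowTy value remains, then rewrite the original reduction to operate
// on that value.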
4315 LegalizerHelper::LegalizeResult
4316 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
4317  LLT SrcTy, LLT NarrowTy,
4318  unsigned ScalarOpc) {
4319  SmallVector<Register> SplitSrcs;
4320  // Split the sources into NarrowTy size pieces.
4321  extractParts(SrcReg, NarrowTy,
4322  SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
4323  // We're going to do a tree reduction using vector operations until we have
4324  // one NarrowTy size value left.
4325  while (SplitSrcs.size() > 1) {
4326  SmallVector<Register> PartialRdxs;
4327  for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
4328  Register LHS = SplitSrcs[Idx];
4329  Register RHS = SplitSrcs[Idx + 1];
4330  // Create the intermediate vector op.
4331  Register Res =
4332  MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
4333  PartialRdxs.push_back(Res);
4334  }
4335  SplitSrcs = std::move(PartialRdxs);
4336  }
4337  // Finally generate the requested NarrowTy based reduction.
4338  Observer.changingInstr(MI);
4339  MI.getOperand(1).setReg(SplitSrcs[0]);
4340  Observer.changedInstr(MI);
4341  return Legalized;
4342 }
4343 
4344 LegalizerHelper::LegalizeResult
4345 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
4346  const LLT HalfTy, const LLT AmtTy) {
4347 
4348  Register InL = MRI.createGenericVirtualRegister(HalfTy);
4349  Register InH = MRI.createGenericVirtualRegister(HalfTy);
4350  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4351 
4352  if (Amt.isNullValue()) {
4353  MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
4354  MI.eraseFromParent();
4355  return Legalized;
4356  }
4357 
4358  LLT NVT = HalfTy;
4359  unsigned NVTBits = HalfTy.getSizeInBits();
4360  unsigned VTBits = 2 * NVTBits;
4361 
4362  SrcOp Lo(Register(0)), Hi(Register(0));
4363  if (MI.getOpcode() == TargetOpcode::G_SHL) {
4364  if (Amt.ugt(VTBits)) {
4365  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4366  } else if (Amt.ugt(NVTBits)) {
4367  Lo = MIRBuilder.buildConstant(NVT, 0);
4368  Hi = MIRBuilder.buildShl(NVT, InL,
4369  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4370  } else if (Amt == NVTBits) {
4371  Lo = MIRBuilder.buildConstant(NVT, 0);
4372  Hi = InL;
4373  } else {
4374  Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
4375  auto OrLHS =
4376  MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
4377  auto OrRHS = MIRBuilder.buildLShr(
4378  NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4379  Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4380  }
4381  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4382  if (Amt.ugt(VTBits)) {
4383  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4384  } else if (Amt.ugt(NVTBits)) {
4385  Lo = MIRBuilder.buildLShr(NVT, InH,
4386  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4387  Hi = MIRBuilder.buildConstant(NVT, 0);
4388  } else if (Amt == NVTBits) {
4389  Lo = InH;
4390  Hi = MIRBuilder.buildConstant(NVT, 0);
4391  } else {
4392  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4393 
4394  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4395  auto OrRHS = MIRBuilder.buildShl(
4396  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4397 
4398  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4399  Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
4400  }
4401  } else {
4402  if (Amt.ugt(VTBits)) {
4403  Hi = Lo = MIRBuilder.buildAShr(
4404  NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4405  } else if (Amt.ugt(NVTBits)) {
4406  Lo = MIRBuilder.buildAShr(NVT, InH,
4407  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4408  Hi = MIRBuilder.buildAShr(NVT, InH,
4409  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4410  } else if (Amt == NVTBits) {
4411  Lo = InH;
4412  Hi = MIRBuilder.buildAShr(NVT, InH,
4413  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4414  } else {
4415  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4416 
4417  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4418  auto OrRHS = MIRBuilder.buildShl(
4419  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4420 
4421  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4422  Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
4423  }
4424  }
4425 
4426  MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
4427  MI.eraseFromParent();
4428 
4429  return Legalized;
4430 }
4431 
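// Narrow a G_SHL/G_LSHR/G_ASHR to operations on half-width registers. Constant
// amounts are handled by narrowScalarShiftByConstant; a variable amount is
// expanded with selects on whether it is zero, shorter than the half width, or
// longer.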
4432 // TODO: Optimize if constant shift amount.
4433 LegalizerHelper::LegalizeResult
4434 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
4435  LLT RequestedTy) {
4436  if (TypeIdx == 1) {
4437  Observer.changingInstr(MI);
4438  narrowScalarSrc(MI, RequestedTy, 2);
4439  Observer.changedInstr(MI);
4440  return Legalized;
4441  }
4442 
4443  Register DstReg = MI.getOperand(0).getReg();
4444  LLT DstTy = MRI.getType(DstReg);
4445  if (DstTy.isVector())
4446  return UnableToLegalize;
4447 
4448  Register Amt = MI.getOperand(2).getReg();
4449  LLT ShiftAmtTy = MRI.getType(Amt);
4450  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
4451  if (DstEltSize % 2 != 0)
4452  return UnableToLegalize;
4453 
4454  // Ignore the input type. We can only go to exactly half the size of the
4455  // input. If that isn't small enough, the resulting pieces will be further
4456  // legalized.
4457  const unsigned NewBitSize = DstEltSize / 2;
4458  const LLT HalfTy = LLT::scalar(NewBitSize);
4459  const LLT CondTy = LLT::scalar(1);
4460 
4461  if (const MachineInstr *KShiftAmt =
4462  getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
4463  return narrowScalarShiftByConstant(
4464  MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
4465  }
4466 
4467  // TODO: Expand with known bits.
4468 
4469  // Handle the fully general expansion by an unknown amount.
4470  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
4471 
4472  Register InL = MRI.createGenericVirtualRegister(HalfTy);
4473  Register InH = MRI.createGenericVirtualRegister(HalfTy);
4474  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4475 
4476  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
4477  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
4478 
4479  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
4480  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
4481  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
4482 
4483  Register ResultRegs[2];
4484  switch (MI.getOpcode()) {
4485  case TargetOpcode::G_SHL: {
4486  // Short: ShAmt < NewBitSize
4487  auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
4488 
4489  auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
4490  auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
4491  auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
4492 
4493  // Long: ShAmt >= NewBitSize
4494  auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
4495  auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
4496 
4497  auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
4498  auto Hi = MIRBuilder.buildSelect(
4499  HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
4500 
4501  ResultRegs[0] = Lo.getReg(0);
4502  ResultRegs[1] = Hi.getReg(0);
4503  break;
4504  }
4505  case TargetOpcode::G_LSHR:
4506  case TargetOpcode::G_ASHR: {
4507  // Short: ShAmt < NewBitSize
4508  auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
4509 
4510  auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
4511  auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
4512  auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
4513 
4514  // Long: ShAmt >= NewBitSize
4515  MachineInstrBuilder HiL;
4516  if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4517  HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
4518  } else {
4519  auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
4520  HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
4521  }
4522  auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
4523  {InH, AmtExcess}); // Lo from Hi part.
4524 
4525  auto Lo = MIRBuilder.buildSelect(
4526  HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
4527 
4528  auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
4529 
4530  ResultRegs[0] = Lo.getReg(0);
4531  ResultRegs[1] = Hi.getReg(0);
4532  break;
4533  }
4534  default:
4535  llvm_unreachable("not a shift");
4536  }
4537 
4538  MIRBuilder.buildMerge(DstReg, ResultRegs);
4539  MI.eraseFromParent();
4540  return Legalized;
4541 }
4542 
4543 LegalizerHelper::LegalizeResult
4544 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
4545  LLT MoreTy) {
4546  assert(TypeIdx == 0 && "Expecting only Idx 0");
4547 
4547 
4548  Observer.changingInstr(MI);
4549  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
4550  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
4551  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
4552  moreElementsVectorSrc(MI, MoreTy, I);
4553  }
4554 
4555  MachineBasicBlock &MBB = *MI.getParent();
4556  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
4557  moreElementsVectorDst(MI, MoreTy, 0);
4558  Observer.changedInstr(MI);
4559  return Legalized;
4560 }
4561 
4562 LegalizerHelper::LegalizeResult
4563 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
4564  LLT MoreTy) {
4565  unsigned Opc = MI.getOpcode();
4566  switch (Opc) {
4567  case TargetOpcode::G_IMPLICIT_DEF:
4568  case TargetOpcode::G_LOAD: {
4569  if (TypeIdx != 0)
4570  return UnableToLegalize;
4571  Observer.changingInstr(MI);
4572  moreElementsVectorDst(MI, MoreTy, 0);
4573  Observer.changedInstr(MI);
4574  return Legalized;
4575  }
4576  case TargetOpcode::G_STORE:
4577  if (TypeIdx != 0)
4578  return UnableToLegalize;
4579  Observer.changingInstr(MI);
4580  moreElementsVectorSrc(MI, MoreTy, 0);
4581  Observer.changedInstr(MI);
4582  return Legalized;
4583  case TargetOpcode::G_AND:
4584  case TargetOpcode::G_OR:
4585  case TargetOpcode::G_XOR:
4586  case TargetOpcode::G_SMIN:
4587  case TargetOpcode::G_SMAX:
4588  case TargetOpcode::G_UMIN:
4589  case TargetOpcode::G_UMAX:
4590  case TargetOpcode::G_FMINNUM:
4591  case TargetOpcode::G_FMAXNUM:
4592  case TargetOpcode::G_FMINNUM_IEEE:
4593  case TargetOpcode::G_FMAXNUM_IEEE:
4594  case TargetOpcode::G_FMINIMUM:
4595  case TargetOpcode::G_FMAXIMUM: {
4596  Observer.changingInstr(MI);
4597  moreElementsVectorSrc(MI, MoreTy, 1);
4598  moreElementsVectorSrc(MI, MoreTy, 2);
4599  moreElementsVectorDst(MI, MoreTy, 0);
4600  Observer.changedInstr(MI);
4601  return Legalized;
4602  }
4603  case TargetOpcode::G_EXTRACT:
4604  if (TypeIdx != 1)
4605  return UnableToLegalize;
4606  Observer.changingInstr(MI);
4607  moreElementsVectorSrc(MI, MoreTy, 1);
4608  Observer.changedInstr(MI);
4609  return Legalized;
4610  case TargetOpcode::G_INSERT:
4611  case TargetOpcode::G_FREEZE:
4612  if (TypeIdx != 0)
4613  return UnableToLegalize;
4614  Observer.changingInstr(MI);
4615  moreElementsVectorSrc(MI, MoreTy, 1);
4616  moreElementsVectorDst(MI, MoreTy, 0);
4617  Observer.changedInstr(MI);
4618  return Legalized;
4619  case TargetOpcode::G_SELECT:
4620  if (TypeIdx != 0)
4621  return UnableToLegalize;
4622  if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4623  return UnableToLegalize;
4624 
4625  Observer.changingInstr(MI);
4626  moreElementsVectorSrc(MI, MoreTy, 2);
4627  moreElementsVectorSrc(MI, MoreTy, 3);
4628  moreElementsVectorDst(MI, MoreTy, 0);
4629  Observer.changedInstr(MI);
4630  return Legalized;
4631  case TargetOpcode::G_UNMERGE_VALUES: {
4632  if (TypeIdx != 1)
4633  return UnableToLegalize;
4634 
4635  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4636  int NumDst = MI.getNumOperands() - 1;
4637  moreElementsVectorSrc(MI, MoreTy, NumDst);
4638 
4639  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
4640  for (int I = 0; I != NumDst; ++I)
4641  MIB.addDef(MI.getOperand(I).getReg());
4642 
4643  int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
4644  for (int I = NumDst; I != NewNumDst; ++I)
4645  MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
4646 
4647  MIB.addUse(MI.getOperand(NumDst).getReg());
4648  MI.eraseFromParent();
4649  return Legalized;
4650  }
4651  case TargetOpcode::G_PHI:
4652  return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
4653  default:
4654  return UnableToLegalize;
4655  }
4656 }
4657 
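// Schoolbook multiplication on NarrowTy-sized limbs: for each destination limb,
// sum the low halves of the contributing products, the high halves of the
// previous limb's products, and the carries accumulated so far, tracking new
// carries with G_UADDO and G_ZEXT.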
4658 void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
4659  ArrayRef<Register> Src1Regs,
4660  ArrayRef<Register> Src2Regs,
4661  LLT NarrowTy) {
4662  MachineIRBuilder &B = MIRBuilder;
4663  unsigned SrcParts = Src1Regs.size();
4664  unsigned DstParts = DstRegs.size();
4665 
4666  unsigned DstIdx = 0; // Low bits of the result.
4667  Register FactorSum =
4668  B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
4669  DstRegs[DstIdx] = FactorSum;
4670 
4671  unsigned CarrySumPrevDstIdx;
4672  SmallVector<Register, 4> Factors;
4673 
4674  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
4675  // Collect low parts of muls for DstIdx.
4676  for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
4677  i <= std::min(DstIdx, SrcParts - 1); ++i) {
4678  MachineInstrBuilder Mul =
4679  B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
4680  Factors.push_back(Mul.getReg(0));
4681  }
4682  // Collect high parts of muls from previous DstIdx.
4683  for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
4684  i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
4685  MachineInstrBuilder Umulh =
4686  B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
4687  Factors.push_back(Umulh.getReg(0));
4688  }
4689  // Add CarrySum from additions calculated for previous DstIdx.
4690  if (DstIdx != 1) {
4691  Factors.push_back(CarrySumPrevDstIdx);
4692  }
4693 
4694  Register CarrySum;
4695  // Add all factors and accumulate all carries into CarrySum.
4696  if (DstIdx != DstParts - 1) {
4697  MachineInstrBuilder Uaddo =
4698  B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
4699  FactorSum = Uaddo.getReg(0);
4700  CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
4701  for (unsigned i = 2; i < Factors.size(); ++i) {
4702  MachineInstrBuilder Uaddo =
4703  B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
4704  FactorSum = Uaddo.getReg(0);
4705  MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
4706  CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
4707  }
4708  } else {
4709  // Since the value for the next index is not calculated, neither is CarrySum.
4710  FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
4711  for (unsigned i = 2; i < Factors.size(); ++i)
4712  FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
4713  }
4714 
4715  CarrySumPrevDstIdx = CarrySum;
4716  DstRegs[DstIdx] = FactorSum;
4717  Factors.clear();
4718  }
4719 }
4720 
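// Narrow a wide add/sub (with or without carry in/out) into a chain of
// NarrowTy-sized pieces linked by carry-setting and carry-consuming
// G_UADDO/G_UADDE (or G_USUBO/G_USUBE) instructions; the signed variants
// G_SADDE/G_SSUBE are used for the final piece when a signed overflow result
// is required.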
4721 LegalizerHelper::LegalizeResult
4722 LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
4723  LLT NarrowTy) {
4724  if (TypeIdx != 0)
4725  return UnableToLegalize;
4726 
4727  Register DstReg = MI.getOperand(0).getReg();
4728  LLT DstType = MRI.getType(DstReg);
4729  // FIXME: add support for vector types
4730  if (DstType.isVector())
4731  return UnableToLegalize;
4732 
4733  uint64_t SizeOp0 = DstType.getSizeInBits();
4734  uint64_t NarrowSize = NarrowTy.getSizeInBits();
4735 
4736  // FIXME: add support for when SizeOp0 isn't an exact multiple of
4737  // NarrowSize.
4738  if (SizeOp0 % NarrowSize != 0)
4739  return UnableToLegalize;
4740 
4741  // Expand in terms of carry-setting/consuming G_<Op>E instructions.
4742  int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
4743 
4744  unsigned Opcode = MI.getOpcode();
4745  unsigned OpO, OpE, OpF;
4746  switch (Opcode) {
4747  case TargetOpcode::G_SADDO:
4748  case TargetOpcode::G_SADDE:
4749  case TargetOpcode::G_UADDO:
4750  case TargetOpcode::G_UADDE:
4751  case TargetOpcode::G_ADD:
4752  OpO = TargetOpcode::G_UADDO;
4753  OpE = TargetOpcode::G_UADDE;
4754  OpF = TargetOpcode::G_UADDE;
4755  if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
4756  OpF = TargetOpcode::G_SADDE;
4757  break;
4758  case TargetOpcode::G_SSUBO:
4759  case TargetOpcode::G_SSUBE:
4760  case TargetOpcode::G_USUBO:
4761  case TargetOpcode::G_USUBE:
4762  case TargetOpcode::G_SUB:
4763  OpO = TargetOpcode::G_USUBO;
4764  OpE = TargetOpcode::G_USUBE;
4765  OpF = TargetOpcode::G_USUBE;
4766  if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
4767  OpF = TargetOpcode::G_SSUBE;
4768  break;
4769  default:
4770  llvm_unreachable("Unexpected add/sub opcode!");
4771  }
4772 
4773  // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
4774  unsigned NumDefs = MI.getNumExplicitDefs();
4775  Register Src1 = MI.getOperand(NumDefs).getReg();
4776  Register Src2 = MI.getOperand(NumDefs + 1).getReg();
4777  Register CarryDst;
4778  if (NumDefs == 2)
4779  CarryDst = MI.getOperand(1).getReg();
4780  Register CarryIn;
4781  if (MI.getNumOperands() == NumDefs + 3)
4782  CarryIn = MI.getOperand(NumDefs + 2).getReg();