//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file implements the LegalizerHelper class to legalize
/// individual instructions and the LegalizeMachineIR wrapper pass for the
/// primary legalization.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"

#define DEBUG_TYPE "legalizer"

using namespace llvm;
using namespace LegalizeActions;
using namespace MIPatternMatch;

/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
///
/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
/// with any leftover piece as type \p LeftoverTy
///
/// Returns -1 in the first element of the pair if the breakdown is not
/// satisfiable.
static std::pair<int, int>
getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned Size = OrigTy.getSizeInBits();
  unsigned NarrowSize = NarrowTy.getSizeInBits();
  unsigned NumParts = Size / NarrowSize;
  unsigned LeftoverSize = Size - NumParts * NarrowSize;
  assert(Size > NarrowSize);

  if (LeftoverSize == 0)
    return {NumParts, 0};

  if (NarrowTy.isVector()) {
    unsigned EltSize = OrigTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return {-1, -1};
    LeftoverTy = LLT::scalarOrVector(
        ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
  return std::make_pair(NumParts, NumLeftover);
}

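// Worked example for the breakdown above: OrigTy = <6 x s32> (192 bits) with
// NarrowTy = <4 x s32> (128 bits) gives NumParts = 1 and 64 leftover bits;
// 64 is a multiple of the 32-bit element size, so LeftoverTy becomes
// <2 x s32> and the result is {1, 1}. For scalars, OrigTy = s88 with
// NarrowTy = s64 gives {1, 1} with LeftoverTy = s24.
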
static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
  if (!Ty.isScalar())
    return nullptr;

  switch (Ty.getSizeInBits()) {
  case 16:
    return Type::getHalfTy(Ctx);
  case 32:
    return Type::getFloatTy(Ctx);
  case 64:
    return Type::getDoubleTy(Ctx);
  case 80:
    return Type::getX86_FP80Ty(Ctx);
  case 128:
    return Type::getFP128Ty(Ctx);
  default:
    return nullptr;
  }
}

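// Usage note: an s64 value maps to the IR double type here; vector types and
// unsupported scalar widths (e.g. s8) return nullptr, which callers such as
// the libcall lowering below must treat as "no libcall available".
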
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()) { }

LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
    : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
      TLI(*MF.getSubtarget().getTargetLowering()) { }

LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
                                   LostDebugLocObserver &LocObserver) {
  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);

  MIRBuilder.setInstrAndDebugLoc(MI);

  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
      MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
    return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
  auto Step = LI.getAction(MI, MRI);
  switch (Step.Action) {
  case Legal:
    LLVM_DEBUG(dbgs() << ".. Already legal\n");
    return AlreadyLegal;
  case Libcall:
    LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    return libcall(MI, LocObserver);
  case NarrowScalar:
    LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    return narrowScalar(MI, Step.TypeIdx, Step.NewType);
  case WidenScalar:
    LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    return widenScalar(MI, Step.TypeIdx, Step.NewType);
  case Bitcast:
    LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    return bitcast(MI, Step.TypeIdx, Step.NewType);
  case Lower:
    LLVM_DEBUG(dbgs() << ".. Lower\n");
    return lower(MI, Step.TypeIdx, Step.NewType);
  case FewerElements:
    LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
  case MoreElements:
    LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
  case Custom:
    LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
  default:
    LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    return UnableToLegalize;
  }
}

void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
                                   SmallVectorImpl<Register> &VRegs) {
  for (int i = 0; i < NumParts; ++i)
    VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
  MIRBuilder.buildUnmerge(VRegs, Reg);
}

bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
                                   LLT MainTy, LLT &LeftoverTy,
                                   SmallVectorImpl<Register> &VRegs,
                                   SmallVectorImpl<Register> &LeftoverRegs) {
  assert(!LeftoverTy.isValid() && "this is an out argument");

  unsigned RegSize = RegTy.getSizeInBits();
  unsigned MainSize = MainTy.getSizeInBits();
  unsigned NumParts = RegSize / MainSize;
  unsigned LeftoverSize = RegSize - NumParts * MainSize;

  // Use an unmerge when possible.
  if (LeftoverSize == 0) {
    for (unsigned I = 0; I < NumParts; ++I)
      VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    MIRBuilder.buildUnmerge(VRegs, Reg);
    return true;
  }

  if (MainTy.isVector()) {
    unsigned EltSize = MainTy.getScalarSizeInBits();
    if (LeftoverSize % EltSize != 0)
      return false;
    LeftoverTy = LLT::scalarOrVector(
        ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
  } else {
    LeftoverTy = LLT::scalar(LeftoverSize);
  }

  // For irregular sizes, extract the individual parts.
  for (unsigned I = 0; I != NumParts; ++I) {
    Register NewReg = MRI.createGenericVirtualRegister(MainTy);
    VRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
  }

  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
       Offset += LeftoverSize) {
    Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    LeftoverRegs.push_back(NewReg);
    MIRBuilder.buildExtract(NewReg, Reg, Offset);
  }

  return true;
}

void LegalizerHelper::insertParts(Register DstReg,
                                  LLT ResultTy, LLT PartTy,
                                  ArrayRef<Register> PartRegs,
                                  LLT LeftoverTy,
                                  ArrayRef<Register> LeftoverRegs) {
  if (!LeftoverTy.isValid()) {
    assert(LeftoverRegs.empty());

    if (!ResultTy.isVector()) {
      MIRBuilder.buildMerge(DstReg, PartRegs);
      return;
    }

    if (PartTy.isVector())
      MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    else
      MIRBuilder.buildBuildVector(DstReg, PartRegs);
    return;
  }

  SmallVector<Register> GCDRegs;
  LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
    extractGCDType(GCDRegs, GCDTy, PartReg);
  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
}

/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
                              const MachineInstr &MI) {
  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

  const int StartIdx = Regs.size();
  const int NumResults = MI.getNumOperands() - 1;
  Regs.resize(Regs.size() + NumResults);
  for (int I = 0; I != NumResults; ++I)
    Regs[StartIdx + I] = MI.getOperand(I).getReg();
}

void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
                                     LLT GCDTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy == GCDTy) {
    // If the source already evenly divides the result type, we don't need to
    // do anything.
    Parts.push_back(SrcReg);
  } else {
    // Need to split into common type sized pieces.
    auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
    getUnmergeResults(Parts, *Unmerge);
  }
}

LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
                                    LLT NarrowTy, Register SrcReg) {
  LLT SrcTy = MRI.getType(SrcReg);
  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
  extractGCDType(Parts, GCDTy, SrcReg);
  return GCDTy;
}

LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't evenly
  // cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can reuse
  // the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  VRegs = std::move(Remerge);
  return LCMTy;
}

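// Worked example (illustrative only): remerging three s32 pieces of an s96
// value with NarrowTy = s64 and GCDTy = s32 gives LCMTy = s192, NumParts = 3
// and NumSubParts = 2. With the G_ANYEXT pad strategy the result is
//   Remerge[0] = G_MERGE_VALUES piece0, piece1
//   Remerge[1] = G_MERGE_VALUES piece2, undef
//   Remerge[2] = G_IMPLICIT_DEF          ; reused for all-padding parts
// and buildWidenedRemergeToDst below then truncates the s192 merge back to
// the s96 destination.
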
void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
                                               ArrayRef<Register> RemergeRegs) {
  LLT DstTy = MRI.getType(DstReg);

  // Create the merge to the widened source, and extract the relevant bits into
  // the result.

  if (DstTy == LCMTy) {
    MIRBuilder.buildMerge(DstReg, RemergeRegs);
    return;
  }

  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
  if (DstTy.isScalar() && LCMTy.isScalar()) {
    MIRBuilder.buildTrunc(DstReg, Remerge);
    return;
  }

  if (LCMTy.isVector()) {
    unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
    SmallVector<Register, 8> UnmergeDefs(NumDefs);
    UnmergeDefs[0] = DstReg;
    for (unsigned I = 1; I != NumDefs; ++I)
      UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);

    MIRBuilder.buildUnmerge(UnmergeDefs,
                            MIRBuilder.buildMerge(LCMTy, RemergeRegs));
    return;
  }

  llvm_unreachable("unhandled case");
}

static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
#define RTLIBCASE_INT(LibcallPrefix) \
  do { \
    switch (Size) { \
    case 32: \
      return RTLIB::LibcallPrefix##32; \
    case 64: \
      return RTLIB::LibcallPrefix##64; \
    case 128: \
      return RTLIB::LibcallPrefix##128; \
    default: \
      llvm_unreachable("unexpected size"); \
    } \
  } while (0)

#define RTLIBCASE(LibcallPrefix) \
  do { \
    switch (Size) { \
    case 32: \
      return RTLIB::LibcallPrefix##32; \
    case 64: \
      return RTLIB::LibcallPrefix##64; \
    case 80: \
      return RTLIB::LibcallPrefix##80; \
    case 128: \
      return RTLIB::LibcallPrefix##128; \
    default: \
      llvm_unreachable("unexpected size"); \
    } \
  } while (0)

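// For example, RTLIBCASE(ADD_F) expands to a switch on Size returning
// RTLIB::ADD_F32, RTLIB::ADD_F64, RTLIB::ADD_F80 or RTLIB::ADD_F128, so each
// case below selects the whole family of runtime calls for one opcode.
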
  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  }
  llvm_unreachable("Unknown libcall function");
}

/// True if an instruction is in tail position in its caller. Intended for
/// legalizing libcalls as tail calls when possible.
static bool isLibCallInTailPosition(MachineInstr &MI,
                                    const TargetInstrInfo &TII,
                                    MachineRegisterInfo &MRI) {
  MachineBasicBlock &MBB = *MI.getParent();
  const Function &F = MBB.getParent()->getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
      CallerAttrs.hasRetAttr(Attribute::SExt))
    return false;

  // Only tail call if the following instruction is a standard return or if we
  // have a `thisreturn` callee, and a sequence like:
  //
  //   G_MEMCPY %0, %1, %2
  //   $x0 = COPY %0
  //   RET_ReallyLR implicit $x0
  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
  if (Next != MBB.instr_end() && Next->isCopy()) {
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("unsupported opcode");
    case TargetOpcode::G_BZERO:
      return false;
    case TargetOpcode::G_MEMCPY:
    case TargetOpcode::G_MEMMOVE:
    case TargetOpcode::G_MEMSET:
      break;
    }

    Register VReg = MI.getOperand(0).getReg();
    if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
      return false;

    Register PReg = Next->getOperand(0).getReg();
    if (!PReg.isPhysical())
      return false;

    auto Ret = next_nodbg(Next, MBB.instr_end());
    if (Ret == MBB.instr_end() || !Ret->isReturn())
      return false;

    if (Ret->getNumImplicitOperands() != 1)
      return false;

    if (PReg != Ret->getOperand(0).getReg())
      return false;

    // Skip over the COPY that we just validated.
    Next = Ret;
  }

  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    return false;

  return true;
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args,
                    const CallingConv::ID CC) {
  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = CC;
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = Result;
  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  return LegalizerHelper::Legalized;
}

LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
                    const CallLowering::ArgInfo &Result,
                    ArrayRef<CallLowering::ArgInfo> Args) {
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  const char *Name = TLI.getLibcallName(Libcall);
  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
  return createLibcall(MIRBuilder, Name, Result, Args, CC);
}

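// Usage sketch: a G_FREM on s64 reaches these helpers as RTLIB::REM_F64,
// which by default resolves to the "fmod" runtime function; createLibcall
// then lowers it as an ordinary call with double arguments and a double
// return, using the target's calling convention for that libcall.
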
// Useful for libcalls where all operands have the same type.
static LegalizerHelper::LegalizeResult
simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
              Type *OpType) {
  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);

  // FIXME: What does the original arg index mean here?
  SmallVector<CallLowering::ArgInfo, 3> Args;
  for (unsigned i = 1; i < MI.getNumOperands(); i++)
    Args.push_back({MI.getOperand(i).getReg(), OpType, 0});
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), OpType, 0}, Args);
}

LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();

    // Need to derive an IR type for call lowering.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy, 0});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    Args[0].Flags[0].setReturned();
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    Args[0].Flags[0].setReturned();
    break;
  default:
    llvm_unreachable("unsupported opcode");
  }
  const char *Name = TLI.getLibcallName(RTLibcall);

  // Unsupported libcall on the target.
  if (!Name) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    return LegalizerHelper::UnableToLegalize;
  }

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
                    isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");

    // Check debug locations before removing the return.
    LocObserver.checkpoint(true);

    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next &&
             (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI.getNextNode());

    // We expect to lose the debug location from the return.
    LocObserver.checkpoint(false);
  }

  return LegalizerHelper::Legalized;
}

static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
                                       Type *FromType) {
  auto ToMVT = MVT::getVT(ToType);
  auto FromMVT = MVT::getVT(FromType);

  switch (Opcode) {
  case TargetOpcode::G_FPEXT:
    return RTLIB::getFPEXT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTRUNC:
    return RTLIB::getFPROUND(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOSI:
    return RTLIB::getFPTOSINT(FromMVT, ToMVT);
  case TargetOpcode::G_FPTOUI:
    return RTLIB::getFPTOUINT(FromMVT, ToMVT);
  case TargetOpcode::G_SITOFP:
    return RTLIB::getSINTTOFP(FromMVT, ToMVT);
  case TargetOpcode::G_UITOFP:
    return RTLIB::getUINTTOFP(FromMVT, ToMVT);
  }
  llvm_unreachable("Unsupported libcall function");
}

static LegalizerHelper::LegalizeResult
conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
                  Type *FromType) {
  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
  return createLibcall(MIRBuilder, Libcall,
                       {MI.getOperand(0).getReg(), ToType, 0},
                       {{MI.getOperand(1).getReg(), FromType, 0}});
}

LegalizerHelper::LegalizeResult
LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
  unsigned Size = LLTy.getSizeInBits();
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    Type *HLTy = IntegerType::get(Ctx, Size);
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FADD:
  case TargetOpcode::G_FSUB:
  case TargetOpcode::G_FMUL:
  case TargetOpcode::G_FDIV:
  case TargetOpcode::G_FMA:
  case TargetOpcode::G_FPOW:
  case TargetOpcode::G_FREM:
  case TargetOpcode::G_FCOS:
  case TargetOpcode::G_FSIN:
  case TargetOpcode::G_FLOG10:
  case TargetOpcode::G_FLOG:
  case TargetOpcode::G_FLOG2:
  case TargetOpcode::G_FEXP:
  case TargetOpcode::G_FEXP2:
  case TargetOpcode::G_FCEIL:
  case TargetOpcode::G_FFLOOR:
  case TargetOpcode::G_FMINNUM:
  case TargetOpcode::G_FMAXNUM:
  case TargetOpcode::G_FSQRT:
  case TargetOpcode::G_FRINT:
  case TargetOpcode::G_FNEARBYINT:
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
      LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
      return UnableToLegalize;
    }
    auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPEXT:
  case TargetOpcode::G_FPTRUNC: {
    Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
    Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    if (!FromTy || !ToTy)
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_FPTOSI:
  case TargetOpcode::G_FPTOUI: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
        FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_SITOFP:
  case TargetOpcode::G_UITOFP: {
    // FIXME: Support other types
    unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
      return UnableToLegalize;
    LegalizeResult Status = conversionLibcall(
        MI, MIRBuilder,
        ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
        FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    if (Status != Legalized)
      return Status;
    break;
  }
  case TargetOpcode::G_BZERO:
  case TargetOpcode::G_MEMCPY:
  case TargetOpcode::G_MEMMOVE:
  case TargetOpcode::G_MEMSET: {
    LegalizeResult Result =
        createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
    if (Result != Legalized)
      return Result;
    MI.eraseFromParent();
    return Result;
  }
  }

  MI.eraseFromParent();
  return Legalized;
}

LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
                                                              unsigned TypeIdx,
                                                              LLT NarrowTy) {
  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
  uint64_t NarrowSize = NarrowTy.getSizeInBits();

  switch (MI.getOpcode()) {
  default:
    return UnableToLegalize;
  case TargetOpcode::G_IMPLICIT_DEF: {
    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);

    // If SizeOp0 is not an exact multiple of NarrowSize, emit
    // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
    // FIXME: Although this would also be legal for the general case, it causes
    // a lot of regressions in the emitted code (superfluous COPYs, artifact
    // combines not being hit). This seems to be a problem related to the
    // artifact combiner.
    if (SizeOp0 % NarrowSize != 0) {
      LLT ImplicitTy = NarrowTy;
      if (DstTy.isVector())
        ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);

      Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
      MIRBuilder.buildAnyExt(DstReg, ImplicitReg);

      MI.eraseFromParent();
      return Legalized;
    }

    int NumParts = SizeOp0 / NarrowSize;

    SmallVector<Register, 2> DstRegs;
    for (int i = 0; i < NumParts; ++i)
      DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));

    if (DstTy.isVector())
      MIRBuilder.buildBuildVector(DstReg, DstRegs);
    else
      MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_CONSTANT: {
    LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    unsigned TotalSize = Ty.getSizeInBits();
    unsigned NarrowSize = NarrowTy.getSizeInBits();
    int NumParts = TotalSize / NarrowSize;

    SmallVector<Register, 4> PartRegs;
    for (int I = 0; I != NumParts; ++I) {
      unsigned Offset = I * NarrowSize;
      auto K = MIRBuilder.buildConstant(NarrowTy,
                                        Val.lshr(Offset).trunc(NarrowSize));
      PartRegs.push_back(K.getReg(0));
    }

    LLT LeftoverTy;
    unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    SmallVector<Register, 1> LeftoverRegs;
    if (LeftoverBits != 0) {
      LeftoverTy = LLT::scalar(LeftoverBits);
      auto K = MIRBuilder.buildConstant(
          LeftoverTy,
          Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
      LeftoverRegs.push_back(K.getReg(0));
    }

    insertParts(MI.getOperand(0).getReg(),
                Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);

    MI.eraseFromParent();
    return Legalized;
  }
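  // As a rough illustration, narrowing
  //   %0:_(s64) = G_CONSTANT i64 -1
  // with NarrowTy = s32 builds two s32 G_CONSTANTs holding the low and high
  // halves of the value and reassembles %0 with G_MERGE_VALUES; a
  // non-multiple width such as s88 additionally produces an s24 leftover
  // constant that insertParts stitches back in.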
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_ANYEXT:
    return narrowScalarExt(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_TRUNC: {
    if (TypeIdx != 1)
      return UnableToLegalize;

    uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
      LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
      return UnableToLegalize;
    }

    auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
    MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
    MI.eraseFromParent();
    return Legalized;
  }

  case TargetOpcode::G_FREEZE:
    return reduceOperationWidth(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_SADDO:
  case TargetOpcode::G_SSUBO:
  case TargetOpcode::G_SADDE:
  case TargetOpcode::G_SSUBE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBO:
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_USUBE:
    return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_UMULH:
    return narrowScalarMul(MI, NarrowTy);
  case TargetOpcode::G_EXTRACT:
    return narrowScalarExtract(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_INSERT:
    return narrowScalarInsert(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_LOAD: {
    auto &LoadMI = cast<GLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    LLT DstTy = MRI.getType(DstReg);
    if (DstTy.isVector())
      return UnableToLegalize;

    if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
      MIRBuilder.buildAnyExt(DstReg, TmpReg);
      LoadMI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_SEXTLOAD: {
    auto &LoadMI = cast<GExtLoad>(MI);
    Register DstReg = LoadMI.getDstReg();
    Register PtrReg = LoadMI.getPointerReg();

    Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    auto &MMO = LoadMI.getMMO();
    unsigned MemSize = MMO.getSizeInBits();

    if (MemSize == NarrowSize) {
      MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    } else if (MemSize < NarrowSize) {
      MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
    } else if (MemSize > NarrowSize) {
      // FIXME: Need to split the load.
      return UnableToLegalize;
    }

    if (isa<GZExtLoad>(LoadMI))
      MIRBuilder.buildZExt(DstReg, TmpReg);
    else
      MIRBuilder.buildSExt(DstReg, TmpReg);

    LoadMI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_STORE: {
    auto &StoreMI = cast<GStore>(MI);

    Register SrcReg = StoreMI.getValueReg();
    LLT SrcTy = MRI.getType(SrcReg);
    if (SrcTy.isVector())
      return UnableToLegalize;

    int NumParts = SizeOp0 / NarrowSize;
    unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    if (SrcTy.isVector() && LeftoverBits != 0)
      return UnableToLegalize;

    if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
      Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.buildTrunc(TmpReg, SrcReg);
      MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
      StoreMI.eraseFromParent();
      return Legalized;
    }

    return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
  }
  case TargetOpcode::G_SELECT:
    return narrowScalarSelect(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR: {
    // Legalize bitwise operation:
    // A = BinOp<Ty> B, C
    // into:
    // B1, ..., BN = G_UNMERGE_VALUES B
    // C1, ..., CN = G_UNMERGE_VALUES C
    // A1 = BinOp<Ty/N> B1, C1
    // ...
    // AN = BinOp<Ty/N> BN, CN
    // A = G_MERGE_VALUES A1, ..., AN
    return narrowScalarBasic(MI, TypeIdx, NarrowTy);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    return narrowScalarShift(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_CTLZ:
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
  case TargetOpcode::G_CTTZ:
  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
  case TargetOpcode::G_CTPOP:
    if (TypeIdx == 1)
      switch (MI.getOpcode()) {
      case TargetOpcode::G_CTLZ:
      case TargetOpcode::G_CTLZ_ZERO_UNDEF:
        return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTTZ:
      case TargetOpcode::G_CTTZ_ZERO_UNDEF:
        return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
      case TargetOpcode::G_CTPOP:
        return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
      default:
        return UnableToLegalize;
      }

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_INTTOPTR:
    if (TypeIdx != 1)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 1);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PTRTOINT:
    if (TypeIdx != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
    Observer.changedInstr(MI);
    return Legalized;
  case TargetOpcode::G_PHI: {
    // FIXME: add support for when SizeOp0 isn't an exact multiple of
    // NarrowSize.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    unsigned NumParts = SizeOp0 / NarrowSize;
    SmallVector<Register, 2> DstRegs(NumParts);
    SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
    Observer.changingInstr(MI);
    for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
      MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
      MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
      extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
                   SrcRegs[i / 2]);
    }
    MachineBasicBlock &MBB = *MI.getParent();
    MIRBuilder.setInsertPt(MBB, MI);
    for (unsigned i = 0; i < NumParts; ++i) {
      DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
      MachineInstrBuilder MIB =
          MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
      for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
        MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
    }
    MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
  case TargetOpcode::G_INSERT_VECTOR_ELT: {
    if (TypeIdx != 2)
      return UnableToLegalize;

    int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, OpIdx);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_ICMP: {
    Register LHS = MI.getOperand(2).getReg();
    LLT SrcTy = MRI.getType(LHS);
    uint64_t SrcSize = SrcTy.getSizeInBits();
    CmpInst::Predicate Pred =
        static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());

    // TODO: Handle the non-equality case for weird sizes.
    if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
      return UnableToLegalize;

    LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
    SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
    if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
                      LHSLeftoverRegs))
      return UnableToLegalize;

    LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
    SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
    if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
                      RHSPartRegs, RHSLeftoverRegs))
      return UnableToLegalize;

    // We now have the LHS and RHS of the compare split into narrow-type
    // registers, plus potentially some leftover type.
    Register Dst = MI.getOperand(0).getReg();
    LLT ResTy = MRI.getType(Dst);
    if (ICmpInst::isEquality(Pred)) {
      // For each part on the LHS and RHS, keep track of the result of XOR-ing
      // them together. For each equal part, the result should be all 0s. For
      // each non-equal part, we'll get at least one 1.
      auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
      SmallVector<Register, 4> Xors;
      for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
        Xors.push_back(Xor);
      }

      // Build a G_XOR for each leftover register. Each G_XOR must be widened
      // to the desired narrow type so that we can OR them together later.
      SmallVector<Register, 4> WidenedXors;
      for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
        auto LHS = std::get<0>(LHSAndRHS);
        auto RHS = std::get<1>(LHSAndRHS);
        auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
        LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
        buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
                            /* PadStrategy = */ TargetOpcode::G_ZEXT);
        Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
      }

      // Now, for each part we broke up, we know if they are equal/not equal
      // based off the G_XOR. We can OR these all together and compare against
      // 0 to get the result.
      assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
      auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
      for (unsigned I = 2, E = Xors.size(); I < E; ++I)
        Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
      MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
    } else {
      // TODO: Handle non-power-of-two types.
      assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
      assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
      Register LHSL = LHSPartRegs[0];
      Register LHSH = LHSPartRegs[1];
      Register RHSL = RHSPartRegs[0];
      Register RHSH = RHSPartRegs[1];
      MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpHEQ =
          MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
      MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
          ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
      MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
    }
    MI.eraseFromParent();
    return Legalized;
  }
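  // Sketch of the equality path above: narrowing
  //   %eq:_(s1) = G_ICMP intpred(eq), %a:_(s64), %b:_(s64)
  // with NarrowTy = s32 splits %a and %b into low/high halves, XORs the
  // matching halves, ORs the two XOR results together and compares that OR
  // against zero, so %eq is set only when every half is equal.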
  case TargetOpcode::G_SEXT_INREG: {
    if (TypeIdx != 0)
      return UnableToLegalize;

    int64_t SizeInBits = MI.getOperand(2).getImm();

    // So long as the new type has more bits than the bits we're extending we
    // don't need to break it apart.
    if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
      Observer.changingInstr(MI);
      // We don't lose any non-extension bits by truncating the src and
      // sign-extending the dst.
      MachineOperand &MO1 = MI.getOperand(1);
      auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
      MO1.setReg(TruncMIB.getReg(0));

      MachineOperand &MO2 = MI.getOperand(0);
      Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
      MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
      MIRBuilder.buildSExt(MO2, DstExt);
      MO2.setReg(DstExt);
      Observer.changedInstr(MI);
      return Legalized;
    }

    // Break it apart. Components below the extension point are unmodified. The
    // component containing the extension point becomes a narrower SEXT_INREG.
    // Components above it are ashr'd from the component containing the
    // extension point.
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;
    int NumParts = SizeOp0 / NarrowSize;

    // List the registers where the destination will be scattered.
    SmallVector<Register, 2> DstRegs;
    // List the registers where the source will be split.
    SmallVector<Register, 2> SrcRegs;

    // Create all the temporary registers.
    for (int i = 0; i < NumParts; ++i) {
      Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);

      SrcRegs.push_back(SrcReg);
    }

    // Explode the big arguments into smaller chunks.
    MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));

    Register AshrCstReg =
        MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
            .getReg(0);
    Register FullExtensionReg = 0;
    Register PartialExtensionReg = 0;

    // Do the operation on each small part.
    for (int i = 0; i < NumParts; ++i) {
      if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
        DstRegs.push_back(SrcRegs[i]);
      else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
        assert(PartialExtensionReg &&
               "Expected to visit partial extension before full");
        if (FullExtensionReg) {
          DstRegs.push_back(FullExtensionReg);
          continue;
        }
        DstRegs.push_back(
            MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
                .getReg(0));
        FullExtensionReg = DstRegs.back();
      } else {
        DstRegs.push_back(
            MIRBuilder
                .buildInstr(
                    TargetOpcode::G_SEXT_INREG, {NarrowTy},
                    {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
                .getReg(0));
        PartialExtensionReg = DstRegs.back();
      }
    }

    // Gather the destination registers into the final destination.
    Register DstReg = MI.getOperand(0).getReg();
    MIRBuilder.buildMerge(DstReg, DstRegs);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_BSWAP:
  case TargetOpcode::G_BITREVERSE: {
    if (SizeOp0 % NarrowSize != 0)
      return UnableToLegalize;

    Observer.changingInstr(MI);
    SmallVector<Register, 2> SrcRegs, DstRegs;
    unsigned NumParts = SizeOp0 / NarrowSize;
    extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);

    for (unsigned i = 0; i < NumParts; ++i) {
      auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
                                           {SrcRegs[NumParts - 1 - i]});
      DstRegs.push_back(DstPart.getReg(0));
    }

    MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);

    Observer.changedInstr(MI);
    MI.eraseFromParent();
    return Legalized;
  }
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_PTRMASK: {
    if (TypeIdx != 1)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarSrc(MI, NarrowTy, 2);
    Observer.changedInstr(MI);
    return Legalized;
  }
  case TargetOpcode::G_FPTOUI:
  case TargetOpcode::G_FPTOSI:
    return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
  case TargetOpcode::G_FPEXT:
    if (TypeIdx != 0)
      return UnableToLegalize;
    Observer.changingInstr(MI);
    narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
    Observer.changedInstr(MI);
    return Legalized;
  }
}

Register LegalizerHelper::coerceToScalar(Register Val) {
  LLT Ty = MRI.getType(Val);
  if (Ty.isScalar())
    return Val;

  const DataLayout &DL = MIRBuilder.getDataLayout();
  LLT NewTy = LLT::scalar(Ty.getSizeInBits());
  if (Ty.isPointer()) {
    if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
      return Register();
    return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
  }

  Register NewVal = Val;

  assert(Ty.isVector());
  LLT EltTy = Ty.getElementType();
  if (EltTy.isPointer())
    NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
  return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
}

void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
  MO.setReg(ExtB.getReg(0));
}

void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
  MO.setReg(ExtB.getReg(0));
}

void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
                                     unsigned OpIdx, unsigned TruncOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
  MO.setReg(DstExt);
}

void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
                                      unsigned OpIdx, unsigned ExtOpcode) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
  MO.setReg(DstTrunc);
}

void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
}

void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
                                            unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);

  LLT OldTy = MRI.getType(MO.getReg());
  unsigned OldElts = OldTy.getNumElements();
  unsigned NewElts = MoreTy.getNumElements();

  unsigned NumParts = NewElts / OldElts;

  // Use concat_vectors if the result is a multiple of the number of elements.
  if (NumParts * OldElts == NewElts) {
    SmallVector<Register, 8> Parts;
    Parts.push_back(MO.getReg());

    Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
    for (unsigned I = 1; I != NumParts; ++I)
      Parts.push_back(ImpDef);

    auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
    MO.setReg(Concat.getReg(0));
    return;
  }

  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
  MO.setReg(MoreReg);
}

void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &Op = MI.getOperand(OpIdx);
  Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
}

void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
  MachineOperand &MO = MI.getOperand(OpIdx);
  Register CastDst = MRI.createGenericVirtualRegister(CastTy);
  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
  MIRBuilder.buildBitcast(MO, CastDst);
  MO.setReg(CastDst);
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
                                        LLT WideTy) {
  if (TypeIdx != 1)
    return UnableToLegalize;

  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  if (DstTy.isVector())
    return UnableToLegalize;

  Register Src1 = MI.getOperand(1).getReg();
  LLT SrcTy = MRI.getType(Src1);
  const int DstSize = DstTy.getSizeInBits();
  const int SrcSize = SrcTy.getSizeInBits();
  const int WideSize = WideTy.getSizeInBits();
  const int NumMerge = (DstSize + WideSize - 1) / WideSize;

  unsigned NumOps = MI.getNumOperands();
  unsigned NumSrc = MI.getNumOperands() - 1;
  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;

  if (WideSize >= DstSize) {
    // Directly pack the bits in the target type.
    Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);

    for (unsigned I = 2; I != NumOps; ++I) {
      const unsigned Offset = (I - 1) * PartSize;

      Register SrcReg = MI.getOperand(I).getReg();
      assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));

      auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);

      Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
          MRI.createGenericVirtualRegister(WideTy);

      auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
      auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
      MIRBuilder.buildOr(NextResult, ResultReg, Shl);
      ResultReg = NextResult;
    }

    if (WideSize > DstSize)
      MIRBuilder.buildTrunc(DstReg, ResultReg);
    else if (DstTy.isPointer())
      MIRBuilder.buildIntToPtr(DstReg, ResultReg);

    MI.eraseFromParent();
    return Legalized;
  }

  // Unmerge the original values to the GCD type, and recombine to the next
  // multiple greater than the original type.
  //
  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
  // %12:_(s12) = G_MERGE_VALUES %10, %11
  //
  // Padding with undef if necessary:
  //
  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
  // %7:_(s2) = G_IMPLICIT_DEF
  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
  // %10:_(s12) = G_MERGE_VALUES %8, %9

  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
  LLT GCDTy = LLT::scalar(GCD);

  SmallVector<Register, 8> Parts;
  SmallVector<Register, 8> NewMergeRegs;
  SmallVector<Register, 8> Unmerges;
  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);

  // Decompose the original operands if they don't evenly divide.
  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
    Register SrcReg = MI.getOperand(I).getReg();
    if (GCD == SrcSize) {
      Unmerges.push_back(SrcReg);
    } else {
      auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
      for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
        Unmerges.push_back(Unmerge.getReg(J));
    }
  }

  // Pad with undef to the next size that is a multiple of the requested size.
  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
    Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
      Unmerges.push_back(UndefReg);
  }

  const int PartsPerGCD = WideSize / GCD;

  // Build merges of each piece.
  ArrayRef<Register> Slicer(Unmerges);
  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
    auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
    NewMergeRegs.push_back(Merge.getReg(0));
  }

  // A truncate may be necessary if the requested type doesn't evenly divide the
  // original result type.
  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
    MIRBuilder.buildMerge(DstReg, NewMergeRegs);
  } else {
    auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
    MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
  }

  MI.eraseFromParent();
  return Legalized;
}

Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
  Register WideReg = MRI.createGenericVirtualRegister(WideTy);
  LLT OrigTy = MRI.getType(OrigReg);
  LLT LCMTy = getLCMType(WideTy, OrigTy);

  const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
  const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();

  Register UnmergeSrc = WideReg;

  // Create a merge to the LCM type, padding with undef
  // %0:_(<3 x s32>) = G_FOO => <4 x s32>
  // =>
  // %1:_(<4 x s32>) = G_FOO
  // %2:_(<4 x s32>) = G_IMPLICIT_DEF
  // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
  // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
  if (NumMergeParts > 1) {
    Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
    SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
    MergeParts[0] = WideReg;
    UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
  }

  // Unmerge to the original register and pad with dead defs.
  SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
  UnmergeResults[0] = OrigReg;
  for (int I = 1; I != NumUnmergeParts; ++I)
    UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);

  MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
  return WideReg;
}

LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
                                          LLT WideTy) {
  if (TypeIdx != 0)
    return UnableToLegalize;

  int NumDst = MI.getNumOperands() - 1;
  Register SrcReg = MI.getOperand(NumDst).getReg();
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy.isVector())
    return UnableToLegalize;

  Register Dst0Reg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst0Reg);
  if (!DstTy.isScalar())
    return UnableToLegalize;

  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
    if (SrcTy.isPointer()) {
      const DataLayout &DL = MIRBuilder.getDataLayout();
      if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
        LLVM_DEBUG(
            dbgs() << "Not casting non-integral address space integer\n");
        return UnableToLegalize;
      }

      SrcTy = LLT::scalar(SrcTy.getSizeInBits());
      SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
    }

    // Widen SrcTy to WideTy. This does not affect the result, but since the
    // user requested this size, it is probably better handled than SrcTy and
    // should reduce the total number of legalization artifacts.
    if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
      SrcTy = WideTy;
      SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
    }

    // There's no unmerge type to target. Directly extract the bits from the
    // source type.
    unsigned DstSize = DstTy.getSizeInBits();

    MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
    for (int I = 1; I != NumDst; ++I) {
      auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
      auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
      MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
    }

    MI.eraseFromParent();
    return Legalized;
  }

  // Extend the source to a wider type.
  LLT LCMTy = getLCMType(SrcTy, WideTy);

  Register WideSrc = SrcReg;
  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
    // TODO: If this is an integral address space, cast to integer and anyext.
    if (SrcTy.isPointer()) {
      LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
      return UnableToLegalize;
    }

    WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
  }

  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);

  // Create a sequence of unmerges and merges to the original results. Since we
  // may have widened the source, we will need to pad the results with dead defs
  // to cover the source register.
  // e.g. widen s48 to s64:
  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
  //
  // =>
  // %4:_(s192) = G_ANYEXT %0:_(s96)
  // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
  // ; unpack to GCD type, with extra dead defs
  // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
  // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
  // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
  // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
  // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
  const LLT GCDTy = getGCDType(WideTy, DstTy);
  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();

  // Directly unmerge to the destination without going through a GCD type
  // if possible.
  if (PartsPerRemerge == 1) {
    const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();

    for (int I = 0; I != NumUnmerge; ++I) {
      auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);

      for (int J = 0; J != PartsPerUnmerge; ++J) {
        int Idx = I * PartsPerUnmerge + J;
        if (Idx < NumDst)
          MIB.addDef(MI.getOperand(Idx).getReg());
        else {
          // Create dead def for excess components.
          MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
        }
      }

      MIB.addUse(Unmerge.getReg(I));
    }
  } else {
    SmallVector<Register, 16> Parts;
    for (int J = 0; J != NumUnmerge; ++J)
      extractGCDType(Parts, GCDTy, Unmerge.getReg(J));

    SmallVector<Register, 8> RemergeParts;
    for (int I = 0; I != NumDst; ++I) {
      for (int J = 0; J < PartsPerRemerge; ++J) {
        const int Idx = I * PartsPerRemerge + J;
        RemergeParts.emplace_back(Parts[Idx]);
      }

      MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
      RemergeParts.clear();
    }
  }

  MI.eraseFromParent();
  return Legalized;
}

1707 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1708  LLT WideTy) {
1709  Register DstReg = MI.getOperand(0).getReg();
1710  Register SrcReg = MI.getOperand(1).getReg();
1711  LLT SrcTy = MRI.getType(SrcReg);
1712 
1713  LLT DstTy = MRI.getType(DstReg);
1714  unsigned Offset = MI.getOperand(2).getImm();
1715 
1716  if (TypeIdx == 0) {
1717  if (SrcTy.isVector() || DstTy.isVector())
1718  return UnableToLegalize;
1719 
1720  SrcOp Src(SrcReg);
1721  if (SrcTy.isPointer()) {
1722  // Extracts from pointers can be handled only if they are really just
1723  // simple integers.
1725  if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1726  return UnableToLegalize;
1727 
1728  LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1729  Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1730  SrcTy = SrcAsIntTy;
1731  }
1732 
1733  if (DstTy.isPointer())
1734  return UnableToLegalize;
1735 
1736  if (Offset == 0) {
1737  // Avoid a shift in the degenerate case.
1738  MIRBuilder.buildTrunc(DstReg,
1739  MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1740  MI.eraseFromParent();
1741  return Legalized;
1742  }
1743 
1744  // Do a shift in the source type.
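  // Illustrative sketch (hypothetical registers): for
  //   %d:_(s16) = G_EXTRACT %x:_(s64), 16
  // with WideTy = s32, the shift stays in the s64 source type:
  //   %c16:_(s64) = G_CONSTANT i64 16
  //   %sh:_(s64) = G_LSHR %x, %c16
  //   %d:_(s16) = G_TRUNC %sh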
1745  LLT ShiftTy = SrcTy;
1746  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1747  Src = MIRBuilder.buildAnyExt(WideTy, Src);
1748  ShiftTy = WideTy;
1749  }
1750 
1751  auto LShr = MIRBuilder.buildLShr(
1752  ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1753  MIRBuilder.buildTrunc(DstReg, LShr);
1754  MI.eraseFromParent();
1755  return Legalized;
1756  }
1757 
1758  if (SrcTy.isScalar()) {
1760  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1762  return Legalized;
1763  }
1764 
1765  if (!SrcTy.isVector())
1766  return UnableToLegalize;
1767 
1768  if (DstTy != SrcTy.getElementType())
1769  return UnableToLegalize;
1770 
1771  if (Offset % SrcTy.getScalarSizeInBits() != 0)
1772  return UnableToLegalize;
1773 
1775  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1776 
1777  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1778  Offset);
1779  widenScalarDst(MI, WideTy.getScalarType(), 0);
1781  return Legalized;
1782 }
1783 
1785 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1786  LLT WideTy) {
1787  if (TypeIdx != 0 || WideTy.isVector())
1788  return UnableToLegalize;
1790  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1791  widenScalarDst(MI, WideTy);
1793  return Legalized;
1794 }
1795 
1797 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
1798  LLT WideTy) {
1799  if (TypeIdx == 1)
1800  return UnableToLegalize; // TODO
1801 
1802  unsigned Opcode;
1803  unsigned ExtOpcode;
1804  Optional<Register> CarryIn = None;
1805  switch (MI.getOpcode()) {
1806  default:
1807  llvm_unreachable("Unexpected opcode!");
1808  case TargetOpcode::G_SADDO:
1809  Opcode = TargetOpcode::G_ADD;
1810  ExtOpcode = TargetOpcode::G_SEXT;
1811  break;
1812  case TargetOpcode::G_SSUBO:
1813  Opcode = TargetOpcode::G_SUB;
1814  ExtOpcode = TargetOpcode::G_SEXT;
1815  break;
1816  case TargetOpcode::G_UADDO:
1817  Opcode = TargetOpcode::G_ADD;
1818  ExtOpcode = TargetOpcode::G_ZEXT;
1819  break;
1820  case TargetOpcode::G_USUBO:
1821  Opcode = TargetOpcode::G_SUB;
1822  ExtOpcode = TargetOpcode::G_ZEXT;
1823  break;
1824  case TargetOpcode::G_SADDE:
1825  Opcode = TargetOpcode::G_UADDE;
1826  ExtOpcode = TargetOpcode::G_SEXT;
1827  CarryIn = MI.getOperand(4).getReg();
1828  break;
1829  case TargetOpcode::G_SSUBE:
1830  Opcode = TargetOpcode::G_USUBE;
1831  ExtOpcode = TargetOpcode::G_SEXT;
1832  CarryIn = MI.getOperand(4).getReg();
1833  break;
1834  case TargetOpcode::G_UADDE:
1835  Opcode = TargetOpcode::G_UADDE;
1836  ExtOpcode = TargetOpcode::G_ZEXT;
1837  CarryIn = MI.getOperand(4).getReg();
1838  break;
1839  case TargetOpcode::G_USUBE:
1840  Opcode = TargetOpcode::G_USUBE;
1841  ExtOpcode = TargetOpcode::G_ZEXT;
1842  CarryIn = MI.getOperand(4).getReg();
1843  break;
1844  }
1845 
1846  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
1847  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
1848  // Do the arithmetic in the larger type.
1849  Register NewOp;
1850  if (CarryIn) {
1851  LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
1852  NewOp = MIRBuilder
1853  .buildInstr(Opcode, {WideTy, CarryOutTy},
1854  {LHSExt, RHSExt, *CarryIn})
1855  .getReg(0);
1856  } else {
1857  NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
1858  }
1859  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1860  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
1861  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
1862  // There is no overflow if the ExtOp is the same as NewOp.
1863  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
1864  // Now trunc the NewOp to the original result.
1865  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1866  MI.eraseFromParent();
1867  return Legalized;
1868 }
1869 
1871 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
1872  LLT WideTy) {
1873  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
1874  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
1875  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
1876  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
1877  MI.getOpcode() == TargetOpcode::G_USHLSAT;
1878  // We can convert this to:
1879  // 1. Any extend iN to iM
1880  // 2. SHL by M-N
1881  // 3. [US][ADD|SUB|SHL]SAT
1882  // 4. L/ASHR by M-N
1883  //
1884  // It may be more efficient to lower this to a min and a max operation in
1885  // the higher precision arithmetic if the promoted operation isn't legal,
1886  // but this decision is up to the target's lowering request.
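  // Illustrative sketch (hypothetical registers): G_SADDSAT on s8 with
  // WideTy = s32 becomes roughly
  //   %l:_(s32) = G_ANYEXT %lhs(s8)
  //   %r:_(s32) = G_ANYEXT %rhs(s8)
  //   %k:_(s32) = G_CONSTANT i32 24
  //   %ls:_(s32) = G_SHL %l, %k
  //   %rs:_(s32) = G_SHL %r, %k
  //   %sat:_(s32) = G_SADDSAT %ls, %rs
  //   %sr:_(s32) = G_ASHR %sat, %k
  //   %dst:_(s8) = G_TRUNC %sr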
1887  Register DstReg = MI.getOperand(0).getReg();
1888 
1889  unsigned NewBits = WideTy.getScalarSizeInBits();
1890  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
1891 
 1892  // For shifts, the RHS is a shift amount: it must be zero-extended to
 1893  // preserve its unsigned value, and it must not be left-shifted like the LHS.
1894  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
1895  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
1896  : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
1897  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
1898  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
1899  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
1900 
1901  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
1902  {ShiftL, ShiftR}, MI.getFlags());
1903 
1904  // Use a shift that will preserve the number of sign bits when the trunc is
1905  // folded away.
1906  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
1907  : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
1908 
1909  MIRBuilder.buildTrunc(DstReg, Result);
1910  MI.eraseFromParent();
1911  return Legalized;
1912 }
1913 
1915 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
1916  LLT WideTy) {
1917  if (TypeIdx == 1)
1918  return UnableToLegalize;
1919 
1920  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
1921  Register Result = MI.getOperand(0).getReg();
1922  Register OriginalOverflow = MI.getOperand(1).getReg();
1923  Register LHS = MI.getOperand(2).getReg();
1924  Register RHS = MI.getOperand(3).getReg();
1925  LLT SrcTy = MRI.getType(LHS);
1926  LLT OverflowTy = MRI.getType(OriginalOverflow);
1927  unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
1928 
1929  // To determine if the result overflowed in the larger type, we extend the
1930  // input to the larger type, do the multiply (checking if it overflows),
1931  // then also check the high bits of the result to see if overflow happened
1932  // there.
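  // For example (illustrative): a G_UMULO on s8 widened to s16 zero-extends
  // both operands, performs the s16 G_UMULO, truncates the product to s8 for
  // the result, and reports overflow when the high 8 bits of the 16-bit
  // product are non-zero. Since 16 >= 2 * 8, the wide multiply itself cannot
  // overflow, so only that high-bits check is needed.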
1933  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1934  auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
1935  auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
1936 
1937  auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
1938  {LeftOperand, RightOperand});
1939  auto Mul = Mulo->getOperand(0);
1940  MIRBuilder.buildTrunc(Result, Mul);
1941 
1942  MachineInstrBuilder ExtResult;
1943  // Overflow occurred if it occurred in the larger type, or if the high part
1944  // of the result does not zero/sign-extend the low part. Check this second
1945  // possibility first.
1946  if (IsSigned) {
1947  // For signed, overflow occurred when the high part does not sign-extend
1948  // the low part.
1949  ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
1950  } else {
1951  // Unsigned overflow occurred when the high part does not zero-extend the
1952  // low part.
1953  ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
1954  }
1955 
 1956  // Multiplication cannot overflow if WideTy is >= 2 * the original width, so
 1957  // we don't need to check the overflow result of the wider-type Mulo.
1958  if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
1959  auto Overflow =
1960  MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
1961  // Finally check if the multiplication in the larger type itself overflowed.
1962  MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
1963  } else {
1964  MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
1965  }
1966  MI.eraseFromParent();
1967  return Legalized;
1968 }
1969 
1971 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1972  switch (MI.getOpcode()) {
1973  default:
1974  return UnableToLegalize;
1975  case TargetOpcode::G_ATOMICRMW_XCHG:
1976  case TargetOpcode::G_ATOMICRMW_ADD:
1977  case TargetOpcode::G_ATOMICRMW_SUB:
1978  case TargetOpcode::G_ATOMICRMW_AND:
1979  case TargetOpcode::G_ATOMICRMW_OR:
1980  case TargetOpcode::G_ATOMICRMW_XOR:
1981  case TargetOpcode::G_ATOMICRMW_MIN:
1982  case TargetOpcode::G_ATOMICRMW_MAX:
1983  case TargetOpcode::G_ATOMICRMW_UMIN:
1984  case TargetOpcode::G_ATOMICRMW_UMAX:
1985  assert(TypeIdx == 0 && "atomicrmw with second scalar type");
1987  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1988  widenScalarDst(MI, WideTy, 0);
1990  return Legalized;
1991  case TargetOpcode::G_ATOMIC_CMPXCHG:
1992  assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
1994  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1995  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1996  widenScalarDst(MI, WideTy, 0);
1998  return Legalized;
1999  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2000  if (TypeIdx == 0) {
2002  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2003  widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2004  widenScalarDst(MI, WideTy, 0);
2006  return Legalized;
2007  }
2008  assert(TypeIdx == 1 &&
2009  "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2011  widenScalarDst(MI, WideTy, 1);
2013  return Legalized;
2014  case TargetOpcode::G_EXTRACT:
2015  return widenScalarExtract(MI, TypeIdx, WideTy);
2016  case TargetOpcode::G_INSERT:
2017  return widenScalarInsert(MI, TypeIdx, WideTy);
2018  case TargetOpcode::G_MERGE_VALUES:
2019  return widenScalarMergeValues(MI, TypeIdx, WideTy);
2020  case TargetOpcode::G_UNMERGE_VALUES:
2021  return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2022  case TargetOpcode::G_SADDO:
2023  case TargetOpcode::G_SSUBO:
2024  case TargetOpcode::G_UADDO:
2025  case TargetOpcode::G_USUBO:
2026  case TargetOpcode::G_SADDE:
2027  case TargetOpcode::G_SSUBE:
2028  case TargetOpcode::G_UADDE:
2029  case TargetOpcode::G_USUBE:
2030  return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2031  case TargetOpcode::G_UMULO:
2032  case TargetOpcode::G_SMULO:
2033  return widenScalarMulo(MI, TypeIdx, WideTy);
2034  case TargetOpcode::G_SADDSAT:
2035  case TargetOpcode::G_SSUBSAT:
2036  case TargetOpcode::G_SSHLSAT:
2037  case TargetOpcode::G_UADDSAT:
2038  case TargetOpcode::G_USUBSAT:
2039  case TargetOpcode::G_USHLSAT:
2040  return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2041  case TargetOpcode::G_CTTZ:
2042  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2043  case TargetOpcode::G_CTLZ:
2044  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2045  case TargetOpcode::G_CTPOP: {
2046  if (TypeIdx == 0) {
2048  widenScalarDst(MI, WideTy, 0);
2050  return Legalized;
2051  }
2052 
2053  Register SrcReg = MI.getOperand(1).getReg();
2054 
2055  // First extend the input.
2056  unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2057  MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2058  ? TargetOpcode::G_ANYEXT
2059  : TargetOpcode::G_ZEXT;
2060  auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2061  LLT CurTy = MRI.getType(SrcReg);
2062  unsigned NewOpc = MI.getOpcode();
2063  if (NewOpc == TargetOpcode::G_CTTZ) {
2064  // The count is the same in the larger type except if the original
2065  // value was zero. This can be handled by setting the bit just off
2066  // the top of the original type.
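  // e.g. for an s8 G_CTTZ widened to s32, OR in 0x100 so a zero input still
  // yields the correct count of 8.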
2067  auto TopBit =
2068  APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2069  MIBSrc = MIRBuilder.buildOr(
2070  WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2071  // Now we know the operand is non-zero, use the more relaxed opcode.
2072  NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2073  }
2074 
2075  // Perform the operation at the larger size.
2076  auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2077  // This is already the correct result for CTPOP and CTTZs
2078  if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
2079  MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
 2080  // The correct result is NewOp - (difference in width between WideTy and CurTy).
2081  unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2082  MIBNewOp = MIRBuilder.buildSub(
2083  WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2084  }
2085 
2086  MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2087  MI.eraseFromParent();
2088  return Legalized;
2089  }
2090  case TargetOpcode::G_BSWAP: {
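  // Byte-swap at the wider width, then shift the result right by the width
  // difference so the swapped bytes of the original value end up in the low
  // bits of the truncated result.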
2092  Register DstReg = MI.getOperand(0).getReg();
2093 
2094  Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2095  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2096  Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2097  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2098 
2099  MI.getOperand(0).setReg(DstExt);
2100 
2102 
2103  LLT Ty = MRI.getType(DstReg);
2104  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2105  MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2106  MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2107 
2108  MIRBuilder.buildTrunc(DstReg, ShrReg);
2110  return Legalized;
2111  }
2112  case TargetOpcode::G_BITREVERSE: {
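  // Reverse the any-extended value at the wider width, then shift right by
  // the width difference so the reversed bits of the original value end up
  // in the low bits of the truncated result.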
2114 
2115  Register DstReg = MI.getOperand(0).getReg();
2116  LLT Ty = MRI.getType(DstReg);
2117  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2118 
2119  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2120  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2121  MI.getOperand(0).setReg(DstExt);
2123 
2124  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2125  auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2126  MIRBuilder.buildTrunc(DstReg, Shift);
2128  return Legalized;
2129  }
2130  case TargetOpcode::G_FREEZE:
2132  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2133  widenScalarDst(MI, WideTy);
2135  return Legalized;
2136 
2137  case TargetOpcode::G_ABS:
2139  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2140  widenScalarDst(MI, WideTy);
2142  return Legalized;
2143 
2144  case TargetOpcode::G_ADD:
2145  case TargetOpcode::G_AND:
2146  case TargetOpcode::G_MUL:
2147  case TargetOpcode::G_OR:
2148  case TargetOpcode::G_XOR:
2149  case TargetOpcode::G_SUB:
 2150  // Perform the operation at the larger width (any extension is fine here,
 2151  // high bits don't affect the result) and then truncate the result back to
 2152  // the original type.
2154  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2155  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2156  widenScalarDst(MI, WideTy);
2158  return Legalized;
2159 
2160  case TargetOpcode::G_SBFX:
2161  case TargetOpcode::G_UBFX:
2163 
2164  if (TypeIdx == 0) {
2165  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2166  widenScalarDst(MI, WideTy);
2167  } else {
2168  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2169  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2170  }
2171 
2173  return Legalized;
2174 
2175  case TargetOpcode::G_SHL:
2177 
2178  if (TypeIdx == 0) {
2179  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2180  widenScalarDst(MI, WideTy);
2181  } else {
2182  assert(TypeIdx == 1);
2183  // The "number of bits to shift" operand must preserve its value as an
2184  // unsigned integer:
2185  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2186  }
2187 
2189  return Legalized;
2190 
2191  case TargetOpcode::G_SDIV:
2192  case TargetOpcode::G_SREM:
2193  case TargetOpcode::G_SMIN:
2194  case TargetOpcode::G_SMAX:
2196  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2197  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2198  widenScalarDst(MI, WideTy);
2200  return Legalized;
2201 
2202  case TargetOpcode::G_SDIVREM:
2204  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2205  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2206  widenScalarDst(MI, WideTy);
2207  widenScalarDst(MI, WideTy, 1);
2209  return Legalized;
2210 
2211  case TargetOpcode::G_ASHR:
2212  case TargetOpcode::G_LSHR:
2214 
2215  if (TypeIdx == 0) {
2216  unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2217  TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2218 
2219  widenScalarSrc(MI, WideTy, 1, CvtOp);
2220  widenScalarDst(MI, WideTy);
2221  } else {
2222  assert(TypeIdx == 1);
2223  // The "number of bits to shift" operand must preserve its value as an
2224  // unsigned integer:
2225  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2226  }
2227 
2229  return Legalized;
2230  case TargetOpcode::G_UDIV:
2231  case TargetOpcode::G_UREM:
2232  case TargetOpcode::G_UMIN:
2233  case TargetOpcode::G_UMAX:
2235  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2236  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2237  widenScalarDst(MI, WideTy);
2239  return Legalized;
2240 
2241  case TargetOpcode::G_UDIVREM:
2243  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2244  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2245  widenScalarDst(MI, WideTy);
2246  widenScalarDst(MI, WideTy, 1);
2248  return Legalized;
2249 
2250  case TargetOpcode::G_SELECT:
2252  if (TypeIdx == 0) {
2253  // Perform operation at larger width (any extension is fine here, high
2254  // bits don't affect the result) and then truncate the result back to the
2255  // original type.
2256  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2257  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2258  widenScalarDst(MI, WideTy);
2259  } else {
2260  bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2261  // Explicit extension is required here since high bits affect the result.
2262  widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2263  }
2265  return Legalized;
2266 
2267  case TargetOpcode::G_FPTOSI:
2268  case TargetOpcode::G_FPTOUI:
2270 
2271  if (TypeIdx == 0)
2272  widenScalarDst(MI, WideTy);
2273  else
2274  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2275 
2277  return Legalized;
2278  case TargetOpcode::G_SITOFP:
2280 
2281  if (TypeIdx == 0)
2282  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2283  else
2284  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2285 
2287  return Legalized;
2288  case TargetOpcode::G_UITOFP:
2290 
2291  if (TypeIdx == 0)
2292  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2293  else
2294  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2295 
2297  return Legalized;
2298  case TargetOpcode::G_LOAD:
2299  case TargetOpcode::G_SEXTLOAD:
2300  case TargetOpcode::G_ZEXTLOAD:
2302  widenScalarDst(MI, WideTy);
2304  return Legalized;
2305 
2306  case TargetOpcode::G_STORE: {
2307  if (TypeIdx != 0)
2308  return UnableToLegalize;
2309 
2310  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2311  if (!Ty.isScalar())
2312  return UnableToLegalize;
2313 
2315 
2316  unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2317  TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2318  widenScalarSrc(MI, WideTy, 0, ExtType);
2319 
2321  return Legalized;
2322  }
2323  case TargetOpcode::G_CONSTANT: {
2324  MachineOperand &SrcMO = MI.getOperand(1);
2326  unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2327  MRI.getType(MI.getOperand(0).getReg()));
2328  assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2329  ExtOpc == TargetOpcode::G_ANYEXT) &&
2330  "Illegal Extend");
2331  const APInt &SrcVal = SrcMO.getCImm()->getValue();
2332  const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2333  ? SrcVal.sext(WideTy.getSizeInBits())
2334  : SrcVal.zext(WideTy.getSizeInBits());
2336  SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2337 
2338  widenScalarDst(MI, WideTy);
2340  return Legalized;
2341  }
2342  case TargetOpcode::G_FCONSTANT: {
2343  MachineOperand &SrcMO = MI.getOperand(1);
2345  APFloat Val = SrcMO.getFPImm()->getValueAPF();
2346  bool LosesInfo;
2347  switch (WideTy.getSizeInBits()) {
2348  case 32:
2350  &LosesInfo);
2351  break;
2352  case 64:
2354  &LosesInfo);
2355  break;
2356  default:
2357  return UnableToLegalize;
2358  }
2359 
2360  assert(!LosesInfo && "extend should always be lossless");
2361 
2363  SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
2364 
2365  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2367  return Legalized;
2368  }
2369  case TargetOpcode::G_IMPLICIT_DEF: {
2371  widenScalarDst(MI, WideTy);
2373  return Legalized;
2374  }
2375  case TargetOpcode::G_BRCOND:
2377  widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2379  return Legalized;
2380 
2381  case TargetOpcode::G_FCMP:
2383  if (TypeIdx == 0)
2384  widenScalarDst(MI, WideTy);
2385  else {
2386  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2387  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2388  }
2390  return Legalized;
2391 
2392  case TargetOpcode::G_ICMP:
2394  if (TypeIdx == 0)
2395  widenScalarDst(MI, WideTy);
2396  else {
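  // The compare operands must be extended according to the signedness of
  // the predicate so the widened comparison produces the same result.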
2397  unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2398  MI.getOperand(1).getPredicate()))
2399  ? TargetOpcode::G_SEXT
2400  : TargetOpcode::G_ZEXT;
2401  widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2402  widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2403  }
2405  return Legalized;
2406 
2407  case TargetOpcode::G_PTR_ADD:
2408  assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2410  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2412  return Legalized;
2413 
2414  case TargetOpcode::G_PHI: {
2415  assert(TypeIdx == 0 && "Expecting only Idx 0");
2416 
2418  for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2419  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2420  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2421  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2422  }
2423 
2424  MachineBasicBlock &MBB = *MI.getParent();
2426  widenScalarDst(MI, WideTy);
2428  return Legalized;
2429  }
2430  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2431  if (TypeIdx == 0) {
2432  Register VecReg = MI.getOperand(1).getReg();
2433  LLT VecTy = MRI.getType(VecReg);
2435 
2437  MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2438  TargetOpcode::G_SEXT);
2439 
2440  widenScalarDst(MI, WideTy, 0);
2442  return Legalized;
2443  }
2444 
2445  if (TypeIdx != 2)
2446  return UnableToLegalize;
2448  // TODO: Probably should be zext
2449  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2451  return Legalized;
2452  }
2453  case TargetOpcode::G_INSERT_VECTOR_ELT: {
2454  if (TypeIdx == 1) {
2456 
2457  Register VecReg = MI.getOperand(1).getReg();
2458  LLT VecTy = MRI.getType(VecReg);
2459  LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2460 
2461  widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2462  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2463  widenScalarDst(MI, WideVecTy, 0);
2465  return Legalized;
2466  }
2467 
2468  if (TypeIdx == 2) {
2470  // TODO: Probably should be zext
2471  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2473  return Legalized;
2474  }
2475 
2476  return UnableToLegalize;
2477  }
2478  case TargetOpcode::G_FADD:
2479  case TargetOpcode::G_FMUL:
2480  case TargetOpcode::G_FSUB:
2481  case TargetOpcode::G_FMA:
2482  case TargetOpcode::G_FMAD:
2483  case TargetOpcode::G_FNEG:
2484  case TargetOpcode::G_FABS:
2485  case TargetOpcode::G_FCANONICALIZE:
2486  case TargetOpcode::G_FMINNUM:
2487  case TargetOpcode::G_FMAXNUM:
2488  case TargetOpcode::G_FMINNUM_IEEE:
2489  case TargetOpcode::G_FMAXNUM_IEEE:
2490  case TargetOpcode::G_FMINIMUM:
2491  case TargetOpcode::G_FMAXIMUM:
2492  case TargetOpcode::G_FDIV:
2493  case TargetOpcode::G_FREM:
2494  case TargetOpcode::G_FCEIL:
2495  case TargetOpcode::G_FFLOOR:
2496  case TargetOpcode::G_FCOS:
2497  case TargetOpcode::G_FSIN:
2498  case TargetOpcode::G_FLOG10:
2499  case TargetOpcode::G_FLOG:
2500  case TargetOpcode::G_FLOG2:
2501  case TargetOpcode::G_FRINT:
2502  case TargetOpcode::G_FNEARBYINT:
2503  case TargetOpcode::G_FSQRT:
2504  case TargetOpcode::G_FEXP:
2505  case TargetOpcode::G_FEXP2:
2506  case TargetOpcode::G_FPOW:
2507  case TargetOpcode::G_INTRINSIC_TRUNC:
2508  case TargetOpcode::G_INTRINSIC_ROUND:
2509  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2510  assert(TypeIdx == 0);
2512 
2513  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2514  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2515 
2516  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2518  return Legalized;
2519  case TargetOpcode::G_FPOWI: {
2520  if (TypeIdx != 0)
2521  return UnableToLegalize;
2523  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2524  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2526  return Legalized;
2527  }
2528  case TargetOpcode::G_INTTOPTR:
2529  if (TypeIdx != 1)
2530  return UnableToLegalize;
2531 
2533  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2535  return Legalized;
2536  case TargetOpcode::G_PTRTOINT:
2537  if (TypeIdx != 0)
2538  return UnableToLegalize;
2539 
2541  widenScalarDst(MI, WideTy, 0);
2543  return Legalized;
2544  case TargetOpcode::G_BUILD_VECTOR: {
2546 
2547  const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2548  for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2549  widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2550 
2551  // Avoid changing the result vector type if the source element type was
2552  // requested.
2553  if (TypeIdx == 1) {
2554  MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2555  } else {
2556  widenScalarDst(MI, WideTy, 0);
2557  }
2558 
2560  return Legalized;
2561  }
2562  case TargetOpcode::G_SEXT_INREG:
2563  if (TypeIdx != 0)
2564  return UnableToLegalize;
2565 
2567  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2568  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2570  return Legalized;
2571  case TargetOpcode::G_PTRMASK: {
2572  if (TypeIdx != 1)
2573  return UnableToLegalize;
2575  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2577  return Legalized;
2578  }
2579  }
2580 }
2581 
2583  MachineIRBuilder &B, Register Src, LLT Ty) {
2584  auto Unmerge = B.buildUnmerge(Ty, Src);
2585  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2586  Pieces.push_back(Unmerge.getReg(I));
2587 }
2588 
2591  Register Dst = MI.getOperand(0).getReg();
2592  Register Src = MI.getOperand(1).getReg();
2593  LLT DstTy = MRI.getType(Dst);
2594  LLT SrcTy = MRI.getType(Src);
2595 
2596  if (SrcTy.isVector()) {
2597  LLT SrcEltTy = SrcTy.getElementType();
2598  SmallVector<Register, 8> SrcRegs;
2599 
2600  if (DstTy.isVector()) {
2601  int NumDstElt = DstTy.getNumElements();
2602  int NumSrcElt = SrcTy.getNumElements();
2603 
2604  LLT DstEltTy = DstTy.getElementType();
2605  LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
2606  LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
2607 
2608  // If there's an element size mismatch, insert intermediate casts to match
2609  // the result element type.
2610  if (NumSrcElt < NumDstElt) { // Source element type is larger.
2611  // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
2612  //
2613  // =>
2614  //
2615  // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
 2616  // %4:_(<2 x s8>) = G_BITCAST %2
 2617  // %5:_(<2 x s8>) = G_BITCAST %3
 2618  // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
2619  DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
2620  SrcPartTy = SrcEltTy;
2621  } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
2622  //
2623  // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
2624  //
2625  // =>
2626  //
2627  // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
 2628  // %4:_(s16) = G_BITCAST %2
 2629  // %5:_(s16) = G_BITCAST %3
 2630  // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
2631  SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
2632  DstCastTy = DstEltTy;
2633  }
2634 
2635  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
2636  for (Register &SrcReg : SrcRegs)
2637  SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
2638  } else
2639  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
2640 
2641  MIRBuilder.buildMerge(Dst, SrcRegs);
2642  MI.eraseFromParent();
2643  return Legalized;
2644  }
2645 
2646  if (DstTy.isVector()) {
2647  SmallVector<Register, 8> SrcRegs;
2648  getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2649  MIRBuilder.buildMerge(Dst, SrcRegs);
2650  MI.eraseFromParent();
2651  return Legalized;
2652  }
2653 
2654  return UnableToLegalize;
2655 }
2656 
2657 /// Figure out the bit offset into a register when coercing a vector index for
 2658 /// the wide element type. This is only for the case when promoting a vector
 2659 /// to one with larger elements.
 2660 ///
2661 ///
2662 /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2663 /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2665  Register Idx,
2666  unsigned NewEltSize,
2667  unsigned OldEltSize) {
2668  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2669  LLT IdxTy = B.getMRI()->getType(Idx);
2670 
2671  // Now figure out the amount we need to shift to get the target bits.
2672  auto OffsetMask = B.buildConstant(
2673  IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
2674  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
2675  return B.buildShl(IdxTy, OffsetIdx,
2676  B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
2677 }
2678 
2679 /// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
2680 /// is casting to a vector with a smaller element size, perform multiple element
2681 /// extracts and merge the results. If this is coercing to a vector with larger
2682 /// elements, index the bitcasted vector and extract the target element with bit
2683 /// operations. This is intended to force the indexing in the native register
2684 /// size for architectures that can dynamically index the register file.
2687  LLT CastTy) {
2688  if (TypeIdx != 1)
2689  return UnableToLegalize;
2690 
2691  Register Dst = MI.getOperand(0).getReg();
2692  Register SrcVec = MI.getOperand(1).getReg();
2693  Register Idx = MI.getOperand(2).getReg();
2694  LLT SrcVecTy = MRI.getType(SrcVec);
2695  LLT IdxTy = MRI.getType(Idx);
2696 
2697  LLT SrcEltTy = SrcVecTy.getElementType();
2698  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2699  unsigned OldNumElts = SrcVecTy.getNumElements();
2700 
2701  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2702  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2703 
2704  const unsigned NewEltSize = NewEltTy.getSizeInBits();
2705  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
2706  if (NewNumElts > OldNumElts) {
2707  // Decreasing the vector element size
2708  //
2709  // e.g. i64 = extract_vector_elt x:v2i64, y:i32
2710  // =>
2711  // v4i32:castx = bitcast x:v2i64
2712  //
2713  // i64 = bitcast
2714  // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
2715  // (i32 (extract_vector_elt castx, (2 * y + 1)))
2716  //
2717  if (NewNumElts % OldNumElts != 0)
2718  return UnableToLegalize;
2719 
2720  // Type of the intermediate result vector.
2721  const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
2722  LLT MidTy =
2723  LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
2724 
2725  auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
2726 
2727  SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
2728  auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
2729 
2730  for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
2731  auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
2732  auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
2733  auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
2734  NewOps[I] = Elt.getReg(0);
2735  }
2736 
2737  auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
2738  MIRBuilder.buildBitcast(Dst, NewVec);
2739  MI.eraseFromParent();
2740  return Legalized;
2741  }
2742 
2743  if (NewNumElts < OldNumElts) {
2744  if (NewEltSize % OldEltSize != 0)
2745  return UnableToLegalize;
2746 
2747  // This only depends on powers of 2 because we use bit tricks to figure out
2748  // the bit offset we need to shift to get the target element. A general
2749  // expansion could emit division/multiply.
2750  if (!isPowerOf2_32(NewEltSize / OldEltSize))
2751  return UnableToLegalize;
2752 
2753  // Increasing the vector element size.
2754  // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
2755  //
2756  // =>
2757  //
2758  // %cast = G_BITCAST %vec
2759  // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
2760  // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
2761  // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2762  // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2763  // %elt_bits = G_LSHR %wide_elt, %offset_bits
2764  // %elt = G_TRUNC %elt_bits
2765 
2766  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2767  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2768 
2769  // Divide to get the index in the wider element type.
2770  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2771 
2772  Register WideElt = CastVec;
2773  if (CastTy.isVector()) {
2774  WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2775  ScaledIdx).getReg(0);
2776  }
2777 
2778  // Compute the bit offset into the register of the target element.
2780  MIRBuilder, Idx, NewEltSize, OldEltSize);
2781 
2782  // Shift the wide element to get the target element.
2783  auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
2784  MIRBuilder.buildTrunc(Dst, ExtractedBits);
2785  MI.eraseFromParent();
2786  return Legalized;
2787  }
2788 
2789  return UnableToLegalize;
2790 }
2791 
 2792 /// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
2793 /// TargetReg, while preserving other bits in \p TargetReg.
2794 ///
 2795 /// (ZExt(InsertReg) << Offset) | (TargetReg & ~(LowBitsMask(InsertReg.size()) << Offset))
2797  Register TargetReg, Register InsertReg,
2798  Register OffsetBits) {
2799  LLT TargetTy = B.getMRI()->getType(TargetReg);
2800  LLT InsertTy = B.getMRI()->getType(InsertReg);
2801  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
2802  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
2803 
2804  // Produce a bitmask of the value to insert
2805  auto EltMask = B.buildConstant(
2806  TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
2807  InsertTy.getSizeInBits()));
2808  // Shift it into position
2809  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
2810  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
2811 
2812  // Clear out the bits in the wide element
2813  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
2814 
 2815  // The zero-extended value to insert already has zeros in its high bits, so
 2816  // OR it into the masked wide element.
2817  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
2818 }
2819 
2820 /// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
2821 /// is increasing the element size, perform the indexing in the target element
2822 /// type, and use bit operations to insert at the element position. This is
2823 /// intended for architectures that can dynamically index the register file and
2824 /// want to force indexing in the native register size.
2827  LLT CastTy) {
2828  if (TypeIdx != 0)
2829  return UnableToLegalize;
2830 
2831  Register Dst = MI.getOperand(0).getReg();
2832  Register SrcVec = MI.getOperand(1).getReg();
2833  Register Val = MI.getOperand(2).getReg();
2834  Register Idx = MI.getOperand(3).getReg();
2835 
2836  LLT VecTy = MRI.getType(Dst);
2837  LLT IdxTy = MRI.getType(Idx);
2838 
2839  LLT VecEltTy = VecTy.getElementType();
2840  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2841  const unsigned NewEltSize = NewEltTy.getSizeInBits();
2842  const unsigned OldEltSize = VecEltTy.getSizeInBits();
2843 
2844  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2845  unsigned OldNumElts = VecTy.getNumElements();
2846 
2847  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2848  if (NewNumElts < OldNumElts) {
2849  if (NewEltSize % OldEltSize != 0)
2850  return UnableToLegalize;
2851 
2852  // This only depends on powers of 2 because we use bit tricks to figure out
2853  // the bit offset we need to shift to get the target element. A general
2854  // expansion could emit division/multiply.
2855  if (!isPowerOf2_32(NewEltSize / OldEltSize))
2856  return UnableToLegalize;
2857 
2858  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2859  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2860 
2861  // Divide to get the index in the wider element type.
2862  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2863 
2864  Register ExtractedElt = CastVec;
2865  if (CastTy.isVector()) {
2866  ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2867  ScaledIdx).getReg(0);
2868  }
2869 
2870  // Compute the bit offset into the register of the target element.
2872  MIRBuilder, Idx, NewEltSize, OldEltSize);
2873 
2874  Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
2875  Val, OffsetBits);
2876  if (CastTy.isVector()) {
2877  InsertedElt = MIRBuilder.buildInsertVectorElement(
2878  CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
2879  }
2880 
2881  MIRBuilder.buildBitcast(Dst, InsertedElt);
2882  MI.eraseFromParent();
2883  return Legalized;
2884  }
2885 
2886  return UnableToLegalize;
2887 }
2888 
2890  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2891  Register DstReg = LoadMI.getDstReg();
2892  Register PtrReg = LoadMI.getPointerReg();
2893  LLT DstTy = MRI.getType(DstReg);
2894  MachineMemOperand &MMO = LoadMI.getMMO();
2895  LLT MemTy = MMO.getMemoryType();
2897 
2898  unsigned MemSizeInBits = MemTy.getSizeInBits();
2899  unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
2900 
2901  if (MemSizeInBits != MemStoreSizeInBits) {
2902  if (MemTy.isVector())
2903  return UnableToLegalize;
2904 
2905  // Promote to a byte-sized load if not loading an integral number of
2906  // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
2907  LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
2908  MachineMemOperand *NewMMO =
2909  MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
2910 
2911  Register LoadReg = DstReg;
2912  LLT LoadTy = DstTy;
2913 
2914  // If this wasn't already an extending load, we need to widen the result
2915  // register to avoid creating a load with a narrower result than the source.
2916  if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
2917  LoadTy = WideMemTy;
2918  LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
2919  }
2920 
2921  if (isa<GSExtLoad>(LoadMI)) {
2922  auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
2923  MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
2924  } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == DstTy) {
2925  auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
2926  // The extra bits are guaranteed to be zero, since we stored them that
2927  // way. A zext load from Wide thus automatically gives zext from MemVT.
2928  MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
2929  } else {
2930  MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
2931  }
2932 
2933  if (DstTy != LoadTy)
2934  MIRBuilder.buildTrunc(DstReg, LoadReg);
2935 
2936  LoadMI.eraseFromParent();
2937  return Legalized;
2938  }
2939 
2940  // Big endian lowering not implemented.
2942  return UnableToLegalize;
2943 
2944  // This load needs splitting into power of 2 sized loads.
2945  //
2946  // Our strategy here is to generate anyextending loads for the smaller
2947  // types up to next power-2 result type, and then combine the two larger
2948  // result values together, before truncating back down to the non-pow-2
2949  // type.
2950  // E.g. v1 = i24 load =>
2951  // v2 = i32 zextload (2 byte)
2952  // v3 = i32 load (1 byte)
2953  // v4 = i32 shl v3, 16
2954  // v5 = i32 or v4, v2
2955  // v1 = i24 trunc v5
2956  // By doing this we generate the correct truncate which should get
2957  // combined away as an artifact with a matching extend.
2958 
2959  uint64_t LargeSplitSize, SmallSplitSize;
2960 
2961  if (!isPowerOf2_32(MemSizeInBits)) {
2962  // This load needs splitting into power of 2 sized loads.
2963  LargeSplitSize = PowerOf2Floor(MemSizeInBits);
2964  SmallSplitSize = MemSizeInBits - LargeSplitSize;
2965  } else {
2966  // This is already a power of 2, but we still need to split this in half.
2967  //
2968  // Assume we're being asked to decompose an unaligned load.
2969  // TODO: If this requires multiple splits, handle them all at once.
2970  auto &Ctx = MF.getFunction().getContext();
2971  if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
2972  return UnableToLegalize;
2973 
2974  SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
2975  }
2976 
2977  if (MemTy.isVector()) {
2978  // TODO: Handle vector extloads
2979  if (MemTy != DstTy)
2980  return UnableToLegalize;
2981 
2982  // TODO: We can do better than scalarizing the vector and at least split it
2983  // in half.
2984  return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
2985  }
2986 
2987  MachineMemOperand *LargeMMO =
2988  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2989  MachineMemOperand *SmallMMO =
2990  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2991 
2992  LLT PtrTy = MRI.getType(PtrReg);
2993  unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
2994  LLT AnyExtTy = LLT::scalar(AnyExtSize);
2995  auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
2996  PtrReg, *LargeMMO);
2997 
2998  auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
2999  LargeSplitSize / 8);
3000  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3001  auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3002  auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3003  SmallPtr, *SmallMMO);
3004 
3005  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
3006  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3007 
3008  if (AnyExtTy == DstTy)
3009  MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3010  else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3011  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3012  MIRBuilder.buildTrunc(DstReg, {Or});
3013  } else {
3014  assert(DstTy.isPointer() && "expected pointer");
3015  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3016 
 3017  // FIXME: We currently consider this to be illegal for non-integral address
 3018  // spaces, but we still need a way to reinterpret the bits.
3019  MIRBuilder.buildIntToPtr(DstReg, Or);
3020  }
3021 
3022  LoadMI.eraseFromParent();
3023  return Legalized;
3024 }
3025 
3027  // Lower a non-power of 2 store into multiple pow-2 stores.
3028  // E.g. split an i24 store into an i16 store + i8 store.
3029  // We do this by first extending the stored value to the next largest power
3030  // of 2 type, and then using truncating stores to store the components.
 3031  // As with G_LOAD, this generates an extend that can be artifact-combined
 3032  // away instead of leaving behind extracts.
3033  Register SrcReg = StoreMI.getValueReg();
3034  Register PtrReg = StoreMI.getPointerReg();
3035  LLT SrcTy = MRI.getType(SrcReg);
3037  MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3038  LLT MemTy = MMO.getMemoryType();
3039 
3040  unsigned StoreWidth = MemTy.getSizeInBits();
3041  unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3042 
3043  if (StoreWidth != StoreSizeInBits) {
3044  if (SrcTy.isVector())
3045  return UnableToLegalize;
3046 
3047  // Promote to a byte-sized store with upper bits zero if not
3048  // storing an integral number of bytes. For example, promote
3049  // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3050  LLT WideTy = LLT::scalar(StoreSizeInBits);
3051 
3052  if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3053  // Avoid creating a store with a narrower source than result.
3054  SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3055  SrcTy = WideTy;
3056  }
3057 
3058  auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3059 
3060  MachineMemOperand *NewMMO =
3061  MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3062  MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3063  StoreMI.eraseFromParent();
3064  return Legalized;
3065  }
3066 
3067  if (MemTy.isVector()) {
3068  // TODO: Handle vector trunc stores
3069  if (MemTy != SrcTy)
3070  return UnableToLegalize;
3071 
3072  // TODO: We can do better than scalarizing the vector and at least split it
3073  // in half.
3074  return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3075  }
3076 
3077  unsigned MemSizeInBits = MemTy.getSizeInBits();
3078  uint64_t LargeSplitSize, SmallSplitSize;
3079 
3080  if (!isPowerOf2_32(MemSizeInBits)) {
3081  LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
3082  SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3083  } else {
3084  auto &Ctx = MF.getFunction().getContext();
3085  if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3086  return UnableToLegalize; // Don't know what we're being asked to do.
3087 
3088  SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3089  }
3090 
3091  // Extend to the next pow-2. If this store was itself the result of lowering,
3092  // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3093  // that's wider than the stored size.
3094  unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3095  const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3096 
3097  if (SrcTy.isPointer()) {
3098  const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3099  SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3100  }
3101 
3102  auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3103 
3104  // Obtain the smaller value by shifting away the larger value.
3105  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3106  auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
3107 
3108  // Generate the PtrAdd and truncating stores.
3109  LLT PtrTy = MRI.getType(PtrReg);
3110  auto OffsetCst = MIRBuilder.buildConstant(
3111  LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3112  auto SmallPtr =
3113  MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
3114 
3115  MachineMemOperand *LargeMMO =
3116  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3117  MachineMemOperand *SmallMMO =
3118  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3119  MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3120  MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3121  StoreMI.eraseFromParent();
3122  return Legalized;
3123 }
3124 
3126 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3127  switch (MI.getOpcode()) {
3128  case TargetOpcode::G_LOAD: {
3129  if (TypeIdx != 0)
3130  return UnableToLegalize;
3131  MachineMemOperand &MMO = **MI.memoperands_begin();
3132 
3133  // Not sure how to interpret a bitcast of an extending load.
3134  if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3135  return UnableToLegalize;
3136 
3138  bitcastDst(MI, CastTy, 0);
3139  MMO.setType(CastTy);
3141  return Legalized;
3142  }
3143  case TargetOpcode::G_STORE: {
3144  if (TypeIdx != 0)
3145  return UnableToLegalize;
3146 
3147  MachineMemOperand &MMO = **MI.memoperands_begin();
3148 
3149  // Not sure how to interpret a bitcast of a truncating store.
3150  if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3151  return UnableToLegalize;
3152 
3154  bitcastSrc(MI, CastTy, 0);
3155  MMO.setType(CastTy);
3157  return Legalized;
3158  }
3159  case TargetOpcode::G_SELECT: {
3160  if (TypeIdx != 0)
3161  return UnableToLegalize;
3162 
3163  if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3164  LLVM_DEBUG(
3165  dbgs() << "bitcast action not implemented for vector select\n");
3166  return UnableToLegalize;
3167  }
3168 
3170  bitcastSrc(MI, CastTy, 2);
3171  bitcastSrc(MI, CastTy, 3);
3172  bitcastDst(MI, CastTy, 0);
3174  return Legalized;
3175  }
3176  case TargetOpcode::G_AND:
3177  case TargetOpcode::G_OR:
3178  case TargetOpcode::G_XOR: {
3180  bitcastSrc(MI, CastTy, 1);
3181  bitcastSrc(MI, CastTy, 2);
3182  bitcastDst(MI, CastTy, 0);
3184  return Legalized;
3185  }
3186  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3187  return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3188  case TargetOpcode::G_INSERT_VECTOR_ELT:
3189  return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3190  default:
3191  return UnableToLegalize;
3192  }
3193 }
3194 
3195 // Legalize an instruction by changing the opcode in place.
3196 void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3198  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3200 }
3201 
3203 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3204  using namespace TargetOpcode;
3205 
3206  switch(MI.getOpcode()) {
3207  default:
3208  return UnableToLegalize;
3209  case TargetOpcode::G_BITCAST:
3210  return lowerBitcast(MI);
3211  case TargetOpcode::G_SREM:
3212  case TargetOpcode::G_UREM: {
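  // Lower the remainder as rem = x - (x / y) * y, using the division with
  // matching signedness.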
3213  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3214  auto Quot =
3215  MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3216  {MI.getOperand(1), MI.getOperand(2)});
3217 
3218  auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3219  MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3220  MI.eraseFromParent();
3221  return Legalized;
3222  }
3223  case TargetOpcode::G_SADDO:
3224  case TargetOpcode::G_SSUBO:
3225  return lowerSADDO_SSUBO(MI);
3226  case TargetOpcode::G_UMULH:
3227  case TargetOpcode::G_SMULH:
3228  return lowerSMULH_UMULH(MI);
3229  case TargetOpcode::G_SMULO:
3230  case TargetOpcode::G_UMULO: {
3231  // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3232  // result.
3233  Register Res = MI.getOperand(0).getReg();
3234  Register Overflow = MI.getOperand(1).getReg();
3235  Register LHS = MI.getOperand(2).getReg();
3236  Register RHS = MI.getOperand(3).getReg();
3237  LLT Ty = MRI.getType(Res);
3238 
3239  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3240  ? TargetOpcode::G_SMULH
3241  : TargetOpcode::G_UMULH;
3242 
3244  const auto &TII = MIRBuilder.getTII();
3245  MI.setDesc(TII.get(TargetOpcode::G_MUL));
3246  MI.RemoveOperand(1);
3248 
3249  auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3250  auto Zero = MIRBuilder.buildConstant(Ty, 0);
3251 
3252  // Move insert point forward so we can use the Res register if needed.
3254 
3255  // For *signed* multiply, overflow is detected by checking:
3256  // (hi != (lo >> bitwidth-1))
3257  if (Opcode == TargetOpcode::G_SMULH) {
3258  auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3259  auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3260  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3261  } else {
3262  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3263  }
3264  return Legalized;
3265  }
3266  case TargetOpcode::G_FNEG: {
3267  Register Res = MI.getOperand(0).getReg();
3268  LLT Ty = MRI.getType(Res);
3269 
3270  // TODO: Handle vector types once we are able to
3271  // represent them.
3272  if (Ty.isVector())
3273  return UnableToLegalize;
3274  auto SignMask =
3276  Register SubByReg = MI.getOperand(1).getReg();
3277  MIRBuilder.buildXor(Res, SubByReg, SignMask);
3278  MI.eraseFromParent();
3279  return Legalized;
3280  }
3281  case TargetOpcode::G_FSUB: {
3282  Register Res = MI.getOperand(0).getReg();
3283  LLT Ty = MRI.getType(Res);
3284 
3285  // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3286  // First, check if G_FNEG is marked as Lower. If so, we may
3287  // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
3288  if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
3289  return UnableToLegalize;
3290  Register LHS = MI.getOperand(1).getReg();
3291  Register RHS = MI.getOperand(2).getReg();
3293  MIRBuilder.buildFNeg(Neg, RHS);
3294  MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3295  MI.eraseFromParent();
3296  return Legalized;
3297  }
3298  case TargetOpcode::G_FMAD:
3299  return lowerFMad(MI);
3300  case TargetOpcode::G_FFLOOR:
3301  return lowerFFloor(MI);
3302  case TargetOpcode::G_INTRINSIC_ROUND:
3303  return lowerIntrinsicRound(MI);
3304  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
3305  // Since round even is the assumed rounding mode for unconstrained FP
3306  // operations, rint and roundeven are the same operation.
3307  changeOpcode(MI, TargetOpcode::G_FRINT);
3308  return Legalized;
3309  }
3310  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3311  Register OldValRes = MI.getOperand(0).getReg();
3312  Register SuccessRes = MI.getOperand(1).getReg();
3313  Register Addr = MI.getOperand(2).getReg();
3314  Register CmpVal = MI.getOperand(3).getReg();
3315  Register NewVal = MI.getOperand(4).getReg();
3316  MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3317  **MI.memoperands_begin());
3318  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3319  MI.eraseFromParent();
3320  return Legalized;
3321  }
3322  case TargetOpcode::G_LOAD:
3323  case TargetOpcode::G_SEXTLOAD:
3324  case TargetOpcode::G_ZEXTLOAD:
3325  return lowerLoad(cast<GAnyLoad>(MI));
3326  case TargetOpcode::G_STORE:
3327  return lowerStore(cast<GStore>(MI));
3328  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3329  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3330  case TargetOpcode::G_CTLZ:
3331  case TargetOpcode::G_CTTZ:
3332  case TargetOpcode::G_CTPOP:
3333  return lowerBitCount(MI);
3334  case G_UADDO: {
3335  Register Res = MI.getOperand(0).getReg();
3336  Register CarryOut = MI.getOperand(1).getReg();
3337  Register LHS = MI.getOperand(2).getReg();
3338  Register RHS = MI.getOperand(3).getReg();
3339 
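  // The unsigned sum wrapped iff the result is (unsigned) less than either
  // operand; compare against RHS to set the carry-out.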
3340  MIRBuilder.buildAdd(Res, LHS, RHS);
3341  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
3342 
3343  MI.eraseFromParent();
3344  return Legalized;
3345  }
3346  case G_UADDE: {
3347  Register Res = MI.getOperand(0).getReg();
3348  Register CarryOut = MI.getOperand(1).getReg();
3349  Register LHS = MI.getOperand(2).getReg();
3350  Register RHS = MI.getOperand(3).getReg();
3351  Register CarryIn = MI.getOperand(4).getReg();
3352  LLT Ty = MRI.getType(Res);
3353 
3354  auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3355  auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3356  MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
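 // Carry-out is derived by checking whether the final sum wrapped below LHS.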
3357  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
3358 
3359  MI.eraseFromParent();
3360  return Legalized;
3361  }
3362  case G_USUBO: {
3363  Register Res = MI.getOperand(0).getReg();
3364  Register BorrowOut = MI.getOperand(1).getReg();
3365  Register LHS = MI.getOperand(2).getReg();
3366  Register RHS = MI.getOperand(3).getReg();
3367 
3368  MIRBuilder.buildSub(Res, LHS, RHS);
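 // An unsigned subtract borrows exactly when LHS < RHS.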
3369  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3370 
3371  MI.eraseFromParent();
3372  return Legalized;
3373  }
3374  case G_USUBE: {
3375  Register Res = MI.getOperand(0).getReg();
3376  Register BorrowOut = MI.getOperand(1).getReg();
3377  Register LHS = MI.getOperand(2).getReg();
3378  Register RHS = MI.getOperand(3).getReg();
3379  Register BorrowIn = MI.getOperand(4).getReg();
3380  const LLT CondTy = MRI.getType(BorrowOut);
3381  const LLT Ty = MRI.getType(Res);
3382 
3383  auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3384  auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3385  MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3386 
3387  auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
3388  auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
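 // If LHS == RHS, the borrow-out equals the incoming borrow; otherwise it is
 // set iff LHS < RHS.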
3389  MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
3390 
3391  MI.eraseFromParent();
3392  return Legalized;
3393  }
3394  case G_UITOFP:
3395  return lowerUITOFP(MI);
3396  case G_SITOFP:
3397  return lowerSITOFP(MI);
3398  case G_FPTOUI:
3399  return lowerFPTOUI(MI);
3400  case G_FPTOSI:
3401  return lowerFPTOSI(MI);
3402  case G_FPTRUNC:
3403  return lowerFPTRUNC(MI);
3404  case G_FPOWI:
3405  return lowerFPOWI(MI);
3406  case G_SMIN:
3407  case G_SMAX:
3408  case G_UMIN:
3409  case G_UMAX:
3410  return lowerMinMax(MI);
3411  case G_FCOPYSIGN:
3412  return lowerFCopySign(MI);
3413  case G_FMINNUM:
3414  case G_FMAXNUM:
3415  return lowerFMinNumMaxNum(MI);
3416  case G_MERGE_VALUES:
3417  return lowerMergeValues(MI);
3418  case G_UNMERGE_VALUES:
3419  return lowerUnmergeValues(MI);
3420  case TargetOpcode::G_SEXT_INREG: {
3421  assert(MI.getOperand(2).isImm() && "Expected immediate");
3422  int64_t SizeInBits = MI.getOperand(2).getImm();
3423 
3424  Register DstReg = MI.getOperand(0).getReg();
3425  Register SrcReg = MI.getOperand(1).getReg();
3426  LLT DstTy = MRI.getType(DstReg);
3427  Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3428 
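 // Sign-extend from SizeInBits by shifting the value to the top of the
 // register and arithmetically shifting it back down; e.g. for an s32
 // destination and SizeInBits == 8 both shifts use an amount of 24.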
3429  auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3430  MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3431  MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3432  MI.eraseFromParent();
3433  return Legalized;
3434  }
3435  case G_EXTRACT_VECTOR_ELT:
3436  case G_INSERT_VECTOR_ELT:
3437  return lowerExtractInsertVectorElt(MI);
3438  case G_SHUFFLE_VECTOR:
3439  return lowerShuffleVector(MI);
3440  case G_DYN_STACKALLOC:
3441  return lowerDynStackAlloc(MI);
3442  case G_EXTRACT:
3443  return lowerExtract(MI);
3444  case G_INSERT:
3445  return lowerInsert(MI);
3446  case G_BSWAP:
3447  return lowerBswap(MI);
3448  case G_BITREVERSE:
3449  return lowerBitreverse(MI);
3450  case G_READ_REGISTER:
3451  case G_WRITE_REGISTER:
3452  return lowerReadWriteRegister(MI);
3453  case G_UADDSAT:
3454  case G_USUBSAT: {
3455  // Try to make a reasonable guess about which lowering strategy to use. The
3456  // target can override this by requesting Custom lowering and calling the
3457  // implementation functions directly.
3458  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3459  if (LI.isLegalOrCustom({G_UMIN, Ty}))
3460  return lowerAddSubSatToMinMax(MI);
3461  return lowerAddSubSatToAddoSubo(MI);
3462  }
3463  case G_SADDSAT:
3464  case G_SSUBSAT: {
3465  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3466 
3467  // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3468  // since it's a shorter expansion. However, we would need to figure out the
3469  // preferred boolean type for the carry out for the query.
3470  if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3471  return lowerAddSubSatToMinMax(MI);
3472  return lowerAddSubSatToAddoSubo(MI);
3473  }
3474  case G_SSHLSAT:
3475  case G_USHLSAT:
3476  return lowerShlSat(MI);
3477  case G_ABS:
3478  return lowerAbsToAddXor(MI);
3479  case G_SELECT:
3480  return lowerSelect(MI);
3481  case G_SDIVREM:
3482  case G_UDIVREM:
3483  return lowerDIVREM(MI);
3484  case G_FSHL:
3485  case G_FSHR:
3486  return lowerFunnelShift(MI);
3487  case G_ROTL:
3488  case G_ROTR:
3489  return lowerRotate(MI);
3490  case G_MEMSET:
3491  case G_MEMCPY:
3492  case G_MEMMOVE:
3493  return lowerMemCpyFamily(MI);
3494  case G_MEMCPY_INLINE:
3495  return lowerMemcpyInline(MI);
3496  GISEL_VECREDUCE_CASES_NONSEQ
3497  return lowerVectorReduction(MI);
3498  }
3499 }
3500 
3501 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
3502  Align MinAlign) const {
3503  // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3504  // datalayout for the preferred alignment. Also there should be a target hook
3505  // for this to allow targets to reduce the alignment and ignore the
3506  // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3507  // the type.
3508  return std::max(Align(PowerOf2Ceil(Ty.getSizeInBits() / 8)), MinAlign);
3509 }
3510 
3511 MachineInstrBuilder
3512 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
3513  MachinePointerInfo &PtrInfo) {
3514  MachineFunction &MF = MIRBuilder.getMF();
3515  const DataLayout &DL = MIRBuilder.getDataLayout();
3516  int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3517 
3518  unsigned AddrSpace = DL.getAllocaAddrSpace();
3519  LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3520 
3521  PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3522  return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3523 }
3524 
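// Clamp a dynamic vector index to the valid range for VecTy: a constant index
// is returned as-is, a power-of-two element count is handled with a mask, and
// anything else is clamped with an unsigned min.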
3525 static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
3526  LLT VecTy) {
3527  int64_t IdxVal;
3528  if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3529  return IdxReg;
3530 
3531  LLT IdxTy = B.getMRI()->getType(IdxReg);
3532  unsigned NElts = VecTy.getNumElements();
3533  if (isPowerOf2_32(NElts)) {
3534  APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3535  return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3536  }
3537 
3538  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3539  .getReg(0);
3540 }
3541 
3542 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
3543  Register Index) {
3544  LLT EltTy = VecTy.getElementType();
3545 
3546  // Calculate the element offset and add it to the pointer.
3547  unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3548  assert(EltSize * 8 == EltTy.getSizeInBits() &&
3549  "Converting bits to bytes lost precision");
3550 
3551  Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
3552 
3553  LLT IdxTy = MRI.getType(Index);
3554  auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3555  MIRBuilder.buildConstant(IdxTy, EltSize));
3556 
3557  LLT PtrTy = MRI.getType(VecPtr);
3558  return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
3559 }
3560 
3560 LegalizerHelper::LegalizeResult
3561 LegalizerHelper::fewerElementsVectorImplicitDef(
3562  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
3563  Register DstReg = MI.getOperand(0).getReg();
3564  LLT DstTy = MRI.getType(DstReg);
3565  LLT LCMTy = getLCMType(DstTy, NarrowTy);
3566 
3567  unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
3568 
3569  auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
3570  SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
3571 
3572  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3573  MI.eraseFromParent();
3574  return Legalized;
3575 }
3576 
3577 // Handle splitting vector operations which need to have the same number of
3578 // elements in each type index, but each type index may have a different element
3579 // type.
3580 //
3581 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
3582 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3583 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3584 //
3585 // Also handles some irregular breakdown cases, e.g.
3586 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
3587 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3588 // s64 = G_SHL s64, s32
3589 LegalizerHelper::LegalizeResult
3590 LegalizerHelper::fewerElementsVectorMultiEltType(
3591  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
3592  if (TypeIdx != 0)
3593  return UnableToLegalize;
3594 
3595  const LLT NarrowTy0 = NarrowTyArg;
3596  const Register DstReg = MI.getOperand(0).getReg();
3597  LLT DstTy = MRI.getType(DstReg);
3598  LLT LeftoverTy0;
3599 
3600  // All of the operands need to have the same number of elements, so if we can
3601  // determine a type breakdown for the result type, we can for all of the
3602  // source types.
3603  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
3604  if (NumParts < 0)
3605  return UnableToLegalize;
3606 
3607  SmallVector<MachineInstrBuilder, 4> NewInsts;
3608 
3609  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
3610  SmallVector<Register, 4> PartRegs, LeftoverRegs;
3611 
3612  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
3613  Register SrcReg = MI.getOperand(I).getReg();
3614  LLT SrcTyI = MRI.getType(SrcReg);
3615  const auto NewEC = NarrowTy0.isVector() ? NarrowTy0.getElementCount()
3616  : ElementCount::getFixed(1);
3617  LLT NarrowTyI = LLT::scalarOrVector(NewEC, SrcTyI.getScalarType());
3618  LLT LeftoverTyI;
3619 
3620  // Split this operand into the requested typed registers, and any leftover
3621  // required to reproduce the original type.
3622  if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
3623  LeftoverRegs))
3624  return UnableToLegalize;
3625 
3626  if (I == 1) {
3627  // For the first operand, create an instruction for each part and setup
3628  // the result.
3629  for (Register PartReg : PartRegs) {
3630  Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3631  NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
3632  .addDef(PartDstReg)
3633  .addUse(PartReg));
3634  DstRegs.push_back(PartDstReg);
3635  }
3636 
3637  for (Register LeftoverReg : LeftoverRegs) {
3638  Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
3639  NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
3640  .addDef(PartDstReg)
3641  .addUse(LeftoverReg));
3642  LeftoverDstRegs.push_back(PartDstReg);
3643  }
3644  } else {
3645  assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
3646 
3647  // Add the newly created operand splits to the existing instructions. The
3648  // odd-sized pieces are ordered after the requested NarrowTyArg sized
3649  // pieces.
3650  unsigned InstCount = 0;
3651  for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
3652  NewInsts[InstCount++].addUse(PartRegs[J]);
3653  for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
3654  NewInsts[InstCount++].addUse(LeftoverRegs[J]);
3655  }
3656 
3657  PartRegs.clear();
3658  LeftoverRegs.clear();
3659  }
3660 
3661  // Insert the newly built operations and rebuild the result register.
3662  for (auto &MIB : NewInsts)
3663  MIRBuilder.insertInstr(MIB);
3664 
3665  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
3666 
3667  MI.eraseFromParent();
3668  return Legalized;
3669 }
3670 
3671 LegalizerHelper::LegalizeResult
3672 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
3673  LLT NarrowTy) {
3674  if (TypeIdx != 0)
3675  return UnableToLegalize;
3676 
3677  Register DstReg = MI.getOperand(0).getReg();
3678  Register SrcReg = MI.getOperand(1).getReg();
3679  LLT DstTy = MRI.getType(DstReg);
3680  LLT SrcTy = MRI.getType(SrcReg);
3681 
3682  LLT NarrowTy0 = NarrowTy;
3683  LLT NarrowTy1;
3684  unsigned NumParts;
3685 
3686  if (NarrowTy.isVector()) {
3687  // Uneven breakdown not handled.
3688  NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
3689  if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
3690  return UnableToLegalize;
3691 
3692  NarrowTy1 = LLT::vector(NarrowTy.getElementCount(), SrcTy.getElementType());
3693  } else {
3694  NumParts = DstTy.getNumElements();
3695  NarrowTy1 = SrcTy.getElementType();
3696  }
3697 
3698  SmallVector<Register, 4> SrcRegs, DstRegs;
3699  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
3700 
3701  for (unsigned I = 0; I < NumParts; ++I) {
3702  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3703  MachineInstr *NewInst =
3704  MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
3705 
3706  NewInst->setFlags(MI.getFlags());
3707  DstRegs.push_back(DstReg);
3708  }
3709 
3710  if (NarrowTy.isVector())
3711  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3712  else
3713  MIRBuilder.buildBuildVector(DstReg, DstRegs);
3714 
3715  MI.eraseFromParent();
3716  return Legalized;
3717 }
3718 
3719 LegalizerHelper::LegalizeResult
3720 LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
3721  LLT NarrowTy) {
3722  Register DstReg = MI.getOperand(0).getReg();
3723  Register Src0Reg = MI.getOperand(2).getReg();
3724  LLT DstTy = MRI.getType(DstReg);
3725  LLT SrcTy = MRI.getType(Src0Reg);
3726 
3727  unsigned NumParts;
3728  LLT NarrowTy0, NarrowTy1;
3729 
3730  if (TypeIdx == 0) {
3731  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3732  unsigned OldElts = DstTy.getNumElements();
3733 
3734  NarrowTy0 = NarrowTy;
3735  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
3736  NarrowTy1 = NarrowTy.isVector() ? LLT::vector(NarrowTy.getElementCount(),
3737  SrcTy.getScalarSizeInBits())
3738  : SrcTy.getElementType();
3739 
3740  } else {
3741  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3742  unsigned OldElts = SrcTy.getNumElements();
3743 
3744  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
3745  NarrowTy.getNumElements();
3746  NarrowTy0 =
3747  LLT::vector(NarrowTy.getElementCount(), DstTy.getScalarSizeInBits());
3748  NarrowTy1 = NarrowTy;
3749  }
3750 
3751  // FIXME: Don't know how to handle the situation where the small vectors
3752  // aren't all the same size yet.
3753  if (NarrowTy1.isVector() &&
3754  NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
3755  return UnableToLegalize;
3756 
3757  CmpInst::Predicate Pred
3758  = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3759 
3760  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
3761  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
3762  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
3763 
3764  for (unsigned I = 0; I < NumParts; ++I) {
3765  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3766  DstRegs.push_back(DstReg);
3767 
3768  if (MI.getOpcode() == TargetOpcode::G_ICMP)
3769  MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
3770  else {
3771  MachineInstr *NewCmp
3772  = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
3773  NewCmp->setFlags(MI.getFlags());
3774  }
3775  }
3776 
3777  if (NarrowTy1.isVector())
3778  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3779  else
3780  MIRBuilder.buildBuildVector(DstReg, DstRegs);
3781 
3782  MI.eraseFromParent();
3783  return Legalized;
3784 }
3785 
3786 LegalizerHelper::LegalizeResult
3787 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
3788  LLT NarrowTy) {
3789  Register DstReg = MI.getOperand(0).getReg();
3790  Register CondReg = MI.getOperand(1).getReg();
3791 
3792  unsigned NumParts = 0;
3793  LLT NarrowTy0, NarrowTy1;
3794 
3795  LLT DstTy = MRI.getType(DstReg);
3796  LLT CondTy = MRI.getType(CondReg);
3797  unsigned Size = DstTy.getSizeInBits();
3798 
3799  assert(TypeIdx == 0 || CondTy.isVector());
3800 
3801  if (TypeIdx == 0) {
3802  NarrowTy0 = NarrowTy;
3803  NarrowTy1 = CondTy;
3804 
3805  unsigned NarrowSize = NarrowTy0.getSizeInBits();
3806  // FIXME: Don't know how to handle the situation where the small vectors
3807  // aren't all the same size yet.
3808  if (Size % NarrowSize != 0)
3809  return UnableToLegalize;
3810 
3811  NumParts = Size / NarrowSize;
3812 
3813  // Need to break down the condition type
3814  if (CondTy.isVector()) {
3815  if (CondTy.getNumElements() == NumParts)
3816  NarrowTy1 = CondTy.getElementType();
3817  else
3818  NarrowTy1 =
3819  LLT::vector(CondTy.getElementCount().divideCoefficientBy(NumParts),
3820  CondTy.getScalarSizeInBits());
3821  }
3822  } else {
3823  NumParts = CondTy.getNumElements();
3824  if (NarrowTy.isVector()) {
3825  // TODO: Handle uneven breakdown.
3826  if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
3827  return UnableToLegalize;
3828 
3829  return UnableToLegalize;
3830  } else {
3831  NarrowTy0 = DstTy.getElementType();
3832  NarrowTy1 = NarrowTy;
3833  }
3834  }
3835 
3836  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
3837  if (CondTy.isVector())
3838  extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
3839 
3840  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
3841  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
3842 
3843  for (unsigned i = 0; i < NumParts; ++i) {
3844  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3845  MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
3846  Src1Regs[i], Src2Regs[i]);
3847  DstRegs.push_back(DstReg);
3848  }
3849 
3850  if (NarrowTy0.isVector())
3851  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3852  else
3853  MIRBuilder.buildBuildVector(DstReg, DstRegs);
3854 
3855  MI.eraseFromParent();
3856  return Legalized;
3857 }
3858 
3859 LegalizerHelper::LegalizeResult
3860 LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
3861  LLT NarrowTy) {
3862  const Register DstReg = MI.getOperand(0).getReg();
3863  LLT PhiTy = MRI.getType(DstReg);
3864  LLT LeftoverTy;
3865 
3866  // All of the operands need to have the same number of elements, so if we can
3867  // determine a type breakdown for the result type, we can for all of the
3868  // source types.
3869  int NumParts, NumLeftover;
3870  std::tie(NumParts, NumLeftover)
3871  = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
3872  if (NumParts < 0)
3873  return UnableToLegalize;
3874 
3875  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
3876  SmallVector<MachineInstrBuilder, 4> NewInsts;
3877 
3878  const int TotalNumParts = NumParts + NumLeftover;
3879 
3880  // Insert the new phis in the result block first.
3881  for (int I = 0; I != TotalNumParts; ++I) {
3882  LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
3883  Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
3884  NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
3885  .addDef(PartDstReg));
3886  if (I < NumParts)
3887  DstRegs.push_back(PartDstReg);
3888  else
3889  LeftoverDstRegs.push_back(PartDstReg);
3890  }
3891 
3892  MachineBasicBlock *MBB = MI.getParent();
3893  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
3894  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
3895 
3896  SmallVector<Register, 4> PartRegs, LeftoverRegs;
3897 
3898  // Insert code to extract the incoming values in each predecessor block.
3899  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3900  PartRegs.clear();
3901  LeftoverRegs.clear();
3902 
3903  Register SrcReg = MI.getOperand(I).getReg();
3904  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3905  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3906 
3907  LLT Unused;
3908  if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
3909  LeftoverRegs))
3910  return UnableToLegalize;
3911 
3912  // Add the newly created operand splits to the existing instructions. The
3913  // odd-sized pieces are ordered after the requested NarrowTyArg sized
3914  // pieces.
3915  for (int J = 0; J != TotalNumParts; ++J) {
3916  MachineInstrBuilder MIB = NewInsts[J];
3917  MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
3918  MIB.addMBB(&OpMBB);
3919  }
3920  }
3921 
3922  MI.eraseFromParent();
3923  return Legalized;
3924 }
3925 
3926 LegalizerHelper::LegalizeResult
3927 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
3928  unsigned TypeIdx,
3929  LLT NarrowTy) {
3930  if (TypeIdx != 1)
3931  return UnableToLegalize;
3932 
3933  const int NumDst = MI.getNumOperands() - 1;
3934  const Register SrcReg = MI.getOperand(NumDst).getReg();
3935  LLT SrcTy = MRI.getType(SrcReg);
3936 
3937  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3938 
3939  // TODO: Create sequence of extracts.
3940  if (DstTy == NarrowTy)
3941  return UnableToLegalize;
3942 
3943  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3944  if (DstTy == GCDTy) {
3945  // This would just be a copy of the same unmerge.
3946  // TODO: Create extracts, pad with undef and create intermediate merges.
3947  return UnableToLegalize;
3948  }
3949 
3950  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
3951  const int NumUnmerge = Unmerge->getNumOperands() - 1;
3952  const int PartsPerUnmerge = NumDst / NumUnmerge;
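 // Each GCD-typed piece is unmerged again, producing PartsPerUnmerge of the
 // original results per piece.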
3953 
3954  for (int I = 0; I != NumUnmerge; ++I) {
3955  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3956 
3957  for (int J = 0; J != PartsPerUnmerge; ++J)
3958  MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3959  MIB.addUse(Unmerge.getReg(I));
3960  }
3961 
3962  MI.eraseFromParent();
3963  return Legalized;
3964 }
3965 
3966 LegalizerHelper::LegalizeResult
3967 LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
3968  LLT NarrowTy) {
3969  Register Result = MI.getOperand(0).getReg();
3970  Register Overflow = MI.getOperand(1).getReg();
3971  Register LHS = MI.getOperand(2).getReg();
3972  Register RHS = MI.getOperand(3).getReg();
3973 
3974  LLT SrcTy = MRI.getType(LHS);
3975  if (!SrcTy.isVector())
3976  return UnableToLegalize;
3977 
3978  LLT ElementType = SrcTy.getElementType();
3979  LLT OverflowElementTy = MRI.getType(Overflow).getElementType();
3980  const ElementCount NumResult = SrcTy.getElementCount();
3981  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3982 
3983  // Unmerge the operands to smaller parts of GCD type.
3984  auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS);
3985  auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS);
3986 
3987  const int NumOps = UnmergeLHS->getNumOperands() - 1;
3988  const ElementCount PartsPerUnmerge = NumResult.divideCoefficientBy(NumOps);
3989  LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy);
3990  LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType);
3991 
3992  // Perform the operation over unmerged parts.
3993  SmallVector<Register, 8> ResultParts;
3994  SmallVector<Register, 8> OverflowParts;
3995  for (int I = 0; I != NumOps; ++I) {
3996  Register Operand1 = UnmergeLHS->getOperand(I).getReg();
3997  Register Operand2 = UnmergeRHS->getOperand(I).getReg();
3998  auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy},
3999  {Operand1, Operand2});
4000  ResultParts.push_back(PartMul->getOperand(0).getReg());
4001  OverflowParts.push_back(PartMul->getOperand(1).getReg());
4002  }
4003 
4004  LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts);
4005  LLT OverflowLCMTy =
4006  LLT::scalarOrVector(ResultLCMTy.getElementCount(), OverflowElementTy);
4007 
4008  // Recombine the pieces to the original result and overflow registers.
4009  buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts);
4010  buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts);
4011  MI.eraseFromParent();
4012  return Legalized;
4013 }
4014 
4015 // Handle FewerElementsVector for a G_BUILD_VECTOR or G_CONCAT_VECTORS that
4016 // produces a vector.
4017 //
4018 // Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
4019 // undef as necessary.
4020 //
4021 // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
4022 // -> <2 x s16>
4023 //
4024 // %4:_(s16) = G_IMPLICIT_DEF
4025 // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
4026 // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
4027 // %7:_(<2 x s16>) = G_IMPLICIT_DEF
4028 // %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
4029 // %3:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %8
4030 LegalizerHelper::LegalizeResult
4031 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
4032  LLT NarrowTy) {
4033  Register DstReg = MI.getOperand(0).getReg();
4034  LLT DstTy = MRI.getType(DstReg);
4035  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
4036  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
4037 
4038  // Break into a common type
4039  SmallVector<Register, 16> Parts;
4040  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
4041  extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
4042 
4043  // Build the requested new merge, padding with undef.
4044  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
4045  TargetOpcode::G_ANYEXT);
4046 
4047  // Pack into the original result register.
4048  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
4049 
4050  MI.eraseFromParent();
4051  return Legalized;
4052 }
4053 
4054 LegalizerHelper::LegalizeResult
4055 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
4056  unsigned TypeIdx,
4057  LLT NarrowVecTy) {
4058  Register DstReg = MI.getOperand(0).getReg();
4059  Register SrcVec = MI.getOperand(1).getReg();
4060  Register InsertVal;
4061  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
4062 
4063  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
4064  if (IsInsert)
4065  InsertVal = MI.getOperand(2).getReg();
4066 
4067  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
4068 
4069  // TODO: Handle total scalarization case.
4070  if (!NarrowVecTy.isVector())
4071  return UnableToLegalize;
4072 
4073  LLT VecTy = MRI.getType(SrcVec);
4074 
4075  // If the index is a constant, we can really break this down as you would
4076  // expect, and index into the target size pieces.
4077  int64_t IdxVal;
4078  auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
4079  if (MaybeCst) {
4080  IdxVal = MaybeCst->Value.getSExtValue();
4081  // Avoid out of bounds indexing the pieces.
4082  if (IdxVal >= VecTy.getNumElements()) {
4083  MIRBuilder.buildUndef(DstReg);
4084  MI.eraseFromParent();
4085  return Legalized;
4086  }
4087 
4088  SmallVector<Register, 8> VecParts;
4089  LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4090 
4091  // Build a sequence of NarrowTy pieces in VecParts for this operand.
4092  LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4093  TargetOpcode::G_ANYEXT);
4094 
4095  unsigned NewNumElts = NarrowVecTy.getNumElements();
4096 
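 // PartIdx selects which NarrowVecTy piece contains the element; NewIdx is
 // the element's index within that piece.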
4097  LLT IdxTy = MRI.getType(Idx);
4098  int64_t PartIdx = IdxVal / NewNumElts;
4099  auto NewIdx =
4100  MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4101 
4102  if (IsInsert) {
4103  LLT PartTy = MRI.getType(VecParts[PartIdx]);
4104 
4105  // Use the adjusted index to insert into one of the subvectors.
4106  auto InsertPart = MIRBuilder.buildInsertVectorElement(
4107  PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4108  VecParts[PartIdx] = InsertPart.getReg(0);
4109 
4110  // Recombine the inserted subvector with the others to reform the result
4111  // vector.
4112  buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4113  } else {
4114  MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
4115  }
4116 
4117  MI.eraseFromParent();
4118  return Legalized;
4119  }
4120 
4121  // With a variable index, we can't perform the operation in a smaller type, so
4122  // we're forced to expand this.
4123  //
4124  // TODO: We could emit a chain of compare/select to figure out which piece to
4125  // index.
4126  return lowerExtractInsertVectorElt(MI);
4127 }
4128 
4129 LegalizerHelper::LegalizeResult
4130 LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
4131  LLT NarrowTy) {
4132  // FIXME: Don't know how to handle secondary types yet.
4133  if (TypeIdx != 0)
4134  return UnableToLegalize;
4135 
4136  // This implementation doesn't work for atomics. Give up instead of doing
4137  // something invalid.
4138  if (LdStMI.isAtomic())
4139  return UnableToLegalize;
4140 
4141  bool IsLoad = isa<GLoad>(LdStMI);
4142  Register ValReg = LdStMI.getReg(0);
4143  Register AddrReg = LdStMI.getPointerReg();
4144  LLT ValTy = MRI.getType(ValReg);
4145 
4146  // FIXME: Do we need a distinct NarrowMemory legalize action?
4147  if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
4148  LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
4149  return UnableToLegalize;
4150  }
4151 
4152  int NumParts = -1;
4153  int NumLeftover = -1;
4154  LLT LeftoverTy;
4155  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
4156  if (IsLoad) {
4157  std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
4158  } else {
4159  if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4160  NarrowLeftoverRegs)) {
4161  NumParts = NarrowRegs.size();
4162  NumLeftover = NarrowLeftoverRegs.size();
4163  }
4164  }
4165 
4166  if (NumParts == -1)
4167  return UnableToLegalize;
4168 
4169  LLT PtrTy = MRI.getType(AddrReg);
4170  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
4171 
4172  unsigned TotalSize = ValTy.getSizeInBits();
4173 
4174  // Split the load/store into PartTy sized pieces starting at Offset. If this
4175  // is a load, return the new registers in ValRegs. For a store, each element
4176  // of ValRegs should be of type PartTy. Returns the next offset that needs to be
4177  // handled.
4178  auto MMO = LdStMI.getMMO();
4179  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
4180  unsigned Offset) -> unsigned {
4181  MachineFunction &MF = MIRBuilder.getMF();
4182  unsigned PartSize = PartTy.getSizeInBits();
4183  for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
4184  Offset += PartSize, ++Idx) {
4185  unsigned ByteOffset = Offset / 8;
4186  Register NewAddrReg;
4187 
4188  MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
4189 
4190  MachineMemOperand *NewMMO =
4191  MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
4192 
4193  if (IsLoad) {
4194  Register Dst = MRI.createGenericVirtualRegister(PartTy);
4195  ValRegs.push_back(Dst);
4196  MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
4197  } else {
4198  MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
4199  }
4200  }
4201 
4202  return Offset;
4203  };
4204 
4205  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
4206 
4207  // Handle the rest of the register if this isn't an even type breakdown.
4208  if (LeftoverTy.isValid())
4209  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
4210 
4211  if (IsLoad) {
4212  insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4213  LeftoverTy, NarrowLeftoverRegs);
4214  }
4215 
4216  LdStMI.eraseFromParent();
4217  return Legalized;
4218 }
4219 
4220 LegalizerHelper::LegalizeResult
4221 LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned TypeIdx,
4222  LLT NarrowTy) {
4223  assert(TypeIdx == 0 && "only one type index expected");
4224 
4225  const unsigned Opc = MI.getOpcode();
4226  const int NumDefOps = MI.getNumExplicitDefs();
4227  const int NumSrcOps = MI.getNumOperands() - NumDefOps;
4228  const unsigned Flags = MI.getFlags();
4229  const unsigned NarrowSize = NarrowTy.getSizeInBits();
4230  const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
4231 
4232  assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 "
4233  "result and 1-3 sources or 2 results and "
4234  "1-2 sources");
4235 
4236  SmallVector<Register, 2> DstRegs;
4237  for (int I = 0; I < NumDefOps; ++I)
4238  DstRegs.push_back(MI.getOperand(I).getReg());
4239 
4240  // First of all, check whether we are narrowing (changing the element type)
4241  // or reducing the number of vector elements.
4242  const LLT DstTy = MRI.getType(DstRegs[0]);
4243  const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
4244 
4245  SmallVector<Register, 8> ExtractedRegs[3];
4247 
4248  // Break down all the sources into NarrowTy pieces we can operate on. This may
4249  // involve creating merges to a wider type, padded with undef.
4250  for (int I = 0; I != NumSrcOps; ++I) {
4251  Register SrcReg = MI.getOperand(I + NumDefOps).getReg();
4252  LLT SrcTy = MRI.getType(SrcReg);
4253 
4254  // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
4255  // For fewerElements, this is a smaller vector with the same element type.
4256  LLT OpNarrowTy;
4257  if (IsNarrow) {
4258  OpNarrowTy = NarrowScalarTy;
4259 
4260  // In case of narrowing, we need to cast vectors to scalars for this to
4261  // work properly
4262  // FIXME: Can we do without the bitcast here if we're narrowing?
4263  if (SrcTy.isVector()) {
4264  SrcTy = LLT::scalar(SrcTy.getSizeInBits());
4265  SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
4266  }
4267  } else {
4268  auto NarrowEC = NarrowTy.isVector() ? NarrowTy.getElementCount()
4269  : ElementCount::getFixed(1);
4270  OpNarrowTy = LLT::scalarOrVector(NarrowEC, SrcTy.getScalarType());
4271  }
4272 
4273  LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
4274 
4275  // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
4276  buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
4277  TargetOpcode::G_ANYEXT);
4278  }
4279 
4280  SmallVector<Register, 8> ResultRegs[2];
4281 
4282  // Input operands for each sub-instruction.
4283  SmallVector<SrcOp, 4> InputRegs(NumSrcOps, Register());
4284 
4285  int NumParts = ExtractedRegs[0].size();
4286  const unsigned DstSize = DstTy.getSizeInBits();
4287  const LLT DstScalarTy = LLT::scalar(DstSize);
4288 
4289  // Narrowing needs to use scalar types
4290  LLT DstLCMTy, NarrowDstTy;
4291  if (IsNarrow) {
4292  DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
4293  NarrowDstTy = NarrowScalarTy;
4294  } else {
4295  DstLCMTy = getLCMType(DstTy, NarrowTy);
4296  NarrowDstTy = NarrowTy;
4297  }
4298 
4299  // We widened the source registers to satisfy merge/unmerge size
4300  // constraints. We'll have some extra fully undef parts.
4301  const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
4302 
4303  for (int I = 0; I != NumRealParts; ++I) {
4304  // Emit this instruction on each of the split pieces.
4305  for (int J = 0; J != NumSrcOps; ++J)
4306  InputRegs[J] = ExtractedRegs[J][I];
4307 
4308  MachineInstrBuilder Inst;
4309  if (NumDefOps == 1)
4310  Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
4311  else
4312  Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs,
4313  Flags);
4314 
4315  for (int J = 0; J != NumDefOps; ++J)
4316  ResultRegs[J].push_back(Inst.getReg(J));
4317  }
4318 
4319  // Fill out the widened result with undef instead of creating instructions
4320  // with undef inputs.
4321  int NumUndefParts = NumParts - NumRealParts;
4322  if (NumUndefParts != 0) {
4323  Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0);
4324  for (int I = 0; I != NumDefOps; ++I)
4325  ResultRegs[I].append(NumUndefParts, Undef);
4326  }
4327 
4328  // Extract the possibly padded result. Use a scratch register if we need to do
4329  // a final bitcast, otherwise use the original result register.
4330  Register MergeDstReg;
4331  for (int I = 0; I != NumDefOps; ++I) {
4332  if (IsNarrow && DstTy.isVector())
4333  MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
4334  else
4335  MergeDstReg = DstRegs[I];
4336 
4337  buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]);
4338 
4339  // Recast to vector if we narrowed a vector
4340  if (IsNarrow && DstTy.isVector())
4341  MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg);
4342  }
4343 
4344  MI.eraseFromParent();
4345  return Legalized;
4346 }
4347 
4348 LegalizerHelper::LegalizeResult
4349 LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
4350  LLT NarrowTy) {
4351  Register DstReg = MI.getOperand(0).getReg();
4352  Register SrcReg = MI.getOperand(1).getReg();
4353  int64_t Imm = MI.getOperand(2).getImm();
4354 
4355  LLT DstTy = MRI.getType(DstReg);
4356 
4358  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
4359  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
4360 
4361  for (Register &R : Parts)
4362  R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
4363 
4364  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
4365 
4366  MI.eraseFromParent();
4367  return Legalized;
4368 }
4369 
4370 LegalizerHelper::LegalizeResult
4371 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4372  LLT NarrowTy) {
4373  using namespace TargetOpcode;
4374 
4375  switch (MI.getOpcode()) {
4376  case G_IMPLICIT_DEF:
4377  return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
4378  case G_TRUNC:
4379  case G_AND:
4380  case G_OR:
4381  case G_XOR:
4382  case G_ADD:
4383  case G_SUB:
4384  case G_MUL:
4385  case G_PTR_ADD:
4386  case G_SMULH:
4387  case G_UMULH:
4388  case G_FADD:
4389  case G_FMUL:
4390  case G_FSUB:
4391  case G_FNEG:
4392  case G_FABS:
4393  case G_FCANONICALIZE:
4394  case G_FDIV:
4395  case G_FREM:
4396  case G_FMA:
4397  case G_FMAD:
4398  case G_FPOW:
4399  case G_FEXP:
4400  case G_FEXP2:
4401  case G_FLOG:
4402  case G_FLOG2:
4403  case G_FLOG10:
4404  case G_FNEARBYINT:
4405  case G_FCEIL:
4406  case G_FFLOOR:
4407  case G_FRINT:
4408  case G_INTRINSIC_ROUND:
4409  case G_INTRINSIC_ROUNDEVEN:
4410  case G_INTRINSIC_TRUNC:
4411  case G_FCOS:
4412  case G_FSIN:
4413  case G_FSQRT:
4414  case G_BSWAP:
4415  case G_BITREVERSE:
4416  case G_SDIV:
4417  case G_UDIV:
4418  case G_SREM:
4419  case G_UREM:
4420  case G_SDIVREM:
4421  case G_UDIVREM:
4422  case G_SMIN:
4423  case G_SMAX:
4424  case G_UMIN:
4425  case G_UMAX:
4426  case G_ABS:
4427  case G_FMINNUM:
4428  case G_FMAXNUM:
4429  case G_FMINNUM_IEEE:
4430  case G_FMAXNUM_IEEE:
4431  case G_FMINIMUM:
4432  case G_FMAXIMUM:
4433  case G_FSHL:
4434  case G_FSHR:
4435  case G_ROTL:
4436  case G_ROTR:
4437  case G_FREEZE:
4438  case G_SADDSAT:
4439  case G_SSUBSAT:
4440  case G_UADDSAT:
4441  case G_USUBSAT:
4442  return reduceOperationWidth(MI, TypeIdx, NarrowTy);
4443  case G_UMULO:
4444  case G_SMULO:
4445  return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy);
4446  case G_SHL:
4447  case G_LSHR:
4448  case G_ASHR:
4449  case G_SSHLSAT:
4450  case G_USHLSAT:
4451  case G_CTLZ:
4452  case G_CTLZ_ZERO_UNDEF:
4453  case G_CTTZ:
4454  case G_CTTZ_ZERO_UNDEF:
4455  case G_CTPOP:
4456  case G_FCOPYSIGN:
4457  return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
4458  case G_ZEXT:
4459  case G_SEXT:
4460  case G_ANYEXT:
4461  case G_FPEXT:
4462  case G_FPTRUNC:
4463  case G_SITOFP:
4464  case G_UITOFP:
4465  case G_FPTOSI:
4466  case G_FPTOUI:
4467  case G_INTTOPTR:
4468  case G_PTRTOINT:
4469  case G_ADDRSPACE_CAST:
4470  return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
4471  case G_ICMP:
4472  case G_FCMP:
4473  return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
4474  case G_SELECT:
4475  return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
4476  case G_PHI:
4477  return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
4478  case G_UNMERGE_VALUES:
4479  return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4480  case G_BUILD_VECTOR:
4481  assert(TypeIdx == 0 && "not a vector type index");
4482  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4483  case G_CONCAT_VECTORS:
4484  if (TypeIdx != 1) // TODO: This probably does work as expected already.
4485  return UnableToLegalize;
4486  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4487  case G_EXTRACT_VECTOR_ELT:
4488  case G_INSERT_VECTOR_ELT:
4489  return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4490  case G_LOAD:
4491  case G_STORE:
4492  return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
4493  case G_SEXT_INREG:
4494  return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
4495  GISEL_VECREDUCE_CASES_NONSEQ
4496  return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4497  case G_SHUFFLE_VECTOR:
4498  return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4499  default:
4500  return UnableToLegalize;
4501  }
4502 }
4503 
4504 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4505  MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4506  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4507  if (TypeIdx != 0)
4508  return UnableToLegalize;
4509 
4510  Register DstReg = MI.getOperand(0).getReg();
4511  Register Src1Reg = MI.getOperand(1).getReg();
4512  Register Src2Reg = MI.getOperand(2).getReg();
4513  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4514  LLT DstTy = MRI.getType(DstReg);
4515  LLT Src1Ty = MRI.getType(Src1Reg);
4516  LLT Src2Ty = MRI.getType(Src2Reg);
4517  // The shuffle should be canonicalized by now.
4518  if (DstTy != Src1Ty)
4519  return UnableToLegalize;
4520  if (DstTy != Src2Ty)
4521  return UnableToLegalize;
4522 
4523  if (!isPowerOf2_32(DstTy.getNumElements()))
4524  return UnableToLegalize;
4525 
4526  // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4527  // Further legalization attempts will be needed to split it further.
4528  NarrowTy =
4529  DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
4530  unsigned NewElts = NarrowTy.getNumElements();
4531 
4532  SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4533  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
4534  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
4535  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4536  SplitSrc2Regs[1]};
4537 
4538  Register Hi, Lo;
4539 
4540  // If Lo or Hi uses elements from at most two of the four input vectors, then
4541  // express it as a vector shuffle of those two inputs. Otherwise extract the
4542  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4543  SmallVector<int, 16> Ops;
4544  for (unsigned High = 0; High < 2; ++High) {
4545  Register &Output = High ? Hi : Lo;
4546 
4547  // Build a shuffle mask for the output, discovering on the fly which
4548  // input vectors to use as shuffle operands (recorded in InputUsed).
4549  // If building a suitable shuffle vector proves too hard, then bail
4550  // out with useBuildVector set.
4551  unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4552  unsigned FirstMaskIdx = High * NewElts;
4553  bool UseBuildVector = false;
4554  for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4555  // The mask element. This indexes into the input.
4556  int Idx = Mask[FirstMaskIdx + MaskOffset];
4557 
4558  // The input vector this mask element indexes into.
4559  unsigned Input = (unsigned)Idx / NewElts;
4560 
4561  if (Input >= array_lengthof(Inputs)) {
4562  // The mask element does not index into any input vector.
4563  Ops.push_back(-1);
4564  continue;
4565  }
4566 
4567  // Turn the index into an offset from the start of the input vector.
4568  Idx -= Input * NewElts;
4569 
4570  // Find or create a shuffle vector operand to hold this input.
4571  unsigned OpNo;
4572  for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
4573  if (InputUsed[OpNo] == Input) {
4574  // This input vector is already an operand.
4575  break;
4576  } else if (InputUsed[OpNo] == -1U) {
4577  // Create a new operand for this input vector.
4578  InputUsed[OpNo] = Input;
4579  break;
4580  }
4581  }
4582 
4583  if (OpNo >= array_lengthof(InputUsed)) {
4584  // More than two input vectors used! Give up on trying to create a
4585  // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
4586  UseBuildVector = true;
4587  break;
4588  }
4589 
4590  // Add the mask index for the new shuffle vector.
4591  Ops.push_back(Idx + OpNo * NewElts);
4592  }
4593 
4594  if (UseBuildVector) {
4595  LLT EltTy = NarrowTy.getElementType();
4596  SmallVector<Register, 16> SVOps;
4597 
4598  // Extract the input elements by hand.
4599  for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4600  // The mask element. This indexes into the input.
4601  int Idx = Mask[FirstMaskIdx + MaskOffset];
4602 
4603  // The input vector this mask element indexes into.
4604  unsigned Input = (unsigned)Idx / NewElts;
4605 
4606  if (Input >= array_lengthof(Inputs)) {
4607  // The mask element is "undef" or indexes off the end of the input.
4608  SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4609  continue;
4610  }
4611 
4612  // Turn the index into an offset from the start of the input vector.
4613  Idx -= Input * NewElts;
4614 
4615  // Extract the vector element by hand.
4616  SVOps.push_back(MIRBuilder
4617  .buildExtractVectorElement(
4618  EltTy, Inputs[Input],
4619  MIRBuilder.buildConstant(LLT::scalar(32), Idx))
4620  .getReg(0));
4621  }
4622 
4623  // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4624  Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4625  } else if (InputUsed[0] == -1U) {
4626  // No input vectors were used! The result is undefined.
4627  Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4628  } else {
4629  Register Op0 = Inputs[InputUsed[0]];
4630  // If only one input was used, use an undefined vector for the other.
4631  Register Op1 = InputUsed[1] == -1U
4632  ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4633  : Inputs[InputUsed[1]];
4634  // At least one input vector was used. Create a new shuffle vector.
4635  Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4636  }
4637 
4638  Ops.clear();
4639  }
4640 
4641  MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4642  MI.eraseFromParent();
4643  return Legalized;
4644 }
4645 
4646 static unsigned getScalarOpcForReduction(unsigned Opc) {
4647  unsigned ScalarOpc;
4648  switch (Opc) {
4649  case TargetOpcode::G_VECREDUCE_FADD:
4650  ScalarOpc = TargetOpcode::G_FADD;
4651  break;
4652  case TargetOpcode::G_VECREDUCE_FMUL:
4653  ScalarOpc = TargetOpcode::G_FMUL;
4654  break;
4655  case TargetOpcode::G_VECREDUCE_FMAX:
4656  ScalarOpc = TargetOpcode::G_FMAXNUM;
4657  break;
4658  case TargetOpcode::G_VECREDUCE_FMIN:
4659  ScalarOpc = TargetOpcode::G_FMINNUM;
4660  break;
4661  case TargetOpcode::G_VECREDUCE_ADD:
4662  ScalarOpc = TargetOpcode::G_ADD;
4663  break;
4664  case TargetOpcode::G_VECREDUCE_MUL:
4665  ScalarOpc = TargetOpcode::G_MUL;
4666  break;
4667  case TargetOpcode::G_VECREDUCE_AND:
4668  ScalarOpc = TargetOpcode::G_AND;
4669  break;
4670  case TargetOpcode::G_VECREDUCE_OR:
4671  ScalarOpc = TargetOpcode::G_OR;
4672  break;
4673  case TargetOpcode::G_VECREDUCE_XOR:
4674  ScalarOpc = TargetOpcode::G_XOR;
4675  break;
4676  case TargetOpcode::G_VECREDUCE_SMAX:
4677  ScalarOpc = TargetOpcode::G_SMAX;
4678  break;
4679  case TargetOpcode::G_VECREDUCE_SMIN:
4680  ScalarOpc = TargetOpcode::G_SMIN;
4681  break;
4682  case TargetOpcode::G_VECREDUCE_UMAX:
4683  ScalarOpc = TargetOpcode::G_UMAX;
4684  break;
4685  case TargetOpcode::G_VECREDUCE_UMIN:
4686  ScalarOpc = TargetOpcode::G_UMIN;
4687  break;
4688  default:
4689  llvm_unreachable("Unhandled reduction");
4690  }
4691  return ScalarOpc;
4692 }
4693 
4694 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4695  MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4696  unsigned Opc = MI.getOpcode();
4697  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
4698  Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
4699  "Sequential reductions not expected");
4700 
4701  if (TypeIdx != 1)
4702  return UnableToLegalize;
4703 
4704  // The semantics of the normal non-sequential reductions allow us to freely
4705  // re-associate the operation.
4706  Register SrcReg = MI.getOperand(1).getReg();
4707  LLT SrcTy = MRI.getType(SrcReg);
4708  Register DstReg = MI.getOperand(0).getReg();
4709  LLT DstTy = MRI.getType(DstReg);
4710 
4711  if (NarrowTy.isVector() &&
4712  (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
4713  return UnableToLegalize;
4714 
4715  unsigned ScalarOpc = getScalarOpcForReduction(Opc);
4716  SmallVector<Register> SplitSrcs;
4717  // If NarrowTy is a scalar then we're being asked to scalarize.
4718  const unsigned NumParts =
4719  NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
4720  : SrcTy.getNumElements();
4721 
4722  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
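 // When scalarizing, reduce the individual elements directly; otherwise
 // reduce each NarrowTy sub-vector and combine the partial reductions below.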
4723  if (NarrowTy.isScalar()) {
4724  if (DstTy != NarrowTy)
4725  return UnableToLegalize; // FIXME: handle implicit extensions.
4726 
4727  if (isPowerOf2_32(NumParts)) {
4728  // Generate a tree of scalar operations to reduce the critical path.
4729  SmallVector<Register> PartialResults;
4730  unsigned NumPartsLeft = NumParts;
4731  while (NumPartsLeft > 1) {
4732  for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
4733  PartialResults.emplace_back(
4734  MIRBuilder
4735  .buildInstr(ScalarOpc, {NarrowTy},
4736  {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
4737  .getReg(0));
4738  }
4739  SplitSrcs = PartialResults;
4740  PartialResults.clear();
4741  NumPartsLeft = SplitSrcs.size();
4742  }
4743  assert(SplitSrcs.size() == 1);
4744  MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
4745  MI.eraseFromParent();
4746  return Legalized;
4747  }
4748  // If we can't generate a tree, then just do sequential operations.
4749  Register Acc = SplitSrcs[0];
4750  for (unsigned Idx = 1; Idx < NumParts; ++Idx)
4751  Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
4752  .getReg(0);
4753  MIRBuilder.buildCopy(DstReg, Acc);
4754  MI.eraseFromParent();
4755  return Legalized;
4756  }
4757  SmallVector<Register> PartialReductions;
4758  for (unsigned Part = 0; Part < NumParts; ++Part) {
4759  PartialReductions.push_back(
4760  MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
4761  }
4762 
4763 
4764  // If the types involved are powers of 2, we can generate intermediate vector
4765  // ops, before generating a final reduction operation.
4766  if (isPowerOf2_32(SrcTy.getNumElements()) &&
4767  isPowerOf2_32(NarrowTy.getNumElements())) {
4768  return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
4769  }
4770 
4771  Register Acc = PartialReductions[0];
4772  for (unsigned Part = 1; Part < NumParts; ++Part) {
4773  if (Part == NumParts - 1) {
4774  MIRBuilder.buildInstr(ScalarOpc, {DstReg},
4775  {Acc, PartialReductions[Part]});
4776  } else {
4777  Acc = MIRBuilder
4778  .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
4779  .getReg(0);
4780  }
4781  }
4782  MI.eraseFromParent();
4783  return Legalized;
4784 }
4785 
4786 LegalizerHelper::LegalizeResult
4787 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
4788  LLT SrcTy, LLT NarrowTy,
4789  unsigned ScalarOpc) {
4790  SmallVector<Register> SplitSrcs;
4791  // Split the sources into NarrowTy size pieces.
4792  extractParts(SrcReg, NarrowTy,
4793  SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
4794  // We're going to do a tree reduction using vector operations until we have
4795  // one NarrowTy size value left.
4796  while (SplitSrcs.size() > 1) {
4797  SmallVector<Register> PartialRdxs;
4798  for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
4799  Register LHS = SplitSrcs[Idx];
4800  Register RHS = SplitSrcs[Idx + 1];
4801  // Create the intermediate vector op.
4802  Register Res =
4803  MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
4804  PartialRdxs.push_back(Res);
4805  }
4806  SplitSrcs = std::move(PartialRdxs);
4807  }
4808  // Finally generate the requested NarrowTy based reduction.
4809  Observer.changingInstr(MI);
4810  MI.getOperand(1).setReg(SplitSrcs[0]);
4811  Observer.changedInstr(MI);
4812  return Legalized;
4813 }
4814 
4815 LegalizerHelper::LegalizeResult
4816 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
4817  const LLT HalfTy, const LLT AmtTy) {
4818 
4819  Register InL = MRI.createGenericVirtualRegister(HalfTy);
4820  Register InH = MRI.createGenericVirtualRegister(HalfTy);
4821  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4822 
4823  if (Amt.isNullValue()) {
4824  MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
4825  MI.eraseFromParent();
4826  return Legalized;
4827  }
4828 
4829  LLT NVT = HalfTy;
4830  unsigned NVTBits = HalfTy.getSizeInBits();
4831  unsigned VTBits = 2 * NVTBits;
4832 
4833  SrcOp Lo(Register(0)), Hi(Register(0));
4834  if (MI.getOpcode() == TargetOpcode::G_SHL) {
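 // G_SHL: shifting by the full width or more clears both halves; shifting by
 // at least NVTBits moves the (shifted) low half into the high half; smaller
 // shifts combine bits from both halves into the new high half.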
4835  if (Amt.ugt(VTBits)) {
4836  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4837  } else if (Amt.ugt(NVTBits)) {
4838  Lo = MIRBuilder.buildConstant(NVT, 0);
4839  Hi = MIRBuilder.buildShl(NVT, InL,
4840  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4841  } else if (Amt == NVTBits) {
4842  Lo = MIRBuilder.buildConstant(NVT, 0);
4843  Hi = InL;
4844  } else {
4845  Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
4846  auto OrLHS =
4847  MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
4848  auto OrRHS = MIRBuilder.buildLShr(
4849  NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4850  Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4851  }
4852  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4853  if (Amt.ugt(VTBits)) {
4854  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4855  } else if (Amt.