LLVM 13.0.0git
LegalizerHelper.cpp
1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/Support/Debug.h"
32 
33 #define DEBUG_TYPE "legalizer"
34 
35 using namespace llvm;
36 using namespace LegalizeActions;
37 using namespace MIPatternMatch;
38 
39 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
40 ///
41 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
42 /// with any leftover piece as type \p LeftoverTy
43 ///
44 /// Returns -1 in the first element of the pair if the breakdown is not
45 /// satisfiable.
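///
/// For example (illustrative): breaking OrigTy = s70 into NarrowTy = s32
/// pieces gives NumParts = 2 with a 6-bit remainder, so LeftoverTy = s6 and
/// the result is {2, 1}.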
46 static std::pair<int, int>
47 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
48  assert(!LeftoverTy.isValid() && "this is an out argument");
49 
50  unsigned Size = OrigTy.getSizeInBits();
51  unsigned NarrowSize = NarrowTy.getSizeInBits();
52  unsigned NumParts = Size / NarrowSize;
53  unsigned LeftoverSize = Size - NumParts * NarrowSize;
54  assert(Size > NarrowSize);
55 
56  if (LeftoverSize == 0)
57  return {NumParts, 0};
58 
59  if (NarrowTy.isVector()) {
60  unsigned EltSize = OrigTy.getScalarSizeInBits();
61  if (LeftoverSize % EltSize != 0)
62  return {-1, -1};
63  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
64  } else {
65  LeftoverTy = LLT::scalar(LeftoverSize);
66  }
67 
68  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
69  return std::make_pair(NumParts, NumLeftover);
70 }
71 
72 static Type *getFloatTypeForLLT(LLVMContext &Ctx,
73                                 LLT Ty) {
74  if (!Ty.isScalar())
75  return nullptr;
76 
77  switch (Ty.getSizeInBits()) {
78  case 16:
79  return Type::getHalfTy(Ctx);
80  case 32:
81  return Type::getFloatTy(Ctx);
82  case 64:
83  return Type::getDoubleTy(Ctx);
84  case 80:
85  return Type::getX86_FP80Ty(Ctx);
86  case 128:
87  return Type::getFP128Ty(Ctx);
88  default:
89  return nullptr;
90  }
91 }
92 
93 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
94                                  GISelChangeObserver &Observer,
95                                  MachineIRBuilder &Builder)
96  : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
97  LI(*MF.getSubtarget().getLegalizerInfo()),
98  TLI(*MF.getSubtarget().getTargetLowering()) { }
99 
100 LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
101                                  GISelChangeObserver &Observer,
102                                  MachineIRBuilder &B)
103  : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
104  TLI(*MF.getSubtarget().getTargetLowering()) { }
105 
106 LegalizerHelper::LegalizeResult
107 LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
108                                    LostDebugLocObserver &LocObserver) {
109  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
110 
111  MIRBuilder.setInstrAndDebugLoc(MI);
112 
113  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
114  MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
115  return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
116  auto Step = LI.getAction(MI, MRI);
117  switch (Step.Action) {
118  case Legal:
119  LLVM_DEBUG(dbgs() << ".. Already legal\n");
120  return AlreadyLegal;
121  case Libcall:
122  LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
123  return libcall(MI, LocObserver);
124  case NarrowScalar:
125  LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
126  return narrowScalar(MI, Step.TypeIdx, Step.NewType);
127  case WidenScalar:
128  LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
129  return widenScalar(MI, Step.TypeIdx, Step.NewType);
130  case Bitcast:
131  LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
132  return bitcast(MI, Step.TypeIdx, Step.NewType);
133  case Lower:
134  LLVM_DEBUG(dbgs() << ".. Lower\n");
135  return lower(MI, Step.TypeIdx, Step.NewType);
136  case FewerElements:
137  LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
138  return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
139  case MoreElements:
140  LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
141  return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
142  case Custom:
143  LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
144  return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
145  default:
146  LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
147  return UnableToLegalize;
148  }
149 }
150 
151 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
152  SmallVectorImpl<Register> &VRegs) {
153  for (int i = 0; i < NumParts; ++i)
154  VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
155  MIRBuilder.buildUnmerge(VRegs, Reg);
156 }
157 
158 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
159  LLT MainTy, LLT &LeftoverTy,
160                                    SmallVectorImpl<Register> &VRegs,
161                                    SmallVectorImpl<Register> &LeftoverRegs) {
162  assert(!LeftoverTy.isValid() && "this is an out argument");
163 
164  unsigned RegSize = RegTy.getSizeInBits();
165  unsigned MainSize = MainTy.getSizeInBits();
166  unsigned NumParts = RegSize / MainSize;
167  unsigned LeftoverSize = RegSize - NumParts * MainSize;
168 
169  // Use an unmerge when possible.
170  if (LeftoverSize == 0) {
171  for (unsigned I = 0; I < NumParts; ++I)
172  VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
173  MIRBuilder.buildUnmerge(VRegs, Reg);
174  return true;
175  }
176 
177  if (MainTy.isVector()) {
178  unsigned EltSize = MainTy.getScalarSizeInBits();
179  if (LeftoverSize % EltSize != 0)
180  return false;
181  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
182  } else {
183  LeftoverTy = LLT::scalar(LeftoverSize);
184  }
185 
186  // For irregular sizes, extract the individual parts.
187  for (unsigned I = 0; I != NumParts; ++I) {
188  Register NewReg = MRI.createGenericVirtualRegister(MainTy);
189  VRegs.push_back(NewReg);
190  MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
191  }
192 
193  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
194  Offset += LeftoverSize) {
195  Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
196  LeftoverRegs.push_back(NewReg);
197  MIRBuilder.buildExtract(NewReg, Reg, Offset);
198  }
199 
200  return true;
201 }
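// Example (illustrative): extracting the parts of an s70 register with
// MainTy = s32 emits two G_EXTRACTs at bit offsets 0 and 32 into VRegs, plus
// one leftover s6 G_EXTRACT at offset 64 into LeftoverRegs.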
202 
203 void LegalizerHelper::insertParts(Register DstReg,
204  LLT ResultTy, LLT PartTy,
205  ArrayRef<Register> PartRegs,
206  LLT LeftoverTy,
207  ArrayRef<Register> LeftoverRegs) {
208  if (!LeftoverTy.isValid()) {
209  assert(LeftoverRegs.empty());
210 
211  if (!ResultTy.isVector()) {
212  MIRBuilder.buildMerge(DstReg, PartRegs);
213  return;
214  }
215 
216  if (PartTy.isVector())
217  MIRBuilder.buildConcatVectors(DstReg, PartRegs);
218  else
219  MIRBuilder.buildBuildVector(DstReg, PartRegs);
220  return;
221  }
222 
223  unsigned PartSize = PartTy.getSizeInBits();
224  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
225 
226  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
227  MIRBuilder.buildUndef(CurResultReg);
228 
229  unsigned Offset = 0;
230  for (Register PartReg : PartRegs) {
231  Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
232  MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
233  CurResultReg = NewResultReg;
234  Offset += PartSize;
235  }
236 
237  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
238  // Use the original output register for the final insert to avoid a copy.
239  Register NewResultReg = (I + 1 == E) ?
240  DstReg : MRI.createGenericVirtualRegister(ResultTy);
241 
242  MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
243  CurResultReg = NewResultReg;
244  Offset += LeftoverPartSize;
245  }
246 }
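// Example (illustrative): rebuilding an s70 result from two s32 parts and one
// s6 leftover starts from a G_IMPLICIT_DEF of s70 and chains G_INSERTs at bit
// offsets 0, 32 and 64, with the final insert writing directly to DstReg.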
247 
248 /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
249 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
250                               const MachineInstr &MI) {
251  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
252 
253  const int StartIdx = Regs.size();
254  const int NumResults = MI.getNumOperands() - 1;
255  Regs.resize(Regs.size() + NumResults);
256  for (int I = 0; I != NumResults; ++I)
257  Regs[StartIdx + I] = MI.getOperand(I).getReg();
258 }
259 
260 void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
261  LLT GCDTy, Register SrcReg) {
262  LLT SrcTy = MRI.getType(SrcReg);
263  if (SrcTy == GCDTy) {
264  // If the source already evenly divides the result type, we don't need to do
265  // anything.
266  Parts.push_back(SrcReg);
267  } else {
268  // Need to split into common type sized pieces.
269  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
270  getUnmergeResults(Parts, *Unmerge);
271  }
272 }
273 
274 LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
275  LLT NarrowTy, Register SrcReg) {
276  LLT SrcTy = MRI.getType(SrcReg);
277  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
278  extractGCDType(Parts, GCDTy, SrcReg);
279  return GCDTy;
280 }
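// Example (illustrative): with DstTy = s64, NarrowTy = s48 and an s32 source,
// the common type works out to s16, so the source is unmerged into two s16
// pieces that are appended to Parts and s16 is returned.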
281 
282 LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
283                                          SmallVectorImpl<Register> &VRegs,
284                                          unsigned PadStrategy) {
285  LLT LCMTy = getLCMType(DstTy, NarrowTy);
286 
287  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
288  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
289  int NumOrigSrc = VRegs.size();
290 
291  Register PadReg;
292 
293  // Get a value we can use to pad the source value if the sources won't evenly
294  // cover the result type.
295  if (NumOrigSrc < NumParts * NumSubParts) {
296  if (PadStrategy == TargetOpcode::G_ZEXT)
297  PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
298  else if (PadStrategy == TargetOpcode::G_ANYEXT)
299  PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
300  else {
301  assert(PadStrategy == TargetOpcode::G_SEXT);
302 
303  // Shift the sign bit of the low register through the high register.
304  auto ShiftAmt =
305           MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
306       PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
307  }
308  }
309 
310  // Registers for the final merge to be produced.
311  SmallVector<Register, 4> Remerge(NumParts);
312 
313  // Registers needed for intermediate merges, which will be merged into a
314  // source for Remerge.
315  SmallVector<Register, 4> SubMerge(NumSubParts);
316 
317  // Once we've fully read off the end of the original source bits, we can reuse
318  // the same high bits for remaining padding elements.
319  Register AllPadReg;
320 
321  // Build merges to the LCM type to cover the original result type.
322  for (int I = 0; I != NumParts; ++I) {
323  bool AllMergePartsArePadding = true;
324 
325  // Build the requested merges to the requested type.
326  for (int J = 0; J != NumSubParts; ++J) {
327  int Idx = I * NumSubParts + J;
328  if (Idx >= NumOrigSrc) {
329  SubMerge[J] = PadReg;
330  continue;
331  }
332 
333  SubMerge[J] = VRegs[Idx];
334 
335  // There are meaningful bits here we can't reuse later.
336  AllMergePartsArePadding = false;
337  }
338 
339  // If we've filled up a complete piece with padding bits, we can directly
340  // emit the natural sized constant if applicable, rather than a merge of
341  // smaller constants.
342  if (AllMergePartsArePadding && !AllPadReg) {
343  if (PadStrategy == TargetOpcode::G_ANYEXT)
344  AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
345  else if (PadStrategy == TargetOpcode::G_ZEXT)
346  AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
347 
348  // If this is a sign extension, we can't materialize a trivial constant
349  // with the right type and have to produce a merge.
350  }
351 
352  if (AllPadReg) {
353  // Avoid creating additional instructions if we're just adding additional
354  // copies of padding bits.
355  Remerge[I] = AllPadReg;
356  continue;
357  }
358 
359  if (NumSubParts == 1)
360  Remerge[I] = SubMerge[0];
361  else
362  Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
363 
364  // In the sign extend padding case, re-use the first all-signbit merge.
365  if (AllMergePartsArePadding && !AllPadReg)
366  AllPadReg = Remerge[I];
367  }
368 
369  VRegs = std::move(Remerge);
370  return LCMTy;
371 }
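// Example (illustrative): for DstTy = s48, NarrowTy = s32, GCDTy = s16 and
// three s16 input pieces, LCMTy = s96, NumParts = 3 and NumSubParts = 2. Only
// three of the six s16 slots are covered by real inputs, so the remaining
// slots are filled with the pad value selected by PadStrategy before the
// three s32 merges are built.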
372 
373 void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
374  ArrayRef<Register> RemergeRegs) {
375  LLT DstTy = MRI.getType(DstReg);
376 
377  // Create the merge to the widened source, and extract the relevant bits into
378  // the result.
379 
380  if (DstTy == LCMTy) {
381  MIRBuilder.buildMerge(DstReg, RemergeRegs);
382  return;
383  }
384 
385  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
386  if (DstTy.isScalar() && LCMTy.isScalar()) {
387  MIRBuilder.buildTrunc(DstReg, Remerge);
388  return;
389  }
390 
391  if (LCMTy.isVector()) {
392  unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
393  SmallVector<Register, 8> UnmergeDefs(NumDefs);
394  UnmergeDefs[0] = DstReg;
395  for (unsigned I = 1; I != NumDefs; ++I)
396  UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
397 
398  MIRBuilder.buildUnmerge(UnmergeDefs,
399  MIRBuilder.buildMerge(LCMTy, RemergeRegs));
400  return;
401  }
402 
403  llvm_unreachable("unhandled case");
404 }
405 
406 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
407 #define RTLIBCASE_INT(LibcallPrefix) \
408  do { \
409  switch (Size) { \
410  case 32: \
411  return RTLIB::LibcallPrefix##32; \
412  case 64: \
413  return RTLIB::LibcallPrefix##64; \
414  case 128: \
415  return RTLIB::LibcallPrefix##128; \
416  default: \
417  llvm_unreachable("unexpected size"); \
418  } \
419  } while (0)
420 
421 #define RTLIBCASE(LibcallPrefix) \
422  do { \
423  switch (Size) { \
424  case 32: \
425  return RTLIB::LibcallPrefix##32; \
426  case 64: \
427  return RTLIB::LibcallPrefix##64; \
428  case 80: \
429  return RTLIB::LibcallPrefix##80; \
430  case 128: \
431  return RTLIB::LibcallPrefix##128; \
432  default: \
433  llvm_unreachable("unexpected size"); \
434  } \
435  } while (0)
436 
437  switch (Opcode) {
438  case TargetOpcode::G_SDIV:
439  RTLIBCASE_INT(SDIV_I);
440  case TargetOpcode::G_UDIV:
441  RTLIBCASE_INT(UDIV_I);
442  case TargetOpcode::G_SREM:
443  RTLIBCASE_INT(SREM_I);
444  case TargetOpcode::G_UREM:
445  RTLIBCASE_INT(UREM_I);
446  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
447  RTLIBCASE_INT(CTLZ_I);
448  case TargetOpcode::G_FADD:
449  RTLIBCASE(ADD_F);
450  case TargetOpcode::G_FSUB:
451  RTLIBCASE(SUB_F);
452  case TargetOpcode::G_FMUL:
453  RTLIBCASE(MUL_F);
454  case TargetOpcode::G_FDIV:
455  RTLIBCASE(DIV_F);
456  case TargetOpcode::G_FEXP:
457  RTLIBCASE(EXP_F);
458  case TargetOpcode::G_FEXP2:
459  RTLIBCASE(EXP2_F);
460  case TargetOpcode::G_FREM:
461  RTLIBCASE(REM_F);
462  case TargetOpcode::G_FPOW:
463  RTLIBCASE(POW_F);
464  case TargetOpcode::G_FMA:
465  RTLIBCASE(FMA_F);
466  case TargetOpcode::G_FSIN:
467  RTLIBCASE(SIN_F);
468  case TargetOpcode::G_FCOS:
469  RTLIBCASE(COS_F);
470  case TargetOpcode::G_FLOG10:
471  RTLIBCASE(LOG10_F);
472  case TargetOpcode::G_FLOG:
473  RTLIBCASE(LOG_F);
474  case TargetOpcode::G_FLOG2:
475  RTLIBCASE(LOG2_F);
476  case TargetOpcode::G_FCEIL:
477  RTLIBCASE(CEIL_F);
478  case TargetOpcode::G_FFLOOR:
479  RTLIBCASE(FLOOR_F);
480  case TargetOpcode::G_FMINNUM:
481  RTLIBCASE(FMIN_F);
482  case TargetOpcode::G_FMAXNUM:
483  RTLIBCASE(FMAX_F);
484  case TargetOpcode::G_FSQRT:
485  RTLIBCASE(SQRT_F);
486  case TargetOpcode::G_FRINT:
487  RTLIBCASE(RINT_F);
488  case TargetOpcode::G_FNEARBYINT:
489  RTLIBCASE(NEARBYINT_F);
490  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
491  RTLIBCASE(ROUNDEVEN_F);
492  }
493  llvm_unreachable("Unknown libcall function");
494 }
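// Example (illustrative): a 32-bit G_FADD maps to RTLIB::ADD_F32 and a 64-bit
// G_SREM maps to RTLIB::SREM_I64; an 80-bit size is only valid for the
// floating-point cases.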
495 
496 /// True if an instruction is in tail position in its caller. Intended for
497 /// legalizing libcalls as tail calls when possible.
498 static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
499                                     MachineInstr &MI) {
500  MachineBasicBlock &MBB = *MI.getParent();
501  const Function &F = MBB.getParent()->getFunction();
502 
503  // Conservatively require the attributes of the call to match those of
504  // the return. Ignore NoAlias and NonNull because they don't affect the
505  // call sequence.
506  AttributeList CallerAttrs = F.getAttributes();
507  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
508  .removeAttribute(Attribute::NoAlias)
509  .removeAttribute(Attribute::NonNull)
510  .hasAttributes())
511  return false;
512 
513  // It's not safe to eliminate the sign / zero extension of the return value.
514  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
515  CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
516  return false;
517 
518  // Only tail call if the following instruction is a standard return.
519  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
520  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
521  return false;
522 
523  return true;
524 }
525 
526 LegalizerHelper::LegalizeResult
527 llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
528                     const CallLowering::ArgInfo &Result,
529                     ArrayRef<CallLowering::ArgInfo> Args,
530                     const CallingConv::ID CC) {
531  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
532 
533  CallLowering::CallLoweringInfo Info;
534  Info.CallConv = CC;
535  Info.Callee = MachineOperand::CreateES(Name);
536  Info.OrigRet = Result;
537  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
538  if (!CLI.lowerCall(MIRBuilder, Info))
539    return LegalizerHelper::UnableToLegalize;
540 
541  return LegalizerHelper::Legalized;
542 }
543 
544 LegalizerHelper::LegalizeResult
545 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
546                     const CallLowering::ArgInfo &Result,
547                     ArrayRef<CallLowering::ArgInfo> Args) {
548  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
549  const char *Name = TLI.getLibcallName(Libcall);
550  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
551  return createLibcall(MIRBuilder, Name, Result, Args, CC);
552 }
553 
554 // Useful for libcalls where all operands have the same type.
555 static LegalizerHelper::LegalizeResult
556 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
557               Type *OpType) {
558  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
559 
560  SmallVector<CallLowering::ArgInfo, 3> Args;
561  for (unsigned i = 1; i < MI.getNumOperands(); i++)
562  Args.push_back({MI.getOperand(i).getReg(), OpType});
563  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
564  Args);
565 }
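// Example (illustrative): a G_FSIN on s64 becomes a call through
// RTLIB::SIN_F64 (typically "sin"), with the single source register passed as
// the argument and the destination register receiving the result.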
566 
567 LegalizerHelper::LegalizeResult
568 llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
569                        MachineInstr &MI, LostDebugLocObserver &LocObserver) {
570  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
571 
572  SmallVector<CallLowering::ArgInfo, 3> Args;
573  // Add all the args, except for the last, which is an imm denoting 'tail'.
574  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
575  Register Reg = MI.getOperand(i).getReg();
576 
577  // We need to derive an IR type for call lowering.
578  LLT OpLLT = MRI.getType(Reg);
579  Type *OpTy = nullptr;
580  if (OpLLT.isPointer())
581  OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
582  else
583  OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
584  Args.push_back({Reg, OpTy});
585  }
586 
587  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
588  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
589  RTLIB::Libcall RTLibcall;
590  unsigned Opc = MI.getOpcode();
591  switch (Opc) {
592  case TargetOpcode::G_BZERO:
593  RTLibcall = RTLIB::BZERO;
594  break;
595  case TargetOpcode::G_MEMCPY:
596  RTLibcall = RTLIB::MEMCPY;
597  break;
598  case TargetOpcode::G_MEMMOVE:
599  RTLibcall = RTLIB::MEMMOVE;
600  break;
601  case TargetOpcode::G_MEMSET:
602  RTLibcall = RTLIB::MEMSET;
603  break;
604  default:
605    return LegalizerHelper::UnableToLegalize;
606  }
607  const char *Name = TLI.getLibcallName(RTLibcall);
608 
609  // Unsupported libcall on the target.
610  if (!Name) {
611  LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
612  << MIRBuilder.getTII().getName(Opc) << "\n");
613    return LegalizerHelper::UnableToLegalize;
614  }
615 
616  CallLowering::CallLoweringInfo Info;
617  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
618  Info.Callee = MachineOperand::CreateES(Name);
619  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
620  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
621  isLibCallInTailPosition(MIRBuilder.getTII(), MI);
622 
623  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
624  if (!CLI.lowerCall(MIRBuilder, Info))
625    return LegalizerHelper::UnableToLegalize;
626 
627 
628  if (Info.LoweredTailCall) {
629  assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
630 
631  // Check debug locations before removing the return.
632  LocObserver.checkpoint(true);
633 
634  // We must have a return following the call (or debug insts) to get past
635  // isLibCallInTailPosition.
636  do {
637  MachineInstr *Next = MI.getNextNode();
638  assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
639  "Expected instr following MI to be return or debug inst?");
640  // We lowered a tail call, so the call is now the return from the block.
641  // Delete the old return.
642  Next->eraseFromParent();
643  } while (MI.getNextNode());
644 
645  // We expect to lose the debug location from the return.
646  LocObserver.checkpoint(false);
647  }
648 
649  return LegalizerHelper::Legalized;
650 }
651 
652 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
653  Type *FromType) {
654  auto ToMVT = MVT::getVT(ToType);
655  auto FromMVT = MVT::getVT(FromType);
656 
657  switch (Opcode) {
658  case TargetOpcode::G_FPEXT:
659  return RTLIB::getFPEXT(FromMVT, ToMVT);
660  case TargetOpcode::G_FPTRUNC:
661  return RTLIB::getFPROUND(FromMVT, ToMVT);
662  case TargetOpcode::G_FPTOSI:
663  return RTLIB::getFPTOSINT(FromMVT, ToMVT);
664  case TargetOpcode::G_FPTOUI:
665  return RTLIB::getFPTOUINT(FromMVT, ToMVT);
666  case TargetOpcode::G_SITOFP:
667  return RTLIB::getSINTTOFP(FromMVT, ToMVT);
668  case TargetOpcode::G_UITOFP:
669  return RTLIB::getUINTTOFP(FromMVT, ToMVT);
670  }
671  llvm_unreachable("Unsupported libcall function");
672 }
673 
674 static LegalizerHelper::LegalizeResult
675 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
676                   Type *FromType) {
677  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
678  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
679  {{MI.getOperand(1).getReg(), FromType}});
680 }
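// Example (illustrative): a G_FPTRUNC from s64 to s32 resolves through
// RTLIB::getFPROUND to the double-to-float runtime routine (commonly
// "__truncdfsf2"), called with the s64 source and returning the s32 result.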
681 
682 LegalizerHelper::LegalizeResult
683 LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
684  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
685  unsigned Size = LLTy.getSizeInBits();
686  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
687 
688  switch (MI.getOpcode()) {
689  default:
690  return UnableToLegalize;
691  case TargetOpcode::G_SDIV:
692  case TargetOpcode::G_UDIV:
693  case TargetOpcode::G_SREM:
694  case TargetOpcode::G_UREM:
695  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
696  Type *HLTy = IntegerType::get(Ctx, Size);
697  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
698  if (Status != Legalized)
699  return Status;
700  break;
701  }
702  case TargetOpcode::G_FADD:
703  case TargetOpcode::G_FSUB:
704  case TargetOpcode::G_FMUL:
705  case TargetOpcode::G_FDIV:
706  case TargetOpcode::G_FMA:
707  case TargetOpcode::G_FPOW:
708  case TargetOpcode::G_FREM:
709  case TargetOpcode::G_FCOS:
710  case TargetOpcode::G_FSIN:
711  case TargetOpcode::G_FLOG10:
712  case TargetOpcode::G_FLOG:
713  case TargetOpcode::G_FLOG2:
714  case TargetOpcode::G_FEXP:
715  case TargetOpcode::G_FEXP2:
716  case TargetOpcode::G_FCEIL:
717  case TargetOpcode::G_FFLOOR:
718  case TargetOpcode::G_FMINNUM:
719  case TargetOpcode::G_FMAXNUM:
720  case TargetOpcode::G_FSQRT:
721  case TargetOpcode::G_FRINT:
722  case TargetOpcode::G_FNEARBYINT:
723  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
724  Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
725  if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
726  LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
727  return UnableToLegalize;
728  }
729  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
730  if (Status != Legalized)
731  return Status;
732  break;
733  }
734  case TargetOpcode::G_FPEXT:
735  case TargetOpcode::G_FPTRUNC: {
736  Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
737  Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
738  if (!FromTy || !ToTy)
739  return UnableToLegalize;
740  LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
741  if (Status != Legalized)
742  return Status;
743  break;
744  }
745  case TargetOpcode::G_FPTOSI:
746  case TargetOpcode::G_FPTOUI: {
747  // FIXME: Support other types
748  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
749  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
750  if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
751  return UnableToLegalize;
752  LegalizeResult Status = conversionLibcall(
753      MI, MIRBuilder,
754  ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
755  FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
756  if (Status != Legalized)
757  return Status;
758  break;
759  }
760  case TargetOpcode::G_SITOFP:
761  case TargetOpcode::G_UITOFP: {
762  // FIXME: Support other types
763  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
764  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
765  if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
766  return UnableToLegalize;
767  LegalizeResult Status = conversionLibcall(
768      MI, MIRBuilder,
769  ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
770  FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
771  if (Status != Legalized)
772  return Status;
773  break;
774  }
775  case TargetOpcode::G_BZERO:
776  case TargetOpcode::G_MEMCPY:
777  case TargetOpcode::G_MEMMOVE:
778  case TargetOpcode::G_MEMSET: {
779  LegalizeResult Result =
780  createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
781  if (Result != Legalized)
782  return Result;
783  MI.eraseFromParent();
784  return Result;
785  }
786  }
787 
788  MI.eraseFromParent();
789  return Legalized;
790 }
791 
792 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
793                                                               unsigned TypeIdx,
794  LLT NarrowTy) {
795  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
796  uint64_t NarrowSize = NarrowTy.getSizeInBits();
797 
798  switch (MI.getOpcode()) {
799  default:
800  return UnableToLegalize;
801  case TargetOpcode::G_IMPLICIT_DEF: {
802  Register DstReg = MI.getOperand(0).getReg();
803  LLT DstTy = MRI.getType(DstReg);
804 
805  // If SizeOp0 is not an exact multiple of NarrowSize, emit
806  // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
807  // FIXME: Although this would also be legal for the general case, it causes
808  // a lot of regressions in the emitted code (superfluous COPYs, artifact
809  // combines not being hit). This seems to be a problem related to the
810  // artifact combiner.
811  if (SizeOp0 % NarrowSize != 0) {
812  LLT ImplicitTy = NarrowTy;
813  if (DstTy.isVector())
814  ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);
815 
816  Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
817  MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
818 
819  MI.eraseFromParent();
820  return Legalized;
821  }
822 
823  int NumParts = SizeOp0 / NarrowSize;
824 
825  SmallVector<Register, 2> DstRegs;
826  for (int i = 0; i < NumParts; ++i)
827  DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
828 
829  if (DstTy.isVector())
830  MIRBuilder.buildBuildVector(DstReg, DstRegs);
831  else
832  MIRBuilder.buildMerge(DstReg, DstRegs);
833  MI.eraseFromParent();
834  return Legalized;
835  }
836  case TargetOpcode::G_CONSTANT: {
837  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
838  const APInt &Val = MI.getOperand(1).getCImm()->getValue();
839  unsigned TotalSize = Ty.getSizeInBits();
840  unsigned NarrowSize = NarrowTy.getSizeInBits();
841  int NumParts = TotalSize / NarrowSize;
842 
843  SmallVector<Register, 4> PartRegs;
844  for (int I = 0; I != NumParts; ++I) {
845  unsigned Offset = I * NarrowSize;
846  auto K = MIRBuilder.buildConstant(NarrowTy,
847  Val.lshr(Offset).trunc(NarrowSize));
848  PartRegs.push_back(K.getReg(0));
849  }
850 
851  LLT LeftoverTy;
852  unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
853  SmallVector<Register, 1> LeftoverRegs;
854  if (LeftoverBits != 0) {
855  LeftoverTy = LLT::scalar(LeftoverBits);
856  auto K = MIRBuilder.buildConstant(
857  LeftoverTy,
858  Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
859  LeftoverRegs.push_back(K.getReg(0));
860  }
861 
862  insertParts(MI.getOperand(0).getReg(),
863  Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
864 
865  MI.eraseFromParent();
866  return Legalized;
867  }
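  // Example (illustrative): narrowing a 96-bit G_CONSTANT with NarrowTy = s32
  // emits three s32 G_CONSTANTs holding bits [0,32), [32,64) and [64,96) of
  // the value, which insertParts then recombines into the s96 destination.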
868  case TargetOpcode::G_SEXT:
869  case TargetOpcode::G_ZEXT:
870  case TargetOpcode::G_ANYEXT:
871  return narrowScalarExt(MI, TypeIdx, NarrowTy);
872  case TargetOpcode::G_TRUNC: {
873  if (TypeIdx != 1)
874  return UnableToLegalize;
875 
876  uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
877  if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
878  LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
879  return UnableToLegalize;
880  }
881 
882  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
883  MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
884  MI.eraseFromParent();
885  return Legalized;
886  }
887 
888  case TargetOpcode::G_FREEZE:
889  return reduceOperationWidth(MI, TypeIdx, NarrowTy);
890  case TargetOpcode::G_ADD:
891  case TargetOpcode::G_SUB:
892  case TargetOpcode::G_SADDO:
893  case TargetOpcode::G_SSUBO:
894  case TargetOpcode::G_SADDE:
895  case TargetOpcode::G_SSUBE:
896  case TargetOpcode::G_UADDO:
897  case TargetOpcode::G_USUBO:
898  case TargetOpcode::G_UADDE:
899  case TargetOpcode::G_USUBE:
900  return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
901  case TargetOpcode::G_MUL:
902  case TargetOpcode::G_UMULH:
903  return narrowScalarMul(MI, NarrowTy);
904  case TargetOpcode::G_EXTRACT:
905  return narrowScalarExtract(MI, TypeIdx, NarrowTy);
906  case TargetOpcode::G_INSERT:
907  return narrowScalarInsert(MI, TypeIdx, NarrowTy);
908  case TargetOpcode::G_LOAD: {
909  auto &MMO = **MI.memoperands_begin();
910  Register DstReg = MI.getOperand(0).getReg();
911  LLT DstTy = MRI.getType(DstReg);
912  if (DstTy.isVector())
913  return UnableToLegalize;
914 
915  if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
916  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
917  MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
918  MIRBuilder.buildAnyExt(DstReg, TmpReg);
919  MI.eraseFromParent();
920  return Legalized;
921  }
922 
923  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
924  }
925  case TargetOpcode::G_ZEXTLOAD:
926  case TargetOpcode::G_SEXTLOAD: {
927  bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
928  Register DstReg = MI.getOperand(0).getReg();
929  Register PtrReg = MI.getOperand(1).getReg();
930 
931  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
932  auto &MMO = **MI.memoperands_begin();
933  unsigned MemSize = MMO.getSizeInBits();
934 
935  if (MemSize == NarrowSize) {
936  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
937  } else if (MemSize < NarrowSize) {
938  MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
939  } else if (MemSize > NarrowSize) {
940  // FIXME: Need to split the load.
941  return UnableToLegalize;
942  }
943 
944  if (ZExt)
945  MIRBuilder.buildZExt(DstReg, TmpReg);
946  else
947  MIRBuilder.buildSExt(DstReg, TmpReg);
948 
949  MI.eraseFromParent();
950  return Legalized;
951  }
952  case TargetOpcode::G_STORE: {
953  const auto &MMO = **MI.memoperands_begin();
954 
955  Register SrcReg = MI.getOperand(0).getReg();
956  LLT SrcTy = MRI.getType(SrcReg);
957  if (SrcTy.isVector())
958  return UnableToLegalize;
959 
960  int NumParts = SizeOp0 / NarrowSize;
961  unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
962  unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
963  if (SrcTy.isVector() && LeftoverBits != 0)
964  return UnableToLegalize;
965 
966  if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
967  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
968  auto &MMO = **MI.memoperands_begin();
969  MIRBuilder.buildTrunc(TmpReg, SrcReg);
970  MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
971  MI.eraseFromParent();
972  return Legalized;
973  }
974 
975  return reduceLoadStoreWidth(MI, 0, NarrowTy);
976  }
977  case TargetOpcode::G_SELECT:
978  return narrowScalarSelect(MI, TypeIdx, NarrowTy);
979  case TargetOpcode::G_AND:
980  case TargetOpcode::G_OR:
981  case TargetOpcode::G_XOR: {
982  // Legalize bitwise operation:
983  // A = BinOp<Ty> B, C
984  // into:
985  // B1, ..., BN = G_UNMERGE_VALUES B
986  // C1, ..., CN = G_UNMERGE_VALUES C
987  // A1 = BinOp<Ty/N> B1, C2
988  // ...
989  // AN = BinOp<Ty/N> BN, CN
990  // A = G_MERGE_VALUES A1, ..., AN
991  return narrowScalarBasic(MI, TypeIdx, NarrowTy);
992  }
993  case TargetOpcode::G_SHL:
994  case TargetOpcode::G_LSHR:
995  case TargetOpcode::G_ASHR:
996  return narrowScalarShift(MI, TypeIdx, NarrowTy);
997  case TargetOpcode::G_CTLZ:
998  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
999  case TargetOpcode::G_CTTZ:
1000  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1001  case TargetOpcode::G_CTPOP:
1002  if (TypeIdx == 1)
1003  switch (MI.getOpcode()) {
1004  case TargetOpcode::G_CTLZ:
1005  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1006  return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1007  case TargetOpcode::G_CTTZ:
1008  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1009  return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1010  case TargetOpcode::G_CTPOP:
1011  return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1012  default:
1013  return UnableToLegalize;
1014  }
1015 
1016  Observer.changingInstr(MI);
1017  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1018  Observer.changedInstr(MI);
1019  return Legalized;
1020  case TargetOpcode::G_INTTOPTR:
1021  if (TypeIdx != 1)
1022  return UnableToLegalize;
1023 
1024  Observer.changingInstr(MI);
1025  narrowScalarSrc(MI, NarrowTy, 1);
1026  Observer.changedInstr(MI);
1027  return Legalized;
1028  case TargetOpcode::G_PTRTOINT:
1029  if (TypeIdx != 0)
1030  return UnableToLegalize;
1031 
1032  Observer.changingInstr(MI);
1033  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1034  Observer.changedInstr(MI);
1035  return Legalized;
1036  case TargetOpcode::G_PHI: {
1037  // FIXME: add support for when SizeOp0 isn't an exact multiple of
1038  // NarrowSize.
1039  if (SizeOp0 % NarrowSize != 0)
1040  return UnableToLegalize;
1041 
1042  unsigned NumParts = SizeOp0 / NarrowSize;
1043  SmallVector<Register, 2> DstRegs(NumParts);
1044  SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1045  Observer.changingInstr(MI);
1046  for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1047  MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1048  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1049  extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1050  SrcRegs[i / 2]);
1051  }
1052  MachineBasicBlock &MBB = *MI.getParent();
1053  MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1054  for (unsigned i = 0; i < NumParts; ++i) {
1055  DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1056  MachineInstrBuilder MIB =
1057  MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1058  for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1059  MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1060  }
1061  MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1062  MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1063  Observer.changedInstr(MI);
1064  MI.eraseFromParent();
1065  return Legalized;
1066  }
1067  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1068  case TargetOpcode::G_INSERT_VECTOR_ELT: {
1069  if (TypeIdx != 2)
1070  return UnableToLegalize;
1071 
1072  int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1073  Observer.changingInstr(MI);
1074  narrowScalarSrc(MI, NarrowTy, OpIdx);
1075  Observer.changedInstr(MI);
1076  return Legalized;
1077  }
1078  case TargetOpcode::G_ICMP: {
1079  uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
1080  if (NarrowSize * 2 != SrcSize)
1081  return UnableToLegalize;
1082 
1083  Observer.changingInstr(MI);
1084  Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
1085  Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
1086  MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
1087 
1088  Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
1089  Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
1090  MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
1091 
1092  CmpInst::Predicate Pred =
1093  static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1094  LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
1095 
1096  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
1097  MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
1098  MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
1099  MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
1100  MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1101  MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
1102  } else {
1103  MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1104  MachineInstrBuilder CmpHEQ =
1105  MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1106  MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1107      ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1108  MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
1109  }
1110  Observer.changedInstr(MI);
1111  MI.eraseFromParent();
1112  return Legalized;
1113  }
1114  case TargetOpcode::G_SEXT_INREG: {
1115  if (TypeIdx != 0)
1116  return UnableToLegalize;
1117 
1118  int64_t SizeInBits = MI.getOperand(2).getImm();
1119 
1120  // So long as the new type has more bits than the bits we're extending we
1121  // don't need to break it apart.
1122  if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1123  Observer.changingInstr(MI);
1124  // We don't lose any non-extension bits by truncating the src and
1125  // sign-extending the dst.
1126  MachineOperand &MO1 = MI.getOperand(1);
1127  auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1128  MO1.setReg(TruncMIB.getReg(0));
1129 
1130  MachineOperand &MO2 = MI.getOperand(0);
1131  Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1132  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1133  MIRBuilder.buildSExt(MO2, DstExt);
1134  MO2.setReg(DstExt);
1135  Observer.changedInstr(MI);
1136  return Legalized;
1137  }
1138 
1139  // Break it apart. Components below the extension point are unmodified. The
1140  // component containing the extension point becomes a narrower SEXT_INREG.
1141  // Components above it are ashr'd from the component containing the
1142  // extension point.
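  // Example (illustrative): G_SEXT_INREG %x:s64, 40 narrowed to s32 keeps the
  // low s32 part unchanged (it lies entirely below bit 40) and turns the high
  // part into a G_SEXT_INREG of width 8 (40 % 32); there are no parts above
  // the extension point, so no extra G_ASHRs are emitted.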
1143  if (SizeOp0 % NarrowSize != 0)
1144  return UnableToLegalize;
1145  int NumParts = SizeOp0 / NarrowSize;
1146 
1147  // List the registers where the destination will be scattered.
1148  SmallVector<Register, 2> DstRegs;
1149  // List the registers where the source will be split.
1150  SmallVector<Register, 2> SrcRegs;
1151 
1152  // Create all the temporary registers.
1153  for (int i = 0; i < NumParts; ++i) {
1154  Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1155 
1156  SrcRegs.push_back(SrcReg);
1157  }
1158 
1159  // Explode the big arguments into smaller chunks.
1160  MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1161 
1162  Register AshrCstReg =
1163  MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1164  .getReg(0);
1165  Register FullExtensionReg = 0;
1166  Register PartialExtensionReg = 0;
1167 
1168  // Do the operation on each small part.
1169  for (int i = 0; i < NumParts; ++i) {
1170  if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1171  DstRegs.push_back(SrcRegs[i]);
1172  else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1173  assert(PartialExtensionReg &&
1174  "Expected to visit partial extension before full");
1175  if (FullExtensionReg) {
1176  DstRegs.push_back(FullExtensionReg);
1177  continue;
1178  }
1179  DstRegs.push_back(
1180  MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1181  .getReg(0));
1182  FullExtensionReg = DstRegs.back();
1183  } else {
1184  DstRegs.push_back(
1185  MIRBuilder
1186  .buildInstr(
1187  TargetOpcode::G_SEXT_INREG, {NarrowTy},
1188  {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1189  .getReg(0));
1190  PartialExtensionReg = DstRegs.back();
1191  }
1192  }
1193 
1194  // Gather the destination registers into the final destination.
1195  Register DstReg = MI.getOperand(0).getReg();
1196  MIRBuilder.buildMerge(DstReg, DstRegs);
1197  MI.eraseFromParent();
1198  return Legalized;
1199  }
1200  case TargetOpcode::G_BSWAP:
1201  case TargetOpcode::G_BITREVERSE: {
1202  if (SizeOp0 % NarrowSize != 0)
1203  return UnableToLegalize;
1204 
1205  Observer.changingInstr(MI);
1206  SmallVector<Register, 2> SrcRegs, DstRegs;
1207  unsigned NumParts = SizeOp0 / NarrowSize;
1208  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1209 
1210  for (unsigned i = 0; i < NumParts; ++i) {
1211  auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1212  {SrcRegs[NumParts - 1 - i]});
1213  DstRegs.push_back(DstPart.getReg(0));
1214  }
1215 
1216  MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1217 
1218  Observer.changedInstr(MI);
1219  MI.eraseFromParent();
1220  return Legalized;
1221  }
1222  case TargetOpcode::G_PTR_ADD:
1223  case TargetOpcode::G_PTRMASK: {
1224  if (TypeIdx != 1)
1225  return UnableToLegalize;
1226  Observer.changingInstr(MI);
1227  narrowScalarSrc(MI, NarrowTy, 2);
1228  Observer.changedInstr(MI);
1229  return Legalized;
1230  }
1231  case TargetOpcode::G_FPTOUI:
1232  case TargetOpcode::G_FPTOSI:
1233  return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1234  case TargetOpcode::G_FPEXT:
1235  if (TypeIdx != 0)
1236  return UnableToLegalize;
1237  Observer.changingInstr(MI);
1238  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1239  Observer.changedInstr(MI);
1240  return Legalized;
1241  }
1242 }
1243 
1244 Register LegalizerHelper::coerceToScalar(Register Val) {
1245  LLT Ty = MRI.getType(Val);
1246  if (Ty.isScalar())
1247  return Val;
1248 
1249  const DataLayout &DL = MIRBuilder.getDataLayout();
1250  LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1251  if (Ty.isPointer()) {
1252  if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1253  return Register();
1254  return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1255  }
1256 
1257  Register NewVal = Val;
1258 
1259  assert(Ty.isVector());
1260  LLT EltTy = Ty.getElementType();
1261  if (EltTy.isPointer())
1262  NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1263  return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1264 }
1265 
1266 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1267                                      unsigned OpIdx, unsigned ExtOpcode) {
1268  MachineOperand &MO = MI.getOperand(OpIdx);
1269  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1270  MO.setReg(ExtB.getReg(0));
1271 }
1272 
1273 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1274                                       unsigned OpIdx) {
1275  MachineOperand &MO = MI.getOperand(OpIdx);
1276  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1277  MO.setReg(ExtB.getReg(0));
1278 }
1279 
1280 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1281                                      unsigned OpIdx, unsigned TruncOpcode) {
1282  MachineOperand &MO = MI.getOperand(OpIdx);
1283  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1284  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1285  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1286  MO.setReg(DstExt);
1287 }
1288 
1289 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1290                                       unsigned OpIdx, unsigned ExtOpcode) {
1291  MachineOperand &MO = MI.getOperand(OpIdx);
1292  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1293  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1294  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1295  MO.setReg(DstTrunc);
1296 }
1297 
1298 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1299                                             unsigned OpIdx) {
1300  MachineOperand &MO = MI.getOperand(OpIdx);
1301  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1302  MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
1303 }
1304 
1305 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1306                                             unsigned OpIdx) {
1307  MachineOperand &MO = MI.getOperand(OpIdx);
1308 
1309  LLT OldTy = MRI.getType(MO.getReg());
1310  unsigned OldElts = OldTy.getNumElements();
1311  unsigned NewElts = MoreTy.getNumElements();
1312 
1313  unsigned NumParts = NewElts / OldElts;
1314 
1315  // Use concat_vectors if the result is a multiple of the number of elements.
1316  if (NumParts * OldElts == NewElts) {
1317  SmallVector<Register, 8> Parts;
1318  Parts.push_back(MO.getReg());
1319 
1320  Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1321  for (unsigned I = 1; I != NumParts; ++I)
1322  Parts.push_back(ImpDef);
1323 
1324  auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1325  MO.setReg(Concat.getReg(0));
1326  return;
1327  }
1328 
1329  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1330  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1331  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1332  MO.setReg(MoreReg);
1333 }
1334 
1335 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1336  MachineOperand &Op = MI.getOperand(OpIdx);
1337  Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1338 }
1339 
1340 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1341  MachineOperand &MO = MI.getOperand(OpIdx);
1342  Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1343  MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1344  MIRBuilder.buildBitcast(MO, CastDst);
1345  MO.setReg(CastDst);
1346 }
1347 
1348 LegalizerHelper::LegalizeResult
1349 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1350  LLT WideTy) {
1351  if (TypeIdx != 1)
1352  return UnableToLegalize;
1353 
1354  Register DstReg = MI.getOperand(0).getReg();
1355  LLT DstTy = MRI.getType(DstReg);
1356  if (DstTy.isVector())
1357  return UnableToLegalize;
1358 
1359  Register Src1 = MI.getOperand(1).getReg();
1360  LLT SrcTy = MRI.getType(Src1);
1361  const int DstSize = DstTy.getSizeInBits();
1362  const int SrcSize = SrcTy.getSizeInBits();
1363  const int WideSize = WideTy.getSizeInBits();
1364  const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1365 
1366  unsigned NumOps = MI.getNumOperands();
1367  unsigned NumSrc = MI.getNumOperands() - 1;
1368  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1369 
1370  if (WideSize >= DstSize) {
1371  // Directly pack the bits in the target type.
1372  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1373 
1374  for (unsigned I = 2; I != NumOps; ++I) {
1375  const unsigned Offset = (I - 1) * PartSize;
1376 
1377  Register SrcReg = MI.getOperand(I).getReg();
1378  assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1379 
1380  auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1381 
1382  Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1383  MRI.createGenericVirtualRegister(WideTy);
1384 
1385  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1386  auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1387  MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1388  ResultReg = NextResult;
1389  }
1390 
1391  if (WideSize > DstSize)
1392  MIRBuilder.buildTrunc(DstReg, ResultReg);
1393  else if (DstTy.isPointer())
1394  MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1395 
1396  MI.eraseFromParent();
1397  return Legalized;
1398  }
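  // Example (illustrative): widening %0:s16 = G_MERGE_VALUES %a:s8, %b:s8 to
  // s32 zero-extends both inputs to s32, shifts the second left by 8, ORs the
  // two together and truncates the s32 result back to the s16 destination.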
1399 
1400  // Unmerge the original values to the GCD type, and recombine to the next
1401  // multiple greater than the original type.
1402  //
1403  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1404  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1405  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1406  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1407  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1408  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1409  // %12:_(s12) = G_MERGE_VALUES %10, %11
1410  //
1411  // Padding with undef if necessary:
1412  //
1413  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1414  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1415  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1416  // %7:_(s2) = G_IMPLICIT_DEF
1417  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1418  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1419  // %10:_(s12) = G_MERGE_VALUES %8, %9
1420 
1421  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1422  LLT GCDTy = LLT::scalar(GCD);
1423 
1425  SmallVector<Register, 8> NewMergeRegs;
1426  SmallVector<Register, 8> Unmerges;
1427  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1428 
1429  // Decompose the original operands if they don't evenly divide.
1430  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1431  Register SrcReg = MI.getOperand(I).getReg();
1432  if (GCD == SrcSize) {
1433  Unmerges.push_back(SrcReg);
1434  } else {
1435  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1436  for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1437  Unmerges.push_back(Unmerge.getReg(J));
1438  }
1439  }
1440 
1441  // Pad with undef to the next size that is a multiple of the requested size.
1442  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1443  Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1444  for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1445  Unmerges.push_back(UndefReg);
1446  }
1447 
1448  const int PartsPerGCD = WideSize / GCD;
1449 
1450  // Build merges of each piece.
1451  ArrayRef<Register> Slicer(Unmerges);
1452  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1453  auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1454  NewMergeRegs.push_back(Merge.getReg(0));
1455  }
1456 
1457  // A truncate may be necessary if the requested type doesn't evenly divide the
1458  // original result type.
1459  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1460  MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1461  } else {
1462  auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1463  MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1464  }
1465 
1466  MI.eraseFromParent();
1467  return Legalized;
1468 }
1469 
1470 Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
1471  Register WideReg = MRI.createGenericVirtualRegister(WideTy);
1472  LLT OrigTy = MRI.getType(OrigReg);
1473  LLT LCMTy = getLCMType(WideTy, OrigTy);
1474 
1475  const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
1476  const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
1477 
1478  Register UnmergeSrc = WideReg;
1479 
1480  // Create a merge to the LCM type, padding with undef
1481  // %0:_(<3 x s32>) = G_FOO => <4 x s32>
1482  // =>
1483  // %1:_(<4 x s32>) = G_FOO
1484  // %2:_(<4 x s32>) = G_IMPLICIT_DEF
1485  // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
1486  // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
1487  if (NumMergeParts > 1) {
1488  Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
1489  SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
1490  MergeParts[0] = WideReg;
1491  UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
1492  }
1493 
1494  // Unmerge to the original register and pad with dead defs.
1495  SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
1496  UnmergeResults[0] = OrigReg;
1497  for (int I = 1; I != NumUnmergeParts; ++I)
1498  UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
1499 
1500  MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
1501  return WideReg;
1502 }
1503 
1504 LegalizerHelper::LegalizeResult
1505 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1506  LLT WideTy) {
1507  if (TypeIdx != 0)
1508  return UnableToLegalize;
1509 
1510  int NumDst = MI.getNumOperands() - 1;
1511  Register SrcReg = MI.getOperand(NumDst).getReg();
1512  LLT SrcTy = MRI.getType(SrcReg);
1513  if (SrcTy.isVector())
1514  return UnableToLegalize;
1515 
1516  Register Dst0Reg = MI.getOperand(0).getReg();
1517  LLT DstTy = MRI.getType(Dst0Reg);
1518  if (!DstTy.isScalar())
1519  return UnableToLegalize;
1520 
1521  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1522  if (SrcTy.isPointer()) {
1523  const DataLayout &DL = MIRBuilder.getDataLayout();
1524  if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1525  LLVM_DEBUG(
1526  dbgs() << "Not casting non-integral address space integer\n");
1527  return UnableToLegalize;
1528  }
1529 
1530  SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1531  SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1532  }
1533 
1534  // Widen SrcTy to WideTy. This does not affect the result, but since the
1535  // user requested this size, it is probably better handled than SrcTy and
1536  // should reduce the total number of legalization artifacts
1537  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1538  SrcTy = WideTy;
1539  SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1540  }
1541 
1542  // There's no unmerge type to target. Directly extract the bits from the
1543  // source type.
1544  unsigned DstSize = DstTy.getSizeInBits();
1545 
1546  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1547  for (int I = 1; I != NumDst; ++I) {
1548  auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1549  auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1550  MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1551  }
1552 
1553  MI.eraseFromParent();
1554  return Legalized;
1555  }
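  // Example (illustrative): widening %a:s16, %b:s16 = G_UNMERGE_VALUES %x:s32
  // to s64 any-extends %x to s64, truncates it to produce %a, and produces %b
  // by logically shifting right by 16 before truncating.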
1556 
1557  // Extend the source to a wider type.
1558  LLT LCMTy = getLCMType(SrcTy, WideTy);
1559 
1560  Register WideSrc = SrcReg;
1561  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
1562  // TODO: If this is an integral address space, cast to integer and anyext.
1563  if (SrcTy.isPointer()) {
1564  LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
1565  return UnableToLegalize;
1566  }
1567 
1568  WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
1569  }
1570 
1571  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
1572 
1573  // Create a sequence of unmerges and merges to the original results. Since we
1574  // may have widened the source, we will need to pad the results with dead defs
1575  // to cover the source register.
1576  // e.g. widen s48 to s64:
1577  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
1578  //
1579  // =>
1580  // %4:_(s192) = G_ANYEXT %0:_(s96)
1581  // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
1582  // ; unpack to GCD type, with extra dead defs
1583  // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
1584  // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
1585  // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
1586  // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
1587  // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
1588  const LLT GCDTy = getGCDType(WideTy, DstTy);
1589  const int NumUnmerge = Unmerge->getNumOperands() - 1;
1590  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
1591 
1592  // Directly unmerge to the destination without going through a GCD type
1593  // if possible
1594  if (PartsPerRemerge == 1) {
1595  const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
1596 
1597  for (int I = 0; I != NumUnmerge; ++I) {
1598  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
1599 
1600  for (int J = 0; J != PartsPerUnmerge; ++J) {
1601  int Idx = I * PartsPerUnmerge + J;
1602  if (Idx < NumDst)
1603  MIB.addDef(MI.getOperand(Idx).getReg());
1604  else {
1605  // Create dead def for excess components.
1606  MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
1607  }
1608  }
1609 
1610  MIB.addUse(Unmerge.getReg(I));
1611  }
1612  } else {
1613  SmallVector<Register, 16> Parts;
1614  for (int J = 0; J != NumUnmerge; ++J)
1615  extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
1616 
1617  SmallVector<Register, 8> RemergeParts;
1618  for (int I = 0; I != NumDst; ++I) {
1619  for (int J = 0; J < PartsPerRemerge; ++J) {
1620  const int Idx = I * PartsPerRemerge + J;
1621  RemergeParts.emplace_back(Parts[Idx]);
1622  }
1623 
1624  MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
1625  RemergeParts.clear();
1626  }
1627  }
1628 
1629  MI.eraseFromParent();
1630  return Legalized;
1631 }
1632 
1633 LegalizerHelper::LegalizeResult
1634 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1635  LLT WideTy) {
1636  Register DstReg = MI.getOperand(0).getReg();
1637  Register SrcReg = MI.getOperand(1).getReg();
1638  LLT SrcTy = MRI.getType(SrcReg);
1639 
1640  LLT DstTy = MRI.getType(DstReg);
1641  unsigned Offset = MI.getOperand(2).getImm();
1642 
1643  if (TypeIdx == 0) {
1644  if (SrcTy.isVector() || DstTy.isVector())
1645  return UnableToLegalize;
1646 
1647  SrcOp Src(SrcReg);
1648  if (SrcTy.isPointer()) {
1649  // Extracts from pointers can be handled only if they are really just
1650  // simple integers.
1651  const DataLayout &DL = MIRBuilder.getDataLayout();
1652  if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1653  return UnableToLegalize;
1654 
1655  LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1656  Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1657  SrcTy = SrcAsIntTy;
1658  }
1659 
1660  if (DstTy.isPointer())
1661  return UnableToLegalize;
1662 
1663  if (Offset == 0) {
1664  // Avoid a shift in the degenerate case.
1665  MIRBuilder.buildTrunc(DstReg,
1666  MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1667  MI.eraseFromParent();
1668  return Legalized;
1669  }
1670 
1671  // Do a shift in the source type.
1672  LLT ShiftTy = SrcTy;
1673  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1674  Src = MIRBuilder.buildAnyExt(WideTy, Src);
1675  ShiftTy = WideTy;
1676  }
1677 
1678  auto LShr = MIRBuilder.buildLShr(
1679  ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1680  MIRBuilder.buildTrunc(DstReg, LShr);
1681  MI.eraseFromParent();
1682  return Legalized;
1683  }
1684 
1685  if (SrcTy.isScalar()) {
1686  Observer.changingInstr(MI);
1687  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1688  Observer.changedInstr(MI);
1689  return Legalized;
1690  }
1691 
1692  if (!SrcTy.isVector())
1693  return UnableToLegalize;
1694 
1695  if (DstTy != SrcTy.getElementType())
1696  return UnableToLegalize;
1697 
1698  if (Offset % SrcTy.getScalarSizeInBits() != 0)
1699  return UnableToLegalize;
1700 
1701  Observer.changingInstr(MI);
1702  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1703 
1704  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1705  Offset);
1706  widenScalarDst(MI, WideTy.getScalarType(), 0);
1707  Observer.changedInstr(MI);
1708  return Legalized;
1709 }
1710 
1711 LegalizerHelper::LegalizeResult
1712 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1713  LLT WideTy) {
1714  if (TypeIdx != 0 || WideTy.isVector())
1715  return UnableToLegalize;
1716  Observer.changingInstr(MI);
1717  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1718  widenScalarDst(MI, WideTy);
1719  Observer.changedInstr(MI);
1720  return Legalized;
1721 }
1722 
1724 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
1725  LLT WideTy) {
1726  if (TypeIdx == 1)
1727  return UnableToLegalize; // TODO
1728 
1729  unsigned Opcode;
1730  unsigned ExtOpcode;
1731  Optional<Register> CarryIn = None;
1732  switch (MI.getOpcode()) {
1733  default:
1734  llvm_unreachable("Unexpected opcode!");
1735  case TargetOpcode::G_SADDO:
1736  Opcode = TargetOpcode::G_ADD;
1737  ExtOpcode = TargetOpcode::G_SEXT;
1738  break;
1739  case TargetOpcode::G_SSUBO:
1740  Opcode = TargetOpcode::G_SUB;
1741  ExtOpcode = TargetOpcode::G_SEXT;
1742  break;
1743  case TargetOpcode::G_UADDO:
1744  Opcode = TargetOpcode::G_ADD;
1745  ExtOpcode = TargetOpcode::G_ZEXT;
1746  break;
1747  case TargetOpcode::G_USUBO:
1748  Opcode = TargetOpcode::G_SUB;
1749  ExtOpcode = TargetOpcode::G_ZEXT;
1750  break;
1751  case TargetOpcode::G_SADDE:
1752  Opcode = TargetOpcode::G_UADDE;
1753  ExtOpcode = TargetOpcode::G_SEXT;
1754  CarryIn = MI.getOperand(4).getReg();
1755  break;
1756  case TargetOpcode::G_SSUBE:
1757  Opcode = TargetOpcode::G_USUBE;
1758  ExtOpcode = TargetOpcode::G_SEXT;
1759  CarryIn = MI.getOperand(4).getReg();
1760  break;
1761  case TargetOpcode::G_UADDE:
1762  Opcode = TargetOpcode::G_UADDE;
1763  ExtOpcode = TargetOpcode::G_ZEXT;
1764  CarryIn = MI.getOperand(4).getReg();
1765  break;
1766  case TargetOpcode::G_USUBE:
1767  Opcode = TargetOpcode::G_USUBE;
1768  ExtOpcode = TargetOpcode::G_ZEXT;
1769  CarryIn = MI.getOperand(4).getReg();
1770  break;
1771  }
1772 
1773  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
1774  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
1775  // Do the arithmetic in the larger type.
1776  Register NewOp;
1777  if (CarryIn) {
1778  LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
1779  NewOp = MIRBuilder
1780  .buildInstr(Opcode, {WideTy, CarryOutTy},
1781  {LHSExt, RHSExt, *CarryIn})
1782  .getReg(0);
1783  } else {
1784  NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
1785  }
1786  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1787  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
1788  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
1789  // There is no overflow if the ExtOp is the same as NewOp.
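 // As a concrete illustration (values chosen arbitrarily): for a G_SADDO on s8
 // widened to s32 with inputs 100 and 100, NewOp = 200, TruncOp = -56 (the
 // wrapped s8 value), ExtOp = sext(-56) = -56, and 200 != -56 flags overflow.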
1790  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
1791  // Now trunc the NewOp to the original result.
1792  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1793  MI.eraseFromParent();
1794  return Legalized;
1795 }
1796 
1798 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
1799  LLT WideTy) {
1800  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
1801  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
1802  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
1803  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
1804  MI.getOpcode() == TargetOpcode::G_USHLSAT;
1805  // We can convert this to:
1806  // 1. Any extend iN to iM
1807  // 2. SHL by M-N
1808  // 3. [US][ADD|SUB|SHL]SAT
1809  // 4. L/ASHR by M-N
1810  //
1811  // It may be more efficient to lower this to a min and a max operation in
1812  // the higher precision arithmetic if the promoted operation isn't legal,
1813  // but this decision is up to the target's lowering request.
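 // For example, widening an s8 G_UADDSAT to s32 would produce roughly
 // (register names are illustrative):
 //   %lhs32 = G_ANYEXT %lhs(s8)                ; step 1
 //   %rhs32 = G_ANYEXT %rhs(s8)
 //   %lhs_hi = G_SHL %lhs32, 24                ; step 2
 //   %rhs_hi = G_SHL %rhs32, 24
 //   %sat = G_UADDSAT %lhs_hi, %rhs_hi         ; step 3
 //   %shifted = G_LSHR %sat, 24                ; step 4 (G_ASHR for signed ops)
 //   %res = G_TRUNC %shifted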
1814  Register DstReg = MI.getOperand(0).getReg();
1815 
1816  unsigned NewBits = WideTy.getScalarSizeInBits();
1817  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
1818 
 1819  // For shifts, the RHS (shift amount) must be zero-extended to preserve its
 1820  // unsigned value, and must not be shifted left along with the LHS.
1821  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
1822  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
1823  : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
1824  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
1825  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
1826  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
1827 
1828  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
1829  {ShiftL, ShiftR}, MI.getFlags());
1830 
1831  // Use a shift that will preserve the number of sign bits when the trunc is
1832  // folded away.
1833  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
1834  : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
1835 
1836  MIRBuilder.buildTrunc(DstReg, Result);
1837  MI.eraseFromParent();
1838  return Legalized;
1839 }
1840 
1842 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
1843  LLT WideTy) {
1844  if (TypeIdx == 1)
1845  return UnableToLegalize;
1846 
1847  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
1848  Register Result = MI.getOperand(0).getReg();
1849  Register OriginalOverflow = MI.getOperand(1).getReg();
1850  Register LHS = MI.getOperand(2).getReg();
1851  Register RHS = MI.getOperand(3).getReg();
1852  LLT SrcTy = MRI.getType(LHS);
1853  LLT OverflowTy = MRI.getType(OriginalOverflow);
1854  unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
1855 
1856  // To determine if the result overflowed in the larger type, we extend the
1857  // input to the larger type, do the multiply (checking if it overflows),
1858  // then also check the high bits of the result to see if overflow happened
1859  // there.
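 // For example, an s8 G_UMULO widened to s16 with the illustrative inputs 20
 // and 20: the s16 product is 400, zero-extending its low 8 bits back gives
 // 144, and 144 != 400 marks the original s8 multiply as overflowing.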
1860  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1861  auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
1862  auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
1863 
1864  auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
1865  {LeftOperand, RightOperand});
1866  auto Mul = Mulo->getOperand(0);
1867  MIRBuilder.buildTrunc(Result, Mul);
1868 
1869  MachineInstrBuilder ExtResult;
1870  // Overflow occurred if it occurred in the larger type, or if the high part
1871  // of the result does not zero/sign-extend the low part. Check this second
1872  // possibility first.
1873  if (IsSigned) {
1874  // For signed, overflow occurred when the high part does not sign-extend
1875  // the low part.
1876  ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
1877  } else {
1878  // Unsigned overflow occurred when the high part does not zero-extend the
1879  // low part.
1880  ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
1881  }
1882 
1883  // Multiplication cannot overflow if the WideTy is >= 2 * original width,
1884  // so we don't need to check the overflow result of larger type Mulo.
1885  if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
1886  auto Overflow =
1887  MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
1888  // Finally check if the multiplication in the larger type itself overflowed.
1889  MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
1890  } else {
1891  MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
1892  }
1893  MI.eraseFromParent();
1894  return Legalized;
1895 }
1896 
1898 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1899  switch (MI.getOpcode()) {
1900  default:
1901  return UnableToLegalize;
1902  case TargetOpcode::G_EXTRACT:
1903  return widenScalarExtract(MI, TypeIdx, WideTy);
1904  case TargetOpcode::G_INSERT:
1905  return widenScalarInsert(MI, TypeIdx, WideTy);
1906  case TargetOpcode::G_MERGE_VALUES:
1907  return widenScalarMergeValues(MI, TypeIdx, WideTy);
1908  case TargetOpcode::G_UNMERGE_VALUES:
1909  return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
1910  case TargetOpcode::G_SADDO:
1911  case TargetOpcode::G_SSUBO:
1912  case TargetOpcode::G_UADDO:
1913  case TargetOpcode::G_USUBO:
1914  case TargetOpcode::G_SADDE:
1915  case TargetOpcode::G_SSUBE:
1916  case TargetOpcode::G_UADDE:
1917  case TargetOpcode::G_USUBE:
1918  return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
1919  case TargetOpcode::G_UMULO:
1920  case TargetOpcode::G_SMULO:
1921  return widenScalarMulo(MI, TypeIdx, WideTy);
1922  case TargetOpcode::G_SADDSAT:
1923  case TargetOpcode::G_SSUBSAT:
1924  case TargetOpcode::G_SSHLSAT:
1925  case TargetOpcode::G_UADDSAT:
1926  case TargetOpcode::G_USUBSAT:
1927  case TargetOpcode::G_USHLSAT:
1928  return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
1929  case TargetOpcode::G_CTTZ:
1930  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1931  case TargetOpcode::G_CTLZ:
1932  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1933  case TargetOpcode::G_CTPOP: {
1934  if (TypeIdx == 0) {
1936  widenScalarDst(MI, WideTy, 0);
1938  return Legalized;
1939  }
1940 
1941  Register SrcReg = MI.getOperand(1).getReg();
1942 
1943  // First ZEXT the input.
1944  auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1945  LLT CurTy = MRI.getType(SrcReg);
1946  if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1947  // The count is the same in the larger type except if the original
1948  // value was zero. This can be handled by setting the bit just off
1949  // the top of the original type.
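 // E.g. for an s8 G_CTTZ widened to s32, OR the zero-extended source with
 // 0x100 so that a zero input still yields a count of 8 rather than 32.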
1950  auto TopBit =
1951  APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1952  MIBSrc = MIRBuilder.buildOr(
1953  WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1954  }
1955 
1956  // Perform the operation at the larger size.
1957  auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1958  // This is already the correct result for CTPOP and CTTZs
1959  if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1960  MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
 1961  // The correct result is NewOp - (size difference between WideTy and CurTy).
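 // E.g. an s8 G_CTLZ performed as an s32 G_CTLZ on the zero-extended source
 // counts 24 extra leading zeros, so 24 is subtracted from the wide result.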
1962  unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1963  MIBNewOp = MIRBuilder.buildSub(
1964  WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
1965  }
1966 
1967  MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1968  MI.eraseFromParent();
1969  return Legalized;
1970  }
1971  case TargetOpcode::G_BSWAP: {
1973  Register DstReg = MI.getOperand(0).getReg();
1974 
1975  Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
1976  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1977  Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1978  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1979 
1980  MI.getOperand(0).setReg(DstExt);
1981 
1983 
1984  LLT Ty = MRI.getType(DstReg);
1985  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1986  MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1987  MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
1988 
1989  MIRBuilder.buildTrunc(DstReg, ShrReg);
1991  return Legalized;
1992  }
1993  case TargetOpcode::G_BITREVERSE: {
1995 
1996  Register DstReg = MI.getOperand(0).getReg();
1997  LLT Ty = MRI.getType(DstReg);
1998  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1999 
2000  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2001  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2002  MI.getOperand(0).setReg(DstExt);
2004 
2005  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2006  auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2007  MIRBuilder.buildTrunc(DstReg, Shift);
2009  return Legalized;
2010  }
2011  case TargetOpcode::G_FREEZE:
2013  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2014  widenScalarDst(MI, WideTy);
2016  return Legalized;
2017 
2018  case TargetOpcode::G_ABS:
2020  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2021  widenScalarDst(MI, WideTy);
2023  return Legalized;
2024 
2025  case TargetOpcode::G_ADD:
2026  case TargetOpcode::G_AND:
2027  case TargetOpcode::G_MUL:
2028  case TargetOpcode::G_OR:
2029  case TargetOpcode::G_XOR:
2030  case TargetOpcode::G_SUB:
 2031  // Perform operation at larger width (any extension is fine here, high bits
2032  // don't affect the result) and then truncate the result back to the
2033  // original type.
2035  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2036  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2037  widenScalarDst(MI, WideTy);
2039  return Legalized;
2040 
2041  case TargetOpcode::G_SHL:
2043 
2044  if (TypeIdx == 0) {
2045  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2046  widenScalarDst(MI, WideTy);
2047  } else {
2048  assert(TypeIdx == 1);
2049  // The "number of bits to shift" operand must preserve its value as an
2050  // unsigned integer:
2051  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2052  }
2053 
2055  return Legalized;
2056 
2057  case TargetOpcode::G_SDIV:
2058  case TargetOpcode::G_SREM:
2059  case TargetOpcode::G_SMIN:
2060  case TargetOpcode::G_SMAX:
2062  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2063  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2064  widenScalarDst(MI, WideTy);
2066  return Legalized;
2067 
2068  case TargetOpcode::G_SDIVREM:
2070  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2071  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2072  widenScalarDst(MI, WideTy);
2073  widenScalarDst(MI, WideTy, 1);
2075  return Legalized;
2076 
2077  case TargetOpcode::G_ASHR:
2078  case TargetOpcode::G_LSHR:
2080 
2081  if (TypeIdx == 0) {
2082  unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2083  TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2084 
2085  widenScalarSrc(MI, WideTy, 1, CvtOp);
2086  widenScalarDst(MI, WideTy);
2087  } else {
2088  assert(TypeIdx == 1);
2089  // The "number of bits to shift" operand must preserve its value as an
2090  // unsigned integer:
2091  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2092  }
2093 
2095  return Legalized;
2096  case TargetOpcode::G_UDIV:
2097  case TargetOpcode::G_UREM:
2098  case TargetOpcode::G_UMIN:
2099  case TargetOpcode::G_UMAX:
2101  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2102  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2103  widenScalarDst(MI, WideTy);
2105  return Legalized;
2106 
2107  case TargetOpcode::G_UDIVREM:
2109  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2110  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2111  widenScalarDst(MI, WideTy);
2112  widenScalarDst(MI, WideTy, 1);
2114  return Legalized;
2115 
2116  case TargetOpcode::G_SELECT:
2118  if (TypeIdx == 0) {
2119  // Perform operation at larger width (any extension is fine here, high
2120  // bits don't affect the result) and then truncate the result back to the
2121  // original type.
2122  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2123  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2124  widenScalarDst(MI, WideTy);
2125  } else {
2126  bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2127  // Explicit extension is required here since high bits affect the result.
2128  widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2129  }
2131  return Legalized;
2132 
2133  case TargetOpcode::G_FPTOSI:
2134  case TargetOpcode::G_FPTOUI:
2136 
2137  if (TypeIdx == 0)
2138  widenScalarDst(MI, WideTy);
2139  else
2140  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2141 
2143  return Legalized;
2144  case TargetOpcode::G_SITOFP:
2146 
2147  if (TypeIdx == 0)
2148  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2149  else
2150  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2151 
2153  return Legalized;
2154  case TargetOpcode::G_UITOFP:
2156 
2157  if (TypeIdx == 0)
2158  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2159  else
2160  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2161 
2163  return Legalized;
2164  case TargetOpcode::G_LOAD:
2165  case TargetOpcode::G_SEXTLOAD:
2166  case TargetOpcode::G_ZEXTLOAD:
2168  widenScalarDst(MI, WideTy);
2170  return Legalized;
2171 
2172  case TargetOpcode::G_STORE: {
2173  if (TypeIdx != 0)
2174  return UnableToLegalize;
2175 
2176  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2177  if (!Ty.isScalar())
2178  return UnableToLegalize;
2179 
2181 
2182  unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2183  TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2184  widenScalarSrc(MI, WideTy, 0, ExtType);
2185 
2187  return Legalized;
2188  }
2189  case TargetOpcode::G_CONSTANT: {
2190  MachineOperand &SrcMO = MI.getOperand(1);
2192  unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2193  MRI.getType(MI.getOperand(0).getReg()));
2194  assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2195  ExtOpc == TargetOpcode::G_ANYEXT) &&
2196  "Illegal Extend");
2197  const APInt &SrcVal = SrcMO.getCImm()->getValue();
2198  const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2199  ? SrcVal.sext(WideTy.getSizeInBits())
2200  : SrcVal.zext(WideTy.getSizeInBits());
2202  SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2203 
2204  widenScalarDst(MI, WideTy);
2206  return Legalized;
2207  }
2208  case TargetOpcode::G_FCONSTANT: {
2209  MachineOperand &SrcMO = MI.getOperand(1);
2211  APFloat Val = SrcMO.getFPImm()->getValueAPF();
2212  bool LosesInfo;
2213  switch (WideTy.getSizeInBits()) {
2214  case 32:
2216  &LosesInfo);
2217  break;
2218  case 64:
2220  &LosesInfo);
2221  break;
2222  default:
2223  return UnableToLegalize;
2224  }
2225 
2226  assert(!LosesInfo && "extend should always be lossless");
2227 
2229  SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
2230 
2231  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2233  return Legalized;
2234  }
2235  case TargetOpcode::G_IMPLICIT_DEF: {
2237  widenScalarDst(MI, WideTy);
2239  return Legalized;
2240  }
2241  case TargetOpcode::G_BRCOND:
2243  widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2245  return Legalized;
2246 
2247  case TargetOpcode::G_FCMP:
2249  if (TypeIdx == 0)
2250  widenScalarDst(MI, WideTy);
2251  else {
2252  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2253  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2254  }
2256  return Legalized;
2257 
2258  case TargetOpcode::G_ICMP:
2260  if (TypeIdx == 0)
2261  widenScalarDst(MI, WideTy);
2262  else {
2263  unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2264  MI.getOperand(1).getPredicate()))
2265  ? TargetOpcode::G_SEXT
2266  : TargetOpcode::G_ZEXT;
2267  widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2268  widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2269  }
2271  return Legalized;
2272 
2273  case TargetOpcode::G_PTR_ADD:
2274  assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2276  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2278  return Legalized;
2279 
2280  case TargetOpcode::G_PHI: {
2281  assert(TypeIdx == 0 && "Expecting only Idx 0");
2282 
2284  for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2285  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2286  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2287  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2288  }
2289 
2290  MachineBasicBlock &MBB = *MI.getParent();
2292  widenScalarDst(MI, WideTy);
2294  return Legalized;
2295  }
2296  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2297  if (TypeIdx == 0) {
2298  Register VecReg = MI.getOperand(1).getReg();
2299  LLT VecTy = MRI.getType(VecReg);
2301 
2303  WideTy.getSizeInBits()),
2304  1, TargetOpcode::G_SEXT);
2305 
2306  widenScalarDst(MI, WideTy, 0);
2308  return Legalized;
2309  }
2310 
2311  if (TypeIdx != 2)
2312  return UnableToLegalize;
2314  // TODO: Probably should be zext
2315  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2317  return Legalized;
2318  }
2319  case TargetOpcode::G_INSERT_VECTOR_ELT: {
2320  if (TypeIdx == 1) {
2322 
2323  Register VecReg = MI.getOperand(1).getReg();
2324  LLT VecTy = MRI.getType(VecReg);
2325  LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
2326 
2327  widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2328  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2329  widenScalarDst(MI, WideVecTy, 0);
2331  return Legalized;
2332  }
2333 
2334  if (TypeIdx == 2) {
2336  // TODO: Probably should be zext
2337  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2339  return Legalized;
2340  }
2341 
2342  return UnableToLegalize;
2343  }
2344  case TargetOpcode::G_FADD:
2345  case TargetOpcode::G_FMUL:
2346  case TargetOpcode::G_FSUB:
2347  case TargetOpcode::G_FMA:
2348  case TargetOpcode::G_FMAD:
2349  case TargetOpcode::G_FNEG:
2350  case TargetOpcode::G_FABS:
2351  case TargetOpcode::G_FCANONICALIZE:
2352  case TargetOpcode::G_FMINNUM:
2353  case TargetOpcode::G_FMAXNUM:
2354  case TargetOpcode::G_FMINNUM_IEEE:
2355  case TargetOpcode::G_FMAXNUM_IEEE:
2356  case TargetOpcode::G_FMINIMUM:
2357  case TargetOpcode::G_FMAXIMUM:
2358  case TargetOpcode::G_FDIV:
2359  case TargetOpcode::G_FREM:
2360  case TargetOpcode::G_FCEIL:
2361  case TargetOpcode::G_FFLOOR:
2362  case TargetOpcode::G_FCOS:
2363  case TargetOpcode::G_FSIN:
2364  case TargetOpcode::G_FLOG10:
2365  case TargetOpcode::G_FLOG:
2366  case TargetOpcode::G_FLOG2:
2367  case TargetOpcode::G_FRINT:
2368  case TargetOpcode::G_FNEARBYINT:
2369  case TargetOpcode::G_FSQRT:
2370  case TargetOpcode::G_FEXP:
2371  case TargetOpcode::G_FEXP2:
2372  case TargetOpcode::G_FPOW:
2373  case TargetOpcode::G_INTRINSIC_TRUNC:
2374  case TargetOpcode::G_INTRINSIC_ROUND:
2375  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2376  assert(TypeIdx == 0);
2378 
2379  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2380  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2381 
2382  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2384  return Legalized;
2385  case TargetOpcode::G_FPOWI: {
2386  if (TypeIdx != 0)
2387  return UnableToLegalize;
2389  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2390  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2392  return Legalized;
2393  }
2394  case TargetOpcode::G_INTTOPTR:
2395  if (TypeIdx != 1)
2396  return UnableToLegalize;
2397 
2399  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2401  return Legalized;
2402  case TargetOpcode::G_PTRTOINT:
2403  if (TypeIdx != 0)
2404  return UnableToLegalize;
2405 
2407  widenScalarDst(MI, WideTy, 0);
2409  return Legalized;
2410  case TargetOpcode::G_BUILD_VECTOR: {
2412 
2413  const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2414  for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2415  widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2416 
2417  // Avoid changing the result vector type if the source element type was
2418  // requested.
2419  if (TypeIdx == 1) {
2420  MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2421  } else {
2422  widenScalarDst(MI, WideTy, 0);
2423  }
2424 
2426  return Legalized;
2427  }
2428  case TargetOpcode::G_SEXT_INREG:
2429  if (TypeIdx != 0)
2430  return UnableToLegalize;
2431 
2433  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2434  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2436  return Legalized;
2437  case TargetOpcode::G_PTRMASK: {
2438  if (TypeIdx != 1)
2439  return UnableToLegalize;
2441  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2443  return Legalized;
2444  }
2445  }
2446 }
2447 
2449  MachineIRBuilder &B, Register Src, LLT Ty) {
2450  auto Unmerge = B.buildUnmerge(Ty, Src);
2451  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2452  Pieces.push_back(Unmerge.getReg(I));
2453 }
2454 
2457  Register Dst = MI.getOperand(0).getReg();
2458  Register Src = MI.getOperand(1).getReg();
2459  LLT DstTy = MRI.getType(Dst);
2460  LLT SrcTy = MRI.getType(Src);
2461 
2462  if (SrcTy.isVector()) {
2463  LLT SrcEltTy = SrcTy.getElementType();
2464  SmallVector<Register, 8> SrcRegs;
2465 
2466  if (DstTy.isVector()) {
2467  int NumDstElt = DstTy.getNumElements();
2468  int NumSrcElt = SrcTy.getNumElements();
2469 
2470  LLT DstEltTy = DstTy.getElementType();
2471  LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
2472  LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
2473 
2474  // If there's an element size mismatch, insert intermediate casts to match
2475  // the result element type.
2476  if (NumSrcElt < NumDstElt) { // Source element type is larger.
2477  // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
2478  //
2479  // =>
2480  //
2481  // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
 2482  // %4:_(<2 x s8>) = G_BITCAST %2
 2483  // %5:_(<2 x s8>) = G_BITCAST %3
 2484  // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
2485  DstCastTy = LLT::vector(NumDstElt / NumSrcElt, DstEltTy);
2486  SrcPartTy = SrcEltTy;
2487  } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
2488  //
2489  // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
2490  //
2491  // =>
2492  //
2493  // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
 2494  // %4:_(s16) = G_BITCAST %2
 2495  // %5:_(s16) = G_BITCAST %3
 2496  // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
2497  SrcPartTy = LLT::vector(NumSrcElt / NumDstElt, SrcEltTy);
2498  DstCastTy = DstEltTy;
2499  }
2500 
2501  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
2502  for (Register &SrcReg : SrcRegs)
2503  SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
2504  } else
2505  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
2506 
2507  MIRBuilder.buildMerge(Dst, SrcRegs);
2508  MI.eraseFromParent();
2509  return Legalized;
2510  }
2511 
2512  if (DstTy.isVector()) {
2513  SmallVector<Register, 8> SrcRegs;
2514  getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2515  MIRBuilder.buildMerge(Dst, SrcRegs);
2516  MI.eraseFromParent();
2517  return Legalized;
2518  }
2519 
2520  return UnableToLegalize;
2521 }
2522 
2523 /// Figure out the bit offset into a register when coercing a vector index for
2524 /// the wide element type. This is only for the case when promoting vector to
2525 /// one with larger elements.
 2526 ///
2527 ///
2528 /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2529 /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
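/// For example, with 8-bit source elements packed into 32-bit target elements
/// (illustrative sizes), Log2(DstEltSize / SrcEltSize) = 2, so an index of 5
/// gives %offset_idx = 5 & 3 = 1 and %offset_bits = 1 << 3 = 8.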
2531  Register Idx,
2532  unsigned NewEltSize,
2533  unsigned OldEltSize) {
2534  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2535  LLT IdxTy = B.getMRI()->getType(Idx);
2536 
2537  // Now figure out the amount we need to shift to get the target bits.
2538  auto OffsetMask = B.buildConstant(
2539  IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
2540  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
2541  return B.buildShl(IdxTy, OffsetIdx,
2542  B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
2543 }
2544 
2545 /// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
2546 /// is casting to a vector with a smaller element size, perform multiple element
2547 /// extracts and merge the results. If this is coercing to a vector with larger
2548 /// elements, index the bitcasted vector and extract the target element with bit
2549 /// operations. This is intended to force the indexing in the native register
2550 /// size for architectures that can dynamically index the register file.
2553  LLT CastTy) {
2554  if (TypeIdx != 1)
2555  return UnableToLegalize;
2556 
2557  Register Dst = MI.getOperand(0).getReg();
2558  Register SrcVec = MI.getOperand(1).getReg();
2559  Register Idx = MI.getOperand(2).getReg();
2560  LLT SrcVecTy = MRI.getType(SrcVec);
2561  LLT IdxTy = MRI.getType(Idx);
2562 
2563  LLT SrcEltTy = SrcVecTy.getElementType();
2564  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2565  unsigned OldNumElts = SrcVecTy.getNumElements();
2566 
2567  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2568  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2569 
2570  const unsigned NewEltSize = NewEltTy.getSizeInBits();
2571  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
2572  if (NewNumElts > OldNumElts) {
2573  // Decreasing the vector element size
2574  //
2575  // e.g. i64 = extract_vector_elt x:v2i64, y:i32
2576  // =>
2577  // v4i32:castx = bitcast x:v2i64
2578  //
2579  // i64 = bitcast
2580  // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
2581  // (i32 (extract_vector_elt castx, (2 * y + 1)))
2582  //
2583  if (NewNumElts % OldNumElts != 0)
2584  return UnableToLegalize;
2585 
2586  // Type of the intermediate result vector.
2587  const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
2588  LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy);
2589 
2590  auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
2591 
2592  SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
2593  auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
2594 
2595  for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
2596  auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
2597  auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
2598  auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
2599  NewOps[I] = Elt.getReg(0);
2600  }
2601 
2602  auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
2603  MIRBuilder.buildBitcast(Dst, NewVec);
2604  MI.eraseFromParent();
2605  return Legalized;
2606  }
2607 
2608  if (NewNumElts < OldNumElts) {
2609  if (NewEltSize % OldEltSize != 0)
2610  return UnableToLegalize;
2611 
2612  // This only depends on powers of 2 because we use bit tricks to figure out
2613  // the bit offset we need to shift to get the target element. A general
2614  // expansion could emit division/multiply.
2615  if (!isPowerOf2_32(NewEltSize / OldEltSize))
2616  return UnableToLegalize;
2617 
2618  // Increasing the vector element size.
2619  // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
2620  //
2621  // =>
2622  //
2623  // %cast = G_BITCAST %vec
2624  // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
2625  // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
2626  // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2627  // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2628  // %elt_bits = G_LSHR %wide_elt, %offset_bits
2629  // %elt = G_TRUNC %elt_bits
2630 
2631  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2632  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2633 
2634  // Divide to get the index in the wider element type.
2635  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2636 
2637  Register WideElt = CastVec;
2638  if (CastTy.isVector()) {
2639  WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2640  ScaledIdx).getReg(0);
2641  }
2642 
2643  // Compute the bit offset into the register of the target element.
2645  MIRBuilder, Idx, NewEltSize, OldEltSize);
2646 
2647  // Shift the wide element to get the target element.
2648  auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
2649  MIRBuilder.buildTrunc(Dst, ExtractedBits);
2650  MI.eraseFromParent();
2651  return Legalized;
2652  }
2653 
2654  return UnableToLegalize;
2655 }
2656 
 2657 /// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits in \p
2658 /// TargetReg, while preserving other bits in \p TargetReg.
2659 ///
 2660 /// (ZExt(InsertReg) << OffsetBits) | (TargetReg & ~(LowBitsSet(InsertReg.size()) << OffsetBits))
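/// For example (illustrative sizes), inserting an s8 value at bit offset 16 of
/// an s32 target uses the mask 0xFF << 16 and produces
/// (ZExt(InsertReg) << 16) | (TargetReg & ~0x00FF0000).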
2662  Register TargetReg, Register InsertReg,
2663  Register OffsetBits) {
2664  LLT TargetTy = B.getMRI()->getType(TargetReg);
2665  LLT InsertTy = B.getMRI()->getType(InsertReg);
2666  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
2667  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
2668 
2669  // Produce a bitmask of the value to insert
2670  auto EltMask = B.buildConstant(
2671  TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
2672  InsertTy.getSizeInBits()));
2673  // Shift it into position
2674  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
2675  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
2676 
2677  // Clear out the bits in the wide element
2678  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
2679 
 2680  // The shifted, zero-extended value is already zero outside its field, so
 2681  // OR it into the masked wide element.
2682  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
2683 }
2684 
2685 /// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
2686 /// is increasing the element size, perform the indexing in the target element
2687 /// type, and use bit operations to insert at the element position. This is
2688 /// intended for architectures that can dynamically index the register file and
2689 /// want to force indexing in the native register size.
2692  LLT CastTy) {
2693  if (TypeIdx != 0)
2694  return UnableToLegalize;
2695 
2696  Register Dst = MI.getOperand(0).getReg();
2697  Register SrcVec = MI.getOperand(1).getReg();
2698  Register Val = MI.getOperand(2).getReg();
2699  Register Idx = MI.getOperand(3).getReg();
2700 
2701  LLT VecTy = MRI.getType(Dst);
2702  LLT IdxTy = MRI.getType(Idx);
2703 
2704  LLT VecEltTy = VecTy.getElementType();
2705  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2706  const unsigned NewEltSize = NewEltTy.getSizeInBits();
2707  const unsigned OldEltSize = VecEltTy.getSizeInBits();
2708 
2709  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2710  unsigned OldNumElts = VecTy.getNumElements();
2711 
2712  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2713  if (NewNumElts < OldNumElts) {
2714  if (NewEltSize % OldEltSize != 0)
2715  return UnableToLegalize;
2716 
2717  // This only depends on powers of 2 because we use bit tricks to figure out
2718  // the bit offset we need to shift to get the target element. A general
2719  // expansion could emit division/multiply.
2720  if (!isPowerOf2_32(NewEltSize / OldEltSize))
2721  return UnableToLegalize;
2722 
2723  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2724  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2725 
2726  // Divide to get the index in the wider element type.
2727  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2728 
2729  Register ExtractedElt = CastVec;
2730  if (CastTy.isVector()) {
2731  ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2732  ScaledIdx).getReg(0);
2733  }
2734 
2735  // Compute the bit offset into the register of the target element.
2737  MIRBuilder, Idx, NewEltSize, OldEltSize);
2738 
2739  Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
2740  Val, OffsetBits);
2741  if (CastTy.isVector()) {
2742  InsertedElt = MIRBuilder.buildInsertVectorElement(
2743  CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
2744  }
2745 
2746  MIRBuilder.buildBitcast(Dst, InsertedElt);
2747  MI.eraseFromParent();
2748  return Legalized;
2749  }
2750 
2751  return UnableToLegalize;
2752 }
2753 
2756  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2757  Register DstReg = MI.getOperand(0).getReg();
2758  Register PtrReg = MI.getOperand(1).getReg();
2759  LLT DstTy = MRI.getType(DstReg);
2760  auto &MMO = **MI.memoperands_begin();
2761 
2762  if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
2763  if (MI.getOpcode() == TargetOpcode::G_LOAD) {
2764  // This load needs splitting into power of 2 sized loads.
2765  if (DstTy.isVector())
2766  return UnableToLegalize;
2767  if (isPowerOf2_32(DstTy.getSizeInBits()))
2768  return UnableToLegalize; // Don't know what we're being asked to do.
2769 
2770  // Our strategy here is to generate anyextending loads for the smaller
2771  // types up to next power-2 result type, and then combine the two larger
2772  // result values together, before truncating back down to the non-pow-2
2773  // type.
2774  // E.g. v1 = i24 load =>
2775  // v2 = i32 zextload (2 byte)
2776  // v3 = i32 load (1 byte)
2777  // v4 = i32 shl v3, 16
2778  // v5 = i32 or v4, v2
2779  // v1 = i24 trunc v5
2780  // By doing this we generate the correct truncate which should get
2781  // combined away as an artifact with a matching extend.
2782  uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
2783  uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
2784 
2786  MachineMemOperand *LargeMMO =
2787  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2788  MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
2789  &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2790 
2791  LLT PtrTy = MRI.getType(PtrReg);
2792  unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
2793  LLT AnyExtTy = LLT::scalar(AnyExtSize);
2794  Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2795  Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
2796  auto LargeLoad = MIRBuilder.buildLoadInstr(
2797  TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
2798 
2799  auto OffsetCst = MIRBuilder.buildConstant(
2800  LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
2801  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2802  auto SmallPtr =
2803  MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
2804  auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr,
2805  *SmallMMO);
2806 
2807  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
2808  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
2809  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
2810  MIRBuilder.buildTrunc(DstReg, {Or});
2811  MI.eraseFromParent();
2812  return Legalized;
2813  }
2814 
2815  MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
2816  MI.eraseFromParent();
2817  return Legalized;
2818  }
2819 
2820  return UnableToLegalize;
2821 }
2822 
2825  // Lower a non-power of 2 store into multiple pow-2 stores.
2826  // E.g. split an i24 store into an i16 store + i8 store.
2827  // We do this by first extending the stored value to the next largest power
2828  // of 2 type, and then using truncating stores to store the components.
2829  // By doing this, likewise with G_LOAD, generate an extend that can be
2830  // artifact-combined away instead of leaving behind extracts.
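 // E.g. an i24 store becomes roughly (register names are illustrative):
 //   %ext = G_ANYEXT %val(s24) to s32
 //   %small = G_LSHR %ext, 16
 //   G_STORE %ext, %ptr            ; 2-byte truncating store
 //   G_STORE %small, %ptr + 2      ; 1-byte truncating store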
2831  Register SrcReg = MI.getOperand(0).getReg();
2832  Register PtrReg = MI.getOperand(1).getReg();
2833  LLT SrcTy = MRI.getType(SrcReg);
2834  MachineMemOperand &MMO = **MI.memoperands_begin();
2835  if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
2836  return UnableToLegalize;
2837  if (SrcTy.isVector())
2838  return UnableToLegalize;
2839  if (isPowerOf2_32(SrcTy.getSizeInBits()))
2840  return UnableToLegalize; // Don't know what we're being asked to do.
2841 
2842  // Extend to the next pow-2.
2843  const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
2844  auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
2845 
2846  // Obtain the smaller value by shifting away the larger value.
2847  uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
2848  uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
2849  auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
2850  auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
2851 
2852  // Generate the PtrAdd and truncating stores.
2853  LLT PtrTy = MRI.getType(PtrReg);
2854  auto OffsetCst = MIRBuilder.buildConstant(
2855  LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
2856  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2857  auto SmallPtr =
2858  MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
2859 
2861  MachineMemOperand *LargeMMO =
2862  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2863  MachineMemOperand *SmallMMO =
2864  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2865  MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
2866  MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
2867  MI.eraseFromParent();
2868  return Legalized;
2869 }
2870 
2872 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
2873  switch (MI.getOpcode()) {
2874  case TargetOpcode::G_LOAD: {
2875  if (TypeIdx != 0)
2876  return UnableToLegalize;
2877 
2879  bitcastDst(MI, CastTy, 0);
2881  return Legalized;
2882  }
2883  case TargetOpcode::G_STORE: {
2884  if (TypeIdx != 0)
2885  return UnableToLegalize;
2886 
2888  bitcastSrc(MI, CastTy, 0);
2890  return Legalized;
2891  }
2892  case TargetOpcode::G_SELECT: {
2893  if (TypeIdx != 0)
2894  return UnableToLegalize;
2895 
2896  if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
2897  LLVM_DEBUG(
2898  dbgs() << "bitcast action not implemented for vector select\n");
2899  return UnableToLegalize;
2900  }
2901 
2903  bitcastSrc(MI, CastTy, 2);
2904  bitcastSrc(MI, CastTy, 3);
2905  bitcastDst(MI, CastTy, 0);
2907  return Legalized;
2908  }
2909  case TargetOpcode::G_AND:
2910  case TargetOpcode::G_OR:
2911  case TargetOpcode::G_XOR: {
2913  bitcastSrc(MI, CastTy, 1);
2914  bitcastSrc(MI, CastTy, 2);
2915  bitcastDst(MI, CastTy, 0);
2917  return Legalized;
2918  }
2919  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
2920  return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
2921  case TargetOpcode::G_INSERT_VECTOR_ELT:
2922  return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
2923  default:
2924  return UnableToLegalize;
2925  }
2926 }
2927 
2928 // Legalize an instruction by changing the opcode in place.
2929 void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
2931  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
2933 }
2934 
2936 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
2937  using namespace TargetOpcode;
2938 
2939  switch(MI.getOpcode()) {
2940  default:
2941  return UnableToLegalize;
2942  case TargetOpcode::G_BITCAST:
2943  return lowerBitcast(MI);
2944  case TargetOpcode::G_SREM:
2945  case TargetOpcode::G_UREM: {
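 // Lower x % y to x - (x / y) * y, using a division of matching signedness.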
2946  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2947  auto Quot =
2948  MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
2949  {MI.getOperand(1), MI.getOperand(2)});
2950 
2951  auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
2952  MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
2953  MI.eraseFromParent();
2954  return Legalized;
2955  }
2956  case TargetOpcode::G_SADDO:
2957  case TargetOpcode::G_SSUBO:
2958  return lowerSADDO_SSUBO(MI);
2959  case TargetOpcode::G_UMULH:
2960  case TargetOpcode::G_SMULH:
2961  return lowerSMULH_UMULH(MI);
2962  case TargetOpcode::G_SMULO:
2963  case TargetOpcode::G_UMULO: {
2964  // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
2965  // result.
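 // E.g. a G_UMULO is rewritten roughly as (register names are illustrative):
 //   %res = G_MUL %lhs, %rhs
 //   %hi = G_UMULH %lhs, %rhs
 //   %overflow = G_ICMP ne, %hi, 0
 // For G_SMULO the compare is against (%res >> (bitwidth - 1)) instead of 0.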
2966  Register Res = MI.getOperand(0).getReg();
2967  Register Overflow = MI.getOperand(1).getReg();
2968  Register LHS = MI.getOperand(2).getReg();
2969  Register RHS = MI.getOperand(3).getReg();
2970  LLT Ty = MRI.getType(Res);
2971 
2972  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
2973  ? TargetOpcode::G_SMULH
2974  : TargetOpcode::G_UMULH;
2975 
2977  const auto &TII = MIRBuilder.getTII();
2978  MI.setDesc(TII.get(TargetOpcode::G_MUL));
2979  MI.RemoveOperand(1);
2981 
2982  auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
2983  auto Zero = MIRBuilder.buildConstant(Ty, 0);
2984 
2985  // Move insert point forward so we can use the Res register if needed.
2987 
2988  // For *signed* multiply, overflow is detected by checking:
2989  // (hi != (lo >> bitwidth-1))
2990  if (Opcode == TargetOpcode::G_SMULH) {
2991  auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
2992  auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
2993  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
2994  } else {
2995  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
2996  }
2997  return Legalized;
2998  }
2999  case TargetOpcode::G_FNEG: {
3000  Register Res = MI.getOperand(0).getReg();
3001  LLT Ty = MRI.getType(Res);
3002 
3003  // TODO: Handle vector types once we are able to
3004  // represent them.
3005  if (Ty.isVector())
3006  return UnableToLegalize;
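 // Negation just flips the sign bit: Res = Src ^ SignMask.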
3007  auto SignMask =
3009  Register SubByReg = MI.getOperand(1).getReg();
3010  MIRBuilder.buildXor(Res, SubByReg, SignMask);
3011  MI.eraseFromParent();
3012  return Legalized;
3013  }
3014  case TargetOpcode::G_FSUB: {
3015  Register Res = MI.getOperand(0).getReg();
3016  LLT Ty = MRI.getType(Res);
3017 
3018  // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3019  // First, check if G_FNEG is marked as Lower. If so, we may
3020  // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
3021  if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
3022  return UnableToLegalize;
3023  Register LHS = MI.getOperand(1).getReg();
3024  Register RHS = MI.getOperand(2).getReg();
3026  MIRBuilder.buildFNeg(Neg, RHS);
3027  MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3028  MI.eraseFromParent();
3029  return Legalized;
3030  }
3031  case TargetOpcode::G_FMAD:
3032  return lowerFMad(MI);
3033  case TargetOpcode::G_FFLOOR:
3034  return lowerFFloor(MI);
3035  case TargetOpcode::G_INTRINSIC_ROUND:
3036  return lowerIntrinsicRound(MI);
3037  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
3038  // Since round even is the assumed rounding mode for unconstrained FP
3039  // operations, rint and roundeven are the same operation.
3040  changeOpcode(MI, TargetOpcode::G_FRINT);
3041  return Legalized;
3042  }
3043  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3044  Register OldValRes = MI.getOperand(0).getReg();
3045  Register SuccessRes = MI.getOperand(1).getReg();
3046  Register Addr = MI.getOperand(2).getReg();
3047  Register CmpVal = MI.getOperand(3).getReg();
3048  Register NewVal = MI.getOperand(4).getReg();
3049  MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3050  **MI.memoperands_begin());
3051  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3052  MI.eraseFromParent();
3053  return Legalized;
3054  }
3055  case TargetOpcode::G_LOAD:
3056  case TargetOpcode::G_SEXTLOAD:
3057  case TargetOpcode::G_ZEXTLOAD:
3058  return lowerLoad(MI);
3059  case TargetOpcode::G_STORE:
3060  return lowerStore(MI);
3061  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3062  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3063  case TargetOpcode::G_CTLZ:
3064  case TargetOpcode::G_CTTZ:
3065  case TargetOpcode::G_CTPOP:
3066  return lowerBitCount(MI);
3067  case G_UADDO: {
3068  Register Res = MI.getOperand(0).getReg();
3069  Register CarryOut = MI.getOperand(1).getReg();
3070  Register LHS = MI.getOperand(2).getReg();
3071  Register RHS = MI.getOperand(3).getReg();
3072 
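 // The carry out is set when the wrapped sum compares unsigned-less-than one
 // of the addends (RHS here).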
3073  MIRBuilder.buildAdd(Res, LHS, RHS);
3074  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
3075 
3076  MI.eraseFromParent();
3077  return Legalized;
3078  }
3079  case G_UADDE: {
3080  Register Res = MI.getOperand(0).getReg();
3081  Register CarryOut = MI.getOperand(1).getReg();
3082  Register LHS = MI.getOperand(2).getReg();
3083  Register RHS = MI.getOperand(3).getReg();
3084  Register CarryIn = MI.getOperand(4).getReg();
3085  LLT Ty = MRI.getType(Res);
3086 
3087  auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3088  auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3089  MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3090  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
3091 
3092  MI.eraseFromParent();
3093  return Legalized;
3094  }
3095  case G_USUBO: {
3096  Register Res = MI.getOperand(0).getReg();
3097  Register BorrowOut = MI.getOperand(1).getReg();
3098  Register LHS = MI.getOperand(2).getReg();
3099  Register RHS = MI.getOperand(3).getReg();
3100 
3101  MIRBuilder.buildSub(Res, LHS, RHS);
3102  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3103 
3104  MI.eraseFromParent();
3105  return Legalized;
3106  }
3107  case G_USUBE: {
3108  Register Res = MI.getOperand(0).getReg();
3109  Register BorrowOut = MI.getOperand(1).getReg();
3110  Register LHS = MI.getOperand(2).getReg();
3111  Register RHS = MI.getOperand(3).getReg();
3112  Register BorrowIn = MI.getOperand(4).getReg();
3113  const LLT CondTy = MRI.getType(BorrowOut);
3114  const LLT Ty = MRI.getType(Res);
3115 
3116  auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3117  auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3118  MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3119 
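 // A borrow is produced when LHS < RHS, or when LHS == RHS and a borrow came in.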
3120  auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
3121  auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
3122  MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
3123 
3124  MI.eraseFromParent();
3125  return Legalized;
3126  }
3127  case G_UITOFP:
3128  return lowerUITOFP(MI);
3129  case G_SITOFP:
3130  return lowerSITOFP(MI);
3131  case G_FPTOUI:
3132  return lowerFPTOUI(MI);
3133  case G_FPTOSI:
3134  return lowerFPTOSI(MI);
3135  case G_FPTRUNC:
3136  return lowerFPTRUNC(MI);
3137  case G_FPOWI:
3138  return lowerFPOWI(MI);
3139  case G_SMIN:
3140  case G_SMAX:
3141  case G_UMIN:
3142  case G_UMAX:
3143  return lowerMinMax(MI);
3144  case G_FCOPYSIGN:
3145  return lowerFCopySign(MI);
3146  case G_FMINNUM:
3147  case G_FMAXNUM:
3148  return lowerFMinNumMaxNum(MI);
3149  case G_MERGE_VALUES:
3150  return lowerMergeValues(MI);
3151  case G_UNMERGE_VALUES:
3152  return lowerUnmergeValues(MI);
3153  case TargetOpcode::G_SEXT_INREG: {
3154  assert(MI.getOperand(2).isImm() && "Expected immediate");
3155  int64_t SizeInBits = MI.getOperand(2).getImm();
3156 
3157  Register DstReg = MI.getOperand(0).getReg();
3158  Register SrcReg = MI.getOperand(1).getReg();
3159  LLT DstTy = MRI.getType(DstReg);
3160  Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3161 
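 // Sign-extend from bit SizeInBits by shifting left and then arithmetic
 // shifting right by (scalar size - SizeInBits).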
3162  auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3163  MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3164  MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3165  MI.eraseFromParent();
3166  return Legalized;
3167  }
3168  case G_EXTRACT_VECTOR_ELT:
3169  case G_INSERT_VECTOR_ELT:
3171  case G_SHUFFLE_VECTOR:
3172  return lowerShuffleVector(MI);
3173  case G_DYN_STACKALLOC:
3174  return lowerDynStackAlloc(MI);
3175  case G_EXTRACT:
3176  return lowerExtract(MI);
3177  case G_INSERT:
3178  return lowerInsert(MI);
3179  case G_BSWAP:
3180  return lowerBswap(MI);
3181  case G_BITREVERSE:
3182  return lowerBitreverse(MI);
3183  case G_READ_REGISTER:
3184  case G_WRITE_REGISTER:
3185  return lowerReadWriteRegister(MI);
3186  case G_UADDSAT:
3187  case G_USUBSAT: {
3188  // Try to make a reasonable guess about which lowering strategy to use. The
3189  // target can override this with custom lowering and calling the
3190  // implementation functions.
3191  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3192  if (LI.isLegalOrCustom({G_UMIN, Ty}))
3193  return lowerAddSubSatToMinMax(MI);
3194  return lowerAddSubSatToAddoSubo(MI);
3195  }
3196  case G_SADDSAT:
3197  case G_SSUBSAT: {
3198  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3199 
3200  // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3201  // since it's a shorter expansion. However, we would need to figure out the
3202  // preferred boolean type for the carry out for the query.
3203  if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3204  return lowerAddSubSatToMinMax(MI);
3205  return lowerAddSubSatToAddoSubo(MI);
3206  }
3207  case G_SSHLSAT:
3208  case G_USHLSAT:
3209  return lowerShlSat(MI);
3210  case G_ABS:
3211  return lowerAbsToAddXor(MI);
3212  case G_SELECT:
3213  return lowerSelect(MI);
3214  case G_SDIVREM:
3215  case G_UDIVREM:
3216  return lowerDIVREM(MI);
3217  case G_FSHL:
3218  case G_FSHR:
3219  return lowerFunnelShift(MI);
3220  case G_ROTL:
3221  case G_ROTR:
3222  return lowerRotate(MI);
3223  }
3224 }
3225 
3227  Align MinAlign) const {
3228  // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3229  // datalayout for the preferred alignment. Also there should be a target hook
3230  // for this to allow targets to reduce the alignment and ignore the
3231  // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3232  // the type.
3234 }
3235 
3238  MachinePointerInfo &PtrInfo) {
3241  int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3242 
3243  unsigned AddrSpace = DL.getAllocaAddrSpace();
3244  LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3245 
3246  PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3247  return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3248 }
3249 
3251  LLT VecTy) {
3252  int64_t IdxVal;
3253  if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3254  return IdxReg;
3255 
3256  LLT IdxTy = B.getMRI()->getType(IdxReg);
3257  unsigned NElts = VecTy.getNumElements();
3258  if (isPowerOf2_32(NElts)) {
3259  APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3260  return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3261  }
3262 
3263  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3264  .getReg(0);
3265 }
3266 
3268  Register Index) {
3269  LLT EltTy = VecTy.getElementType();
3270 
3271  // Calculate the element offset and add it to the pointer.
3272  unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3273  assert(EltSize * 8 == EltTy.getSizeInBits() &&
3274  "Converting bits to bytes lost precision");
3275 
3277 
3278  LLT IdxTy = MRI.getType(Index);
3279  auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3280  MIRBuilder.buildConstant(IdxTy, EltSize));
3281 
3282  LLT PtrTy = MRI.getType(VecPtr);
3283  return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
3284 }
3285 
3287  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
3288  Register DstReg = MI.getOperand(0).getReg();
3289  LLT DstTy = MRI.getType(DstReg);
3290  LLT LCMTy = getLCMType(DstTy, NarrowTy);
3291 
3292  unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
3293 
3294  auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
3295  SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
3296 
3297  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3298  MI.eraseFromParent();
3299  return Legalized;
3300 }
3301 
3302 // Handle splitting vector operations which need to have the same number of
3303 // elements in each type index, but each type index may have a different element
3304 // type.
3305 //
3306 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
3307 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3308 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3309 //
3310 // Also handles some irregular breakdown cases, e.g.
3311 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
3312 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3313 // s64 = G_SHL s64, s32
3316  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
3317  if (TypeIdx != 0)
3318  return UnableToLegalize;
3319 
3320  const LLT NarrowTy0 = NarrowTyArg;
3321  const unsigned NewNumElts =
3322  NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
3323 
3324  const Register DstReg = MI.getOperand(0).getReg();
3325  LLT DstTy = MRI.getType(DstReg);
3326  LLT LeftoverTy0;
3327 
3328  // All of the operands need to have the same number of elements, so if we can
3329  // determine a type breakdown for the result type, we can for all of the
3330  // source types.
3331  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
3332  if (NumParts < 0)
3333  return UnableToLegalize;
3334 
3336 
3337  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
3338  SmallVector<Register, 4> PartRegs, LeftoverRegs;
3339 
3340  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
3341  Register SrcReg = MI.getOperand(I).getReg();
3342  LLT SrcTyI = MRI.getType(SrcReg);
3343  LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
3344  LLT LeftoverTyI;
3345 
3346  // Split this operand into the requested typed registers, and any leftover
3347  // required to reproduce the original type.
3348  if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
3349  LeftoverRegs))
3350  return UnableToLegalize;
3351 
3352  if (I == 1) {
 3353  // For the first operand, create an instruction for each part and set up
 3354  // the result.
3355  for (Register PartReg : PartRegs) {
3356  Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3357  NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
3358  .addDef(PartDstReg)
3359  .addUse(PartReg));
3360  DstRegs.push_back(PartDstReg);
3361  }
3362 
3363  for (Register LeftoverReg : LeftoverRegs) {
3364  Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
3365  NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
3366  .addDef(PartDstReg)
3367  .addUse(LeftoverReg));
3368  LeftoverDstRegs.push_back(PartDstReg);
3369  }
3370  } else {
3371  assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
3372 
3373  // Add the newly created operand splits to the existing instructions. The
3374  // odd-sized pieces are ordered after the requested NarrowTyArg sized
3375  // pieces.
3376  unsigned InstCount = 0;
3377  for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
3378  NewInsts[InstCount++].addUse(PartRegs[J]);
3379  for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
3380  NewInsts[InstCount++].addUse(LeftoverRegs[J]);
3381  }
3382 
3383  PartRegs.clear();
3384  LeftoverRegs.clear();
3385  }
3386 
3387  // Insert the newly built operations and rebuild the result register.
3388  for (auto &MIB : NewInsts)
3389  MIRBuilder.insertInstr(MIB);
3390 
3391  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
3392 
3393  MI.eraseFromParent();
3394  return Legalized;
3395 }
3396 
3397 LegalizerHelper::LegalizeResult
3398 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
3399  LLT NarrowTy) {
3400  if (TypeIdx != 0)
3401  return UnableToLegalize;
3402 
3403  Register DstReg = MI.getOperand(0).getReg();
3404  Register SrcReg = MI.getOperand(1).getReg();
3405  LLT DstTy = MRI.getType(DstReg);
3406  LLT SrcTy = MRI.getType(SrcReg);
3407 
3408  LLT NarrowTy0 = NarrowTy;
3409  LLT NarrowTy1;
3410  unsigned NumParts;
3411 
3412  if (NarrowTy.isVector()) {
3413  // Uneven breakdown not handled.
3414  NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
3415  if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
3416  return UnableToLegalize;
3417 
3418  NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
3419  } else {
3420  NumParts = DstTy.getNumElements();
3421  NarrowTy1 = SrcTy.getElementType();
3422  }
3423 
3424  SmallVector<Register, 4> SrcRegs, DstRegs;
3425  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
3426 
3427  for (unsigned I = 0; I < NumParts; ++I) {
3428  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3429  MachineInstr *NewInst =
3430  MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
3431 
3432  NewInst->setFlags(MI.getFlags());
3433  DstRegs.push_back(DstReg);
3434  }
3435 
3436  if (NarrowTy.isVector())
3437  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3438  else
3439  MIRBuilder.buildBuildVector(DstReg, DstRegs);
3440 
3441  MI.eraseFromParent();
3442  return Legalized;
3443 }
3444 
3445 LegalizerHelper::LegalizeResult
3446 LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
3447  LLT NarrowTy) {
3448  Register DstReg = MI.getOperand(0).getReg();
3449  Register Src0Reg = MI.getOperand(2).getReg();
3450  LLT DstTy = MRI.getType(DstReg);
3451  LLT SrcTy = MRI.getType(Src0Reg);
3452 
3453  unsigned NumParts;
3454  LLT NarrowTy0, NarrowTy1;
3455 
3456  if (TypeIdx == 0) {
3457  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3458  unsigned OldElts = DstTy.getNumElements();
3459 
3460  NarrowTy0 = NarrowTy;
3461  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
3462  NarrowTy1 = NarrowTy.isVector() ?
3463  LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
3464  SrcTy.getElementType();
3465 
3466  } else {
3467  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3468  unsigned OldElts = SrcTy.getNumElements();
3469 
3470  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
3471  NarrowTy.getNumElements();
3472  NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
3473  DstTy.getScalarSizeInBits());
3474  NarrowTy1 = NarrowTy;
3475  }
3476 
3477  // FIXME: Don't know how to handle the situation where the small vectors
3478  // aren't all the same size yet.
3479  if (NarrowTy1.isVector() &&
3480  NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
3481  return UnableToLegalize;
3482 
3483  CmpInst::Predicate Pred
3484  = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3485 
3486  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
3487  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
3488  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
3489 
3490  for (unsigned I = 0; I < NumParts; ++I) {
3491  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3492  DstRegs.push_back(DstReg);
3493 
3494  if (MI.getOpcode() == TargetOpcode::G_ICMP)
3495  MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
3496  else {
3497  MachineInstr *NewCmp
3498  = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
3499  NewCmp->setFlags(MI.getFlags());
3500  }
3501  }
3502 
3503  if (NarrowTy1.isVector())
3504  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3505  else
3506  MIRBuilder.buildBuildVector(DstReg, DstRegs);
3507 
3508  MI.eraseFromParent();
3509  return Legalized;
3510 }
3511 
3512 LegalizerHelper::LegalizeResult
3513 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
3514  LLT NarrowTy) {
3515  Register DstReg = MI.getOperand(0).getReg();
3516  Register CondReg = MI.getOperand(1).getReg();
3517 
3518  unsigned NumParts = 0;
3519  LLT NarrowTy0, NarrowTy1;
3520 
3521  LLT DstTy = MRI.getType(DstReg);
3522  LLT CondTy = MRI.getType(CondReg);
3523  unsigned Size = DstTy.getSizeInBits();
3524 
3525  assert(TypeIdx == 0 || CondTy.isVector());
3526 
3527  if (TypeIdx == 0) {
3528  NarrowTy0 = NarrowTy;
3529  NarrowTy1 = CondTy;
3530 
3531  unsigned NarrowSize = NarrowTy0.getSizeInBits();
3532  // FIXME: Don't know how to handle the situation where the small vectors
3533  // aren't all the same size yet.
3534  if (Size % NarrowSize != 0)
3535  return UnableToLegalize;
3536 
3537  NumParts = Size / NarrowSize;
3538 
3539  // Need to break down the condition type
3540  if (CondTy.isVector()) {
3541  if (CondTy.getNumElements() == NumParts)
3542  NarrowTy1 = CondTy.getElementType();
3543  else
3544  NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
3545  CondTy.getScalarSizeInBits());
3546  }
3547  } else {
3548  NumParts = CondTy.getNumElements();
3549  if (NarrowTy.isVector()) {
3550  // TODO: Handle uneven breakdown.
3551  if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
3552  return UnableToLegalize;
3553 
3554  return UnableToLegalize;
3555  } else {
3556  NarrowTy0 = DstTy.getElementType();
3557  NarrowTy1 = NarrowTy;
3558  }
3559  }
3560 
3561  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
3562  if (CondTy.isVector())
3563  extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
3564 
3565  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
3566  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
3567 
3568  for (unsigned i = 0; i < NumParts; ++i) {
3569  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3570  MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
3571  Src1Regs[i], Src2Regs[i]);
3572  DstRegs.push_back(DstReg);
3573  }
3574 
3575  if (NarrowTy0.isVector())
3576  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3577  else
3578  MIRBuilder.buildBuildVector(DstReg, DstRegs);
3579 
3580  MI.eraseFromParent();
3581  return Legalized;
3582 }
3583 
3584 LegalizerHelper::LegalizeResult
3585 LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
3586  LLT NarrowTy) {
3587  const Register DstReg = MI.getOperand(0).getReg();
3588  LLT PhiTy = MRI.getType(DstReg);
3589  LLT LeftoverTy;
3590 
3591  // All of the operands need to have the same number of elements, so if we can
3592  // determine a type breakdown for the result type, we can for all of the
3593  // source types.
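  // For example (illustrative), a G_PHI of <3 x s32> with NarrowTy = <2 x s32>
  // becomes one <2 x s32> phi plus one s32 leftover phi, with matching pieces
  // extracted from the incoming value in each predecessor.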
3594  int NumParts, NumLeftover;
3595  std::tie(NumParts, NumLeftover)
3596  = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
3597  if (NumParts < 0)
3598  return UnableToLegalize;
3599 
3600  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
3601  SmallVector<MachineInstrBuilder, 4> NewInsts;
3602 
3603  const int TotalNumParts = NumParts + NumLeftover;
3604 
3605  // Insert the new phis in the result block first.
3606  for (int I = 0; I != TotalNumParts; ++I) {
3607  LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
3608  Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
3609  NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
3610  .addDef(PartDstReg));
3611  if (I < NumParts)
3612  DstRegs.push_back(PartDstReg);
3613  else
3614  LeftoverDstRegs.push_back(PartDstReg);
3615  }
3616 
3617  MachineBasicBlock *MBB = MI.getParent();
3618  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
3619  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
3620 
3621  SmallVector<Register, 4> PartRegs, LeftoverRegs;
3622 
3623  // Insert code to extract the incoming values in each predecessor block.
3624  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3625  PartRegs.clear();
3626  LeftoverRegs.clear();
3627 
3628  Register SrcReg = MI.getOperand(I).getReg();
3629  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3630  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3631 
3632  LLT Unused;
3633  if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
3634  LeftoverRegs))
3635  return UnableToLegalize;
3636 
3637  // Add the newly created operand splits to the existing instructions. The
3638  // odd-sized pieces are ordered after the requested NarrowTy sized
3639  // pieces.
3640  for (int J = 0; J != TotalNumParts; ++J) {
3641  MachineInstrBuilder MIB = NewInsts[J];
3642  MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
3643  MIB.addMBB(&OpMBB);
3644  }
3645  }
3646 
3647  MI.eraseFromParent();
3648  return Legalized;
3649 }
3650 
3651 LegalizerHelper::LegalizeResult
3652 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
3653  unsigned TypeIdx,
3654  LLT NarrowTy) {
3655  if (TypeIdx != 1)
3656  return UnableToLegalize;
3657 
3658  const int NumDst = MI.getNumOperands() - 1;
3659  const Register SrcReg = MI.getOperand(NumDst).getReg();
3660  LLT SrcTy = MRI.getType(SrcReg);
3661 
3662  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3663 
3664  // TODO: Create sequence of extracts.
3665  if (DstTy == NarrowTy)
3666  return UnableToLegalize;
3667 
3668  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3669  if (DstTy == GCDTy) {
3670  // This would just be a copy of the same unmerge.
3671  // TODO: Create extracts, pad with undef and create intermediate merges.
3672  return UnableToLegalize;
3673  }
3674 
3675  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
3676  const int NumUnmerge = Unmerge->getNumOperands() - 1;
3677  const int PartsPerUnmerge = NumDst / NumUnmerge;
3678 
3679  for (int I = 0; I != NumUnmerge; ++I) {
3680  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3681 
3682  for (int J = 0; J != PartsPerUnmerge; ++J)
3683  MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3684  MIB.addUse(Unmerge.getReg(I));
3685  }
3686 
3687  MI.eraseFromParent();
3688  return Legalized;
3689 }
3690 
3691 LegalizerHelper::LegalizeResult
3692 LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
3693  LLT NarrowTy) {
3694  Register Result = MI.getOperand(0).getReg();
3695  Register Overflow = MI.getOperand(1).getReg();
3696  Register LHS = MI.getOperand(2).getReg();
3697  Register RHS = MI.getOperand(3).getReg();
3698 
3699  LLT SrcTy = MRI.getType(LHS);
3700  if (!SrcTy.isVector())
3701  return UnableToLegalize;
3702 
3703  LLT ElementType = SrcTy.getElementType();
3704  LLT OverflowElementTy = MRI.getType(Overflow).getElementType();
3705  const int NumResult = SrcTy.getNumElements();
3706  LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3707 
3708  // Unmerge the operands to smaller parts of GCD type.
3709  auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS);
3710  auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS);
3711 
3712  const int NumOps = UnmergeLHS->getNumOperands() - 1;
3713  const int PartsPerUnmerge = NumResult / NumOps;
3714  LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy);
3715  LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType);
3716 
3717  // Perform the operation over unmerged parts.
3718  SmallVector<Register, 8> ResultParts;
3719  SmallVector<Register, 8> OverflowParts;
3720  for (int I = 0; I != NumOps; ++I) {
3721  Register Operand1 = UnmergeLHS->getOperand(I).getReg();
3722  Register Operand2 = UnmergeRHS->getOperand(I).getReg();
3723  auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy},
3724  {Operand1, Operand2});
3725  ResultParts.push_back(PartMul->getOperand(0).getReg());
3726  OverflowParts.push_back(PartMul->getOperand(1).getReg());
3727  }
3728 
3729  LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts);
3730  LLT OverflowLCMTy =
3731  LLT::scalarOrVector(ResultLCMTy.getNumElements(), OverflowElementTy);
3732 
3733  // Recombine the pieces to the original result and overflow registers.
3734  buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts);
3735  buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts);
3736  MI.eraseFromParent();
3737  return Legalized;
3738 }
3739 
3740 // Handle FewerElementsVector for a G_BUILD_VECTOR or G_CONCAT_VECTORS that
3741 // produces a vector.
3742 //
3743 // Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
3744 // undef as necessary.
3745 //
3746 // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
3747 // -> <2 x s16>
3748 //
3749 // %4:_(s16) = G_IMPLICIT_DEF
3750 // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
3751 // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
3752 // %7:_(<2 x s16>) = G_IMPLICIT_DEF
3753 // %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
3754 // %3:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %8
3755 LegalizerHelper::LegalizeResult
3756 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
3757  LLT NarrowTy) {
3758  Register DstReg = MI.getOperand(0).getReg();
3759  LLT DstTy = MRI.getType(DstReg);
3760  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
3761  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
3762 
3763  // Break into a common type
3764  SmallVector<Register, 16> Parts;
3765  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3766  extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
3767 
3768  // Build the requested new merge, padding with undef.
3769  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
3770  TargetOpcode::G_ANYEXT);
3771 
3772  // Pack into the original result register.
3773  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3774 
3775  MI.eraseFromParent();
3776  return Legalized;
3777 }
3778 
3779 LegalizerHelper::LegalizeResult
3780 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
3781  unsigned TypeIdx,
3782  LLT NarrowVecTy) {
3783  Register DstReg = MI.getOperand(0).getReg();
3784  Register SrcVec = MI.getOperand(1).getReg();
3785  Register InsertVal;
3786  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
3787 
3788  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
3789  if (IsInsert)
3790  InsertVal = MI.getOperand(2).getReg();
3791 
3792  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
3793 
3794  // TODO: Handle total scalarization case.
3795  if (!NarrowVecTy.isVector())
3796  return UnableToLegalize;
3797 
3798  LLT VecTy = MRI.getType(SrcVec);
3799 
3800  // If the index is a constant, we can really break this down as you would
3801  // expect, and index into the target size pieces.
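  // For example (illustrative), extracting element 5 from <8 x s32> with
  // NarrowVecTy = <4 x s32> becomes an extract of element 1 from the second
  // <4 x s32> piece (PartIdx = 5 / 4 = 1, NewIdx = 5 - 4 * 1 = 1).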
3802  int64_t IdxVal;
3803  auto MaybeCst =
3804  getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true,
3805  /*HandleFConstants*/ false);
3806  if (MaybeCst) {
3807  IdxVal = MaybeCst->Value.getSExtValue();
3808  // Avoid out of bounds indexing the pieces.
3809  if (IdxVal >= VecTy.getNumElements()) {
3810  MIRBuilder.buildUndef(DstReg);
3811  MI.eraseFromParent();
3812  return Legalized;
3813  }
3814 
3815  SmallVector<Register, 8> VecParts;
3816  LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
3817 
3818  // Build a sequence of NarrowTy pieces in VecParts for this operand.
3819  LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
3820  TargetOpcode::G_ANYEXT);
3821 
3822  unsigned NewNumElts = NarrowVecTy.getNumElements();
3823 
3824  LLT IdxTy = MRI.getType(Idx);
3825  int64_t PartIdx = IdxVal / NewNumElts;
3826  auto NewIdx =
3827  MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
3828 
3829  if (IsInsert) {
3830  LLT PartTy = MRI.getType(VecParts[PartIdx]);
3831 
3832  // Use the adjusted index to insert into one of the subvectors.
3833  auto InsertPart = MIRBuilder.buildInsertVectorElement(
3834  PartTy, VecParts[PartIdx], InsertVal, NewIdx);
3835  VecParts[PartIdx] = InsertPart.getReg(0);
3836 
3837  // Recombine the inserted subvector with the others to reform the result
3838  // vector.
3839  buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
3840  } else {
3841  MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
3842  }
3843 
3844  MI.eraseFromParent();
3845  return Legalized;
3846  }
3847 
3848  // With a variable index, we can't perform the operation in a smaller type, so
3849  // we're forced to expand this.
3850  //
3851  // TODO: We could emit a chain of compare/select to figure out which piece to
3852  // index.
3853  return lowerExtractInsertVectorElt(MI);
3854 }
3855 
3856 LegalizerHelper::LegalizeResult
3857 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
3858  LLT NarrowTy) {
3859  // FIXME: Don't know how to handle secondary types yet.
3860  if (TypeIdx != 0)
3861  return UnableToLegalize;
3862 
3863  MachineMemOperand *MMO = *MI.memoperands_begin();
3864 
3865  // This implementation doesn't work for atomics. Give up instead of doing
3866  // something invalid.
3867  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
3868  MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
3869  return UnableToLegalize;
3870 
3871  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
3872  Register ValReg = MI.getOperand(0).getReg();
3873  Register AddrReg = MI.getOperand(1).getReg();
3874  LLT ValTy = MRI.getType(ValReg);
3875 
3876  // FIXME: Do we need a distinct NarrowMemory legalize action?
3877  if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
3878  LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
3879  return UnableToLegalize;
3880  }
3881 
3882  int NumParts = -1;
3883  int NumLeftover = -1;
3884  LLT LeftoverTy;
3885  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
3886  if (IsLoad) {
3887  std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
3888  } else {
3889  if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
3890  NarrowLeftoverRegs)) {
3891  NumParts = NarrowRegs.size();
3892  NumLeftover = NarrowLeftoverRegs.size();
3893  }
3894  }
3895 
3896  if (NumParts == -1)
3897  return UnableToLegalize;
3898 
3899  LLT PtrTy = MRI.getType(AddrReg);
3900  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
3901 
3902  unsigned TotalSize = ValTy.getSizeInBits();
3903 
3904  // Split the load/store into PartTy sized pieces starting at Offset. If this
3905  // is a load, return the new registers in ValRegs. For a store, each element
3906  // of ValRegs should be PartTy. Returns the next offset that needs to be
3907  // handled.
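  // For example (illustrative), a <4 x s32> load split with NarrowTy = <2 x s32>
  // becomes two <2 x s32> loads from byte offsets 0 and 8 off the original
  // address.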
3908  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
3909  unsigned Offset) -> unsigned {
3910  MachineFunction &MF = MIRBuilder.getMF();
3911  unsigned PartSize = PartTy.getSizeInBits();
3912  for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
3913  Offset += PartSize, ++Idx) {
3914  unsigned ByteSize = PartSize / 8;
3915  unsigned ByteOffset = Offset / 8;
3916  Register NewAddrReg;
3917 
3918  MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
3919 
3920  MachineMemOperand *NewMMO =
3921  MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
3922 
3923  if (IsLoad) {
3924  Register Dst = MRI.createGenericVirtualRegister(PartTy);
3925  ValRegs.push_back(Dst);
3926  MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
3927  } else {
3928  MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
3929  }
3930  }
3931 
3932  return Offset;
3933  };
3934 
3935  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
3936 
3937  // Handle the rest of the register if this isn't an even type breakdown.
3938  if (LeftoverTy.isValid())
3939  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
3940 
3941  if (IsLoad) {
3942  insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
3943  LeftoverTy, NarrowLeftoverRegs);
3944  }
3945 
3946  MI.eraseFromParent();
3947  return Legalized;
3948 }
3949 
3950 LegalizerHelper::LegalizeResult
3951 LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned TypeIdx,
3952  LLT NarrowTy) {
3953  assert(TypeIdx == 0 && "only one type index expected");
3954 
3955  const unsigned Opc = MI.getOpcode();
3956  const int NumDefOps = MI.getNumExplicitDefs();
3957  const int NumSrcOps = MI.getNumOperands() - NumDefOps;
3958  const unsigned Flags = MI.getFlags();
3959  const unsigned NarrowSize = NarrowTy.getSizeInBits();
3960  const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
3961 
3962  assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 "
3963  "result and 1-3 sources or 2 results and "
3964  "1-2 sources");
3965 
3966  SmallVector<Register, 2> DstRegs;
3967  for (int I = 0; I < NumDefOps; ++I)
3968  DstRegs.push_back(MI.getOperand(I).getReg());
3969 
3970  // First of all, check whether we are narrowing (changing the element type)
3971  // or reducing the number of vector elements.
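  // For example (illustrative), G_AND on <4 x s32> with NarrowTy = <2 x s32>
  // reduces the element count, while NarrowTy = s16 narrows: the vector is
  // bitcast to s128 and processed as s16 scalar pieces.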
3972  const LLT DstTy = MRI.getType(DstRegs[0]);
3973  const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
3974 
3975  SmallVector<Register, 8> ExtractedRegs[3];
3977 
3978  unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3979 
3980  // Break down all the sources into NarrowTy pieces we can operate on. This may
3981  // involve creating merges to a wider type, padded with undef.
3982  for (int I = 0; I != NumSrcOps; ++I) {
3983  Register SrcReg = MI.getOperand(I + NumDefOps).getReg();
3984  LLT SrcTy = MRI.getType(SrcReg);
3985 
3986  // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
3987  // For fewerElements, this is a smaller vector with the same element type.
3988  LLT OpNarrowTy;
3989  if (IsNarrow) {
3990  OpNarrowTy = NarrowScalarTy;
3991 
3992  // In case of narrowing, we need to cast vectors to scalars for this to
3993  // work properly
3994  // FIXME: Can we do without the bitcast here if we're narrowing?
3995  if (SrcTy.isVector()) {
3996  SrcTy = LLT::scalar(SrcTy.getSizeInBits());
3997  SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
3998  }
3999  } else {
4000  OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType());
4001  }
4002 
4003  LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
4004 
4005  // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
4006  buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
4007  TargetOpcode::G_ANYEXT);
4008  }
4009 
4010  SmallVector<Register, 8> ResultRegs[2];
4011 
4012  // Input operands for each sub-instruction.
4013  SmallVector<SrcOp, 4> InputRegs(NumSrcOps, Register());
4014 
4015  int NumParts = ExtractedRegs[0].size();
4016  const unsigned DstSize = DstTy.getSizeInBits();
4017  const LLT DstScalarTy = LLT::scalar(DstSize);
4018 
4019  // Narrowing needs to use scalar types
4020  LLT DstLCMTy, NarrowDstTy;
4021  if (IsNarrow) {
4022  DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
4023  NarrowDstTy = NarrowScalarTy;
4024  } else {
4025  DstLCMTy = getLCMType(DstTy, NarrowTy);
4026  NarrowDstTy = NarrowTy;
4027  }
4028 
4029  // We widened the source registers to satisfy merge/unmerge size
4030  // constraints. We'll have some extra fully undef parts.
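  // For example (illustrative), with DstTy = <3 x s16> and NarrowTy = <2 x s16>
  // the pieces were padded out to the <6 x s16> LCM, so there are three parts
  // but only NumRealParts = 2 carry real data; the last result part is left
  // undef below.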
4031  const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
4032 
4033  for (int I = 0; I != NumRealParts; ++I) {
4034  // Emit this instruction on each of the split pieces.
4035  for (int J = 0; J != NumSrcOps; ++J)
4036  InputRegs[J] = ExtractedRegs[J][I];
4037 
4038  MachineInstrBuilder Inst;
4039  if (NumDefOps == 1)
4040  Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
4041  else
4042  Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs,
4043  Flags);
4044 
4045  for (int J = 0; J != NumDefOps; ++J)
4046  ResultRegs[J].push_back(Inst.getReg(J));
4047  }
4048 
4049  // Fill out the widened result with undef instead of creating instructions
4050  // with undef inputs.
4051  int NumUndefParts = NumParts - NumRealParts;
4052  if (NumUndefParts != 0) {
4053  Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0);
4054  for (int I = 0; I != NumDefOps; ++I)
4055  ResultRegs[I].append(NumUndefParts, Undef);
4056  }
4057 
4058  // Extract the possibly padded result. Use a scratch register if we need to do
4059  // a final bitcast, otherwise use the original result register.
4060  Register MergeDstReg;
4061  for (int I = 0; I != NumDefOps; ++I) {
4062  if (IsNarrow && DstTy.isVector())
4063  MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
4064  else
4065  MergeDstReg = DstRegs[I];
4066 
4067  buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]);
4068 
4069  // Recast to vector if we narrowed a vector
4070  if (IsNarrow && DstTy.isVector())
4071  MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg);
4072  }
4073 
4074  MI.eraseFromParent();
4075  return Legalized;
4076 }
4077 
4078 LegalizerHelper::LegalizeResult
4079 LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
4080  LLT NarrowTy) {
4081  Register DstReg = MI.getOperand(0).getReg();
4082  Register SrcReg = MI.getOperand(1).getReg();
4083  int64_t Imm = MI.getOperand(2).getImm();
4084 
4085  LLT DstTy = MRI.getType(DstReg);
4086 
4087  SmallVector<Register, 8> Parts;
4088  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
4089  LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
4090 
4091  for (Register &R : Parts)
4092  R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
4093 
4094  buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
4095 
4096  MI.eraseFromParent();
4097  return Legalized;
4098 }
4099 
4100 LegalizerHelper::LegalizeResult
4101 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4102  LLT NarrowTy) {
4103  using namespace TargetOpcode;
4104 
4105  switch (MI.getOpcode()) {
4106  case G_IMPLICIT_DEF:
4107  return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
4108  case G_TRUNC:
4109  case G_AND:
4110  case G_OR:
4111  case G_XOR:
4112  case G_ADD:
4113  case G_SUB:
4114  case G_MUL:
4115  case G_PTR_ADD:
4116  case G_SMULH:
4117  case G_UMULH:
4118  case G_FADD:
4119  case G_FMUL:
4120  case G_FSUB:
4121  case G_FNEG:
4122  case G_FABS:
4123  case G_FCANONICALIZE:
4124  case G_FDIV:
4125  case G_FREM:
4126  case G_FMA:
4127  case G_FMAD:
4128  case G_FPOW:
4129  case G_FEXP:
4130  case G_FEXP2:
4131  case G_FLOG:
4132  case G_FLOG2:
4133  case G_FLOG10:
4134  case G_FNEARBYINT:
4135  case G_FCEIL:
4136  case G_FFLOOR:
4137  case G_FRINT:
4138  case G_INTRINSIC_ROUND:
4139  case G_INTRINSIC_ROUNDEVEN:
4140  case G_INTRINSIC_TRUNC:
4141  case G_FCOS:
4142  case G_FSIN:
4143  case G_FSQRT:
4144  case G_BSWAP:
4145  case G_BITREVERSE:
4146  case G_SDIV:
4147  case G_UDIV:
4148  case G_SREM:
4149  case G_UREM:
4150  case G_SDIVREM:
4151  case G_UDIVREM:
4152  case G_SMIN:
4153  case G_SMAX:
4154  case G_UMIN:
4155  case G_UMAX:
4156  case G_ABS:
4157  case G_FMINNUM:
4158  case G_FMAXNUM:
4159  case G_FMINNUM_IEEE:
4160  case G_FMAXNUM_IEEE:
4161  case G_FMINIMUM:
4162  case G_FMAXIMUM:
4163  case G_FSHL:
4164  case G_FSHR:
4165  case G_FREEZE:
4166  case G_SADDSAT:
4167  case G_SSUBSAT:
4168  case G_UADDSAT:
4169  case G_USUBSAT:
4170  return reduceOperationWidth(MI, TypeIdx, NarrowTy);
4171  case G_UMULO:
4172  case G_SMULO:
4173  return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy);
4174  case G_SHL:
4175  case G_LSHR:
4176  case G_ASHR:
4177  case G_SSHLSAT:
4178  case G_USHLSAT:
4179  case G_CTLZ:
4180  case G_CTLZ_ZERO_UNDEF:
4181  case G_CTTZ:
4182  case G_CTTZ_ZERO_UNDEF:
4183  case G_CTPOP:
4184  case G_FCOPYSIGN:
4185  return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
4186  case G_ZEXT:
4187  case G_SEXT:
4188  case G_ANYEXT:
4189  case G_FPEXT:
4190  case G_FPTRUNC:
4191  case G_SITOFP:
4192  case G_UITOFP:
4193  case G_FPTOSI:
4194  case G_FPTOUI:
4195  case G_INTTOPTR:
4196  case G_PTRTOINT:
4197  case G_ADDRSPACE_CAST:
4198  return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
4199  case G_ICMP:
4200  case G_FCMP:
4201  return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
4202  case G_SELECT:
4203  return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
4204  case G_PHI:
4205  return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
4206  case G_UNMERGE_VALUES:
4207  return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4208  case G_BUILD_VECTOR:
4209  assert(TypeIdx == 0 && "not a vector type index");
4210  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4211  case G_CONCAT_VECTORS:
4212  if (TypeIdx != 1) // TODO: This probably does work as expected already.
4213  return UnableToLegalize;
4214  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4215  case G_EXTRACT_VECTOR_ELT:
4216  case G_INSERT_VECTOR_ELT:
4217  return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4218  case G_LOAD:
4219  case G_STORE:
4220  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
4221  case G_SEXT_INREG:
4222  return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
4223  GISEL_VECREDUCE_CASES_NONSEQ
4224  return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4225  case G_SHUFFLE_VECTOR:
4226  return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4227  default:
4228  return UnableToLegalize;
4229  }
4230 }
4231 
4232 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4233  MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4234  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4235  if (TypeIdx != 0)
4236  return UnableToLegalize;
4237 
4238  Register DstReg = MI.getOperand(0).getReg();
4239  Register Src1Reg = MI.getOperand(1).getReg();
4240  Register Src2Reg = MI.getOperand(2).getReg();
4241  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4242  LLT DstTy = MRI.getType(DstReg);
4243  LLT Src1Ty = MRI.getType(Src1Reg);
4244  LLT Src2Ty = MRI.getType(Src2Reg);
4245  // The shuffle should be canonicalized by now.
4246  if (DstTy != Src1Ty)
4247  return UnableToLegalize;
4248  if (DstTy != Src2Ty)
4249  return UnableToLegalize;
4250 
4251  if (!isPowerOf2_32(DstTy.getNumElements()))
4252  return UnableToLegalize;
4253 
4254  // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4255  // Further legalization attempts will be needed to split further.
4256  NarrowTy = DstTy.changeNumElements(DstTy.getNumElements() / 2);
4257  unsigned NewElts = NarrowTy.getNumElements();
4258 
4259  SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4260  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
4261  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
4262  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4263  SplitSrc2Regs[1]};
4264 
4265  Register Hi, Lo;
4266 
4267  // If Lo or Hi uses elements from at most two of the four input vectors, then
4268  // express it as a vector shuffle of those two inputs. Otherwise extract the
4269  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
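  // For example (illustrative), splitting a <4 x s32> shuffle with mask
  // <0, 4, 1, 5>: the Lo half's mask <0, 4> only touches Inputs[0] and
  // Inputs[2], so Lo becomes a <2 x s32> shuffle of those two with mask <0, 2>.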
4270  SmallVector<int, 16> Ops;
4271  for (unsigned High = 0; High < 2; ++High) {
4272  Register &Output = High ? Hi : Lo;
4273 
4274  // Build a shuffle mask for the output, discovering on the fly which
4275  // input vectors to use as shuffle operands (recorded in InputUsed).
4276  // If building a suitable shuffle vector proves too hard, then bail
4277  // out with useBuildVector set.
4278  unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4279  unsigned FirstMaskIdx = High * NewElts;
4280  bool UseBuildVector = false;
4281  for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4282  // The mask element. This indexes into the input.
4283  int Idx = Mask[FirstMaskIdx + MaskOffset];
4284 
4285  // The input vector this mask element indexes into.
4286  unsigned Input = (unsigned)Idx / NewElts;
4287 
4288  if (Input >= array_lengthof(Inputs)) {
4289  // The mask element does not index into any input vector.
4290  Ops.push_back(-1);
4291  continue;
4292  }
4293 
4294  // Turn the index into an offset from the start of the input vector.
4295  Idx -= Input * NewElts;
4296 
4297  // Find or create a shuffle vector operand to hold this input.
4298  unsigned OpNo;
4299  for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
4300  if (InputUsed[OpNo] == Input) {
4301  // This input vector is already an operand.
4302  break;
4303  } else if (InputUsed[OpNo] == -1U) {
4304  // Create a new operand for this input vector.
4305  InputUsed[OpNo] = Input;
4306  break;
4307  }
4308  }
4309 
4310  if (OpNo >= array_lengthof(InputUsed)) {
4311  // More than two input vectors used! Give up on trying to create a
4312  // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
4313  UseBuildVector = true;
4314  break;
4315  }
4316 
4317  // Add the mask index for the new shuffle vector.
4318  Ops.push_back(Idx + OpNo * NewElts);
4319  }
4320 
4321  if (UseBuildVector) {
4322  LLT EltTy = NarrowTy.getElementType();
4323  SmallVector<Register, 16> SVOps;
4324 
4325  // Extract the input elements by hand.
4326  for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4327  // The mask element. This indexes into the input.
4328  int Idx = Mask[FirstMaskIdx + MaskOffset];
4329 
4330  // The input vector this mask element indexes into.
4331  unsigned Input = (unsigned)Idx / NewElts;
4332 
4333  if (Input >= array_lengthof(Inputs)) {
4334  // The mask element is "undef" or indexes off the end of the input.
4335  SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4336  continue;
4337  }
4338 
4339  // Turn the index into an offset from the start of the input vector.
4340  Idx -= Input * NewElts;
4341 
4342  // Extract the vector element by hand.
4343  SVOps.push_back(MIRBuilder
4344  .buildExtractVectorElement(
4345  EltTy, Inputs[Input],
4346  MIRBuilder.buildConstant(LLT::scalar(32), Idx))
4347  .getReg(0));
4348  }
4349 
4350  // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4351  Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4352  } else if (InputUsed[0] == -1U) {
4353  // No input vectors were used! The result is undefined.
4354  Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4355  } else {
4356  Register Op0 = Inputs[InputUsed[0]];
4357  // If only one input was used, use an undefined vector for the other.
4358  Register Op1 = InputUsed[1] == -1U
4359  ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4360  : Inputs[InputUsed[1]];
4361  // At least one input vector was used. Create a new shuffle vector.
4362  Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4363  }
4364 
4365  Ops.clear();
4366  }
4367 
4368  MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4369  MI.eraseFromParent();
4370  return Legalized;
4371 }
4372 
4373 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4374  MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4375  unsigned Opc = MI.getOpcode();
4376  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
4377  Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
4378  "Sequential reductions not expected");
4379 
4380  if (TypeIdx != 1)
4381  return UnableToLegalize;
4382 
4383  // The semantics of the normal non-sequential reductions allow us to freely
4384  // re-associate the operation.
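  // For example (illustrative), G_VECREDUCE_ADD of <6 x s32> with
  // NarrowTy = <3 x s32> becomes two <3 x s32> partial reductions whose s32
  // results are then combined with a single G_ADD.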
4385  Register SrcReg = MI.getOperand(1).getReg();
4386  LLT SrcTy = MRI.getType(SrcReg);
4387  Register DstReg = MI.getOperand(0).getReg();
4388  LLT DstTy = MRI.getType(DstReg);
4389 
4390  if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)
4391  return UnableToLegalize;
4392 
4393  SmallVector<Register> SplitSrcs;
4394  const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements();
4395  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
4396  SmallVector<Register> PartialReductions;
4397  for (unsigned Part = 0; Part < NumParts; ++Part) {
4398  PartialReductions.push_back(
4399  MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
4400  }
4401 
4402  unsigned ScalarOpc;
4403  switch (Opc) {
4404  case TargetOpcode::G_VECREDUCE_FADD:
4405  ScalarOpc = TargetOpcode::G_FADD;
4406  break;
4407  case TargetOpcode::G_VECREDUCE_FMUL:
4408  ScalarOpc = TargetOpcode::G_FMUL;
4409  break;
4410  case TargetOpcode::G_VECREDUCE_FMAX:
4411  ScalarOpc = TargetOpcode::G_FMAXNUM;
4412  break;
4413  case TargetOpcode::G_VECREDUCE_FMIN:
4414  ScalarOpc = TargetOpcode::G_FMINNUM;
4415  break;
4416  case TargetOpcode::G_VECREDUCE_ADD:
4417  ScalarOpc = TargetOpcode::G_ADD;
4418  break;
4419  case TargetOpcode::G_VECREDUCE_MUL:
4420  ScalarOpc = TargetOpcode::G_MUL;
4421  break;
4422  case TargetOpcode::G_VECREDUCE_AND:
4423  ScalarOpc = TargetOpcode::G_AND;
4424  break;
4425  case TargetOpcode::G_VECREDUCE_OR:
4426  ScalarOpc = TargetOpcode::G_OR;
4427  break;
4428  case TargetOpcode::G_VECREDUCE_XOR:
4429  ScalarOpc = TargetOpcode::G_XOR;
4430  break;
4431  case TargetOpcode::G_VECREDUCE_SMAX:
4432  ScalarOpc = TargetOpcode::G_SMAX;
4433  break;
4434  case TargetOpcode::G_VECREDUCE_SMIN:
4435  ScalarOpc = TargetOpcode::G_SMIN;
4436  break;
4437  case TargetOpcode::G_VECREDUCE_UMAX:
4438  ScalarOpc = TargetOpcode::G_UMAX;
4439  break;
4440  case TargetOpcode::G_VECREDUCE_UMIN:
4441  ScalarOpc = TargetOpcode::G_UMIN;
4442  break;
4443  default:
4444  LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n");
4445  return UnableToLegalize;
4446  }
4447 
4448  // If the types involved are powers of 2, we can generate intermediate vector
4449  // ops, before generating a final reduction operation.
4450  if (isPowerOf2_32(SrcTy.getNumElements()) &&
4451  isPowerOf2_32(NarrowTy.getNumElements())) {
4452  return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
4453  }
4454 
4455  Register Acc = PartialReductions[0];
4456  for (unsigned Part = 1; Part < NumParts; ++Part) {
4457  if (Part == NumParts - 1) {
4458  MIRBuilder.buildInstr(ScalarOpc, {DstReg},
4459  {Acc, PartialReductions[Part]});
4460  } else {
4461  Acc = MIRBuilder
4462  .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
4463  .getReg(0);
4464  }
4465  }
4466  MI.eraseFromParent();
4467  return Legalized;
4468 }
4469 
4470 LegalizerHelper::LegalizeResult
4471 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
4472  LLT SrcTy, LLT NarrowTy,
4473  unsigned ScalarOpc) {
4474  SmallVector<Register> SplitSrcs;
4475  // Split the sources into NarrowTy size pieces.
4476  extractParts(SrcReg, NarrowTy,
4477  SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
4478  // We're going to do a tree reduction using vector operations until we have
4479  // one NarrowTy size value left.
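  // For example (illustrative), an <8 x s32> G_VECREDUCE_ADD with
  // NarrowTy = <2 x s32> is split into four <2 x s32> pieces, pairwise G_ADDs
  // reduce them to two and then one piece, and the original reduction opcode
  // is finally applied to that last <2 x s32> value.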
4480  while (SplitSrcs.size() > 1) {
4481  SmallVector<Register> PartialRdxs;
4482  for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
4483  Register LHS = SplitSrcs[Idx];
4484  Register RHS = SplitSrcs[Idx + 1];
4485  // Create the intermediate vector op.
4486  Register Res =
4487  MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
4488  PartialRdxs.push_back(Res);
4489  }
4490  SplitSrcs = std::move(PartialRdxs);
4491  }
4492  // Finally generate the requested NarrowTy based reduction.
4493  Observer.changingInstr(MI);
4494  MI.getOperand(1).setReg(SplitSrcs[0]);
4495  Observer.changedInstr(MI);
4496  return Legalized;
4497 }
4498 
4499 LegalizerHelper::LegalizeResult
4500 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
4501  const LLT HalfTy, const LLT AmtTy) {
4502 
4503  Register InL = MRI.createGenericVirtualRegister(HalfTy);
4504  Register InH = MRI.createGenericVirtualRegister(HalfTy);
4505  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4506 
4507  if (Amt.isNullValue()) {
4508  MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
4509  MI.eraseFromParent();
4510  return Legalized;
4511  }
4512 
4513  LLT NVT = HalfTy;
4514  unsigned NVTBits = HalfTy.getSizeInBits();
4515  unsigned VTBits = 2 * NVTBits;
4516 
4517  SrcOp Lo(Register(0)), Hi(Register(0));
4518  if (MI.getOpcode() == TargetOpcode::G_SHL) {
4519  if (Amt.ugt(VTBits)) {
4520  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4521  } else if (Amt.ugt(NVTBits)) {
4522  Lo = MIRBuilder.buildConstant(NVT, 0);
4523  Hi = MIRBuilder.buildShl(NVT, InL,
4524  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4525  } else if (Amt == NVTBits) {
4526  Lo = MIRBuilder.buildConstant(NVT, 0);
4527  Hi = InL;
4528  } else {
4529  Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
4530  auto OrLHS =
4531  MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
4532  auto OrRHS = MIRBuilder.buildLShr(
4533  NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4534  Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4535  }
4536  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4537  if (Amt.ugt(VTBits)) {
4538  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4539  } else if (Amt.ugt(NVTBits)) {
4540  Lo = MIRBuilder.buildLShr(NVT, InH,
4541  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4542  Hi = MIRBuilder.buildConstant(NVT, 0);
4543  } else if (Amt == NVTBits) {
4544  Lo = InH;
4545  Hi = MIRBuilder.buildConstant(NVT, 0);
4546  } else {
4547  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4548 
4549  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4550  auto OrRHS = MIRBuilder.buildShl(
4551  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4552 
4553  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4554  Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
4555  }
4556  } else {
4557  if (Amt.ugt(VTBits)) {
4558  Hi = Lo = MIRBuilder.buildAShr(
4559  NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4560  } else if (Amt.ugt(NVTBits)) {
4561  Lo = MIRBuilder.buildAShr(NVT, InH,
4562  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4563  Hi = MIRBuilder.buildAShr(NVT, InH,
4564  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4565  } else if (Amt == NVTBits) {
4566  Lo = InH;
4567  Hi = MIRBuilder.buildAShr(NVT, InH,
4568  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4569  } else {
4570  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4571 
4572  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4573  auto OrRHS = MIRBuilder.buildShl(
4574  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4575 
4576  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4577  Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
4578  }
4579  }
4580 
4581  MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
4582  MI.eraseFromParent();
4583 
4584  return Legalized;
4585 }
4586 
4587 // TODO: Optimize if constant shift amount.
4588 LegalizerHelper::LegalizeResult
4589 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
4590  LLT RequestedTy) {
4591  if (TypeIdx == 1) {
4592  Observer.changingInstr(MI);
4593  narrowScalarSrc(MI, RequestedTy, 2);
4594  Observer.changedInstr(MI);
4595  return Legalized;
4596  }
4597 
4598  Register DstReg = MI.getOperand(0).getReg();
4599  LLT DstTy = MRI.getType(DstReg);
4600  if (DstTy.isVector())
4601  return UnableToLegalize;
4602 
4603  Register Amt = MI.getOperand(2).getReg();
4604  LLT ShiftAmtTy = MRI.getType(Amt);
4605  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
4606  if (DstEltSize % 2 != 0)
4607  return UnableToLegalize;
4608 
4609  // Ignore the input type. We can only go to exactly half the size of the
4610  // input. If that isn't small enough, the resulting pieces will be further
4611  // legalized.
4612  const unsigned NewBitSize = DstEltSize / 2;
4613  const LLT HalfTy = LLT::scalar(NewBitSize);
4614  const LLT CondTy = LLT::scalar(1);
4615 
4616  if (const MachineInstr *KShiftAmt =
4617  getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
4618  return narrowScalarShiftByConstant(
4619  MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
4620  }
4621 
4622  // TODO: Expand with known bits.
4623 
4624  // Handle the fully general expansion by an unknown amount.
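  // For example, for G_SHL the expansion below computes (illustrative):
  //   Lo = (Amt < NewBitSize) ? InL << Amt : 0
  //   Hi = (Amt == 0) ? InH
  //      : (Amt < NewBitSize) ? (InH << Amt) | (InL >> (NewBitSize - Amt))
  //      : InL << (Amt - NewBitSize)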
4625  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
4626 
4627  Register InL = MRI.createGenericVirtualRegister(HalfTy);
4628  Register InH = MRI.createGenericVirtualRegister(HalfTy);
4629  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4630 
4631  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
4632  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
4633 
4634  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
4635  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
4636  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
4637 
4638  Register ResultRegs[2];
4639  switch (MI.getOpcode()) {
4640  case TargetOpcode::G_SHL: {
4641  // Short: ShAmt < NewBitSize
4642  auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
4643 
4644  auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
4645  auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
4646  auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
4647 
4648  // Long: ShAmt >= NewBitSize
4649  auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
4650  auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
4651 
4652  auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
4653  auto Hi = MIRBuilder.buildSelect(
4654  HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
4655 
4656  ResultRegs[0] = Lo.getReg(0);
4657  ResultRegs[1] = Hi.getReg(0);
4658  break;
4659  }
4660  case TargetOpcode::G_LSHR:
4661  case TargetOpcode::G_ASHR: {
4662  // Short: ShAmt < NewBitSize
4663  auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
4664 
4665  auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
4666  auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
4667  auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
4668 
4669  // Long: ShAmt >= NewBitSize
4670  MachineInstrBuilder HiL;
4671  if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4672  HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
4673  } else {
4674  auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
4675  HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
4676  }
4677  auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
4678  {InH, AmtExcess}); // Lo from Hi part.
4679 
4680  auto Lo = MIRBuilder.buildSelect(
4681  HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
4682 
4683  auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
4684 
4685  ResultRegs[0] = Lo.getReg(0);
4686  ResultRegs[1] = Hi.getReg(0);
4687  break;
4688  }
4689  default:
4690  llvm_unreachable("not a shift");
4691  }
4692 
4693  MIRBuilder.buildMerge(DstReg, ResultRegs);
4694  MI.eraseFromParent();
4695  return Legalized;
4696 }
4697 
4698 LegalizerHelper::LegalizeResult
4699 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
4700  LLT MoreTy) {
4701  assert(TypeIdx == 0 && "Expecting only Idx 0");
4702 
4703  Observer.changingInstr(MI);
4704  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
4705  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
4706  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
4707  moreElementsVectorSrc(MI, MoreTy, I);
4708  }
4709 
4710  MachineBasicBlock &MBB = *MI.getParent();
4712  moreElementsVectorDst(MI, MoreTy, 0);
4713  Observer.changedInstr(MI);
4714  return Legalized;
4715 }
4716 
4717 LegalizerHelper::LegalizeResult
4718 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
4719  LLT MoreTy) {
4720  unsigned Opc = MI.getOpcode();
4721  switch (Opc) {
4722  case TargetOpcode::G_IMPLICIT_DEF:
4723  case TargetOpcode::G_LOAD: {
4724  if (TypeIdx != 0)
4725  return UnableToLegalize;
4726  Observer.changingInstr(MI);
4727  moreElementsVectorDst(MI, MoreTy, 0);
4728  Observer.changedInstr(MI);
4729  return Legalized;
4730  }
4731  case TargetOpcode::G_STORE:
4732  if (TypeIdx != 0)
4733  return UnableToLegalize;
4734  Observer.changingInstr(MI);
4735  moreElementsVectorSrc(MI, MoreTy, 0);
4736  Observer.changedInstr(MI);
4737  return Legalized;
4738  case TargetOpcode::G_AND:
4739  case TargetOpcode::G_OR:
4740  case TargetOpcode::G_XOR:
4741  case TargetOpcode::G_SMIN:
4742  case TargetOpcode::G_SMAX:
4743  case TargetOpcode::G_UMIN:
4744  case TargetOpcode::G_UMAX:
4745  case TargetOpcode::G_FMINNUM:
4746  case TargetOpcode::G_FMAXNUM:
4747  case TargetOpcode::G_FMINNUM_IEEE:
4748  case TargetOpcode::G_FMAXNUM_IEEE:
4749  case TargetOpcode::G_FMINIMUM:
4750  case TargetOpcode::G_FMAXIMUM: {
4751  Observer.changingInstr(MI);
4752  moreElementsVectorSrc(MI, MoreTy, 1);
4753  moreElementsVectorSrc(MI, MoreTy, 2);
4754  moreElementsVectorDst(MI, MoreTy, 0);
4755  Observer.changedInstr(MI);
4756  return Legalized;
4757  }
4758  case TargetOpcode::G_EXTRACT:
4759  if (TypeIdx != 1)
4760  return UnableToLegalize;
4761  Observer.changingInstr(MI);
4762  moreElementsVectorSrc(MI, MoreTy, 1);
4763  Observer.changedInstr(MI);
4764  return Legalized;
4765  case TargetOpcode::G_INSERT:
4766  case TargetOpcode::G_FREEZE:
4767  if (TypeIdx != 0)
4768  return UnableToLegalize;
4769  Observer.changingInstr(MI);
4770  moreElementsVectorSrc(MI, MoreTy, 1);
4771  moreElementsVectorDst(MI, MoreTy, 0);
4772  Observer.changedInstr(MI);
4773  return Legalized;
4774  case TargetOpcode::G_SELECT:
4775  if (TypeIdx != 0)
4776  return UnableToLegalize;
4777  if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4778  return UnableToLegalize;
4779 
4780  Observer.changingInstr(MI);
4781  moreElementsVectorSrc(MI, MoreTy, 2);
4782  moreElementsVectorSrc(MI, MoreTy, 3);
4783  moreElementsVectorDst(MI, MoreTy, 0);