LLVM  15.0.0git
LegalizerHelper.cpp
Go to the documentation of this file.
1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
31 #include "llvm/IR/Instructions.h"
32 #include "llvm/Support/Debug.h"
36 
37 #define DEBUG_TYPE "legalizer"
38 
39 using namespace llvm;
40 using namespace LegalizeActions;
41 using namespace MIPatternMatch;
42 
43 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
44 ///
45 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
46 /// with any leftover piece as type \p LeftoverTy
47 ///
48 /// Returns -1 in the first element of the pair if the breakdown is not
49 /// satisfiable.
50 static std::pair<int, int>
51 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
52  assert(!LeftoverTy.isValid() && "this is an out argument");
53 
54  unsigned Size = OrigTy.getSizeInBits();
55  unsigned NarrowSize = NarrowTy.getSizeInBits();
56  unsigned NumParts = Size / NarrowSize;
57  unsigned LeftoverSize = Size - NumParts * NarrowSize;
58  assert(Size > NarrowSize);
59 
60  if (LeftoverSize == 0)
61  return {NumParts, 0};
62 
63  if (NarrowTy.isVector()) {
64  unsigned EltSize = OrigTy.getScalarSizeInBits();
65  if (LeftoverSize % EltSize != 0)
66  return {-1, -1};
67  LeftoverTy = LLT::scalarOrVector(
68  ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
69  } else {
70  LeftoverTy = LLT::scalar(LeftoverSize);
71  }
72 
73  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
74  return std::make_pair(NumParts, NumLeftover);
75 }
76 
78 
79  if (!Ty.isScalar())
80  return nullptr;
81 
82  switch (Ty.getSizeInBits()) {
83  case 16:
84  return Type::getHalfTy(Ctx);
85  case 32:
86  return Type::getFloatTy(Ctx);
87  case 64:
88  return Type::getDoubleTy(Ctx);
89  case 80:
90  return Type::getX86_FP80Ty(Ctx);
91  case 128:
92  return Type::getFP128Ty(Ctx);
93  default:
94  return nullptr;
95  }
96 }
97 
99  GISelChangeObserver &Observer,
101  : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
102  LI(*MF.getSubtarget().getLegalizerInfo()),
103  TLI(*MF.getSubtarget().getTargetLowering()) { }
104 
106  GISelChangeObserver &Observer,
108  : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
109  TLI(*MF.getSubtarget().getTargetLowering()) { }
110 
113  LostDebugLocObserver &LocObserver) {
114  LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
115 
117 
118  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
119  MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
120  return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
121  auto Step = LI.getAction(MI, MRI);
122  switch (Step.Action) {
123  case Legal:
124  LLVM_DEBUG(dbgs() << ".. Already legal\n");
125  return AlreadyLegal;
126  case Libcall:
127  LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
128  return libcall(MI, LocObserver);
129  case NarrowScalar:
130  LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
131  return narrowScalar(MI, Step.TypeIdx, Step.NewType);
132  case WidenScalar:
133  LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
134  return widenScalar(MI, Step.TypeIdx, Step.NewType);
135  case Bitcast:
136  LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
137  return bitcast(MI, Step.TypeIdx, Step.NewType);
138  case Lower:
139  LLVM_DEBUG(dbgs() << ".. Lower\n");
140  return lower(MI, Step.TypeIdx, Step.NewType);
141  case FewerElements:
142  LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
143  return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
144  case MoreElements:
145  LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
146  return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
147  case Custom:
148  LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
149  return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
150  default:
151  LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
152  return UnableToLegalize;
153  }
154 }
155 
156 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
157  SmallVectorImpl<Register> &VRegs) {
158  for (int i = 0; i < NumParts; ++i)
159  VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
160  MIRBuilder.buildUnmerge(VRegs, Reg);
161 }
162 
// Split Reg (of type RegTy) into as many MainTy sized registers as fit,
// appending them to VRegs, and put whatever does not fit into LeftoverRegs.
//
// LeftoverTy is an output: it must be invalid on entry and is only assigned
// when MainTy does not evenly divide RegTy. Every path visible here returns
// true.
//
// NOTE(review): the body uses `VRegs`, but no such parameter appears in the
// signature below (numbered listing line 165 is absent). A parameter line
// seems to have been dropped from this listing - confirm against the header
// declaration before relying on this text.
163 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
164  LLT MainTy, LLT &LeftoverTy,
166  SmallVectorImpl<Register> &LeftoverRegs) {
167  assert(!LeftoverTy.isValid() && "this is an out argument");
168 
169  unsigned RegSize = RegTy.getSizeInBits();
170  unsigned MainSize = MainTy.getSizeInBits();
171  unsigned NumParts = RegSize / MainSize;
 // Bits of Reg not covered by the NumParts full-sized pieces.
172  unsigned LeftoverSize = RegSize - NumParts * MainSize;
173 
174  // Use an unmerge when possible.
175  if (LeftoverSize == 0) {
176  for (unsigned I = 0; I < NumParts; ++I)
177  VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
178  MIRBuilder.buildUnmerge(VRegs, Reg);
179  return true;
180  }
181 
182  // Perform irregular split. Leftover is last element of RegPieces.
183  if (MainTy.isVector()) {
184  SmallVector<Register, 8> RegPieces;
185  extractVectorParts(Reg, MainTy.getNumElements(), RegPieces);
 // All pieces but the last go to VRegs; the last one is the leftover, and
 // its register type becomes LeftoverTy.
186  for (unsigned i = 0; i < RegPieces.size() - 1; ++i)
187  VRegs.push_back(RegPieces[i]);
188  LeftoverRegs.push_back(RegPieces[RegPieces.size() - 1]);
189  LeftoverTy = MRI.getType(LeftoverRegs[0]);
190  return true;
191  }
192 
 // Scalar MainTy: the leftover is a single scalar of the remaining width.
193  LeftoverTy = LLT::scalar(LeftoverSize);
194  // For irregular sizes, extract the individual parts.
195  for (unsigned I = 0; I != NumParts; ++I) {
196  Register NewReg = MRI.createGenericVirtualRegister(MainTy);
197  VRegs.push_back(NewReg);
198  MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
199  }
200 
 // Extract the bits past the last full piece in LeftoverSize sized steps.
201  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
202  Offset += LeftoverSize) {
203  Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
204  LeftoverRegs.push_back(NewReg);
205  MIRBuilder.buildExtract(NewReg, Reg, Offset);
206  }
207 
208  return true;
209 }
210 
211 void LegalizerHelper::extractVectorParts(Register Reg, unsigned NumElts,
212  SmallVectorImpl<Register> &VRegs) {
213  LLT RegTy = MRI.getType(Reg);
214  assert(RegTy.isVector() && "Expected a vector type");
215 
216  LLT EltTy = RegTy.getElementType();
217  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
218  unsigned RegNumElts = RegTy.getNumElements();
219  unsigned LeftoverNumElts = RegNumElts % NumElts;
220  unsigned NumNarrowTyPieces = RegNumElts / NumElts;
221 
222  // Perfect split without leftover
223  if (LeftoverNumElts == 0)
224  return extractParts(Reg, NarrowTy, NumNarrowTyPieces, VRegs);
225 
226  // Irregular split. Provide direct access to all elements for artifact
227  // combiner using unmerge to elements. Then build vectors with NumElts
228  // elements. Remaining element(s) will be (used to build vector) Leftover.
230  extractParts(Reg, EltTy, RegNumElts, Elts);
231 
232  unsigned Offset = 0;
233  // Requested sub-vectors of NarrowTy.
234  for (unsigned i = 0; i < NumNarrowTyPieces; ++i, Offset += NumElts) {
235  ArrayRef<Register> Pieces(&Elts[Offset], NumElts);
236  VRegs.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
237  }
238 
239  // Leftover element(s).
240  if (LeftoverNumElts == 1) {
241  VRegs.push_back(Elts[Offset]);
242  } else {
243  LLT LeftoverTy = LLT::fixed_vector(LeftoverNumElts, EltTy);
244  ArrayRef<Register> Pieces(&Elts[Offset], LeftoverNumElts);
245  VRegs.push_back(MIRBuilder.buildMerge(LeftoverTy, Pieces).getReg(0));
246  }
247 }
248 
249 void LegalizerHelper::insertParts(Register DstReg,
250  LLT ResultTy, LLT PartTy,
251  ArrayRef<Register> PartRegs,
252  LLT LeftoverTy,
253  ArrayRef<Register> LeftoverRegs) {
254  if (!LeftoverTy.isValid()) {
255  assert(LeftoverRegs.empty());
256 
257  if (!ResultTy.isVector()) {
258  MIRBuilder.buildMerge(DstReg, PartRegs);
259  return;
260  }
261 
262  if (PartTy.isVector())
263  MIRBuilder.buildConcatVectors(DstReg, PartRegs);
264  else
265  MIRBuilder.buildBuildVector(DstReg, PartRegs);
266  return;
267  }
268 
269  // Merge sub-vectors with different number of elements and insert into DstReg.
270  if (ResultTy.isVector()) {
271  assert(LeftoverRegs.size() == 1 && "Expected one leftover register");
272  SmallVector<Register, 8> AllRegs;
273  for (auto Reg : concat<const Register>(PartRegs, LeftoverRegs))
274  AllRegs.push_back(Reg);
275  return mergeMixedSubvectors(DstReg, AllRegs);
276  }
277 
278  SmallVector<Register> GCDRegs;
279  LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
280  for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
281  extractGCDType(GCDRegs, GCDTy, PartReg);
282  LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
283  buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
284 }
285 
286 void LegalizerHelper::appendVectorElts(SmallVectorImpl<Register> &Elts,
287  Register Reg) {
288  LLT Ty = MRI.getType(Reg);
289  SmallVector<Register, 8> RegElts;
290  extractParts(Reg, Ty.getScalarType(), Ty.getNumElements(), RegElts);
291  Elts.append(RegElts);
292 }
293 
294 /// Merge \p PartRegs with different types into \p DstReg.
295 void LegalizerHelper::mergeMixedSubvectors(Register DstReg,
296  ArrayRef<Register> PartRegs) {
297  SmallVector<Register, 8> AllElts;
298  for (unsigned i = 0; i < PartRegs.size() - 1; ++i)
299  appendVectorElts(AllElts, PartRegs[i]);
300 
301  Register Leftover = PartRegs[PartRegs.size() - 1];
302  if (MRI.getType(Leftover).isScalar())
303  AllElts.push_back(Leftover);
304  else
305  appendVectorElts(AllElts, Leftover);
306 
307  MIRBuilder.buildMerge(DstReg, AllElts);
308 }
309 
310 /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
312  const MachineInstr &MI) {
313  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
314 
315  const int StartIdx = Regs.size();
316  const int NumResults = MI.getNumOperands() - 1;
317  Regs.resize(Regs.size() + NumResults);
318  for (int I = 0; I != NumResults; ++I)
319  Regs[StartIdx + I] = MI.getOperand(I).getReg();
320 }
321 
322 void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
323  LLT GCDTy, Register SrcReg) {
324  LLT SrcTy = MRI.getType(SrcReg);
325  if (SrcTy == GCDTy) {
326  // If the source already evenly divides the result type, we don't need to do
327  // anything.
328  Parts.push_back(SrcReg);
329  } else {
330  // Need to split into common type sized pieces.
331  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
332  getUnmergeResults(Parts, *Unmerge);
333  }
334 }
335 
336 LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
337  LLT NarrowTy, Register SrcReg) {
338  LLT SrcTy = MRI.getType(SrcReg);
339  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
340  extractGCDType(Parts, GCDTy, SrcReg);
341  return GCDTy;
342 }
343 
// Regroup the GCDTy typed registers in VRegs into NarrowTy typed registers
// that together cover the LCM type of DstTy and NarrowTy.
//
// When VRegs does not hold enough pieces to cover that LCM type, the missing
// pieces are padded according to PadStrategy: zero for G_ZEXT, undef for
// G_ANYEXT, and an arithmetic shift of the last source piece for G_SEXT.
// On return VRegs has been replaced by the NarrowTy typed registers, and the
// LCM type is returned.
//
// NOTE(review): `VRegs` is used throughout but is not declared in the
// signature below (numbered listing line 345 is absent); a parameter line
// seems to have been dropped from this listing - confirm against the header.
344 LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
346  unsigned PadStrategy) {
347  LLT LCMTy = getLCMType(DstTy, NarrowTy);
348 
 // NumParts: NarrowTy pieces per LCMTy. NumSubParts: GCDTy pieces per
 // NarrowTy piece. NumOrigSrc: GCDTy pieces actually supplied.
349  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
350  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
351  int NumOrigSrc = VRegs.size();
352 
353  Register PadReg;
354 
355  // Get a value we can use to pad the source value if the sources won't evenly
356  // cover the result type.
357  if (NumOrigSrc < NumParts * NumSubParts) {
358  if (PadStrategy == TargetOpcode::G_ZEXT)
359  PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
360  else if (PadStrategy == TargetOpcode::G_ANYEXT)
361  PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
362  else {
363  assert(PadStrategy == TargetOpcode::G_SEXT);
364 
 // NOTE(review): the initializer of ShiftAmt (numbered listing line 367) is
 // missing from this listing, so the shift amount cannot be read from here.
365  // Shift the sign bit of the low register through the high register.
366  auto ShiftAmt =
368  PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
369  }
370  }
371 
372  // Registers for the final merge to be produced.
373  SmallVector<Register, 4> Remerge(NumParts);
374 
375  // Registers needed for intermediate merges, which will be merged into a
376  // source for Remerge.
377  SmallVector<Register, 4> SubMerge(NumSubParts);
378 
379  // Once we've fully read off the end of the original source bits, we can reuse
380  // the same high bits for remaining padding elements.
381  Register AllPadReg;
382 
383  // Build merges to the LCM type to cover the original result type.
384  for (int I = 0; I != NumParts; ++I) {
385  bool AllMergePartsArePadding = true;
386 
387  // Build the requested merges to the requested type.
388  for (int J = 0; J != NumSubParts; ++J) {
389  int Idx = I * NumSubParts + J;
 // Past the end of the supplied pieces: fill this slot with padding.
390  if (Idx >= NumOrigSrc) {
391  SubMerge[J] = PadReg;
392  continue;
393  }
394 
395  SubMerge[J] = VRegs[Idx];
396 
397  // There are meaningful bits here we can't reuse later.
398  AllMergePartsArePadding = false;
399  }
400 
401  // If we've filled up a complete piece with padding bits, we can directly
402  // emit the natural sized constant if applicable, rather than a merge of
403  // smaller constants.
404  if (AllMergePartsArePadding && !AllPadReg) {
405  if (PadStrategy == TargetOpcode::G_ANYEXT)
406  AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
407  else if (PadStrategy == TargetOpcode::G_ZEXT)
408  AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
409 
410  // If this is a sign extension, we can't materialize a trivial constant
411  // with the right type and have to produce a merge.
412  }
413 
414  if (AllPadReg) {
415  // Avoid creating additional instructions if we're just adding additional
416  // copies of padding bits.
417  Remerge[I] = AllPadReg;
418  continue;
419  }
420 
 // A NarrowTy piece made of a single GCDTy piece needs no merge at all.
421  if (NumSubParts == 1)
422  Remerge[I] = SubMerge[0];
423  else
424  Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
425 
426  // In the sign extend padding case, re-use the first all-signbit merge.
427  if (AllMergePartsArePadding && !AllPadReg)
428  AllPadReg = Remerge[I];
429  }
430 
 // Hand the NarrowTy typed registers back through VRegs.
431  VRegs = std::move(Remerge);
432  return LCMTy;
433 }
434 
435 void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
436  ArrayRef<Register> RemergeRegs) {
437  LLT DstTy = MRI.getType(DstReg);
438 
439  // Create the merge to the widened source, and extract the relevant bits into
440  // the result.
441 
442  if (DstTy == LCMTy) {
443  MIRBuilder.buildMerge(DstReg, RemergeRegs);
444  return;
445  }
446 
447  auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
448  if (DstTy.isScalar() && LCMTy.isScalar()) {
449  MIRBuilder.buildTrunc(DstReg, Remerge);
450  return;
451  }
452 
453  if (LCMTy.isVector()) {
454  unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
455  SmallVector<Register, 8> UnmergeDefs(NumDefs);
456  UnmergeDefs[0] = DstReg;
457  for (unsigned I = 1; I != NumDefs; ++I)
458  UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
459 
460  MIRBuilder.buildUnmerge(UnmergeDefs,
461  MIRBuilder.buildMerge(LCMTy, RemergeRegs));
462  return;
463  }
464 
465  llvm_unreachable("unhandled case");
466 }
467 
468 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
469 #define RTLIBCASE_INT(LibcallPrefix) \
470  do { \
471  switch (Size) { \
472  case 32: \
473  return RTLIB::LibcallPrefix##32; \
474  case 64: \
475  return RTLIB::LibcallPrefix##64; \
476  case 128: \
477  return RTLIB::LibcallPrefix##128; \
478  default: \
479  llvm_unreachable("unexpected size"); \
480  } \
481  } while (0)
482 
483 #define RTLIBCASE(LibcallPrefix) \
484  do { \
485  switch (Size) { \
486  case 32: \
487  return RTLIB::LibcallPrefix##32; \
488  case 64: \
489  return RTLIB::LibcallPrefix##64; \
490  case 80: \
491  return RTLIB::LibcallPrefix##80; \
492  case 128: \
493  return RTLIB::LibcallPrefix##128; \
494  default: \
495  llvm_unreachable("unexpected size"); \
496  } \
497  } while (0)
498 
499  switch (Opcode) {
500  case TargetOpcode::G_SDIV:
501  RTLIBCASE_INT(SDIV_I);
502  case TargetOpcode::G_UDIV:
503  RTLIBCASE_INT(UDIV_I);
504  case TargetOpcode::G_SREM:
505  RTLIBCASE_INT(SREM_I);
506  case TargetOpcode::G_UREM:
507  RTLIBCASE_INT(UREM_I);
508  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
509  RTLIBCASE_INT(CTLZ_I);
510  case TargetOpcode::G_FADD:
511  RTLIBCASE(ADD_F);
512  case TargetOpcode::G_FSUB:
513  RTLIBCASE(SUB_F);
514  case TargetOpcode::G_FMUL:
515  RTLIBCASE(MUL_F);
516  case TargetOpcode::G_FDIV:
517  RTLIBCASE(DIV_F);
518  case TargetOpcode::G_FEXP:
519  RTLIBCASE(EXP_F);
520  case TargetOpcode::G_FEXP2:
521  RTLIBCASE(EXP2_F);
522  case TargetOpcode::G_FREM:
523  RTLIBCASE(REM_F);
524  case TargetOpcode::G_FPOW:
525  RTLIBCASE(POW_F);
526  case TargetOpcode::G_FMA:
527  RTLIBCASE(FMA_F);
528  case TargetOpcode::G_FSIN:
529  RTLIBCASE(SIN_F);
530  case TargetOpcode::G_FCOS:
531  RTLIBCASE(COS_F);
532  case TargetOpcode::G_FLOG10:
533  RTLIBCASE(LOG10_F);
534  case TargetOpcode::G_FLOG:
535  RTLIBCASE(LOG_F);
536  case TargetOpcode::G_FLOG2:
537  RTLIBCASE(LOG2_F);
538  case TargetOpcode::G_FCEIL:
539  RTLIBCASE(CEIL_F);
540  case TargetOpcode::G_FFLOOR:
541  RTLIBCASE(FLOOR_F);
542  case TargetOpcode::G_FMINNUM:
543  RTLIBCASE(FMIN_F);
544  case TargetOpcode::G_FMAXNUM:
545  RTLIBCASE(FMAX_F);
546  case TargetOpcode::G_FSQRT:
547  RTLIBCASE(SQRT_F);
548  case TargetOpcode::G_FRINT:
549  RTLIBCASE(RINT_F);
550  case TargetOpcode::G_FNEARBYINT:
551  RTLIBCASE(NEARBYINT_F);
552  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
553  RTLIBCASE(ROUNDEVEN_F);
554  }
555  llvm_unreachable("Unknown libcall function");
556 }
557 
558 /// True if an instruction is in tail position in its caller. Intended for
559 /// legalizing libcalls as tail calls when possible.
561  const TargetInstrInfo &TII,
563  MachineBasicBlock &MBB = *MI.getParent();
564  const Function &F = MBB.getParent()->getFunction();
565 
566  // Conservatively require the attributes of the call to match those of
567  // the return. Ignore NoAlias and NonNull because they don't affect the
568  // call sequence.
569  AttributeList CallerAttrs = F.getAttributes();
570  if (AttrBuilder(F.getContext(), CallerAttrs.getRetAttrs())
571  .removeAttribute(Attribute::NoAlias)
572  .removeAttribute(Attribute::NonNull)
573  .hasAttributes())
574  return false;
575 
576  // It's not safe to eliminate the sign / zero extension of the return value.
577  if (CallerAttrs.hasRetAttr(Attribute::ZExt) ||
578  CallerAttrs.hasRetAttr(Attribute::SExt))
579  return false;
580 
581  // Only tail call if the following instruction is a standard return or if we
582  // have a `thisreturn` callee, and a sequence like:
583  //
584  // G_MEMCPY %0, %1, %2
585  // $x0 = COPY %0
586  // RET_ReallyLR implicit $x0
587  auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
588  if (Next != MBB.instr_end() && Next->isCopy()) {
589  switch (MI.getOpcode()) {
590  default:
591  llvm_unreachable("unsupported opcode");
592  case TargetOpcode::G_BZERO:
593  return false;
594  case TargetOpcode::G_MEMCPY:
595  case TargetOpcode::G_MEMMOVE:
596  case TargetOpcode::G_MEMSET:
597  break;
598  }
599 
600  Register VReg = MI.getOperand(0).getReg();
601  if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
602  return false;
603 
604  Register PReg = Next->getOperand(0).getReg();
605  if (!PReg.isPhysical())
606  return false;
607 
608  auto Ret = next_nodbg(Next, MBB.instr_end());
609  if (Ret == MBB.instr_end() || !Ret->isReturn())
610  return false;
611 
612  if (Ret->getNumImplicitOperands() != 1)
613  return false;
614 
615  if (PReg != Ret->getOperand(0).getReg())
616  return false;
617 
618  // Skip over the COPY that we just validated.
619  Next = Ret;
620  }
621 
622  if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
623  return false;
624 
625  return true;
626 }
627 
629 llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
630  const CallLowering::ArgInfo &Result,
632  const CallingConv::ID CC) {
633  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
634 
636  Info.CallConv = CC;
638  Info.OrigRet = Result;
639  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
640  if (!CLI.lowerCall(MIRBuilder, Info))
642 
644 }
645 
648  const CallLowering::ArgInfo &Result,
650  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
651  const char *Name = TLI.getLibcallName(Libcall);
652  const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
653  return createLibcall(MIRBuilder, Name, Result, Args, CC);
654 }
655 
656 // Useful for libcalls where all operands have the same type.
658 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
659  Type *OpType) {
660  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
661 
662  // FIXME: What does the original arg index mean here?
664  for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
665  Args.push_back({MO.getReg(), OpType, 0});
666  return createLibcall(MIRBuilder, Libcall,
667  {MI.getOperand(0).getReg(), OpType, 0}, Args);
668 }
669 
672  MachineInstr &MI, LostDebugLocObserver &LocObserver) {
673  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
674 
676  // Add all the args, except for the last which is an imm denoting 'tail'.
677  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
678  Register Reg = MI.getOperand(i).getReg();
679 
680  // Need derive an IR type for call lowering.
681  LLT OpLLT = MRI.getType(Reg);
682  Type *OpTy = nullptr;
683  if (OpLLT.isPointer())
684  OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
685  else
686  OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
687  Args.push_back({Reg, OpTy, 0});
688  }
689 
690  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
691  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
692  RTLIB::Libcall RTLibcall;
693  unsigned Opc = MI.getOpcode();
694  switch (Opc) {
695  case TargetOpcode::G_BZERO:
696  RTLibcall = RTLIB::BZERO;
697  break;
698  case TargetOpcode::G_MEMCPY:
699  RTLibcall = RTLIB::MEMCPY;
700  Args[0].Flags[0].setReturned();
701  break;
702  case TargetOpcode::G_MEMMOVE:
703  RTLibcall = RTLIB::MEMMOVE;
704  Args[0].Flags[0].setReturned();
705  break;
706  case TargetOpcode::G_MEMSET:
707  RTLibcall = RTLIB::MEMSET;
708  Args[0].Flags[0].setReturned();
709  break;
710  default:
711  llvm_unreachable("unsupported opcode");
712  }
713  const char *Name = TLI.getLibcallName(RTLibcall);
714 
715  // Unsupported libcall on the target.
716  if (!Name) {
717  LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
718  << MIRBuilder.getTII().getName(Opc) << "\n");
720  }
721 
723  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
725  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
726  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
727  isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);
728 
729  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
730  if (!CLI.lowerCall(MIRBuilder, Info))
732 
733  if (Info.LoweredTailCall) {
734  assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
735 
736  // Check debug locations before removing the return.
737  LocObserver.checkpoint(true);
738 
739  // We must have a return following the call (or debug insts) to get past
740  // isLibCallInTailPosition.
741  do {
742  MachineInstr *Next = MI.getNextNode();
743  assert(Next &&
744  (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
745  "Expected instr following MI to be return or debug inst?");
746  // We lowered a tail call, so the call is now the return from the block.
747  // Delete the old return.
748  Next->eraseFromParent();
749  } while (MI.getNextNode());
750 
751  // We expect to lose the debug location from the return.
752  LocObserver.checkpoint(false);
753  }
754 
756 }
757 
758 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
759  Type *FromType) {
760  auto ToMVT = MVT::getVT(ToType);
761  auto FromMVT = MVT::getVT(FromType);
762 
763  switch (Opcode) {
764  case TargetOpcode::G_FPEXT:
765  return RTLIB::getFPEXT(FromMVT, ToMVT);
766  case TargetOpcode::G_FPTRUNC:
767  return RTLIB::getFPROUND(FromMVT, ToMVT);
768  case TargetOpcode::G_FPTOSI:
769  return RTLIB::getFPTOSINT(FromMVT, ToMVT);
770  case TargetOpcode::G_FPTOUI:
771  return RTLIB::getFPTOUINT(FromMVT, ToMVT);
772  case TargetOpcode::G_SITOFP:
773  return RTLIB::getSINTTOFP(FromMVT, ToMVT);
774  case TargetOpcode::G_UITOFP:
775  return RTLIB::getUINTTOFP(FromMVT, ToMVT);
776  }
777  llvm_unreachable("Unsupported libcall function");
778 }
779 
782  Type *FromType) {
783  RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
784  return createLibcall(MIRBuilder, Libcall,
785  {MI.getOperand(0).getReg(), ToType, 0},
786  {{MI.getOperand(1).getReg(), FromType, 0}});
787 }
788 
791  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
792  unsigned Size = LLTy.getSizeInBits();
793  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
794 
795  switch (MI.getOpcode()) {
796  default:
797  return UnableToLegalize;
798  case TargetOpcode::G_SDIV:
799  case TargetOpcode::G_UDIV:
800  case TargetOpcode::G_SREM:
801  case TargetOpcode::G_UREM:
802  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
803  Type *HLTy = IntegerType::get(Ctx, Size);
804  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
805  if (Status != Legalized)
806  return Status;
807  break;
808  }
809  case TargetOpcode::G_FADD:
810  case TargetOpcode::G_FSUB:
811  case TargetOpcode::G_FMUL:
812  case TargetOpcode::G_FDIV:
813  case TargetOpcode::G_FMA:
814  case TargetOpcode::G_FPOW:
815  case TargetOpcode::G_FREM:
816  case TargetOpcode::G_FCOS:
817  case TargetOpcode::G_FSIN:
818  case TargetOpcode::G_FLOG10:
819  case TargetOpcode::G_FLOG:
820  case TargetOpcode::G_FLOG2:
821  case TargetOpcode::G_FEXP:
822  case TargetOpcode::G_FEXP2:
823  case TargetOpcode::G_FCEIL:
824  case TargetOpcode::G_FFLOOR:
825  case TargetOpcode::G_FMINNUM:
826  case TargetOpcode::G_FMAXNUM:
827  case TargetOpcode::G_FSQRT:
828  case TargetOpcode::G_FRINT:
829  case TargetOpcode::G_FNEARBYINT:
830  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
831  Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
832  if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
833  LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
834  return UnableToLegalize;
835  }
836  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
837  if (Status != Legalized)
838  return Status;
839  break;
840  }
841  case TargetOpcode::G_FPEXT:
842  case TargetOpcode::G_FPTRUNC: {
843  Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
844  Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
845  if (!FromTy || !ToTy)
846  return UnableToLegalize;
848  if (Status != Legalized)
849  return Status;
850  break;
851  }
852  case TargetOpcode::G_FPTOSI:
853  case TargetOpcode::G_FPTOUI: {
854  // FIXME: Support other types
855  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
856  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
857  if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
858  return UnableToLegalize;
860  MI, MIRBuilder,
861  ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
862  FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
863  if (Status != Legalized)
864  return Status;
865  break;
866  }
867  case TargetOpcode::G_SITOFP:
868  case TargetOpcode::G_UITOFP: {
869  // FIXME: Support other types
870  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
871  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
872  if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
873  return UnableToLegalize;
875  MI, MIRBuilder,
876  ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
877  FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
878  if (Status != Legalized)
879  return Status;
880  break;
881  }
882  case TargetOpcode::G_BZERO:
883  case TargetOpcode::G_MEMCPY:
884  case TargetOpcode::G_MEMMOVE:
885  case TargetOpcode::G_MEMSET: {
886  LegalizeResult Result =
887  createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
888  if (Result != Legalized)
889  return Result;
890  MI.eraseFromParent();
891  return Result;
892  }
893  }
894 
895  MI.eraseFromParent();
896  return Legalized;
897 }
898 
900  unsigned TypeIdx,
901  LLT NarrowTy) {
902  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
903  uint64_t NarrowSize = NarrowTy.getSizeInBits();
904 
905  switch (MI.getOpcode()) {
906  default:
907  return UnableToLegalize;
908  case TargetOpcode::G_IMPLICIT_DEF: {
909  Register DstReg = MI.getOperand(0).getReg();
910  LLT DstTy = MRI.getType(DstReg);
911 
912  // If SizeOp0 is not an exact multiple of NarrowSize, emit
913  // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
914  // FIXME: Although this would also be legal for the general case, it causes
915  // a lot of regressions in the emitted code (superfluous COPYs, artifact
916  // combines not being hit). This seems to be a problem related to the
917  // artifact combiner.
918  if (SizeOp0 % NarrowSize != 0) {
919  LLT ImplicitTy = NarrowTy;
920  if (DstTy.isVector())
921  ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
922 
923  Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
924  MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
925 
926  MI.eraseFromParent();
927  return Legalized;
928  }
929 
930  int NumParts = SizeOp0 / NarrowSize;
931 
932  SmallVector<Register, 2> DstRegs;
933  for (int i = 0; i < NumParts; ++i)
934  DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
935 
936  if (DstTy.isVector())
937  MIRBuilder.buildBuildVector(DstReg, DstRegs);
938  else
939  MIRBuilder.buildMerge(DstReg, DstRegs);
940  MI.eraseFromParent();
941  return Legalized;
942  }
943  case TargetOpcode::G_CONSTANT: {
944  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
945  const APInt &Val = MI.getOperand(1).getCImm()->getValue();
946  unsigned TotalSize = Ty.getSizeInBits();
947  unsigned NarrowSize = NarrowTy.getSizeInBits();
948  int NumParts = TotalSize / NarrowSize;
949 
950  SmallVector<Register, 4> PartRegs;
951  for (int I = 0; I != NumParts; ++I) {
952  unsigned Offset = I * NarrowSize;
953  auto K = MIRBuilder.buildConstant(NarrowTy,
954  Val.lshr(Offset).trunc(NarrowSize));
955  PartRegs.push_back(K.getReg(0));
956  }
957 
958  LLT LeftoverTy;
959  unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
960  SmallVector<Register, 1> LeftoverRegs;
961  if (LeftoverBits != 0) {
962  LeftoverTy = LLT::scalar(LeftoverBits);
963  auto K = MIRBuilder.buildConstant(
964  LeftoverTy,
965  Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
966  LeftoverRegs.push_back(K.getReg(0));
967  }
968 
969  insertParts(MI.getOperand(0).getReg(),
970  Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
971 
972  MI.eraseFromParent();
973  return Legalized;
974  }
975  case TargetOpcode::G_SEXT:
976  case TargetOpcode::G_ZEXT:
977  case TargetOpcode::G_ANYEXT:
978  return narrowScalarExt(MI, TypeIdx, NarrowTy);
979  case TargetOpcode::G_TRUNC: {
980  if (TypeIdx != 1)
981  return UnableToLegalize;
982 
983  uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
984  if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
985  LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
986  return UnableToLegalize;
987  }
988 
989  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
990  MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
991  MI.eraseFromParent();
992  return Legalized;
993  }
994 
995  case TargetOpcode::G_FREEZE: {
996  if (TypeIdx != 0)
997  return UnableToLegalize;
998 
999  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1000  // Should widen scalar first
1001  if (Ty.getSizeInBits() % NarrowTy.getSizeInBits() != 0)
1002  return UnableToLegalize;
1003 
1004  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
1006  for (unsigned i = 0; i < Unmerge->getNumDefs(); ++i) {
1007  Parts.push_back(
1008  MIRBuilder.buildFreeze(NarrowTy, Unmerge.getReg(i)).getReg(0));
1009  }
1010 
1011  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Parts);
1012  MI.eraseFromParent();
1013  return Legalized;
1014  }
1015  case TargetOpcode::G_ADD:
1016  case TargetOpcode::G_SUB:
1017  case TargetOpcode::G_SADDO:
1018  case TargetOpcode::G_SSUBO:
1019  case TargetOpcode::G_SADDE:
1020  case TargetOpcode::G_SSUBE:
1021  case TargetOpcode::G_UADDO:
1022  case TargetOpcode::G_USUBO:
1023  case TargetOpcode::G_UADDE:
1024  case TargetOpcode::G_USUBE:
1025  return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
1026  case TargetOpcode::G_MUL:
1027  case TargetOpcode::G_UMULH:
1028  return narrowScalarMul(MI, NarrowTy);
1029  case TargetOpcode::G_EXTRACT:
1030  return narrowScalarExtract(MI, TypeIdx, NarrowTy);
1031  case TargetOpcode::G_INSERT:
1032  return narrowScalarInsert(MI, TypeIdx, NarrowTy);
1033  case TargetOpcode::G_LOAD: {
1034  auto &LoadMI = cast<GLoad>(MI);
1035  Register DstReg = LoadMI.getDstReg();
1036  LLT DstTy = MRI.getType(DstReg);
1037  if (DstTy.isVector())
1038  return UnableToLegalize;
1039 
1040  if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
1041  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1042  MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
1043  MIRBuilder.buildAnyExt(DstReg, TmpReg);
1044  LoadMI.eraseFromParent();
1045  return Legalized;
1046  }
1047 
1048  return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
1049  }
1050  case TargetOpcode::G_ZEXTLOAD:
1051  case TargetOpcode::G_SEXTLOAD: {
1052  auto &LoadMI = cast<GExtLoad>(MI);
1053  Register DstReg = LoadMI.getDstReg();
1054  Register PtrReg = LoadMI.getPointerReg();
1055 
1056  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1057  auto &MMO = LoadMI.getMMO();
1058  unsigned MemSize = MMO.getSizeInBits();
1059 
1060  if (MemSize == NarrowSize) {
1061  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1062  } else if (MemSize < NarrowSize) {
1063  MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
1064  } else if (MemSize > NarrowSize) {
1065  // FIXME: Need to split the load.
1066  return UnableToLegalize;
1067  }
1068 
1069  if (isa<GZExtLoad>(LoadMI))
1070  MIRBuilder.buildZExt(DstReg, TmpReg);
1071  else
1072  MIRBuilder.buildSExt(DstReg, TmpReg);
1073 
1074  LoadMI.eraseFromParent();
1075  return Legalized;
1076  }
1077  case TargetOpcode::G_STORE: {
1078  auto &StoreMI = cast<GStore>(MI);
1079 
1080  Register SrcReg = StoreMI.getValueReg();
1081  LLT SrcTy = MRI.getType(SrcReg);
1082  if (SrcTy.isVector())
1083  return UnableToLegalize;
1084 
1085  int NumParts = SizeOp0 / NarrowSize;
1086  unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
1087  unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
1088  if (SrcTy.isVector() && LeftoverBits != 0)
1089  return UnableToLegalize;
1090 
1091  if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
1092  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1093  MIRBuilder.buildTrunc(TmpReg, SrcReg);
1094  MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1095  StoreMI.eraseFromParent();
1096  return Legalized;
1097  }
1098 
1099  return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1100  }
1101  case TargetOpcode::G_SELECT:
1102  return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1103  case TargetOpcode::G_AND:
1104  case TargetOpcode::G_OR:
1105  case TargetOpcode::G_XOR: {
1106  // Legalize bitwise operation:
1107  // A = BinOp<Ty> B, C
1108  // into:
1109  // B1, ..., BN = G_UNMERGE_VALUES B
1110  // C1, ..., CN = G_UNMERGE_VALUES C
1111  // A1 = BinOp<Ty/N> B1, C1
1112  // ...
1113  // AN = BinOp<Ty/N> BN, CN
1114  // A = G_MERGE_VALUES A1, ..., AN
1115  return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1116  }
1117  case TargetOpcode::G_SHL:
1118  case TargetOpcode::G_LSHR:
1119  case TargetOpcode::G_ASHR:
1120  return narrowScalarShift(MI, TypeIdx, NarrowTy);
1121  case TargetOpcode::G_CTLZ:
1122  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1123  case TargetOpcode::G_CTTZ:
1124  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1125  case TargetOpcode::G_CTPOP:
1126  if (TypeIdx == 1)
1127  switch (MI.getOpcode()) {
1128  case TargetOpcode::G_CTLZ:
1129  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1130  return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1131  case TargetOpcode::G_CTTZ:
1132  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1133  return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1134  case TargetOpcode::G_CTPOP:
1135  return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1136  default:
1137  return UnableToLegalize;
1138  }
1139 
1141  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1143  return Legalized;
1144  case TargetOpcode::G_INTTOPTR:
1145  if (TypeIdx != 1)
1146  return UnableToLegalize;
1147 
1149  narrowScalarSrc(MI, NarrowTy, 1);
1151  return Legalized;
1152  case TargetOpcode::G_PTRTOINT:
1153  if (TypeIdx != 0)
1154  return UnableToLegalize;
1155 
1157  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1159  return Legalized;
1160  case TargetOpcode::G_PHI: {
1161  // FIXME: add support for when SizeOp0 isn't an exact multiple of
1162  // NarrowSize.
1163  if (SizeOp0 % NarrowSize != 0)
1164  return UnableToLegalize;
1165 
1166  unsigned NumParts = SizeOp0 / NarrowSize;
1167  SmallVector<Register, 2> DstRegs(NumParts);
1168  SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1170  for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1171  MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1172  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1173  extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1174  SrcRegs[i / 2]);
1175  }
1176  MachineBasicBlock &MBB = *MI.getParent();
1178  for (unsigned i = 0; i < NumParts; ++i) {
1179  DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1180  MachineInstrBuilder MIB =
1181  MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1182  for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1183  MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1184  }
1186  MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1188  MI.eraseFromParent();
1189  return Legalized;
1190  }
1191  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1192  case TargetOpcode::G_INSERT_VECTOR_ELT: {
1193  if (TypeIdx != 2)
1194  return UnableToLegalize;
1195 
1196  int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1198  narrowScalarSrc(MI, NarrowTy, OpIdx);
1200  return Legalized;
1201  }
1202  case TargetOpcode::G_ICMP: {
1203  Register LHS = MI.getOperand(2).getReg();
1204  LLT SrcTy = MRI.getType(LHS);
1205  uint64_t SrcSize = SrcTy.getSizeInBits();
1206  CmpInst::Predicate Pred =
1207  static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1208 
1209  // TODO: Handle the non-equality case for weird sizes.
1210  if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1211  return UnableToLegalize;
1212 
1213  LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1214  SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1215  if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1216  LHSLeftoverRegs))
1217  return UnableToLegalize;
1218 
1219  LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1220  SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1221  if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1222  RHSPartRegs, RHSLeftoverRegs))
1223  return UnableToLegalize;
1224 
1225  // We now have the LHS and RHS of the compare split into narrow-type
1226  // registers, plus potentially some leftover type.
1227  Register Dst = MI.getOperand(0).getReg();
1228  LLT ResTy = MRI.getType(Dst);
1229  if (ICmpInst::isEquality(Pred)) {
1230  // For each part on the LHS and RHS, keep track of the result of XOR-ing
1231  // them together. For each equal part, the result should be all 0s. For
1232  // each non-equal part, we'll get at least one 1.
1233  auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1235  for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1236  auto LHS = std::get<0>(LHSAndRHS);
1237  auto RHS = std::get<1>(LHSAndRHS);
1238  auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1239  Xors.push_back(Xor);
1240  }
1241 
1242  // Build a G_XOR for each leftover register. Each G_XOR must be widened
1243  // to the desired narrow type so that we can OR them together later.
1244  SmallVector<Register, 4> WidenedXors;
1245  for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1246  auto LHS = std::get<0>(LHSAndRHS);
1247  auto RHS = std::get<1>(LHSAndRHS);
1248  auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1249  LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1250  buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1251  /* PadStrategy = */ TargetOpcode::G_ZEXT);
1252  Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1253  }
1254 
1255  // Now, for each part we broke up, we know if they are equal/not equal
1256  // based off the G_XOR. We can OR these all together and compare against
1257  // 0 to get the result.
1258  assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1259  auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1260  for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1261  Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1262  MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1263  } else {
1264  // TODO: Handle non-power-of-two types.
1265  assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1266  assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1267  Register LHSL = LHSPartRegs[0];
1268  Register LHSH = LHSPartRegs[1];
1269  Register RHSL = RHSPartRegs[0];
1270  Register RHSH = RHSPartRegs[1];
1271  MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1272  MachineInstrBuilder CmpHEQ =
1273  MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1275  ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1276  MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
1277  }
1278  MI.eraseFromParent();
1279  return Legalized;
1280  }
1281  case TargetOpcode::G_SEXT_INREG: {
1282  if (TypeIdx != 0)
1283  return UnableToLegalize;
1284 
1285  int64_t SizeInBits = MI.getOperand(2).getImm();
1286 
1287  // So long as the new type has more bits than the bits we're extending we
1288  // don't need to break it apart.
1289  if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1291  // We don't lose any non-extension bits by truncating the src and
1292  // sign-extending the dst.
1293  MachineOperand &MO1 = MI.getOperand(1);
1294  auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1295  MO1.setReg(TruncMIB.getReg(0));
1296 
1297  MachineOperand &MO2 = MI.getOperand(0);
1298  Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1300  MIRBuilder.buildSExt(MO2, DstExt);
1301  MO2.setReg(DstExt);
1303  return Legalized;
1304  }
1305 
1306  // Break it apart. Components below the extension point are unmodified. The
1307  // component containing the extension point becomes a narrower SEXT_INREG.
1308  // Components above it are ashr'd from the component containing the
1309  // extension point.
1310  if (SizeOp0 % NarrowSize != 0)
1311  return UnableToLegalize;
1312  int NumParts = SizeOp0 / NarrowSize;
1313 
1314  // List the registers where the destination will be scattered.
1315  SmallVector<Register, 2> DstRegs;
1316  // List the registers where the source will be split.
1317  SmallVector<Register, 2> SrcRegs;
1318 
1319  // Create all the temporary registers.
1320  for (int i = 0; i < NumParts; ++i) {
1321  Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1322 
1323  SrcRegs.push_back(SrcReg);
1324  }
1325 
1326  // Explode the big arguments into smaller chunks.
1327  MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1328 
1329  Register AshrCstReg =
1330  MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1331  .getReg(0);
1332  Register FullExtensionReg = 0;
1333  Register PartialExtensionReg = 0;
1334 
1335  // Do the operation on each small part.
1336  for (int i = 0; i < NumParts; ++i) {
1337  if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1338  DstRegs.push_back(SrcRegs[i]);
1339  else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1340  assert(PartialExtensionReg &&
1341  "Expected to visit partial extension before full");
1342  if (FullExtensionReg) {
1343  DstRegs.push_back(FullExtensionReg);
1344  continue;
1345  }
1346  DstRegs.push_back(
1347  MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1348  .getReg(0));
1349  FullExtensionReg = DstRegs.back();
1350  } else {
1351  DstRegs.push_back(
1352  MIRBuilder
1353  .buildInstr(
1354  TargetOpcode::G_SEXT_INREG, {NarrowTy},
1355  {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1356  .getReg(0));
1357  PartialExtensionReg = DstRegs.back();
1358  }
1359  }
1360 
1361  // Gather the destination registers into the final destination.
1362  Register DstReg = MI.getOperand(0).getReg();
1363  MIRBuilder.buildMerge(DstReg, DstRegs);
1364  MI.eraseFromParent();
1365  return Legalized;
1366  }
1367  case TargetOpcode::G_BSWAP:
1368  case TargetOpcode::G_BITREVERSE: {
1369  if (SizeOp0 % NarrowSize != 0)
1370  return UnableToLegalize;
1371 
1373  SmallVector<Register, 2> SrcRegs, DstRegs;
1374  unsigned NumParts = SizeOp0 / NarrowSize;
1375  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1376 
1377  for (unsigned i = 0; i < NumParts; ++i) {
1378  auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1379  {SrcRegs[NumParts - 1 - i]});
1380  DstRegs.push_back(DstPart.getReg(0));
1381  }
1382 
1383  MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1384 
1386  MI.eraseFromParent();
1387  return Legalized;
1388  }
1389  case TargetOpcode::G_PTR_ADD:
1390  case TargetOpcode::G_PTRMASK: {
1391  if (TypeIdx != 1)
1392  return UnableToLegalize;
1394  narrowScalarSrc(MI, NarrowTy, 2);
1396  return Legalized;
1397  }
1398  case TargetOpcode::G_FPTOUI:
1399  case TargetOpcode::G_FPTOSI:
1400  return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1401  case TargetOpcode::G_FPEXT:
1402  if (TypeIdx != 0)
1403  return UnableToLegalize;
1405  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1407  return Legalized;
1408  }
1409 }
1410 
// NOTE(review): the declarator for this body (listing line 1411) is absent from
// this listing; the visible code only shows that it receives a register `Val`
// and returns a Register. Restore the signature from upstream before building.
//
// Returns Val unchanged when its type is already a scalar. Otherwise returns a
// new register holding Val converted to a scalar as wide as Val's whole type,
// or an invalid Register() for a pointer in a non-integral address space.
1412  LLT Ty = MRI.getType(Val);
1413  if (Ty.isScalar())
1414  return Val;
1415 
  // NOTE(review): `DL` is used below, but its declaration (listing line 1416)
  // is absent from this listing.
1417  LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1418  if (Ty.isPointer()) {
  // Pointers in a non-integral address space are not converted; an invalid
  // Register() is returned instead.
1419  if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1420  return Register();
1421  return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1422  }
1423 
1424  Register NewVal = Val;
1425 
  // Neither scalar nor pointer, so only a vector is expected here.
1426  assert(Ty.isVector());
1427  LLT EltTy = Ty.getElementType();
  // NOTE(review): for a vector of pointers, buildPtrToInt is asked for the
  // scalar NewTy rather than a vector-of-integers type - confirm this is
  // intended.
1428  if (EltTy.isPointer())
1429  NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1430  return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1431 }
1432 
1434  unsigned OpIdx, unsigned ExtOpcode) {
  // NOTE(review): the first line of this signature (listing line 1433) is
  // absent from this listing; presumably this is the helper that call sites in
  // this file spell widenScalarSrc(MI, WideTy, OpIdx, ExtOpcode) - confirm.
  //
  // Builds an ExtOpcode instruction that produces a WideTy value from operand
  // OpIdx of MI, then rewrites that operand to read the extended register.
1435  MachineOperand &MO = MI.getOperand(OpIdx);
1436  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1437  MO.setReg(ExtB.getReg(0));
1438 }
1439 
1441  unsigned OpIdx) {
  // NOTE(review): the first line of this signature (listing line 1440) is
  // absent from this listing; presumably this is the helper that call sites in
  // this file spell narrowScalarSrc(MI, NarrowTy, OpIdx) - confirm.
  //
  // Truncates operand OpIdx of MI to NarrowTy and rewrites the operand to read
  // the truncated register.
1442  MachineOperand &MO = MI.getOperand(OpIdx);
1443  auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1444  MO.setReg(ExtB.getReg(0));
1445 }
1446 
1448  unsigned OpIdx, unsigned TruncOpcode) {
  // NOTE(review): the first line of this signature (listing line 1447) and one
  // body line (listing line 1451) are absent from this listing - restore them
  // from upstream.
  //
  // Creates a fresh WideTy register, builds a TruncOpcode instruction that
  // defines the operand's original register from it, then rewrites operand
  // OpIdx of MI to the wide register.
1449  MachineOperand &MO = MI.getOperand(OpIdx);
1450  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1452  MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1453  MO.setReg(DstExt);
1454 }
1455 
1457  unsigned OpIdx, unsigned ExtOpcode) {
  // NOTE(review): the first line of this signature (listing line 1456) and one
  // body line (listing line 1460) are absent from this listing - restore them
  // from upstream.
  //
  // Creates a fresh NarrowTy register, builds an ExtOpcode instruction that
  // defines the operand's original register from it, then rewrites operand
  // OpIdx of MI to the narrow register.
1458  MachineOperand &MO = MI.getOperand(OpIdx);
1459  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1461  MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1462  MO.setReg(DstTrunc);
1463 }
1464 
1466  unsigned OpIdx) {
  // NOTE(review): the first signature line (listing line 1465) and two body
  // lines (listing lines 1468 and 1472) are absent from this listing, so the
  // function name is not visible here. As shown, `Dst` is read but never used;
  // presumably a dropped line reconnects it to DstExt - restore from upstream.
  //
  // Visible effect: operand OpIdx of MI is rewritten to a fresh WideTy
  // register.
1467  MachineOperand &MO = MI.getOperand(OpIdx);
1469  Register Dst = MO.getReg();
1470  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1471  MO.setReg(DstExt);
1473 }
1474 
1476  unsigned OpIdx) {
  // NOTE(review): the first signature line (listing line 1475) and the body
  // lines 1478-1479 are absent from this listing; as shown this only binds the
  // operand and does nothing with it - restore the body from upstream.
1477  MachineOperand &MO = MI.getOperand(OpIdx);
1480 }
1481 
/// Bitcast source operand \p OpIdx of \p MI to \p CastTy and make \p MI read
/// the result of the cast instead of the original register.
1482 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1483  MachineOperand &Op = MI.getOperand(OpIdx);
1484  Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1485 }
1486 
/// Make \p MI define operand \p OpIdx as a fresh \p CastTy register, and
/// bitcast that register back into the operand's original register.
1487 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1488  MachineOperand &MO = MI.getOperand(OpIdx);
1489  Register CastDst = MRI.createGenericVirtualRegister(CastTy);
  // NOTE(review): listing line 1490 is absent from this listing - restore it
  // from upstream.
1491  MIRBuilder.buildBitcast(MO, CastDst);
1492  MO.setReg(CastDst);
1493 }
1494 
/// Widen the source type (type index 1) of a merge of scalars to \p WideTy.
///
/// If \p WideTy is at least as large as the destination, the sources are
/// zero-extended, shifted into position and OR-ed together in \p WideTy.
/// Otherwise the sources are split into GCD-sized pieces, padded with undef,
/// and re-merged into \p WideTy pieces that are merged into the destination.
/// Returns UnableToLegalize for any other type index or a vector destination.
///
/// NOTE(review): the return-type line (listing line 1495) and listing line
/// 1571 are absent from this listing - restore them from upstream.
1496 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1497  LLT WideTy) {
1498  if (TypeIdx != 1)
1499  return UnableToLegalize;
1500 
1501  Register DstReg = MI.getOperand(0).getReg();
1502  LLT DstTy = MRI.getType(DstReg);
1503  if (DstTy.isVector())
1504  return UnableToLegalize;
1505 
1506  Register Src1 = MI.getOperand(1).getReg();
1507  LLT SrcTy = MRI.getType(Src1);
1508  const int DstSize = DstTy.getSizeInBits();
1509  const int SrcSize = SrcTy.getSizeInBits();
1510  const int WideSize = WideTy.getSizeInBits();
  // Number of WideTy-sized pieces needed to cover the destination, rounded up.
1511  const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1512 
1513  unsigned NumOps = MI.getNumOperands();
1514  unsigned NumSrc = MI.getNumOperands() - 1;
  // Width in bits that each source operand contributes to the destination.
1515  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1516 
1517  if (WideSize >= DstSize) {
1518  // Directly pack the bits in the target type.
1519  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1520 
  // Operand I (I >= 2) lands at bit offset (I - 1) * PartSize of the result.
1521  for (unsigned I = 2; I != NumOps; ++I) {
1522  const unsigned Offset = (I - 1) * PartSize;
1523 
1524  Register SrcReg = MI.getOperand(I).getReg();
1525  assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1526 
1527  auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1528 
  // The last OR writes straight into DstReg when WideTy already is DstTy, so
  // no trailing trunc or cast is needed in that case.
1529  Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1530  MRI.createGenericVirtualRegister(WideTy);
1531 
1532  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1533  auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1534  MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1535  ResultReg = NextResult;
1536  }
1537 
  // Convert the packed WideTy value to the destination: truncate if it is
  // wider, or cast back to a pointer if the destination is a pointer.
1538  if (WideSize > DstSize)
1539  MIRBuilder.buildTrunc(DstReg, ResultReg);
1540  else if (DstTy.isPointer())
1541  MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1542 
1543  MI.eraseFromParent();
1544  return Legalized;
1545  }
1546 
1547  // Unmerge the original values to the GCD type, and recombine to the next
1548  // multiple greater than the original type.
1549  //
1550  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1551  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1552  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1553  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1554  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1555  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1556  // %12:_(s12) = G_MERGE_VALUES %10, %11
1557  //
1558  // Padding with undef if necessary:
1559  //
1560  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1561  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1562  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1563  // %7:_(s2) = G_IMPLICIT_DEF
1564  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1565  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1566  // %10:_(s12) = G_MERGE_VALUES %8, %9
1567 
1568  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1569  LLT GCDTy = LLT::scalar(GCD);
1570 
1572  SmallVector<Register, 8> NewMergeRegs;
1573  SmallVector<Register, 8> Unmerges;
  // Scalar type wide enough to hold NumMerge pieces of WideTy.
1574  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1575 
1576  // Decompose the original operands if they don't evenly divide.
1577  for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) {
1578  Register SrcReg = MO.getReg();
1579  if (GCD == SrcSize) {
1580  Unmerges.push_back(SrcReg);
1581  } else {
1582  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1583  for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1584  Unmerges.push_back(Unmerge.getReg(J));
1585  }
1586  }
1587 
1588  // Pad with undef to the next size that is a multiple of the requested size.
  // NOTE(review): Unmerges holds GCD-sized pieces, but the count targeted here
  // is NumMerge * WideSize (a number of bits). Only the first
  // NumMerge * PartsPerGCD entries are consumed by the slicing loop below, so
  // this appears to pad more than needed - confirm whether
  // NumMerge * PartsPerGCD was intended.
1589  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1590  Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1591  for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1592  Unmerges.push_back(UndefReg);
1593  }
1594 
  // Number of GCD-sized pieces that make up one WideTy value.
1595  const int PartsPerGCD = WideSize / GCD;
1596 
1597  // Build merges of each piece.
1598  ArrayRef<Register> Slicer(Unmerges);
1599  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1600  auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1601  NewMergeRegs.push_back(Merge.getReg(0));
1602  }
1603 
1604  // A truncate may be necessary if the requested type doesn't evenly divide the
1605  // original result type.
1606  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1607  MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1608  } else {
1609  auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1610  MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1611  }
1612 
1613  MI.eraseFromParent();
1614  return Legalized;
1615 }
1616 
// NOTE(review): the declarator for this body (listing line 1617) is absent from
// this listing; the visible code uses a type `WideTy` and a register `OrigReg`
// and returns a Register. Restore the signature from upstream.
//
// Creates and returns a fresh WideTy register, and emits the instructions that
// define OrigReg from it: the wide register is (if needed) merged with undef
// up to the LCM type of WideTy and OrigReg's type, then unmerged so that the
// first result is OrigReg and the remaining results are unused registers.
1618  Register WideReg = MRI.createGenericVirtualRegister(WideTy);
1619  LLT OrigTy = MRI.getType(OrigReg);
1620  LLT LCMTy = getLCMType(WideTy, OrigTy);
1621 
  // How many WideTy values make up LCMTy, and how many OrigTy values it
  // splits into.
1622  const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
1623  const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
1624 
1625  Register UnmergeSrc = WideReg;
1626 
1627  // Create a merge to the LCM type, padding with undef
1628  // %0:_(<3 x s32>) = G_FOO => <4 x s32>
1629  // =>
1630  // %1:_(<4 x s32>) = G_FOO
1631  // %2:_(<4 x s32>) = G_IMPLICIT_DEF
1632  // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
1633  // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
1634  if (NumMergeParts > 1) {
1635  Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
  // All parts start as undef; only the first one carries the real value.
1636  SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
1637  MergeParts[0] = WideReg;
1638  UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
1639  }
1640 
1641  // Unmerge to the original register and pad with dead defs.
1642  SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
1643  UnmergeResults[0] = OrigReg;
1644  for (int I = 1; I != NumUnmergeParts; ++I)
1645  UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
1646 
1647  MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
1648  return WideReg;
1649 }
1650 
/// Widen the destination type (type index 0) of an unmerge of a non-vector
/// source into scalar results, using \p WideTy.
///
/// If \p WideTy is at least as large as the source, each result is extracted
/// from the (possibly any-extended) source with a shift and a truncate.
/// Otherwise the source is any-extended to the LCM type, unmerged into
/// \p WideTy pieces, and those pieces are unmerged / re-merged into the
/// original results. Returns UnableToLegalize for any other type index, for
/// vector sources, for non-scalar results, and for the pointer cases rejected
/// below.
///
/// NOTE(review): the return-type line (listing line 1651) and listing lines
/// 1670 and 1760 are absent from this listing - restore them from upstream.
1652 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1653  LLT WideTy) {
1654  if (TypeIdx != 0)
1655  return UnableToLegalize;
1656 
  // The source is the last operand; every operand before it is a result.
1657  int NumDst = MI.getNumOperands() - 1;
1658  Register SrcReg = MI.getOperand(NumDst).getReg();
1659  LLT SrcTy = MRI.getType(SrcReg);
1660  if (SrcTy.isVector())
1661  return UnableToLegalize;
1662 
1663  Register Dst0Reg = MI.getOperand(0).getReg();
1664  LLT DstTy = MRI.getType(Dst0Reg);
1665  if (!DstTy.isScalar())
1666  return UnableToLegalize;
1667 
1668  if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1669  if (SrcTy.isPointer()) {
  // NOTE(review): `DL` is used below, but its declaration (listing line 1670)
  // is absent from this listing.
1671  if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1672  LLVM_DEBUG(
1673  dbgs() << "Not casting non-integral address space integer\n");
1674  return UnableToLegalize;
1675  }
1676 
  // Treat the pointer as an integer of the same width from here on.
1677  SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1678  SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1679  }
1680 
1681  // Widen SrcTy to WideTy. This does not affect the result, but since the
1682  // user requested this size, it is probably better handled than SrcTy and
1683  // should reduce the total number of legalization artifacts.
1684  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1685  SrcTy = WideTy;
1686  SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1687  }
1688 
1689  // There's no unmerge type to target. Directly extract the bits from the
1690  // source type.
1691  unsigned DstSize = DstTy.getSizeInBits();
1692 
  // Result 0 is the low DstSize bits; result I is the source shifted right by
  // DstSize * I and then truncated.
1693  MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1694  for (int I = 1; I != NumDst; ++I) {
1695  auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1696  auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1697  MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1698  }
1699 
1700  MI.eraseFromParent();
1701  return Legalized;
1702  }
1703 
1704  // Extend the source to a wider type.
1705  LLT LCMTy = getLCMType(SrcTy, WideTy);
1706 
1707  Register WideSrc = SrcReg;
1708  if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
1709  // TODO: If this is an integral address space, cast to integer and anyext.
1710  if (SrcTy.isPointer()) {
1711  LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
1712  return UnableToLegalize;
1713  }
1714 
1715  WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
1716  }
1717 
1718  auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
1719 
1720  // Create a sequence of unmerges and merges to the original results. Since we
1721  // may have widened the source, we will need to pad the results with dead defs
1722  // to cover the source register.
1723  // e.g. widen s48 to s64:
1724  // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
1725  //
1726  // =>
1727  // %4:_(s192) = G_ANYEXT %0:_(s96)
1728  // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
1729  // ; unpack to GCD type, with extra dead defs
1730  // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
1731  // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
1732  // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
1733  // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
1734  // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
1735  const LLT GCDTy = getGCDType(WideTy, DstTy);
  // Number of WideTy pieces produced by the unmerge above (its defs).
1736  const int NumUnmerge = Unmerge->getNumOperands() - 1;
  // Number of GCD-sized pieces that make up one original result.
1737  const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
1738 
1739  // Directly unmerge to the destination without going through a GCD type
1740  // if possible
1741  if (PartsPerRemerge == 1) {
1742  const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
1743 
1744  for (int I = 0; I != NumUnmerge; ++I) {
1745  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
1746 
1747  for (int J = 0; J != PartsPerUnmerge; ++J) {
1748  int Idx = I * PartsPerUnmerge + J;
1749  if (Idx < NumDst)
1750  MIB.addDef(MI.getOperand(Idx).getReg());
1751  else {
1752  // Create dead def for excess components.
1753  MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
1754  }
1755  }
1756 
1757  MIB.addUse(Unmerge.getReg(I));
1758  }
1759  } else {
  // NOTE(review): `Parts` is used below, but its declaration (listing line
  // 1760) is absent from this listing.
1761  for (int J = 0; J != NumUnmerge; ++J)
1762  extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
1763 
  // Each original result is rebuilt from PartsPerRemerge consecutive pieces.
1764  SmallVector<Register, 8> RemergeParts;
1765  for (int I = 0; I != NumDst; ++I) {
1766  for (int J = 0; J < PartsPerRemerge; ++J) {
1767  const int Idx = I * PartsPerRemerge + J;
1768  RemergeParts.emplace_back(Parts[Idx]);
1769  }
1770 
1771  MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
1772  RemergeParts.clear();
1773  }
1774  }
1775 
1776  MI.eraseFromParent();
1777  return Legalized;
1778 }
1779 
/// Widen one of the types of an extract to \p WideTy.
///
/// For type index 0 (scalars only) the extract is replaced by an optional
/// any-extend of the source, a logical shift right by the bit offset, and a
/// truncate to the destination. For any other type index the source operand
/// is any-extended in place; for vector sources the offset is rescaled and
/// the destination is widened as well.
///
/// NOTE(review): the return-type line (listing line 1780) and listing lines
/// 1798, 1833, 1835, 1848 and 1854 are absent from this listing - restore
/// them from upstream.
1781 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1782  LLT WideTy) {
1783  Register DstReg = MI.getOperand(0).getReg();
1784  Register SrcReg = MI.getOperand(1).getReg();
1785  LLT SrcTy = MRI.getType(SrcReg);
1786 
1787  LLT DstTy = MRI.getType(DstReg);
  // Operand 2 is the extraction offset (used below as a shift amount in bits).
1788  unsigned Offset = MI.getOperand(2).getImm();
1789 
1790  if (TypeIdx == 0) {
1791  if (SrcTy.isVector() || DstTy.isVector())
1792  return UnableToLegalize;
1793 
1794  SrcOp Src(SrcReg);
1795  if (SrcTy.isPointer()) {
1796  // Extracts from pointers can be handled only if they are really just
1797  // simple integers.
  // NOTE(review): `DL` is used below, but its declaration (listing line 1798)
  // is absent from this listing.
1799  if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1800  return UnableToLegalize;
1801 
1802  LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1803  Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1804  SrcTy = SrcAsIntTy;
1805  }
1806 
1807  if (DstTy.isPointer())
1808  return UnableToLegalize;
1809 
1810  if (Offset == 0) {
1811  // Avoid a shift in the degenerate case.
1812  MIRBuilder.buildTrunc(DstReg,
1813  MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1814  MI.eraseFromParent();
1815  return Legalized;
1816  }
1817 
1818  // Do a shift in the source type.
  // If WideTy is larger than the source, extend first and shift in WideTy.
1819  LLT ShiftTy = SrcTy;
1820  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1821  Src = MIRBuilder.buildAnyExt(WideTy, Src);
1822  ShiftTy = WideTy;
1823  }
1824 
1825  auto LShr = MIRBuilder.buildLShr(
1826  ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1827  MIRBuilder.buildTrunc(DstReg, LShr);
1828  MI.eraseFromParent();
1829  return Legalized;
1830  }
1831 
  // Remaining cases widen the source operand of the existing instruction.
1832  if (SrcTy.isScalar()) {
1834  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1836  return Legalized;
1837  }
1838 
1839  if (!SrcTy.isVector())
1840  return UnableToLegalize;
1841 
  // Vector sources are only handled when exactly one element is extracted at
  // an element-aligned offset.
1842  if (DstTy != SrcTy.getElementType())
1843  return UnableToLegalize;
1844 
1845  if (Offset % SrcTy.getScalarSizeInBits() != 0)
1846  return UnableToLegalize;
1847 
1849  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1850 
  // Rescale the offset by the ratio of the widened size to the original
  // source size.
1851  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1852  Offset);
1853  widenScalarDst(MI, WideTy.getScalarType(), 0);
1855  return Legalized;
1856 }
1857 
/// Widen type index 0 of an insert to the scalar type \p WideTy by
/// any-extending source operand 1 and widening the destination in place.
/// Returns UnableToLegalize for any other type index or a vector \p WideTy.
///
/// NOTE(review): the return-type line (listing line 1858) and listing lines
/// 1863 and 1866 are absent from this listing - restore them from upstream.
1859 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1860  LLT WideTy) {
1861  if (TypeIdx != 0 || WideTy.isVector())
1862  return UnableToLegalize;
1864  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
  // Relies on widenScalarDst's default operand index and truncation opcode,
  // which are declared outside this listing.
1865  widenScalarDst(MI, WideTy);
1867  return Legalized;
1868 }
1869 
/// Widen one of the types of an add/sub-with-overflow instruction
/// (G_[SU]ADDO, G_[SU]SUBO, G_[SU]ADDE, G_[SU]SUBE) to \p WideTy.
///
/// For type index 1 the carry-out (and carry-in, if present) operands are
/// widened in place. Otherwise both value operands are sign- or zero-extended
/// to \p WideTy, the arithmetic is done in \p WideTy, and overflow is
/// detected by checking whether truncating the wide result to the original
/// type and re-extending it changes the value.
///
/// NOTE(review): the return-type line (listing line 1870) and listing lines
/// 1920 and 1925 are absent from this listing - restore them from upstream.
1871 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
1872  LLT WideTy) {
  // Opcode is the operation performed in WideTy; ExtOpcode is how the inputs
  // are extended (G_SEXT for the signed variants, G_ZEXT for the unsigned
  // ones). CarryIn is set only for the *E variants, from operand 4.
1873  unsigned Opcode;
1874  unsigned ExtOpcode;
1875  Optional<Register> CarryIn = None;
1876  switch (MI.getOpcode()) {
1877  default:
1878  llvm_unreachable("Unexpected opcode!");
1879  case TargetOpcode::G_SADDO:
1880  Opcode = TargetOpcode::G_ADD;
1881  ExtOpcode = TargetOpcode::G_SEXT;
1882  break;
1883  case TargetOpcode::G_SSUBO:
1884  Opcode = TargetOpcode::G_SUB;
1885  ExtOpcode = TargetOpcode::G_SEXT;
1886  break;
1887  case TargetOpcode::G_UADDO:
1888  Opcode = TargetOpcode::G_ADD;
1889  ExtOpcode = TargetOpcode::G_ZEXT;
1890  break;
1891  case TargetOpcode::G_USUBO:
1892  Opcode = TargetOpcode::G_SUB;
1893  ExtOpcode = TargetOpcode::G_ZEXT;
1894  break;
  // The signed carry-in variants are computed with the unsigned carry opcodes
  // on sign-extended inputs.
1895  case TargetOpcode::G_SADDE:
1896  Opcode = TargetOpcode::G_UADDE;
1897  ExtOpcode = TargetOpcode::G_SEXT;
1898  CarryIn = MI.getOperand(4).getReg();
1899  break;
1900  case TargetOpcode::G_SSUBE:
1901  Opcode = TargetOpcode::G_USUBE;
1902  ExtOpcode = TargetOpcode::G_SEXT;
1903  CarryIn = MI.getOperand(4).getReg();
1904  break;
1905  case TargetOpcode::G_UADDE:
1906  Opcode = TargetOpcode::G_UADDE;
1907  ExtOpcode = TargetOpcode::G_ZEXT;
1908  CarryIn = MI.getOperand(4).getReg();
1909  break;
1910  case TargetOpcode::G_USUBE:
1911  Opcode = TargetOpcode::G_USUBE;
1912  ExtOpcode = TargetOpcode::G_ZEXT;
1913  CarryIn = MI.getOperand(4).getReg();
1914  break;
1915  }
1916 
  // Type index 1: widen the carry-out def (operand 1) and, when present, the
  // carry-in use (operand 4) of the existing instruction.
1917  if (TypeIdx == 1) {
1918  unsigned BoolExtOp = MIRBuilder.getBoolExtOp(WideTy.isVector(), false);
1919 
1921  widenScalarDst(MI, WideTy, 1);
1922  if (CarryIn)
1923  widenScalarSrc(MI, WideTy, 4, BoolExtOp);
1924 
1926  return Legalized;
1927  }
1928 
1929  auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
1930  auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
1931  // Do the arithmetic in the larger type.
1932  Register NewOp;
1933  if (CarryIn) {
  // Only result 0 of the wide carry operation is used; its own carry-out is
  // ignored and the overflow flag is recomputed by the compare below.
1934  LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
1935  NewOp = MIRBuilder
1936  .buildInstr(Opcode, {WideTy, CarryOutTy},
1937  {LHSExt, RHSExt, *CarryIn})
1938  .getReg(0);
1939  } else {
1940  NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
1941  }
1942  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1943  auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
1944  auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
1945  // There is no overflow if the ExtOp is the same as NewOp.
1946  MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
1947  // Now trunc the NewOp to the original result.
1948  MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1949  MI.eraseFromParent();
1950  return Legalized;
1951 }
1952 
// Widen a saturating add/sub/shift (G_[SU]ADDSAT, G_[SU]SUBSAT, G_[SU]SHLSAT)
// by performing the same opcode at \p WideTy on inputs shifted into the high
// bits, then shifting the result back down and truncating into the original
// destination. \p TypeIdx is not consulted by this body. MI is erased.
// NOTE(review): the embedded listing numbers skip 1953, so the extraction
// appears to have dropped a line here -- confirm against upstream.
1954 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
1955  LLT WideTy) {
1956  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
1957  MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
1958  MI.getOpcode() == TargetOpcode::G_SSHLSAT;
1959  bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
1960  MI.getOpcode() == TargetOpcode::G_USHLSAT;
1961  // We can convert this to:
1962  // 1. Any extend iN to iM
1963  // 2. SHL by M-N
1964  // 3. [US][ADD|SUB|SHL]SAT
1965  // 4. L/ASHR by M-N
1966  //
1967  // It may be more efficient to lower this to a min and a max operation in
1968  // the higher precision arithmetic if the promoted operation isn't legal,
1969  // but this decision is up to the target's lowering request.
1970  Register DstReg = MI.getOperand(0).getReg();
1971 
  // SHLAmount is M-N: the difference between the wide and original scalar
  // widths.
1972  unsigned NewBits = WideTy.getScalarSizeInBits();
1973  unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
1974 
1975  // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
1976  // must not left shift the RHS to preserve the shift amount.
1977  auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
1978  auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
1979  : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
1980  auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
1981  auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
1982  auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
1983 
  // Re-issue the original opcode at the wide type, carrying over MI's flags.
1984  auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
1985  {ShiftL, ShiftR}, MI.getFlags());
1986 
1987  // Use a shift that will preserve the number of sign bits when the trunc is
1988  // folded away.
1989  auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
1990  : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
1991 
1992  MIRBuilder.buildTrunc(DstReg, Result);
1993  MI.eraseFromParent();
1994  return Legalized;
1995 }
1996 
// Widen a G_SMULO / G_UMULO. For type index 1 only the overflow result is
// widened in place. Otherwise the multiply is rebuilt at \p WideTy on
// sign-/zero-extended inputs, the product is truncated into the original
// result, and the overflow flag is recomputed; MI is then erased.
// NOTE(review): the embedded listing numbers skip 1997, 2001 and 2003, so the
// extraction appears to have dropped lines here -- confirm against upstream.
1998 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
1999  LLT WideTy) {
2000  if (TypeIdx == 1) {
2002  widenScalarDst(MI, WideTy, 1);
2004  return Legalized;
2005  }
2006 
2007  bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
2008  Register Result = MI.getOperand(0).getReg();
2009  Register OriginalOverflow = MI.getOperand(1).getReg();
2010  Register LHS = MI.getOperand(2).getReg();
2011  Register RHS = MI.getOperand(3).getReg();
2012  LLT SrcTy = MRI.getType(LHS);
2013  LLT OverflowTy = MRI.getType(OriginalOverflow);
2014  unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
2015 
2016  // To determine if the result overflowed in the larger type, we extend the
2017  // input to the larger type, do the multiply (checking if it overflows),
2018  // then also check the high bits of the result to see if overflow happened
2019  // there.
2020  unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2021  auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
2022  auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
2023 
  // The wide instruction keeps MI's opcode; its overflow result keeps the
  // original overflow type.
2024  auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
2025  {LeftOperand, RightOperand});
2026  auto Mul = Mulo->getOperand(0);
2027  MIRBuilder.buildTrunc(Result, Mul);
2028 
2029  MachineInstrBuilder ExtResult;
2030  // Overflow occurred if it occurred in the larger type, or if the high part
2031  // of the result does not zero/sign-extend the low part. Check this second
2032  // possibility first.
2033  if (IsSigned) {
2034  // For signed, overflow occurred when the high part does not sign-extend
2035  // the low part.
2036  ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
2037  } else {
2038  // Unsigned overflow occurred when the high part does not zero-extend the
2039  // low part.
2040  ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
2041  }
2042 
2043  // Multiplication cannot overflow if the WideTy is >= 2 * original width,
2044  // so we don't need to check the overflow result of larger type Mulo.
2045  if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
2046  auto Overflow =
2047  MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
2048  // Finally check if the multiplication in the larger type itself overflowed.
2049  MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
2050  } else {
2051  MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
2052  }
2053  MI.eraseFromParent();
2054  return Legalized;
2055 }
2056 
// Widen the type selected by \p TypeIdx of \p MI to \p WideTy, dispatching on
// MI's opcode. Opcodes without a case return UnableToLegalize. Most cases
// extend the affected source operands (choosing any/sign/zero/FP extension
// per opcode) and/or widen the destination in place; a few build replacement
// instructions and erase MI.
// NOTE(review): the embedded listing numbers skip many lines inside this
// function (e.g. 2073, 2076, 2436, 2440, 2512, 2523), so the extraction
// appears to have dropped statements -- confirm against upstream before
// relying on any case as shown.
2058 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
2059  switch (MI.getOpcode()) {
2060  default:
2061  return UnableToLegalize;
2062  case TargetOpcode::G_ATOMICRMW_XCHG:
2063  case TargetOpcode::G_ATOMICRMW_ADD:
2064  case TargetOpcode::G_ATOMICRMW_SUB:
2065  case TargetOpcode::G_ATOMICRMW_AND:
2066  case TargetOpcode::G_ATOMICRMW_OR:
2067  case TargetOpcode::G_ATOMICRMW_XOR:
2068  case TargetOpcode::G_ATOMICRMW_MIN:
2069  case TargetOpcode::G_ATOMICRMW_MAX:
2070  case TargetOpcode::G_ATOMICRMW_UMIN:
2071  case TargetOpcode::G_ATOMICRMW_UMAX:
2072  assert(TypeIdx == 0 && "atomicrmw with second scalar type");
2074  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2075  widenScalarDst(MI, WideTy, 0);
2077  return Legalized;
2078  case TargetOpcode::G_ATOMIC_CMPXCHG:
2079  assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
2081  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2082  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2083  widenScalarDst(MI, WideTy, 0);
2085  return Legalized;
2086  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
2087  if (TypeIdx == 0) {
2089  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2090  widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2091  widenScalarDst(MI, WideTy, 0);
2093  return Legalized;
2094  }
2095  assert(TypeIdx == 1 &&
2096  "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2098  widenScalarDst(MI, WideTy, 1);
2100  return Legalized;
  // The following opcodes are delegated to dedicated helpers.
2101  case TargetOpcode::G_EXTRACT:
2102  return widenScalarExtract(MI, TypeIdx, WideTy);
2103  case TargetOpcode::G_INSERT:
2104  return widenScalarInsert(MI, TypeIdx, WideTy);
2105  case TargetOpcode::G_MERGE_VALUES:
2106  return widenScalarMergeValues(MI, TypeIdx, WideTy);
2107  case TargetOpcode::G_UNMERGE_VALUES:
2108  return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2109  case TargetOpcode::G_SADDO:
2110  case TargetOpcode::G_SSUBO:
2111  case TargetOpcode::G_UADDO:
2112  case TargetOpcode::G_USUBO:
2113  case TargetOpcode::G_SADDE:
2114  case TargetOpcode::G_SSUBE:
2115  case TargetOpcode::G_UADDE:
2116  case TargetOpcode::G_USUBE:
2117  return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2118  case TargetOpcode::G_UMULO:
2119  case TargetOpcode::G_SMULO:
2120  return widenScalarMulo(MI, TypeIdx, WideTy);
2121  case TargetOpcode::G_SADDSAT:
2122  case TargetOpcode::G_SSUBSAT:
2123  case TargetOpcode::G_SSHLSAT:
2124  case TargetOpcode::G_UADDSAT:
2125  case TargetOpcode::G_USUBSAT:
2126  case TargetOpcode::G_USHLSAT:
2127  return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2128  case TargetOpcode::G_CTTZ:
2129  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2130  case TargetOpcode::G_CTLZ:
2131  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2132  case TargetOpcode::G_CTPOP: {
  // Type index 0 is the count result: just widen the destination.
2133  if (TypeIdx == 0) {
2135  widenScalarDst(MI, WideTy, 0);
2137  return Legalized;
2138  }
2139 
2140  Register SrcReg = MI.getOperand(1).getReg();
2141 
2142  // First extend the input.
  // The two CTTZ forms any-extend; the CTLZ forms and CTPOP zero-extend.
2143  unsigned ExtOpc = MI.getOpcode() == TargetOpcode::G_CTTZ ||
2144  MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF
2145  ? TargetOpcode::G_ANYEXT
2146  : TargetOpcode::G_ZEXT;
2147  auto MIBSrc = MIRBuilder.buildInstr(ExtOpc, {WideTy}, {SrcReg});
2148  LLT CurTy = MRI.getType(SrcReg);
2149  unsigned NewOpc = MI.getOpcode();
2150  if (NewOpc == TargetOpcode::G_CTTZ) {
2151  // The count is the same in the larger type except if the original
2152  // value was zero. This can be handled by setting the bit just off
2153  // the top of the original type.
2154  auto TopBit =
2155  APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2156  MIBSrc = MIRBuilder.buildOr(
2157  WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2158  // Now we know the operand is non-zero, use the more relaxed opcode.
2159  NewOpc = TargetOpcode::G_CTTZ_ZERO_UNDEF;
2160  }
2161 
2162  // Perform the operation at the larger size.
2163  auto MIBNewOp = MIRBuilder.buildInstr(NewOpc, {WideTy}, {MIBSrc});
2164  // This is already the correct result for CTPOP and CTTZs
2165  if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
2166  MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2167  // The correct result is NewOp - (Difference in widety and current ty).
2168  unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2169  MIBNewOp = MIRBuilder.buildSub(
2170  WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2171  }
2172 
2173  MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2174  MI.eraseFromParent();
2175  return Legalized;
2176  }
2177  case TargetOpcode::G_BSWAP: {
2179  Register DstReg = MI.getOperand(0).getReg();
2180 
2181  Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2182  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2183  Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2184  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2185 
  // MI now defines the wide register DstExt instead of DstReg.
2186  MI.getOperand(0).setReg(DstExt);
2187 
2189 
  // Shift the wide result right by the width difference, then truncate into
  // the original destination.
2190  LLT Ty = MRI.getType(DstReg);
2191  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2192  MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2193  MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2194 
2195  MIRBuilder.buildTrunc(DstReg, ShrReg);
2197  return Legalized;
2198  }
2199  case TargetOpcode::G_BITREVERSE: {
2201 
2202  Register DstReg = MI.getOperand(0).getReg();
2203  LLT Ty = MRI.getType(DstReg);
2204  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2205 
2206  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2207  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2208  MI.getOperand(0).setReg(DstExt);
2210 
  // Same fix-up as G_BSWAP: shift right by the width difference and truncate.
2211  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2212  auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2213  MIRBuilder.buildTrunc(DstReg, Shift);
2215  return Legalized;
2216  }
2217  case TargetOpcode::G_FREEZE:
2219  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2220  widenScalarDst(MI, WideTy);
2222  return Legalized;
2223 
2224  case TargetOpcode::G_ABS:
2226  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2227  widenScalarDst(MI, WideTy);
2229  return Legalized;
2230 
2231  case TargetOpcode::G_ADD:
2232  case TargetOpcode::G_AND:
2233  case TargetOpcode::G_MUL:
2234  case TargetOpcode::G_OR:
2235  case TargetOpcode::G_XOR:
2236  case TargetOpcode::G_SUB:
2237  // Perform operation at larger width (any extension is fine here, high bits
2238  // don't affect the result) and then truncate the result back to the
2239  // original type.
2241  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2242  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2243  widenScalarDst(MI, WideTy);
2245  return Legalized;
2246 
2247  case TargetOpcode::G_SBFX:
2248  case TargetOpcode::G_UBFX:
2250 
  // Type index 0 widens the value (operand 1) and the result; otherwise
  // operands 2 and 3 are zero-extended.
2251  if (TypeIdx == 0) {
2252  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2253  widenScalarDst(MI, WideTy);
2254  } else {
2255  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2256  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2257  }
2258 
2260  return Legalized;
2261 
2262  case TargetOpcode::G_SHL:
2264 
2265  if (TypeIdx == 0) {
2266  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2267  widenScalarDst(MI, WideTy);
2268  } else {
2269  assert(TypeIdx == 1);
2270  // The "number of bits to shift" operand must preserve its value as an
2271  // unsigned integer:
2272  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2273  }
2274 
2276  return Legalized;
2277 
  // Signed operations: both inputs are sign-extended.
2278  case TargetOpcode::G_SDIV:
2279  case TargetOpcode::G_SREM:
2280  case TargetOpcode::G_SMIN:
2281  case TargetOpcode::G_SMAX:
2283  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2284  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2285  widenScalarDst(MI, WideTy);
2287  return Legalized;
2288 
2289  case TargetOpcode::G_SDIVREM:
2291  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2292  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2293  widenScalarDst(MI, WideTy);
2294  widenScalarDst(MI, WideTy, 1);
2296  return Legalized;
2297 
2298  case TargetOpcode::G_ASHR:
2299  case TargetOpcode::G_LSHR:
2301 
2302  if (TypeIdx == 0) {
  // The shifted value is sign-extended for G_ASHR and zero-extended for
  // G_LSHR.
2303  unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2304  TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2305 
2306  widenScalarSrc(MI, WideTy, 1, CvtOp);
2307  widenScalarDst(MI, WideTy);
2308  } else {
2309  assert(TypeIdx == 1);
2310  // The "number of bits to shift" operand must preserve its value as an
2311  // unsigned integer:
2312  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2313  }
2314 
2316  return Legalized;
  // Unsigned operations: both inputs are zero-extended.
2317  case TargetOpcode::G_UDIV:
2318  case TargetOpcode::G_UREM:
2319  case TargetOpcode::G_UMIN:
2320  case TargetOpcode::G_UMAX:
2322  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2323  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2324  widenScalarDst(MI, WideTy);
2326  return Legalized;
2327 
2328  case TargetOpcode::G_UDIVREM:
2330  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2331  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2332  widenScalarDst(MI, WideTy);
2333  widenScalarDst(MI, WideTy, 1);
2335  return Legalized;
2336 
2337  case TargetOpcode::G_SELECT:
2339  if (TypeIdx == 0) {
2340  // Perform operation at larger width (any extension is fine here, high
2341  // bits don't affect the result) and then truncate the result back to the
2342  // original type.
2343  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2344  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2345  widenScalarDst(MI, WideTy);
2346  } else {
2347  bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2348  // Explicit extension is required here since high bits affect the result.
2349  widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2350  }
2352  return Legalized;
2353 
2354  case TargetOpcode::G_FPTOSI:
2355  case TargetOpcode::G_FPTOUI:
2357 
2358  if (TypeIdx == 0)
2359  widenScalarDst(MI, WideTy);
2360  else
2361  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2362 
2364  return Legalized;
2365  case TargetOpcode::G_SITOFP:
2367 
  // A widened floating-point result is narrowed back with G_FPTRUNC; the
  // integer source is sign-extended.
2368  if (TypeIdx == 0)
2369  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2370  else
2371  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2372 
2374  return Legalized;
2375  case TargetOpcode::G_UITOFP:
2377 
2378  if (TypeIdx == 0)
2379  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2380  else
2381  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2382 
2384  return Legalized;
2385  case TargetOpcode::G_LOAD:
2386  case TargetOpcode::G_SEXTLOAD:
2387  case TargetOpcode::G_ZEXTLOAD:
2389  widenScalarDst(MI, WideTy);
2391  return Legalized;
2392 
2393  case TargetOpcode::G_STORE: {
2394  if (TypeIdx != 0)
2395  return UnableToLegalize;
2396 
2397  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2398  if (!Ty.isScalar())
2399  return UnableToLegalize;
2400 
2402 
  // A 1-bit stored value is zero-extended; wider values are any-extended.
2403  unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2404  TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2405  widenScalarSrc(MI, WideTy, 0, ExtType);
2406 
2408  return Legalized;
2409  }
2410  case TargetOpcode::G_CONSTANT: {
2411  MachineOperand &SrcMO = MI.getOperand(1);
  // The extension kind for the immediate comes from the legalizer info (LI).
2413  unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2414  MRI.getType(MI.getOperand(0).getReg()));
2415  assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2416  ExtOpc == TargetOpcode::G_ANYEXT) &&
2417  "Illegal Extend");
2418  const APInt &SrcVal = SrcMO.getCImm()->getValue();
  // G_SEXT sign-extends the value; G_ZEXT and G_ANYEXT both zero-extend it.
2419  const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2420  ? SrcVal.sext(WideTy.getSizeInBits())
2421  : SrcVal.zext(WideTy.getSizeInBits());
2423  SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2424 
2425  widenScalarDst(MI, WideTy);
2427  return Legalized;
2428  }
2429  case TargetOpcode::G_FCONSTANT: {
2430  MachineOperand &SrcMO = MI.getOperand(1);
2432  APFloat Val = SrcMO.getFPImm()->getValueAPF();
2433  bool LosesInfo;
  // Only 32- and 64-bit wide types are handled; other widths are rejected.
  // NOTE(review): the call heads for the two `&LosesInfo);` lines below
  // (listing lines 2436 and 2440) are missing from this extraction.
2434  switch (WideTy.getSizeInBits()) {
2435  case 32:
2437  &LosesInfo);
2438  break;
2439  case 64:
2441  &LosesInfo);
2442  break;
2443  default:
2444  return UnableToLegalize;
2445  }
2446 
2447  assert(!LosesInfo && "extend should always be lossless");
2448 
2450  SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
2451 
2452  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2454  return Legalized;
2455  }
2456  case TargetOpcode::G_IMPLICIT_DEF: {
2458  widenScalarDst(MI, WideTy);
2460  return Legalized;
2461  }
2462  case TargetOpcode::G_BRCOND:
2464  widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2466  return Legalized;
2467 
2468  case TargetOpcode::G_FCMP:
2470  if (TypeIdx == 0)
2471  widenScalarDst(MI, WideTy);
2472  else {
2473  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2474  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2475  }
2477  return Legalized;
2478 
2479  case TargetOpcode::G_ICMP:
2481  if (TypeIdx == 0)
2482  widenScalarDst(MI, WideTy);
2483  else {
  // Signed predicates sign-extend both compared operands; all other
  // predicates zero-extend them.
2484  unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2485  MI.getOperand(1).getPredicate()))
2486  ? TargetOpcode::G_SEXT
2487  : TargetOpcode::G_ZEXT;
2488  widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2489  widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2490  }
2492  return Legalized;
2493 
2494  case TargetOpcode::G_PTR_ADD:
2495  assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2497  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2499  return Legalized;
2500 
2501  case TargetOpcode::G_PHI: {
2502  assert(TypeIdx == 0 && "Expecting only Idx 0");
2503 
  // Operands come in (value, block) pairs starting at index 1. Each incoming
  // value is extended just before the first terminator of its block.
2505  for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2506  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2507  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2508  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2509  }
2510 
  // NOTE(review): listing line 2512 is missing after this declaration; MBB is
  // otherwise unused in the text shown here.
2511  MachineBasicBlock &MBB = *MI.getParent();
2513  widenScalarDst(MI, WideTy);
2515  return Legalized;
2516  }
2517  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2518  if (TypeIdx == 0) {
2519  Register VecReg = MI.getOperand(1).getReg();
2520  LLT VecTy = MRI.getType(VecReg);
2522 
  // NOTE(review): the head of the call that takes the arguments on the next
  // two lines (listing line 2523) is missing from this extraction.
2524  MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2525  TargetOpcode::G_ANYEXT);
2526 
2527  widenScalarDst(MI, WideTy, 0);
2529  return Legalized;
2530  }
2531 
2532  if (TypeIdx != 2)
2533  return UnableToLegalize;
2535  // TODO: Probably should be zext
2536  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2538  return Legalized;
2539  }
2540  case TargetOpcode::G_INSERT_VECTOR_ELT: {
2541  if (TypeIdx == 1) {
2543 
  // Widen the vector to WideTy elements with the same element count, widen
  // the inserted scalar to WideTy, then widen the result vector.
2544  Register VecReg = MI.getOperand(1).getReg();
2545  LLT VecTy = MRI.getType(VecReg);
2546  LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2547 
2548  widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2549  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2550  widenScalarDst(MI, WideVecTy, 0);
2552  return Legalized;
2553  }
2554 
2555  if (TypeIdx == 2) {
2557  // TODO: Probably should be zext
2558  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2560  return Legalized;
2561  }
2562 
2563  return UnableToLegalize;
2564  }
  // Floating-point operations: every source operand is extended with G_FPEXT
  // and the result is narrowed back with G_FPTRUNC.
2565  case TargetOpcode::G_FADD:
2566  case TargetOpcode::G_FMUL:
2567  case TargetOpcode::G_FSUB:
2568  case TargetOpcode::G_FMA:
2569  case TargetOpcode::G_FMAD:
2570  case TargetOpcode::G_FNEG:
2571  case TargetOpcode::G_FABS:
2572  case TargetOpcode::G_FCANONICALIZE:
2573  case TargetOpcode::G_FMINNUM:
2574  case TargetOpcode::G_FMAXNUM:
2575  case TargetOpcode::G_FMINNUM_IEEE:
2576  case TargetOpcode::G_FMAXNUM_IEEE:
2577  case TargetOpcode::G_FMINIMUM:
2578  case TargetOpcode::G_FMAXIMUM:
2579  case TargetOpcode::G_FDIV:
2580  case TargetOpcode::G_FREM:
2581  case TargetOpcode::G_FCEIL:
2582  case TargetOpcode::G_FFLOOR:
2583  case TargetOpcode::G_FCOS:
2584  case TargetOpcode::G_FSIN:
2585  case TargetOpcode::G_FLOG10:
2586  case TargetOpcode::G_FLOG:
2587  case TargetOpcode::G_FLOG2:
2588  case TargetOpcode::G_FRINT:
2589  case TargetOpcode::G_FNEARBYINT:
2590  case TargetOpcode::G_FSQRT:
2591  case TargetOpcode::G_FEXP:
2592  case TargetOpcode::G_FEXP2:
2593  case TargetOpcode::G_FPOW:
2594  case TargetOpcode::G_INTRINSIC_TRUNC:
2595  case TargetOpcode::G_INTRINSIC_ROUND:
2596  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2597  assert(TypeIdx == 0);
2599 
2600  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2601  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2602 
2603  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2605  return Legalized;
2606  case TargetOpcode::G_FPOWI: {
2607  if (TypeIdx != 0)
2608  return UnableToLegalize;
  // Only operand 1 is extended here; operand 2 is left untouched.
2610  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2611  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2613  return Legalized;
2614  }
2615  case TargetOpcode::G_INTTOPTR:
2616  if (TypeIdx != 1)
2617  return UnableToLegalize;
2618 
2620  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2622  return Legalized;
2623  case TargetOpcode::G_PTRTOINT:
2624  if (TypeIdx != 0)
2625  return UnableToLegalize;
2626 
2628  widenScalarDst(MI, WideTy, 0);
2630  return Legalized;
2631  case TargetOpcode::G_BUILD_VECTOR: {
2633 
  // For type index 1, WideTy is already the element type; otherwise it is the
  // vector type and its element type is used for the sources.
2634  const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2635  for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2636  widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2637 
2638  // Avoid changing the result vector type if the source element type was
2639  // requested.
2640  if (TypeIdx == 1) {
2641  MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2642  } else {
2643  widenScalarDst(MI, WideTy, 0);
2644  }
2645 
2647  return Legalized;
2648  }
2649  case TargetOpcode::G_SEXT_INREG:
2650  if (TypeIdx != 0)
2651  return UnableToLegalize;
2652 
2654  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2655  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2657  return Legalized;
2658  case TargetOpcode::G_PTRMASK: {
2659  if (TypeIdx != 1)
2660  return UnableToLegalize;
2662  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2664  return Legalized;
2665  }
2666  }
2667 }
2668 
// Unmerge \p Src into pieces of type \p Ty using builder \p B and append the
// resulting registers to Pieces.
// NOTE(review): the first line of this signature (listing line 2669) is
// missing from the extraction; callers below refer to it as getUnmergePieces.
2670  MachineIRBuilder &B, Register Src, LLT Ty) {
2671  auto Unmerge = B.buildUnmerge(Ty, Src);
  // The last operand of the unmerge is skipped; only the preceding operands
  // are collected.
2672  for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2673  Pieces.push_back(Unmerge.getReg(I));
2674 }
2675 
// Body of the G_BITCAST lowering: when either side is a vector, split the
// source with an unmerge (bitcasting the pieces where both sides are
// vectors) and merge the pieces into the destination. Scalar-to-scalar casts
// return UnableToLegalize.
// NOTE(review): the signature lines (listing lines 2676-2677) are missing
// from this extraction -- confirm the function name/parameters upstream.
2678  Register Dst = MI.getOperand(0).getReg();
2679  Register Src = MI.getOperand(1).getReg();
2680  LLT DstTy = MRI.getType(Dst);
2681  LLT SrcTy = MRI.getType(Src);
2682 
2683  if (SrcTy.isVector()) {
2684  LLT SrcEltTy = SrcTy.getElementType();
2685  SmallVector<Register, 8> SrcRegs;
2686 
2687  if (DstTy.isVector()) {
2688  int NumDstElt = DstTy.getNumElements();
2689  int NumSrcElt = SrcTy.getNumElements();
2690 
2691  LLT DstEltTy = DstTy.getElementType();
2692  LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
2693  LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
2694 
2695  // If there's an element size mismatch, insert intermediate casts to match
2696  // the result element type.
2697  if (NumSrcElt < NumDstElt) { // Source element type is larger.
2698  // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
2699  //
2700  // =>
2701  //
2702  // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
2703  // %4:_(<2 x s8>) = G_BITCAST %2
2704  // %5:_(<2 x s8>) = G_BITCAST %3
2705  // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
2706  DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
2707  SrcPartTy = SrcEltTy;
2708  } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
2709  //
2710  // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
2711  //
2712  // =>
2713  //
2714  // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
2715  // %4:_(s16) = G_BITCAST %2
2716  // %5:_(s16) = G_BITCAST %3
2717  // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
2718  SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
2719  DstCastTy = DstEltTy;
2720  }
2721 
  // Split the source into SrcPartTy pieces and bitcast each piece in place
  // to DstCastTy.
2722  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
2723  for (Register &SrcReg : SrcRegs)
2724  SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
2725  } else
2726  getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
2727 
2728  MIRBuilder.buildMerge(Dst, SrcRegs);
2729  MI.eraseFromParent();
2730  return Legalized;
2731  }
2732 
  // Scalar source, vector destination: split the scalar into pieces of the
  // destination element type and merge them into the vector.
2733  if (DstTy.isVector()) {
2734  SmallVector<Register, 8> SrcRegs;
2735  getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2736  MIRBuilder.buildMerge(Dst, SrcRegs);
2737  MI.eraseFromParent();
2738  return Legalized;
2739  }
2740 
2741  return UnableToLegalize;
2742 }
2743 
2744 /// Figure out the bit offset into a register when coercing a vector index for
2745 /// the wide element type. This is only for the case when promoting vector to
2746 /// one with larger elements.
2747 ///
2748 ///
2749 /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2750 /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
// Returns the register holding %offset_bits, computed in the type of Idx.
// NOTE(review): the first signature line (listing line 2751) is missing from
// this extraction -- confirm the function name and builder parameter upstream.
2752  Register Idx,
2753  unsigned NewEltSize,
2754  unsigned OldEltSize) {
2755  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2756  LLT IdxTy = B.getMRI()->getType(Idx);
2757 
2758  // Now figure out the amount we need to shift to get the target bits.
  // OffsetMask has only the low Log2EltRatio bits set.
2759  auto OffsetMask = B.buildConstant(
2760  IdxTy, ~(APInt::getAllOnes(IdxTy.getSizeInBits()) << Log2EltRatio));
2761  auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
2762  return B.buildShl(IdxTy, OffsetIdx,
2763  B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
2764 }
2765 
2766 /// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
2767 /// is casting to a vector with a smaller element size, perform multiple element
2768 /// extracts and merge the results. If this is coercing to a vector with larger
2769 /// elements, index the bitcasted vector and extract the target element with bit
2770 /// operations. This is intended to force the indexing in the native register
2771 /// size for architectures that can dynamically index the register file.
// Only type index 1 is handled. Returns UnableToLegalize when the element
// counts are equal or the size ratios fail the divisibility/power-of-two
// checks below.
// NOTE(review): the first signature lines (listing lines 2772-2773) are
// missing from this extraction -- confirm the function name upstream.
2774  LLT CastTy) {
2775  if (TypeIdx != 1)
2776  return UnableToLegalize;
2777 
2778  Register Dst = MI.getOperand(0).getReg();
2779  Register SrcVec = MI.getOperand(1).getReg();
2780  Register Idx = MI.getOperand(2).getReg();
2781  LLT SrcVecTy = MRI.getType(SrcVec);
2782  LLT IdxTy = MRI.getType(Idx);
2783 
  // A scalar CastTy is treated as a single element of that type.
2784  LLT SrcEltTy = SrcVecTy.getElementType();
2785  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2786  unsigned OldNumElts = SrcVecTy.getNumElements();
2787 
2788  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
  // NOTE(review): this bitcast is built before the legality checks below, so
  // it is emitted even on paths that return UnableToLegalize.
2789  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2790 
2791  const unsigned NewEltSize = NewEltTy.getSizeInBits();
2792  const unsigned OldEltSize = SrcEltTy.getSizeInBits();
2793  if (NewNumElts > OldNumElts) {
2794  // Decreasing the vector element size
2795  //
2796  // e.g. i64 = extract_vector_elt x:v2i64, y:i32
2797  // =>
2798  // v4i32:castx = bitcast x:v2i64
2799  //
2800  // i64 = bitcast
2801  // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
2802  // (i32 (extract_vector_elt castx, (2 * y + 1)))
2803  //
2804  if (NewNumElts % OldNumElts != 0)
2805  return UnableToLegalize;
2806 
2807  // Type of the intermediate result vector.
2808  const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
2809  LLT MidTy =
2810  LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
2811 
2812  auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
2813 
2814  SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
2815  auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
2816 
  // Extract each narrow element that makes up the requested old element, at
  // indices NewBaseIdx + I.
2817  for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
2818  auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
2819  auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
2820  auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
2821  NewOps[I] = Elt.getReg(0);
2822  }
2823 
2824  auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
2825  MIRBuilder.buildBitcast(Dst, NewVec);
2826  MI.eraseFromParent();
2827  return Legalized;
2828  }
2829 
2830  if (NewNumElts < OldNumElts) {
2831  if (NewEltSize % OldEltSize != 0)
2832  return UnableToLegalize;
2833 
2834  // This only depends on powers of 2 because we use bit tricks to figure out
2835  // the bit offset we need to shift to get the target element. A general
2836  // expansion could emit division/multiply.
2837  if (!isPowerOf2_32(NewEltSize / OldEltSize))
2838  return UnableToLegalize;
2839 
2840  // Increasing the vector element size.
2841  // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
2842  //
2843  // =>
2844  //
2845  // %cast = G_BITCAST %vec
2846  // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
2847  // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
2848  // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2849  // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2850  // %elt_bits = G_LSHR %wide_elt, %offset_bits
2851  // %elt = G_TRUNC %elt_bits
2852 
2853  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2854  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2855 
2856  // Divide to get the index in the wider element type.
2857  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2858 
  // With a scalar CastTy the whole cast value is the wide element, so no
  // element extract is needed.
2859  Register WideElt = CastVec;
2860  if (CastTy.isVector()) {
2861  WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2862  ScaledIdx).getReg(0);
2863  }
2864 
2865  // Compute the bit offset into the register of the target element.
  // NOTE(review): the head of this statement (listing line 2866, which
  // presumably defines OffsetBits) is missing from this extraction.
2867  MIRBuilder, Idx, NewEltSize, OldEltSize);
2868 
2869  // Shift the wide element to get the target element.
2870  auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
2871  MIRBuilder.buildTrunc(Dst, ExtractedBits);
2872  MI.eraseFromParent();
2873  return Legalized;
2874  }
2875 
2876  return UnableToLegalize;
2877 }
2878 
2879 /// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
2880 /// TargetReg, while preserving other bits in \p TargetReg.
2881 ///
2882 /// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
2884  Register TargetReg, Register InsertReg,
2885  Register OffsetBits) {
2886  LLT TargetTy = B.getMRI()->getType(TargetReg);
2887  LLT InsertTy = B.getMRI()->getType(InsertReg);
2888  auto ZextVal = B.buildZExt(TargetTy, InsertReg);
2889  auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
2890 
2891  // Produce a bitmask of the value to insert
2892  auto EltMask = B.buildConstant(
2893  TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
2894  InsertTy.getSizeInBits()));
2895  // Shift it into position
2896  auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
2897  auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
2898 
2899  // Clear out the bits in the wide element
2900  auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
2901 
2902  // The value to insert has all zeros already, so stick it into the masked
2903  // wide element.
2904  return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
2905 }
2906 
2907 /// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
2908 /// is increasing the element size, perform the indexing in the target element
2909 /// type, and use bit operations to insert at the element position. This is
2910 /// intended for architectures that can dynamically index the register file and
2911 /// want to force indexing in the native register size.
2914  LLT CastTy) {
2915  if (TypeIdx != 0)
2916  return UnableToLegalize;
2917 
2918  Register Dst = MI.getOperand(0).getReg();
2919  Register SrcVec = MI.getOperand(1).getReg();
2920  Register Val = MI.getOperand(2).getReg();
2921  Register Idx = MI.getOperand(3).getReg();
2922 
2923  LLT VecTy = MRI.getType(Dst);
2924  LLT IdxTy = MRI.getType(Idx);
2925 
2926  LLT VecEltTy = VecTy.getElementType();
2927  LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2928  const unsigned NewEltSize = NewEltTy.getSizeInBits();
2929  const unsigned OldEltSize = VecEltTy.getSizeInBits();
2930 
2931  unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2932  unsigned OldNumElts = VecTy.getNumElements();
2933 
2934  Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2935  if (NewNumElts < OldNumElts) {
2936  if (NewEltSize % OldEltSize != 0)
2937  return UnableToLegalize;
2938 
2939  // This only depends on powers of 2 because we use bit tricks to figure out
2940  // the bit offset we need to shift to get the target element. A general
2941  // expansion could emit division/multiply.
2942  if (!isPowerOf2_32(NewEltSize / OldEltSize))
2943  return UnableToLegalize;
2944 
2945  const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2946  auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2947 
2948  // Divide to get the index in the wider element type.
2949  auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2950 
2951  Register ExtractedElt = CastVec;
2952  if (CastTy.isVector()) {
2953  ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2954  ScaledIdx).getReg(0);
2955  }
2956 
2957  // Compute the bit offset into the register of the target element.
2959  MIRBuilder, Idx, NewEltSize, OldEltSize);
2960 
2961  Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
2962  Val, OffsetBits);
2963  if (CastTy.isVector()) {
2964  InsertedElt = MIRBuilder.buildInsertVectorElement(
2965  CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
2966  }
2967 
2968  MIRBuilder.buildBitcast(Dst, InsertedElt);
2969  MI.eraseFromParent();
2970  return Legalized;
2971  }
2972 
2973  return UnableToLegalize;
2974 }
2975 
2977  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2978  Register DstReg = LoadMI.getDstReg();
2979  Register PtrReg = LoadMI.getPointerReg();
2980  LLT DstTy = MRI.getType(DstReg);
2981  MachineMemOperand &MMO = LoadMI.getMMO();
2982  LLT MemTy = MMO.getMemoryType();
2984 
2985  unsigned MemSizeInBits = MemTy.getSizeInBits();
2986  unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
2987 
2988  if (MemSizeInBits != MemStoreSizeInBits) {
2989  if (MemTy.isVector())
2990  return UnableToLegalize;
2991 
2992  // Promote to a byte-sized load if not loading an integral number of
2993  // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
2994  LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
2995  MachineMemOperand *NewMMO =
2996  MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
2997 
2998  Register LoadReg = DstReg;
2999  LLT LoadTy = DstTy;
3000 
3001  // If this wasn't already an extending load, we need to widen the result
3002  // register to avoid creating a load with a narrower result than the source.
3003  if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
3004  LoadTy = WideMemTy;
3005  LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
3006  }
3007 
3008  if (isa<GSExtLoad>(LoadMI)) {
3009  auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3010  MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
3011  } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == LoadTy) {
3012  auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
3013  // The extra bits are guaranteed to be zero, since we stored them that
3014  // way. A zext load from Wide thus automatically gives zext from MemVT.
3015  MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
3016  } else {
3017  MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
3018  }
3019 
3020  if (DstTy != LoadTy)
3021  MIRBuilder.buildTrunc(DstReg, LoadReg);
3022 
3023  LoadMI.eraseFromParent();
3024  return Legalized;
3025  }
3026 
3027  // Big endian lowering not implemented.
3029  return UnableToLegalize;
3030 
3031  // This load needs splitting into power of 2 sized loads.
3032  //
3033  // Our strategy here is to generate anyextending loads for the smaller
3034  // types up to next power-2 result type, and then combine the two larger
3035  // result values together, before truncating back down to the non-pow-2
3036  // type.
3037  // E.g. v1 = i24 load =>
3038  // v2 = i32 zextload (2 byte)
3039  // v3 = i32 load (1 byte)
3040  // v4 = i32 shl v3, 16
3041  // v5 = i32 or v4, v2
3042  // v1 = i24 trunc v5
3043  // By doing this we generate the correct truncate which should get
3044  // combined away as an artifact with a matching extend.
3045 
3046  uint64_t LargeSplitSize, SmallSplitSize;
3047 
3048  if (!isPowerOf2_32(MemSizeInBits)) {
3049  // This load needs splitting into power of 2 sized loads.
3050  LargeSplitSize = PowerOf2Floor(MemSizeInBits);
3051  SmallSplitSize = MemSizeInBits - LargeSplitSize;
3052  } else {
3053  // This is already a power of 2, but we still need to split this in half.
3054  //
3055  // Assume we're being asked to decompose an unaligned load.
3056  // TODO: If this requires multiple splits, handle them all at once.
3057  auto &Ctx = MF.getFunction().getContext();
3058  if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3059  return UnableToLegalize;
3060 
3061  SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3062  }
3063 
3064  if (MemTy.isVector()) {
3065  // TODO: Handle vector extloads
3066  if (MemTy != DstTy)
3067  return UnableToLegalize;
3068 
3069  // TODO: We can do better than scalarizing the vector and at least split it
3070  // in half.
3071  return reduceLoadStoreWidth(LoadMI, 0, DstTy.getElementType());
3072  }
3073 
3074  MachineMemOperand *LargeMMO =
3075  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3076  MachineMemOperand *SmallMMO =
3077  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3078 
3079  LLT PtrTy = MRI.getType(PtrReg);
3080  unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
3081  LLT AnyExtTy = LLT::scalar(AnyExtSize);
3082  auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
3083  PtrReg, *LargeMMO);
3084 
3085  auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
3086  LargeSplitSize / 8);
3087  Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3088  auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3089  auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
3090  SmallPtr, *SmallMMO);
3091 
3092  auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
3093  auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
3094 
3095  if (AnyExtTy == DstTy)
3096  MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
3097  else if (AnyExtTy.getSizeInBits() != DstTy.getSizeInBits()) {
3098  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3099  MIRBuilder.buildTrunc(DstReg, {Or});
3100  } else {
3101  assert(DstTy.isPointer() && "expected pointer");
3102  auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
3103 
3104  // FIXME: We currently consider this to be illegal for non-integral address
3105  // spaces, but we need still need a way to reinterpret the bits.
3106  MIRBuilder.buildIntToPtr(DstReg, Or);
3107  }
3108 
3109  LoadMI.eraseFromParent();
3110  return Legalized;
3111 }
3112 
3114  // Lower a non-power of 2 store into multiple pow-2 stores.
3115  // E.g. split an i24 store into an i16 store + i8 store.
3116  // We do this by first extending the stored value to the next largest power
3117  // of 2 type, and then using truncating stores to store the components.
3118  // By doing this, likewise with G_LOAD, generate an extend that can be
3119  // artifact-combined away instead of leaving behind extracts.
3120  Register SrcReg = StoreMI.getValueReg();
3121  Register PtrReg = StoreMI.getPointerReg();
3122  LLT SrcTy = MRI.getType(SrcReg);
3124  MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3125  LLT MemTy = MMO.getMemoryType();
3126 
3127  unsigned StoreWidth = MemTy.getSizeInBits();
3128  unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3129 
3130  if (StoreWidth != StoreSizeInBits) {
3131  if (SrcTy.isVector())
3132  return UnableToLegalize;
3133 
3134  // Promote to a byte-sized store with upper bits zero if not
3135  // storing an integral number of bytes. For example, promote
3136  // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3137  LLT WideTy = LLT::scalar(StoreSizeInBits);
3138 
3139  if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3140  // Avoid creating a store with a narrower source than result.
3141  SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3142  SrcTy = WideTy;
3143  }
3144 
3145  auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3146 
3147  MachineMemOperand *NewMMO =
3148  MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3149  MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3150  StoreMI.eraseFromParent();
3151  return Legalized;
3152  }
3153 
3154  if (MemTy.isVector()) {
3155  // TODO: Handle vector trunc stores
3156  if (MemTy != SrcTy)
3157  return UnableToLegalize;
3158 
3159  // TODO: We can do better than scalarizing the vector and at least split it
3160  // in half.
3161  return reduceLoadStoreWidth(StoreMI, 0, SrcTy.getElementType());
3162  }
3163 
3164  unsigned MemSizeInBits = MemTy.getSizeInBits();
3165  uint64_t LargeSplitSize, SmallSplitSize;
3166 
3167  if (!isPowerOf2_32(MemSizeInBits)) {
3168  LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
3169  SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3170  } else {
3171  auto &Ctx = MF.getFunction().getContext();
3172  if (TLI.allowsMemoryAccess(Ctx, MIRBuilder.getDataLayout(), MemTy, MMO))
3173  return UnableToLegalize; // Don't know what we're being asked to do.
3174 
3175  SmallSplitSize = LargeSplitSize = MemSizeInBits / 2;
3176  }
3177 
3178  // Extend to the next pow-2. If this store was itself the result of lowering,
3179  // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3180  // that's wider than the stored size.
3181  unsigned AnyExtSize = PowerOf2Ceil(MemTy.getSizeInBits());
3182  const LLT NewSrcTy = LLT::scalar(AnyExtSize);
3183 
3184  if (SrcTy.isPointer()) {
3185  const LLT IntPtrTy = LLT::scalar(SrcTy.getSizeInBits());
3186  SrcReg = MIRBuilder.buildPtrToInt(IntPtrTy, SrcReg).getReg(0);
3187  }
3188 
3189  auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3190 
3191  // Obtain the smaller value by shifting away the larger value.
3192  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3193  auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
3194 
3195  // Generate the PtrAdd and truncating stores.
3196  LLT PtrTy = MRI.getType(PtrReg);
3197  auto OffsetCst = MIRBuilder.buildConstant(
3198  LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3199  auto SmallPtr =
3200  MIRBuilder.buildPtrAdd(PtrTy, PtrReg, OffsetCst);
3201 
3202  MachineMemOperand *LargeMMO =
3203  MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3204  MachineMemOperand *SmallMMO =
3205  MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3206  MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3207  MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3208  StoreMI.eraseFromParent();
3209  return Legalized;
3210 }
3211 
3213 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3214  switch (MI.getOpcode()) {
3215  case TargetOpcode::G_LOAD: {
3216  if (TypeIdx != 0)
3217  return UnableToLegalize;
3218  MachineMemOperand &MMO = **MI.memoperands_begin();
3219 
3220  // Not sure how to interpret a bitcast of an extending load.
3221  if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3222  return UnableToLegalize;
3223 
3225  bitcastDst(MI, CastTy, 0);
3226  MMO.setType(CastTy);
3228  return Legalized;
3229  }
3230  case TargetOpcode::G_STORE: {
3231  if (TypeIdx != 0)
3232  return UnableToLegalize;
3233 
3234  MachineMemOperand &MMO = **MI.memoperands_begin();
3235 
3236  // Not sure how to interpret a bitcast of a truncating store.
3237  if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3238  return UnableToLegalize;
3239 
3241  bitcastSrc(MI, CastTy, 0);
3242  MMO.setType(CastTy);
3244  return Legalized;
3245  }
3246  case TargetOpcode::G_SELECT: {
3247  if (TypeIdx != 0)
3248  return UnableToLegalize;
3249 
3250  if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3251  LLVM_DEBUG(
3252  dbgs() << "bitcast action not implemented for vector select\n");
3253  return UnableToLegalize;
3254  }
3255 
3257  bitcastSrc(MI, CastTy, 2);
3258  bitcastSrc(MI, CastTy, 3);
3259  bitcastDst(MI, CastTy, 0);
3261  return Legalized;
3262  }
3263  case TargetOpcode::G_AND:
3264  case TargetOpcode::G_OR:
3265  case TargetOpcode::G_XOR: {
3267  bitcastSrc(MI, CastTy, 1);
3268  bitcastSrc(MI, CastTy, 2);
3269  bitcastDst(MI, CastTy, 0);
3271  return Legalized;
3272  }
3273  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3274  return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3275  case TargetOpcode::G_INSERT_VECTOR_ELT:
3276  return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3277  default:
3278  return UnableToLegalize;
3279  }
3280 }
3281 
3282 // Legalize an instruction by changing the opcode in place.
3283 void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3285  MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3287 }
3288 
3290 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3291  using namespace TargetOpcode;
3292 
3293  switch(MI.getOpcode()) {
3294  default:
3295  return UnableToLegalize;
3296  case TargetOpcode::G_BITCAST:
3297  return lowerBitcast(MI);
3298  case TargetOpcode::G_SREM:
3299  case TargetOpcode::G_UREM: {
3300  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3301  auto Quot =
3302  MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3303  {MI.getOperand(1), MI.getOperand(2)});
3304 
3305  auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3306  MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3307  MI.eraseFromParent();
3308  return Legalized;
3309  }
3310  case TargetOpcode::G_SADDO:
3311  case TargetOpcode::G_SSUBO:
3312  return lowerSADDO_SSUBO(MI);
3313  case TargetOpcode::G_UMULH:
3314  case TargetOpcode::G_SMULH:
3315  return lowerSMULH_UMULH(MI);
3316  case TargetOpcode::G_SMULO:
3317  case TargetOpcode::G_UMULO: {
3318  // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3319  // result.
3320  Register Res = MI.getOperand(0).getReg();
3321  Register Overflow = MI.getOperand(1).getReg();
3322  Register LHS = MI.getOperand(2).getReg();
3323  Register RHS = MI.getOperand(3).getReg();
3324  LLT Ty = MRI.getType(Res);
3325 
3326  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3327  ? TargetOpcode::G_SMULH
3328  : TargetOpcode::G_UMULH;
3329 
3331  const auto &TII = MIRBuilder.getTII();
3332  MI.setDesc(TII.get(TargetOpcode::G_MUL));
3333  MI.removeOperand(1);
3335 
3336  auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3337  auto Zero = MIRBuilder.buildConstant(Ty, 0);
3338 
3339  // Move insert point forward so we can use the Res register if needed.
3341 
3342  // For *signed* multiply, overflow is detected by checking:
3343  // (hi != (lo >> bitwidth-1))
3344  if (Opcode == TargetOpcode::G_SMULH) {
3345  auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3346  auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3347  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3348  } else {
3349  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3350  }
3351  return Legalized;
3352  }
3353  case TargetOpcode::G_FNEG: {
3354  Register Res = MI.getOperand(0).getReg();
3355  LLT Ty = MRI.getType(Res);
3356 
3357  // TODO: Handle vector types once we are able to
3358  // represent them.
3359  if (Ty.isVector())
3360  return UnableToLegalize;
3361  auto SignMask =
3363  Register SubByReg = MI.getOperand(1).getReg();
3364  MIRBuilder.buildXor(Res, SubByReg, SignMask);
3365  MI.eraseFromParent();
3366  return Legalized;
3367  }
3368  case TargetOpcode::G_FSUB: {
3369  Register Res = MI.getOperand(0).getReg();
3370  LLT Ty = MRI.getType(Res);
3371 
3372  // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3373  // First, check if G_FNEG is marked as Lower. If so, we may
3374  // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
3375  if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
3376  return UnableToLegalize;
3377  Register LHS = MI.getOperand(1).getReg();
3378  Register RHS = MI.getOperand(2).getReg();
3380  MIRBuilder.buildFNeg(Neg, RHS);
3381  MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3382  MI.eraseFromParent();
3383  return Legalized;
3384  }
3385  case TargetOpcode::G_FMAD:
3386  return lowerFMad(MI);
3387  case TargetOpcode::G_FFLOOR:
3388  return lowerFFloor(MI);
3389  case TargetOpcode::G_INTRINSIC_ROUND:
3390  return lowerIntrinsicRound(MI);
3391  case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
3392  // Since round even is the assumed rounding mode for unconstrained FP
3393  // operations, rint and roundeven are the same operation.
3394  changeOpcode(MI, TargetOpcode::G_FRINT);
3395  return Legalized;
3396  }
3397  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3398  Register OldValRes = MI.getOperand(0).getReg();
3399  Register SuccessRes = MI.getOperand(1).getReg();
3400  Register Addr = MI.getOperand(2).getReg();
3401  Register CmpVal = MI.getOperand(3).getReg();
3402  Register NewVal = MI.getOperand(4).getReg();
3403  MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3404  **MI.memoperands_begin());
3405  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3406  MI.eraseFromParent();
3407  return Legalized;
3408  }
3409  case TargetOpcode::G_LOAD:
3410  case TargetOpcode::G_SEXTLOAD:
3411  case TargetOpcode::G_ZEXTLOAD:
3412  return lowerLoad(cast<GAnyLoad>(MI));
3413  case TargetOpcode::G_STORE:
3414  return lowerStore(cast<GStore>(MI));
3415  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3416  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3417  case TargetOpcode::G_CTLZ:
3418  case TargetOpcode::G_CTTZ:
3419  case TargetOpcode::G_CTPOP:
3420  return lowerBitCount(MI);
3421  case G_UADDO: {
3422  Register Res = MI.getOperand(0).getReg();
3423  Register CarryOut = MI.getOperand(1).getReg();
3424  Register LHS = MI.getOperand(2).getReg();
3425  Register RHS = MI.getOperand(3).getReg();
3426 
3427  MIRBuilder.buildAdd(Res, LHS, RHS);
3428  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
3429 
3430  MI.eraseFromParent();
3431  return Legalized;
3432  }
3433  case G_UADDE: {
3434  Register Res = MI.getOperand(0).getReg();
3435  Register CarryOut = MI.getOperand(1).getReg();
3436  Register LHS = MI.getOperand(2).getReg();
3437  Register RHS = MI.getOperand(3).getReg();
3438  Register CarryIn = MI.getOperand(4).getReg();
3439  LLT Ty = MRI.getType(Res);
3440 
3441  auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3442  auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3443  MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3444  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
3445 
3446  MI.eraseFromParent();
3447  return Legalized;
3448  }
3449  case G_USUBO: {
3450  Register Res = MI.getOperand(0).getReg();
3451  Register BorrowOut = MI.getOperand(1).getReg();
3452  Register LHS = MI.getOperand(2).getReg();
3453  Register RHS = MI.getOperand(3).getReg();
3454 
3455  MIRBuilder.buildSub(Res, LHS, RHS);
3457 
3458  MI.eraseFromParent();
3459  return Legalized;
3460  }
3461  case G_USUBE: {
3462  Register Res = MI.getOperand(0).getReg();
3463  Register BorrowOut = MI.getOperand(1).getReg();
3464  Register LHS = MI.getOperand(2).getReg();
3465  Register RHS = MI.getOperand(3).getReg();
3466  Register BorrowIn = MI.getOperand(4).getReg();
3467  const LLT CondTy = MRI.getType(BorrowOut);
3468  const LLT Ty = MRI.getType(Res);
3469 
3470  auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3471  auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3472  MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3473 
3474  auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
3475  auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
3476  MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
3477 
3478  MI.eraseFromParent();
3479  return Legalized;
3480  }
3481  case G_UITOFP:
3482  return lowerUITOFP(MI);
3483  case G_SITOFP:
3484  return lowerSITOFP(MI);
3485  case G_FPTOUI:
3486  return lowerFPTOUI(MI);
3487  case G_FPTOSI:
3488  return lowerFPTOSI(MI);
3489  case G_FPTRUNC:
3490  return lowerFPTRUNC(MI);
3491  case G_FPOWI:
3492  return lowerFPOWI(MI);
3493  case G_SMIN:
3494  case G_SMAX:
3495  case G_UMIN:
3496  case G_UMAX:
3497  return lowerMinMax(MI);
3498  case G_FCOPYSIGN:
3499  return lowerFCopySign(MI);
3500  case G_FMINNUM:
3501  case G_FMAXNUM:
3502  return lowerFMinNumMaxNum(MI);
3503  case G_MERGE_VALUES:
3504  return lowerMergeValues(MI);
3505  case G_UNMERGE_VALUES:
3506  return lowerUnmergeValues(MI);
3507  case TargetOpcode::G_SEXT_INREG: {
3508  assert(MI.getOperand(2).isImm() && "Expected immediate");
3509  int64_t SizeInBits = MI.getOperand(2).getImm();
3510 
3511  Register DstReg = MI.getOperand(0).getReg();
3512  Register SrcReg = MI.getOperand(1).getReg();
3513  LLT DstTy = MRI.getType(DstReg);
3514  Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3515 
3516  auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3517  MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3518  MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3519  MI.eraseFromParent();
3520  return Legalized;
3521  }
3522  case G_EXTRACT_VECTOR_ELT:
3523  case G_INSERT_VECTOR_ELT:
3525  case G_SHUFFLE_VECTOR:
3526  return lowerShuffleVector(MI);
3527  case G_DYN_STACKALLOC:
3528  return lowerDynStackAlloc(MI);
3529  case G_EXTRACT:
3530  return lowerExtract(MI);
3531  case G_INSERT:
3532  return lowerInsert(MI);
3533  case G_BSWAP:
3534  return lowerBswap(MI);
3535  case G_BITREVERSE:
3536  return lowerBitreverse(MI);
3537  case G_READ_REGISTER:
3538  case G_WRITE_REGISTER:
3539  return lowerReadWriteRegister(MI);
3540  case G_UADDSAT:
3541  case G_USUBSAT: {
3542  // Try to make a reasonable guess about which lowering strategy to use. The
3543  // target can override this with custom lowering and calling the
3544  // implementation functions.
3545  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3546  if (LI.isLegalOrCustom({G_UMIN, Ty}))
3547  return lowerAddSubSatToMinMax(MI);
3548  return lowerAddSubSatToAddoSubo(MI);
3549  }
3550  case G_SADDSAT:
3551  case G_SSUBSAT: {
3552  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3553 
3554  // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3555  // since it's a shorter expansion. However, we would need to figure out the
3556  // preferred boolean type for the carry out for the query.
3557  if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3558  return lowerAddSubSatToMinMax(MI);
3559  return lowerAddSubSatToAddoSubo(MI);
3560  }
3561  case G_SSHLSAT:
3562  case G_USHLSAT:
3563  return lowerShlSat(MI);
3564  case G_ABS:
3565  return lowerAbsToAddXor(MI);
3566  case G_SELECT:
3567  return lowerSelect(MI);
3568  case G_SDIVREM:
3569  case G_UDIVREM:
3570  return lowerDIVREM(MI);
3571  case G_FSHL:
3572  case G_FSHR:
3573  return lowerFunnelShift(MI);
3574  case G_ROTL:
3575  case G_ROTR:
3576  return lowerRotate(MI);
3577  case G_MEMSET:
3578  case G_MEMCPY:
3579  case G_MEMMOVE:
3580  return lowerMemCpyFamily(MI);
3581  case G_MEMCPY_INLINE:
3582  return lowerMemcpyInline(MI);
3584  return lowerVectorReduction(MI);
3585  }
3586 }
3587 
3589  Align MinAlign) const {
3590  // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3591  // datalayout for the preferred alignment. Also there should be a target hook
3592  // for this to allow targets to reduce the alignment and ignore the
3593  // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3594  // the type.
3596 }
3597 
3600  MachinePointerInfo &PtrInfo) {
3603  int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3604 
3605  unsigned AddrSpace = DL.getAllocaAddrSpace();
3606  LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3607 
3608  PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3609  return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3610 }
3611 
3613  LLT VecTy) {
3614  int64_t IdxVal;
3615  if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3616  return IdxReg;
3617 
3618  LLT IdxTy = B.getMRI()->getType(IdxReg);
3619  unsigned NElts = VecTy.getNumElements();
3620  if (isPowerOf2_32(NElts)) {
3621  APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3622  return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3623  }
3624 
3625  return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3626  .getReg(0);
3627 }
3628 
3630  Register Index) {
3631  LLT EltTy = VecTy.getElementType();
3632 
3633  // Calculate the element offset and add it to the pointer.
3634  unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3635  assert(EltSize * 8 == EltTy.getSizeInBits() &&
3636  "Converting bits to bytes lost precision");
3637 
3638  Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
3639 
3640  LLT IdxTy = MRI.getType(Index);
3641  auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3642  MIRBuilder.buildConstant(IdxTy, EltSize));
3643 
3644  LLT PtrTy = MRI.getType(VecPtr);
3645  return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
3646 }
3647 
3648 #ifndef NDEBUG
3649 /// Check that all vector operands have same number of elements. Other operands
3650 /// should be listed in NonVecOp.
3653  std::initializer_list<unsigned> NonVecOpIndices) {
3654  if (MI.getNumMemOperands() != 0)
3655  return false;
3656 
3657  LLT VecTy = MRI.getType(MI.getReg(0));
3658  if (!VecTy.isVector())
3659  return false;
3660  unsigned NumElts = VecTy.getNumElements();
3661 
3662  for (unsigned OpIdx = 1; OpIdx < MI.getNumOperands(); ++OpIdx) {
3663  MachineOperand &Op = MI.getOperand(OpIdx);
3664  if (!Op.isReg()) {
3665  if (!is_contained(NonVecOpIndices, OpIdx))
3666  return false;
3667  continue;
3668  }
3669 
3670  LLT Ty = MRI.getType(Op.getReg());
3671  if (!Ty.isVector()) {
3672  if (!is_contained(NonVecOpIndices, OpIdx))
3673  return false;
3674  continue;
3675  }
3676 
3677  if (Ty.getNumElements() != NumElts)
3678  return false;
3679  }
3680 
3681  return true;
3682 }
3683 #endif
3684 
3685 /// Fill \p DstOps with DstOps that have same number of elements combined as
3686 /// the Ty. These DstOps have either scalar type when \p NumElts = 1 or are
3687 /// vectors with \p NumElts elements. When Ty.getNumElements() is not multiple
3688 /// of \p NumElts last DstOp (leftover) has fewer then \p NumElts elements.
3689 static void makeDstOps(SmallVectorImpl<DstOp> &DstOps, LLT Ty,
3690  unsigned NumElts) {
3691  LLT LeftoverTy;
3692  assert(Ty.isVector() && "Expected vector type");
3693  LLT EltTy = Ty.getElementType();
3694  LLT NarrowTy = (NumElts == 1) ? EltTy : LLT::fixed_vector(NumElts, EltTy);
3695  int NumParts, NumLeftover;
3696  std::tie(NumParts, NumLeftover) =
3697  getNarrowTypeBreakDown(Ty, NarrowTy, LeftoverTy);
3698 
3699  assert(NumParts > 0 && "Error in getNarrowTypeBreakDown");
3700  for (int i = 0; i < NumParts; ++i) {
3701  DstOps.push_back(NarrowTy);
3702  }
3703 
3704  if (LeftoverTy.isValid()) {
3705  assert(NumLeftover == 1 && "expected exactly one leftover");
3706  DstOps.push_back(LeftoverTy);
3707  }
3708 }
3709 
3710 /// Operand \p Op is used on \p N sub-instructions. Fill \p Ops with \p N SrcOps
3711 /// made from \p Op depending on operand type.
3712 static void broadcastSrcOp(SmallVectorImpl<SrcOp> &Ops, unsigned N,
3713  MachineOperand &Op) {
3714  for (unsigned i = 0; i < N; ++i) {
3715  if (Op.isReg())
3716  Ops.push_back(Op.getReg());
3717  else if (Op.isImm())
3718  Ops.push_back(Op.getImm());
3719  else if (Op.isPredicate())
3720  Ops.push_back(static_cast<CmpInst::Predicate>(Op.getPredicate()));
3721  else
3722  llvm_unreachable("Unsupported type");
3723  }
3724 }
3725 
3726 // Handle splitting vector operations which need to have the same number of
3727 // elements in each type index, but each type index may have a different element
3728 // type.
3729 //
3730 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
3731 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3732 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3733 //
3734 // Also handles some irregular breakdown cases, e.g.
3735 // e.g. <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
3736 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3737 // s64 = G_SHL s64, s32
3740  GenericMachineInstr &MI, unsigned NumElts,
3741  std::initializer_list<unsigned> NonVecOpIndices) {
3742  assert(hasSameNumEltsOnAllVectorOperands(MI, MRI, NonVecOpIndices) &&
3743  "Non-compatible opcode or not specified non-vector operands");
3744  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
3745 
3746  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
3747  unsigned NumDefs = MI.getNumDefs();
3748 
3749  // Create DstOps (sub-vectors with NumElts elts + Leftover) for each output.
3750  // Build instructions with DstOps to use instruction found by CSE directly.
3751  // CSE copies found instruction into given vreg when building with vreg dest.
3752  SmallVector<SmallVector<DstOp, 8>, 2> OutputOpsPieces(NumDefs);
3753  // Output registers will be taken from created instructions.
3754  SmallVector<SmallVector<Register, 8>, 2> OutputRegs(NumDefs);
3755  for (unsigned i = 0; i < NumDefs; ++i) {
3756  makeDstOps(OutputOpsPieces[i], MRI.getType(MI.getReg(i)), NumElts);
3757  }
3758 
3759  // Split vector input operands into sub-vectors with NumElts elts + Leftover.
3760  // Operands listed in NonVecOpIndices will be used as is without splitting;
3761  // examples: compare predicate in icmp and fcmp (op 1), vector select with i1
3762  // scalar condition (op 1), immediate in sext_inreg (op 2).
3763  SmallVector<SmallVector<SrcOp, 8>, 3> InputOpsPieces(NumInputs);
3764  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
3765  ++UseIdx, ++UseNo) {
3766  if (is_contained(NonVecOpIndices, UseIdx)) {
3767  broadcastSrcOp(InputOpsPieces[UseNo], OutputOpsPieces[0].size(),
3768  MI.getOperand(UseIdx));
3769  } else {
3770  SmallVector<Register, 8> SplitPieces;
3771  extractVectorParts(MI.getReg(UseIdx), NumElts, SplitPieces);
3772  for (auto Reg : SplitPieces)
3773  InputOpsPieces[UseNo].push_back(Reg);
3774  }
3775  }
3776 
3777  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
3778 
3779  // Take i-th piece of each input operand split and build sub-vector/scalar
3780  // instruction. Set i-th DstOp(s) from OutputOpsPieces as destination(s).
3781  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
3782  SmallVector<DstOp, 2> Defs;
3783  for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
3784  Defs.push_back(OutputOpsPieces[DstNo][i]);
3785 
3787  for (unsigned InputNo = 0; InputNo < NumInputs; ++InputNo)
3788  Uses.push_back(InputOpsPieces[InputNo][i]);
3789 
3790  auto I = MIRBuilder.buildInstr(MI.getOpcode(), Defs, Uses, MI.getFlags());
3791  for (unsigned DstNo = 0; DstNo < NumDefs; ++DstNo)
3792  OutputRegs[DstNo].push_back(I.getReg(DstNo));
3793  }
3794 
3795  // Merge small outputs into MI's output for each def operand.
3796  if (NumLeftovers) {
3797  for (unsigned i = 0; i < NumDefs; ++i)
3798  mergeMixedSubvectors(MI.getReg(i), OutputRegs[i]);
3799  } else {
3800  for (unsigned i = 0; i < NumDefs; ++i)
3801  MIRBuilder.buildMerge(MI.getReg(i), OutputRegs[i]);
3802  }
3803 
3804  MI.eraseFromParent();
3805  return Legalized;
3806 }
3807 
3810  unsigned NumElts) {
3811  unsigned OrigNumElts = MRI.getType(MI.getReg(0)).getNumElements();
3812 
3813  unsigned NumInputs = MI.getNumOperands() - MI.getNumDefs();
3814  unsigned NumDefs = MI.getNumDefs();
3815 
3816  SmallVector<DstOp, 8> OutputOpsPieces;
3817  SmallVector<Register, 8> OutputRegs;
3818  makeDstOps(OutputOpsPieces, MRI.getType(MI.getReg(0)), NumElts);
3819 
3820  // Instructions that perform register split will be inserted in basic block
3821  // where register is defined (basic block is in the next operand).
3822  SmallVector<SmallVector<Register, 8>, 3> InputOpsPieces(NumInputs / 2);
3823  for (unsigned UseIdx = NumDefs, UseNo = 0; UseIdx < MI.getNumOperands();
3824  UseIdx += 2, ++UseNo) {
3825  MachineBasicBlock &OpMBB = *MI.getOperand(UseIdx + 1).getMBB();
3826  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3827  extractVectorParts(MI.getReg(UseIdx), NumElts, InputOpsPieces[UseNo]);
3828  }
3829 
3830  // Build PHIs with fewer elements.
3831  unsigned NumLeftovers = OrigNumElts % NumElts ? 1 : 0;
3832  MIRBuilder.setInsertPt(*MI.getParent(), MI);
3833  for (unsigned i = 0; i < OrigNumElts / NumElts + NumLeftovers; ++i) {
3834  auto Phi = MIRBuilder.buildInstr(TargetOpcode::G_PHI);
3835  Phi.addDef(
3836  MRI.createGenericVirtualRegister(OutputOpsPieces[i].getLLTTy(MRI)));
3837  OutputRegs.push_back(Phi.getReg(0));
3838 
3839  for (unsigned j = 0; j < NumInputs / 2; ++j) {
3840  Phi.addUse(InputOpsPieces[j][i]);
3841  Phi.add(MI.getOperand(1 + j * 2 + 1));
3842  }
3843  }
3844 
3845  // Merge small outputs into MI's def.
3846  if (NumLeftovers) {
3847  mergeMixedSubvectors(MI.getReg(0), OutputRegs);
3848  } else {
3849  MIRBuilder.buildMerge(MI.getReg(0), OutputRegs);
3850  }
3851 
3852  MI.eraseFromParent();
3853  return Legalized;
3854 }
3855 
3858  unsigned TypeIdx,
3859  LLT NarrowTy) {
3860  const int NumDst = MI.getNumOperands() - 1;
3861  const Register SrcReg = MI.getOperand(NumDst).getReg();
3862  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3863  LLT SrcTy = MRI.getType(SrcReg);
3864 
3865  if (TypeIdx != 1 || NarrowTy == DstTy)
3866  return UnableToLegalize;
3867 
3868  // Requires compatible types. Otherwise SrcReg should have been defined by
3869  // merge-like instruction that would get artifact combined. Most likely
3870  // instruction that defines SrcReg has to perform more/fewer elements
3871  // legalization compatible with NarrowTy.
3872  assert(SrcTy.isVector() && NarrowTy.isVector() && "Expected vector types");
3873  assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
3874 
3875  if ((SrcTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
3876  (NarrowTy.getSizeInBits() % DstTy.getSizeInBits() != 0))
3877  return UnableToLegalize;
3878 
3879  // This is most likely DstTy (smaller than register size) packed in SrcTy
3880  // (larger than register size) and since unmerge was not combined it will be
3881  // lowered to bit sequence extracts from register. Unpack SrcTy to NarrowTy
3882  // (register size) pieces first. Then unpack each of NarrowTy pieces to DstTy.
3883 
3884  // %1:_(DstTy), %2, %3, %4 = G_UNMERGE_VALUES %0:_(SrcTy)
3885  //
3886  // %5:_(NarrowTy), %6 = G_UNMERGE_VALUES %0:_(SrcTy) - reg sequence
3887  // %1:_(DstTy), %2 = G_UNMERGE_VALUES %5:_(NarrowTy) - sequence of bits in reg
3888  // %3:_(DstTy), %4 = G_UNMERGE_VALUES %6:_(NarrowTy)
3889  auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, SrcReg);
3890  const int NumUnmerge = Unmerge->getNumOperands() - 1;
3891  const int PartsPerUnmerge = NumDst / NumUnmerge;
3892 
3893  for (int I = 0; I != NumUnmerge; ++I) {
3894  auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3895 
3896  for (int J = 0; J != PartsPerUnmerge; ++J)
3897  MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3898  MIB.addUse(Unmerge.getReg(I));
3899  }
3900 
3901  MI.eraseFromParent();
3902  return Legalized;
3903 }
3904 
3907  LLT NarrowTy) {
3908  Register DstReg = MI.getOperand(0).getReg();
3909  LLT DstTy = MRI.getType(DstReg);
3910  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
3911  // Requires compatible types. Otherwise user of DstReg did not perform unmerge
3912  // that should have been artifact combined. Most likely instruction that uses
3913  // DstReg has to do more/fewer elements legalization compatible with NarrowTy.
3914  assert(DstTy.isVector() && NarrowTy.isVector() && "Expected vector types");
3915  assert((DstTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
3916  if (NarrowTy == SrcTy)
3917  return UnableToLegalize;
3918 
3919  // This attempts to lower part of LCMTy merge/unmerge sequence. Intended use
3920  // is for old mir tests. Since the changes to more/fewer elements it should no
3921  // longer be possible to generate MIR like this when starting from llvm-ir
3922  // because LCMTy approach was replaced with merge/unmerge to vector elements.
3923  if (TypeIdx == 1) {
3924  assert(SrcTy.isVector() && "Expected vector types");
3925  assert((SrcTy.getScalarType() == NarrowTy.getScalarType()) && "bad type");
3926  if ((DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0) ||
3927  (NarrowTy.getNumElements() >= SrcTy.getNumElements()))
3928  return UnableToLegalize;
3929  // %2:_(DstTy) = G_CONCAT_VECTORS %0:_(SrcTy), %1:_(SrcTy)
3930  //
3931  // %3:_(EltTy), %4, %5 = G_UNMERGE_VALUES %0:_(SrcTy)
3932  // %6:_(EltTy), %7, %8 = G_UNMERGE_VALUES %1:_(SrcTy)
3933  // %9:_(NarrowTy) = G_BUILD_VECTOR %3:_(EltTy), %4
3934  // %10:_(NarrowTy) = G_BUILD_VECTOR %5:_(EltTy), %6
3935  // %11:_(NarrowTy) = G_BUILD_VECTOR %7:_(EltTy), %8
3936  // %2:_(DstTy) = G_CONCAT_VECTORS %9:_(NarrowTy), %10, %11
3937 
3939  LLT EltTy = MRI.getType(MI.getOperand(1).getReg()).getScalarType();
3940  for (unsigned i = 1; i < MI.getNumOperands(); ++i) {
3941  auto Unmerge = MIRBuilder.buildUnmerge(EltTy, MI.getOperand(i).getReg());
3942  for (unsigned j = 0; j < Unmerge->getNumDefs(); ++j)
3943  Elts.push_back(Unmerge.getReg(j));
3944  }
3945 
3946  SmallVector<Register, 8> NarrowTyElts;
3947  unsigned NumNarrowTyElts = NarrowTy.getNumElements();
3948  unsigned NumNarrowTyPieces = DstTy.getNumElements() / NumNarrowTyElts;
3949  for (unsigned i = 0, Offset = 0; i < NumNarrowTyPieces;
3950  ++i, Offset += NumNarrowTyElts) {
3951  ArrayRef<Register> Pieces(&Elts[Offset], NumNarrowTyElts);
3952  NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Pieces).getReg(0));
3953  }
3954 
3955  MIRBuilder.buildMerge(DstReg, NarrowTyElts);
3956  MI.eraseFromParent();
3957  return Legalized;
3958  }
3959 
3960  assert(TypeIdx == 0 && "Bad type index");
3961  if ((NarrowTy.getSizeInBits() % SrcTy.getSizeInBits() != 0) ||
3962  (DstTy.getSizeInBits() % NarrowTy.getSizeInBits() != 0))
3963  return UnableToLegalize;
3964 
3965  // This is most likely SrcTy (smaller than register size) packed in DstTy
3966  // (larger than register size) and since merge was not combined it will be
3967  // lowered to bit sequence packing into register. Merge SrcTy to NarrowTy
3968  // (register size) pieces first. Then merge each of NarrowTy pieces to DstTy.
3969 
3970  // %0:_(DstTy) = G_MERGE_VALUES %1:_(SrcTy), %2, %3, %4
3971  //
3972  // %5:_(NarrowTy) = G_MERGE_VALUES %1:_(SrcTy), %2 - sequence of bits in reg
3973  // %6:_(NarrowTy) = G_MERGE_VALUES %3:_(SrcTy), %4
3974  // %0:_(DstTy) = G_MERGE_VALUES %5:_(NarrowTy), %6 - reg sequence
3975  SmallVector<Register, 8> NarrowTyElts;
3976  unsigned NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
3977  unsigned NumSrcElts = SrcTy.isVector() ? SrcTy.getNumElements() : 1;
3978  unsigned NumElts = NarrowTy.getNumElements() / NumSrcElts;
3979  for (unsigned i = 0; i < NumParts; ++i) {
3980  SmallVector<Register, 8> Sources;
3981  for (unsigned j = 0; j < NumElts; ++j)
3982  Sources.push_back(MI.getOperand(1 + i * NumElts + j).getReg());
3983  NarrowTyElts.push_back(MIRBuilder.buildMerge(NarrowTy, Sources).getReg(0));
3984  }
3985 
3986  MIRBuilder.buildMerge(DstReg, NarrowTyElts);
3987  MI.eraseFromParent();
3988  return Legalized;
3989 }
3990 
3993  unsigned TypeIdx,
3994  LLT NarrowVecTy) {
3995  Register DstReg = MI.getOperand(0).getReg();
3996  Register SrcVec = MI.getOperand(1).getReg();
3997  Register InsertVal;
3998  bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
3999 
4000  assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
4001  if (IsInsert)
4002  InsertVal = MI.getOperand(2).getReg();
4003 
4004  Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
4005 
4006  // TODO: Handle total scalarization case.
4007  if (!NarrowVecTy.isVector())
4008  return UnableToLegalize;
4009 
4010  LLT VecTy = MRI.getType(SrcVec);
4011 
4012  // If the index is a constant, we can really break this down as you would
4013  // expect, and index into the target size pieces.
4014  int64_t IdxVal;
4015  auto MaybeCst = getIConstantVRegValWithLookThrough(Idx, MRI);
4016  if (MaybeCst) {
4017  IdxVal = MaybeCst->Value.getSExtValue();
4018  // Avoid out of bounds indexing the pieces.
4019  if (IdxVal >= VecTy.getNumElements()) {
4020  MIRBuilder.buildUndef(DstReg);
4021  MI.eraseFromParent();
4022  return Legalized;
4023  }
4024 
4025  SmallVector<Register, 8> VecParts;
4026  LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4027 
4028  // Build a sequence of NarrowTy pieces in VecParts for this operand.
4029  LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4030  TargetOpcode::G_ANYEXT);
4031 
4032  unsigned NewNumElts = NarrowVecTy.getNumElements();
4033 
4034  LLT IdxTy = MRI.getType(Idx);
4035  int64_t PartIdx = IdxVal / NewNumElts;
4036  auto NewIdx =
4037  MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4038 
4039  if (IsInsert) {
4040  LLT PartTy = MRI.getType(VecParts[PartIdx]);
4041 
4042  // Use the adjusted index to insert into one of the subvectors.
4043  auto InsertPart = MIRBuilder.buildInsertVectorElement(
4044  PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4045  VecParts[PartIdx] = InsertPart.getReg(0);
4046 
4047  // Recombine the inserted subvector with the others to reform the result
4048  // vector.
4049  buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4050  } else {
4051  MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
4052  }
4053 
4054  MI.eraseFromParent();
4055  return Legalized;
4056  }
4057 
4058  // With a variable index, we can't perform the operation in a smaller type, so
4059  // we're forced to expand this.
4060  //
4061  // TODO: We could emit a chain of compare/select to figure out which piece to
4062  // index.
4064 }
4065 
4068  LLT NarrowTy) {
4069  // FIXME: Don't know how to handle secondary types yet.
4070  if (TypeIdx != 0)
4071  return UnableToLegalize;
4072 
4073  // This implementation doesn't work for atomics. Give up instead of doing
4074  // something invalid.
4075  if (LdStMI.isAtomic())
4076  return UnableToLegalize;
4077 
4078  bool IsLoad = isa<GLoad>(LdStMI);
4079  Register ValReg = LdStMI.getReg(0);
4080  Register AddrReg = LdStMI.getPointerReg();
4081  LLT ValTy = MRI.getType(ValReg);
4082 
4083  // FIXME: Do we need a distinct NarrowMemory legalize action?
4084  if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
4085  LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
4086  return UnableToLegalize;
4087  }
4088 
4089  int NumParts = -1;
4090  int NumLeftover = -1;
4091  LLT LeftoverTy;
4092  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
4093  if (IsLoad) {
4094  std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
4095  } else {
4096  if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4097  NarrowLeftoverRegs)) {
4098  NumParts = NarrowRegs.size();
4099  NumLeftover = NarrowLeftoverRegs.size();
4100  }
4101  }
4102 
4103  if (NumParts == -1)
4104  return UnableToLegalize;
4105 
4106  LLT PtrTy = MRI.getType(AddrReg);
4107  const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
4108 
4109  unsigned TotalSize = ValTy.getSizeInBits();
4110 
4111  // Split the load/store into PartTy sized pieces starting at Offset. If this
4112  // is a load, return the new registers in ValRegs. For a store, each element
4113  // of ValRegs should be PartTy. Returns the next offset that needs to be
4114  // handled.
4116  auto MMO = LdStMI.getMMO();
4117  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
4118  unsigned NumParts, unsigned Offset) -> unsigned {
4120  unsigned PartSize = PartTy.getSizeInBits();
4121  for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
4122  ++Idx) {
4123  unsigned ByteOffset = Offset / 8;
4124  Register NewAddrReg;
4125 
4126  MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
4127 
4128  MachineMemOperand *NewMMO =
4129  MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
4130 
4131  if (IsLoad) {
4132  Register Dst = MRI.createGenericVirtualRegister(PartTy);
4133  ValRegs.push_back(Dst);
4134  MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
4135  } else {
4136  MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
4137  }
4138  Offset = isBigEndian ? Offset - PartSize : Offset + PartSize;
4139  }
4140 
4141  return Offset;
4142  };
4143 
4144  unsigned Offset = isBigEndian ? TotalSize - NarrowTy.getSizeInBits() : 0;
4145  unsigned HandledOffset =
4146  splitTypePieces(NarrowTy, NarrowRegs, NumParts, Offset);
4147 
4148  // Handle the rest of the register if this isn't an even type breakdown.
4149  if (LeftoverTy.isValid())
4150  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, NumLeftover, HandledOffset);
4151 
4152  if (IsLoad) {
4153  insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4154  LeftoverTy, NarrowLeftoverRegs);
4155  }
4156 
4157  LdStMI.eraseFromParent();
4158  return Legalized;
4159 }
4160 
4163  LLT NarrowTy) {
4164  using namespace TargetOpcode;
4165  GenericMachineInstr &GMI = cast<GenericMachineInstr>(MI);
4166  unsigned NumElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
4167 
4168  switch (MI.getOpcode()) {
4169  case G_IMPLICIT_DEF:
4170  case G_TRUNC:
4171  case G_AND:
4172  case G_OR:
4173  case G_XOR:
4174  case G_ADD:
4175  case G_SUB:
4176  case G_MUL:
4177  case G_PTR_ADD:
4178  case G_SMULH:
4179  case G_UMULH:
4180  case G_FADD:
4181  case G_FMUL:
4182  case G_FSUB:
4183  case G_FNEG:
4184  case G_FABS:
4185  case G_FCANONICALIZE:
4186  case G_FDIV:
4187  case G_FREM:
4188  case G_FMA:
4189  case G_FMAD:
4190  case G_FPOW:
4191  case G_FEXP:
4192  case G_FEXP2:
4193  case G_FLOG:
4194  case G_FLOG2:
4195  case G_FLOG10:
4196  case G_FNEARBYINT:
4197  case G_FCEIL:
4198  case G_FFLOOR:
4199  case G_FRINT:
4200  case G_INTRINSIC_ROUND:
4201  case G_INTRINSIC_ROUNDEVEN:
4202  case G_INTRINSIC_TRUNC:
4203  case G_FCOS:
4204  case G_FSIN:
4205  case G_FSQRT:
4206  case G_BSWAP:
4207  case G_BITREVERSE:
4208  case G_SDIV:
4209  case G_UDIV:
4210  case G_SREM:
4211  case G_UREM:
4212  case G_SDIVREM:
4213  case G_UDIVREM:
4214  case G_SMIN:
4215  case G_SMAX:
4216  case G_UMIN:
4217  case G_UMAX:
4218  case G_ABS:
4219  case G_FMINNUM:
4220  case G_FMAXNUM:
4221  case G_FMINNUM_IEEE:
4222  case G_FMAXNUM_IEEE:
4223  case G_FMINIMUM:
4224  case G_FMAXIMUM:
4225  case G_FSHL:
4226  case G_FSHR:
4227  case G_ROTL:
4228  case G_ROTR:
4229  case G_FREEZE:
4230  case G_SADDSAT:
4231  case G_SSUBSAT:
4232  case G_UADDSAT:
4233  case G_USUBSAT:
4234  case G_UMULO:
4235  case G_SMULO:
4236  case G_SHL:
4237  case G_LSHR:
4238  case G_ASHR:
4239  case G_SSHLSAT:
4240  case G_USHLSAT:
4241  case G_CTLZ:
4242  case G_CTLZ_ZERO_UNDEF:
4243  case G_CTTZ:
4244  case G_CTTZ_ZERO_UNDEF:
4245  case G_CTPOP:
4246  case G_FCOPYSIGN:
4247  case G_ZEXT:
4248  case G_SEXT:
4249  case G_ANYEXT:
4250  case G_FPEXT:
4251  case G_FPTRUNC:
4252  case G_SITOFP:
4253  case G_UITOFP:
4254  case G_FPTOSI:
4255  case G_FPTOUI:
4256  case G_INTTOPTR:
4257  case G_PTRTOINT:
4258  case G_ADDRSPACE_CAST:
4259  case G_UADDO:
4260  case G_USUBO:
4261  case G_UADDE:
4262  case G_USUBE:
4263  case G_SADDO:
4264  case G_SSUBO:
4265  case G_SADDE:
4266  case G_SSUBE:
4267  return fewerElementsVectorMultiEltType(GMI, NumElts);
4268  case G_ICMP:
4269  case G_FCMP:
4270  return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*cpm predicate*/});
4271  case G_SELECT:
4272  if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4273  return fewerElementsVectorMultiEltType(GMI, NumElts);
4274  return fewerElementsVectorMultiEltType(GMI, NumElts, {1 /*scalar cond*/});
4275  case G_PHI:
4276  return fewerElementsVectorPhi(GMI, NumElts);
4277  case G_UNMERGE_VALUES:
4278  return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4279  case G_BUILD_VECTOR:
4280  assert(TypeIdx == 0 && "not a vector type index");
4281  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4282  case G_CONCAT_VECTORS:
4283  if (TypeIdx != 1) // TODO: This probably does work as expected already.
4284  return UnableToLegalize;
4285  return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4286  case G_EXTRACT_VECTOR_ELT:
4287  case G_INSERT_VECTOR_ELT:
4288  return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4289  case G_LOAD:
4290  case G_STORE:
4291  return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
4292  case G_SEXT_INREG:
4293  return fewerElementsVectorMultiEltType(GMI, NumElts, {2 /*imm*/});
4295  return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4296  case G_SHUFFLE_VECTOR:
4297  return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4298  default:
4299  return UnableToLegalize;
4300  }
4301 }
4302 
4304  MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4305  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4306  if (TypeIdx != 0)
4307  return UnableToLegalize;
4308 
4309  Register DstReg = MI.getOperand(0).getReg();
4310  Register Src1Reg = MI.getOperand(1).getReg();
4311  Register Src2Reg = MI.getOperand(2).getReg();
4312  ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4313  LLT DstTy = MRI.getType(DstReg);
4314  LLT Src1Ty = MRI.getType(Src1Reg);
4315  LLT Src2Ty = MRI.getType(Src2Reg);
4316  // The shuffle should be canonicalized by now.
4317  if (DstTy != Src1Ty)
4318  return UnableToLegalize;
4319  if (DstTy != Src2Ty)
4320  return UnableToLegalize;
4321 
4322  if (!isPowerOf2_32(DstTy.getNumElements()))
4323  return UnableToLegalize;
4324 
4325  // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4326  // Further legalization attempts will be needed to split further.
4327  NarrowTy =
4329  unsigned NewElts = NarrowTy.getNumElements();
4330 
4331  SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4332  extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
4333  extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
4334  Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4335  SplitSrc2Regs[1]};
4336 
4337  Register Hi, Lo;
4338 
4339  // If Lo or Hi uses elements from at most two of the four input vectors, then
4340  // express it as a vector shuffle of those two inputs. Otherwise extract the
4341  // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4343  for (unsigned High = 0; High < 2; ++High) {
4344  Register &Output = High ? Hi : Lo;
4345 
4346  // Build a shuffle mask for the output, discovering on the fly which
4347  // input vectors to use as shuffle operands (recorded in InputUsed).
4348  // If building a suitable shuffle vector proves too hard, then bail
4349  // out with useBuildVector set.
4350  unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4351  unsigned FirstMaskIdx = High * NewElts;
4352  bool UseBuildVector = false;
4353  for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4354  // The mask element. This indexes into the input.
4355  int Idx = Mask[FirstMaskIdx + MaskOffset];
4356 
4357  // The input vector this mask element indexes into.
4358  unsigned Input = (unsigned)Idx / NewElts;
4359 
4360  if (Input >= array_lengthof(Inputs)) {
4361  // The mask element does not index into any input vector.
4362  Ops.push_back(-1);
4363  continue;
4364  }
4365 
4366  // Turn the index into an offset from the start of the input vector.
4367  Idx -= Input * NewElts;
4368 
4369  // Find or create a shuffle vector operand to hold this input.
4370  unsigned OpNo;
4371  for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
4372  if (InputUsed[OpNo] == Input) {
4373  // This input vector is already an operand.
4374  break;
4375  } else if (InputUsed[OpNo] == -1U) {
4376  // Create a new operand for this input vector.
4377  InputUsed[OpNo] = Input;
4378  break;
4379  }
4380  }
4381 
4382  if (OpNo >= array_lengthof(InputUsed)) {
4383  // More than two input vectors used! Give up on trying to create a
4384  // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
4385  UseBuildVector = true;
4386  break;
4387  }
4388 
4389  // Add the mask index for the new shuffle vector.
4390  Ops.push_back(Idx + OpNo * NewElts);
4391  }
4392 
4393  if (UseBuildVector) {
4394  LLT EltTy = NarrowTy.getElementType();
4396 
4397  // Extract the input elements by hand.
4398  for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4399  // The mask element. This indexes into the input.
4400  int Idx = Mask[FirstMaskIdx + MaskOffset];
4401 
4402  // The input vector this mask element indexes into.
4403  unsigned Input = (unsigned)Idx / NewElts;
4404 
4405  if (Input >= array_lengthof(Inputs)) {
4406  // The mask element is "undef" or indexes off the end of the input.
4407  SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4408  continue;
4409  }
4410 
4411  // Turn the index into an offset from the start of the input vector.
4412  Idx -= Input * NewElts;
4413 
4414  // Extract the vector element by hand.
4415  SVOps.push_back(MIRBuilder
4416  .buildExtractVectorElement(
4417  EltTy, Inputs[Input],
4419  .getReg(0));
4420  }
4421 
4422  // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4423  Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4424  } else if (InputUsed[0] == -1U) {
4425  // No input vectors were used! The result is undefined.
4426  Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4427  } else {
4428  Register Op0 = Inputs[InputUsed[0]];
4429  // If only one input was used, use an undefined vector for the other.
4430  Register Op1 = InputUsed[1] == -1U
4431  ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4432  : Inputs[InputUsed[1]];
4433  // At least one input vector was used. Create a new shuffle vector.
4434  Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4435  }
4436 
4437  Ops.clear();
4438  }
4439 
4440  MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4441  MI.eraseFromParent();
4442  return Legalized;
4443 }
4444 
4445 static unsigned getScalarOpcForReduction(unsigned Opc) {
4446  unsigned ScalarOpc;
4447  switch (Opc) {
4448  case TargetOpcode::G_VECREDUCE_FADD:
4449  ScalarOpc = TargetOpcode::G_FADD;
4450  break;
4451  case TargetOpcode::G_VECREDUCE_FMUL:
4452  ScalarOpc = TargetOpcode::G_FMUL;
4453  break;
4454  case TargetOpcode::G_VECREDUCE_FMAX:
4455  ScalarOpc = TargetOpcode::G_FMAXNUM;
4456  break;
4457  case TargetOpcode::G_VECREDUCE_FMIN:
4458  ScalarOpc = TargetOpcode::G_FMINNUM;
4459  break;
4460  case TargetOpcode::G_VECREDUCE_ADD:
4461  ScalarOpc = TargetOpcode::G_ADD;
4462  break;
4463  case TargetOpcode::G_VECREDUCE_MUL:
4464  ScalarOpc = TargetOpcode::G_MUL;
4465  break;
4466  case TargetOpcode::G_VECREDUCE_AND:
4467  ScalarOpc = TargetOpcode::G_AND;
4468  break;
4469  case TargetOpcode::G_VECREDUCE_OR:
4470  ScalarOpc = TargetOpcode::G_OR;
4471  break;
4472  case TargetOpcode::G_VECREDUCE_XOR:
4473  ScalarOpc = TargetOpcode::G_XOR;
4474  break;
4475  case TargetOpcode::G_VECREDUCE_SMAX:
4476  ScalarOpc = TargetOpcode::G_SMAX;
4477  break;
4478  case TargetOpcode::G_VECREDUCE_SMIN:
4479  ScalarOpc = TargetOpcode::G_SMIN;
4480  break;
4481  case TargetOpcode::G_VECREDUCE_UMAX:
4482  ScalarOpc = TargetOpcode::G_UMAX;
4483  break;
4484  case TargetOpcode::G_VECREDUCE_UMIN:
4485  ScalarOpc = TargetOpcode::G_UMIN;
4486  break;
4487  default:
4488  llvm_unreachable("Unhandled reduction");
4489  }
4490  return ScalarOpc;
4491 }
4492 
4494  MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4495  unsigned Opc = MI.getOpcode();
4496  assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
4497  Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
4498  "Sequential reductions not expected");
4499 
4500  if (TypeIdx != 1)
4501  return UnableToLegalize;
4502 
4503  // The semantics of the normal non-sequential reductions allow us to freely
4504  // re-associate the operation.
4505  Register SrcReg = MI.getOperand(1).getReg();
4506  LLT SrcTy = MRI.getType(SrcReg);
4507  Register DstReg = MI.getOperand(0).getReg();
4508  LLT DstTy = MRI.getType(DstReg);
4509 
4510  if (NarrowTy.isVector() &&
4511  (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0))
4512  return UnableToLegalize;
4513 
4514  unsigned ScalarOpc = getScalarOpcForReduction(Opc);
4515  SmallVector<Register> SplitSrcs;
4516  // If NarrowTy is a scalar then we're being asked to scalarize.
4517  const unsigned NumParts =
4518  NarrowTy.isVector() ? SrcTy.getNumElements() / NarrowTy.getNumElements()
4519  : SrcTy.getNumElements();
4520 
4521  extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
4522  if (NarrowTy.isScalar()) {
4523  if (DstTy != NarrowTy)
4524  return UnableToLegalize; // FIXME: handle implicit extensions.
4525 
4526  if (isPowerOf2_32(NumParts)) {
4527  // Generate a tree of scalar operations to reduce the critical path.
4528  SmallVector<Register> PartialResults;
4529  unsigned NumPartsLeft = NumParts;
4530  while (NumPartsLeft > 1) {
4531  for (unsigned Idx = 0; Idx < NumPartsLeft - 1; Idx += 2) {
4532  PartialResults.emplace_back(
4533  MIRBuilder
4534  .buildInstr(ScalarOpc, {NarrowTy},
4535  {SplitSrcs[Idx], SplitSrcs[Idx + 1]})
4536  .getReg(0));
4537  }
4538  SplitSrcs = PartialResults;
4539  PartialResults.clear();
4540  NumPartsLeft = SplitSrcs.size();
4541  }
4542  assert(SplitSrcs.size() == 1);
4543  MIRBuilder.buildCopy(DstReg, SplitSrcs[0]);
4544  MI.eraseFromParent();
4545  return Legalized;
4546  }
4547  // If we can't generate a tree, then just do sequential operations.
4548  Register Acc = SplitSrcs[0];
4549  for (unsigned Idx = 1; Idx < NumParts; ++Idx)
4550  Acc = MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {Acc, SplitSrcs[Idx]})
4551  .getReg(0);
4552  MIRBuilder.buildCopy(DstReg, Acc);
4553  MI.eraseFromParent();
4554  return Legalized;
4555  }
4556  SmallVector<Register> PartialReductions;
4557  for (unsigned Part = 0; Part < NumParts; ++Part) {
4558  PartialReductions.push_back(
4559  MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
4560  }
4561 
4562 
4563  // If the types involved are powers of 2, we can generate intermediate vector
4564  // ops, before generating a final reduction operation.
4565  if (isPowerOf2_32(SrcTy.getNumElements()) &&
4566  isPowerOf2_32(NarrowTy.getNumElements())) {
4567  return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
4568  }
4569 
4570  Register Acc = PartialReductions[0];
4571  for (unsigned Part = 1; Part < NumParts; ++Part) {
4572  if (Part == NumParts - 1) {
4573  MIRBuilder.buildInstr(ScalarOpc, {DstReg},
4574  {Acc, PartialReductions[Part]});
4575  } else {
4576  Acc = MIRBuilder
4577  .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
4578  .getReg(0);
4579  }
4580  }
4581  MI.eraseFromParent();
4582  return Legalized;
4583 }
4584 
4586 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
4587  LLT SrcTy, LLT NarrowTy,
4588  unsigned ScalarOpc) {
4589  SmallVector<Register> SplitSrcs;
4590  // Split the sources into NarrowTy size pieces.
4591  extractParts(SrcReg, NarrowTy,
4592  SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
4593  // We're going to do a tree reduction using vector operations until we have
4594  // one NarrowTy size value left.
4595  while (SplitSrcs.size() > 1) {
4596  SmallVector<Register> PartialRdxs;
4597  for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
4598  Register LHS = SplitSrcs[Idx];
4599  Register RHS = SplitSrcs[Idx + 1];
4600  // Create the intermediate vector op.
4601  Register Res =
4602  MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
4603  PartialRdxs.push_back(Res);
4604  }
4605  SplitSrcs = std::move(PartialRdxs);
4606  }
4607  // Finally generate the requested NarrowTy based reduction.
4609  MI.getOperand(1).setReg(SplitSrcs[0]);
4611  return Legalized;
4612 }
4613 
4616  const LLT HalfTy, const LLT AmtTy) {
  // Expand a shift by the constant amount Amt into operations on two HalfTy
  // halves of the source, merge the two result halves into operand 0 of MI,
  // and erase MI. HalfTy is the type of each half; AmtTy is the type used for
  // the shift-amount constants that are built. Every path returns Legalized.
  //
  // NOTE(review): the signature lines that name this function and declare MI
  // and Amt (listing numbers 4614-4615) are not present in this extract.
4617 
4618  Register InL = MRI.createGenericVirtualRegister(HalfTy);
4619  Register InH = MRI.createGenericVirtualRegister(HalfTy);
  // Split the source operand into its two HalfTy pieces, InL and InH.
4620  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4621 
  // A shift by zero is just a copy: re-merge the unmodified halves.
4622  if (Amt.isZero()) {
4623  MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
4624  MI.eraseFromParent();
4625  return Legalized;
4626  }
4627 
4628  LLT NVT = HalfTy;
  // NVTBits is the width of one half; VTBits is the width of both halves
  // together.
4629  unsigned NVTBits = HalfTy.getSizeInBits();
4630  unsigned VTBits = 2 * NVTBits;
4631 
  // Lo and Hi receive the two result halves. Each opcode below distinguishes
  // four ranges of Amt: > VTBits, > NVTBits, == NVTBits, and < NVTBits.
  // NOTE(review): Amt == VTBits is not caught by the ugt(VTBits) tests; it
  // falls into the ugt(NVTBits) branch, where Amt - NVTBits equals NVTBits.
  // Confirm whether uge(VTBits) was intended.
4632  SrcOp Lo(Register(0)), Hi(Register(0));
4633  if (MI.getOpcode() == TargetOpcode::G_SHL) {
4634  if (Amt.ugt(VTBits)) {
  // Both halves become zero.
4635  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4636  } else if (Amt.ugt(NVTBits)) {
  // Low half is zero; high half is InL shifted left by the excess.
4637  Lo = MIRBuilder.buildConstant(NVT, 0);
4638  Hi = MIRBuilder.buildShl(NVT, InL,
4639  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4640  } else if (Amt == NVTBits) {
  // Exactly one half: InL moves into the high half unchanged.
4641  Lo = MIRBuilder.buildConstant(NVT, 0);
4642  Hi = InL;
4643  } else {
  // Amt < NVTBits: Hi = (InH << Amt) | (InL >> (NVTBits - Amt)).
  // "-Amt + NVTBits" is NVTBits - Amt.
4644  Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
4645  auto OrLHS =
4646  MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
4647  auto OrRHS = MIRBuilder.buildLShr(
4648  NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4649  Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4650  }
4651  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4652  if (Amt.ugt(VTBits)) {
  // Both halves become zero.
4653  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4654  } else if (Amt.ugt(NVTBits)) {
  // High half is zero; low half is InH shifted right by the excess.
4655  Lo = MIRBuilder.buildLShr(NVT, InH,
4656  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4657  Hi = MIRBuilder.buildConstant(NVT, 0);
4658  } else if (Amt == NVTBits) {
  // Exactly one half: InH moves into the low half unchanged.
4659  Lo = InH;
4660  Hi = MIRBuilder.buildConstant(NVT, 0);
4661  } else {
  // Amt < NVTBits: Lo = (InL >> Amt) | (InH << (NVTBits - Amt)).
  // The same amount constant is reused for both right shifts.
4662  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4663 
4664  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4665  auto OrRHS = MIRBuilder.buildShl(
4666  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4667 
4668  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4669  Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
4670  }
4671  } else {
  // Every remaining opcode takes the arithmetic-right-shift expansion
  // (presumably only G_ASHR reaches here -- TODO confirm with the callers).
  // InH >>a (NVTBits - 1) fills a half with copies of InH's top bit.
4672  if (Amt.ugt(VTBits)) {
4673  Hi = Lo = MIRBuilder.buildAShr(
4674  NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4675  } else if (Amt.ugt(NVTBits)) {
  // Low half is InH arithmetically shifted by the excess; high half is
  // the sign fill.
4676  Lo = MIRBuilder.buildAShr(NVT, InH,
4677  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4678  Hi = MIRBuilder.buildAShr(NVT, InH,
4679  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4680  } else if (Amt == NVTBits) {
  // Exactly one half: InH moves into the low half; high half is the
  // sign fill.
4681  Lo = InH;
4682  Hi = MIRBuilder.buildAShr(NVT, InH,
4683  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4684  } else {
  // Amt < NVTBits: same low-half expression as the G_LSHR case, but the
  // high half uses an arithmetic shift.
4685  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4686 
4687  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4688  auto OrRHS = MIRBuilder.buildShl(
4689  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4690 
4691  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4692  Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
4693  }
4694  }
4695 
  // Recombine the two result halves into the original destination and drop
  // the wide shift.
4696  MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
4697  MI.eraseFromParent();
4698 
4699  return Legalized;
4700 }
4701 
4702 // TODO: Optimize if constant shift amount.
4705  LLT RequestedTy) {
4706  if (TypeIdx == 1) {
4708  narrowScalarSrc(MI, RequestedTy, 2);
4710  return Legalized;
4711  }
4712 
4713  Register DstReg = MI.getOperand(0).getReg();
4714  LLT DstTy = MRI.getType(DstReg);
4715  if (DstTy.isVector())
4716  return UnableToLegalize;
4717 
4718  Register Amt = MI.getOperand(2).getReg();
4719  LLT ShiftAmtTy = MRI.getType(Amt);
4720  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
4721  if (DstEltSize % 2 != 0)
4722  return UnableToLegalize;
4723 
4724  // Ignore the input type. We can only go to exactly half the size of the
4725  // input. If that isn't small enough, the resulting pieces will be further
4726  // legalized.
4727  const unsigned NewBitSize = DstEltSize / 2;
4728  const LLT HalfTy = LLT::scalar(NewBitSize);
4729  const LLT CondTy = LLT::scalar(1);
4730 
4731  if (auto VRegAndVal = getIConstantVRegValWithLookThrough(Amt, MRI)) {
4732  return narrowScalarShiftByConstant(MI, VRegAndVal->Value, HalfTy,
4733  ShiftAmtTy);
4734  }
4735 
4736  // TODO: Expand with known bits.
4737 
4738  // Handle the fully general expansion by an unknown amount.
4739  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
4740 
4741  Register InL = MRI.createGenericVirtualRegister(HalfTy);
4742  Register InH = MRI.createGenericVirtualRegister(HalfTy);
4743  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4744 
4745  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
4746  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
4747 
4748  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
4749  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
4750  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
4751 
4752  Register ResultRegs[2];
4753  switch (MI.getOpcode()) {
4754  case TargetOpcode::G_SHL: {
4755  // Short: ShAmt < NewBitSize
4756  auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
4757 
4758  auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
4759  auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
4760  auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
4761 
4762  // Long: ShAmt >= NewBitSize
4763  auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
4764  auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
4765 
4766  auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
4767  auto Hi = MIRBuilder.buildSelect(
4768  HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
4769 
4770  ResultRegs[0] = Lo.getReg(0);
4771  ResultRegs[1] = Hi.getReg(0);
4772  break;
4773  }
4774  case TargetOpcode::G_LSHR:
4775  case TargetOpcode::G_ASHR: {
4776  // Short: ShAmt < NewBitSize
4777  auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
4778 
4779  auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
4780  auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
4781  auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
4782 
4783  // Long: ShAmt >= NewBitSize
4784  MachineInstrBuilder HiL;
4785  if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4786  HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
4787  } else {
4788  auto ShiftAmt =