LLVM  9.0.0svn
LegalizerHelper.cpp
Go to the documentation of this file.
1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
23 #include "llvm/Support/Debug.h"
26 
27 #define DEBUG_TYPE "legalizer"
28 
29 using namespace llvm;
30 using namespace LegalizeActions;
31 
32 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
33 ///
34 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
35 /// with any leftover piece as type \p LeftoverTy
36 ///
37 /// Returns -1 in the first element of the pair if the breakdown is not
38 /// satisfiable.
39 static std::pair<int, int>
40 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
41  assert(!LeftoverTy.isValid() && "this is an out argument");
42 
43  unsigned Size = OrigTy.getSizeInBits();
44  unsigned NarrowSize = NarrowTy.getSizeInBits();
45  unsigned NumParts = Size / NarrowSize;
46  unsigned LeftoverSize = Size - NumParts * NarrowSize;
47  assert(Size > NarrowSize);
48 
49  if (LeftoverSize == 0)
50  return {NumParts, 0};
51 
52  if (NarrowTy.isVector()) {
53  unsigned EltSize = OrigTy.getScalarSizeInBits();
54  if (LeftoverSize % EltSize != 0)
55  return {-1, -1};
56  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
57  } else {
58  LeftoverTy = LLT::scalar(LeftoverSize);
59  }
60 
61  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
62  return std::make_pair(NumParts, NumLeftover);
63 }
64 
// Constructor: stores the builder and observer, takes the register info from
// MF and the legalizer info from MF's subtarget, then points the builder at MF
// and registers the observer with it.
// NOTE(review): the listing's own numbering skips 65 here, so the first line
// of this signature (which presumably declares `MF`) is missing from this
// extract -- confirm against the upstream file.
66  GISelChangeObserver &Observer,
67  MachineIRBuilder &Builder)
68  : MIRBuilder(Builder), MRI(MF.getRegInfo()),
69  LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
70  MIRBuilder.setMF(MF);
71  MIRBuilder.setChangeObserver(Observer);
72 }
73 
// Constructor variant that uses a caller-supplied `LI` instead of querying
// MF's subtarget; otherwise it sets up the builder the same way (setMF, then
// setChangeObserver).
// NOTE(review): the listing's own numbering skips 74 and 76 here, so the
// signature lines that declare `MF`, `LI` and `B` are missing from this
// extract -- confirm against the upstream file.
75  GISelChangeObserver &Observer,
77  : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
78  MIRBuilder.setMF(MF);
79  MIRBuilder.setChangeObserver(Observer);
80 }
// Single legalization step for one instruction: asks LI.getAction what to do
// with MI and dispatches to the matching helper (libcall, narrowScalar,
// widenScalar, lower, fewerElementsVector, moreElementsVector, or the target's
// legalizeCustom hook). `Legal` reports AlreadyLegal; any action not listed
// reports UnableToLegalize.
// NOTE(review): the listing's own numbering skips 81-82 (the function
// signature) and 111 (the tail of the `Custom` return expression), so both are
// missing from this extract -- confirm against the upstream file.
83  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
84 
85  auto Step = LI.getAction(MI, MRI);
86  switch (Step.Action) {
87  case Legal:
88  LLVM_DEBUG(dbgs() << ".. Already legal\n");
89  return AlreadyLegal;
90  case Libcall:
91  LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
92  return libcall(MI);
93  case NarrowScalar:
94  LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
95  return narrowScalar(MI, Step.TypeIdx, Step.NewType);
96  case WidenScalar:
97  LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
98  return widenScalar(MI, Step.TypeIdx, Step.NewType);
99  case Lower:
100  LLVM_DEBUG(dbgs() << ".. Lower\n");
101  return lower(MI, Step.TypeIdx, Step.NewType);
102  case FewerElements:
103  LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
104  return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
105  case MoreElements:
106  LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
107  return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
108  case Custom:
109  LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
// The target hook's boolean result selects the returned status; the
// false-branch of this conditional is on the missing listing line 111.
110  return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
112  default:
113  LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
114  return UnableToLegalize;
115  }
116 }
117 
// Splits Reg into NumParts pieces via a single unmerge whose results are the
// registers in VRegs.
// NOTE(review): the listing's own numbering skips 121, between the loop header
// and the buildUnmerge call. The loop body (presumably the line that fills
// VRegs with new registers of type Ty) is missing from this extract, which is
// why the unmerge reads as if it were the loop body. Confirm against the
// upstream file before changing anything here.
118 void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
119  SmallVectorImpl<unsigned> &VRegs) {
120  for (int i = 0; i < NumParts; ++i)
122  MIRBuilder.buildUnmerge(VRegs, Reg);
123 }
124 
// Splits Reg (of type RegTy) into as many MainTy-sized pieces as fit, plus a
// leftover piece when RegTy is not an exact multiple of MainTy.
//
// MainTy pieces are appended to VRegs, leftover pieces to LeftoverRegs, and the
// leftover type is written to LeftoverTy (which must be passed in invalid).
// Returns false, emitting nothing, when MainTy is a vector and the leftover
// bits are not a whole number of MainTy elements; returns true otherwise.
// NOTE(review): the listing's own numbering skips 127, so the parameter line
// that declares `VRegs` is missing from this extract.
125 bool LegalizerHelper::extractParts(unsigned Reg, LLT RegTy,
126  LLT MainTy, LLT &LeftoverTy,
128  SmallVectorImpl<unsigned> &LeftoverRegs) {
129  assert(!LeftoverTy.isValid() && "this is an out argument");
130 
131  unsigned RegSize = RegTy.getSizeInBits();
132  unsigned MainSize = MainTy.getSizeInBits();
133  unsigned NumParts = RegSize / MainSize;
134  unsigned LeftoverSize = RegSize - NumParts * MainSize;
135 
136  // Use an unmerge when possible.
137  if (LeftoverSize == 0) {
138  for (unsigned I = 0; I < NumParts; ++I)
139  VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
140  MIRBuilder.buildUnmerge(VRegs, Reg);
141  return true;
142  }
143 
// Pick the leftover type: for a vector MainTy it is built from MainTy's
// element size, otherwise it is a plain scalar of the remaining bits.
144  if (MainTy.isVector()) {
145  unsigned EltSize = MainTy.getScalarSizeInBits();
146  if (LeftoverSize % EltSize != 0)
147  return false;
148  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
149  } else {
150  LeftoverTy = LLT::scalar(LeftoverSize);
151  }
152 
153  // For irregular sizes, extract the individual parts.
154  for (unsigned I = 0; I != NumParts; ++I) {
155  unsigned NewReg = MRI.createGenericVirtualRegister(MainTy);
156  VRegs.push_back(NewReg);
157  MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
158  }
159 
// Extract the tail that starts right after the last MainTy piece.
160  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
161  Offset += LeftoverSize) {
162  unsigned NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
163  LeftoverRegs.push_back(NewReg);
164  MIRBuilder.buildExtract(NewReg, Reg, Offset);
165  }
166 
167  return true;
168 }
169 
170 void LegalizerHelper::insertParts(unsigned DstReg,
171  LLT ResultTy, LLT PartTy,
172  ArrayRef<unsigned> PartRegs,
173  LLT LeftoverTy,
174  ArrayRef<unsigned> LeftoverRegs) {
175  if (!LeftoverTy.isValid()) {
176  assert(LeftoverRegs.empty());
177 
178  if (!ResultTy.isVector()) {
179  MIRBuilder.buildMerge(DstReg, PartRegs);
180  return;
181  }
182 
183  if (PartTy.isVector())
184  MIRBuilder.buildConcatVectors(DstReg, PartRegs);
185  else
186  MIRBuilder.buildBuildVector(DstReg, PartRegs);
187  return;
188  }
189 
190  unsigned PartSize = PartTy.getSizeInBits();
191  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
192 
193  unsigned CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
194  MIRBuilder.buildUndef(CurResultReg);
195 
196  unsigned Offset = 0;
197  for (unsigned PartReg : PartRegs) {
198  unsigned NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
199  MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
200  CurResultReg = NewResultReg;
201  Offset += PartSize;
202  }
203 
204  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
205  // Use the original output register for the final insert to avoid a copy.
206  unsigned NewResultReg = (I + 1 == E) ?
207  DstReg : MRI.createGenericVirtualRegister(ResultTy);
208 
209  MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
210  CurResultReg = NewResultReg;
211  Offset += LeftoverPartSize;
212  }
213 }
214 
215 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
216  switch (Opcode) {
217  case TargetOpcode::G_SDIV:
218  assert((Size == 32 || Size == 64) && "Unsupported size");
219  return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
220  case TargetOpcode::G_UDIV:
221  assert((Size == 32 || Size == 64) && "Unsupported size");
222  return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
223  case TargetOpcode::G_SREM:
224  assert((Size == 32 || Size == 64) && "Unsupported size");
225  return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
226  case TargetOpcode::G_UREM:
227  assert((Size == 32 || Size == 64) && "Unsupported size");
228  return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
229  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
230  assert(Size == 32 && "Unsupported size");
231  return RTLIB::CTLZ_I32;
232  case TargetOpcode::G_FADD:
233  assert((Size == 32 || Size == 64) && "Unsupported size");
234  return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
235  case TargetOpcode::G_FSUB:
236  assert((Size == 32 || Size == 64) && "Unsupported size");
237  return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
238  case TargetOpcode::G_FMUL:
239  assert((Size == 32 || Size == 64) && "Unsupported size");
240  return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
241  case TargetOpcode::G_FDIV:
242  assert((Size == 32 || Size == 64) && "Unsupported size");
243  return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
244  case TargetOpcode::G_FEXP:
245  assert((Size == 32 || Size == 64) && "Unsupported size");
246  return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
247  case TargetOpcode::G_FEXP2:
248  assert((Size == 32 || Size == 64) && "Unsupported size");
249  return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
250  case TargetOpcode::G_FREM:
251  return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
252  case TargetOpcode::G_FPOW:
253  return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
254  case TargetOpcode::G_FMA:
255  assert((Size == 32 || Size == 64) && "Unsupported size");
256  return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
257  case TargetOpcode::G_FSIN:
258  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
259  return Size == 128 ? RTLIB::SIN_F128
260  : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
261  case TargetOpcode::G_FCOS:
262  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
263  return Size == 128 ? RTLIB::COS_F128
264  : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
265  case TargetOpcode::G_FLOG10:
266  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
267  return Size == 128 ? RTLIB::LOG10_F128
268  : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
269  case TargetOpcode::G_FLOG:
270  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
271  return Size == 128 ? RTLIB::LOG_F128
272  : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
273  case TargetOpcode::G_FLOG2:
274  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
275  return Size == 128 ? RTLIB::LOG2_F128
276  : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
277  }
278  llvm_unreachable("Unknown libcall function");
279 }
280 
// Emits a call to a runtime-library routine: fetches the call-lowering and
// target-lowering objects from the builder's function, looks up the routine's
// symbol name and calling convention for `Libcall`, marks the frame as
// containing calls, and hands the call to CLI.lowerCall with `Result` and
// `Args`.
// NOTE(review): the listing's own numbering skips 281-282, 284, 292 and 294,
// so the start of the signature, the statement taken when lowerCall fails, and
// the final return are all missing from this extract -- confirm against the
// upstream file.
283  const CallLowering::ArgInfo &Result,
285  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
286  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
287  const char *Name = TLI.getLibcallName(Libcall);
288 
289  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
290  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
291  MachineOperand::CreateES(Name), Result, Args))
293 
295 }
296 
// Builds a libcall for an instruction whose result and operands all share one
// type: the routine is chosen by getRTLibDesc from MI's opcode and `Size`,
// operands 1..N become the call arguments and operand 0 receives the result,
// all typed as `OpType`.
// NOTE(review): the listing's own numbering skips 298-299 (start of the
// signature) and 303 (presumably the declaration of `Args`), so both are
// missing from this extract -- confirm against the upstream file.
297 // Useful for libcalls where all operands have the same type.
300  Type *OpType) {
301  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
302 
304  for (unsigned i = 1; i < MI.getNumOperands(); i++)
305  Args.push_back({MI.getOperand(i).getReg(), OpType});
306  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
307  Args);
308 }
309 
310 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
311  Type *FromType) {
312  auto ToMVT = MVT::getVT(ToType);
313  auto FromMVT = MVT::getVT(FromType);
314 
315  switch (Opcode) {
316  case TargetOpcode::G_FPEXT:
317  return RTLIB::getFPEXT(FromMVT, ToMVT);
318  case TargetOpcode::G_FPTRUNC:
319  return RTLIB::getFPROUND(FromMVT, ToMVT);
320  case TargetOpcode::G_FPTOSI:
321  return RTLIB::getFPTOSINT(FromMVT, ToMVT);
322  case TargetOpcode::G_FPTOUI:
323  return RTLIB::getFPTOUINT(FromMVT, ToMVT);
324  case TargetOpcode::G_SITOFP:
325  return RTLIB::getSINTTOFP(FromMVT, ToMVT);
326  case TargetOpcode::G_UITOFP:
327  return RTLIB::getUINTTOFP(FromMVT, ToMVT);
328  }
329  llvm_unreachable("Unsupported libcall function");
330 }
331 
// Builds a libcall for a one-operand conversion: operand 0 of MI receives the
// result typed as `ToType`, operand 1 is the single argument typed as
// `FromType`.
// NOTE(review): the listing's own numbering skips 332-333 (start of the
// signature) and 335 (presumably where `Libcall` is computed), so both are
// missing from this extract -- confirm against the upstream file.
334  Type *FromType) {
336  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
337  {{MI.getOperand(1).getReg(), FromType}});
338 }
339 
// Replaces MI with a call to a runtime-library routine.
//
// Integer div/rem and CTLZ_ZERO_UNDEF, and the listed floating-point
// arithmetic/math opcodes, go through simpleLibcall using the size of operand
// 0. The conversion opcodes only accept the specific size combinations checked
// in each case. Any other opcode, an unsupported size, or a failed helper
// returns early without touching MI; on success MI is erased and Legalized is
// returned.
// NOTE(review): the listing's own numbering skips 340-341 (the signature) and
// 392-393, 404-405, 417 and 431 (the statements that declare `Status` in the
// conversion cases), so those lines are missing from this extract -- confirm
// against the upstream file.
342  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
343  unsigned Size = LLTy.getSizeInBits();
344  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
345 
346  MIRBuilder.setInstr(MI);
347 
348  switch (MI.getOpcode()) {
349  default:
350  return UnableToLegalize;
351  case TargetOpcode::G_SDIV:
352  case TargetOpcode::G_UDIV:
353  case TargetOpcode::G_SREM:
354  case TargetOpcode::G_UREM:
355  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
// Integer operations: the IR type is an integer of the same bit width.
356  Type *HLTy = IntegerType::get(Ctx, Size);
357  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
358  if (Status != Legalized)
359  return Status;
360  break;
361  }
362  case TargetOpcode::G_FADD:
363  case TargetOpcode::G_FSUB:
364  case TargetOpcode::G_FMUL:
365  case TargetOpcode::G_FDIV:
366  case TargetOpcode::G_FMA:
367  case TargetOpcode::G_FPOW:
368  case TargetOpcode::G_FREM:
369  case TargetOpcode::G_FCOS:
370  case TargetOpcode::G_FSIN:
371  case TargetOpcode::G_FLOG10:
372  case TargetOpcode::G_FLOG:
373  case TargetOpcode::G_FLOG2:
374  case TargetOpcode::G_FEXP:
375  case TargetOpcode::G_FEXP2: {
376  if (Size > 64) {
377  LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
378  return UnableToLegalize;
379  }
// Every size other than 64 is treated as float here; only sizes above 64
// are rejected by the check just before this.
380  Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
381  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
382  if (Status != Legalized)
383  return Status;
384  break;
385  }
386  case TargetOpcode::G_FPEXT: {
387  // FIXME: Support other floating point types (half, fp128 etc)
388  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
389  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
// Only the 32-bit to 64-bit extension is handled.
390  if (ToSize != 64 || FromSize != 32)
391  return UnableToLegalize;
394  if (Status != Legalized)
395  return Status;
396  break;
397  }
398  case TargetOpcode::G_FPTRUNC: {
399  // FIXME: Support other floating point types (half, fp128 etc)
400  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
401  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
// Only the 64-bit to 32-bit truncation is handled.
402  if (ToSize != 32 || FromSize != 64)
403  return UnableToLegalize;
406  if (Status != Legalized)
407  return Status;
408  break;
409  }
410  case TargetOpcode::G_FPTOSI:
411  case TargetOpcode::G_FPTOUI: {
412  // FIXME: Support other types
413  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
414  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
// Result must be 32 bits wide; the source may be 32 or 64 bits.
415  if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
416  return UnableToLegalize;
418  MI, MIRBuilder, Type::getInt32Ty(Ctx),
419  FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
420  if (Status != Legalized)
421  return Status;
422  break;
423  }
424  case TargetOpcode::G_SITOFP:
425  case TargetOpcode::G_UITOFP: {
426  // FIXME: Support other types
427  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
428  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
// Source must be 32 bits wide; the result may be 32 or 64 bits.
429  if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
430  return UnableToLegalize;
432  MI, MIRBuilder,
433  ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
434  Type::getInt32Ty(Ctx));
435  if (Status != Legalized)
436  return Status;
437  break;
438  }
439  }
440 
// Reached only when a helper above reported Legalized: drop the original.
441  MI.eraseFromParent();
442  return Legalized;
443 }
444 
// Legalizes MI by re-expressing it on the smaller scalar type NarrowTy.
//
// Dispatches on MI's opcode. Some cases expand inline (implicit-def, constant,
// add, sub, loads and stores), others delegate to a dedicated narrowScalar*
// helper, and the remaining ones narrow a single operand in place between
// Observer.changingInstr/changedInstr notifications. Opcodes not listed, and
// cases whose preconditions fail, return UnableToLegalize.
// NOTE(review): the listing's own numbering skips 445, so the first line of the
// signature (which presumably declares `MI`) is missing from this extract --
// confirm against the upstream file.
446  unsigned TypeIdx,
447  LLT NarrowTy) {
448  MIRBuilder.setInstr(MI);
449 
450  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
451  uint64_t NarrowSize = NarrowTy.getSizeInBits();
452 
453  switch (MI.getOpcode()) {
454  default:
455  return UnableToLegalize;
456  case TargetOpcode::G_IMPLICIT_DEF: {
457  // FIXME: add support for when SizeOp0 isn't an exact multiple of
458  // NarrowSize.
459  if (SizeOp0 % NarrowSize != 0)
460  return UnableToLegalize;
461  int NumParts = SizeOp0 / NarrowSize;
462 
// One narrow undef per part, then recombine into the original register.
463  SmallVector<unsigned, 2> DstRegs;
464  for (int i = 0; i < NumParts; ++i)
465  DstRegs.push_back(
466  MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
467 
468  unsigned DstReg = MI.getOperand(0).getReg();
469  if(MRI.getType(DstReg).isVector())
470  MIRBuilder.buildBuildVector(DstReg, DstRegs);
471  else
472  MIRBuilder.buildMerge(DstReg, DstRegs);
473  MI.eraseFromParent();
474  return Legalized;
475  }
476  case TargetOpcode::G_CONSTANT: {
477  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
478  const APInt &Val = MI.getOperand(1).getCImm()->getValue();
479  unsigned TotalSize = Ty.getSizeInBits();
// NOTE(review): this local shadows the function-level NarrowSize declared
// above the switch (same value, narrower integer type).
480  unsigned NarrowSize = NarrowTy.getSizeInBits();
481  int NumParts = TotalSize / NarrowSize;
482 
// Slice the constant into NarrowSize-bit chunks, lowest bits first.
483  SmallVector<unsigned, 4> PartRegs;
484  for (int I = 0; I != NumParts; ++I) {
485  unsigned Offset = I * NarrowSize;
486  auto K = MIRBuilder.buildConstant(NarrowTy,
487  Val.lshr(Offset).trunc(NarrowSize));
488  PartRegs.push_back(K.getReg(0));
489  }
490 
// Any bits above the last full chunk become one extra, smaller constant.
491  LLT LeftoverTy;
492  unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
493  SmallVector<unsigned, 1> LeftoverRegs;
494  if (LeftoverBits != 0) {
495  LeftoverTy = LLT::scalar(LeftoverBits);
496  auto K = MIRBuilder.buildConstant(
497  LeftoverTy,
498  Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
499  LeftoverRegs.push_back(K.getReg(0));
500  }
501 
502  insertParts(MI.getOperand(0).getReg(),
503  Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
504 
505  MI.eraseFromParent();
506  return Legalized;
507  }
508  case TargetOpcode::G_ADD: {
509  // FIXME: add support for when SizeOp0 isn't an exact multiple of
510  // NarrowSize.
511  if (SizeOp0 % NarrowSize != 0)
512  return UnableToLegalize;
513  // Expand in terms of carry-setting/consuming G_ADDE instructions.
514  int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
515 
516  SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
517  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
518  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
519 
// The carry chain starts from a constant 0 carry-in.
520  unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
521  MIRBuilder.buildConstant(CarryIn, 0);
522 
523  for (int i = 0; i < NumParts; ++i) {
524  unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
525  unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
526 
527  MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
528  Src2Regs[i], CarryIn);
529 
530  DstRegs.push_back(DstReg);
531  CarryIn = CarryOut;
532  }
533  unsigned DstReg = MI.getOperand(0).getReg();
534  if(MRI.getType(DstReg).isVector())
535  MIRBuilder.buildBuildVector(DstReg, DstRegs);
536  else
537  MIRBuilder.buildMerge(DstReg, DstRegs);
538  MI.eraseFromParent();
539  return Legalized;
540  }
541  case TargetOpcode::G_SUB: {
542  // FIXME: add support for when SizeOp0 isn't an exact multiple of
543  // NarrowSize.
544  if (SizeOp0 % NarrowSize != 0)
545  return UnableToLegalize;
546 
547  int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
548 
549  SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
550  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
551  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
552 
// The lowest part uses G_USUBO (no borrow-in); the remaining parts use
// G_USUBE and consume the previous part's borrow.
553  unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
554  unsigned BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
555  MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
556  {Src1Regs[0], Src2Regs[0]});
557  DstRegs.push_back(DstReg);
558  unsigned BorrowIn = BorrowOut;
559  for (int i = 1; i < NumParts; ++i) {
560  DstReg = MRI.createGenericVirtualRegister(NarrowTy);
561  BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
562 
563  MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
564  {Src1Regs[i], Src2Regs[i], BorrowIn});
565 
566  DstRegs.push_back(DstReg);
567  BorrowIn = BorrowOut;
568  }
// NOTE(review): unlike G_ADD above, this always recombines with buildMerge
// and has no build_vector path for a vector destination.
569  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
570  MI.eraseFromParent();
571  return Legalized;
572  }
573  case TargetOpcode::G_MUL:
574  case TargetOpcode::G_UMULH:
575  return narrowScalarMul(MI, NarrowTy);
576  case TargetOpcode::G_EXTRACT:
577  return narrowScalarExtract(MI, TypeIdx, NarrowTy);
578  case TargetOpcode::G_INSERT:
579  return narrowScalarInsert(MI, TypeIdx, NarrowTy);
580  case TargetOpcode::G_LOAD: {
581  const auto &MMO = **MI.memoperands_begin();
582  unsigned DstReg = MI.getOperand(0).getReg();
583  LLT DstTy = MRI.getType(DstReg);
584  if (DstTy.isVector())
585  return UnableToLegalize;
586 
// Memory size differs from the register size: load into a NarrowTy
// temporary and any-extend it into the destination.
587  if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
588  unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
// NOTE(review): shadows the `MMO` declared at the top of this case; both
// refer to the same first memory operand.
589  auto &MMO = **MI.memoperands_begin();
590  MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
591  MIRBuilder.buildAnyExt(DstReg, TmpReg);
592  MI.eraseFromParent();
593  return Legalized;
594  }
595 
596  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
597  }
598  case TargetOpcode::G_ZEXTLOAD:
599  case TargetOpcode::G_SEXTLOAD: {
600  bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
601  unsigned DstReg = MI.getOperand(0).getReg();
602  unsigned PtrReg = MI.getOperand(1).getReg();
603 
604  unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
605  auto &MMO = **MI.memoperands_begin();
// If the memory access already has NarrowTy's width, a plain load fills the
// temporary; otherwise keep the same kind of extending load into NarrowTy.
606  if (MMO.getSizeInBits() == NarrowSize) {
607  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
608  } else {
609  unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
610  : TargetOpcode::G_SEXTLOAD;
611  MIRBuilder.buildInstr(ExtLoad)
612  .addDef(TmpReg)
613  .addUse(PtrReg)
614  .addMemOperand(&MMO);
615  }
616 
// Extend the narrow temporary back to the original destination width.
617  if (ZExt)
618  MIRBuilder.buildZExt(DstReg, TmpReg);
619  else
620  MIRBuilder.buildSExt(DstReg, TmpReg);
621 
622  MI.eraseFromParent();
623  return Legalized;
624  }
625  case TargetOpcode::G_STORE: {
626  const auto &MMO = **MI.memoperands_begin();
627 
628  unsigned SrcReg = MI.getOperand(0).getReg();
629  LLT SrcTy = MRI.getType(SrcReg);
630  if (SrcTy.isVector())
631  return UnableToLegalize;
632 
633  int NumParts = SizeOp0 / NarrowSize;
634  unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
635  unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
// NOTE(review): vectors already returned a few lines up, so
// SrcTy.isVector() is always false here and this check can never fire.
636  if (SrcTy.isVector() && LeftoverBits != 0)
637  return UnableToLegalize;
638 
// Memory size differs from the register size: truncate to NarrowTy and
// store the truncated value.
639  if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
640  unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
// NOTE(review): shadows the `MMO` declared at the top of this case; both
// refer to the same first memory operand.
641  auto &MMO = **MI.memoperands_begin();
642  MIRBuilder.buildTrunc(TmpReg, SrcReg);
643  MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
644  MI.eraseFromParent();
645  return Legalized;
646  }
647 
648  return reduceLoadStoreWidth(MI, 0, NarrowTy);
649  }
650  case TargetOpcode::G_SELECT:
651  return narrowScalarSelect(MI, TypeIdx, NarrowTy);
652  case TargetOpcode::G_AND:
653  case TargetOpcode::G_OR:
654  case TargetOpcode::G_XOR: {
655  // Legalize bitwise operation:
656  // A = BinOp<Ty> B, C
657  // into:
658  // B1, ..., BN = G_UNMERGE_VALUES B
659  // C1, ..., CN = G_UNMERGE_VALUES C
660  // A1 = BinOp<Ty/N> B1, C1
661  // ...
662  // AN = BinOp<Ty/N> BN, CN
663  // A = G_MERGE_VALUES A1, ..., AN
664  return narrowScalarBasic(MI, TypeIdx, NarrowTy);
665  }
666  case TargetOpcode::G_SHL:
667  case TargetOpcode::G_LSHR:
668  case TargetOpcode::G_ASHR:
669  return narrowScalarShift(MI, TypeIdx, NarrowTy);
670  case TargetOpcode::G_CTLZ:
671  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
672  case TargetOpcode::G_CTTZ:
673  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
674  case TargetOpcode::G_CTPOP:
675  if (TypeIdx != 0)
676  return UnableToLegalize; // TODO
677 
// Only the result is narrowed; it is zero-extended back to the old type.
678  Observer.changingInstr(MI);
679  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
680  Observer.changedInstr(MI);
681  return Legalized;
682  case TargetOpcode::G_INTTOPTR:
683  if (TypeIdx != 1)
684  return UnableToLegalize;
685 
// Only the integer source (operand 1) is narrowed, via a truncate.
686  Observer.changingInstr(MI);
687  narrowScalarSrc(MI, NarrowTy, 1);
688  Observer.changedInstr(MI);
689  return Legalized;
690  case TargetOpcode::G_PTRTOINT:
691  if (TypeIdx != 0)
692  return UnableToLegalize;
693 
// Only the integer result is narrowed; it is zero-extended back.
694  Observer.changingInstr(MI);
695  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
696  Observer.changedInstr(MI);
697  return Legalized;
698  }
699 }
700 
701 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
702  unsigned OpIdx, unsigned ExtOpcode) {
703  MachineOperand &MO = MI.getOperand(OpIdx);
704  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
705  MO.setReg(ExtB->getOperand(0).getReg());
706 }
707 
708 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
709  unsigned OpIdx) {
710  MachineOperand &MO = MI.getOperand(OpIdx);
711  auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
712  {MO.getReg()});
713  MO.setReg(ExtB->getOperand(0).getReg());
714 }
715 
// Widens one result operand of MI in place: MI is made to define a fresh
// WideTy register, and a TruncOpcode instruction recreates the original
// register from that wide value.
// NOTE(review): the listing's own numbering skips 720, so one statement
// between the register creation and the buildInstr call is missing from this
// extract (presumably it repositions the builder) -- confirm against the
// upstream file.
716 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
717  unsigned OpIdx, unsigned TruncOpcode) {
718  MachineOperand &MO = MI.getOperand(OpIdx);
719  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
721  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
722  MO.setReg(DstExt);
723 }
724 
// Narrows one result operand of MI in place: MI is made to define a fresh
// NarrowTy register, and an ExtOpcode instruction recreates the original
// register from that narrow value.
// NOTE(review): the listing's own numbering skips 729, so one statement
// between the register creation and the buildInstr call is missing from this
// extract (presumably it repositions the builder) -- confirm against the
// upstream file.
725 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
726  unsigned OpIdx, unsigned ExtOpcode) {
727  MachineOperand &MO = MI.getOperand(OpIdx);
728  unsigned DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
730  MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
731  MO.setReg(DstTrunc);
732 }
733 
// Widens a vector result operand of MI in place: MI is made to define a fresh
// WideTy register, and the original register is recreated by extracting from
// bit offset 0 of that wider value.
// NOTE(review): the listing's own numbering skips 738, so one statement
// between the register creation and the buildExtract call is missing from this
// extract (presumably it repositions the builder) -- confirm against the
// upstream file.
734 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
735  unsigned OpIdx) {
736  MachineOperand &MO = MI.getOperand(OpIdx);
737  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
739  MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
740  MO.setReg(DstExt);
741 }
742 
// Widens a vector source operand of MI to MoreTy in place, padding with undef.
//
// When MoreTy's element count is an exact multiple of the operand's, the
// operand is concatenated with undef vectors of its own type. Otherwise the
// operand is inserted at bit offset 0 of an undef MoreTy value.
// NOTE(review): the listing's own numbering skips 755, so the declaration of
// `Parts` is missing from this extract -- confirm against the upstream file.
743 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
744  unsigned OpIdx) {
745  MachineOperand &MO = MI.getOperand(OpIdx);
746 
747  LLT OldTy = MRI.getType(MO.getReg());
748  unsigned OldElts = OldTy.getNumElements();
749  unsigned NewElts = MoreTy.getNumElements();
750 
751  unsigned NumParts = NewElts / OldElts;
752 
753  // Use concat_vectors if the result is a multiple of the number of elements.
754  if (NumParts * OldElts == NewElts) {
756  Parts.push_back(MO.getReg());
757 
// A single undef register is reused for every padding slot.
758  unsigned ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
759  for (unsigned I = 1; I != NumParts; ++I)
760  Parts.push_back(ImpDef);
761 
762  auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
763  MO.setReg(Concat.getReg(0));
764  return;
765  }
766 
// Irregular element count: insert the old value into an undef wide vector.
767  unsigned MoreReg = MRI.createGenericVirtualRegister(MoreTy);
768  unsigned ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
769  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
770  MO.setReg(MoreReg);
771 }
772 
// Rewrites a merge of scalar pieces as zero-extend / shift / or arithmetic in
// the destination type.
//
// Handled only for TypeIdx 1 and a scalar destination. The first source is
// zero-extended to the destination type; each later source is zero-extended,
// shifted left by its position times PartSize, and OR-ed in. The final OR
// writes the original destination register, then MI is erased.
// NOTE(review): the listing's own numbering skips 773, so the line before this
// signature (presumably the return type) is missing from this extract. Also,
// WideTy is not referenced in the visible body, and with a single source the
// loop does not run, so nothing visible here writes DstReg -- confirm both
// against the upstream file.
774 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
775  LLT WideTy) {
776  if (TypeIdx != 1)
777  return UnableToLegalize;
778 
779  unsigned DstReg = MI.getOperand(0).getReg();
780  LLT DstTy = MRI.getType(DstReg);
781  if (!DstTy.isScalar())
782  return UnableToLegalize;
783 
784  unsigned NumOps = MI.getNumOperands();
785  unsigned NumSrc = MI.getNumOperands() - 1;
786  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
787 
788  unsigned Src1 = MI.getOperand(1).getReg();
789  unsigned ResultReg = MIRBuilder.buildZExt(DstTy, Src1)->getOperand(0).getReg();
790 
// Operands 2..NumOps-1 are the remaining sources.
791  for (unsigned I = 2; I != NumOps; ++I) {
792  const unsigned Offset = (I - 1) * PartSize;
793 
794  unsigned SrcReg = MI.getOperand(I).getReg();
795  assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
796 
797  auto ZextInput = MIRBuilder.buildZExt(DstTy, SrcReg);
798 
// The last OR targets the original destination; earlier ones get temporaries.
799  unsigned NextResult = I + 1 == NumOps ? DstReg :
800  MRI.createGenericVirtualRegister(DstTy);
801 
802  auto ShiftAmt = MIRBuilder.buildConstant(DstTy, Offset);
803  auto Shl = MIRBuilder.buildShl(DstTy, ZextInput, ShiftAmt);
804  MIRBuilder.buildOr(NextResult, ResultReg, Shl);
805  ResultReg = NextResult;
806  }
807 
808  MI.eraseFromParent();
809  return Legalized;
810 }
811 
// Widens the results of an unmerge to WideTy.
//
// Handled only for TypeIdx 0 with a scalar source and scalar results. The
// source is zero-extended to NumDst * WideTy bits and combined with shifted
// copies of the running value (shift amount SizeDiff * I). MI is then updated
// in place to read that wide value, and each result is widened through
// widenScalarDst.
// NOTE(review): the listing's own numbering skips 812, so the line before this
// signature (presumably the return type) is missing from this extract --
// confirm against the upstream file.
813 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
814  LLT WideTy) {
815  if (TypeIdx != 0)
816  return UnableToLegalize;
817 
// The source is the last operand; all operands before it are results.
818  unsigned NumDst = MI.getNumOperands() - 1;
819  unsigned SrcReg = MI.getOperand(NumDst).getReg();
820  LLT SrcTy = MRI.getType(SrcReg);
821  if (!SrcTy.isScalar())
822  return UnableToLegalize;
823 
824  unsigned Dst0Reg = MI.getOperand(0).getReg();
825  LLT DstTy = MRI.getType(Dst0Reg);
826  if (!DstTy.isScalar())
827  return UnableToLegalize;
828 
829  unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
830  LLT NewSrcTy = LLT::scalar(NewSrcSize);
831  unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();
832 
833  auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);
834 
// Each iteration ORs the running value with itself shifted by SizeDiff * I.
835  for (unsigned I = 1; I != NumDst; ++I) {
836  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
837  auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
838  WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
839  }
840 
841  Observer.changingInstr(MI);
842 
843  MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
844  for (unsigned I = 0; I != NumDst; ++I)
845  widenScalarDst(MI, WideTy, I);
846 
847  Observer.changedInstr(MI);
848 
849  return Legalized;
850 }
851 
// Widens one of the types of an extract.
//
// TypeIdx 0 (result type): scalars only. The extract is replaced by an
// optional ptrtoint of the source, a logical shift right by the extract
// offset, and a truncate into the original result; MI is erased.
// Other TypeIdx (source type): the source is any-extended in place. For a
// vector source the offset is rescaled and the result is widened to WideTy's
// scalar type as well.
// NOTE(review): the listing's own numbering skips 852 (the line before this
// signature, presumably the return type) and 871 (the condition guarding the
// first `return UnableToLegalize` in the pointer branch), so both are missing
// from this extract -- confirm against the upstream file.
853 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
854  LLT WideTy) {
855  unsigned DstReg = MI.getOperand(0).getReg();
856  unsigned SrcReg = MI.getOperand(1).getReg();
857  LLT SrcTy = MRI.getType(SrcReg);
858 
859  LLT DstTy = MRI.getType(DstReg);
860  unsigned Offset = MI.getOperand(2).getImm();
861 
862  if (TypeIdx == 0) {
863  if (SrcTy.isVector() || DstTy.isVector())
864  return UnableToLegalize;
865 
866  SrcOp Src(SrcReg);
867  if (SrcTy.isPointer()) {
868  // Extracts from pointers can be handled only if they are really just
869  // simple integers.
870  const DataLayout &DL = MIRBuilder.getDataLayout();
872  return UnableToLegalize;
873 
// From here on the pointer source is treated as an integer of equal width.
874  LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
875  Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
876  SrcTy = SrcAsIntTy;
877  }
878 
879  if (DstTy.isPointer())
880  return UnableToLegalize;
881 
882  if (Offset == 0) {
883  // Avoid a shift in the degenerate case.
884  MIRBuilder.buildTrunc(DstReg,
885  MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
886  MI.eraseFromParent();
887  return Legalized;
888  }
889 
890  // Do a shift in the source type.
891  LLT ShiftTy = SrcTy;
892  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
893  Src = MIRBuilder.buildAnyExt(WideTy, Src);
894  ShiftTy = WideTy;
// NOTE(review): this `else if` repeats the condition of the `if` above, so
// it can never be true and the return below is unreachable. A different
// comparison may have been intended -- confirm against the upstream file.
895  } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
896  return UnableToLegalize;
897 
898  auto LShr = MIRBuilder.buildLShr(
899  ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
900  MIRBuilder.buildTrunc(DstReg, LShr);
901  MI.eraseFromParent();
902  return Legalized;
903  }
904 
// Source-type widening, scalar source: any-extend the source operand.
905  if (SrcTy.isScalar()) {
906  Observer.changingInstr(MI);
907  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
908  Observer.changedInstr(MI);
909  return Legalized;
910  }
911 
912  if (!SrcTy.isVector())
913  return UnableToLegalize;
914 
// Vector source: only whole-element extracts at element boundaries.
915  if (DstTy != SrcTy.getElementType())
916  return UnableToLegalize;
917 
918  if (Offset % SrcTy.getScalarSizeInBits() != 0)
919  return UnableToLegalize;
920 
921  Observer.changingInstr(MI);
922  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
923 
// Rescale the bit offset by the ratio of the wide to the original source size.
924  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
925  Offset);
926  widenScalarDst(MI, WideTy.getScalarType(), 0);
927  Observer.changedInstr(MI);
928  return Legalized;
929 }
930 
// Widens an insert in place. Handled only for TypeIdx 0: operand 1 is
// any-extended to WideTy and the result is widened through widenScalarDst,
// with the change bracketed by observer notifications.
// NOTE(review): the listing's own numbering skips 931, so the line before this
// signature (presumably the return type) is missing from this extract --
// confirm against the upstream file.
932 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
933  LLT WideTy) {
934  if (TypeIdx != 0)
935  return UnableToLegalize;
936  Observer.changingInstr(MI);
937  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
938  widenScalarDst(MI, WideTy);
939  Observer.changedInstr(MI);
940  return Legalized;
941 }
942 
/// Legalize \p MI by widening the scalar type at type index \p TypeIdx to
/// \p WideTy.
///
/// The builder is first positioned at \p MI, then the opcode selects the
/// strategy. Most opcodes are mutated in place (bracketed by
/// Observer.changingInstr / Observer.changedInstr) via widenScalarSrc and
/// widenScalarDst; a few (G_UADDO/G_USUBO and the bit-counting opcodes) are
/// replaced by newly built instructions and \p MI is erased.
///
/// \returns Legalized on success, or UnableToLegalize for opcodes and type
/// indices that are not handled here.
///
/// NOTE(review): this listing has no return-type line above the signature and
/// several interior lines are absent; each gap is marked below.
944 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
945  MIRBuilder.setInstr(MI);
946 
947  switch (MI.getOpcode()) {
948  default:
949  return UnableToLegalize;
 // These opcodes are delegated to dedicated widening helpers.
950  case TargetOpcode::G_EXTRACT:
951  return widenScalarExtract(MI, TypeIdx, WideTy);
952  case TargetOpcode::G_INSERT:
953  return widenScalarInsert(MI, TypeIdx, WideTy);
954  case TargetOpcode::G_MERGE_VALUES:
955  return widenScalarMergeValues(MI, TypeIdx, WideTy);
956  case TargetOpcode::G_UNMERGE_VALUES:
957  return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
 // Unsigned add/sub with overflow: zero-extend both inputs, do a plain
 // G_ADD/G_SUB in WideTy and truncate the wide result into operand 0.
958  case TargetOpcode::G_UADDO:
959  case TargetOpcode::G_USUBO: {
960  if (TypeIdx == 1)
961  return UnableToLegalize; // TODO
962  auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
963  {MI.getOperand(2).getReg()});
964  auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
965  {MI.getOperand(3).getReg()});
966  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
967  ? TargetOpcode::G_ADD
968  : TargetOpcode::G_SUB;
969  // Do the arithmetic in the larger type.
970  auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
971  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
 // NOTE(review): `Mask` is used below but its declaration (listing line 972)
 // is not visible here; presumably an all-ones APInt of OrigTy's width --
 // confirm against the real source.
973  auto AndOp = MIRBuilder.buildInstr(
974  TargetOpcode::G_AND, {WideTy},
975  {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
976  // There is no overflow if the AndOp is the same as NewOp.
 // NOTE(review): the first line of the next statement (listing line 977) is
 // missing; presumably a compare of NewOp against AndOp that defines the
 // overflow result -- confirm.
978  AndOp);
979  // Now trunc the NewOp to the original result.
980  MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
981  MI.eraseFromParent();
982  return Legalized;
983  }
984  case TargetOpcode::G_CTTZ:
985  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
986  case TargetOpcode::G_CTLZ:
987  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
988  case TargetOpcode::G_CTPOP: {
 // Type index 0 is the count result: only the destination is widened.
989  if (TypeIdx == 0) {
990  Observer.changingInstr(MI);
991  widenScalarDst(MI, WideTy, 0);
992  Observer.changedInstr(MI);
993  return Legalized;
994  }
995 
 // Otherwise the source is widened and the operation is rebuilt in WideTy.
996  unsigned SrcReg = MI.getOperand(1).getReg();
997 
998  // First ZEXT the input.
999  auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1000  LLT CurTy = MRI.getType(SrcReg);
1001  if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1002  // The count is the same in the larger type except if the original
1003  // value was zero. This can be handled by setting the bit just off
1004  // the top of the original type.
1005  auto TopBit =
1006  APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1007  MIBSrc = MIRBuilder.buildOr(
1008  WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1009  }
1010 
1011  // Perform the operation at the larger size.
1012  auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1013  // This is already the correct result for CTPOP and CTTZs
1014  if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1015  MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1016  // The correct result is NewOp - (Difference in widety and current ty).
1017  unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1018  MIBNewOp = MIRBuilder.buildInstr(
1019  TargetOpcode::G_SUB, {WideTy},
1020  {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
1021  }
1022 
 // The count's own type may differ from WideTy, so zero-extend or truncate
 // into the original result operand.
1023  MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1024  MI.eraseFromParent();
1025  return Legalized;
1026  }
 // The byte swap is redirected to produce DstExt in WideTy; the result is then
 // shifted right by the width difference and truncated into the original
 // destination.
1027  case TargetOpcode::G_BSWAP: {
1028  Observer.changingInstr(MI);
1029  unsigned DstReg = MI.getOperand(0).getReg();
1030 
1031  unsigned ShrReg = MRI.createGenericVirtualRegister(WideTy);
1032  unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
1033  unsigned ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1034  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1035 
1036  MI.getOperand(0).setReg(DstExt);
1037 
 // NOTE(review): listing line 1038 is absent. The instructions built below
 // consume DstExt, so presumably the insert point is moved past MI here --
 // confirm.
1039 
1040  LLT Ty = MRI.getType(DstReg);
1041  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1042  MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1043  MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
1044  .addDef(ShrReg)
1045  .addUse(DstExt)
1046  .addUse(ShiftAmtReg);
1047 
1048  MIRBuilder.buildTrunc(DstReg, ShrReg);
1049  Observer.changedInstr(MI);
1050  return Legalized;
1051  }
1052  case TargetOpcode::G_ADD:
1053  case TargetOpcode::G_AND:
1054  case TargetOpcode::G_MUL:
1055  case TargetOpcode::G_OR:
1056  case TargetOpcode::G_XOR:
1057  case TargetOpcode::G_SUB:
1058  // Perform operation at larger width (any extension is fine here, high bits
1059  // don't affect the result) and then truncate the result back to the
1060  // original type.
1061  Observer.changingInstr(MI);
1062  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1063  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1064  widenScalarDst(MI, WideTy);
1065  Observer.changedInstr(MI);
1066  return Legalized;
1067 
1068  case TargetOpcode::G_SHL:
1069  Observer.changingInstr(MI);
1070 
 // Type index 0 is the shifted value and result; type index 1 is the shift
 // amount (operand 2).
1071  if (TypeIdx == 0) {
1072  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1073  widenScalarDst(MI, WideTy);
1074  } else {
1075  assert(TypeIdx == 1);
1076  // The "number of bits to shift" operand must preserve its value as an
1077  // unsigned integer:
1078  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1079  }
1080 
1081  Observer.changedInstr(MI);
1082  return Legalized;
1083 
 // Signed division/remainder: both inputs are sign-extended.
1084  case TargetOpcode::G_SDIV:
1085  case TargetOpcode::G_SREM:
1086  Observer.changingInstr(MI);
1087  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1088  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1089  widenScalarDst(MI, WideTy);
1090  Observer.changedInstr(MI);
1091  return Legalized;
1092 
1093  case TargetOpcode::G_ASHR:
1094  case TargetOpcode::G_LSHR:
1095  Observer.changingInstr(MI);
1096 
1097  if (TypeIdx == 0) {
 // The bits shifted in from the top must match the narrow operation:
 // sign-extend for G_ASHR, zero-extend for G_LSHR.
1098  unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1099  TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1100 
1101  widenScalarSrc(MI, WideTy, 1, CvtOp);
1102  widenScalarDst(MI, WideTy);
1103  } else {
1104  assert(TypeIdx == 1);
1105  // The "number of bits to shift" operand must preserve its value as an
1106  // unsigned integer:
1107  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1108  }
1109 
1110  Observer.changedInstr(MI);
1111  return Legalized;
 // Unsigned division/remainder: both inputs are zero-extended.
1112  case TargetOpcode::G_UDIV:
1113  case TargetOpcode::G_UREM:
1114  Observer.changingInstr(MI);
1115  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1116  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1117  widenScalarDst(MI, WideTy);
1118  Observer.changedInstr(MI);
1119  return Legalized;
1120 
1121  case TargetOpcode::G_SELECT:
1122  Observer.changingInstr(MI);
1123  if (TypeIdx == 0) {
1124  // Perform operation at larger width (any extension is fine here, high
1125  // bits don't affect the result) and then truncate the result back to the
1126  // original type.
1127  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1128  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1129  widenScalarDst(MI, WideTy);
1130  } else {
 // Any other type index widens the condition (operand 1) using the
 // boolean extension opcode chosen by the builder.
1131  bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
1132  // Explicit extension is required here since high bits affect the result.
1133  widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
1134  }
1135  Observer.changedInstr(MI);
1136  return Legalized;
1137 
 // FP-to-int: only the integer result (type index 0) can be widened here.
1138  case TargetOpcode::G_FPTOSI:
1139  case TargetOpcode::G_FPTOUI:
1140  if (TypeIdx != 0)
1141  return UnableToLegalize;
1142  Observer.changingInstr(MI);
1143  widenScalarDst(MI, WideTy);
1144  Observer.changedInstr(MI);
1145  return Legalized;
1146 
 // Int-to-FP: only the integer source (type index 1) can be widened here,
 // sign-extended for the signed conversion.
1147  case TargetOpcode::G_SITOFP:
1148  if (TypeIdx != 1)
1149  return UnableToLegalize;
1150  Observer.changingInstr(MI);
1151  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1152  Observer.changedInstr(MI);
1153  return Legalized;
1154 
 // Same as above, zero-extended for the unsigned conversion.
1155  case TargetOpcode::G_UITOFP:
1156  if (TypeIdx != 1)
1157  return UnableToLegalize;
1158  Observer.changingInstr(MI);
1159  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1160  Observer.changedInstr(MI);
1161  return Legalized;
1162 
 // Loads only have their destination widened.
1163  case TargetOpcode::G_LOAD:
1164  case TargetOpcode::G_SEXTLOAD:
1165  case TargetOpcode::G_ZEXTLOAD:
1166  Observer.changingInstr(MI);
1167  widenScalarDst(MI, WideTy);
1168  Observer.changedInstr(MI);
1169  return Legalized;
1170 
1171  case TargetOpcode::G_STORE: {
1172  if (TypeIdx != 0)
1173  return UnableToLegalize;
1174 
 // Only stored values with a power-of-two bit size are handled.
1175  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1176  if (!isPowerOf2_32(Ty.getSizeInBits()))
1177  return UnableToLegalize;
1178 
1179  Observer.changingInstr(MI);
1180 
 // A 1-bit value is zero-extended; any other width is any-extended.
1181  unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
1182  TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
1183  widenScalarSrc(MI, WideTy, 0, ExtType);
1184 
1185  Observer.changedInstr(MI);
1186  return Legalized;
1187  }
1188  case TargetOpcode::G_CONSTANT: {
1189  MachineOperand &SrcMO = MI.getOperand(1);
 // NOTE(review): `Ctx` is used below but its declaration (listing line 1190)
 // is not visible here; presumably the function's LLVMContext -- confirm.
 // The immediate is sign-extended to WideTy's width.
1191  const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
1192  Observer.changingInstr(MI);
1193  SrcMO.setCImm(ConstantInt::get(Ctx, Val));
1194 
1195  widenScalarDst(MI, WideTy);
1196  Observer.changedInstr(MI);
1197  return Legalized;
1198  }
1199  case TargetOpcode::G_FCONSTANT: {
1200  MachineOperand &SrcMO = MI.getOperand(1);
 // NOTE(review): as for G_CONSTANT, the declaration of `Ctx` (listing line
 // 1201) is not visible here.
1202  APFloat Val = SrcMO.getFPImm()->getValueAPF();
1203  bool LosesInfo;
 // Only 32-bit and 64-bit wide types are supported.
 // NOTE(review): the opening lines of the two conversion calls (listing lines
 // 1206 and 1210) are missing; presumably Val is converted to the matching
 // IEEE format with LosesInfo as the out-parameter -- confirm.
1204  switch (WideTy.getSizeInBits()) {
1205  case 32:
1207  &LosesInfo);
1208  break;
1209  case 64:
1211  &LosesInfo);
1212  break;
1213  default:
1214  return UnableToLegalize;
1215  }
1216 
1217  assert(!LosesInfo && "extend should always be lossless");
1218 
1219  Observer.changingInstr(MI);
1220  SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
1221 
 // The wide FP constant is narrowed back with G_FPTRUNC.
1222  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1223  Observer.changedInstr(MI);
1224  return Legalized;
1225  }
1226  case TargetOpcode::G_IMPLICIT_DEF: {
1227  Observer.changingInstr(MI);
1228  widenScalarDst(MI, WideTy);
1229  Observer.changedInstr(MI);
1230  return Legalized;
1231  }
 // The branch condition (operand 0) is widened with the builder's boolean
 // extension opcode.
1232  case TargetOpcode::G_BRCOND:
1233  Observer.changingInstr(MI);
1234  widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
1235  Observer.changedInstr(MI);
1236  return Legalized;
1237 
 // FP compare: type index 0 widens the result; any other index FP-extends the
 // two compared operands (operands 2 and 3).
1238  case TargetOpcode::G_FCMP:
1239  Observer.changingInstr(MI);
1240  if (TypeIdx == 0)
1241  widenScalarDst(MI, WideTy);
1242  else {
1243  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
1244  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
1245  }
1246  Observer.changedInstr(MI);
1247  return Legalized;
1248 
 // Integer compare: the compared operands are sign-extended for signed
 // predicates and zero-extended otherwise.
1249  case TargetOpcode::G_ICMP:
1250  Observer.changingInstr(MI);
1251  if (TypeIdx == 0)
1252  widenScalarDst(MI, WideTy);
1253  else {
1254  unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
1255  MI.getOperand(1).getPredicate()))
1256  ? TargetOpcode::G_SEXT
1257  : TargetOpcode::G_ZEXT;
1258  widenScalarSrc(MI, WideTy, 2, ExtOpcode);
1259  widenScalarSrc(MI, WideTy, 3, ExtOpcode);
1260  }
1261  Observer.changedInstr(MI);
1262  return Legalized;
1263 
 // Only the offset operand (operand 2) is widened, by sign extension.
1264  case TargetOpcode::G_GEP:
1265  assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
1266  Observer.changingInstr(MI);
1267  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1268  Observer.changedInstr(MI);
1269  return Legalized;
1270 
1271  case TargetOpcode::G_PHI: {
1272  assert(TypeIdx == 0 && "Expecting only Idx 0");
1273 
1274  Observer.changingInstr(MI);
 // Operands come in (value, block) pairs starting at index 1. Each incoming
 // value is extended in its own predecessor block, before that block's first
 // terminator.
1275  for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
1276  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
1277  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1278  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
1279  }
1280 
 // Reposition the builder at the iterator preceding the first non-PHI
 // instruction of MI's block before widening the result.
1281  MachineBasicBlock &MBB = *MI.getParent();
1282  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
1283  widenScalarDst(MI, WideTy);
1284  Observer.changedInstr(MI);
1285  return Legalized;
1286  }
1287  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
 // Type index 0 is the element type: the whole source vector is sign-extended
 // to a vector of WideTy-sized elements and the result is widened.
1288  if (TypeIdx == 0) {
1289  unsigned VecReg = MI.getOperand(1).getReg();
1290  LLT VecTy = MRI.getType(VecReg);
1291  Observer.changingInstr(MI);
1292 
1293  widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
1294  WideTy.getSizeInBits()),
1295  1, TargetOpcode::G_SEXT);
1296 
1297  widenScalarDst(MI, WideTy, 0);
1298  Observer.changedInstr(MI);
1299  return Legalized;
1300  }
1301 
 // Type index 2 is the index operand (operand 2), which is sign-extended.
1302  if (TypeIdx != 2)
1303  return UnableToLegalize;
1304  Observer.changingInstr(MI);
1305  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1306  Observer.changedInstr(MI);
1307  return Legalized;
1308  }
 // Floating-point operations: every source operand is extended with G_FPEXT
 // and the wide result is narrowed back with G_FPTRUNC.
1309  case TargetOpcode::G_FADD:
1310  case TargetOpcode::G_FMUL:
1311  case TargetOpcode::G_FSUB:
1312  case TargetOpcode::G_FMA:
1313  case TargetOpcode::G_FNEG:
1314  case TargetOpcode::G_FABS:
1315  case TargetOpcode::G_FCANONICALIZE:
1316  case TargetOpcode::G_FDIV:
1317  case TargetOpcode::G_FREM:
1318  case TargetOpcode::G_FCEIL:
1319  case TargetOpcode::G_FFLOOR:
1320  case TargetOpcode::G_FCOS:
1321  case TargetOpcode::G_FSIN:
1322  case TargetOpcode::G_FLOG10:
1323  case TargetOpcode::G_FLOG:
1324  case TargetOpcode::G_FLOG2:
1325  case TargetOpcode::G_FRINT:
1326  case TargetOpcode::G_FNEARBYINT:
1327  case TargetOpcode::G_FSQRT:
1328  case TargetOpcode::G_FEXP:
1329  case TargetOpcode::G_FEXP2:
1330  case TargetOpcode::G_FPOW:
1331  case TargetOpcode::G_INTRINSIC_TRUNC:
1332  case TargetOpcode::G_INTRINSIC_ROUND:
1333  assert(TypeIdx == 0);
1334  Observer.changingInstr(MI);
1335 
1336  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
1337  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
1338 
1339  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1340  Observer.changedInstr(MI);
1341  return Legalized;
 // Only the integer source (type index 1) is widened, by zero extension.
1342  case TargetOpcode::G_INTTOPTR:
1343  if (TypeIdx != 1)
1344  return UnableToLegalize;
1345 
1346  Observer.changingInstr(MI);
1347  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1348  Observer.changedInstr(MI);
1349  return Legalized;
 // Only the integer result (type index 0) is widened.
1350  case TargetOpcode::G_PTRTOINT:
1351  if (TypeIdx != 0)
1352  return UnableToLegalize;
1353 
1354  Observer.changingInstr(MI);
1355  widenScalarDst(MI, WideTy, 0);
1356  Observer.changedInstr(MI);
1357  return Legalized;
1358  }
1359 }
1360 
1362 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1363  using namespace TargetOpcode;
1364  MIRBuilder.setInstr(MI);
1365 
1366  switch(MI.getOpcode()) {
1367  default:
1368  return UnableToLegalize;
1369  case TargetOpcode::G_SREM:
1370  case TargetOpcode::G_UREM: {
1371  unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
1372  MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
1373  .addDef(QuotReg)
1374  .addUse(MI.getOperand(1).getReg())
1375  .addUse(MI.getOperand(2).getReg());
1376 
1377  unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
1378  MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
1380  ProdReg);
1381  MI.eraseFromParent();
1382  return Legalized;
1383  }
1384  case TargetOpcode::G_SMULO:
1385  case TargetOpcode::G_UMULO: {
1386  // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1387  // result.
1388  unsigned Res = MI.getOperand(0).getReg();
1389  unsigned Overflow = MI.getOperand(1).getReg();
1390  unsigned LHS = MI.getOperand(2).getReg();
1391  unsigned RHS = MI.getOperand(3).getReg();
1392 
1393  MIRBuilder.buildMul(Res, LHS, RHS);
1394 
1395  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
1396  ? TargetOpcode::G_SMULH
1397  : TargetOpcode::G_UMULH;
1398 
1399  unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
1400  MIRBuilder.buildInstr(Opcode)
1401  .addDef(HiPart)
1402  .addUse(LHS)
1403  .addUse(RHS);
1404 
1405  unsigned Zero = MRI.createGenericVirtualRegister(Ty);
1406  MIRBuilder.buildConstant(Zero, 0);
1407 
1408  // For *signed* multiply, overflow is detected by checking:
1409  // (hi != (lo >> bitwidth-1))
1410  if (Opcode == TargetOpcode::G_SMULH) {
1411  unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
1412  unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
1413  MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
1414  MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
1415  .addDef(Shifted)
1416  .addUse(Res)
1417  .addUse(ShiftAmt);
1418  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
1419  } else {
1420  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
1421  }
1422  MI.eraseFromParent();
1423  return Legalized;
1424  }
1425  case TargetOpcode::G_FNEG: {
1426  // TODO: Handle vector types once we are able to
1427  // represent them.
1428  if (Ty.isVector())
1429  return UnableToLegalize;
1430  unsigned Res = MI.getOperand(0).getReg();
1431  Type *ZeroTy;
1433  switch (Ty.getSizeInBits()) {
1434  case 16:
1435  ZeroTy = Type::getHalfTy(Ctx);
1436  break;
1437  case 32:
1438  ZeroTy = Type::getFloatTy(Ctx);
1439  break;
1440  case 64:
1441  ZeroTy = Type::getDoubleTy(Ctx);
1442  break;
1443  case 128:
1444  ZeroTy = Type::getFP128Ty(Ctx);
1445  break;
1446  default:
1447  llvm_unreachable("unexpected floating-point type");
1448  }
1449  ConstantFP &ZeroForNegation =
1450  *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
1451  auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
1452  unsigned SubByReg = MI.getOperand(1).getReg();
1453  unsigned ZeroReg = Zero->getOperand(0).getReg();
1454  MachineInstr *SrcMI = MRI.getVRegDef(SubByReg);
1455  MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
1456  SrcMI->getFlags());
1457  MI.eraseFromParent();
1458  return Legalized;
1459  }
1460  case TargetOpcode::G_FSUB: {
1461  // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1462  // First, check if G_FNEG is marked as Lower. If so, we may
1463  // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1464  if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
1465  return UnableToLegalize;
1466  unsigned Res = MI.getOperand(0).getReg();
1467  unsigned LHS = MI.getOperand(1).getReg();
1468  unsigned RHS = MI.getOperand(2).getReg();
1469  unsigned Neg = MRI.createGenericVirtualRegister(Ty);
1470  MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
1471  MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
1472  MI.eraseFromParent();
1473  return Legalized;
1474  }
1475  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1476  unsigned OldValRes = MI.getOperand(0).getReg();
1477  unsigned SuccessRes = MI.getOperand(1).getReg();
1478  unsigned Addr = MI.getOperand(2).getReg();
1479  unsigned CmpVal = MI.getOperand(3).getReg();
1480  unsigned NewVal = MI.getOperand(4).getReg();
1481  MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
1482  **MI.memoperands_begin());
1483  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
1484  MI.eraseFromParent();
1485  return Legalized;
1486  }
1487  case TargetOpcode::G_LOAD:
1488  case TargetOpcode::G_SEXTLOAD:
1489  case TargetOpcode::G_ZEXTLOAD: {
1490  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1491  unsigned DstReg = MI.getOperand(0).getReg();
1492  unsigned PtrReg = MI.getOperand(1).getReg();
1493  LLT DstTy = MRI.getType(DstReg);
1494  auto &MMO = **MI.memoperands_begin();
1495 
1496  if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
1497  // In the case of G_LOAD, this was a non-extending load already and we're
1498  // about to lower to the same instruction.
1499  if (MI.getOpcode() == TargetOpcode::G_LOAD)
1500  return UnableToLegalize;
1501  MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
1502  MI.eraseFromParent();
1503  return Legalized;
1504  }
1505 
1506  if (DstTy.isScalar()) {
1507  unsigned TmpReg =
1508  MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
1509  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1510  switch (MI.getOpcode()) {
1511  default:
1512  llvm_unreachable("Unexpected opcode");
1513  case TargetOpcode::G_LOAD:
1514  MIRBuilder.buildAnyExt(DstReg, TmpReg);
1515  break;
1516  case TargetOpcode::G_SEXTLOAD:
1517  MIRBuilder.buildSExt(DstReg, TmpReg);
1518  break;
1519  case TargetOpcode::G_ZEXTLOAD:
1520  MIRBuilder.buildZExt(DstReg, TmpReg);
1521  break;
1522  }
1523  MI.eraseFromParent();
1524  return Legalized;
1525  }
1526 
1527  return UnableToLegalize;
1528  }
1529  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1530  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1531  case TargetOpcode::G_CTLZ:
1532  case TargetOpcode::G_CTTZ:
1533  case TargetOpcode::G_CTPOP:
1534  return lowerBitCount(MI, TypeIdx, Ty);
1535  case G_UADDO: {
1536  unsigned Res = MI.getOperand(0).getReg();
1537  unsigned CarryOut = MI.getOperand(1).getReg();
1538  unsigned LHS = MI.getOperand(2).getReg();
1539  unsigned RHS = MI.getOperand(3).getReg();
1540 
1541  MIRBuilder.buildAdd(Res, LHS, RHS);
1542  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
1543 
1544  MI.eraseFromParent();
1545  return Legalized;
1546  }
1547  case G_UADDE: {
1548  unsigned Res = MI.getOperand(0).getReg();
1549  unsigned CarryOut = MI.getOperand(1).getReg();
1550  unsigned LHS = MI.getOperand(2).getReg();
1551  unsigned RHS = MI.getOperand(3).getReg();
1552  unsigned CarryIn = MI.getOperand(4).getReg();
1553 
1554  unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
1555  unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
1556 
1557  MIRBuilder.buildAdd(TmpRes, LHS, RHS);
1558  MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
1559  MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
1560  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
1561 
1562  MI.eraseFromParent();
1563  return Legalized;
1564  }
1565  case G_USUBO: {
1566  unsigned Res = MI.getOperand(0).getReg();
1567  unsigned BorrowOut = MI.getOperand(1).getReg();
1568  unsigned LHS = MI.getOperand(2).getReg();
1569  unsigned RHS = MI.getOperand(3).getReg();
1570 
1571  MIRBuilder.buildSub(Res, LHS, RHS);
1572  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
1573 
1574  MI.eraseFromParent();
1575  return Legalized;
1576  }
1577  case G_USUBE: {
1578  unsigned Res = MI.getOperand(0).getReg();
1579  unsigned BorrowOut = MI.getOperand(1).getReg();
1580  unsigned LHS = MI.getOperand(2).getReg();
1581  unsigned RHS = MI.getOperand(3).getReg();
1582  unsigned BorrowIn = MI.getOperand(4).getReg();
1583 
1584  unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
1585  unsigned ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
1586  unsigned LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
1587  unsigned LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
1588 
1589  MIRBuilder.buildSub(TmpRes, LHS, RHS);
1590  MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
1591  MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
1592  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
1593  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
1594  MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
1595 
1596  MI.eraseFromParent();
1597  return Legalized;
1598  }
1599  case G_UITOFP:
1600  return lowerUITOFP(MI, TypeIdx, Ty);
1601  case G_SITOFP:
1602  return lowerSITOFP(MI, TypeIdx, Ty);
1603  }
1604 }
1605 
1606 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
1607  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
1608  SmallVector<unsigned, 2> DstRegs;
1609 
1610  unsigned NarrowSize = NarrowTy.getSizeInBits();
1611  unsigned DstReg = MI.getOperand(0).getReg();
1612  unsigned Size = MRI.getType(DstReg).getSizeInBits();
1613  int NumParts = Size / NarrowSize;
1614  // FIXME: Don't know how to handle the situation where the small vectors
1615  // aren't all the same size yet.
1616  if (Size % NarrowSize != 0)
1617  return UnableToLegalize;
1618 
1619  for (int i = 0; i < NumParts; ++i) {
1620  unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1621  MIRBuilder.buildUndef(TmpReg);
1622  DstRegs.push_back(TmpReg);
1623  }
1624 
1625  if (NarrowTy.isVector())
1626  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1627  else
1628  MIRBuilder.buildBuildVector(DstReg, DstRegs);
1629 
1630  MI.eraseFromParent();
1631  return Legalized;
1632 }
1633 
1635 LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
1636  LLT NarrowTy) {
1637  const unsigned Opc = MI.getOpcode();
1638  const unsigned NumOps = MI.getNumOperands() - 1;
1639  const unsigned NarrowSize = NarrowTy.getSizeInBits();
1640  const unsigned DstReg = MI.getOperand(0).getReg();
1641  const unsigned Flags = MI.getFlags();
1642  const LLT DstTy = MRI.getType(DstReg);
1643  const unsigned Size = DstTy.getSizeInBits();
1644  const int NumParts = Size / NarrowSize;
1645  const LLT EltTy = DstTy.getElementType();
1646  const unsigned EltSize = EltTy.getSizeInBits();
1647  const unsigned BitsForNumParts = NarrowSize * NumParts;
1648 
1649  // Check if we have any leftovers. If we do, then only handle the case where
1650  // the leftover is one element.
1651  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
1652  return UnableToLegalize;
1653 
1654  if (BitsForNumParts != Size) {
1655  unsigned AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
1656  MIRBuilder.buildUndef(AccumDstReg);
1657 
1658  // Handle the pieces which evenly divide into the requested type with
1659  // extract/op/insert sequence.
1660  for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
1661  SmallVector<SrcOp, 4> SrcOps;
1662  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1663  unsigned PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
1664  MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
1665  SrcOps.push_back(PartOpReg);
1666  }
1667 
1668  unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
1669  MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
1670 
1671  unsigned PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
1672  MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
1673  AccumDstReg = PartInsertReg;
1674  }
1675 
1676  // Handle the remaining element sized leftover piece.
1677  SmallVector<SrcOp, 4> SrcOps;
1678  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1679  unsigned PartOpReg = MRI.createGenericVirtualRegister(EltTy);
1680  MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
1681  BitsForNumParts);
1682  SrcOps.push_back(PartOpReg);
1683  }
1684 
1685  unsigned PartDstReg = MRI.createGenericVirtualRegister(EltTy);
1686  MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
1687  MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
1688  MI.eraseFromParent();
1689 
1690  return Legalized;
1691  }
1692 
1693  SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
1694 
1695  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
1696 
1697  if (NumOps >= 2)
1698  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
1699 
1700  if (NumOps >= 3)
1701  extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
1702 
1703  for (int i = 0; i < NumParts; ++i) {
1704  unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
1705 
1706  if (NumOps == 1)
1707  MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
1708  else if (NumOps == 2) {
1709  MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
1710  } else if (NumOps == 3) {
1711  MIRBuilder.buildInstr(Opc, {DstReg},
1712  {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
1713  }
1714 
1715  DstRegs.push_back(DstReg);
1716  }
1717 
1718  if (NarrowTy.isVector())
1719  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1720  else
1721  MIRBuilder.buildBuildVector(DstReg, DstRegs);
1722 
1723  MI.eraseFromParent();
1724  return Legalized;
1725 }
1726 
1727 // Handle splitting vector operations which need to have the same number of
1728 // elements in each type index, but each type index may have a different element
1729 // type.
1730 //
1731 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
1732 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1733 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1734 //
1735 // Also handles some irregular breakdown cases, e.g.
1736 // <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
1737 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1738 // s64 = G_SHL s64, s32
//
// Only type index 0 is supported. NarrowTyArg is the requested type for the
// result pieces; every source operand is split into pieces with the same
// number of elements but its own scalar type. Returns UnableToLegalize when
// TypeIdx is not 0, when no breakdown of the result type exists, or when a
// source operand cannot be split; otherwise MI is erased and Legalized is
// returned.
//
// NOTE(review): this listing has no return-type line above the signature, and
// listing lines 1762, 1785 and 1793 are absent; see the notes below.
1740 LegalizerHelper::fewerElementsVectorMultiEltType(
1741  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
1742  if (TypeIdx != 0)
1743  return UnableToLegalize;
1744 
1745  const LLT NarrowTy0 = NarrowTyArg;
 // A scalar NarrowTy means one element per piece.
1746  const unsigned NewNumElts =
1747  NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
1748 
1749  const unsigned DstReg = MI.getOperand(0).getReg();
1750  LLT DstTy = MRI.getType(DstReg);
1751  LLT LeftoverTy0;
1752 
1753  int NumParts, NumLeftover;
1754  // All of the operands need to have the same number of elements, so if we can
1755  // determine a type breakdown for the result type, we can for all of the
1756  // source types.
1757  std::tie(NumParts, NumLeftover)
1758  = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0);
1759  if (NumParts < 0)
1760  return UnableToLegalize;
1761 
 // NOTE(review): `NewInsts` is used below but its declaration (listing line
 // 1762) is not visible here; presumably a vector of MachineInstrBuilder --
 // confirm against the real source.
1763 
1764  SmallVector<unsigned, 4> DstRegs, LeftoverDstRegs;
1765  SmallVector<unsigned, 4> PartRegs, LeftoverRegs;
1766 
 // Walk the source operands. The first one creates the per-piece
 // instructions; each later one appends its pieces as extra uses.
1767  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1768  LLT LeftoverTy;
1769  unsigned SrcReg = MI.getOperand(I).getReg();
1770  LLT SrcTyI = MRI.getType(SrcReg);
 // Same element count as the result pieces, but this operand's scalar type.
1771  LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
1772  LLT LeftoverTyI;
1773 
1774  // Split this operand into the requested typed registers, and any leftover
1775  // required to reproduce the original type.
1776  if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
1777  LeftoverRegs))
1778  return UnableToLegalize;
1779 
1780  if (I == 1) {
1781  // For the first operand, create an instruction for each part and setup
1782  // the result.
1783  for (unsigned PartReg : PartRegs) {
1784  unsigned PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
 // NOTE(review): the opening line of this statement (listing line 1785) is
 // missing; presumably it appends a not-yet-inserted instruction with MI's
 // opcode to NewInsts -- confirm.
1786  .addDef(PartDstReg)
1787  .addUse(PartReg));
1788  DstRegs.push_back(PartDstReg);
1789  }
1790 
1791  for (unsigned LeftoverReg : LeftoverRegs) {
1792  unsigned PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
 // NOTE(review): same gap as above (listing line 1793).
1794  .addDef(PartDstReg)
1795  .addUse(LeftoverReg));
1796  LeftoverDstRegs.push_back(PartDstReg);
1797  }
1798  } else {
1799  assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
1800 
1801  // Add the newly created operand splits to the existing instructions. The
1802  // odd-sized pieces are ordered after the requested NarrowTyArg sized
1803  // pieces.
1804  unsigned InstCount = 0;
1805  for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
1806  NewInsts[InstCount++].addUse(PartRegs[J]);
1807  for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
1808  NewInsts[InstCount++].addUse(LeftoverRegs[J]);
1809  }
1810 
 // The scratch lists are reused for the next operand.
1811  PartRegs.clear();
1812  LeftoverRegs.clear();
1813  }
1814 
1815  // Insert the newly built operations and rebuild the result register.
1816  for (auto &MIB : NewInsts)
1817  MIRBuilder.insertInstr(MIB);
1818 
1819  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
1820 
1821  MI.eraseFromParent();
1822  return Legalized;
1823 }
1824 
1826 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
1827  LLT NarrowTy) {
1828  if (TypeIdx != 0)
1829  return UnableToLegalize;
1830 
1831  unsigned DstReg = MI.getOperand(0).getReg();
1832  unsigned SrcReg = MI.getOperand(1).getReg();
1833  LLT DstTy = MRI.getType(DstReg);
1834  LLT SrcTy = MRI.getType(SrcReg);
1835 
1836  LLT NarrowTy0 = NarrowTy;
1837  LLT NarrowTy1;
1838  unsigned NumParts;
1839 
1840  if (NarrowTy.isVector()) {
1841  // Uneven breakdown not handled.
1842  NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
1843  if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
1844  return UnableToLegalize;
1845 
1846  NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
1847  } else {
1848  NumParts = DstTy.getNumElements();
1849  NarrowTy1 = SrcTy.getElementType();
1850  }
1851 
1852  SmallVector<unsigned, 4> SrcRegs, DstRegs;
1853  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
1854 
1855  for (unsigned I = 0; I < NumParts; ++I) {
1856  unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
1857  MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
1858  .addDef(DstReg)
1859  .addUse(SrcRegs[I]);
1860 
1861  NewInst->setFlags(MI.getFlags());
1862  DstRegs.push_back(DstReg);
1863  }
1864 
1865  if (NarrowTy.isVector())
1866  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1867  else
1868  MIRBuilder.buildBuildVector(DstReg, DstRegs);
1869 
1870  MI.eraseFromParent();
1871  return Legalized;
1872 }
1873 
/// Split a vector G_ICMP / G_FCMP into NumParts smaller compares and rebuild
/// the original result from the partial results.
///
/// Operand 0 is the result, operand 1 the predicate and operands 2 and 3 the
/// values being compared. \p TypeIdx selects whether \p NarrowTy describes the
/// result pieces (0) or the compared-value pieces (otherwise); the other piece
/// type is derived from it.
///
/// Returns Legalized after erasing \p MI, or UnableToLegalize when the pieces
/// would not all have the same size.
1875 LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
1876  LLT NarrowTy) {
1877  unsigned DstReg = MI.getOperand(0).getReg();
1878  unsigned Src0Reg = MI.getOperand(2).getReg();
1879  LLT DstTy = MRI.getType(DstReg);
1880  LLT SrcTy = MRI.getType(Src0Reg);
1881 
// NarrowTy0 is the type of each partial result, NarrowTy1 the type of each
// partial compare input.
1882  unsigned NumParts;
1883  LLT NarrowTy0, NarrowTy1;
1884 
1885  if (TypeIdx == 0) {
1886  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
1887  unsigned OldElts = DstTy.getNumElements();
1888 
1889  NarrowTy0 = NarrowTy;
1890  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
1891  NarrowTy1 = NarrowTy.isVector() ?
1892  LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
1893  SrcTy.getElementType();
1894 
1895  } else {
// NOTE(review): this branch calls NarrowTy.getNumElements() even when
// NarrowTy is not a vector (both for NumParts and for NarrowTy0) - confirm
// that a scalar NarrowTy can never reach here.
1896  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
1897  unsigned OldElts = SrcTy.getNumElements();
1898 
1899  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
1900  NarrowTy.getNumElements();
1901  NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
1902  DstTy.getScalarSizeInBits());
1903  NarrowTy1 = NarrowTy;
1904  }
1905 
1906  // FIXME: Don't know how to handle the situation where the small vectors
1907  // aren't all the same size yet.
1908  if (NarrowTy1.isVector() &&
1909  NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
1910  return UnableToLegalize;
1911 
1912  CmpInst::Predicate Pred
1913  = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1914 
1915  SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
1916  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
1917  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
1918 
// Emit one compare per piece; only the G_FCMP pieces inherit MI's flags.
1919  for (unsigned I = 0; I < NumParts; ++I) {
1920  unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
1921  DstRegs.push_back(DstReg);
1922 
1923  if (MI.getOpcode() == TargetOpcode::G_ICMP)
1924  MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
1925  else {
1926  MachineInstr *NewCmp
1927  = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
1928  NewCmp->setFlags(MI.getFlags());
1929  }
1930  }
1931 
// Vector pieces are concatenated, scalar pieces are gathered into a vector.
1932  if (NarrowTy1.isVector())
1933  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1934  else
1935  MIRBuilder.buildBuildVector(DstReg, DstRegs);
1936 
1937  MI.eraseFromParent();
1938  return Legalized;
1939 }
1940 
/// Split a vector G_SELECT into NumParts smaller selects and rebuild the
/// original result from the partial results.
///
/// Operand 0 is the result, operand 1 the condition, operands 2 and 3 the two
/// values selected between. With \p TypeIdx == 0, \p NarrowTy is the type of
/// each value piece; otherwise it describes the condition pieces, and only a
/// scalar \p NarrowTy is handled in that case.
///
/// Returns Legalized after erasing \p MI, or UnableToLegalize for breakdowns
/// that are not handled.
1942 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
1943  LLT NarrowTy) {
1944  unsigned DstReg = MI.getOperand(0).getReg();
1945  unsigned CondReg = MI.getOperand(1).getReg();
1946 
// NarrowTy0 is the type of each value/result piece, NarrowTy1 the type of
// each condition piece.
1947  unsigned NumParts = 0;
1948  LLT NarrowTy0, NarrowTy1;
1949 
1950  LLT DstTy = MRI.getType(DstReg);
1951  LLT CondTy = MRI.getType(CondReg);
1952  unsigned Size = DstTy.getSizeInBits();
1953 
1954  assert(TypeIdx == 0 || CondTy.isVector());
1955 
1956  if (TypeIdx == 0) {
1957  NarrowTy0 = NarrowTy;
1958  NarrowTy1 = CondTy;
1959 
1960  unsigned NarrowSize = NarrowTy0.getSizeInBits();
1961  // FIXME: Don't know how to handle the situation where the small vectors
1962  // aren't all the same size yet.
1963  if (Size % NarrowSize != 0)
1964  return UnableToLegalize;
1965 
1966  NumParts = Size / NarrowSize;
1967 
1968  // Need to break down the condition type
1969  if (CondTy.isVector()) {
1970  if (CondTy.getNumElements() == NumParts)
1971  NarrowTy1 = CondTy.getElementType();
1972  else
1973  NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
1974  CondTy.getScalarSizeInBits());
1975  }
1976  } else {
1977  NumParts = CondTy.getNumElements();
1978  if (NarrowTy.isVector()) {
// NOTE(review): both paths below return UnableToLegalize, so the element
// count check currently has no effect on the outcome.
1979  // TODO: Handle uneven breakdown.
1980  if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
1981  return UnableToLegalize;
1982 
1983  return UnableToLegalize;
1984  } else {
1985  NarrowTy0 = DstTy.getElementType();
1986  NarrowTy1 = NarrowTy;
1987  }
1988  }
1989 
// The condition is only split when it is a vector; a scalar condition is
// reused unchanged for every piece.
1990  SmallVector<unsigned, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
1991  if (CondTy.isVector())
1992  extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
1993 
1994  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
1995  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
1996 
1997  for (unsigned i = 0; i < NumParts; ++i) {
1998  unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
1999  MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
2000  Src1Regs[i], Src2Regs[i]);
2001  DstRegs.push_back(DstReg);
2002  }
2003 
// Vector pieces are concatenated, scalar pieces are gathered into a vector.
2004  if (NarrowTy0.isVector())
2005  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2006  else
2007  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2008 
2009  MI.eraseFromParent();
2010  return Legalized;
2011 }
2012 
/// Split a G_PHI into one G_PHI per \p NarrowTy sized piece (plus phis for any
/// leftover piece), rebuild the original result from the new phi results after
/// the phis of the block, and extract the matching pieces of every incoming
/// value before the terminator of its predecessor block.
///
/// Returns Legalized after erasing \p MI, or UnableToLegalize when the type
/// cannot be broken down or an incoming value cannot be split.
2014 LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
2015  LLT NarrowTy) {
2016  const unsigned DstReg = MI.getOperand(0).getReg();
2017  LLT PhiTy = MRI.getType(DstReg);
2018  LLT LeftoverTy;
2019 
2020  // All of the operands need to have the same number of elements, so if we can
2021  // determine a type breakdown for the result type, we can for all of the
2022  // source types.
2023  int NumParts, NumLeftover;
2024  std::tie(NumParts, NumLeftover)
2025  = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
2026  if (NumParts < 0)
2027  return UnableToLegalize;
2028 
// NOTE(review): NewInsts is used below but its declaration is not visible in
// this listing (listing line 2030 is absent) - presumably a container of
// MachineInstrBuilder; confirm against the real file.
2029  SmallVector<unsigned, 4> DstRegs, LeftoverDstRegs;
2031 
2032  const int TotalNumParts = NumParts + NumLeftover;
2033 
2034  // Insert the new phis in the result block first.
2035  for (int I = 0; I != TotalNumParts; ++I) {
2036  LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
2037  unsigned PartDstReg = MRI.createGenericVirtualRegister(Ty);
2038  NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
2039  .addDef(PartDstReg));
2040  if (I < NumParts)
2041  DstRegs.push_back(PartDstReg);
2042  else
2043  LeftoverDstRegs.push_back(PartDstReg);
2044  }
2045 
// The code that recombines the pieces is placed at the first non-phi
// position of the block.
2046  MachineBasicBlock *MBB = MI.getParent();
2047  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
2048  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
2049 
2050  SmallVector<unsigned, 4> PartRegs, LeftoverRegs;
2051 
// Phi operands after the def come in (value, predecessor block) pairs.
2052  // Insert code to extract the incoming values in each predecessor block.
2053  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2054  PartRegs.clear();
2055  LeftoverRegs.clear();
2056 
2057  unsigned SrcReg = MI.getOperand(I).getReg();
2058  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2059  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2060 
2061  LLT Unused;
2062  if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
2063  LeftoverRegs))
2064  return UnableToLegalize;
2065 
2066  // Add the newly created operand splits to the existing instructions. The
2067  // odd-sized pieces are ordered after the requested NarrowTyArg sized
2068  // pieces.
2069  for (int J = 0; J != TotalNumParts; ++J) {
2070  MachineInstrBuilder MIB = NewInsts[J];
2071  MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
2072  MIB.addMBB(&OpMBB);
2073  }
2074  }
2075 
2076  MI.eraseFromParent();
2077  return Legalized;
2078 }
2079 
/// Split a G_LOAD / G_STORE into several \p NarrowTy sized memory accesses,
/// plus accesses for any leftover piece, at increasing byte offsets from the
/// original address.
///
/// For a load the loaded pieces are recombined into the original value
/// register; for a store the value is split first and each piece is stored.
/// Only \p TypeIdx == 0 is handled, and atomic accesses are rejected.
///
/// Returns Legalized after erasing \p MI, or UnableToLegalize.
2081 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
2082  LLT NarrowTy) {
2083  // FIXME: Don't know how to handle secondary types yet.
2084  if (TypeIdx != 0)
2085  return UnableToLegalize;
2086 
2087  MachineMemOperand *MMO = *MI.memoperands_begin();
2088 
// NOTE(review): the second half of this condition is not visible in this
// listing (listing line 2092 is absent) - confirm against the real file.
2089  // This implementation doesn't work for atomics. Give up instead of doing
2090  // something invalid.
2091  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
2093  return UnableToLegalize;
2094 
2095  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2096  unsigned ValReg = MI.getOperand(0).getReg();
2097  unsigned AddrReg = MI.getOperand(1).getReg();
2098  LLT ValTy = MRI.getType(ValReg);
2099 
// A load only needs the type breakdown; a store must actually split the value
// being stored, which also yields the piece counts. NumParts stays -1 when no
// breakdown is possible.
2100  int NumParts = -1;
2101  int NumLeftover = -1;
2102  LLT LeftoverTy;
2103  SmallVector<unsigned, 8> NarrowRegs, NarrowLeftoverRegs;
2104  if (IsLoad) {
2105  std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
2106  } else {
2107  if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
2108  NarrowLeftoverRegs)) {
2109  NumParts = NarrowRegs.size();
2110  NumLeftover = NarrowLeftoverRegs.size();
2111  }
2112  }
2113 
2114  if (NumParts == -1)
2115  return UnableToLegalize;
2116 
// Offsets are materialized as scalars with the bit width of the address type.
2117  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
2118 
2119  unsigned TotalSize = ValTy.getSizeInBits();
2120 
// NOTE(review): MF is used inside the lambda but its declaration is not
// visible in this listing (listing line 2127 is absent).
2121  // Split the load/store into PartTy sized pieces starting at Offset. If this
2122  // is a load, return the new registers in ValRegs. For a store, each element
2123  // of ValRegs should be PartTy. Returns the next offset that needs to be
2124  // handled.
2125  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<unsigned> &ValRegs,
2126  unsigned Offset) -> unsigned {
2128  unsigned PartSize = PartTy.getSizeInBits();
// Offset and PartSize are in bits; the loop stops after NumParts pieces or
// once the whole value has been covered, whichever comes first.
2129  for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
2130  Offset += PartSize, ++Idx) {
2131  unsigned ByteSize = PartSize / 8;
2132  unsigned ByteOffset = Offset / 8;
2133  unsigned NewAddrReg = 0;
2134 
2135  MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
2136 
2137  MachineMemOperand *NewMMO =
2138  MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
2139 
2140  if (IsLoad) {
2141  unsigned Dst = MRI.createGenericVirtualRegister(PartTy);
2142  ValRegs.push_back(Dst);
2143  MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
2144  } else {
2145  MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
2146  }
2147  }
2148 
2149  return Offset;
2150  };
2151 
2152  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
2153 
2154  // Handle the rest of the register if this isn't an even type breakdown.
2155  if (LeftoverTy.isValid())
2156  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
2157 
2158  if (IsLoad) {
2159  insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
2160  LeftoverTy, NarrowLeftoverRegs);
2161  }
2162 
2163  MI.eraseFromParent();
2164  return Legalized;
2165 }
2166 
// NOTE(review): the first lines of this definition's signature are not visible
// in this listing (listing lines 2167-2168 are absent); judging by the helpers
// it dispatches to, this is presumably LegalizerHelper::fewerElementsVector -
// confirm against the real file.
//
// Dispatcher: sets the builder's insertion point at MI and forwards
// (MI, TypeIdx, NarrowTy) to the helper that handles MI's opcode. Opcodes not
// listed return UnableToLegalize.
2169  LLT NarrowTy) {
2170  using namespace TargetOpcode;
2171 
2172  MIRBuilder.setInstr(MI);
2173  switch (MI.getOpcode()) {
2174  case G_IMPLICIT_DEF:
2175  return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
2176  case G_AND:
2177  case G_OR:
2178  case G_XOR:
2179  case G_ADD:
2180  case G_SUB:
2181  case G_MUL:
2182  case G_SMULH:
2183  case G_UMULH:
2184  case G_FADD:
2185  case G_FMUL:
2186  case G_FSUB:
2187  case G_FNEG:
2188  case G_FABS:
2189  case G_FCANONICALIZE:
2190  case G_FDIV:
2191  case G_FREM:
2192  case G_FMA:
2193  case G_FPOW:
2194  case G_FEXP:
2195  case G_FEXP2:
2196  case G_FLOG:
2197  case G_FLOG2:
2198  case G_FLOG10:
2199  case G_FNEARBYINT:
2200  case G_FCEIL:
2201  case G_FFLOOR:
2202  case G_FRINT:
2203  case G_INTRINSIC_ROUND:
2204  case G_INTRINSIC_TRUNC:
2205  case G_FCOS:
2206  case G_FSIN:
2207  case G_FSQRT:
2208  case G_BSWAP:
2209  case G_SDIV:
2210  return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
2211  case G_SHL:
2212  case G_LSHR:
2213  case G_ASHR:
2214  case G_CTLZ:
2215  case G_CTLZ_ZERO_UNDEF:
2216  case G_CTTZ:
2217  case G_CTTZ_ZERO_UNDEF:
2218  case G_CTPOP:
2219  case G_FCOPYSIGN:
2220  return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
2221  case G_ZEXT:
2222  case G_SEXT:
2223  case G_ANYEXT:
2224  case G_FPEXT:
2225  case G_FPTRUNC:
2226  case G_SITOFP:
2227  case G_UITOFP:
2228  case G_FPTOSI:
2229  case G_FPTOUI:
2230  case G_INTTOPTR:
2231  case G_PTRTOINT:
2232  case G_ADDRSPACE_CAST:
2233  return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
2234  case G_ICMP:
2235  case G_FCMP:
2236  return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
2237  case G_SELECT:
2238  return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
2239  case G_PHI:
2240  return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
2241  case G_LOAD:
2242  case G_STORE:
2243  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
2244  default:
2245  return UnableToLegalize;
2246  }
2247 }
2248 
/// Expand a scalar shift by the constant \p Amt into operations on the two
/// \p HalfTy halves of the shifted value (operand 1), then merge the two
/// result halves into operand 0 and erase \p MI.
///
/// G_SHL and G_LSHR are matched explicitly; any other opcode takes the
/// arithmetic-shift-right path. Shift amount constants are built with type
/// \p AmtTy. Always returns Legalized.
2250 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
2251  const LLT HalfTy, const LLT AmtTy) {
2252 
// InL / InH receive the low and high halves of the value being shifted.
2253  unsigned InL = MRI.createGenericVirtualRegister(HalfTy);
2254  unsigned InH = MRI.createGenericVirtualRegister(HalfTy);
2255  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
2256 
// A shift by zero just reassembles the input.
2257  if (Amt.isNullValue()) {
2258  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
2259  MI.eraseFromParent();
2260  return Legalized;
2261  }
2262 
// NVTBits is the width of one half, VTBits the width of the full value.
2263  LLT NVT = HalfTy;
2264  unsigned NVTBits = HalfTy.getSizeInBits();
2265  unsigned VTBits = 2 * NVTBits;
2266 
// Each opcode distinguishes four ranges of Amt: larger than the full width,
// larger than one half, exactly one half, and smaller than one half.
2267  SrcOp Lo(0), Hi(0);
2268  if (MI.getOpcode() == TargetOpcode::G_SHL) {
2269  if (Amt.ugt(VTBits)) {
2270  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
2271  } else if (Amt.ugt(NVTBits)) {
2272  Lo = MIRBuilder.buildConstant(NVT, 0);
2273  Hi = MIRBuilder.buildShl(NVT, InL,
2274  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2275  } else if (Amt == NVTBits) {
2276  Lo = MIRBuilder.buildConstant(NVT, 0);
2277  Hi = InL;
2278  } else {
2279  Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
2280  auto OrLHS =
2281  MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
2282  auto OrRHS = MIRBuilder.buildLShr(
2283  NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2284  Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2285  }
2286  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2287  if (Amt.ugt(VTBits)) {
2288  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
2289  } else if (Amt.ugt(NVTBits)) {
2290  Lo = MIRBuilder.buildLShr(NVT, InH,
2291  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2292  Hi = MIRBuilder.buildConstant(NVT, 0);
2293  } else if (Amt == NVTBits) {
2294  Lo = InH;
2295  Hi = MIRBuilder.buildConstant(NVT, 0);
2296  } else {
2297  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
2298 
2299  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
2300  auto OrRHS = MIRBuilder.buildShl(
2301  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2302 
2303  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2304  Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
2305  }
2306  } else {
// Arithmetic shift right: vacated bits are filled with copies of the sign
// bit, obtained by shifting the high half right by NVTBits - 1.
2307  if (Amt.ugt(VTBits)) {
2308  Hi = Lo = MIRBuilder.buildAShr(
2309  NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2310  } else if (Amt.ugt(NVTBits)) {
2311  Lo = MIRBuilder.buildAShr(NVT, InH,
2312  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2313  Hi = MIRBuilder.buildAShr(NVT, InH,
2314  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2315  } else if (Amt == NVTBits) {
2316  Lo = InH;
2317  Hi = MIRBuilder.buildAShr(NVT, InH,
2318  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2319  } else {
2320  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
2321 
2322  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
2323  auto OrRHS = MIRBuilder.buildShl(
2324  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2325 
2326  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2327  Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
2328  }
2329  }
2330 
2331  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
2332  MI.eraseFromParent();
2333 
2334  return Legalized;
2335 }
2336 
2337 // TODO: Optimize if constant shift amount.
2339 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
2340  LLT RequestedTy) {
2341  if (TypeIdx == 1) {
2342  Observer.changingInstr(MI);
2343  narrowScalarSrc(MI, RequestedTy, 2);
2344  Observer.changedInstr(MI);
2345  return Legalized;
2346  }
2347 
2348  unsigned DstReg = MI.getOperand(0).getReg();
2349  LLT DstTy = MRI.getType(DstReg);
2350  if (DstTy.isVector())
2351  return UnableToLegalize;
2352 
2353  unsigned Amt = MI.getOperand(2).getReg();
2354  LLT ShiftAmtTy = MRI.getType(Amt);
2355  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
2356  if (DstEltSize % 2 != 0)
2357  return UnableToLegalize;
2358 
2359  // Ignore the input type. We can only go to exactly half the size of the
2360  // input. If that isn't small enough, the resulting pieces will be further
2361  // legalized.
2362  const unsigned NewBitSize = DstEltSize / 2;
2363  const LLT HalfTy = LLT::scalar(NewBitSize);
2364  const LLT CondTy = LLT::scalar(1);
2365 
2366  if (const MachineInstr *KShiftAmt =
2367  getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
2368  return narrowScalarShiftByConstant(
2369  MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
2370  }
2371 
2372  // TODO: Expand with known bits.
2373 
2374  // Handle the fully general expansion by an unknown amount.
2375  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
2376 
2377  unsigned InL = MRI.createGenericVirtualRegister(HalfTy);
2378  unsigned InH = MRI.createGenericVirtualRegister(HalfTy);
2379  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
2380 
2381  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
2382  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
2383 
2384  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
2385  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
2386  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
2387 
2388  unsigned ResultRegs[2];
2389  switch (MI.getOpcode()) {
2390  case TargetOpcode::G_SHL: {
2391  // Short: ShAmt < NewBitSize
2392  auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2393 
2394  auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2395  auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
2396  auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2397 
2398  // Long: ShAmt >= NewBitSize
2399  auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
2400  auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
2401 
2402  auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
2403  auto Hi = MIRBuilder.buildSelect(
2404  HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
2405 
2406  ResultRegs[0] = Lo.getReg(0);
2407  ResultRegs[1] = Hi.getReg(0);
2408  break;
2409  }
2410  case TargetOpcode::G_LSHR: {
2411  // Short: ShAmt < NewBitSize
2412  auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
2413 
2414  auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2415  auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
2416  auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2417 
2418  // Long: ShAmt >= NewBitSize
2419  auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
2420  auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2421 
2422  auto Lo = MIRBuilder.buildSelect(
2423  HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2424  auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2425 
2426  ResultRegs[0] = Lo.getReg(0);
2427  ResultRegs[1] = Hi.getReg(0);
2428  break;
2429  }
2430  case TargetOpcode::G_ASHR: {
2431  // Short: ShAmt < NewBitSize
2432  auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
2433 
2434  auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2435  auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack);
2436  auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2437 
2438  // Long: ShAmt >= NewBitSize
2439 
2440  // Sign of Hi part.
2441  auto HiL = MIRBuilder.buildAShr(
2442  HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
2443 
2444  auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2445 
2446  auto Lo = MIRBuilder.buildSelect(
2447  HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2448 
2449  auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2450 
2451  ResultRegs[0] = Lo.getReg(0);
2452  ResultRegs[1] = Hi.getReg(0);
2453  break;
2454  }
2455  default:
2456  llvm_unreachable("not a shift");
2457  }
2458 
2459  MIRBuilder.buildMerge(DstReg, ResultRegs);
2460  MI.eraseFromParent();
2461  return Legalized;
2462 }
2463 
2465 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
2466  LLT MoreTy) {
2467  assert(TypeIdx == 0 && "Expecting only Idx 0");
2468 
2469  Observer.changingInstr(MI);
2470  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2471  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2472  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2473  moreElementsVectorSrc(MI, MoreTy, I);
2474  }
2475 
2476  MachineBasicBlock &MBB = *MI.getParent();
2477  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
2478  moreElementsVectorDst(MI, MoreTy, 0);
2479  Observer.changedInstr(MI);
2480  return Legalized;
2481 }
2482 
// NOTE(review): the first lines of this definition's signature are not visible
// in this listing (listing lines 2483-2484 are absent); judging by the helpers
// it calls, this is presumably LegalizerHelper::moreElementsVector - confirm
// against the real file.
//
// Dispatcher: sets the builder's insertion point at MI, then widens the listed
// source operands and/or the result operand of MI to MoreTy, bracketing the
// in-place change with Observer notifications. Unsupported opcodes and
// unsupported type indices return UnableToLegalize.
2485  LLT MoreTy) {
2486  MIRBuilder.setInstr(MI);
2487  unsigned Opc = MI.getOpcode();
2488  switch (Opc) {
2489  case TargetOpcode::G_IMPLICIT_DEF: {
2490  Observer.changingInstr(MI);
2491  moreElementsVectorDst(MI, MoreTy, 0);
2492  Observer.changedInstr(MI);
2493  return Legalized;
2494  }
2495  case TargetOpcode::G_AND:
2496  case TargetOpcode::G_OR:
2497  case TargetOpcode::G_XOR: {
2498  Observer.changingInstr(MI);
2499  moreElementsVectorSrc(MI, MoreTy, 1);
2500  moreElementsVectorSrc(MI, MoreTy, 2);
2501  moreElementsVectorDst(MI, MoreTy, 0);
2502  Observer.changedInstr(MI);
2503  return Legalized;
2504  }
// G_EXTRACT only supports widening its source (type index 1).
2505  case TargetOpcode::G_EXTRACT:
2506  if (TypeIdx != 1)
2507  return UnableToLegalize;
2508  Observer.changingInstr(MI);
2509  moreElementsVectorSrc(MI, MoreTy, 1);
2510  Observer.changedInstr(MI);
2511  return Legalized;
// G_INSERT widens operand 1 and the result; operand 2 is left alone.
2512  case TargetOpcode::G_INSERT:
2513  if (TypeIdx != 0)
2514  return UnableToLegalize;
2515  Observer.changingInstr(MI);
2516  moreElementsVectorSrc(MI, MoreTy, 1);
2517  moreElementsVectorDst(MI, MoreTy, 0);
2518  Observer.changedInstr(MI);
2519  return Legalized;
// G_SELECT is only handled when the condition (operand 1) is not a vector.
2520  case TargetOpcode::G_SELECT:
2521  if (TypeIdx != 0)
2522  return UnableToLegalize;
2523  if (MRI.getType(MI.getOperand(1).getReg()).isVector())
2524  return UnableToLegalize;
2525 
2526  Observer.changingInstr(MI);
2527  moreElementsVectorSrc(MI, MoreTy, 2);
2528  moreElementsVectorSrc(MI, MoreTy, 3);
2529  moreElementsVectorDst(MI, MoreTy, 0);
2530  Observer.changedInstr(MI);
2531  return Legalized;
2532  case TargetOpcode::G_PHI:
2533  return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
2534  default:
2535  return UnableToLegalize;
2536  }
2537 }
2538 
/// Multiply two values given as lists of \p NarrowTy sized parts
/// (\p Src1Regs, \p Src2Regs, index 0 holding the low bits) and store the
/// parts of the product into \p DstRegs.
///
/// \p DstRegs must already be sized to the number of result parts wanted; its
/// size determines how many parts are computed. Each result part is the sum of
/// the low halves of the partial products for that position, the high halves
/// (G_UMULH) of the partial products for the previous position, and the
/// carries accumulated while summing the previous position.
2539 void LegalizerHelper::multiplyRegisters(SmallVectorImpl<unsigned> &DstRegs,
2540  ArrayRef<unsigned> Src1Regs,
2541  ArrayRef<unsigned> Src2Regs,
2542  LLT NarrowTy) {
// NOTE(review): B is used below but its declaration is not visible in this
// listing (listing line 2543 is absent) - presumably an alias for the
// instruction builder; confirm against the real file.
2544  unsigned SrcParts = Src1Regs.size();
2545  unsigned DstParts = DstRegs.size();
2546 
2547  unsigned DstIdx = 0; // Low bits of the result.
2548  unsigned FactorSum =
2549  B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
2550  DstRegs[DstIdx] = FactorSum;
2551 
// Only read for DstIdx >= 2, after the DstIdx == 1 iteration has set it.
2552  unsigned CarrySumPrevDstIdx;
2553  SmallVector<unsigned, 4> Factors;
2554 
2555  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
2556  // Collect low parts of muls for DstIdx.
2557  for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
2558  i <= std::min(DstIdx, SrcParts - 1); ++i) {
2559  MachineInstrBuilder Mul =
2560  B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
2561  Factors.push_back(Mul.getReg(0));
2562  }
2563  // Collect high parts of muls from previous DstIdx.
2564  for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
2565  i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
2566  MachineInstrBuilder Umulh =
2567  B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
2568  Factors.push_back(Umulh.getReg(0));
2569  }
2570  // Add CarrySum from additions calculated for previous DstIdx.
2571  if (DstIdx != 1) {
2572  Factors.push_back(CarrySumPrevDstIdx);
2573  }
2574 
// NOTE(review): the summation below reads Factors[0] and Factors[1]
// unconditionally, i.e. it assumes at least two factors were collected.
2575  unsigned CarrySum = 0;
2576  // Add all factors and accumulate all carries into CarrySum.
2577  if (DstIdx != DstParts - 1) {
2578  MachineInstrBuilder Uaddo =
2579  B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
2580  FactorSum = Uaddo.getReg(0);
2581  CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
2582  for (unsigned i = 2; i < Factors.size(); ++i) {
2583  MachineInstrBuilder Uaddo =
2584  B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
2585  FactorSum = Uaddo.getReg(0);
2586  MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
2587  CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
2588  }
2589  } else {
2590  // Since value for the next index is not calculated, neither is CarrySum.
2591  FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
2592  for (unsigned i = 2; i < Factors.size(); ++i)
2593  FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
2594  }
2595 
2596  CarrySumPrevDstIdx = CarrySum;
2597  DstRegs[DstIdx] = FactorSum;
2598  Factors.clear();
2599  }
2600 }
2601 
2603 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
2604  unsigned DstReg = MI.getOperand(0).getReg();
2605  unsigned Src1 = MI.getOperand(1).getReg();
2606  unsigned Src2 = MI.getOperand(2).getReg();
2607 
2608  LLT Ty = MRI.getType(DstReg);
2609  if (Ty.isVector())
2610  return UnableToLegalize;
2611 
2612  unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
2613  unsigned DstSize = Ty.getSizeInBits();
2614  unsigned NarrowSize = NarrowTy.getSizeInBits();
2615  if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
2616  return UnableToLegalize;
2617 
2618  unsigned NumDstParts = DstSize / NarrowSize;
2619  unsigned NumSrcParts = SrcSize / NarrowSize;
2620  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
2621  unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
2622 
2623  SmallVector<unsigned, 2> Src1Parts, Src2Parts, DstTmpRegs;
2624  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
2625  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
2626  DstTmpRegs.resize(DstTmpParts);
2627  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
2628 
2629  // Take only high half of registers if this is high mul.
2630  ArrayRef<unsigned> DstRegs(
2631  IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
2632  MIRBuilder.buildMerge(DstReg, DstRegs);
2633  MI.eraseFromParent();
2634  return Legalized;
2635 }
2636 
/// Narrow the source of a G_EXTRACT (type index 1) to \p NarrowTy sized parts.
///
/// The source (operand 1) is split into parts; for every part that overlaps
/// the extracted bit range [OpStart, OpStart + OpSize) the overlapping bits
/// are taken (with a smaller G_EXTRACT when only part of it is needed), and
/// the collected segments are combined into the result (operand 0).
///
/// Returns Legalized after erasing \p MI, or UnableToLegalize for other type
/// indices or when the source size is not a multiple of the NarrowTy size.
2638 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
2639  LLT NarrowTy) {
2640  if (TypeIdx != 1)
2641  return UnableToLegalize;
2642 
2643  uint64_t NarrowSize = NarrowTy.getSizeInBits();
2644 
2645  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
2646  // FIXME: add support for when SizeOp1 isn't an exact multiple of
2647  // NarrowSize.
2648  if (SizeOp1 % NarrowSize != 0)
2649  return UnableToLegalize;
2650  int NumParts = SizeOp1 / NarrowSize;
2651 
// NOTE(review): Indexes is declared but not used in this function.
2652  SmallVector<unsigned, 2> SrcRegs, DstRegs;
2653  SmallVector<uint64_t, 2> Indexes;
2654  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
2655 
// OpStart / OpSize: bit offset and bit width of the extracted range.
2656  unsigned OpReg = MI.getOperand(0).getReg();
2657  uint64_t OpStart = MI.getOperand(2).getImm();
2658  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
2659  for (int i = 0; i < NumParts; ++i) {
2660  unsigned SrcStart = i * NarrowSize;
2661 
2662  if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
2663  // No part of the extract uses this subregister, ignore it.
2664  continue;
2665  } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
2666  // The entire subregister is extracted, forward the value.
2667  DstRegs.push_back(SrcRegs[i]);
2668  continue;
2669  }
2670 
2671  // ExtractOffset is where the needed bits start within this source part;
2672  // SegSize is how many bits of this part overlap the extracted range.
2673  int64_t ExtractOffset;
2674  uint64_t SegSize;
2675  if (OpStart < SrcStart) {
2676  ExtractOffset = 0;
2677  SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
2678  } else {
2679  ExtractOffset = OpStart - SrcStart;
2680  SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
2681  }
2682 
2683  unsigned SegReg = SrcRegs[i];
2684  if (ExtractOffset != 0 || SegSize != NarrowSize) {
2685  // A genuine extract is needed.
2686  SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
2687  MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
2688  }
2689 
2690  DstRegs.push_back(SegReg);
2691  }
2692 
// A vector result is assembled with G_BUILD_VECTOR, a scalar one with a merge.
2693  unsigned DstReg = MI.getOperand(0).getReg();
2694  if(MRI.getType(DstReg).isVector())
2695  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2696  else
2697  MIRBuilder.buildMerge(DstReg, DstRegs);
2698  MI.eraseFromParent();
2699  return Legalized;
2700 }
2701 
2703 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
2704  LLT NarrowTy) {
2705  // FIXME: Don't know how to handle secondary types yet.
2706  if (TypeIdx != 0)
2707  return UnableToLegalize;
2708 
2709  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2710  uint64_t NarrowSize = NarrowTy.getSizeInBits();
2711 
2712  // FIXME: add support for when SizeOp0 isn't an exact multiple of
2713  // NarrowSize.
2714  if (SizeOp0 % NarrowSize != 0)
2715  return UnableToLegalize;
2716 
2717  int NumParts = SizeOp0 / NarrowSize;
2718 
2719  SmallVector<unsigned, 2> SrcRegs, DstRegs;
2720  SmallVector<uint64_t, 2> Indexes;
2721  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
2722 
2723  unsigned OpReg = MI.getOperand(2).getReg();
2724  uint64_t OpStart = MI.getOperand(3).getImm();
2725  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
2726  for (int i = 0; i < NumParts; ++i) {
2727  unsigned DstStart = i * NarrowSize;
2728 
2729  if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
2730  // No part of the insert affects this subregister, forward the original.
2731  DstRegs.push_back(SrcRegs[i]);
2732  continue;
2733  } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
2734  // The entire subregister is defined by this insert, forward the new
2735  // value.
2736  DstRegs.push_back(OpReg);
2737  continue;
2738  }
2739 
2740  // OpSegStart is where this destination segment would start in OpReg if it
2741  // extended infinitely in both directions.
2742  int64_t ExtractOffset, InsertOffset;
2743  uint64_t SegSize;
2744  if (OpStart < DstStart) {
2745  InsertOffset = 0;
2746  ExtractOffset = DstStart - OpStart;
2747  SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
2748  } else {
2749  InsertOffset = OpStart - DstStart;
2750  ExtractOffset = 0;
2751  SegSize =
2752  std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
2753  }
2754 
2755  unsigned SegReg = OpReg;
2756  if (ExtractOffset != 0 || SegSize != OpSize) {
2757  // A genuine extract is needed.
2758  SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
2759  MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
2760  }
2761 
2762  unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
2763  MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
2764  DstRegs.push_back(DstReg);
2765  }
2766 
2767  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
2768  unsigned DstReg = MI.getOperand(0).getReg();
2769  if(MRI.getType(DstReg).isVector())
2770  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2771  else
2772  MIRBuilder.buildMerge(DstReg, DstRegs);
2773  MI.eraseFromParent();
2774  return Legalized;
2775 }
2776 
2778 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
2779  LLT NarrowTy) {
2780  unsigned DstReg = MI.getOperand(0).getReg();
2781  LLT DstTy = MRI.getType(DstReg);
2782 
2783  assert(MI.getNumOperands() == 3 && TypeIdx == 0);
2784 
2785  SmallVector<unsigned, 4> DstRegs, DstLeftoverRegs;
2786  SmallVector<unsigned, 4> Src0Regs, Src0LeftoverRegs;
2787  SmallVector<unsigned, 4> Src1Regs, Src1LeftoverRegs;
2788  LLT LeftoverTy;
2789  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
2790  Src0Regs, Src0LeftoverRegs))
2791  return UnableToLegalize;
2792 
2793  LLT Unused;
2794  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
2795  Src1Regs, Src1LeftoverRegs))
2796  llvm_unreachable("inconsistent extractParts result");
2797 
2798  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
2799  auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
2800  {Src0Regs[I], Src1Regs[I]});
2801  DstRegs.push_back(Inst->getOperand(0).getReg());
2802  }
2803 
2804  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
2805  auto Inst = MIRBuilder.buildInstr(
2806  MI.getOpcode(),
2807  {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
2808  DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
2809  }
2810 
2811  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
2812  LeftoverTy, DstLeftoverRegs);
2813 
2814  MI.eraseFromParent();
2815  return Legalized;
2816 }
2817 
2819 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
2820  LLT NarrowTy) {
2821  if (TypeIdx != 0)
2822  return UnableToLegalize;
2823 
2824  unsigned CondReg = MI.getOperand(1).getReg();
2825  LLT CondTy = MRI.getType(CondReg);
2826  if (CondTy.isVector()) // TODO: Handle vselect
2827  return UnableToLegalize;
2828 
2829  unsigned DstReg = MI.getOperand(0).getReg();
2830  LLT DstTy = MRI.getType(DstReg);
2831 
2832  SmallVector<unsigned, 4> DstRegs, DstLeftoverRegs;
2833  SmallVector<unsigned, 4> Src1Regs, Src1LeftoverRegs;
2834  SmallVector<unsigned, 4> Src2Regs, Src2LeftoverRegs;
2835  LLT LeftoverTy;
2836  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
2837  Src1Regs, Src1LeftoverRegs))
2838  return UnableToLegalize;
2839 
2840  LLT Unused;
2841  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
2842  Src2Regs, Src2LeftoverRegs))
2843  llvm_unreachable("inconsistent extractParts result");
2844 
2845  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
2846  auto Select = MIRBuilder.buildSelect(NarrowTy,
2847  CondReg, Src1Regs[I], Src2Regs[I]);
2848  DstRegs.push_back(Select->getOperand(0).getReg());
2849  }
2850 
2851  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
2852  auto Select = MIRBuilder.buildSelect(
2853  LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
2854  DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
2855  }
2856 
2857  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
2858  LeftoverTy, DstLeftoverRegs);
2859 
2860  MI.eraseFromParent();
2861  return Legalized;
2862 }
2863 
2865 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
2866  unsigned Opc = MI.getOpcode();
2867  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
2868  auto isSupported = [this](const LegalityQuery &Q) {
2869  auto QAction = LI.getAction(Q).Action;
2870  return QAction == Legal || QAction == Libcall || QAction == Custom;
2871  };
2872  switch (Opc) {
2873  default:
2874  return UnableToLegalize;
2875  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
2876  // This trivially expands to CTLZ.
2877  Observer.changingInstr(MI);
2878  MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
2879  Observer.changedInstr(MI);
2880  return Legalized;
2881  }
2882  case TargetOpcode::G_CTLZ: {
2883  unsigned SrcReg = MI.getOperand(1).getReg();
2884  unsigned Len = Ty.getSizeInBits();
2885  if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
2886  // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
2887  auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
2888  {Ty}, {SrcReg});
2889  auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
2890  auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
2891  auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
2892  SrcReg, MIBZero);
2893  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
2894  MIBCtlzZU);
2895  MI.eraseFromParent();
2896  return Legalized;
2897  }
2898  // for now, we do this:
2899  // NewLen = NextPowerOf2(Len);
2900  // x = x | (x >> 1);
2901  // x = x | (x >> 2);
2902  // ...
2903  // x = x | (x >>16);
2904  // x = x | (x >>32); // for 64-bit input
2905  // Upto NewLen/2
2906  // return Len - popcount(x);
2907  //
2908  // Ref: "Hacker's Delight" by Henry Warren
2909  unsigned Op = SrcReg;
2910  unsigned NewLen = PowerOf2Ceil(Len);
2911  for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
2912  auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
2913  auto MIBOp = MIRBuilder.buildInstr(
2914  TargetOpcode::G_OR, {Ty},
2915  {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
2916  {Op, MIBShiftAmt})});
2917  Op = MIBOp->getOperand(0).getReg();
2918  }
2919  auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
2920  MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
2921  {MIRBuilder.buildConstant(Ty, Len), MIBPop});
2922  MI.eraseFromParent();
2923  return Legalized;
2924  }
2925  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
2926  // This trivially expands to CTTZ.
2927  Observer.changingInstr(MI);
2928  MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
2929  Observer.changedInstr(MI);
2930  return Legalized;
2931  }
2932  case TargetOpcode::G_CTTZ: {
2933  unsigned SrcReg = MI.getOperand(1).getReg();
2934  unsigned Len = Ty.getSizeInBits();
2935  if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
2936  // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
2937  // zero.
2938  auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
2939  {Ty}, {SrcReg});
2940  auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
2941  auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
2942  auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
2943  SrcReg, MIBZero);
2944  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
2945  MIBCttzZU);
2946  MI.eraseFromParent();
2947  return Legalized;
2948  }
2949  // for now, we use: { return popcount(~x & (x - 1)); }
2950  // unless the target has ctlz but not ctpop, in which case we use:
2951  // { return 32 - nlz(~x & (x-1)); }
2952  // Ref: "Hacker's Delight" by Henry Warren
2953  auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
2954  auto MIBNot =
2955  MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
2956  auto MIBTmp = MIRBuilder.buildInstr(
2957  TargetOpcode::G_AND, {Ty},
2958  {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
2959  {SrcReg, MIBCstNeg1})});
2960  if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
2961  isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
2962  auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
2964  TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
2965  {MIBCstLen,
2966  MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
2967  MI.eraseFromParent();
2968  return Legalized;
2969  }
2970  MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
2971  MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
2972  return Legalized;
2973  }
2974  }
2975 }
2976 
2977 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
2978 // representation.
2980 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
2981  unsigned Dst = MI.getOperand(0).getReg();
2982  unsigned Src = MI.getOperand(1).getReg();
2983  const LLT S64 = LLT::scalar(64);
2984  const LLT S32 = LLT::scalar(32);
2985  const LLT S1 = LLT::scalar(1);
2986 
2987  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
2988 
2989  // unsigned cul2f(ulong u) {
2990  // uint lz = clz(u);
2991  // uint e = (u != 0) ? 127U + 63U - lz : 0;
2992  // u = (u << lz) & 0x7fffffffffffffffUL;
2993  // ulong t = u & 0xffffffffffUL;
2994  // uint v = (e << 23) | (uint)(u >> 40);
2995  // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
2996  // return as_float(v + r);
2997  // }
2998 
2999  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
3000  auto Zero64 = MIRBuilder.buildConstant(S64, 0);
3001 
3002  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
3003 
3004  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
3005  auto Sub = MIRBuilder.buildSub(S32, K, LZ);
3006 
3007  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
3008  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
3009 
3010  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
3011  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
3012 
3013  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
3014 
3015  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
3016  auto T = MIRBuilder.buildAnd(S64, U, Mask1);
3017 
3018  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
3019  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
3020  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
3021 
3022  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
3023  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
3024  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
3025  auto One = MIRBuilder.buildConstant(S32, 1);
3026 
3027  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
3028  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
3029  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
3030  MIRBuilder.buildAdd(Dst, V, R);
3031 
3032  return Legalized;
3033 }
3034 
3036 LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3037  unsigned Dst = MI.getOperand(0).getReg();
3038  unsigned Src = MI.getOperand(1).getReg();
3039  LLT DstTy = MRI.getType(Dst);
3040  LLT SrcTy = MRI.getType(Src);
3041 
3042  if (SrcTy != LLT::scalar(64))
3043  return UnableToLegalize;
3044 
3045  if (DstTy == LLT::scalar(32)) {
3046  // TODO: SelectionDAG has several alternative expansions to port which may
3047  // be more reasonble depending on the available instructions. If a target
3048  // has sitofp, does not have CTLZ, or can efficiently use f64 as an
3049  // intermediate type, this is probably worse.
3050  return lowerU64ToF32BitOps(MI);
3051  }
3052 
3053  return UnableToLegalize;
3054 }
3055 
3057 LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3058  unsigned Dst = MI.getOperand(0).getReg();
3059  unsigned Src = MI.getOperand(1).getReg();
3060  LLT DstTy = MRI.getType(Dst);
3061  LLT SrcTy = MRI.getType(Src);
3062 
3063  const LLT S64 = LLT::scalar(64);
3064  const LLT S32 = LLT::scalar(32);
3065  const LLT S1 = LLT::scalar(1);
3066 
3067  if (SrcTy != S64)
3068  return UnableToLegalize;
3069 
3070  if (DstTy == S32) {
3071  // signed cl2f(long l) {
3072  // long s = l >> 63;
3073  // float r = cul2f((l + s) ^ s);
3074  // return s ? -r : r;
3075  // }
3076  unsigned L = Src;
3077  auto SignBit = MIRBuilder.buildConstant(S64, 63);
3078  auto S = MIRBuilder.buildAShr(S64, L, SignBit);
3079 
3080  auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
3081  auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
3082  auto R = MIRBuilder.buildUITOFP(S32, Xor);
3083 
3084  auto RNeg = MIRBuilder.buildFNeg(S32, R);
3085  auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
3086  MIRBuilder.buildConstant(S64, 0));
3087  MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
3088  return Legalized;
3089  }
3090 
3091  return UnableToLegalize;
3092 }
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType)
uint64_t CallInst * C
unsigned getReg(unsigned Idx) const
Get the register for the operand index.
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:164
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1562
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:833
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ...
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:561
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineBasicBlock * getMBB() const
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
unsigned getScalarSizeInBits() const
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
void setFPImm(const ConstantFP *CFP)
AtomicOrdering getFailureOrdering() const
For cmpxchg atomic operations, return the atomic ordering requirements when store does not occur...
void push_back(const T &Elt)
Definition: SmallVector.h:211
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
bool isScalar() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
unsigned getReg() const
getReg - Returns the register number.
unsigned Reg
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
Definition: LegalizerInfo.h:62
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
virtual const TargetLowering * getTargetLowering() const
unsigned less than
Definition: InstrTypes.h:734
LLT getScalarType() const
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_OR Op0, Op1.
LLT getType(unsigned Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register...
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< unsigned > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:810
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:371
static uint32_t Concat[]
MachineInstrBuilder buildUAddo(const DstOp &Res, const DstOp &CarryOut, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res, CarryOut = G_UADDO Op0, Op1.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert `Res0, ...
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type...
Definition: LegalizerInfo.h:52
MachineInstrBuilder buildStore(unsigned Val, unsigned Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
bool isVector() const
void setMF(MachineFunction &MF)
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
A description of a memory reference used in the backend.
bool isSigned() const
Definition: InstrTypes.h:879
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions. ...
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
const HexagonInstrInfo * TII
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:163
const ConstantFP * getFPImm() const
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:411
MachineInstrBuilder buildUAdde(const DstOp &Res, const DstOp &CarryOut, const SrcOp &Op0, const SrcOp &Op1, const SrcOp &CarryIn)
Build and insert Res, CarryOut = G_UADDE Op0, Op1, CarryIn.
const MachineInstrBuilder & addUse(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:408
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args)
Helper function that creates the given libcall.
AtomicOrdering getOrdering() const
Return the atomic ordering requirements for this memory operation.
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
Definition: LegalizerInfo.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4446
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:137
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
virtual const TargetInstrInfo * getInstrInfo() const
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:122
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_SUB Op0, Op1.
MachineInstr * getOpcodeDef(unsigned Opcode, unsigned Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition: Utils.cpp:284
void setChangeObserver(GISelChangeObserver &Observer)
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
Abstract class that contains various methods for clients to notify about changes. ...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
static LLT scalarOrVector(uint16_t NumElements, LLT ScalarTy)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
LegalizeResult legalizeInstrStep(MachineInstr &MI)
Replace MI by a sequence of legal instructions that can implement the same operation.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
unsigned getReg() const
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
Helper class to build MachineInstr.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:587
MachineInstrBuilder buildUMulH(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
bool isValid() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:709
void setImm(int64_t immVal)
MachineInstrBuilder buildInsert(unsigned Res, unsigned Src, unsigned Op, unsigned Index)
virtual const CallLowering * getCallLowering() const
unsigned getAddressSpace() const
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:288
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:196
Some kind of error has occurred and we could not legalize this instruction.
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
Instruction was already legal and no change was made to the MachineFunction.
size_t size() const
Definition: SmallVector.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:970
static Type * getFP128Ty(LLVMContext &C)
Definition: Type.cpp:168
const APFloat & getValueAPF() const
Definition: Constants.h:302
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
unsigned createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:162
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:239
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:119
MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef< unsigned > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ...
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
LegalizeResult libcall(MachineInstr &MI)
Legalize an instruction by emitting a runtime library call instead.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:533
void setFlags(unsigned flags)
Definition: MachineInstr.h:303
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:81
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target...
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:631
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:694
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
virtual bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Class for arbitrary precision integers.
Definition: APInt.h:69
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
static MachineOperand CreateES(const char *SymName, unsigned char TargetFlags=0)
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType)
bool isPointer() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:253
Representation of each machine instruction.
Definition: MachineInstr.h:63
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1254
Instruction has been legalized and the MachineFunction changed.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_FCMP Pred, Op0, Op1.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_ADD Op0, Op1.
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< unsigned > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
#define I(x, y, z)
Definition: MD5.cpp:58
static Constant * getZeroValueForNegation(Type *Ty)
Floating point negation must be implemented with f(x) = -0.0 - x.
Definition: Constants.cpp:780
uint32_t Size
Definition: Profile.cpp:46
void setCImm(const ConstantInt *CI)
const DataLayout & getDataLayout() const
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Optional< MachineInstrBuilder > materializeGEP(unsigned &Res, unsigned Op0, const LLT &ValueTy, uint64_t Value)
Materialize and insert Res = G_GEP Op0, (G_CONSTANT Value)
uint16_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:289
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
unsigned getSizeInBits(unsigned Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
This file describes how to lower LLVM calls to machine code calls.
MachineInstrBuilder buildLoad(unsigned Res, unsigned Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr, unsigned CmpVal, unsigned NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
IRTranslator LLVM IR MI
unsigned greater than
Definition: InstrTypes.h:732
const MachineInstrBuilder & addDef(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:413
const ConstantInt * getCImm() const
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool isNullValue() const
Determine if all bits are clear.
Definition: APInt.h:405
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:143
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:658
This file describes how to lower LLVM code to machine code.
unsigned getPredicate() const