LLVM  10.0.0svn
LegalizerHelper.cpp
Go to the documentation of this file.
1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file implements the LegalizerHelper class to legalize
10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
11 /// primary legalization.
12 //
13 //===----------------------------------------------------------------------===//
14 
23 #include "llvm/Support/Debug.h"
26 
27 #define DEBUG_TYPE "legalizer"
28 
29 using namespace llvm;
30 using namespace LegalizeActions;
31 
32 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
33 ///
34 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
35 /// with any leftover piece as type \p LeftoverTy
36 ///
37 /// Returns -1 in the first element of the pair if the breakdown is not
38 /// satisfiable.
39 static std::pair<int, int>
40 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
41  assert(!LeftoverTy.isValid() && "this is an out argument");
42 
43  unsigned Size = OrigTy.getSizeInBits();
44  unsigned NarrowSize = NarrowTy.getSizeInBits();
45  unsigned NumParts = Size / NarrowSize;
46  unsigned LeftoverSize = Size - NumParts * NarrowSize;
47  assert(Size > NarrowSize);
48 
49  if (LeftoverSize == 0)
50  return {NumParts, 0};
51 
52  if (NarrowTy.isVector()) {
53  unsigned EltSize = OrigTy.getScalarSizeInBits();
54  if (LeftoverSize % EltSize != 0)
55  return {-1, -1};
56  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
57  } else {
58  LeftoverTy = LLT::scalar(LeftoverSize);
59  }
60 
61  int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
62  return std::make_pair(NumParts, NumLeftover);
63 }
64 
66  GISelChangeObserver &Observer,
67  MachineIRBuilder &Builder)
68  : MIRBuilder(Builder), MRI(MF.getRegInfo()),
69  LI(*MF.getSubtarget().getLegalizerInfo()), Observer(Observer) {
70  MIRBuilder.setMF(MF);
71  MIRBuilder.setChangeObserver(Observer);
72 }
73 
75  GISelChangeObserver &Observer,
77  : MIRBuilder(B), MRI(MF.getRegInfo()), LI(LI), Observer(Observer) {
78  MIRBuilder.setMF(MF);
79  MIRBuilder.setChangeObserver(Observer);
80 }
83  LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
84 
85  if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
86  MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
87  return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized
89  auto Step = LI.getAction(MI, MRI);
90  switch (Step.Action) {
91  case Legal:
92  LLVM_DEBUG(dbgs() << ".. Already legal\n");
93  return AlreadyLegal;
94  case Libcall:
95  LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
96  return libcall(MI);
97  case NarrowScalar:
98  LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
99  return narrowScalar(MI, Step.TypeIdx, Step.NewType);
100  case WidenScalar:
101  LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
102  return widenScalar(MI, Step.TypeIdx, Step.NewType);
103  case Lower:
104  LLVM_DEBUG(dbgs() << ".. Lower\n");
105  return lower(MI, Step.TypeIdx, Step.NewType);
106  case FewerElements:
107  LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
108  return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
109  case MoreElements:
110  LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
111  return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
112  case Custom:
113  LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
114  return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
116  default:
117  LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
118  return UnableToLegalize;
119  }
120 }
121 
122 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
123  SmallVectorImpl<Register> &VRegs) {
124  for (int i = 0; i < NumParts; ++i)
126  MIRBuilder.buildUnmerge(VRegs, Reg);
127 }
128 
129 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
130  LLT MainTy, LLT &LeftoverTy,
132  SmallVectorImpl<Register> &LeftoverRegs) {
133  assert(!LeftoverTy.isValid() && "this is an out argument");
134 
135  unsigned RegSize = RegTy.getSizeInBits();
136  unsigned MainSize = MainTy.getSizeInBits();
137  unsigned NumParts = RegSize / MainSize;
138  unsigned LeftoverSize = RegSize - NumParts * MainSize;
139 
140  // Use an unmerge when possible.
141  if (LeftoverSize == 0) {
142  for (unsigned I = 0; I < NumParts; ++I)
143  VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
144  MIRBuilder.buildUnmerge(VRegs, Reg);
145  return true;
146  }
147 
148  if (MainTy.isVector()) {
149  unsigned EltSize = MainTy.getScalarSizeInBits();
150  if (LeftoverSize % EltSize != 0)
151  return false;
152  LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
153  } else {
154  LeftoverTy = LLT::scalar(LeftoverSize);
155  }
156 
157  // For irregular sizes, extract the individual parts.
158  for (unsigned I = 0; I != NumParts; ++I) {
159  Register NewReg = MRI.createGenericVirtualRegister(MainTy);
160  VRegs.push_back(NewReg);
161  MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
162  }
163 
164  for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
165  Offset += LeftoverSize) {
166  Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
167  LeftoverRegs.push_back(NewReg);
168  MIRBuilder.buildExtract(NewReg, Reg, Offset);
169  }
170 
171  return true;
172 }
173 
174 void LegalizerHelper::insertParts(Register DstReg,
175  LLT ResultTy, LLT PartTy,
176  ArrayRef<Register> PartRegs,
177  LLT LeftoverTy,
178  ArrayRef<Register> LeftoverRegs) {
179  if (!LeftoverTy.isValid()) {
180  assert(LeftoverRegs.empty());
181 
182  if (!ResultTy.isVector()) {
183  MIRBuilder.buildMerge(DstReg, PartRegs);
184  return;
185  }
186 
187  if (PartTy.isVector())
188  MIRBuilder.buildConcatVectors(DstReg, PartRegs);
189  else
190  MIRBuilder.buildBuildVector(DstReg, PartRegs);
191  return;
192  }
193 
194  unsigned PartSize = PartTy.getSizeInBits();
195  unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
196 
197  Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
198  MIRBuilder.buildUndef(CurResultReg);
199 
200  unsigned Offset = 0;
201  for (Register PartReg : PartRegs) {
202  Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
203  MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
204  CurResultReg = NewResultReg;
205  Offset += PartSize;
206  }
207 
208  for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
209  // Use the original output register for the final insert to avoid a copy.
210  Register NewResultReg = (I + 1 == E) ?
211  DstReg : MRI.createGenericVirtualRegister(ResultTy);
212 
213  MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
214  CurResultReg = NewResultReg;
215  Offset += LeftoverPartSize;
216  }
217 }
218 
219 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
220  switch (Opcode) {
221  case TargetOpcode::G_SDIV:
222  assert((Size == 32 || Size == 64) && "Unsupported size");
223  return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
224  case TargetOpcode::G_UDIV:
225  assert((Size == 32 || Size == 64) && "Unsupported size");
226  return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
227  case TargetOpcode::G_SREM:
228  assert((Size == 32 || Size == 64) && "Unsupported size");
229  return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
230  case TargetOpcode::G_UREM:
231  assert((Size == 32 || Size == 64) && "Unsupported size");
232  return Size == 64 ? RTLIB::UREM_I64 : RTLIB::UREM_I32;
233  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
234  assert(Size == 32 && "Unsupported size");
235  return RTLIB::CTLZ_I32;
236  case TargetOpcode::G_FADD:
237  assert((Size == 32 || Size == 64) && "Unsupported size");
238  return Size == 64 ? RTLIB::ADD_F64 : RTLIB::ADD_F32;
239  case TargetOpcode::G_FSUB:
240  assert((Size == 32 || Size == 64) && "Unsupported size");
241  return Size == 64 ? RTLIB::SUB_F64 : RTLIB::SUB_F32;
242  case TargetOpcode::G_FMUL:
243  assert((Size == 32 || Size == 64) && "Unsupported size");
244  return Size == 64 ? RTLIB::MUL_F64 : RTLIB::MUL_F32;
245  case TargetOpcode::G_FDIV:
246  assert((Size == 32 || Size == 64) && "Unsupported size");
247  return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
248  case TargetOpcode::G_FEXP:
249  assert((Size == 32 || Size == 64) && "Unsupported size");
250  return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
251  case TargetOpcode::G_FEXP2:
252  assert((Size == 32 || Size == 64) && "Unsupported size");
253  return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
254  case TargetOpcode::G_FREM:
255  return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
256  case TargetOpcode::G_FPOW:
257  return Size == 64 ? RTLIB::POW_F64 : RTLIB::POW_F32;
258  case TargetOpcode::G_FMA:
259  assert((Size == 32 || Size == 64) && "Unsupported size");
260  return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
261  case TargetOpcode::G_FSIN:
262  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
263  return Size == 128 ? RTLIB::SIN_F128
264  : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
265  case TargetOpcode::G_FCOS:
266  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
267  return Size == 128 ? RTLIB::COS_F128
268  : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
269  case TargetOpcode::G_FLOG10:
270  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
271  return Size == 128 ? RTLIB::LOG10_F128
272  : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
273  case TargetOpcode::G_FLOG:
274  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
275  return Size == 128 ? RTLIB::LOG_F128
276  : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
277  case TargetOpcode::G_FLOG2:
278  assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
279  return Size == 128 ? RTLIB::LOG2_F128
280  : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
281  case TargetOpcode::G_FCEIL:
282  assert((Size == 32 || Size == 64) && "Unsupported size");
283  return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
284  case TargetOpcode::G_FFLOOR:
285  assert((Size == 32 || Size == 64) && "Unsupported size");
286  return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
287  }
288  llvm_unreachable("Unknown libcall function");
289 }
290 
293  const CallLowering::ArgInfo &Result,
295  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
296  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
297  const char *Name = TLI.getLibcallName(Libcall);
298 
299  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
300  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
301  MachineOperand::CreateES(Name), Result, Args))
303 
305 }
306 
307 // Useful for libcalls where all operands have the same type.
310  Type *OpType) {
311  auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
312 
314  for (unsigned i = 1; i < MI.getNumOperands(); i++)
315  Args.push_back({MI.getOperand(i).getReg(), OpType});
316  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
317  Args);
318 }
319 
322  MachineInstr &MI) {
323  assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
324  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
325 
327  for (unsigned i = 1; i < MI.getNumOperands(); i++) {
328  Register Reg = MI.getOperand(i).getReg();
329 
330  // Need derive an IR type for call lowering.
331  LLT OpLLT = MRI.getType(Reg);
332  Type *OpTy = nullptr;
333  if (OpLLT.isPointer())
334  OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
335  else
336  OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
337  Args.push_back({Reg, OpTy});
338  }
339 
340  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
341  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
343  RTLIB::Libcall RTLibcall;
344  switch (ID) {
345  case Intrinsic::memcpy:
346  RTLibcall = RTLIB::MEMCPY;
347  break;
348  case Intrinsic::memset:
349  RTLibcall = RTLIB::MEMSET;
350  break;
351  case Intrinsic::memmove:
352  RTLibcall = RTLIB::MEMMOVE;
353  break;
354  default:
356  }
357  const char *Name = TLI.getLibcallName(RTLibcall);
358 
359  MIRBuilder.setInstr(MI);
360  MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
361  if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(RTLibcall),
363  CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx)), Args))
365 
367 }
368 
369 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
370  Type *FromType) {
371  auto ToMVT = MVT::getVT(ToType);
372  auto FromMVT = MVT::getVT(FromType);
373 
374  switch (Opcode) {
375  case TargetOpcode::G_FPEXT:
376  return RTLIB::getFPEXT(FromMVT, ToMVT);
377  case TargetOpcode::G_FPTRUNC:
378  return RTLIB::getFPROUND(FromMVT, ToMVT);
379  case TargetOpcode::G_FPTOSI:
380  return RTLIB::getFPTOSINT(FromMVT, ToMVT);
381  case TargetOpcode::G_FPTOUI:
382  return RTLIB::getFPTOUINT(FromMVT, ToMVT);
383  case TargetOpcode::G_SITOFP:
384  return RTLIB::getSINTTOFP(FromMVT, ToMVT);
385  case TargetOpcode::G_UITOFP:
386  return RTLIB::getUINTTOFP(FromMVT, ToMVT);
387  }
388  llvm_unreachable("Unsupported libcall function");
389 }
390 
393  Type *FromType) {
395  return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
396  {{MI.getOperand(1).getReg(), FromType}});
397 }
398 
401  LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
402  unsigned Size = LLTy.getSizeInBits();
403  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
404 
405  MIRBuilder.setInstr(MI);
406 
407  switch (MI.getOpcode()) {
408  default:
409  return UnableToLegalize;
410  case TargetOpcode::G_SDIV:
411  case TargetOpcode::G_UDIV:
412  case TargetOpcode::G_SREM:
413  case TargetOpcode::G_UREM:
414  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
415  Type *HLTy = IntegerType::get(Ctx, Size);
416  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
417  if (Status != Legalized)
418  return Status;
419  break;
420  }
421  case TargetOpcode::G_FADD:
422  case TargetOpcode::G_FSUB:
423  case TargetOpcode::G_FMUL:
424  case TargetOpcode::G_FDIV:
425  case TargetOpcode::G_FMA:
426  case TargetOpcode::G_FPOW:
427  case TargetOpcode::G_FREM:
428  case TargetOpcode::G_FCOS:
429  case TargetOpcode::G_FSIN:
430  case TargetOpcode::G_FLOG10:
431  case TargetOpcode::G_FLOG:
432  case TargetOpcode::G_FLOG2:
433  case TargetOpcode::G_FEXP:
434  case TargetOpcode::G_FEXP2:
435  case TargetOpcode::G_FCEIL:
436  case TargetOpcode::G_FFLOOR: {
437  if (Size > 64) {
438  LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
439  return UnableToLegalize;
440  }
441  Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
442  auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
443  if (Status != Legalized)
444  return Status;
445  break;
446  }
447  case TargetOpcode::G_FPEXT: {
448  // FIXME: Support other floating point types (half, fp128 etc)
449  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
450  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
451  if (ToSize != 64 || FromSize != 32)
452  return UnableToLegalize;
455  if (Status != Legalized)
456  return Status;
457  break;
458  }
459  case TargetOpcode::G_FPTRUNC: {
460  // FIXME: Support other floating point types (half, fp128 etc)
461  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
462  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
463  if (ToSize != 32 || FromSize != 64)
464  return UnableToLegalize;
467  if (Status != Legalized)
468  return Status;
469  break;
470  }
471  case TargetOpcode::G_FPTOSI:
472  case TargetOpcode::G_FPTOUI: {
473  // FIXME: Support other types
474  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
475  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
476  if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
477  return UnableToLegalize;
479  MI, MIRBuilder,
480  ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
481  FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
482  if (Status != Legalized)
483  return Status;
484  break;
485  }
486  case TargetOpcode::G_SITOFP:
487  case TargetOpcode::G_UITOFP: {
488  // FIXME: Support other types
489  unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
490  unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
491  if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
492  return UnableToLegalize;
494  MI, MIRBuilder,
495  ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
496  FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
497  if (Status != Legalized)
498  return Status;
499  break;
500  }
501  }
502 
503  MI.eraseFromParent();
504  return Legalized;
505 }
506 
508  unsigned TypeIdx,
509  LLT NarrowTy) {
510  MIRBuilder.setInstr(MI);
511 
512  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
513  uint64_t NarrowSize = NarrowTy.getSizeInBits();
514 
515  switch (MI.getOpcode()) {
516  default:
517  return UnableToLegalize;
518  case TargetOpcode::G_IMPLICIT_DEF: {
519  // FIXME: add support for when SizeOp0 isn't an exact multiple of
520  // NarrowSize.
521  if (SizeOp0 % NarrowSize != 0)
522  return UnableToLegalize;
523  int NumParts = SizeOp0 / NarrowSize;
524 
525  SmallVector<Register, 2> DstRegs;
526  for (int i = 0; i < NumParts; ++i)
527  DstRegs.push_back(
528  MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
529 
530  Register DstReg = MI.getOperand(0).getReg();
531  if(MRI.getType(DstReg).isVector())
532  MIRBuilder.buildBuildVector(DstReg, DstRegs);
533  else
534  MIRBuilder.buildMerge(DstReg, DstRegs);
535  MI.eraseFromParent();
536  return Legalized;
537  }
538  case TargetOpcode::G_CONSTANT: {
539  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
540  const APInt &Val = MI.getOperand(1).getCImm()->getValue();
541  unsigned TotalSize = Ty.getSizeInBits();
542  unsigned NarrowSize = NarrowTy.getSizeInBits();
543  int NumParts = TotalSize / NarrowSize;
544 
545  SmallVector<Register, 4> PartRegs;
546  for (int I = 0; I != NumParts; ++I) {
547  unsigned Offset = I * NarrowSize;
548  auto K = MIRBuilder.buildConstant(NarrowTy,
549  Val.lshr(Offset).trunc(NarrowSize));
550  PartRegs.push_back(K.getReg(0));
551  }
552 
553  LLT LeftoverTy;
554  unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
555  SmallVector<Register, 1> LeftoverRegs;
556  if (LeftoverBits != 0) {
557  LeftoverTy = LLT::scalar(LeftoverBits);
558  auto K = MIRBuilder.buildConstant(
559  LeftoverTy,
560  Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
561  LeftoverRegs.push_back(K.getReg(0));
562  }
563 
564  insertParts(MI.getOperand(0).getReg(),
565  Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
566 
567  MI.eraseFromParent();
568  return Legalized;
569  }
570  case TargetOpcode::G_ADD: {
571  // FIXME: add support for when SizeOp0 isn't an exact multiple of
572  // NarrowSize.
573  if (SizeOp0 % NarrowSize != 0)
574  return UnableToLegalize;
575  // Expand in terms of carry-setting/consuming G_ADDE instructions.
576  int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
577 
578  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
579  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
580  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
581 
583  MIRBuilder.buildConstant(CarryIn, 0);
584 
585  for (int i = 0; i < NumParts; ++i) {
586  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
588 
589  MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
590  Src2Regs[i], CarryIn);
591 
592  DstRegs.push_back(DstReg);
593  CarryIn = CarryOut;
594  }
595  Register DstReg = MI.getOperand(0).getReg();
596  if(MRI.getType(DstReg).isVector())
597  MIRBuilder.buildBuildVector(DstReg, DstRegs);
598  else
599  MIRBuilder.buildMerge(DstReg, DstRegs);
600  MI.eraseFromParent();
601  return Legalized;
602  }
603  case TargetOpcode::G_SUB: {
604  // FIXME: add support for when SizeOp0 isn't an exact multiple of
605  // NarrowSize.
606  if (SizeOp0 % NarrowSize != 0)
607  return UnableToLegalize;
608 
609  int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
610 
611  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
612  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
613  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
614 
615  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
617  MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
618  {Src1Regs[0], Src2Regs[0]});
619  DstRegs.push_back(DstReg);
620  Register BorrowIn = BorrowOut;
621  for (int i = 1; i < NumParts; ++i) {
622  DstReg = MRI.createGenericVirtualRegister(NarrowTy);
623  BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
624 
625  MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
626  {Src1Regs[i], Src2Regs[i], BorrowIn});
627 
628  DstRegs.push_back(DstReg);
629  BorrowIn = BorrowOut;
630  }
631  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
632  MI.eraseFromParent();
633  return Legalized;
634  }
635  case TargetOpcode::G_MUL:
636  case TargetOpcode::G_UMULH:
637  return narrowScalarMul(MI, NarrowTy);
638  case TargetOpcode::G_EXTRACT:
639  return narrowScalarExtract(MI, TypeIdx, NarrowTy);
640  case TargetOpcode::G_INSERT:
641  return narrowScalarInsert(MI, TypeIdx, NarrowTy);
642  case TargetOpcode::G_LOAD: {
643  const auto &MMO = **MI.memoperands_begin();
644  Register DstReg = MI.getOperand(0).getReg();
645  LLT DstTy = MRI.getType(DstReg);
646  if (DstTy.isVector())
647  return UnableToLegalize;
648 
649  if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
650  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
651  auto &MMO = **MI.memoperands_begin();
652  MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
653  MIRBuilder.buildAnyExt(DstReg, TmpReg);
654  MI.eraseFromParent();
655  return Legalized;
656  }
657 
658  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
659  }
660  case TargetOpcode::G_ZEXTLOAD:
661  case TargetOpcode::G_SEXTLOAD: {
662  bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
663  Register DstReg = MI.getOperand(0).getReg();
664  Register PtrReg = MI.getOperand(1).getReg();
665 
666  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
667  auto &MMO = **MI.memoperands_begin();
668  if (MMO.getSizeInBits() == NarrowSize) {
669  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
670  } else {
671  unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
672  : TargetOpcode::G_SEXTLOAD;
673  MIRBuilder.buildInstr(ExtLoad)
674  .addDef(TmpReg)
675  .addUse(PtrReg)
676  .addMemOperand(&MMO);
677  }
678 
679  if (ZExt)
680  MIRBuilder.buildZExt(DstReg, TmpReg);
681  else
682  MIRBuilder.buildSExt(DstReg, TmpReg);
683 
684  MI.eraseFromParent();
685  return Legalized;
686  }
687  case TargetOpcode::G_STORE: {
688  const auto &MMO = **MI.memoperands_begin();
689 
690  Register SrcReg = MI.getOperand(0).getReg();
691  LLT SrcTy = MRI.getType(SrcReg);
692  if (SrcTy.isVector())
693  return UnableToLegalize;
694 
695  int NumParts = SizeOp0 / NarrowSize;
696  unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
697  unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
698  if (SrcTy.isVector() && LeftoverBits != 0)
699  return UnableToLegalize;
700 
701  if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
702  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
703  auto &MMO = **MI.memoperands_begin();
704  MIRBuilder.buildTrunc(TmpReg, SrcReg);
705  MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
706  MI.eraseFromParent();
707  return Legalized;
708  }
709 
710  return reduceLoadStoreWidth(MI, 0, NarrowTy);
711  }
712  case TargetOpcode::G_SELECT:
713  return narrowScalarSelect(MI, TypeIdx, NarrowTy);
714  case TargetOpcode::G_AND:
715  case TargetOpcode::G_OR:
716  case TargetOpcode::G_XOR: {
717  // Legalize bitwise operation:
718  // A = BinOp<Ty> B, C
719  // into:
720  // B1, ..., BN = G_UNMERGE_VALUES B
721  // C1, ..., CN = G_UNMERGE_VALUES C
722  // A1 = BinOp<Ty/N> B1, C2
723  // ...
724  // AN = BinOp<Ty/N> BN, CN
725  // A = G_MERGE_VALUES A1, ..., AN
726  return narrowScalarBasic(MI, TypeIdx, NarrowTy);
727  }
728  case TargetOpcode::G_SHL:
729  case TargetOpcode::G_LSHR:
730  case TargetOpcode::G_ASHR:
731  return narrowScalarShift(MI, TypeIdx, NarrowTy);
732  case TargetOpcode::G_CTLZ:
733  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
734  case TargetOpcode::G_CTTZ:
735  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
736  case TargetOpcode::G_CTPOP:
737  if (TypeIdx != 0)
738  return UnableToLegalize; // TODO
739 
740  Observer.changingInstr(MI);
741  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
742  Observer.changedInstr(MI);
743  return Legalized;
744  case TargetOpcode::G_INTTOPTR:
745  if (TypeIdx != 1)
746  return UnableToLegalize;
747 
748  Observer.changingInstr(MI);
749  narrowScalarSrc(MI, NarrowTy, 1);
750  Observer.changedInstr(MI);
751  return Legalized;
752  case TargetOpcode::G_PTRTOINT:
753  if (TypeIdx != 0)
754  return UnableToLegalize;
755 
756  Observer.changingInstr(MI);
757  narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
758  Observer.changedInstr(MI);
759  return Legalized;
760  case TargetOpcode::G_PHI: {
761  unsigned NumParts = SizeOp0 / NarrowSize;
762  SmallVector<Register, 2> DstRegs;
764  DstRegs.resize(NumParts);
765  SrcRegs.resize(MI.getNumOperands() / 2);
766  Observer.changingInstr(MI);
767  for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
768  MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
769  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
770  extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
771  SrcRegs[i / 2]);
772  }
773  MachineBasicBlock &MBB = *MI.getParent();
774  MIRBuilder.setInsertPt(MBB, MI);
775  for (unsigned i = 0; i < NumParts; ++i) {
776  DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
777  MachineInstrBuilder MIB =
778  MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
779  for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
780  MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
781  }
782  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
783  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
784  Observer.changedInstr(MI);
785  MI.eraseFromParent();
786  return Legalized;
787  }
788  case TargetOpcode::G_EXTRACT_VECTOR_ELT:
789  case TargetOpcode::G_INSERT_VECTOR_ELT: {
790  if (TypeIdx != 2)
791  return UnableToLegalize;
792 
793  int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
794  Observer.changingInstr(MI);
795  narrowScalarSrc(MI, NarrowTy, OpIdx);
796  Observer.changedInstr(MI);
797  return Legalized;
798  }
799  case TargetOpcode::G_ICMP: {
800  uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
801  if (NarrowSize * 2 != SrcSize)
802  return UnableToLegalize;
803 
804  Observer.changingInstr(MI);
805  Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
806  Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
807  MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg());
808 
809  Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
810  Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
811  MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg());
812 
813  CmpInst::Predicate Pred =
814  static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
815 
816  if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
817  MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
818  MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
819  MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
820  MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
821  MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
822  } else {
823  const LLT s1 = LLT::scalar(1);
824  MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH);
825  MachineInstrBuilder CmpHEQ =
826  MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH);
828  ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL);
829  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
830  }
831  Observer.changedInstr(MI);
832  MI.eraseFromParent();
833  return Legalized;
834  }
835  }
836 }
837 
838 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
839  unsigned OpIdx, unsigned ExtOpcode) {
840  MachineOperand &MO = MI.getOperand(OpIdx);
841  auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO.getReg()});
842  MO.setReg(ExtB->getOperand(0).getReg());
843 }
844 
845 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
846  unsigned OpIdx) {
847  MachineOperand &MO = MI.getOperand(OpIdx);
848  auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
849  {MO.getReg()});
850  MO.setReg(ExtB->getOperand(0).getReg());
851 }
852 
853 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
854  unsigned OpIdx, unsigned TruncOpcode) {
855  MachineOperand &MO = MI.getOperand(OpIdx);
856  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
858  MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
859  MO.setReg(DstExt);
860 }
861 
862 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
863  unsigned OpIdx, unsigned ExtOpcode) {
864  MachineOperand &MO = MI.getOperand(OpIdx);
865  Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
867  MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
868  MO.setReg(DstTrunc);
869 }
870 
871 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
872  unsigned OpIdx) {
873  MachineOperand &MO = MI.getOperand(OpIdx);
874  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
876  MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
877  MO.setReg(DstExt);
878 }
879 
880 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
881  unsigned OpIdx) {
882  MachineOperand &MO = MI.getOperand(OpIdx);
883 
884  LLT OldTy = MRI.getType(MO.getReg());
885  unsigned OldElts = OldTy.getNumElements();
886  unsigned NewElts = MoreTy.getNumElements();
887 
888  unsigned NumParts = NewElts / OldElts;
889 
890  // Use concat_vectors if the result is a multiple of the number of elements.
891  if (NumParts * OldElts == NewElts) {
893  Parts.push_back(MO.getReg());
894 
895  Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
896  for (unsigned I = 1; I != NumParts; ++I)
897  Parts.push_back(ImpDef);
898 
899  auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
900  MO.setReg(Concat.getReg(0));
901  return;
902  }
903 
904  Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
905  Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
906  MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
907  MO.setReg(MoreReg);
908 }
909 
911 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
912  LLT WideTy) {
913  if (TypeIdx != 1)
914  return UnableToLegalize;
915 
916  Register DstReg = MI.getOperand(0).getReg();
917  LLT DstTy = MRI.getType(DstReg);
918  if (DstTy.isVector())
919  return UnableToLegalize;
920 
921  Register Src1 = MI.getOperand(1).getReg();
922  LLT SrcTy = MRI.getType(Src1);
923  const int DstSize = DstTy.getSizeInBits();
924  const int SrcSize = SrcTy.getSizeInBits();
925  const int WideSize = WideTy.getSizeInBits();
926  const int NumMerge = (DstSize + WideSize - 1) / WideSize;
927 
928  unsigned NumOps = MI.getNumOperands();
929  unsigned NumSrc = MI.getNumOperands() - 1;
930  unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
931 
932  if (WideSize >= DstSize) {
933  // Directly pack the bits in the target type.
934  Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
935 
936  for (unsigned I = 2; I != NumOps; ++I) {
937  const unsigned Offset = (I - 1) * PartSize;
938 
939  Register SrcReg = MI.getOperand(I).getReg();
940  assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
941 
942  auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
943 
944  Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg :
945  MRI.createGenericVirtualRegister(WideTy);
946 
947  auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
948  auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
949  MIRBuilder.buildOr(NextResult, ResultReg, Shl);
950  ResultReg = NextResult;
951  }
952 
953  if (WideSize > DstSize)
954  MIRBuilder.buildTrunc(DstReg, ResultReg);
955 
956  MI.eraseFromParent();
957  return Legalized;
958  }
959 
960  // Unmerge the original values to the GCD type, and recombine to the next
961  // multiple greater than the original type.
962  //
963  // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
964  // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
965  // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
966  // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
967  // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
968  // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
969  // %12:_(s12) = G_MERGE_VALUES %10, %11
970  //
971  // Padding with undef if necessary:
972  //
973  // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
974  // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
975  // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
976  // %7:_(s2) = G_IMPLICIT_DEF
977  // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
978  // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
979  // %10:_(s12) = G_MERGE_VALUES %8, %9
980 
981  const int GCD = greatestCommonDivisor(SrcSize, WideSize);
982  LLT GCDTy = LLT::scalar(GCD);
983 
985  SmallVector<Register, 8> NewMergeRegs;
986  SmallVector<Register, 8> Unmerges;
987  LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
988 
989  // Decompose the original operands if they don't evenly divide.
990  for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
991  Register SrcReg = MI.getOperand(I).getReg();
992  if (GCD == SrcSize) {
993  Unmerges.push_back(SrcReg);
994  } else {
995  auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
996  for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
997  Unmerges.push_back(Unmerge.getReg(J));
998  }
999  }
1000 
1001  // Pad with undef to the next size that is a multiple of the requested size.
1002  if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1003  Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1004  for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1005  Unmerges.push_back(UndefReg);
1006  }
1007 
1008  const int PartsPerGCD = WideSize / GCD;
1009 
1010  // Build merges of each piece.
1011  ArrayRef<Register> Slicer(Unmerges);
1012  for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1013  auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1014  NewMergeRegs.push_back(Merge.getReg(0));
1015  }
1016 
1017  // A truncate may be necessary if the requested type doesn't evenly divide the
1018  // original result type.
1019  if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1020  MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1021  } else {
1022  auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1023  MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1024  }
1025 
1026  MI.eraseFromParent();
1027  return Legalized;
1028 }
1029 
1031 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1032  LLT WideTy) {
1033  if (TypeIdx != 0)
1034  return UnableToLegalize;
1035 
1036  unsigned NumDst = MI.getNumOperands() - 1;
1037  Register SrcReg = MI.getOperand(NumDst).getReg();
1038  LLT SrcTy = MRI.getType(SrcReg);
1039  if (!SrcTy.isScalar())
1040  return UnableToLegalize;
1041 
1042  Register Dst0Reg = MI.getOperand(0).getReg();
1043  LLT DstTy = MRI.getType(Dst0Reg);
1044  if (!DstTy.isScalar())
1045  return UnableToLegalize;
1046 
1047  unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
1048  LLT NewSrcTy = LLT::scalar(NewSrcSize);
1049  unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();
1050 
1051  auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);
1052 
1053  for (unsigned I = 1; I != NumDst; ++I) {
1054  auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
1055  auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
1056  WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
1057  }
1058 
1059  Observer.changingInstr(MI);
1060 
1061  MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
1062  for (unsigned I = 0; I != NumDst; ++I)
1063  widenScalarDst(MI, WideTy, I);
1064 
1065  Observer.changedInstr(MI);
1066 
1067  return Legalized;
1068 }
1069 
1071 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1072  LLT WideTy) {
1073  Register DstReg = MI.getOperand(0).getReg();
1074  Register SrcReg = MI.getOperand(1).getReg();
1075  LLT SrcTy = MRI.getType(SrcReg);
1076 
1077  LLT DstTy = MRI.getType(DstReg);
1078  unsigned Offset = MI.getOperand(2).getImm();
1079 
1080  if (TypeIdx == 0) {
1081  if (SrcTy.isVector() || DstTy.isVector())
1082  return UnableToLegalize;
1083 
1084  SrcOp Src(SrcReg);
1085  if (SrcTy.isPointer()) {
1086  // Extracts from pointers can be handled only if they are really just
1087  // simple integers.
1088  const DataLayout &DL = MIRBuilder.getDataLayout();
1090  return UnableToLegalize;
1091 
1092  LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1093  Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1094  SrcTy = SrcAsIntTy;
1095  }
1096 
1097  if (DstTy.isPointer())
1098  return UnableToLegalize;
1099 
1100  if (Offset == 0) {
1101  // Avoid a shift in the degenerate case.
1102  MIRBuilder.buildTrunc(DstReg,
1103  MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1104  MI.eraseFromParent();
1105  return Legalized;
1106  }
1107 
1108  // Do a shift in the source type.
1109  LLT ShiftTy = SrcTy;
1110  if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1111  Src = MIRBuilder.buildAnyExt(WideTy, Src);
1112  ShiftTy = WideTy;
1113  } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
1114  return UnableToLegalize;
1115 
1116  auto LShr = MIRBuilder.buildLShr(
1117  ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1118  MIRBuilder.buildTrunc(DstReg, LShr);
1119  MI.eraseFromParent();
1120  return Legalized;
1121  }
1122 
1123  if (SrcTy.isScalar()) {
1124  Observer.changingInstr(MI);
1125  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1126  Observer.changedInstr(MI);
1127  return Legalized;
1128  }
1129 
1130  if (!SrcTy.isVector())
1131  return UnableToLegalize;
1132 
1133  if (DstTy != SrcTy.getElementType())
1134  return UnableToLegalize;
1135 
1136  if (Offset % SrcTy.getScalarSizeInBits() != 0)
1137  return UnableToLegalize;
1138 
1139  Observer.changingInstr(MI);
1140  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1141 
1142  MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1143  Offset);
1144  widenScalarDst(MI, WideTy.getScalarType(), 0);
1145  Observer.changedInstr(MI);
1146  return Legalized;
1147 }
1148 
1150 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1151  LLT WideTy) {
1152  if (TypeIdx != 0)
1153  return UnableToLegalize;
1154  Observer.changingInstr(MI);
1155  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1156  widenScalarDst(MI, WideTy);
1157  Observer.changedInstr(MI);
1158  return Legalized;
1159 }
1160 
1162 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1163  MIRBuilder.setInstr(MI);
1164 
1165  switch (MI.getOpcode()) {
1166  default:
1167  return UnableToLegalize;
1168  case TargetOpcode::G_EXTRACT:
1169  return widenScalarExtract(MI, TypeIdx, WideTy);
1170  case TargetOpcode::G_INSERT:
1171  return widenScalarInsert(MI, TypeIdx, WideTy);
1172  case TargetOpcode::G_MERGE_VALUES:
1173  return widenScalarMergeValues(MI, TypeIdx, WideTy);
1174  case TargetOpcode::G_UNMERGE_VALUES:
1175  return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
1176  case TargetOpcode::G_UADDO:
1177  case TargetOpcode::G_USUBO: {
1178  if (TypeIdx == 1)
1179  return UnableToLegalize; // TODO
1180  auto LHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
1181  {MI.getOperand(2).getReg()});
1182  auto RHSZext = MIRBuilder.buildInstr(TargetOpcode::G_ZEXT, {WideTy},
1183  {MI.getOperand(3).getReg()});
1184  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_UADDO
1185  ? TargetOpcode::G_ADD
1186  : TargetOpcode::G_SUB;
1187  // Do the arithmetic in the larger type.
1188  auto NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSZext, RHSZext});
1189  LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1191  auto AndOp = MIRBuilder.buildInstr(
1192  TargetOpcode::G_AND, {WideTy},
1193  {NewOp, MIRBuilder.buildConstant(WideTy, Mask.getZExtValue())});
1194  // There is no overflow if the AndOp is the same as NewOp.
1196  AndOp);
1197  // Now trunc the NewOp to the original result.
1198  MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), NewOp);
1199  MI.eraseFromParent();
1200  return Legalized;
1201  }
1202  case TargetOpcode::G_CTTZ:
1203  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1204  case TargetOpcode::G_CTLZ:
1205  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1206  case TargetOpcode::G_CTPOP: {
1207  if (TypeIdx == 0) {
1208  Observer.changingInstr(MI);
1209  widenScalarDst(MI, WideTy, 0);
1210  Observer.changedInstr(MI);
1211  return Legalized;
1212  }
1213 
1214  Register SrcReg = MI.getOperand(1).getReg();
1215 
1216  // First ZEXT the input.
1217  auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
1218  LLT CurTy = MRI.getType(SrcReg);
1219  if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
1220  // The count is the same in the larger type except if the original
1221  // value was zero. This can be handled by setting the bit just off
1222  // the top of the original type.
1223  auto TopBit =
1224  APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
1225  MIBSrc = MIRBuilder.buildOr(
1226  WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
1227  }
1228 
1229  // Perform the operation at the larger size.
1230  auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
1231  // This is already the correct result for CTPOP and CTTZs
1232  if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
1233  MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
1234  // The correct result is NewOp - (Difference in widety and current ty).
1235  unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
1236  MIBNewOp = MIRBuilder.buildInstr(
1237  TargetOpcode::G_SUB, {WideTy},
1238  {MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
1239  }
1240 
1241  MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
1242  MI.eraseFromParent();
1243  return Legalized;
1244  }
1245  case TargetOpcode::G_BSWAP: {
1246  Observer.changingInstr(MI);
1247  Register DstReg = MI.getOperand(0).getReg();
1248 
1249  Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
1250  Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1251  Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
1252  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1253 
1254  MI.getOperand(0).setReg(DstExt);
1255 
1257 
1258  LLT Ty = MRI.getType(DstReg);
1259  unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
1260  MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
1261  MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
1262  .addDef(ShrReg)
1263  .addUse(DstExt)
1264  .addUse(ShiftAmtReg);
1265 
1266  MIRBuilder.buildTrunc(DstReg, ShrReg);
1267  Observer.changedInstr(MI);
1268  return Legalized;
1269  }
1270  case TargetOpcode::G_ADD:
1271  case TargetOpcode::G_AND:
1272  case TargetOpcode::G_MUL:
1273  case TargetOpcode::G_OR:
1274  case TargetOpcode::G_XOR:
1275  case TargetOpcode::G_SUB:
1276  // Perform operation at larger width (any extension is fines here, high bits
1277  // don't affect the result) and then truncate the result back to the
1278  // original type.
1279  Observer.changingInstr(MI);
1280  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1281  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1282  widenScalarDst(MI, WideTy);
1283  Observer.changedInstr(MI);
1284  return Legalized;
1285 
1286  case TargetOpcode::G_SHL:
1287  Observer.changingInstr(MI);
1288 
1289  if (TypeIdx == 0) {
1290  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1291  widenScalarDst(MI, WideTy);
1292  } else {
1293  assert(TypeIdx == 1);
1294  // The "number of bits to shift" operand must preserve its value as an
1295  // unsigned integer:
1296  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1297  }
1298 
1299  Observer.changedInstr(MI);
1300  return Legalized;
1301 
1302  case TargetOpcode::G_SDIV:
1303  case TargetOpcode::G_SREM:
1304  case TargetOpcode::G_SMIN:
1305  case TargetOpcode::G_SMAX:
1306  Observer.changingInstr(MI);
1307  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1308  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1309  widenScalarDst(MI, WideTy);
1310  Observer.changedInstr(MI);
1311  return Legalized;
1312 
1313  case TargetOpcode::G_ASHR:
1314  case TargetOpcode::G_LSHR:
1315  Observer.changingInstr(MI);
1316 
1317  if (TypeIdx == 0) {
1318  unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
1319  TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1320 
1321  widenScalarSrc(MI, WideTy, 1, CvtOp);
1322  widenScalarDst(MI, WideTy);
1323  } else {
1324  assert(TypeIdx == 1);
1325  // The "number of bits to shift" operand must preserve its value as an
1326  // unsigned integer:
1327  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1328  }
1329 
1330  Observer.changedInstr(MI);
1331  return Legalized;
1332  case TargetOpcode::G_UDIV:
1333  case TargetOpcode::G_UREM:
1334  case TargetOpcode::G_UMIN:
1335  case TargetOpcode::G_UMAX:
1336  Observer.changingInstr(MI);
1337  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1338  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
1339  widenScalarDst(MI, WideTy);
1340  Observer.changedInstr(MI);
1341  return Legalized;
1342 
1343  case TargetOpcode::G_SELECT:
1344  Observer.changingInstr(MI);
1345  if (TypeIdx == 0) {
1346  // Perform operation at larger width (any extension is fine here, high
1347  // bits don't affect the result) and then truncate the result back to the
1348  // original type.
1349  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1350  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1351  widenScalarDst(MI, WideTy);
1352  } else {
1353  bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
1354  // Explicit extension is required here since high bits affect the result.
1355  widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
1356  }
1357  Observer.changedInstr(MI);
1358  return Legalized;
1359 
1360  case TargetOpcode::G_FPTOSI:
1361  case TargetOpcode::G_FPTOUI:
1362  if (TypeIdx != 0)
1363  return UnableToLegalize;
1364  Observer.changingInstr(MI);
1365  widenScalarDst(MI, WideTy);
1366  Observer.changedInstr(MI);
1367  return Legalized;
1368 
1369  case TargetOpcode::G_SITOFP:
1370  if (TypeIdx != 1)
1371  return UnableToLegalize;
1372  Observer.changingInstr(MI);
1373  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
1374  Observer.changedInstr(MI);
1375  return Legalized;
1376 
1377  case TargetOpcode::G_UITOFP:
1378  if (TypeIdx != 1)
1379  return UnableToLegalize;
1380  Observer.changingInstr(MI);
1381  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1382  Observer.changedInstr(MI);
1383  return Legalized;
1384 
1385  case TargetOpcode::G_LOAD:
1386  case TargetOpcode::G_SEXTLOAD:
1387  case TargetOpcode::G_ZEXTLOAD:
1388  Observer.changingInstr(MI);
1389  widenScalarDst(MI, WideTy);
1390  Observer.changedInstr(MI);
1391  return Legalized;
1392 
1393  case TargetOpcode::G_STORE: {
1394  if (TypeIdx != 0)
1395  return UnableToLegalize;
1396 
1397  LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1398  if (!isPowerOf2_32(Ty.getSizeInBits()))
1399  return UnableToLegalize;
1400 
1401  Observer.changingInstr(MI);
1402 
1403  unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
1404  TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
1405  widenScalarSrc(MI, WideTy, 0, ExtType);
1406 
1407  Observer.changedInstr(MI);
1408  return Legalized;
1409  }
1410  case TargetOpcode::G_CONSTANT: {
1411  MachineOperand &SrcMO = MI.getOperand(1);
1413  const APInt &Val = SrcMO.getCImm()->getValue().sext(WideTy.getSizeInBits());
1414  Observer.changingInstr(MI);
1415  SrcMO.setCImm(ConstantInt::get(Ctx, Val));
1416 
1417  widenScalarDst(MI, WideTy);
1418  Observer.changedInstr(MI);
1419  return Legalized;
1420  }
1421  case TargetOpcode::G_FCONSTANT: {
1422  MachineOperand &SrcMO = MI.getOperand(1);
1424  APFloat Val = SrcMO.getFPImm()->getValueAPF();
1425  bool LosesInfo;
1426  switch (WideTy.getSizeInBits()) {
1427  case 32:
1429  &LosesInfo);
1430  break;
1431  case 64:
1433  &LosesInfo);
1434  break;
1435  default:
1436  return UnableToLegalize;
1437  }
1438 
1439  assert(!LosesInfo && "extend should always be lossless");
1440 
1441  Observer.changingInstr(MI);
1442  SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
1443 
1444  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1445  Observer.changedInstr(MI);
1446  return Legalized;
1447  }
1448  case TargetOpcode::G_IMPLICIT_DEF: {
1449  Observer.changingInstr(MI);
1450  widenScalarDst(MI, WideTy);
1451  Observer.changedInstr(MI);
1452  return Legalized;
1453  }
1454  case TargetOpcode::G_BRCOND:
1455  Observer.changingInstr(MI);
1456  widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
1457  Observer.changedInstr(MI);
1458  return Legalized;
1459 
1460  case TargetOpcode::G_FCMP:
1461  Observer.changingInstr(MI);
1462  if (TypeIdx == 0)
1463  widenScalarDst(MI, WideTy);
1464  else {
1465  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
1466  widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
1467  }
1468  Observer.changedInstr(MI);
1469  return Legalized;
1470 
1471  case TargetOpcode::G_ICMP:
1472  Observer.changingInstr(MI);
1473  if (TypeIdx == 0)
1474  widenScalarDst(MI, WideTy);
1475  else {
1476  unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
1477  MI.getOperand(1).getPredicate()))
1478  ? TargetOpcode::G_SEXT
1479  : TargetOpcode::G_ZEXT;
1480  widenScalarSrc(MI, WideTy, 2, ExtOpcode);
1481  widenScalarSrc(MI, WideTy, 3, ExtOpcode);
1482  }
1483  Observer.changedInstr(MI);
1484  return Legalized;
1485 
1486  case TargetOpcode::G_GEP:
1487  assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
1488  Observer.changingInstr(MI);
1489  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1490  Observer.changedInstr(MI);
1491  return Legalized;
1492 
1493  case TargetOpcode::G_PHI: {
1494  assert(TypeIdx == 0 && "Expecting only Idx 0");
1495 
1496  Observer.changingInstr(MI);
1497  for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
1498  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
1499  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1500  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
1501  }
1502 
1503  MachineBasicBlock &MBB = *MI.getParent();
1504  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
1505  widenScalarDst(MI, WideTy);
1506  Observer.changedInstr(MI);
1507  return Legalized;
1508  }
1509  case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
1510  if (TypeIdx == 0) {
1511  Register VecReg = MI.getOperand(1).getReg();
1512  LLT VecTy = MRI.getType(VecReg);
1513  Observer.changingInstr(MI);
1514 
1515  widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
1516  WideTy.getSizeInBits()),
1517  1, TargetOpcode::G_SEXT);
1518 
1519  widenScalarDst(MI, WideTy, 0);
1520  Observer.changedInstr(MI);
1521  return Legalized;
1522  }
1523 
1524  if (TypeIdx != 2)
1525  return UnableToLegalize;
1526  Observer.changingInstr(MI);
1527  widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
1528  Observer.changedInstr(MI);
1529  return Legalized;
1530  }
1531  case TargetOpcode::G_FADD:
1532  case TargetOpcode::G_FMUL:
1533  case TargetOpcode::G_FSUB:
1534  case TargetOpcode::G_FMA:
1535  case TargetOpcode::G_FNEG:
1536  case TargetOpcode::G_FABS:
1537  case TargetOpcode::G_FCANONICALIZE:
1538  case TargetOpcode::G_FMINNUM:
1539  case TargetOpcode::G_FMAXNUM:
1540  case TargetOpcode::G_FMINNUM_IEEE:
1541  case TargetOpcode::G_FMAXNUM_IEEE:
1542  case TargetOpcode::G_FMINIMUM:
1543  case TargetOpcode::G_FMAXIMUM:
1544  case TargetOpcode::G_FDIV:
1545  case TargetOpcode::G_FREM:
1546  case TargetOpcode::G_FCEIL:
1547  case TargetOpcode::G_FFLOOR:
1548  case TargetOpcode::G_FCOS:
1549  case TargetOpcode::G_FSIN:
1550  case TargetOpcode::G_FLOG10:
1551  case TargetOpcode::G_FLOG:
1552  case TargetOpcode::G_FLOG2:
1553  case TargetOpcode::G_FRINT:
1554  case TargetOpcode::G_FNEARBYINT:
1555  case TargetOpcode::G_FSQRT:
1556  case TargetOpcode::G_FEXP:
1557  case TargetOpcode::G_FEXP2:
1558  case TargetOpcode::G_FPOW:
1559  case TargetOpcode::G_INTRINSIC_TRUNC:
1560  case TargetOpcode::G_INTRINSIC_ROUND:
1561  assert(TypeIdx == 0);
1562  Observer.changingInstr(MI);
1563 
1564  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
1565  widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
1566 
1567  widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
1568  Observer.changedInstr(MI);
1569  return Legalized;
1570  case TargetOpcode::G_INTTOPTR:
1571  if (TypeIdx != 1)
1572  return UnableToLegalize;
1573 
1574  Observer.changingInstr(MI);
1575  widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
1576  Observer.changedInstr(MI);
1577  return Legalized;
1578  case TargetOpcode::G_PTRTOINT:
1579  if (TypeIdx != 0)
1580  return UnableToLegalize;
1581 
1582  Observer.changingInstr(MI);
1583  widenScalarDst(MI, WideTy, 0);
1584  Observer.changedInstr(MI);
1585  return Legalized;
1586  case TargetOpcode::G_BUILD_VECTOR: {
1587  Observer.changingInstr(MI);
1588 
1589  const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
1590  for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
1591  widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
1592 
1593  // Avoid changing the result vector type if the source element type was
1594  // requested.
1595  if (TypeIdx == 1) {
1596  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
1597  MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
1598  } else {
1599  widenScalarDst(MI, WideTy, 0);
1600  }
1601 
1602  Observer.changedInstr(MI);
1603  return Legalized;
1604  }
1605  }
1606 }
1607 
1609 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
1610  using namespace TargetOpcode;
1611  MIRBuilder.setInstr(MI);
1612 
1613  switch(MI.getOpcode()) {
1614  default:
1615  return UnableToLegalize;
1616  case TargetOpcode::G_SREM:
1617  case TargetOpcode::G_UREM: {
1618  Register QuotReg = MRI.createGenericVirtualRegister(Ty);
1619  MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
1620  .addDef(QuotReg)
1621  .addUse(MI.getOperand(1).getReg())
1622  .addUse(MI.getOperand(2).getReg());
1623 
1624  Register ProdReg = MRI.createGenericVirtualRegister(Ty);
1625  MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
1627  ProdReg);
1628  MI.eraseFromParent();
1629  return Legalized;
1630  }
1631  case TargetOpcode::G_SMULO:
1632  case TargetOpcode::G_UMULO: {
1633  // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
1634  // result.
1635  Register Res = MI.getOperand(0).getReg();
1636  Register Overflow = MI.getOperand(1).getReg();
1637  Register LHS = MI.getOperand(2).getReg();
1638  Register RHS = MI.getOperand(3).getReg();
1639 
1640  MIRBuilder.buildMul(Res, LHS, RHS);
1641 
1642  unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
1643  ? TargetOpcode::G_SMULH
1644  : TargetOpcode::G_UMULH;
1645 
1646  Register HiPart = MRI.createGenericVirtualRegister(Ty);
1647  MIRBuilder.buildInstr(Opcode)
1648  .addDef(HiPart)
1649  .addUse(LHS)
1650  .addUse(RHS);
1651 
1652  Register Zero = MRI.createGenericVirtualRegister(Ty);
1653  MIRBuilder.buildConstant(Zero, 0);
1654 
1655  // For *signed* multiply, overflow is detected by checking:
1656  // (hi != (lo >> bitwidth-1))
1657  if (Opcode == TargetOpcode::G_SMULH) {
1658  Register Shifted = MRI.createGenericVirtualRegister(Ty);
1659  Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
1660  MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
1661  MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
1662  .addDef(Shifted)
1663  .addUse(Res)
1664  .addUse(ShiftAmt);
1665  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
1666  } else {
1667  MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
1668  }
1669  MI.eraseFromParent();
1670  return Legalized;
1671  }
1672  case TargetOpcode::G_FNEG: {
1673  // TODO: Handle vector types once we are able to
1674  // represent them.
1675  if (Ty.isVector())
1676  return UnableToLegalize;
1677  Register Res = MI.getOperand(0).getReg();
1678  Type *ZeroTy;
1680  switch (Ty.getSizeInBits()) {
1681  case 16:
1682  ZeroTy = Type::getHalfTy(Ctx);
1683  break;
1684  case 32:
1685  ZeroTy = Type::getFloatTy(Ctx);
1686  break;
1687  case 64:
1688  ZeroTy = Type::getDoubleTy(Ctx);
1689  break;
1690  case 128:
1691  ZeroTy = Type::getFP128Ty(Ctx);
1692  break;
1693  default:
1694  llvm_unreachable("unexpected floating-point type");
1695  }
1696  ConstantFP &ZeroForNegation =
1697  *cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
1698  auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
1699  Register SubByReg = MI.getOperand(1).getReg();
1700  Register ZeroReg = Zero->getOperand(0).getReg();
1701  MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
1702  MI.getFlags());
1703  MI.eraseFromParent();
1704  return Legalized;
1705  }
1706  case TargetOpcode::G_FSUB: {
1707  // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
1708  // First, check if G_FNEG is marked as Lower. If so, we may
1709  // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
1710  if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
1711  return UnableToLegalize;
1712  Register Res = MI.getOperand(0).getReg();
1713  Register LHS = MI.getOperand(1).getReg();
1714  Register RHS = MI.getOperand(2).getReg();
1715  Register Neg = MRI.createGenericVirtualRegister(Ty);
1716  MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
1717  MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
1718  MI.eraseFromParent();
1719  return Legalized;
1720  }
1721  case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
1722  Register OldValRes = MI.getOperand(0).getReg();
1723  Register SuccessRes = MI.getOperand(1).getReg();
1724  Register Addr = MI.getOperand(2).getReg();
1725  Register CmpVal = MI.getOperand(3).getReg();
1726  Register NewVal = MI.getOperand(4).getReg();
1727  MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
1728  **MI.memoperands_begin());
1729  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
1730  MI.eraseFromParent();
1731  return Legalized;
1732  }
1733  case TargetOpcode::G_LOAD:
1734  case TargetOpcode::G_SEXTLOAD:
1735  case TargetOpcode::G_ZEXTLOAD: {
1736  // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
1737  Register DstReg = MI.getOperand(0).getReg();
1738  Register PtrReg = MI.getOperand(1).getReg();
1739  LLT DstTy = MRI.getType(DstReg);
1740  auto &MMO = **MI.memoperands_begin();
1741 
1742  if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
1743  // In the case of G_LOAD, this was a non-extending load already and we're
1744  // about to lower to the same instruction.
1745  if (MI.getOpcode() == TargetOpcode::G_LOAD)
1746  return UnableToLegalize;
1747  MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
1748  MI.eraseFromParent();
1749  return Legalized;
1750  }
1751 
1752  if (DstTy.isScalar()) {
1753  Register TmpReg =
1754  MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
1755  MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
1756  switch (MI.getOpcode()) {
1757  default:
1758  llvm_unreachable("Unexpected opcode");
1759  case TargetOpcode::G_LOAD:
1760  MIRBuilder.buildAnyExt(DstReg, TmpReg);
1761  break;
1762  case TargetOpcode::G_SEXTLOAD:
1763  MIRBuilder.buildSExt(DstReg, TmpReg);
1764  break;
1765  case TargetOpcode::G_ZEXTLOAD:
1766  MIRBuilder.buildZExt(DstReg, TmpReg);
1767  break;
1768  }
1769  MI.eraseFromParent();
1770  return Legalized;
1771  }
1772 
1773  return UnableToLegalize;
1774  }
1775  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1776  case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1777  case TargetOpcode::G_CTLZ:
1778  case TargetOpcode::G_CTTZ:
1779  case TargetOpcode::G_CTPOP:
1780  return lowerBitCount(MI, TypeIdx, Ty);
1781  case G_UADDO: {
1782  Register Res = MI.getOperand(0).getReg();
1783  Register CarryOut = MI.getOperand(1).getReg();
1784  Register LHS = MI.getOperand(2).getReg();
1785  Register RHS = MI.getOperand(3).getReg();
1786 
1787  MIRBuilder.buildAdd(Res, LHS, RHS);
1788  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
1789 
1790  MI.eraseFromParent();
1791  return Legalized;
1792  }
1793  case G_UADDE: {
1794  Register Res = MI.getOperand(0).getReg();
1795  Register CarryOut = MI.getOperand(1).getReg();
1796  Register LHS = MI.getOperand(2).getReg();
1797  Register RHS = MI.getOperand(3).getReg();
1798  Register CarryIn = MI.getOperand(4).getReg();
1799 
1800  Register TmpRes = MRI.createGenericVirtualRegister(Ty);
1801  Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
1802 
1803  MIRBuilder.buildAdd(TmpRes, LHS, RHS);
1804  MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
1805  MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
1806  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
1807 
1808  MI.eraseFromParent();
1809  return Legalized;
1810  }
1811  case G_USUBO: {
1812  Register Res = MI.getOperand(0).getReg();
1813  Register BorrowOut = MI.getOperand(1).getReg();
1814  Register LHS = MI.getOperand(2).getReg();
1815  Register RHS = MI.getOperand(3).getReg();
1816 
1817  MIRBuilder.buildSub(Res, LHS, RHS);
1818  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
1819 
1820  MI.eraseFromParent();
1821  return Legalized;
1822  }
1823  case G_USUBE: {
1824  Register Res = MI.getOperand(0).getReg();
1825  Register BorrowOut = MI.getOperand(1).getReg();
1826  Register LHS = MI.getOperand(2).getReg();
1827  Register RHS = MI.getOperand(3).getReg();
1828  Register BorrowIn = MI.getOperand(4).getReg();
1829 
1830  Register TmpRes = MRI.createGenericVirtualRegister(Ty);
1831  Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
1832  Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
1833  Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
1834 
1835  MIRBuilder.buildSub(TmpRes, LHS, RHS);
1836  MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
1837  MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
1838  MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
1839  MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
1840  MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
1841 
1842  MI.eraseFromParent();
1843  return Legalized;
1844  }
1845  case G_UITOFP:
1846  return lowerUITOFP(MI, TypeIdx, Ty);
1847  case G_SITOFP:
1848  return lowerSITOFP(MI, TypeIdx, Ty);
1849  case G_SMIN:
1850  case G_SMAX:
1851  case G_UMIN:
1852  case G_UMAX:
1853  return lowerMinMax(MI, TypeIdx, Ty);
1854  case G_FCOPYSIGN:
1855  return lowerFCopySign(MI, TypeIdx, Ty);
1856  case G_FMINNUM:
1857  case G_FMAXNUM:
1858  return lowerFMinNumMaxNum(MI);
1859  }
1860 }
1861 
1863  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
1864  SmallVector<Register, 2> DstRegs;
1865 
1866  unsigned NarrowSize = NarrowTy.getSizeInBits();
1867  Register DstReg = MI.getOperand(0).getReg();
1868  unsigned Size = MRI.getType(DstReg).getSizeInBits();
1869  int NumParts = Size / NarrowSize;
1870  // FIXME: Don't know how to handle the situation where the small vectors
1871  // aren't all the same size yet.
1872  if (Size % NarrowSize != 0)
1873  return UnableToLegalize;
1874 
1875  for (int i = 0; i < NumParts; ++i) {
1876  Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
1877  MIRBuilder.buildUndef(TmpReg);
1878  DstRegs.push_back(TmpReg);
1879  }
1880 
1881  if (NarrowTy.isVector())
1882  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1883  else
1884  MIRBuilder.buildBuildVector(DstReg, DstRegs);
1885 
1886  MI.eraseFromParent();
1887  return Legalized;
1888 }
1889 
1892  LLT NarrowTy) {
1893  const unsigned Opc = MI.getOpcode();
1894  const unsigned NumOps = MI.getNumOperands() - 1;
1895  const unsigned NarrowSize = NarrowTy.getSizeInBits();
1896  const Register DstReg = MI.getOperand(0).getReg();
1897  const unsigned Flags = MI.getFlags();
1898  const LLT DstTy = MRI.getType(DstReg);
1899  const unsigned Size = DstTy.getSizeInBits();
1900  const int NumParts = Size / NarrowSize;
1901  const LLT EltTy = DstTy.getElementType();
1902  const unsigned EltSize = EltTy.getSizeInBits();
1903  const unsigned BitsForNumParts = NarrowSize * NumParts;
1904 
1905  // Check if we have any leftovers. If we do, then only handle the case where
1906  // the leftover is one element.
1907  if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
1908  return UnableToLegalize;
1909 
1910  if (BitsForNumParts != Size) {
1911  Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
1912  MIRBuilder.buildUndef(AccumDstReg);
1913 
1914  // Handle the pieces which evenly divide into the requested type with
1915  // extract/op/insert sequence.
1916  for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
1917  SmallVector<SrcOp, 4> SrcOps;
1918  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1919  Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
1920  MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
1921  SrcOps.push_back(PartOpReg);
1922  }
1923 
1924  Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
1925  MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
1926 
1927  Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
1928  MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
1929  AccumDstReg = PartInsertReg;
1930  }
1931 
1932  // Handle the remaining element sized leftover piece.
1933  SmallVector<SrcOp, 4> SrcOps;
1934  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
1935  Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
1936  MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
1937  BitsForNumParts);
1938  SrcOps.push_back(PartOpReg);
1939  }
1940 
1941  Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
1942  MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
1943  MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
1944  MI.eraseFromParent();
1945 
1946  return Legalized;
1947  }
1948 
1949  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
1950 
1951  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
1952 
1953  if (NumOps >= 2)
1954  extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
1955 
1956  if (NumOps >= 3)
1957  extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
1958 
1959  for (int i = 0; i < NumParts; ++i) {
1960  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
1961 
1962  if (NumOps == 1)
1963  MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
1964  else if (NumOps == 2) {
1965  MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
1966  } else if (NumOps == 3) {
1967  MIRBuilder.buildInstr(Opc, {DstReg},
1968  {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
1969  }
1970 
1971  DstRegs.push_back(DstReg);
1972  }
1973 
1974  if (NarrowTy.isVector())
1975  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
1976  else
1977  MIRBuilder.buildBuildVector(DstReg, DstRegs);
1978 
1979  MI.eraseFromParent();
1980  return Legalized;
1981 }
1982 
1983 // Handle splitting vector operations which need to have the same number of
1984 // elements in each type index, but each type index may have a different element
1985 // type.
1986 //
1987 // e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
1988 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1989 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1990 //
1991 // Also handles some irregular breakdown cases, e.g.
1992 // <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
1993 // <2 x s64> = G_SHL <2 x s64>, <2 x s32>
1994 // s64 = G_SHL s64, s32
1997  MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
1998  if (TypeIdx != 0)
1999  return UnableToLegalize;
2000 
2001  const LLT NarrowTy0 = NarrowTyArg;
2002  const unsigned NewNumElts =
2003  NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
2004 
2005  const Register DstReg = MI.getOperand(0).getReg();
2006  LLT DstTy = MRI.getType(DstReg);
2007  LLT LeftoverTy0;
2008 
2009  // All of the operands need to have the same number of elements, so if we can
2010  // determine a type breakdown for the result type, we can for all of the
2011  // source types.
2012  int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
2013  if (NumParts < 0)
2014  return UnableToLegalize;
2015 
2017 
2018  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
2019  SmallVector<Register, 4> PartRegs, LeftoverRegs;
2020 
2021  for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
2022  LLT LeftoverTy;
2023  Register SrcReg = MI.getOperand(I).getReg();
2024  LLT SrcTyI = MRI.getType(SrcReg);
2025  LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
2026  LLT LeftoverTyI;
2027 
2028  // Split this operand into the requested typed registers, and any leftover
2029  // required to reproduce the original type.
2030  if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
2031  LeftoverRegs))
2032  return UnableToLegalize;
2033 
2034  if (I == 1) {
2035  // For the first operand, create an instruction for each part and setup
2036  // the result.
2037  for (Register PartReg : PartRegs) {
2038  Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2040  .addDef(PartDstReg)
2041  .addUse(PartReg));
2042  DstRegs.push_back(PartDstReg);
2043  }
2044 
2045  for (Register LeftoverReg : LeftoverRegs) {
2046  Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
2048  .addDef(PartDstReg)
2049  .addUse(LeftoverReg));
2050  LeftoverDstRegs.push_back(PartDstReg);
2051  }
2052  } else {
2053  assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
2054 
2055  // Add the newly created operand splits to the existing instructions. The
2056  // odd-sized pieces are ordered after the requested NarrowTyArg sized
2057  // pieces.
2058  unsigned InstCount = 0;
2059  for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
2060  NewInsts[InstCount++].addUse(PartRegs[J]);
2061  for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
2062  NewInsts[InstCount++].addUse(LeftoverRegs[J]);
2063  }
2064 
2065  PartRegs.clear();
2066  LeftoverRegs.clear();
2067  }
2068 
2069  // Insert the newly built operations and rebuild the result register.
2070  for (auto &MIB : NewInsts)
2071  MIRBuilder.insertInstr(MIB);
2072 
2073  insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
2074 
2075  MI.eraseFromParent();
2076  return Legalized;
2077 }
2078 
2081  LLT NarrowTy) {
2082  if (TypeIdx != 0)
2083  return UnableToLegalize;
2084 
2085  Register DstReg = MI.getOperand(0).getReg();
2086  Register SrcReg = MI.getOperand(1).getReg();
2087  LLT DstTy = MRI.getType(DstReg);
2088  LLT SrcTy = MRI.getType(SrcReg);
2089 
2090  LLT NarrowTy0 = NarrowTy;
2091  LLT NarrowTy1;
2092  unsigned NumParts;
2093 
2094  if (NarrowTy.isVector()) {
2095  // Uneven breakdown not handled.
2096  NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
2097  if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
2098  return UnableToLegalize;
2099 
2100  NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
2101  } else {
2102  NumParts = DstTy.getNumElements();
2103  NarrowTy1 = SrcTy.getElementType();
2104  }
2105 
2106  SmallVector<Register, 4> SrcRegs, DstRegs;
2107  extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
2108 
2109  for (unsigned I = 0; I < NumParts; ++I) {
2110  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2111  MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
2112  .addDef(DstReg)
2113  .addUse(SrcRegs[I]);
2114 
2115  NewInst->setFlags(MI.getFlags());
2116  DstRegs.push_back(DstReg);
2117  }
2118 
2119  if (NarrowTy.isVector())
2120  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2121  else
2122  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2123 
2124  MI.eraseFromParent();
2125  return Legalized;
2126 }
2127 
2130  LLT NarrowTy) {
2131  Register DstReg = MI.getOperand(0).getReg();
2132  Register Src0Reg = MI.getOperand(2).getReg();
2133  LLT DstTy = MRI.getType(DstReg);
2134  LLT SrcTy = MRI.getType(Src0Reg);
2135 
2136  unsigned NumParts;
2137  LLT NarrowTy0, NarrowTy1;
2138 
2139  if (TypeIdx == 0) {
2140  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2141  unsigned OldElts = DstTy.getNumElements();
2142 
2143  NarrowTy0 = NarrowTy;
2144  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
2145  NarrowTy1 = NarrowTy.isVector() ?
2146  LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
2147  SrcTy.getElementType();
2148 
2149  } else {
2150  unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
2151  unsigned OldElts = SrcTy.getNumElements();
2152 
2153  NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
2154  NarrowTy.getNumElements();
2155  NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
2156  DstTy.getScalarSizeInBits());
2157  NarrowTy1 = NarrowTy;
2158  }
2159 
2160  // FIXME: Don't know how to handle the situation where the small vectors
2161  // aren't all the same size yet.
2162  if (NarrowTy1.isVector() &&
2163  NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
2164  return UnableToLegalize;
2165 
2166  CmpInst::Predicate Pred
2167  = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
2168 
2169  SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
2170  extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
2171  extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
2172 
2173  for (unsigned I = 0; I < NumParts; ++I) {
2174  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2175  DstRegs.push_back(DstReg);
2176 
2177  if (MI.getOpcode() == TargetOpcode::G_ICMP)
2178  MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2179  else {
2180  MachineInstr *NewCmp
2181  = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
2182  NewCmp->setFlags(MI.getFlags());
2183  }
2184  }
2185 
2186  if (NarrowTy1.isVector())
2187  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2188  else
2189  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2190 
2191  MI.eraseFromParent();
2192  return Legalized;
2193 }
2194 
2197  LLT NarrowTy) {
2198  Register DstReg = MI.getOperand(0).getReg();
2199  Register CondReg = MI.getOperand(1).getReg();
2200 
2201  unsigned NumParts = 0;
2202  LLT NarrowTy0, NarrowTy1;
2203 
2204  LLT DstTy = MRI.getType(DstReg);
2205  LLT CondTy = MRI.getType(CondReg);
2206  unsigned Size = DstTy.getSizeInBits();
2207 
2208  assert(TypeIdx == 0 || CondTy.isVector());
2209 
2210  if (TypeIdx == 0) {
2211  NarrowTy0 = NarrowTy;
2212  NarrowTy1 = CondTy;
2213 
2214  unsigned NarrowSize = NarrowTy0.getSizeInBits();
2215  // FIXME: Don't know how to handle the situation where the small vectors
2216  // aren't all the same size yet.
2217  if (Size % NarrowSize != 0)
2218  return UnableToLegalize;
2219 
2220  NumParts = Size / NarrowSize;
2221 
2222  // Need to break down the condition type
2223  if (CondTy.isVector()) {
2224  if (CondTy.getNumElements() == NumParts)
2225  NarrowTy1 = CondTy.getElementType();
2226  else
2227  NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
2228  CondTy.getScalarSizeInBits());
2229  }
2230  } else {
2231  NumParts = CondTy.getNumElements();
2232  if (NarrowTy.isVector()) {
2233  // TODO: Handle uneven breakdown.
2234  if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
2235  return UnableToLegalize;
2236 
2237  return UnableToLegalize;
2238  } else {
2239  NarrowTy0 = DstTy.getElementType();
2240  NarrowTy1 = NarrowTy;
2241  }
2242  }
2243 
2244  SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
2245  if (CondTy.isVector())
2246  extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
2247 
2248  extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
2249  extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
2250 
2251  for (unsigned i = 0; i < NumParts; ++i) {
2252  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
2253  MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
2254  Src1Regs[i], Src2Regs[i]);
2255  DstRegs.push_back(DstReg);
2256  }
2257 
2258  if (NarrowTy0.isVector())
2259  MIRBuilder.buildConcatVectors(DstReg, DstRegs);
2260  else
2261  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2262 
2263  MI.eraseFromParent();
2264  return Legalized;
2265 }
2266 
2269  LLT NarrowTy) {
2270  const Register DstReg = MI.getOperand(0).getReg();
2271  LLT PhiTy = MRI.getType(DstReg);
2272  LLT LeftoverTy;
2273 
2274  // All of the operands need to have the same number of elements, so if we can
2275  // determine a type breakdown for the result type, we can for all of the
2276  // source types.
2277  int NumParts, NumLeftover;
2278  std::tie(NumParts, NumLeftover)
2279  = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
2280  if (NumParts < 0)
2281  return UnableToLegalize;
2282 
2283  SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
2285 
2286  const int TotalNumParts = NumParts + NumLeftover;
2287 
2288  // Insert the new phis in the result block first.
2289  for (int I = 0; I != TotalNumParts; ++I) {
2290  LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
2291  Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
2292  NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
2293  .addDef(PartDstReg));
2294  if (I < NumParts)
2295  DstRegs.push_back(PartDstReg);
2296  else
2297  LeftoverDstRegs.push_back(PartDstReg);
2298  }
2299 
2300  MachineBasicBlock *MBB = MI.getParent();
2301  MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
2302  insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
2303 
2304  SmallVector<Register, 4> PartRegs, LeftoverRegs;
2305 
2306  // Insert code to extract the incoming values in each predecessor block.
2307  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2308  PartRegs.clear();
2309  LeftoverRegs.clear();
2310 
2311  Register SrcReg = MI.getOperand(I).getReg();
2312  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2313  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2314 
2315  LLT Unused;
2316  if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
2317  LeftoverRegs))
2318  return UnableToLegalize;
2319 
2320  // Add the newly created operand splits to the existing instructions. The
2321  // odd-sized pieces are ordered after the requested NarrowTyArg sized
2322  // pieces.
2323  for (int J = 0; J != TotalNumParts; ++J) {
2324  MachineInstrBuilder MIB = NewInsts[J];
2325  MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
2326  MIB.addMBB(&OpMBB);
2327  }
2328  }
2329 
2330  MI.eraseFromParent();
2331  return Legalized;
2332 }
2333 
2336  LLT NarrowTy) {
2337  // FIXME: Don't know how to handle secondary types yet.
2338  if (TypeIdx != 0)
2339  return UnableToLegalize;
2340 
2341  MachineMemOperand *MMO = *MI.memoperands_begin();
2342 
2343  // This implementation doesn't work for atomics. Give up instead of doing
2344  // something invalid.
2345  if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
2347  return UnableToLegalize;
2348 
2349  bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
2350  Register ValReg = MI.getOperand(0).getReg();
2351  Register AddrReg = MI.getOperand(1).getReg();
2352  LLT ValTy = MRI.getType(ValReg);
2353 
2354  int NumParts = -1;
2355  int NumLeftover = -1;
2356  LLT LeftoverTy;
2357  SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
2358  if (IsLoad) {
2359  std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
2360  } else {
2361  if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
2362  NarrowLeftoverRegs)) {
2363  NumParts = NarrowRegs.size();
2364  NumLeftover = NarrowLeftoverRegs.size();
2365  }
2366  }
2367 
2368  if (NumParts == -1)
2369  return UnableToLegalize;
2370 
2371  const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
2372 
2373  unsigned TotalSize = ValTy.getSizeInBits();
2374 
2375  // Split the load/store into PartTy sized pieces starting at Offset. If this
2376  // is a load, return the new registers in ValRegs. For a store, each elements
2377  // of ValRegs should be PartTy. Returns the next offset that needs to be
2378  // handled.
2379  auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
2380  unsigned Offset) -> unsigned {
2382  unsigned PartSize = PartTy.getSizeInBits();
2383  for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
2384  Offset += PartSize, ++Idx) {
2385  unsigned ByteSize = PartSize / 8;
2386  unsigned ByteOffset = Offset / 8;
2387  Register NewAddrReg;
2388 
2389  MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
2390 
2391  MachineMemOperand *NewMMO =
2392  MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
2393 
2394  if (IsLoad) {
2395  Register Dst = MRI.createGenericVirtualRegister(PartTy);
2396  ValRegs.push_back(Dst);
2397  MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
2398  } else {
2399  MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
2400  }
2401  }
2402 
2403  return Offset;
2404  };
2405 
2406  unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
2407 
2408  // Handle the rest of the register if this isn't an even type breakdown.
2409  if (LeftoverTy.isValid())
2410  splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
2411 
2412  if (IsLoad) {
2413  insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
2414  LeftoverTy, NarrowLeftoverRegs);
2415  }
2416 
2417  MI.eraseFromParent();
2418  return Legalized;
2419 }
2420 
2423  LLT NarrowTy) {
2424  using namespace TargetOpcode;
2425 
2426  MIRBuilder.setInstr(MI);
2427  switch (MI.getOpcode()) {
2428  case G_IMPLICIT_DEF:
2429  return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
2430  case G_AND:
2431  case G_OR:
2432  case G_XOR:
2433  case G_ADD:
2434  case G_SUB:
2435  case G_MUL:
2436  case G_SMULH:
2437  case G_UMULH:
2438  case G_FADD:
2439  case G_FMUL:
2440  case G_FSUB:
2441  case G_FNEG:
2442  case G_FABS:
2443  case G_FCANONICALIZE:
2444  case G_FDIV:
2445  case G_FREM:
2446  case G_FMA:
2447  case G_FPOW:
2448  case G_FEXP:
2449  case G_FEXP2:
2450  case G_FLOG:
2451  case G_FLOG2:
2452  case G_FLOG10:
2453  case G_FNEARBYINT:
2454  case G_FCEIL:
2455  case G_FFLOOR:
2456  case G_FRINT:
2457  case G_INTRINSIC_ROUND:
2458  case G_INTRINSIC_TRUNC:
2459  case G_FCOS:
2460  case G_FSIN:
2461  case G_FSQRT:
2462  case G_BSWAP:
2463  case G_SDIV:
2464  case G_SMIN:
2465  case G_SMAX:
2466  case G_UMIN:
2467  case G_UMAX:
2468  case G_FMINNUM:
2469  case G_FMAXNUM:
2470  case G_FMINNUM_IEEE:
2471  case G_FMAXNUM_IEEE:
2472  case G_FMINIMUM:
2473  case G_FMAXIMUM:
2474  return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
2475  case G_SHL:
2476  case G_LSHR:
2477  case G_ASHR:
2478  case G_CTLZ:
2479  case G_CTLZ_ZERO_UNDEF:
2480  case G_CTTZ:
2481  case G_CTTZ_ZERO_UNDEF:
2482  case G_CTPOP:
2483  case G_FCOPYSIGN:
2484  return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
2485  case G_ZEXT:
2486  case G_SEXT:
2487  case G_ANYEXT:
2488  case G_FPEXT:
2489  case G_FPTRUNC:
2490  case G_SITOFP:
2491  case G_UITOFP:
2492  case G_FPTOSI:
2493  case G_FPTOUI:
2494  case G_INTTOPTR:
2495  case G_PTRTOINT:
2496  case G_ADDRSPACE_CAST:
2497  return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
2498  case G_ICMP:
2499  case G_FCMP:
2500  return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
2501  case G_SELECT:
2502  return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
2503  case G_PHI:
2504  return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
2505  case G_LOAD:
2506  case G_STORE:
2507  return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
2508  default:
2509  return UnableToLegalize;
2510  }
2511 }
2512 
2515  const LLT HalfTy, const LLT AmtTy) {
2516 
2517  Register InL = MRI.createGenericVirtualRegister(HalfTy);
2518  Register InH = MRI.createGenericVirtualRegister(HalfTy);
2519  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
2520 
2521  if (Amt.isNullValue()) {
2522  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
2523  MI.eraseFromParent();
2524  return Legalized;
2525  }
2526 
2527  LLT NVT = HalfTy;
2528  unsigned NVTBits = HalfTy.getSizeInBits();
2529  unsigned VTBits = 2 * NVTBits;
2530 
2531  SrcOp Lo(Register(0)), Hi(Register(0));
2532  if (MI.getOpcode() == TargetOpcode::G_SHL) {
2533  if (Amt.ugt(VTBits)) {
2534  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
2535  } else if (Amt.ugt(NVTBits)) {
2536  Lo = MIRBuilder.buildConstant(NVT, 0);
2537  Hi = MIRBuilder.buildShl(NVT, InL,
2538  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2539  } else if (Amt == NVTBits) {
2540  Lo = MIRBuilder.buildConstant(NVT, 0);
2541  Hi = InL;
2542  } else {
2543  Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
2544  auto OrLHS =
2545  MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
2546  auto OrRHS = MIRBuilder.buildLShr(
2547  NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2548  Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2549  }
2550  } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2551  if (Amt.ugt(VTBits)) {
2552  Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
2553  } else if (Amt.ugt(NVTBits)) {
2554  Lo = MIRBuilder.buildLShr(NVT, InH,
2555  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2556  Hi = MIRBuilder.buildConstant(NVT, 0);
2557  } else if (Amt == NVTBits) {
2558  Lo = InH;
2559  Hi = MIRBuilder.buildConstant(NVT, 0);
2560  } else {
2561  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
2562 
2563  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
2564  auto OrRHS = MIRBuilder.buildShl(
2565  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2566 
2567  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2568  Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
2569  }
2570  } else {
2571  if (Amt.ugt(VTBits)) {
2572  Hi = Lo = MIRBuilder.buildAShr(
2573  NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2574  } else if (Amt.ugt(NVTBits)) {
2575  Lo = MIRBuilder.buildAShr(NVT, InH,
2576  MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
2577  Hi = MIRBuilder.buildAShr(NVT, InH,
2578  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2579  } else if (Amt == NVTBits) {
2580  Lo = InH;
2581  Hi = MIRBuilder.buildAShr(NVT, InH,
2582  MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
2583  } else {
2584  auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
2585 
2586  auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
2587  auto OrRHS = MIRBuilder.buildShl(
2588  NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
2589 
2590  Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
2591  Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
2592  }
2593  }
2594 
2595  MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
2596  MI.eraseFromParent();
2597 
2598  return Legalized;
2599 }
2600 
2601 // TODO: Optimize if constant shift amount.
2604  LLT RequestedTy) {
2605  if (TypeIdx == 1) {
2606  Observer.changingInstr(MI);
2607  narrowScalarSrc(MI, RequestedTy, 2);
2608  Observer.changedInstr(MI);
2609  return Legalized;
2610  }
2611 
2612  Register DstReg = MI.getOperand(0).getReg();
2613  LLT DstTy = MRI.getType(DstReg);
2614  if (DstTy.isVector())
2615  return UnableToLegalize;
2616 
2617  Register Amt = MI.getOperand(2).getReg();
2618  LLT ShiftAmtTy = MRI.getType(Amt);
2619  const unsigned DstEltSize = DstTy.getScalarSizeInBits();
2620  if (DstEltSize % 2 != 0)
2621  return UnableToLegalize;
2622 
2623  // Ignore the input type. We can only go to exactly half the size of the
2624  // input. If that isn't small enough, the resulting pieces will be further
2625  // legalized.
2626  const unsigned NewBitSize = DstEltSize / 2;
2627  const LLT HalfTy = LLT::scalar(NewBitSize);
2628  const LLT CondTy = LLT::scalar(1);
2629 
2630  if (const MachineInstr *KShiftAmt =
2631  getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
2633  MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
2634  }
2635 
2636  // TODO: Expand with known bits.
2637 
2638  // Handle the fully general expansion by an unknown amount.
2639  auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
2640 
2641  Register InL = MRI.createGenericVirtualRegister(HalfTy);
2642  Register InH = MRI.createGenericVirtualRegister(HalfTy);
2643  MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
2644 
2645  auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
2646  auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
2647 
2648  auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
2649  auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
2650  auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
2651 
2652  Register ResultRegs[2];
2653  switch (MI.getOpcode()) {
2654  case TargetOpcode::G_SHL: {
2655  // Short: ShAmt < NewBitSize
2656  auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2657 
2658  auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
2659  auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
2660  auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2661 
2662  // Long: ShAmt >= NewBitSize
2663  auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
2664  auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
2665 
2666  auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
2667  auto Hi = MIRBuilder.buildSelect(
2668  HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
2669 
2670  ResultRegs[0] = Lo.getReg(0);
2671  ResultRegs[1] = Hi.getReg(0);
2672  break;
2673  }
2674  case TargetOpcode::G_LSHR: {
2675  // Short: ShAmt < NewBitSize
2676  auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
2677 
2678  auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2679  auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
2680  auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2681 
2682  // Long: ShAmt >= NewBitSize
2683  auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
2684  auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2685 
2686  auto Lo = MIRBuilder.buildSelect(
2687  HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2688  auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2689 
2690  ResultRegs[0] = Lo.getReg(0);
2691  ResultRegs[1] = Hi.getReg(0);
2692  break;
2693  }
2694  case TargetOpcode::G_ASHR: {
2695  // Short: ShAmt < NewBitSize
2696  auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
2697 
2698  auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
2699  auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack);
2700  auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
2701 
2702  // Long: ShAmt >= NewBitSize
2703 
2704  // Sign of Hi part.
2705  auto HiL = MIRBuilder.buildAShr(
2706  HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
2707 
2708  auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
2709 
2710  auto Lo = MIRBuilder.buildSelect(
2711  HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
2712 
2713  auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
2714 
2715  ResultRegs[0] = Lo.getReg(0);
2716  ResultRegs[1] = Hi.getReg(0);
2717  break;
2718  }
2719  default:
2720  llvm_unreachable("not a shift");
2721  }
2722 
2723  MIRBuilder.buildMerge(DstReg, ResultRegs);
2724  MI.eraseFromParent();
2725  return Legalized;
2726 }
2727 
2730  LLT MoreTy) {
2731  assert(TypeIdx == 0 && "Expecting only Idx 0");
2732 
2733  Observer.changingInstr(MI);
2734  for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
2735  MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2736  MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2737  moreElementsVectorSrc(MI, MoreTy, I);
2738  }
2739 
2740  MachineBasicBlock &MBB = *MI.getParent();
2741  MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
2742  moreElementsVectorDst(MI, MoreTy, 0);
2743  Observer.changedInstr(MI);
2744  return Legalized;
2745 }
2746 
// LegalizerHelper::moreElementsVector — legalize a vector instruction by
// increasing the number of vector elements to MoreTy and ignoring the added
// lanes.  NOTE(review): the signature lines preceding "LLT MoreTy) {" were
// lost in this extraction; the declaration elsewhere in this dump reads
// (MachineInstr &MI, unsigned TypeIdx, LLT MoreTy).
2749  LLT MoreTy) {
2750  MIRBuilder.setInstr(MI);
2751  unsigned Opc = MI.getOpcode();
2752  switch (Opc) {
2753  case TargetOpcode::G_IMPLICIT_DEF: {
// An undef of more elements is still undef: only the destination changes.
2754  Observer.changingInstr(MI);
2755  moreElementsVectorDst(MI, MoreTy, 0);
2756  Observer.changedInstr(MI);
2757  return Legalized;
2758  }
2759  case TargetOpcode::G_AND:
2760  case TargetOpcode::G_OR:
2761  case TargetOpcode::G_XOR:
2762  case TargetOpcode::G_SMIN:
2763  case TargetOpcode::G_SMAX:
2764  case TargetOpcode::G_UMIN:
2765  case TargetOpcode::G_UMAX: {
// Lane-wise binary ops: pad both source operands and the destination; the
// values produced in the extra lanes are don't-care.
2766  Observer.changingInstr(MI);
2767  moreElementsVectorSrc(MI, MoreTy, 1);
2768  moreElementsVectorSrc(MI, MoreTy, 2);
2769  moreElementsVectorDst(MI, MoreTy, 0);
2770  Observer.changedInstr(MI);
2771  return Legalized;
2772  }
2773  case TargetOpcode::G_EXTRACT:
// Only the big source vector (type index 1) may grow; widening the
// extracted result would change which bits are produced.
2774  if (TypeIdx != 1)
2775  return UnableToLegalize;
2776  Observer.changingInstr(MI);
2777  moreElementsVectorSrc(MI, MoreTy, 1);
2778  Observer.changedInstr(MI);
2779  return Legalized;
2780  case TargetOpcode::G_INSERT:
// The container type (index 0) grows: operand 1 is the original container
// value, so it must be padded to match the widened destination.
2781  if (TypeIdx != 0)
2782  return UnableToLegalize;
2783  Observer.changingInstr(MI);
2784  moreElementsVectorSrc(MI, MoreTy, 1);
2785  moreElementsVectorDst(MI, MoreTy, 0);
2786  Observer.changedInstr(MI);
2787  return Legalized;
2788  case TargetOpcode::G_SELECT:
2789  if (TypeIdx != 0)
2790  return UnableToLegalize;
// Vector-condition selects (vselect) are not handled here.
2791  if (MRI.getType(MI.getOperand(1).getReg()).isVector())
2792  return UnableToLegalize;
2793 
// Scalar condition stays as-is; both value operands and the result grow.
2794  Observer.changingInstr(MI);
2795  moreElementsVectorSrc(MI, MoreTy, 2);
2796  moreElementsVectorSrc(MI, MoreTy, 3);
2797  moreElementsVectorDst(MI, MoreTy, 0);
2798  Observer.changedInstr(MI);
2799  return Legalized;
2800  case TargetOpcode::G_PHI:
// PHIs need special insert-point handling; delegated to a helper.
2801  return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
2802  default:
2803  return UnableToLegalize;
2804  }
2805 }
2806 
// Schoolbook multi-word multiplication: computes DstRegs = Src1Regs *
// Src2Regs where each operand is split into NarrowTy-sized parts (little
// endian: index 0 holds the low bits).  For each destination part it sums
// the low halves (G_MUL) of the contributing partial products, the high
// halves (G_UMULH) carried over from the previous part, and the accumulated
// carries, propagating carries with G_UADDO + G_ZEXT.
2807 void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
2808  ArrayRef<Register> Src1Regs,
2809  ArrayRef<Register> Src2Regs,
2810  LLT NarrowTy) {
// NOTE(review): line 2811 was lost in extraction — it introduced the
// builder alias `B` used throughout this function (presumably
// `MachineIRBuilder &B = MIRBuilder;` — confirm against upstream).
2812  unsigned SrcParts = Src1Regs.size();
2813  unsigned DstParts = DstRegs.size();
2814 
2815  unsigned DstIdx = 0; // Low bits of the result.
// The lowest part has exactly one contribution: low(Src1[0] * Src2[0]).
2816  Register FactorSum =
2817  B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
2818  DstRegs[DstIdx] = FactorSum;
2819 
2820  unsigned CarrySumPrevDstIdx;
2821  SmallVector<Register, 4> Factors;
2822 
2823  for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
2824  // Collect low parts of muls for DstIdx.
// Loop bounds clamp i so both Src1Regs[DstIdx - i] and Src2Regs[i] stay
// in range; together the two loops enumerate all partial products whose
// weight lands in part DstIdx.
2825  for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
2826  i <= std::min(DstIdx, SrcParts - 1); ++i) {
2827  MachineInstrBuilder Mul =
2828  B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
2829  Factors.push_back(Mul.getReg(0));
2830  }
2831  // Collect high parts of muls from previous DstIdx.
2832  for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
2833  i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
2834  MachineInstrBuilder Umulh =
2835  B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
2836  Factors.push_back(Umulh.getReg(0));
2837  }
2838  // Add CarrySum from additions calculated for previous DstIdx.
2839  if (DstIdx != 1) {
2840  Factors.push_back(CarrySumPrevDstIdx);
2841  }
2842 
2843  Register CarrySum;
2844  // Add all factors and accumulate all carries into CarrySum.
2845  if (DstIdx != DstParts - 1) {
2846  MachineInstrBuilder Uaddo =
2847  B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
2848  FactorSum = Uaddo.getReg(0);
// Zero-extend each 1-bit carry-out to NarrowTy so carries can be summed.
2849  CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
2850  for (unsigned i = 2; i < Factors.size(); ++i) {
2851  MachineInstrBuilder Uaddo =
2852  B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
2853  FactorSum = Uaddo.getReg(0);
2854  MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
2855  CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
2856  }
2857  } else {
2858  // Since value for the next index is not calculated, neither is CarrySum.
// Plain adds suffice for the most-significant part: any overflow here is
// discarded anyway.
2859  FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
2860  for (unsigned i = 2; i < Factors.size(); ++i)
2861  FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
2862  }
2863 
// NOTE(review): on the final iteration CarrySum is copied here while still
// default/uninitialized; it is never read afterwards, so this is benign,
// but it may trip uninitialized-value analyzers.
2864  CarrySumPrevDstIdx = CarrySum;
2865  DstRegs[DstIdx] = FactorSum;
2866  Factors.clear();
2867  }
2868 }
2869 
// LegalizerHelper::narrowScalarMul — break a scalar G_MUL or G_UMULH whose
// type is a multiple of NarrowTy into NarrowTy-sized partial products via
// multiplyRegisters, then merge the pieces back into the destination.
// NOTE(review): the signature lines were lost in extraction; the
// declaration elsewhere in this dump reads (MachineInstr &MI, LLT Ty),
// with the narrow type bound to `NarrowTy` below.
2872  Register DstReg = MI.getOperand(0).getReg();
2873  Register Src1 = MI.getOperand(1).getReg();
2874  Register Src2 = MI.getOperand(2).getReg();
2875 
2876  LLT Ty = MRI.getType(DstReg);
// Vector multiplies are not narrowed here.
2877  if (Ty.isVector())
2878  return UnableToLegalize;
2879 
2880  unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
2881  unsigned DstSize = Ty.getSizeInBits();
2882  unsigned NarrowSize = NarrowTy.getSizeInBits();
// Both operand and result widths must split evenly into narrow parts.
2883  if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
2884  return UnableToLegalize;
2885 
2886  unsigned NumDstParts = DstSize / NarrowSize;
2887  unsigned NumSrcParts = SrcSize / NarrowSize;
// G_UMULH needs the full double-width product computed so its high half
// can be selected afterwards.
2888  bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
2889  unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
2890 
2891  SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
2892  extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
2893  extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
2894  DstTmpRegs.resize(DstTmpParts);
2895  multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
2896 
2897  // Take only high half of registers if this is high mul.
2898  ArrayRef<Register> DstRegs(
2899  IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
2900  MIRBuilder.buildMerge(DstReg, DstRegs);
2901  MI.eraseFromParent();
2902  return Legalized;
2903 }
2904 
// Narrow a G_EXTRACT by splitting its (wide) source operand into NarrowTy
// pieces and re-assembling only the pieces/segments that overlap the
// extracted bit range.  NOTE(review): the signature lines preceding
// "LLT NarrowTy) {" were lost in extraction (takes MI, TypeIdx, NarrowTy).
2907  LLT NarrowTy) {
// Only narrowing of the source operand (type index 1) is implemented.
2908  if (TypeIdx != 1)
2909  return UnableToLegalize;
2910 
2911  uint64_t NarrowSize = NarrowTy.getSizeInBits();
2912 
2913  int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
2914  // FIXME: add support for when SizeOp1 isn't an exact multiple of
2915  // NarrowSize.
2916  if (SizeOp1 % NarrowSize != 0)
2917  return UnableToLegalize;
2918  int NumParts = SizeOp1 / NarrowSize;
2919 
2920  SmallVector<Register, 2> SrcRegs, DstRegs;
2921  SmallVector<uint64_t, 2> Indexes;
2922  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
2923 
// OpReg/OpStart/OpSize describe the extracted value: its register, the bit
// offset into the source, and its width.
2924  Register OpReg = MI.getOperand(0).getReg();
2925  uint64_t OpStart = MI.getOperand(2).getImm();
2926  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
2927  for (int i = 0; i < NumParts; ++i) {
2928  unsigned SrcStart = i * NarrowSize;
2929 
2930  if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
2931  // No part of the extract uses this subregister, ignore it.
2932  continue;
2933  } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
2934  // The entire subregister is extracted, forward the value.
2935  DstRegs.push_back(SrcRegs[i]);
2936  continue;
2937  }
2938 
2939  // OpSegStart is where this destination segment would start in OpReg if it
2940  // extended infinitely in both directions.
2941  int64_t ExtractOffset;
2942  uint64_t SegSize;
2943  if (OpStart < SrcStart) {
2944  ExtractOffset = 0;
2945  SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
2946  } else {
2947  ExtractOffset = OpStart - SrcStart;
2948  SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
2949  }
2950 
2951  Register SegReg = SrcRegs[i];
2952  if (ExtractOffset != 0 || SegSize != NarrowSize) {
2953  // A genuine extract is needed.
2954  SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
2955  MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
2956  }
2957 
2958  DstRegs.push_back(SegReg);
2959  }
2960 
// Reassemble the collected segments into the original result register.
2961  Register DstReg = MI.getOperand(0).getReg();
2962  if(MRI.getType(DstReg).isVector())
2963  MIRBuilder.buildBuildVector(DstReg, DstRegs);
2964  else
2965  MIRBuilder.buildMerge(DstReg, DstRegs);
2966  MI.eraseFromParent();
2967  return Legalized;
2968 }
2969 
// Narrow a G_INSERT by splitting the destination into NarrowTy pieces,
// forwarding untouched pieces from the original container and rebuilding
// the pieces that overlap the inserted value.  NOTE(review): the signature
// lines preceding "LLT NarrowTy) {" were lost in extraction (takes MI,
// TypeIdx, NarrowTy).
2972  LLT NarrowTy) {
2973  // FIXME: Don't know how to handle secondary types yet.
2974  if (TypeIdx != 0)
2975  return UnableToLegalize;
2976 
2977  uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
2978  uint64_t NarrowSize = NarrowTy.getSizeInBits();
2979 
2980  // FIXME: add support for when SizeOp0 isn't an exact multiple of
2981  // NarrowSize.
2982  if (SizeOp0 % NarrowSize != 0)
2983  return UnableToLegalize;
2984 
2985  int NumParts = SizeOp0 / NarrowSize;
2986 
2987  SmallVector<Register, 2> SrcRegs, DstRegs;
2988  SmallVector<uint64_t, 2> Indexes;
// Split the original container value (operand 1) into narrow pieces.
2989  extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
2990 
// OpReg/OpStart/OpSize describe the inserted value and its bit range.
2991  Register OpReg = MI.getOperand(2).getReg();
2992  uint64_t OpStart = MI.getOperand(3).getImm();
2993  uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
2994  for (int i = 0; i < NumParts; ++i) {
2995  unsigned DstStart = i * NarrowSize;
2996 
2997  if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
2998  // No part of the insert affects this subregister, forward the original.
2999  DstRegs.push_back(SrcRegs[i]);
3000  continue;
3001  } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
3002  // The entire subregister is defined by this insert, forward the new
3003  // value.
3004  DstRegs.push_back(OpReg);
3005  continue;
3006  }
3007 
3008  // OpSegStart is where this destination segment would start in OpReg if it
3009  // extended infinitely in both directions.
3010  int64_t ExtractOffset, InsertOffset;
3011  uint64_t SegSize;
3012  if (OpStart < DstStart) {
3013  InsertOffset = 0;
3014  ExtractOffset = DstStart - OpStart;
3015  SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
3016  } else {
3017  InsertOffset = OpStart - DstStart;
3018  ExtractOffset = 0;
3019  SegSize =
3020  std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
3021  }
3022 
// Carve out just the slice of the inserted value that lands in this part.
3023  Register SegReg = OpReg;
3024  if (ExtractOffset != 0 || SegSize != OpSize) {
3025  // A genuine extract is needed.
3026  SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
3027  MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
3028  }
3029 
3030  Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
3031  MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
3032  DstRegs.push_back(DstReg);
3033  }
3034 
3035  assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
3036  Register DstReg = MI.getOperand(0).getReg();
3037  if(MRI.getType(DstReg).isVector())
3038  MIRBuilder.buildBuildVector(DstReg, DstRegs);
3039  else
3040  MIRBuilder.buildMerge(DstReg, DstRegs);
3041  MI.eraseFromParent();
3042  return Legalized;
3043 }
3044 
// Narrow a simple two-source, one-destination instruction (same type on all
// operands) by splitting every operand into NarrowTy parts plus an optional
// leftover piece, applying the opcode part-wise, and merging the results.
// NOTE(review): the signature lines preceding "LLT NarrowTy) {" were lost
// in extraction; the declaration elsewhere in this dump reads
// (MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy).
3047  LLT NarrowTy) {
3048  Register DstReg = MI.getOperand(0).getReg();
3049  LLT DstTy = MRI.getType(DstReg);
3050 
3051  assert(MI.getNumOperands() == 3 && TypeIdx == 0);
3052 
3053  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3054  SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
3055  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3056  LLT LeftoverTy;
3057  if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
3058  Src0Regs, Src0LeftoverRegs))
3059  return UnableToLegalize;
3060 
// Both sources share DstTy, so the second split must succeed identically.
3061  LLT Unused;
3062  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
3063  Src1Regs, Src1LeftoverRegs))
3064  llvm_unreachable("inconsistent extractParts result");
3065 
// Apply the original opcode to each full-width narrow part...
3066  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3067  auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
3068  {Src0Regs[I], Src1Regs[I]});
3069  DstRegs.push_back(Inst->getOperand(0).getReg());
3070  }
3071 
// ...and to the leftover (odd-sized) pieces, if any.
3072  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3073  auto Inst = MIRBuilder.buildInstr(
3074  MI.getOpcode(),
3075  {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
3076  DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
3077  }
3078 
3079  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3080  LeftoverTy, DstLeftoverRegs);
3081 
3082  MI.eraseFromParent();
3083  return Legalized;
3084 }
3085 
// Narrow a G_SELECT with a scalar condition by splitting both value
// operands into NarrowTy parts (plus leftovers), selecting part-wise with
// the shared condition, and merging the results.  NOTE(review): the
// signature lines preceding "LLT NarrowTy) {" were lost in extraction; the
// declaration elsewhere in this dump reads (MachineInstr &MI,
// unsigned TypeIdx, LLT NarrowTy).
3088  LLT NarrowTy) {
3089  if (TypeIdx != 0)
3090  return UnableToLegalize;
3091 
3092  Register CondReg = MI.getOperand(1).getReg();
3093  LLT CondTy = MRI.getType(CondReg);
3094  if (CondTy.isVector()) // TODO: Handle vselect
3095  return UnableToLegalize;
3096 
3097  Register DstReg = MI.getOperand(0).getReg();
3098  LLT DstTy = MRI.getType(DstReg);
3099 
3100  SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
3101  SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
3102  SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
3103  LLT LeftoverTy;
3104  if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
3105  Src1Regs, Src1LeftoverRegs))
3106  return UnableToLegalize;
3107 
// The false operand has the same type, so its split cannot fail differently.
3108  LLT Unused;
3109  if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
3110  Src2Regs, Src2LeftoverRegs))
3111  llvm_unreachable("inconsistent extractParts result");
3112 
// One narrow select per full part, all reusing the same scalar condition.
3113  for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
3114  auto Select = MIRBuilder.buildSelect(NarrowTy,
3115  CondReg, Src1Regs[I], Src2Regs[I]);
3116  DstRegs.push_back(Select->getOperand(0).getReg());
3117  }
3118 
3119  for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
3120  auto Select = MIRBuilder.buildSelect(
3121  LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
3122  DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
3123  }
3124 
3125  insertParts(DstReg, DstTy, NarrowTy, DstRegs,
3126  LeftoverTy, DstLeftoverRegs);
3127 
3128  MI.eraseFromParent();
3129  return Legalized;
3130 }
3131 
// LegalizerHelper::lowerBitCount — expand bit-counting opcodes (G_CTLZ,
// G_CTTZ and their _ZERO_UNDEF variants) into operations the target
// supports, falling back to the classic "Hacker's Delight" bit tricks.
// NOTE(review): the signature lines were lost in extraction; the
// declaration elsewhere in this dump reads (MachineInstr &MI,
// unsigned TypeIdx, LLT Ty).
3134  unsigned Opc = MI.getOpcode();
3135  auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
// "Supported" here means the target will eventually handle the opcode
// itself (legal, libcall, or custom) — i.e. we may emit it.
3136  auto isSupported = [this](const LegalityQuery &Q) {
3137  auto QAction = LI.getAction(Q).Action;
3138  return QAction == Legal || QAction == Libcall || QAction == Custom;
3139  };
3140  switch (Opc) {
3141  default:
3142  return UnableToLegalize;
3143  case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
3144  // This trivially expands to CTLZ.
3145  Observer.changingInstr(MI);
3146  MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
3147  Observer.changedInstr(MI);
3148  return Legalized;
3149  }
3150  case TargetOpcode::G_CTLZ: {
3151  Register SrcReg = MI.getOperand(1).getReg();
3152  unsigned Len = Ty.getSizeInBits();
3153  if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
3154  // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
3155  auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
3156  {Ty}, {SrcReg});
3157  auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
3158  auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
3159  auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
3160  SrcReg, MIBZero);
3161  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
3162  MIBCtlzZU);
3163  MI.eraseFromParent();
3164  return Legalized;
3165  }
3166  // for now, we do this:
3167  // NewLen = NextPowerOf2(Len);
3168  // x = x | (x >> 1);
3169  // x = x | (x >> 2);
3170  // ...
3171  // x = x | (x >>16);
3172  // x = x | (x >>32); // for 64-bit input
3173  // Upto NewLen/2
3174  // return Len - popcount(x);
3175  //
3176  // Ref: "Hacker's Delight" by Henry Warren
// Smear the highest set bit downward, then count the set bits: everything
// at or below the leading one becomes 1, so popcount gives Len - ctlz.
3177  Register Op = SrcReg;
3178  unsigned NewLen = PowerOf2Ceil(Len);
3179  for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
3180  auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
3181  auto MIBOp = MIRBuilder.buildInstr(
3182  TargetOpcode::G_OR, {Ty},
3183  {Op, MIRBuilder.buildInstr(TargetOpcode::G_LSHR, {Ty},
3184  {Op, MIBShiftAmt})});
3185  Op = MIBOp->getOperand(0).getReg();
3186  }
3187  auto MIBPop = MIRBuilder.buildInstr(TargetOpcode::G_CTPOP, {Ty}, {Op});
3188  MIRBuilder.buildInstr(TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
3189  {MIRBuilder.buildConstant(Ty, Len), MIBPop});
3190  MI.eraseFromParent();
3191  return Legalized;
3192  }
3193  case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
3194  // This trivially expands to CTTZ.
3195  Observer.changingInstr(MI);
3196  MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
3197  Observer.changedInstr(MI);
3198  return Legalized;
3199  }
3200  case TargetOpcode::G_CTTZ: {
3201  Register SrcReg = MI.getOperand(1).getReg();
3202  unsigned Len = Ty.getSizeInBits();
3203  if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
3204  // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
3205  // zero.
3206  auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
3207  {Ty}, {SrcReg});
3208  auto MIBZero = MIRBuilder.buildConstant(Ty, 0);
3209  auto MIBLen = MIRBuilder.buildConstant(Ty, Len);
3210  auto MIBICmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
3211  SrcReg, MIBZero);
3212  MIRBuilder.buildSelect(MI.getOperand(0).getReg(), MIBICmp, MIBLen,
3213  MIBCttzZU);
3214  MI.eraseFromParent();
3215  return Legalized;
3216  }
3217  // for now, we use: { return popcount(~x & (x - 1)); }
3218  // unless the target has ctlz but not ctpop, in which case we use:
3219  // { return 32 - nlz(~x & (x-1)); }
3220  // Ref: "Hacker's Delight" by Henry Warren
// ~x & (x - 1) sets exactly the bits below the lowest set bit of x.
3221  auto MIBCstNeg1 = MIRBuilder.buildConstant(Ty, -1);
3222  auto MIBNot =
3223  MIRBuilder.buildInstr(TargetOpcode::G_XOR, {Ty}, {SrcReg, MIBCstNeg1});
3224  auto MIBTmp = MIRBuilder.buildInstr(
3225  TargetOpcode::G_AND, {Ty},
3226  {MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
3227  {SrcReg, MIBCstNeg1})});
3228  if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
3229  isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
3230  auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
// NOTE(review): line 3231 was lost in extraction — it opened the builder
// call these operands belong to (presumably `MIRBuilder.buildInstr(`;
// confirm against upstream).
3232  TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
3233  {MIBCstLen,
3234  MIRBuilder.buildInstr(TargetOpcode::G_CTLZ, {Ty}, {MIBTmp})});
3235  MI.eraseFromParent();
3236  return Legalized;
3237  }
// Otherwise reuse MI in place as a G_CTPOP of the masked value.
3238  MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
3239  MI.getOperand(1).setReg(MIBTmp->getOperand(0).getReg());
3240  return Legalized;
3241  }
3242  }
3243 }
3244 
3245 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
3246 // representation.
// NOTE(review): the signature lines (LegalizerHelper::lowerU64ToF32BitOps)
// were lost in extraction.  Builds the float manually: exponent from the
// leading-zero count, mantissa from the normalized top bits, with
// round-to-nearest-even applied via the V + R addition at the end.
3249  Register Dst = MI.getOperand(0).getReg();
3250  Register Src = MI.getOperand(1).getReg();
3251  const LLT S64 = LLT::scalar(64);
3252  const LLT S32 = LLT::scalar(32);
3253  const LLT S1 = LLT::scalar(1);
3254 
3255  assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
3256 
// Reference C implementation this expansion mirrors step by step:
3257  // unsigned cul2f(ulong u) {
3258  // uint lz = clz(u);
3259  // uint e = (u != 0) ? 127U + 63U - lz : 0;
3260  // u = (u << lz) & 0x7fffffffffffffffUL;
3261  // ulong t = u & 0xffffffffffUL;
3262  // uint v = (e << 23) | (uint)(u >> 40);
3263  // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
3264  // return as_float(v + r);
3265  // }
3266 
3267  auto Zero32 = MIRBuilder.buildConstant(S32, 0);
3268  auto Zero64 = MIRBuilder.buildConstant(S64, 0);
3269 
// Source is checked for zero separately, so zero-undef CTLZ is fine here.
3270  auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
3271 
// Biased exponent: 127 (IEEE bias) + 63 - lz; forced to 0 for input 0.
3272  auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
3273  auto Sub = MIRBuilder.buildSub(S32, K, LZ);
3274 
3275  auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
3276  auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
3277 
// Normalize: shift the leading one to bit 63, then drop it (implicit bit).
3278  auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
3279  auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
3280 
3281  auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
3282 
// T holds the 40 discarded low bits, used below to decide rounding.
3283  auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
3284  auto T = MIRBuilder.buildAnd(S64, U, Mask1);
3285 
3286  auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
3287  auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
3288  auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
3289 
// Round to nearest, ties to even: R is 1 above the halfway point, V&1 at
// exactly halfway, else 0.
3290  auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
3291  auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
3292  auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
3293  auto One = MIRBuilder.buildConstant(S32, 1);
3294 
3295  auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
3296  auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
3297  auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
3298  MIRBuilder.buildAdd(Dst, V, R);
3299 
// NOTE(review): no MI.eraseFromParent() is visible before this return,
// unlike the sibling lowerings above — possibly lost in extraction; verify
// against the upstream source.
3300  return Legalized;
3301 }
3302 
// Lower G_UITOFP.  Currently only the s64 -> s32 case is handled, via the
// bit-level expansion above.  NOTE(review): the return-type line of the
// signature was lost in extraction (LegalizerHelper::LegalizeResult).
3304 LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3305  Register Dst = MI.getOperand(0).getReg();
3306  Register Src = MI.getOperand(1).getReg();
3307  LLT DstTy = MRI.getType(Dst);
3308  LLT SrcTy = MRI.getType(Src);
3309 
3310  if (SrcTy != LLT::scalar(64))
3311  return UnableToLegalize;
3312 
3313  if (DstTy == LLT::scalar(32)) {
3314  // TODO: SelectionDAG has several alternative expansions to port which may
3315  // be more reasonable depending on the available instructions. If a target
3316  // has sitofp, does not have CTLZ, or can efficiently use f64 as an
3317  // intermediate type, this is probably worse.
3318  return lowerU64ToF32BitOps(MI);
3319  }
3320 
3321  return UnableToLegalize;
3322 }
3323 
// Lower G_SITOFP (s64 -> s32 only) by reducing to the unsigned conversion:
// take the absolute value via the sign-smear trick, convert with G_UITOFP,
// then negate the result if the input was negative.  NOTE(review): the
// return-type line of the signature was lost in extraction.
3325 LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3326  Register Dst = MI.getOperand(0).getReg();
3327  Register Src = MI.getOperand(1).getReg();
3328  LLT DstTy = MRI.getType(Dst);
3329  LLT SrcTy = MRI.getType(Src);
3330 
3331  const LLT S64 = LLT::scalar(64);
3332  const LLT S32 = LLT::scalar(32);
3333  const LLT S1 = LLT::scalar(1);
3334 
3335  if (SrcTy != S64)
3336  return UnableToLegalize;
3337 
3338  if (DstTy == S32) {
// Reference C implementation this expansion mirrors:
3339  // signed cl2f(long l) {
3340  // long s = l >> 63;
3341  // float r = cul2f((l + s) ^ s);
3342  // return s ? -r : r;
3343  // }
3344  Register L = Src;
// s = l >> 63 is all-ones for negative l, zero otherwise; (l + s) ^ s is
// then |l| without a branch.
3345  auto SignBit = MIRBuilder.buildConstant(S64, 63);
3346  auto S = MIRBuilder.buildAShr(S64, L, SignBit);
3347 
3348  auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
3349  auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
3350  auto R = MIRBuilder.buildUITOFP(S32, Xor);
3351 
3352  auto RNeg = MIRBuilder.buildFNeg(S32, R);
3353  auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
3354  MIRBuilder.buildConstant(S64, 0));
3355  MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
// NOTE(review): no MI.eraseFromParent() is visible before this return —
// possibly lost in extraction; verify against the upstream source.
3356  return Legalized;
3357  }
3358 
3359  return UnableToLegalize;
3360 }
3361 
3362 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
3363  switch (Opc) {
3364  case TargetOpcode::G_SMIN:
3365  return CmpInst::ICMP_SLT;
3366  case TargetOpcode::G_SMAX:
3367  return CmpInst::ICMP_SGT;
3368  case TargetOpcode::G_UMIN:
3369  return CmpInst::ICMP_ULT;
3370  case TargetOpcode::G_UMAX:
3371  return CmpInst::ICMP_UGT;
3372  default:
3373  llvm_unreachable("not in integer min/max");
3374  }
3375 }
3376 
// Lower integer G_SMIN/G_SMAX/G_UMIN/G_UMAX into an icmp (predicate chosen
// by minMaxToCompare) followed by a select of the two operands.
// NOTE(review): the return-type line of the signature was lost in
// extraction (LegalizerHelper::LegalizeResult).
3378 LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
3379  Register Dst = MI.getOperand(0).getReg();
3380  Register Src0 = MI.getOperand(1).getReg();
3381  Register Src1 = MI.getOperand(2).getReg();
3382 
3383  const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
// Compare result type mirrors the value type with 1-bit elements, so this
// also works element-wise for vectors.
3384  LLT CmpType = MRI.getType(Dst).changeElementSize(1);
3385 
3386  auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
3387  MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
3388 
3389  MI.eraseFromParent();
3390  return Legalized;
3391 }
3392 
3395  Register Dst = MI.getOperand(0).getReg();
3396  Register Src0 = MI.getOperand(1).getReg();
3397  Register Src1 = MI.getOperand(2).getReg();
3398 
3399  const LLT Src0Ty = MRI.getType(Src0);
3400  const LLT Src1Ty = MRI.getType(Src1);
3401 
3402  const int Src0Size = Src0Ty.getScalarSizeInBits();
3403  const int Src1Size = Src1Ty.getScalarSizeInBits();
3404 
3405  auto SignBitMask = MIRBuilder.buildConstant(
3406  Src0Ty, APInt::getSignMask(Src0Size));
3407 
3408  auto NotSignBitMask = MIRBuilder.buildConstant(
3409  Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
3410 
3411  auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
3412  MachineInstr *Or;
3413 
3414  if (Src0Ty == Src1Ty) {
3415  auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask);
3416  Or = MIRBuilder.buildOr(Dst, And0, And1);
3417  } else if (Src0Size > Src1Size) {
3418  auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
3419  auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
3420  auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
3421  auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
3422  Or = MIRBuilder.buildOr(Dst, And0, And1);
3423  } else {
3424  auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
3425  auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
3426  auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
3427  auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
3428  Or = MIRBuilder.buildOr(Dst, And0, And1);
3429  }
3430 
3431  // Be careful about setting nsz/nnan/ninf on every instruction, since the
3432  // constants are a nan and -0.0, but the final result should preserve
3433  // everything.
3434  if (unsigned Flags = MI.getFlags())
3435  Or->setFlags(Flags);
3436 
3437  MI.eraseFromParent();
3438  return Legalized;
3439 }
3440 
// Lower G_FMINNUM/G_FMAXNUM to their IEEE variants, inserting
// G_FCANONICALIZE on any operand that could be a signaling NaN so the
// sNaN-quieting semantics are preserved.  NOTE(review): the signature lines
// were lost in extraction (LegalizerHelper::lowerFMinNumMaxNum per the
// corresponding declaration).
3443  unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
3444  TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
3445 
3446  Register Dst = MI.getOperand(0).getReg();
3447  Register Src0 = MI.getOperand(1).getReg();
3448  Register Src1 = MI.getOperand(2).getReg();
3449  LLT Ty = MRI.getType(Dst);
3450 
3451  if (!MI.getFlag(MachineInstr::FmNoNans)) {
3452  // Insert canonicalizes if it's possible we need to quiet to get correct
3453  // sNaN behavior.
3454 
3455  // Note this must be done here, and not as an optimization combine in the
3456  // absence of a dedicated quiet-snan instruction as we're using an
3457  // omni-purpose G_FCANONICALIZE.
3458  if (!isKnownNeverSNaN(Src0, MRI))
3459  Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
3460 
3461  if (!isKnownNeverSNaN(Src1, MRI))
3462  Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
3463  }
3464 
3465  // If there are no nans, it's safe to simply replace this with the non-IEEE
3466  // version.
3467  MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
3468  MI.eraseFromParent();
3469  return Legalized;
3470 }
static LegalizerHelper::LegalizeResult simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size, Type *OpType)
uint64_t CallInst * C
LegalizeResult fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
virtual MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val)
Build and insert Res = G_CONSTANT Val.
const MachineInstrBuilder & add(const MachineOperand &MO) const
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
static Type * getDoubleTy(LLVMContext &C)
Definition: Type.cpp:164
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1562
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:836
MachineInstrBuilder buildZExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op, Res = G_TRUNC Op, or Res = COPY Op depending on the differing sizes...
MachineInstrBuilder buildUnmerge(ArrayRef< LLT > Res, const SrcOp &Op)
Build and insert Res0, ...
static APInt getAllOnesValue(unsigned numBits)
Get the all-ones value.
Definition: APInt.h:561
MachineInstrBuilder buildXor(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_XOR Op0, Op1.
MachineBasicBlock * getMBB() const
LegalizeResult lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
MachineInstrBuilder buildInsert(Register Res, Register Src, Register Op, unsigned Index)
Register getReg(unsigned Idx) const
Get the register for the operand index.
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
unsigned getScalarSizeInBits() const
LegalizeResult fewerElementsVectorMultiEltType(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a instruction with a vector type where each operand may have a different element type...
The operation should be implemented in terms of a wider scalar base-type.
Definition: LegalizerInfo.h:57
unsigned getSizeInBits(Register Reg, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI) const
Get the size in bits of Reg.
void setFPImm(const ConstantFP *CFP)
LegalizeResult lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
AtomicOrdering getFailureOrdering() const
For cmpxchg atomic operations, return the atomic ordering requirements when store does not occur...
The LegalityQuery object bundles together all the information that's needed to decide whether a given...
bool isScalar() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LegalizeResult fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
unsigned Reg
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Get a value with low bits set.
Definition: APInt.h:647
The (vector) operation should be implemented by splitting it into sub-vectors where the operation is ...
Definition: LegalizerInfo.h:62
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit...
virtual const TargetLowering * getTargetLowering() const
unsigned less than
Definition: InstrTypes.h:757
LLT getScalarType() const
MachineInstrBuilder buildOr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_OR Op0, Op1.
LLT getType(unsigned Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register...
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:813
bool isNonIntegralAddressSpace(unsigned AddrSpace) const
Definition: DataLayout.h:371
static uint32_t Concat[]
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:176
LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineInstrBuilder buildUAddo(const DstOp &Res, const DstOp &CarryOut, const SrcOp &Op0, const SrcOp &Op1)
Build and insert Res, CarryOut = G_UADDO Op0, Op1.
Libcall getFPROUND(EVT OpVT, EVT RetVT)
getFPROUND - Return the FPROUND_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Optional< MachineInstrBuilder > materializeGEP(Register &Res, Register Op0, const LLT &ValueTy, uint64_t Value)
Materialize and insert Res = G_GEP Op0, (G_CONSTANT Value)
bool isKnownNeverSNaN(Register Val, const MachineRegisterInfo &MRI)
Returns true if Val can be assumed to never be a signaling NaN.
Definition: Utils.h:160
MachineInstrBuilder buildExtract(const DstOp &Res, const SrcOp &Src, uint64_t Index)
Build and insert `Res0, ...
Libcall getUINTTOFP(EVT OpVT, EVT RetVT)
getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
The operation should be synthesized from multiple instructions acting on a narrower scalar base-type...
Definition: LegalizerInfo.h:52
bool isVector() const
void setMF(MachineFunction &MF)
LegalizeActionStep getAction(const LegalityQuery &Query) const
Determine what action should be taken to legalize the described instruction.
A description of a memory reference used in the backend.
bool isSigned() const
Definition: InstrTypes.h:902
LegalizeResult narrowScalarMul(MachineInstr &MI, LLT Ty)
void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II)
Set the insertion point before the specified position.
MachineIRBuilder & MIRBuilder
Expose MIRBuilder so clients can set their own RecordInsertInstruction functions. ...
MachineInstrBuilder buildAnyExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ANYEXT Op0.
LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy)
Legalize an instruction by performing the operation on a wider scalar type (for example a 16-bit addi...
LegalizeResult lowerFMinNumMaxNum(MachineInstr &MI)
const HexagonInstrInfo * TII
static Type * getFloatTy(LLVMContext &C)
Definition: Type.cpp:163
const ConstantFP * getFPImm() const
unsigned getNumOperands() const
Retuns the total number of operands.
Definition: MachineInstr.h:414
MachineInstrBuilder buildUAdde(const DstOp &Res, const DstOp &CarryOut, const SrcOp &Op0, const SrcOp &Op1, const SrcOp &CarryIn)
Build and insert Res, CarryOut = G_UADDE Op0, Op1, CarryIn.
const MachineInstrBuilder & addUse(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineInstrBuilder buildAnyExtOrTrunc(const DstOp &Res, const SrcOp &Op)
Res = COPY Op depending on the differing sizes of Res and Op.
LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:411
LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Legalize a vector instruction by increasing the number of vector elements involved and ignoring the a...
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, unsigned base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
LLT getElementType() const
Returns the vector's element type. Only valid for vector types.
LegalizerHelper::LegalizeResult createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall, const CallLowering::ArgInfo &Result, ArrayRef< CallLowering::ArgInfo > Args)
Helper function that creates the given libcall.
AtomicOrdering getOrdering() const
Return the atomic ordering requirements for this memory operation.
The (vector) operation should be implemented by widening the input vector and ignoring the lanes adde...
Definition: LegalizerInfo.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4483
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_FNEG Op0.
MachineInstrBuilder buildInstrNoInsert(unsigned Opcode)
Build but don't insert <empty> = Opcode <empty>.
MachineFunction & getMF()
Getter for the function we currently build.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:137
MachineInstrBuilder buildLShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
virtual const TargetInstrInfo * getInstrInfo() const
static const fltSemantics & IEEEdouble() LLVM_READNONE
Definition: APFloat.cpp:158
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
MachineInstrBuilder buildSub(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_SUB Op0, Op1.
LegalizeResult narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt, LLT HalfTy, LLT ShiftAmtTy)
void setChangeObserver(GISelChangeObserver &Observer)
MachineBasicBlock::iterator getInsertPt()
Current insertion point for new instructions.
LegalizerHelper(MachineFunction &MF, GISelChangeObserver &Observer, MachineIRBuilder &B)
LegalizeResult narrowScalarShift(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
T greatestCommonDivisor(T A, T B)
Return the greatest common divisor of the values using Euclid's algorithm.
Definition: MathExtras.h:563
MachineInstrBuilder buildSExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_SEXT Op.
Abstract class that contains various methods for clients to notify about changes. ...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
unsigned const MachineRegisterInfo * MRI
static LLT scalarOrVector(uint16_t NumElements, LLT ScalarTy)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
LegalizeResult legalizeInstrStep(MachineInstr &MI)
Replace MI by a sequence of legal instructions that can implement the same operation.
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
MachineInstrBuilder buildInstr(unsigned Opcode)
Build and insert <empty> = Opcode <empty>.
LegalizeResult narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineInstrBuilder buildZExt(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_ZEXT Op.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
MachineInstrBuilder buildCTLZ_ZERO_UNDEF(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_CTLZ_ZERO_UNDEF Op0, Src0.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
MachineInstrBuilder buildBuildVector(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_BUILD_VECTOR Op0, ...
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:263
virtual bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder) const
Return true if MI is either legal or has been legalized and false if not legal.
Helper class to build MachineInstr.
Libcall getFPTOSINT(EVT OpVT, EVT RetVT)
getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition: APInt.h:587
MachineInstrBuilder buildUMulH(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
LegalizeResult reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
static Type * getVoidTy(LLVMContext &C)
Definition: Type.cpp:160
void setInstr(MachineInstr &MI)
Set the insertion point to before MI.
bool isValid() const
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:732
void setImm(int64_t immVal)
virtual const CallLowering * getCallLowering() const
LegalizerHelper::LegalizeResult createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI, MachineInstr &MI)
Create a libcall to memcpy et al.
unsigned getAddressSpace() const
static MVT getVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:312
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function. ...
Definition: Function.cpp:205
LegalizeResult moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx, LLT MoreTy)
Some kind of error has occurred and we could not legalize this instruction.
static CmpInst::Predicate minMaxToCompare(unsigned Opc)
R600 Clause Merge
MachineInstrBuilder buildTrunc(const DstOp &Res, const SrcOp &Op)
Build and insert Res = G_TRUNC Op.
Instruction was already legal and no change was made to the MachineFunction.
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition: Utils.cpp:303
size_t size() const
Definition: SmallVector.h:52
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:219
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
MachineInstrBuilder buildUITOFP(const DstOp &Dst, const SrcOp &Src0)
Build and insert Res = G_UITOFP Src0.
APInt lshr(unsigned shiftAmt) const
Logical right-shift function.
Definition: APInt.h:970
signed greater than
Definition: InstrTypes.h:759
static Type * getFP128Ty(LLVMContext &C)
Definition: Type.cpp:168
LegalizeResult lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
const APFloat & getValueAPF() const
Definition: Constants.h:302
Libcall getFPEXT(EVT OpVT, EVT RetVT)
getFPEXT - Return the FPEXT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
static APInt getSignMask(unsigned BitWidth)
Get the SignMask for a specific bit width.
Definition: APInt.h:554
static std::pair< int, int > getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy)
Try to break down OrigTy into NarrowTy sized pieces.
static Type * getHalfTy(LLVMContext &C)
Definition: Type.cpp:162
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:239
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:155
MachineInstrBuilder buildLoad(const DstOp &Res, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert Res = G_LOAD Addr, MMO.
MachineInstrBuilder buildMul(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_MUL Op0, Op1.
MachineInstrBuilder buildICmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_ICMP Pred, Op0, Op1.
void setDesc(const MCInstrDesc &tid)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one...
virtual MachineInstrBuilder buildFConstant(const DstOp &Res, const ConstantFP &Val)
Build and insert Res = G_FCONSTANT Val.
LegalizeResult libcall(MachineInstr &MI)
Legalize an instruction by emitting a runtime library call instead.
mmo_iterator memoperands_begin() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:536
void setFlags(unsigned flags)
Definition: MachineInstr.h:306
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size)
MachineOperand class - Representation of each machine instruction operand.
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:837
LegalizeResult lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Intrinsic::ID getIntrinsicID() const
unsigned getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
signed less than
Definition: InstrTypes.h:761
Promote Memory to Register
Definition: Mem2Reg.cpp:109
LegalizeResult fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a simple vector instruction where all operands are the same type by splitting into multiple ...
The target wants to do something special with this combination of operand and type.
Definition: LegalizerInfo.h:81
LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
Legalize an instruction by splitting it into simpler parts, hopefully understood by the target...
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:643
MachineInstrBuilder buildSelect(const DstOp &Res, const SrcOp &Tst, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_SELECT Tst, Op0, Op1.
static Constant * get(Type *Ty, double V)
This returns a ConstantFP, or a vector containing a splat of a ConstantFP, for the specified value in...
Definition: Constants.cpp:706
LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize a vector instruction by splitting into multiple components, each acting on the same scalar t...
int64_t getImm() const
const Function & getFunction() const
Return the LLVM function that this machine code represents.
virtual bool legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const
MachineInstrBuilder buildAShr(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
LegalizeResult fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Class for arbitrary precision integers.
Definition: APInt.h:69
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType, Type *FromType)
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
static unsigned getReg(const void *D, unsigned RC, unsigned RegNo)
static MachineOperand CreateES(const char *SymName, unsigned char TargetFlags=0)
Register getReg() const
unsigned getBoolExtOp(bool IsVec, bool IsFP) const
static LegalizerHelper::LegalizeResult conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType, Type *FromType)
bool isPointer() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:256
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
LegalizeResult lowerU64ToF32BitOps(MachineInstr &MI)
Representation of each machine instruction.
Definition: MachineInstr.h:64
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1254
Instruction has been legalized and the MachineFunction changed.
Libcall getFPTOUINT(EVT OpVT, EVT RetVT)
getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred, const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1)
Build and insert a Res = G_FCMP PredOp0, Op1.
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
MachineInstrBuilder buildFCanonicalize(const DstOp &Dst, const SrcOp &Src0, Optional< unsigned > Flags=None)
Build and insert Dst = G_FCANONICALIZE Src0.
MachineInstrBuilder buildAdd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
Build and insert Res = G_ADD Op0, Op1.
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:187
const MachineBasicBlock & getMBB() const
Getter for the basic block we currently build.
LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
MachineInstrBuilder buildPtrToInt(const DstOp &Dst, const SrcOp &Src)
Build and insert a G_PTRTOINT instruction.
void setReg(unsigned Reg)
Change the register this operand corresponds to.
#define I(x, y, z)
Definition: MD5.cpp:58
static Constant * getZeroValueForNegation(Type *Ty)
Floating point negation must be implemented with f(x) = -0.0 - x.
Definition: Constants.cpp:792
MachineInstrBuilder buildStore(const SrcOp &Val, const SrcOp &Addr, MachineMemOperand &MMO)
Build and insert G_STORE Val, Addr, MMO.
uint32_t Size
Definition: Profile.cpp:46
void setCImm(const ConstantInt *CI)
const DataLayout & getDataLayout() const
iterator getFirstNonPHI()
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
uint16_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:292
Libcall getSINTTOFP(EVT OpVT, EVT RetVT)
getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or UNKNOWN_LIBCALL if there is none...
LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
Legalize an instruction by reducing the width of the underlying scalar type.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
MachineInstrBuilder insertInstr(MachineInstrBuilder MIB)
Insert an existing instruction at the insertion point.
uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
This file describes how to lower LLVM calls to machine code calls.
LegalizeResult narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT Ty)
MachineInstrBuilder buildShl(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1, Optional< unsigned > Flags=None)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
MachineInstrBuilder buildAnd(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1)
Build and insert Res = G_AND Op0, Op1.
MachineInstrBuilder buildMerge(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_MERGE_VALUES Op0, ...
IRTranslator LLVM IR MI
unsigned greater than
Definition: InstrTypes.h:755
const MachineInstrBuilder & addDef(unsigned RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
MachineInstrBuilder buildUndef(const DstOp &Res)
Build and insert Res = IMPLICIT_DEF.
static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits)
Get a low-level vector of some number of elements and element width.
Register createGenericVirtualRegister(LLT Ty, StringRef Name="")
Create and return a new generic virtual register with low-level type Ty.
Register getReg() const
getReg - Returns the register number.
MachineInstrBuilder buildConcatVectors(const DstOp &Res, ArrayRef< Register > Ops)
Build and insert Res = G_CONCAT_VECTORS Op0, ...
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned char TargetFlags=0) const
#define LLVM_DEBUG(X)
Definition: Debug.h:122
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:416
const ConstantInt * getCImm() const
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
MachineInstrBuilder buildAtomicCmpXchg(Register OldValRes, Register Addr, Register CmpVal, Register NewVal, MachineMemOperand &MMO)
Build and insert OldValRes<def> = G_ATOMIC_CMPXCHG Addr, CmpVal, NewVal, MMO.
The operation is expected to be selectable directly by the target, and no transformation is necessary...
Definition: LegalizerInfo.h:47
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:297
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
LegalizeResult fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy)
bool isNullValue() const
Determine if all bits are clear.
Definition: APInt.h:405
Wrapper class representing virtual and physical registers.
Definition: Register.h:18
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:143
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition: MathExtras.h:663
This file describes how to lower LLVM code to machine code.
unsigned getPredicate() const
void resize(size_type N)
Definition: SmallVector.h:344